Lines Matching +full:we +full:- +full:on +full:- +full:ns

1 // SPDX-License-Identifier: GPL-2.0-only
7 * Based on code from fs/super.c, copyright Linus Torvalds and others.
137 struct ns_common *ns; in node_to_mnt_ns() local
141 ns = rb_entry(node, struct ns_common, ns_tree_node); in node_to_mnt_ns()
142 return container_of(ns, struct mnt_namespace, ns); in node_to_mnt_ns()
145 static void mnt_ns_release(struct mnt_namespace *ns) in mnt_ns_release() argument
148 if (ns && refcount_dec_and_test(&ns->passive)) { in mnt_ns_release()
149 fsnotify_mntns_delete(ns); in mnt_ns_release()
150 put_user_ns(ns->user_ns); in mnt_ns_release()
151 kfree(ns); in mnt_ns_release()
158 mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu)); in DEFINE_FREE()
161 static void mnt_ns_tree_remove(struct mnt_namespace *ns) in mnt_ns_tree_remove() argument
164 if (ns_tree_active(ns)) in mnt_ns_tree_remove()
165 ns_tree_remove(ns); in mnt_ns_tree_remove()
167 call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu); in mnt_ns_tree_remove()
178 * Note the lookup is lockless, protected by a sequence counter. We only
180 * possible. So if we didn't find a mount namespace and the sequence
181 * counter has changed we need to retry. If the sequence counter is
182 * still the same we know the search actually failed.
187 struct ns_common *ns; in lookup_mnt_ns() local
190 ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS); in lookup_mnt_ns()
191 if (!ns) in lookup_mnt_ns()
195 * The last reference count is put with RCU delay so we can in lookup_mnt_ns()
198 mnt_ns = container_of(ns, struct mnt_namespace, ns); in lookup_mnt_ns()
199 refcount_inc(&mnt_ns->passive); in lookup_mnt_ns()
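The retry discipline described above can be shown in miniature. Below is a hedged userspace sketch, not the kernel's implementation: tree_lookup() and tree_seq are hypothetical stand-ins for ns_tree_lookup_rcu() and the sequence counter guarding the namespace tree.

    #include <stdatomic.h>
    #include <stddef.h>

    extern _Atomic unsigned tree_seq;            /* bumped on every tree change */
    extern void *tree_lookup(unsigned long id);  /* lockless, may race with updates */

    void *lookup_with_retry(unsigned long id)
    {
        for (;;) {
            unsigned seq = atomic_load(&tree_seq);
            void *hit = tree_lookup(id);
            if (hit)
                return hit;        /* a positive result needs no retry */
            if (atomic_load(&tree_seq) == seq)
                return NULL;       /* tree unchanged: the miss is real */
            /* the tree changed under us: the miss may be spurious, retry */
        }
    }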
233 res = __xa_alloc(&mnt_id_xa, &mnt->mnt_id, mnt, XA_LIMIT(1, INT_MAX), GFP_KERNEL); in mnt_alloc_id()
235 mnt->mnt_id_unique = ++mnt_id_ctr; in mnt_alloc_id()
242 xa_erase(&mnt_id_xa, mnt->mnt_id); in mnt_free_id()
254 mnt->mnt_group_id = res; in mnt_alloc_group_id()
263 ida_free(&mnt_group_ida, mnt->mnt_group_id); in mnt_release_group_id()
264 mnt->mnt_group_id = 0; in mnt_release_group_id()
273 this_cpu_add(mnt->mnt_pcp->mnt_count, n); in mnt_add_count()
276 mnt->mnt_count += n; in mnt_add_count()
291 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; in mnt_get_count()
296 return mnt->mnt_count; in mnt_get_count()
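A minimal userspace analogue of this per-CPU counting scheme, assuming a fixed CPU count and C11 atomics in place of the kernel's this_cpu_add()/per_cpu_ptr(): each slot is updated locklessly, individual slots may go negative, and only the sum across all slots is meaningful.

    #include <stdatomic.h>

    #define NR_CPUS 8
    static _Atomic long mnt_count[NR_CPUS];   /* one slot per CPU */

    static void add_count(int cpu, long n)    /* mnt_add_count() analogue */
    {
        atomic_fetch_add_explicit(&mnt_count[cpu], n, memory_order_relaxed);
    }

    static long get_count(void)               /* mnt_get_count() analogue */
    {
        long sum = 0;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            sum += atomic_load_explicit(&mnt_count[cpu],
                                        memory_order_relaxed);
        return sum;    /* only the total is meaningful */
    }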
311 mnt->mnt_devname = kstrdup_const(name, in alloc_vfsmnt()
314 mnt->mnt_devname = "none"; in alloc_vfsmnt()
315 if (!mnt->mnt_devname) in alloc_vfsmnt()
319 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); in alloc_vfsmnt()
320 if (!mnt->mnt_pcp) in alloc_vfsmnt()
323 this_cpu_add(mnt->mnt_pcp->mnt_count, 1); in alloc_vfsmnt()
325 mnt->mnt_count = 1; in alloc_vfsmnt()
326 mnt->mnt_writers = 0; in alloc_vfsmnt()
329 INIT_HLIST_NODE(&mnt->mnt_hash); in alloc_vfsmnt()
330 INIT_LIST_HEAD(&mnt->mnt_child); in alloc_vfsmnt()
331 INIT_LIST_HEAD(&mnt->mnt_mounts); in alloc_vfsmnt()
332 INIT_LIST_HEAD(&mnt->mnt_list); in alloc_vfsmnt()
333 INIT_LIST_HEAD(&mnt->mnt_expire); in alloc_vfsmnt()
334 INIT_LIST_HEAD(&mnt->mnt_share); in alloc_vfsmnt()
335 INIT_HLIST_HEAD(&mnt->mnt_slave_list); in alloc_vfsmnt()
336 INIT_HLIST_NODE(&mnt->mnt_slave); in alloc_vfsmnt()
337 INIT_HLIST_NODE(&mnt->mnt_mp_list); in alloc_vfsmnt()
338 INIT_HLIST_HEAD(&mnt->mnt_stuck_children); in alloc_vfsmnt()
339 RB_CLEAR_NODE(&mnt->mnt_node); in alloc_vfsmnt()
340 mnt->mnt.mnt_idmap = &nop_mnt_idmap; in alloc_vfsmnt()
346 kfree_const(mnt->mnt_devname); in alloc_vfsmnt()
356 * Most r/o checks on a fs are for operations that take
358 * We must keep track of when those operations start
360 * we can determine when writes are able to occur to
364 * __mnt_is_readonly: check whether a mount is read-only
376 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); in __mnt_is_readonly()
383 this_cpu_inc(mnt->mnt_pcp->mnt_writers); in mnt_inc_writers()
385 mnt->mnt_writers++; in mnt_inc_writers()
392 this_cpu_dec(mnt->mnt_pcp->mnt_writers); in mnt_dec_writers()
394 mnt->mnt_writers--; in mnt_dec_writers()
405 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; in mnt_get_writers()
410 return mnt->mnt_writers; in mnt_get_writers()
416 if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) in mnt_is_readonly()
420 * making sure if we don't see s_readonly_remount set yet, we also will in mnt_is_readonly()
423 * assuring that if we see s_readonly_remount already cleared, we will in mnt_is_readonly()
431 * Most r/o & frozen checks on a fs are for operations that take discrete
432 * amounts of time, like a write() or unlink(). We must keep track of when
433 * those operations start (for permission checks) and when they end, so that we
437 * mnt_get_write_access - get write access to a mount without freeze protection
438 * @m: the mount on which to take a write
440 * This tells the low-level filesystem that a write is about to be performed to
441 * it, and makes sure that writes are allowed (the mount is read-write) before
454 * The store to mnt_inc_writers must be visible before we pass in mnt_get_write_access()
460 while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) { in mnt_get_write_access()
466 * setting WRITE_HOLD got preempted on a remote in mnt_get_write_access()
479 * sure that if we see WRITE_HOLD cleared, we will also see in mnt_get_write_access()
481 * mnt_is_readonly() and bail in case we are racing with remount in mnt_get_write_access()
482 * read-only. in mnt_get_write_access()
487 ret = -EROFS; in mnt_get_write_access()
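The writer-side protocol above, reduced to a hedged C11-atomics sketch: the flags stand in for MNT_WRITE_HOLD and MNT_READONLY, and the seq_cst fence stands in for the kernel's smp_mb().

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    extern _Atomic long writers;     /* mnt_writers analogue */
    extern _Atomic bool write_hold;  /* MNT_WRITE_HOLD analogue */
    extern _Atomic bool readonly;    /* MNT_READONLY analogue */

    int get_write_access_demo(void)
    {
        atomic_fetch_add(&writers, 1);              /* announce the write */
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with the holder's fence */
        while (atomic_load(&write_hold))
            ;                                       /* remount r/o in progress, spin */
        if (atomic_load(&readonly)) {               /* lost the race: back out */
            atomic_fetch_sub(&writers, 1);
            return -EROFS;
        }
        return 0;
    }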
496 * mnt_want_write - get write access to a mount
497 * @m: the mount on which to take a write
499 * This tells the low-level filesystem that a write is about to be performed to
500 * it, and makes sure that writes are allowed (mount is read-write, filesystem
508 sb_start_write(m->mnt_sb); in mnt_want_write()
511 sb_end_write(m->mnt_sb); in mnt_want_write()
517 * mnt_get_write_access_file - get write access to a file's mount
518 * @file: the file whose mount is to be written to in mnt_get_write_access_file()
527 if (file->f_mode & FMODE_WRITER) { in mnt_get_write_access_file()
530 * writable fd's, e.g. due to a fs error with errors=remount-ro in mnt_get_write_access_file()
532 if (__mnt_is_readonly(file->f_path.mnt)) in mnt_get_write_access_file()
533 return -EROFS; in mnt_get_write_access_file()
536 return mnt_get_write_access(file->f_path.mnt); in mnt_get_write_access_file()
540 * mnt_want_write_file - get write access to a file's mount
541 * @file: the file whose mount is to be written to in mnt_want_write_file()
552 sb_start_write(file_inode(file)->i_sb); in mnt_want_write_file()
555 sb_end_write(file_inode(file)->i_sb); in mnt_want_write_file()
561 * mnt_put_write_access - give up write access to a mount
562 * @mnt: the mount on which to give up write access
564 * Tells the low-level filesystem that we are done
577 * mnt_drop_write - give up write access to a mount
578 * @mnt: the mount on which to give up write access
580 * Tells the low-level filesystem that we are done performing writes to it and
587 sb_end_write(mnt->mnt_sb); in mnt_drop_write()
593 if (!(file->f_mode & FMODE_WRITER)) in mnt_put_write_access_file()
594 mnt_put_write_access(file->f_path.mnt); in mnt_put_write_access_file()
600 sb_end_write(file_inode(file)->i_sb); in mnt_drop_write_file()
605 * mnt_hold_writers - prevent write access to the given mount
619 * Return: On success 0 is returned.
620 * On error, -EBUSY is returned.
626 * After storing WRITE_HOLD, we'll read the counters. This store in mnt_hold_writers()
627 * should be visible before we do. in mnt_hold_writers()
632 * With writers on hold, if this value is zero, then there are in mnt_hold_writers()
637 * It is OK to have counter incremented on one CPU and decremented on in mnt_hold_writers()
638 * another: the sum will add up correctly. The danger would be when we in mnt_hold_writers()
639 * sum up each counter, if we read a counter before it is incremented, in mnt_hold_writers()
641 * decremented from -- we would see more decrements than we should. in mnt_hold_writers()
643 * mnt_want_write first increments count, then smp_mb, then spins on in mnt_hold_writers()
645 * we're counting up here. in mnt_hold_writers()
648 return -EBUSY; in mnt_hold_writers()
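The holder side of the same handshake, reusing the declarations from the writer sketch above: publish the hold flag, fence, then sum the writer counts. Any writer that got past its own fence before the hold was visible is seen here and makes the transition fail.

    int hold_writers_demo(void)    /* mnt_hold_writers() analogue */
    {
        atomic_store(&write_hold, true);
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with the writer's fence */
        if (atomic_load(&writers) != 0)
            return -EBUSY;         /* an in-flight writer holds us off */
        return 0;                  /* no writers: safe to flip to read-only */
    }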
654 * mnt_unhold_writers - stop preventing write access to the given mount
679 struct mount **p = m->mnt_pprev_for_sb; in mnt_del_instance()
680 struct mount *next = m->mnt_next_for_sb; in mnt_del_instance()
683 next->mnt_pprev_for_sb = p; in mnt_del_instance()
689 struct mount *first = s->s_mounts; in mnt_add_instance()
692 first->mnt_pprev_for_sb = &m->mnt_next_for_sb; in mnt_add_instance()
693 m->mnt_next_for_sb = first; in mnt_add_instance()
694 m->mnt_pprev_for_sb = &s->s_mounts; in mnt_add_instance()
695 s->s_mounts = m; in mnt_add_instance()
704 mnt->mnt.mnt_flags |= MNT_READONLY; in mnt_make_readonly()
714 if (atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
715 return -EBUSY; in sb_prepare_remount_readonly()
719 for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { in sb_prepare_remount_readonly()
720 if (!(m->mnt.mnt_flags & MNT_READONLY)) { in sb_prepare_remount_readonly()
726 if (!err && atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
727 err = -EBUSY; in sb_prepare_remount_readonly()
731 for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { in sb_prepare_remount_readonly()
741 mnt_idmap_put(mnt_idmap(&mnt->mnt)); in free_vfsmnt()
742 kfree_const(mnt->mnt_devname); in free_vfsmnt()
744 free_percpu(mnt->mnt_pcp); in free_vfsmnt()
768 if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { in __legitimize_mnt()
769 mnt_add_count(mnt, -1); in __legitimize_mnt()
775 return -1; in __legitimize_mnt()
793 * __lookup_mnt - mount hash lookup
797 * If @mnt has a child mount @c mounted on @dentry find and return it.
802 * Return: The child of @mnt mounted on @dentry or %NULL.
810 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) in __lookup_mnt()
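A self-contained sketch of the predicate on the line above, with hypothetical types standing in for struct mount and the hash chain: a child matches only when both its parent and its mountpoint dentry match.

    struct mnt_demo {
        struct mnt_demo *next;        /* hash-chain link */
        struct mnt_demo *parent;      /* mount we are attached to */
        const void *mountpoint;       /* dentry we are mounted on */
    };

    static struct mnt_demo *lookup_demo(struct mnt_demo *chain,
                                        const struct mnt_demo *parent,
                                        const void *dentry)
    {
        for (struct mnt_demo *p = chain; p; p = p->next)
            if (p->parent == parent && p->mountpoint == dentry)
                return p;             /* child mounted exactly here */
        return NULL;
    }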
816 * lookup_mnt - Return the child mount mounted at given location
831 child_mnt = __lookup_mnt(path->mnt, path->dentry); in lookup_mnt()
832 m = child_mnt ? &child_mnt->mnt : NULL; in lookup_mnt()
839 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
843 * test is handled inline. For the slow case when we are actually
848 * The mount_hashtable is not usable in this context because we
855 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in __is_local_mountpoint() local
860 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) in __is_local_mountpoint()
861 if (mnt->mnt_mountpoint == dentry) in __is_local_mountpoint()
879 if (mp->m_dentry == dentry) { in lookup_mountpoint()
880 hlist_add_head(&m->node, &mp->m_list); in lookup_mountpoint()
881 m->mp = mp; in lookup_mountpoint()
897 return -ENOENT; in get_mountpoint()
909 return -ENOMEM; in get_mountpoint()
915 if (ret == -EBUSY) in get_mountpoint()
924 mp->m_dentry = dget(dentry); in get_mountpoint()
925 hlist_add_head(&mp->m_hash, mp_hash(dentry)); in get_mountpoint()
926 INIT_HLIST_HEAD(&mp->m_list); in get_mountpoint()
927 hlist_add_head(&m->node, &mp->m_list); in get_mountpoint()
928 m->mp = no_free_ptr(mp); in get_mountpoint()
939 if (hlist_empty(&mp->m_list)) { in maybe_free_mountpoint()
940 struct dentry *dentry = mp->m_dentry; in maybe_free_mountpoint()
941 spin_lock(&dentry->d_lock); in maybe_free_mountpoint()
942 dentry->d_flags &= ~DCACHE_MOUNTED; in maybe_free_mountpoint()
943 spin_unlock(&dentry->d_lock); in maybe_free_mountpoint()
945 hlist_del(&mp->m_hash); in maybe_free_mountpoint()
955 if (m->mp) { in unpin_mountpoint()
956 hlist_del(&m->node); in unpin_mountpoint()
957 maybe_free_mountpoint(m->mp, &ex_mountpoints); in unpin_mountpoint()
963 return mnt->mnt_ns == current->nsproxy->mnt_ns; in check_mnt()
970 if (!is_anon_ns(mnt->mnt_ns)) in check_anonymous_mnt()
973 seq = mnt->mnt_ns->seq_origin; in check_anonymous_mnt()
974 return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id); in check_anonymous_mnt()
980 static void touch_mnt_namespace(struct mnt_namespace *ns) in touch_mnt_namespace() argument
982 if (ns) { in touch_mnt_namespace()
983 ns->event = ++event; in touch_mnt_namespace()
984 wake_up_interruptible(&ns->poll); in touch_mnt_namespace()
991 static void __touch_mnt_namespace(struct mnt_namespace *ns) in __touch_mnt_namespace() argument
993 if (ns && ns->event != event) { in __touch_mnt_namespace()
994 ns->event = event; in __touch_mnt_namespace()
995 wake_up_interruptible(&ns->poll); in __touch_mnt_namespace()
1005 struct mount *parent = mnt->mnt_parent; in __umount_mnt()
1006 if (unlikely(parent->overmount == mnt)) in __umount_mnt()
1007 parent->overmount = NULL; in __umount_mnt()
1008 mnt->mnt_parent = mnt; in __umount_mnt()
1009 mnt->mnt_mountpoint = mnt->mnt.mnt_root; in __umount_mnt()
1010 list_del_init(&mnt->mnt_child); in __umount_mnt()
1011 hlist_del_init_rcu(&mnt->mnt_hash); in __umount_mnt()
1012 hlist_del_init(&mnt->mnt_mp_list); in __umount_mnt()
1013 mp = mnt->mnt_mp; in __umount_mnt()
1014 mnt->mnt_mp = NULL; in __umount_mnt()
1033 child_mnt->mnt_mountpoint = mp->m_dentry; in mnt_set_mountpoint()
1034 child_mnt->mnt_parent = mnt; in mnt_set_mountpoint()
1035 child_mnt->mnt_mp = mp; in mnt_set_mountpoint()
1036 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); in mnt_set_mountpoint()
1041 struct mount *parent = mnt->mnt_parent; in make_visible()
1042 if (unlikely(mnt->mnt_mountpoint == parent->mnt.mnt_root)) in make_visible()
1043 parent->overmount = mnt; in make_visible()
1044 hlist_add_head_rcu(&mnt->mnt_hash, in make_visible()
1045 m_hash(&parent->mnt, mnt->mnt_mountpoint)); in make_visible()
1046 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); in make_visible()
1050 * attach_mnt - mount a mount, attach to @mount_hashtable and parent's
1056 * Mount @mnt at @mp on @parent. Then attach @mnt
1059 * Note, when make_visible() is called @mnt->mnt_parent already points
1074 struct mountpoint *old_mp = mnt->mnt_mp; in mnt_change_mountpoint()
1076 list_del_init(&mnt->mnt_child); in mnt_change_mountpoint()
1077 hlist_del_init(&mnt->mnt_mp_list); in mnt_change_mountpoint()
1078 hlist_del_init_rcu(&mnt->mnt_hash); in mnt_change_mountpoint()
1090 static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) in mnt_add_to_ns() argument
1092 struct rb_node **link = &ns->mounts.rb_node; in mnt_add_to_ns()
1097 mnt->mnt_ns = ns; in mnt_add_to_ns()
1100 if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) { in mnt_add_to_ns()
1101 link = &parent->rb_left; in mnt_add_to_ns()
1104 link = &parent->rb_right; in mnt_add_to_ns()
1110 ns->mnt_last_node = &mnt->mnt_node; in mnt_add_to_ns()
1112 ns->mnt_first_node = &mnt->mnt_node; in mnt_add_to_ns()
1113 rb_link_node(&mnt->mnt_node, parent, link); in mnt_add_to_ns()
1114 rb_insert_color(&mnt->mnt_node, &ns->mounts); in mnt_add_to_ns()
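The insertion above keeps a namespace's mounts in a tree ordered by their unique 64-bit mount id, so an in-order walk enumerates them in id order. A plain binary-search-tree sketch of the same descent (the kernel additionally caches first/last node pointers and rebalances via rb_insert_color()):

    #include <stddef.h>
    #include <stdint.h>

    struct node { uint64_t id; struct node *left, *right; };

    static void insert_by_id(struct node **link, struct node *n)
    {
        while (*link) {                       /* descend to the free slot */
            if (n->id < (*link)->id)
                link = &(*link)->left;
            else
                link = &(*link)->right;
        }
        n->left = n->right = NULL;
        *link = n;                            /* ids are unique, no equal case */
    }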
1121 struct list_head *next = p->mnt_mounts.next; in next_mnt()
1122 if (next == &p->mnt_mounts) { in next_mnt()
1126 next = p->mnt_child.next; in next_mnt()
1127 if (next != &p->mnt_parent->mnt_mounts) in next_mnt()
1129 p = p->mnt_parent; in next_mnt()
1137 struct list_head *prev = p->mnt_mounts.prev; in skip_mnt_tree()
1138 while (prev != &p->mnt_mounts) { in skip_mnt_tree()
1140 prev = p->mnt_mounts.prev; in skip_mnt_tree()
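next_mnt() above walks the mount tree in preorder via child and sibling lists. The same walk with explicit pointers instead of the kernel's list_heads (a sketch, not the kernel's types): descend into the first child, otherwise climb toward the root taking the next sibling at each level.

    struct tnode { struct tnode *parent, *first_child, *next_sibling; };

    static struct tnode *next_preorder(struct tnode *p, struct tnode *root)
    {
        if (p->first_child)
            return p->first_child;            /* descend first */
        while (p != root) {
            if (p->next_sibling)
                return p->next_sibling;       /* then the next sibling */
            p = p->parent;                    /* otherwise climb back up */
        }
        return NULL;                          /* whole subtree visited */
    }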
1150 struct mnt_namespace *n = mnt->mnt_parent->mnt_ns; in commit_tree()
1155 n->nr_mounts += n->pending_mounts; in commit_tree()
1156 n->pending_mounts = 0; in commit_tree()
1165 struct super_block *s = root->d_sb; in setup_mnt()
1167 atomic_inc(&s->s_active); in setup_mnt()
1168 m->mnt.mnt_sb = s; in setup_mnt()
1169 m->mnt.mnt_root = dget(root); in setup_mnt()
1170 m->mnt_mountpoint = m->mnt.mnt_root; in setup_mnt()
1171 m->mnt_parent = m; in setup_mnt()
1178 * vfs_create_mount - Create a mount for a configured superblock
1190 if (!fc->root) in vfs_create_mount()
1191 return ERR_PTR(-EINVAL); in vfs_create_mount()
1193 mnt = alloc_vfsmnt(fc->source); in vfs_create_mount()
1195 return ERR_PTR(-ENOMEM); in vfs_create_mount()
1197 if (fc->sb_flags & SB_KERNMOUNT) in vfs_create_mount()
1198 mnt->mnt.mnt_flags = MNT_INTERNAL; in vfs_create_mount()
1200 setup_mnt(mnt, fc->root); in vfs_create_mount()
1202 return &mnt->mnt; in vfs_create_mount()
1210 up_write(&fc->root->d_sb->s_umount); in fc_mount()
1221 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; in fc_mount_longterm()
1235 return ERR_PTR(-EINVAL); in vfs_kern_mount()
1261 mnt = alloc_vfsmnt(old->mnt_devname); in clone_mnt()
1263 return ERR_PTR(-ENOMEM); in clone_mnt()
1265 mnt->mnt.mnt_flags = READ_ONCE(old->mnt.mnt_flags) & in clone_mnt()
1269 mnt->mnt_group_id = 0; /* not a peer of original */ in clone_mnt()
1271 mnt->mnt_group_id = old->mnt_group_id; in clone_mnt()
1273 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { in clone_mnt()
1279 if (mnt->mnt_group_id) in clone_mnt()
1282 mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); in clone_mnt()
1286 if (flag & CL_PRIVATE) // we are done with it in clone_mnt()
1290 list_add(&mnt->mnt_share, &old->mnt_share); in clone_mnt()
1292 if ((flag & CL_SLAVE) && old->mnt_group_id) { in clone_mnt()
1293 hlist_add_head(&mnt->mnt_slave, &old->mnt_slave_list); in clone_mnt()
1294 mnt->mnt_master = old; in clone_mnt()
1296 hlist_add_behind(&mnt->mnt_slave, &old->mnt_slave); in clone_mnt()
1297 mnt->mnt_master = old->mnt_master; in clone_mnt()
1314 * filesystem was probably unable to make r/w->r/o transitions. in cleanup_mnt()
1319 if (unlikely(mnt->mnt_pins.first)) in cleanup_mnt()
1321 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { in cleanup_mnt()
1322 hlist_del(&m->mnt_umount); in cleanup_mnt()
1323 mntput(&m->mnt); in cleanup_mnt()
1325 fsnotify_vfsmount_delete(&mnt->mnt); in cleanup_mnt()
1326 dput(mnt->mnt.mnt_root); in cleanup_mnt()
1327 deactivate_super(mnt->mnt.mnt_sb); in cleanup_mnt()
1329 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); in cleanup_mnt()
1354 if (likely(READ_ONCE(mnt->mnt_ns))) { in mntput_no_expire()
1356 * Since we don't do lock_mount_hash() here, in mntput_no_expire()
1357 * ->mnt_ns can change under us. However, if it's in mntput_no_expire()
1358 * non-NULL, then there's a reference that won't in mntput_no_expire()
1360 * turning ->mnt_ns NULL. So if we observe it in mntput_no_expire()
1361 * non-NULL under rcu_read_lock(), the reference in mntput_no_expire()
1362 * we are dropping is not the final one. in mntput_no_expire()
1364 mnt_add_count(mnt, -1); in mntput_no_expire()
1371 * mount_lock, we'll see their refcount increment here. in mntput_no_expire()
1374 mnt_add_count(mnt, -1); in mntput_no_expire()
1382 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { in mntput_no_expire()
1387 mnt->mnt.mnt_flags |= MNT_DOOMED; in mntput_no_expire()
1391 if (unlikely(!list_empty(&mnt->mnt_expire))) in mntput_no_expire()
1392 list_del(&mnt->mnt_expire); in mntput_no_expire()
1394 if (unlikely(!list_empty(&mnt->mnt_mounts))) { in mntput_no_expire()
1396 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { in mntput_no_expire()
1398 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); in mntput_no_expire()
1404 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { in mntput_no_expire()
1406 if (likely(!(task->flags & PF_KTHREAD))) { in mntput_no_expire()
1407 init_task_work(&mnt->mnt_rcu, __cleanup_mnt); in mntput_no_expire()
1408 if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) in mntput_no_expire()
1411 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) in mntput_no_expire()
1423 if (unlikely(m->mnt_expiry_mark)) in mntput()
1424 WRITE_ONCE(m->mnt_expiry_mark, 0); in mntput()
1446 real_mount(mnt)->mnt_ns = NULL; in mnt_make_shortterm()
1450 * path_is_mountpoint() - Check if path is a mount in the current namespace.
1465 if (!d_mountpoint(path->dentry)) in path_is_mountpoint()
1482 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); in mnt_clone_internal()
1485 p->mnt.mnt_flags |= MNT_INTERNAL; in mnt_clone_internal()
1486 return &p->mnt; in mnt_clone_internal()
1493 static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) in mnt_find_id_at() argument
1495 struct rb_node *node = ns->mounts.rb_node; in mnt_find_id_at()
1501 if (mnt_id <= m->mnt_id_unique) { in mnt_find_id_at()
1503 if (mnt_id == m->mnt_id_unique) in mnt_find_id_at()
1505 node = node->rb_left; in mnt_find_id_at()
1507 node = node->rb_right; in mnt_find_id_at()
1517 static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id) in mnt_find_id_at_reverse() argument
1519 struct rb_node *node = ns->mounts.rb_node; in mnt_find_id_at_reverse()
1525 if (mnt_id >= m->mnt_id_unique) { in mnt_find_id_at_reverse()
1527 if (mnt_id == m->mnt_id_unique) in mnt_find_id_at_reverse()
1529 node = node->rb_right; in mnt_find_id_at_reverse()
1531 node = node->rb_left; in mnt_find_id_at_reverse()
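Both finders descend the id-sorted tree remembering the best candidate so far, and an exact hit ends the walk early. The forward variant as a sketch, reusing struct node from the insertion sketch above (the reverse variant simply mirrors the comparisons):

    static struct node *find_id_at(struct node *root, uint64_t id)
    {
        struct node *best = NULL;             /* smallest id >= target so far */
        for (struct node *n = root; n; ) {
            if (id <= n->id) {
                best = n;
                if (id == n->id)
                    break;                    /* exact match, done */
                n = n->left;                  /* maybe a smaller id still fits */
            } else {
                n = n->right;
            }
        }
        return best;
    }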
1539 /* iterator; we want it to have access to namespace_sem, thus here... */
1542 struct proc_mounts *p = m->private; in m_start()
1546 return mnt_find_id_at(p->ns, *pos); in m_start()
1552 struct rb_node *node = rb_next(&mnt->mnt_node); in m_next()
1557 *pos = next->mnt_id_unique; in m_next()
1569 struct proc_mounts *p = m->private; in m_show()
1571 return p->show(m, &r->mnt); in m_show()
1584 * may_umount_tree - check if a mount tree is busy
1612 * may_umount - check if a mount point is busy
1621 * give false negatives. The main reason why it's here is that we need
1622 * a non-destructive way to look for easily umountable filesystems.
1641 if (!p->prev_ns && p->mnt_ns) { in mnt_notify()
1642 fsnotify_mnt_attach(p->mnt_ns, &p->mnt); in mnt_notify()
1643 } else if (p->prev_ns && !p->mnt_ns) { in mnt_notify()
1644 fsnotify_mnt_detach(p->prev_ns, &p->mnt); in mnt_notify()
1645 } else if (p->prev_ns == p->mnt_ns) { in mnt_notify()
1646 fsnotify_mnt_move(p->mnt_ns, &p->mnt); in mnt_notify()
1648 fsnotify_mnt_detach(p->prev_ns, &p->mnt); in mnt_notify()
1649 fsnotify_mnt_attach(p->mnt_ns, &p->mnt); in mnt_notify()
1651 p->prev_ns = p->mnt_ns; in mnt_notify()
1663 list_del_init(&m->to_notify); in notify_mnt_list()
1688 struct mnt_namespace *ns = emptied_ns; in namespace_unlock() local
1707 if (unlikely(ns)) { in namespace_unlock()
1708 /* Make sure we notice when we leak mounts. */ in namespace_unlock()
1709 VFS_WARN_ON_ONCE(!mnt_ns_empty(ns)); in namespace_unlock()
1710 free_mnt_ns(ns); in namespace_unlock()
1721 hlist_del(&m->mnt_umount); in namespace_unlock()
1722 mntput(&m->mnt); in namespace_unlock()
1751 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) in disconnect_mount()
1780 p->mnt.mnt_flags |= MNT_UMOUNT; in umount_tree()
1783 list_add_tail(&p->mnt_list, &tmp_list); in umount_tree()
1788 list_del_init(&p->mnt_child); in umount_tree()
1798 struct mnt_namespace *ns; in umount_tree() local
1801 list_del_init(&p->mnt_expire); in umount_tree()
1802 list_del_init(&p->mnt_list); in umount_tree()
1803 ns = p->mnt_ns; in umount_tree()
1804 if (ns) { in umount_tree()
1805 ns->nr_mounts--; in umount_tree()
1806 __touch_mnt_namespace(ns); in umount_tree()
1808 p->mnt_ns = NULL; in umount_tree()
1810 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; in umount_tree()
1816 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); in umount_tree()
1822 hlist_add_head(&p->mnt_umount, &unmounted); in umount_tree()
1825 * At this point p->mnt_ns is NULL, notification will be queued in umount_tree()
1828 * - p->prev_ns is non-NULL *and* in umount_tree()
1829 * - p->prev_ns->n_fsnotify_marks is non-NULL in umount_tree()
1845 down_write(&sb->s_umount); in do_umount_root()
1849 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, in do_umount_root()
1860 up_write(&sb->s_umount); in do_umount_root()
1866 struct super_block *sb = mnt->mnt.mnt_sb; in do_umount()
1869 retval = security_sb_umount(&mnt->mnt, flags); in do_umount()
1880 if (&mnt->mnt == current->fs->root.mnt || in do_umount()
1882 return -EINVAL; in do_umount()
1885 * probably don't strictly need the lock here if we examined in do_umount()
1889 if (!list_empty(&mnt->mnt_mounts) || mnt_get_count(mnt) != 2) { in do_umount()
1891 return -EBUSY; in do_umount()
1895 if (!xchg(&mnt->mnt_expiry_mark, 1)) in do_umount()
1896 return -EAGAIN; in do_umount()
1900 * If we may have to abort operations to get out of this in do_umount()
1901 * mount, and they will themselves hold resources we must in do_umount()
1904 * might fail to complete on the first run through as other tasks in do_umount()
1909 if (flags & MNT_FORCE && sb->s_op->umount_begin) { in do_umount()
1910 sb->s_op->umount_begin(sb); in do_umount()
1916 * Ho-hum... In principle, we might treat that as umount + switch in do_umount()
1919 * /reboot - static binary that would close all descriptors and in do_umount()
1922 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { in do_umount()
1925 * we just try to remount it readonly. in do_umount()
1927 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) in do_umount()
1928 return -EPERM; in do_umount()
1935 /* Repeat the earlier racy checks, now that we are holding the locks */ in do_umount()
1936 retval = -EINVAL; in do_umount()
1940 if (mnt->mnt.mnt_flags & MNT_LOCKED) in do_umount()
1953 retval = -EBUSY; in do_umount()
1966 * __detach_mounts - lazily unmount all mounts on the specified dentry
1973 * The caller may hold dentry->d_inode->i_rwsem.
1989 if (mnt->mnt.mnt_flags & MNT_UMOUNT) { in __detach_mounts()
1991 hlist_add_head(&mnt->mnt_umount, &unmounted); in __detach_mounts()
2003 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); in may_mount()
2017 struct mount *mnt = real_mount(path->mnt); in can_umount()
2018 struct super_block *sb = path->dentry->d_sb; in can_umount()
2021 return -EPERM; in can_umount()
2023 return -EINVAL; in can_umount()
2025 return -EINVAL; in can_umount()
2026 if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ in can_umount()
2027 return -EINVAL; in can_umount()
2028 if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) in can_umount()
2029 return -EPERM; in can_umount()
2036 struct mount *mnt = real_mount(path->mnt); in path_umount()
2043 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ in path_umount()
2044 dput(path->dentry); in path_umount()
2057 return -EINVAL; in ksys_umount()
2086 struct ns_common *ns; in is_mnt_ns_file() local
2089 if (dentry->d_op != &ns_dentry_operations) in is_mnt_ns_file()
2092 ns = d_inode(dentry)->i_private; in is_mnt_ns_file()
2094 return ns->ops == &mntns_operations; in is_mnt_ns_file()
2099 return &mnt->ns; in from_mnt_ns()
2104 struct ns_common *ns; in get_sequential_mnt_ns() local
2109 ns = ns_tree_adjoined_rcu(mntns, previous); in get_sequential_mnt_ns()
2110 if (IS_ERR(ns)) in get_sequential_mnt_ns()
2111 return ERR_CAST(ns); in get_sequential_mnt_ns()
2113 mntns = to_mnt_ns(ns); in get_sequential_mnt_ns()
2120 if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN)) in get_sequential_mnt_ns()
2124 * We need an active reference count as we're persisting in get_sequential_mnt_ns()
2125 * the mount namespace and it might already be on its in get_sequential_mnt_ns()
2140 return to_mnt_ns(get_proc_ns(dentry->d_inode)); in mnt_ns_from_dentry()
2153 return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id; in mnt_ns_loop()
2163 return ERR_PTR(-EINVAL); in copy_tree()
2166 return ERR_PTR(-EINVAL); in copy_tree()
2174 list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) { in copy_tree()
2175 if (!is_subdir(src_root_child->mnt_mountpoint, dentry)) in copy_tree()
2182 if (src_mnt->mnt.mnt_flags & MNT_LOCKED) { in copy_tree()
2184 dst_mnt = ERR_PTR(-EPERM); in copy_tree()
2192 is_mnt_ns_file(src_mnt->mnt.mnt_root)) { in copy_tree()
2196 while (src_parent != src_mnt->mnt_parent) { in copy_tree()
2197 src_parent = src_parent->mnt_parent; in copy_tree()
2198 dst_mnt = dst_mnt->mnt_parent; in copy_tree()
2203 dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag); in copy_tree()
2207 if (src_mnt->mnt.mnt_flags & MNT_LOCKED) in copy_tree()
2208 dst_mnt->mnt.mnt_flags |= MNT_LOCKED; in copy_tree()
2210 /* stick the duplicate mount on the same expiry in copy_tree()
2211 * list as the original if that was on one */ in copy_tree()
2212 if (!list_empty(&src_mnt->mnt_expire)) in copy_tree()
2213 list_add(&dst_mnt->mnt_expire, in copy_tree()
2214 &src_mnt->mnt_expire); in copy_tree()
2216 attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp); in copy_tree()
2250 struct mount *root = real_mount(path->mnt); in collect_paths()
2258 return ERR_PTR(-EINVAL); in collect_paths()
2260 return ERR_PTR(-ENOMEM); in collect_paths()
2262 list_for_each_entry(child, &root->mnt_mounts, mnt_child) { in collect_paths()
2263 if (!is_subdir(child->mnt_mountpoint, path->dentry)) in collect_paths()
2267 return ERR_PTR(-ENOMEM); in collect_paths()
2268 res[n].mnt = &m->mnt; in collect_paths()
2269 res[n].dentry = m->mnt.mnt_root; in collect_paths()
2274 return ERR_PTR(-ENOMEM); in collect_paths()
2275 memset(res + n, 0, (count - n) * sizeof(struct path)); in collect_paths()
2276 for (struct path *p = res; p->mnt; p++) in collect_paths()
2283 for (const struct path *p = paths; p->mnt; p++) in drop_collected_paths()
2297 * we need to dissolve the mount tree and free that namespace. in dissolve_on_fput()
2298 * Let's try to avoid taking namespace_sem if we can determine in dissolve_on_fput()
2299 * that there's nothing to do without it - rcu_read_lock() is in dissolve_on_fput()
2300 * enough to make anon_ns_root() memory-safe and once m has in dissolve_on_fput()
2302 * never become a root of anon ns again. in dissolve_on_fput()
2314 emptied_ns = m->mnt_ns; in dissolve_on_fput()
2326 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in __has_locked_children()
2327 if (!is_subdir(child->mnt_mountpoint, dentry)) in __has_locked_children()
2330 if (child->mnt.mnt_flags & MNT_LOCKED) in __has_locked_children()
2345 * that aren't checked by the mount-cycle checking code, thereby allowing
2353 if (mnt_ns_loop(p->mnt.mnt_root)) in check_for_nsfs_mounts()
2359 * clone_private_mount - create a private clone of a path
2372 struct mount *old_mnt = real_mount(path->mnt); in clone_private_mount()
2378 return ERR_PTR(-EINVAL); in clone_private_mount()
2384 * namespace, and we need to make sure no namespace in clone_private_mount()
2389 return ERR_PTR(-EINVAL); in clone_private_mount()
2392 return ERR_PTR(-EINVAL); in clone_private_mount()
2395 if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) in clone_private_mount()
2396 return ERR_PTR(-EPERM); in clone_private_mount()
2398 if (__has_locked_children(old_mnt, path->dentry)) in clone_private_mount()
2399 return ERR_PTR(-EINVAL); in clone_private_mount()
2401 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); in clone_private_mount()
2403 return ERR_PTR(-EINVAL); in clone_private_mount()
2406 new_mnt->mnt_ns = MNT_NS_INTERNAL; in clone_private_mount()
2407 return &new_mnt->mnt; in clone_private_mount()
2416 int flags = p->mnt.mnt_flags; in lock_mnt_tree()
2432 if (list_empty(&p->mnt_expire) && p != mnt) in lock_mnt_tree()
2434 p->mnt.mnt_flags = flags; in lock_mnt_tree()
2443 if (p->mnt_group_id && !IS_MNT_SHARED(p)) in cleanup_group_ids()
2453 if (!p->mnt_group_id) { in invent_group_ids()
2465 int count_mounts(struct mnt_namespace *ns, struct mount *mnt) in count_mounts() argument
2471 if (ns->nr_mounts >= max) in count_mounts()
2472 return -ENOSPC; in count_mounts()
2473 max -= ns->nr_mounts; in count_mounts()
2474 if (ns->pending_mounts >= max) in count_mounts()
2475 return -ENOSPC; in count_mounts()
2476 max -= ns->pending_mounts; in count_mounts()
2482 return -ENOSPC; in count_mounts()
2484 ns->pending_mounts += mounts; in count_mounts()
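The quota check above subtracts in stages so no comparison can overflow even with pathological pending counts. In isolation, as a sketch with plain unsigned arithmetic (the kernel reads max from a sysctl and counts the incoming tree itself):

    #include <errno.h>

    static int count_mounts_demo(unsigned int max, unsigned int nr_mounts,
                                 unsigned int pending, unsigned int incoming)
    {
        if (nr_mounts >= max)
            return -ENOSPC;
        max -= nr_mounts;                 /* room left after existing mounts */
        if (pending >= max)
            return -ENOSPC;
        max -= pending;                   /* ... and after pending ones */
        if (incoming > max)
            return -ENOSPC;
        return 0;                         /* caller adds incoming to pending */
    }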
2494 * attach_recursive_mnt - attach a source mount tree
2500 * ---------------------------------------------------------------------------
2503 * | source-->| shared | private | slave | unbindable |
2510 * |non-shared| shared (+) | private | slave (*) | invalid |
2512 * A bind operation clones the source mount and mounts the clone on the
2528 * ---------------------------------------------------------------------------
2531 * | source-->| shared | private | slave | unbindable |
2538 * |non-shared| shared (+*) | private | slave (*) | unbindable |
2561 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; in attach_recursive_mnt()
2562 struct mount *dest_mnt = dest->parent; in attach_recursive_mnt()
2563 struct mountpoint *dest_mp = dest->mp; in attach_recursive_mnt()
2565 struct mnt_namespace *ns = dest_mnt->mnt_ns; in attach_recursive_mnt() local
2576 * mounted beneath mounts on the same mountpoint. in attach_recursive_mnt()
2578 for (top = source_mnt; unlikely(top->overmount); top = top->overmount) { in attach_recursive_mnt()
2579 if (!shorter && is_mnt_ns_file(top->mnt.mnt_root)) in attach_recursive_mnt()
2580 shorter = top->mnt_mp; in attach_recursive_mnt()
2582 err = get_mountpoint(top->mnt.mnt_root, &root); in attach_recursive_mnt()
2588 err = count_mounts(ns, source_mnt); in attach_recursive_mnt()
2613 list_del_init(&source_mnt->mnt_expire); in attach_recursive_mnt()
2615 if (source_mnt->mnt_ns) { in attach_recursive_mnt()
2616 /* move from anon - the caller will destroy */ in attach_recursive_mnt()
2617 emptied_ns = source_mnt->mnt_ns; in attach_recursive_mnt()
2625 * Now the original copy is in the same state as the secondaries - in attach_recursive_mnt()
2631 hlist_add_head(&source_mnt->mnt_hash, &tree_list); in attach_recursive_mnt()
2635 hlist_del_init(&child->mnt_hash); in attach_recursive_mnt()
2636 /* Notice when we are propagating across user namespaces */ in attach_recursive_mnt()
2637 if (child->mnt_parent->mnt_ns->user_ns != user_ns) in attach_recursive_mnt()
2639 q = __lookup_mnt(&child->mnt_parent->mnt, in attach_recursive_mnt()
2640 child->mnt_mountpoint); in attach_recursive_mnt()
2659 child->mnt_parent->mnt_ns->pending_mounts = 0; in attach_recursive_mnt()
2665 ns->pending_mounts = 0; in attach_recursive_mnt()
2681 m = topmost_overmount(real_mount(path->mnt)); in where_to_mount()
2682 *dentry = m->mnt_mountpoint; in where_to_mount()
2683 return m->mnt_parent; in where_to_mount()
2685 m = __lookup_mnt(path->mnt, path->dentry); in where_to_mount()
2688 *dentry = m->mnt.mnt_root; in where_to_mount()
2691 *dentry = path->dentry; in where_to_mount()
2692 return real_mount(path->mnt); in where_to_mount()
2696 * do_lock_mount - acquire environment for mounting
2701 * To mount something at a given location, we need
2703 * inode of dentry we are mounting on locked exclusive
2705 * struct mount we are mounting on
2707 * Results are stored in caller-supplied context (pinned_mountpoint);
2708 * on success we have res->parent and res->mp pointing to parent and
2709 * mountpoint respectively and res->node inserted into the ->m_list
2711 * On failure we have res->parent set to ERR_PTR(-E...), res->mp
2712 * left NULL, res->node - empty.
2714 * proper order - inode lock nests outside of namespace_sem).
2716 * A request to mount on an overmounted location is treated as "mount on
2718 * a location - "mount immediately beneath the topmost mount at that
2722 * chosen mountpoint must be allowed to be mounted on. For "beneath"
2723 * case we also require the location to be at the root of a mount
2733 res->parent = ERR_PTR(-EINVAL); in do_lock_mount()
2743 if (&m->mnt != path->mnt) { in do_lock_mount()
2744 mntget(&m->mnt); in do_lock_mount()
2749 inode_lock(dentry->d_inode); in do_lock_mount()
2757 err = -EAGAIN; // something moved, retry in do_lock_mount()
2758 else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt))) in do_lock_mount()
2759 err = -ENOENT; // not to be mounted on in do_lock_mount()
2760 else if (beneath && &m->mnt == path->mnt && !m->overmount) in do_lock_mount()
2761 err = -EINVAL; in do_lock_mount()
2766 res->parent = ERR_PTR(err); in do_lock_mount()
2768 inode_unlock(dentry->d_inode); in do_lock_mount()
2770 res->parent = m; in do_lock_mount()
2773 * Drop the temporary references. This is subtle - on success in do_lock_mount()
2774 * we are doing that under namespace_sem, which would normally in do_lock_mount()
2775 * be forbidden. However, in that case we are guaranteed that in do_lock_mount()
2776 * refcounts won't reach zero, since we know that path->mnt in do_lock_mount()
2780 if (&m->mnt != path->mnt) { in do_lock_mount()
2782 mntput(&m->mnt); in do_lock_mount()
2784 } while (err == -EAGAIN); in do_lock_mount()
2789 inode_unlock(m->mp->m_dentry->d_inode); in __unlock_mount()
2798 if (!IS_ERR(m->parent)) in unlock_mount()
2812 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) in graft_tree()
2813 return -EINVAL; in graft_tree()
2815 if (d_is_dir(mp->mp->m_dentry) != in graft_tree()
2816 d_is_dir(mnt->mnt.mnt_root)) in graft_tree()
2817 return -ENOTDIR; in graft_tree()
2824 struct mnt_namespace *ns = m->mnt_ns; in may_change_propagation() local
2827 if (IS_ERR_OR_NULL(ns)) // is_mounted() in may_change_propagation()
2828 return -EINVAL; in may_change_propagation()
2830 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) in may_change_propagation()
2831 return -EPERM; in may_change_propagation()
2843 /* Fail if any non-propagation flags are set */ in flags_to_propagation_type()
2858 struct mount *mnt = real_mount(path->mnt); in do_change_type()
2864 return -EINVAL; in do_change_type()
2868 return -EINVAL; in do_change_type()
2888 /* may_copy_tree() - check if a mount tree can be copied
2892 * from @path->mnt. The caller may copy the mount tree under the
2917 * The ownership of a non-anonymous mount namespace such as the
2919 * => We know that the caller's mount namespace is stable.
2925 * ==> The earlier capability check on the owning namespace of the
2933 struct mount *mnt = real_mount(path->mnt); in may_copy_tree()
2939 d_op = path->dentry->d_op; in may_copy_tree()
2946 if (!is_mounted(path->mnt)) in may_copy_tree()
2955 struct mount *old = real_mount(old_path->mnt); in __do_loopback()
2958 return ERR_PTR(-EINVAL); in __do_loopback()
2961 return ERR_PTR(-EINVAL); in __do_loopback()
2963 if (!recurse && __has_locked_children(old, old_path->dentry)) in __do_loopback()
2964 return ERR_PTR(-EINVAL); in __do_loopback()
2967 return copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); in __do_loopback()
2969 return clone_mnt(old, old_path->dentry, 0); in __do_loopback()
2982 return -EINVAL; in do_loopback()
2988 return -EINVAL; in do_loopback()
2995 return -EINVAL; in do_loopback()
3012 struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns; in get_detached_copy() local
3013 struct user_namespace *user_ns = mnt_ns->user_ns; in get_detached_copy()
3016 ns = alloc_mnt_ns(user_ns, true); in get_detached_copy()
3017 if (IS_ERR(ns)) in get_detached_copy()
3018 return ns; in get_detached_copy()
3027 if (is_mounted(path->mnt)) { in get_detached_copy()
3028 src_mnt_ns = real_mount(path->mnt)->mnt_ns; in get_detached_copy()
3030 ns->seq_origin = src_mnt_ns->seq_origin; in get_detached_copy()
3032 ns->seq_origin = src_mnt_ns->ns.ns_id; in get_detached_copy()
3037 emptied_ns = ns; in get_detached_copy()
3042 mnt_add_to_ns(ns, p); in get_detached_copy()
3043 ns->nr_mounts++; in get_detached_copy()
3045 ns->root = mnt; in get_detached_copy()
3046 return ns; in get_detached_copy()
3051 struct mnt_namespace *ns = get_detached_copy(path, recursive); in open_detached_copy() local
3054 if (IS_ERR(ns)) in open_detached_copy()
3055 return ERR_CAST(ns); in open_detached_copy()
3057 mntput(path->mnt); in open_detached_copy()
3058 path->mnt = mntget(&ns->root->mnt); in open_detached_copy()
3061 dissolve_on_fput(path->mnt); in open_detached_copy()
3063 file->f_mode |= FMODE_NEED_UNMOUNT; in open_detached_copy()
3079 return ERR_PTR(-EINVAL); in vfs_open_tree()
3082 return ERR_PTR(-EINVAL); in vfs_open_tree()
3092 return ERR_PTR(-EPERM); in vfs_open_tree()
3129 unsigned int fl = mnt->mnt.mnt_flags; in can_change_locked_flags()
3158 if (readonly_request == __mnt_is_readonly(&mnt->mnt)) in change_mount_ro_state()
3164 mnt->mnt.mnt_flags &= ~MNT_READONLY; in change_mount_ro_state()
3170 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; in set_mount_attributes()
3171 mnt->mnt.mnt_flags = mnt_flags; in set_mount_attributes()
3172 touch_mnt_namespace(mnt->mnt_ns); in set_mount_attributes()
3178 struct super_block *sb = mnt->mnt_sb; in mnt_warn_timestamp_expiry()
3181 (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && in mnt_warn_timestamp_expiry()
3182 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { in mnt_warn_timestamp_expiry()
3189 mntpath = ERR_PTR(-ENOMEM); in mnt_warn_timestamp_expiry()
3194 sb->s_type->name, in mnt_warn_timestamp_expiry()
3196 mntpath, &sb->s_time_max, in mnt_warn_timestamp_expiry()
3197 (unsigned long long)sb->s_time_max); in mnt_warn_timestamp_expiry()
3199 sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; in mnt_warn_timestamp_expiry()
3212 struct super_block *sb = path->mnt->mnt_sb; in do_reconfigure_mnt()
3213 struct mount *mnt = real_mount(path->mnt); in do_reconfigure_mnt()
3217 return -EINVAL; in do_reconfigure_mnt()
3220 return -EINVAL; in do_reconfigure_mnt()
3223 return -EPERM; in do_reconfigure_mnt()
3226 * We're only checking whether the superblock is read-only not in do_reconfigure_mnt()
3227 * changing it, so only take down_read(&sb->s_umount). in do_reconfigure_mnt()
3229 down_read(&sb->s_umount); in do_reconfigure_mnt()
3235 up_read(&sb->s_umount); in do_reconfigure_mnt()
3237 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_reconfigure_mnt()
3244 * If you've mounted a non-root directory somewhere and want to do remount
3245 * on it - tough luck.
3251 struct super_block *sb = path->mnt->mnt_sb; in do_remount()
3252 struct mount *mnt = real_mount(path->mnt); in do_remount()
3256 return -EINVAL; in do_remount()
3259 return -EINVAL; in do_remount()
3262 return -EPERM; in do_remount()
3264 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); in do_remount()
3272 fc->oldapi = true; in do_remount()
3276 down_write(&sb->s_umount); in do_remount()
3277 err = -EPERM; in do_remount()
3278 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { in do_remount()
3286 up_write(&sb->s_umount); in do_remount()
3289 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_remount()
3307 struct mount *from = real_mount(from_path->mnt); in do_set_group()
3308 struct mount *to = real_mount(to_path->mnt); in do_set_group()
3322 return -EINVAL; in do_set_group()
3324 return -EINVAL; in do_set_group()
3327 if (from->mnt.mnt_sb != to->mnt.mnt_sb) in do_set_group()
3328 return -EINVAL; in do_set_group()
3331 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) in do_set_group()
3332 return -EINVAL; in do_set_group()
3335 if (__has_locked_children(from, to->mnt.mnt_root)) in do_set_group()
3336 return -EINVAL; in do_set_group()
3338 /* Setting sharing groups is only allowed on private mounts */ in do_set_group()
3340 return -EINVAL; in do_set_group()
3344 return -EINVAL; in do_set_group()
3347 hlist_add_behind(&to->mnt_slave, &from->mnt_slave); in do_set_group()
3348 to->mnt_master = from->mnt_master; in do_set_group()
3352 to->mnt_group_id = from->mnt_group_id; in do_set_group()
3353 list_add(&to->mnt_share, &from->mnt_share); in do_set_group()
3360 * path_overmounted - check if path is overmounted
3363 * Check if path is overmounted, i.e., if there's a mount on top of
3364 * @path->mnt with @path->dentry as mountpoint.
3377 no_child = !__lookup_mnt(path->mnt, path->dentry); in path_overmounted()
3381 no_child = !__lookup_mnt(path->mnt, path->dentry); in path_overmounted()
3394 p2 = p2->mnt_parent; in mount_is_ancestor()
3399 * can_move_mount_beneath - check that we can mount beneath the top mount
3400 * @mnt_from: mount we are trying to move
3404 * - Make sure that nothing can be mounted beneath the caller's current
3406 * - Make sure that the caller can unmount the topmost mount ensuring
3408 * - Ensure that nothing has been mounted on top of @mnt_from before we
3410 * - Prevent mounting beneath a mount if the propagation relationship
3415 * Return: On success 0, and on error a negative error code is returned.
3421 struct mount *parent_mnt_to = mnt_to->mnt_parent; in can_move_mount_beneath()
3424 return -EINVAL; in can_move_mount_beneath()
3427 if (mnt_from->overmount) in can_move_mount_beneath()
3428 return -EINVAL; in can_move_mount_beneath()
3434 if (&mnt_to->mnt == current->fs->root.mnt) in can_move_mount_beneath()
3435 return -EINVAL; in can_move_mount_beneath()
3436 if (parent_mnt_to == current->nsproxy->mnt_ns->root) in can_move_mount_beneath()
3437 return -EINVAL; in can_move_mount_beneath()
3440 return -EINVAL; in can_move_mount_beneath()
3444 * mean mounting @mnt_from on @mnt_to->mnt_parent and then in can_move_mount_beneath()
3445 * propagating a copy @c of @mnt_from on top of @mnt_to. This in can_move_mount_beneath()
3449 return -EINVAL; in can_move_mount_beneath()
3452 * If @mnt_to->mnt_parent propagates to @mnt_from this would in can_move_mount_beneath()
3453 * mean propagating a copy @c of @mnt_from on top of @mnt_from. in can_move_mount_beneath()
3454 * Afterwards @mnt_from would be mounted on top of in can_move_mount_beneath()
3455 * @mnt_to->mnt_parent and @mnt_to would be unmounted from in can_move_mount_beneath()
3456 * @mnt->mnt_parent and remounted on @mnt_from. But since @c is in can_move_mount_beneath()
3457 * already mounted on @mnt_from, @mnt_to would ultimately be in can_move_mount_beneath()
3458 * remounted on top of @c. Afterwards, @mnt_from would be in can_move_mount_beneath()
3465 return -EINVAL; in can_move_mount_beneath()
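From userspace, the constraints above are exercised through move_mount(2) with MOVE_MOUNT_BENEATH (available since Linux 6.5). A sketch using detached-path fds; error handling is elided:

    #include <linux/mount.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Move the mount at @from_fd beneath the top mount at @to_fd. */
    static int move_beneath(int from_fd, int to_fd)
    {
        return syscall(SYS_move_mount, from_fd, "", to_fd, "",
                       MOVE_MOUNT_BENEATH |
                       MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
    }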
3470 /* may_use_mount() - check if a mount tree can be used
3474 * from @path->mnt. The caller may use the mount tree under the
3488 * The ownership of a non-anonymous mount namespace such as the
3490 * => We know that the caller's mount namespace is stable.
3496 * ==> The earlier capability check on the owning namespace of the
3509 * managed to get their hands on something purely kernel in may_use_mount()
3512 if (!is_mounted(&mnt->mnt)) in may_use_mount()
3522 struct mount *old = real_mount(old_path->mnt); in do_move_mount()
3527 return -EINVAL; in do_move_mount()
3529 if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry)) in do_move_mount()
3530 return -EINVAL; in do_move_mount()
3540 return -EINVAL; in do_move_mount()
3542 if (IS_MNT_SHARED(old->mnt_parent)) in do_move_mount()
3543 return -EINVAL; in do_move_mount()
3546 return -EINVAL; in do_move_mount()
3552 return -EINVAL; in do_move_mount()
3554 * Bail out early if the target is within the same namespace - in do_move_mount()
3556 * some corner cases if we check it early. in do_move_mount()
3558 if (old->mnt_ns == mp.parent->mnt_ns) in do_move_mount()
3559 return -EINVAL; in do_move_mount()
3565 return -EINVAL; in do_move_mount()
3569 struct mount *over = real_mount(new_path->mnt); in do_move_mount()
3571 if (mp.parent != over->mnt_parent) in do_move_mount()
3572 over = mp.parent->overmount; in do_move_mount()
3583 return -EINVAL; in do_move_mount()
3585 return -ELOOP; in do_move_mount()
3587 return -ELOOP; in do_move_mount()
3598 return -EINVAL; in do_move_mount_old()
3613 struct mount *parent = mp->parent; in do_add_mount()
3621 /* that's acceptable only for automounts done in private ns */ in do_add_mount()
3623 return -EINVAL; in do_add_mount()
3624 /* ... and for those we'd better have mountpoint still alive */ in do_add_mount()
3625 if (!parent->mnt_ns) in do_add_mount()
3626 return -EINVAL; in do_add_mount()
3629 /* Refuse the same filesystem on the same mount point */ in do_add_mount()
3630 if (parent->mnt.mnt_sb == newmnt->mnt.mnt_sb && in do_add_mount()
3631 parent->mnt.mnt_root == mp->mp->m_dentry) in do_add_mount()
3632 return -EBUSY; in do_add_mount()
3634 if (d_is_symlink(newmnt->mnt.mnt_root)) in do_add_mount()
3635 return -EINVAL; in do_add_mount()
3637 newmnt->mnt.mnt_flags = mnt_flags; in do_add_mount()
3657 sb = fc->root->d_sb; in do_new_mount_fc()
3664 return -EPERM; in do_new_mount_fc()
3672 retain_and_null_ptr(mnt); // consumed on success in do_new_mount_fc()
3690 return -EINVAL; in do_new_mount()
3694 return -ENODEV; in do_new_mount()
3696 if (type->fs_flags & FS_HAS_SUBTYPE) { in do_new_mount()
3702 return -EINVAL; in do_new_mount()
3716 fc->oldapi = true; in do_new_mount()
3725 err = -EPERM; in do_new_mount()
3736 struct dentry *dentry = path->dentry; in lock_mount_exact()
3739 inode_lock(dentry->d_inode); in lock_mount_exact()
3742 err = -ENOENT; in lock_mount_exact()
3744 err = -EBUSY; in lock_mount_exact()
3749 inode_unlock(dentry->d_inode); in lock_mount_exact()
3750 mp->parent = ERR_PTR(err); in lock_mount_exact()
3752 mp->parent = real_mount(path->mnt); in lock_mount_exact()
3769 if (m->mnt_root == path->dentry) in finish_automount()
3770 return -ELOOP; in finish_automount()
3773 * we don't want to use LOCK_MOUNT() - in this case finding something in finish_automount()
3774 * that overmounts our mountpoint-to-be means "quietly drop what we've in finish_automount()
3775 * got", not "try to mount it on top". in finish_automount()
3778 if (mp.parent == ERR_PTR(-EBUSY)) in finish_automount()
3781 err = do_add_mount(mnt, &mp, path->mnt->mnt_flags | MNT_SHRINKABLE); in finish_automount()
3788 * mnt_set_expiry - Put a mount on an expiration list
3795 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); in mnt_set_expiry()
3801 * mountpoints that aren't in use and haven't been touched since last we came
3817 * - already mounted in mark_mounts_for_expiry()
3818 * - only referenced by its parent vfsmount in mark_mounts_for_expiry()
3819 * - still marked for expiry (marked on the last call here; marks are in mark_mounts_for_expiry()
3823 if (!is_mounted(&mnt->mnt)) in mark_mounts_for_expiry()
3825 if (!xchg(&mnt->mnt_expiry_mark, 1) || in mark_mounts_for_expiry()
3828 list_move(&mnt->mnt_expire, &graveyard); in mark_mounts_for_expiry()
3832 touch_mnt_namespace(mnt->mnt_ns); in mark_mounts_for_expiry()
3852 next = this_parent->mnt_mounts.next; in select_submounts()
3854 while (next != &this_parent->mnt_mounts) { in select_submounts()
3858 next = tmp->next; in select_submounts()
3859 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE)) in select_submounts()
3862 * Descend a level if the mnt_mounts list is non-empty. in select_submounts()
3864 if (!list_empty(&mnt->mnt_mounts)) { in select_submounts()
3870 list_move_tail(&mnt->mnt_expire, graveyard); in select_submounts()
3878 next = this_parent->mnt_child.next; in select_submounts()
3879 this_parent = this_parent->mnt_parent; in select_submounts()
3901 touch_mnt_namespace(m->mnt_ns); in shrink_submounts()
3917 return ERR_PTR(-ENOMEM); in copy_mount_options()
3925 offset = PAGE_SIZE - left; in copy_mount_options()
3931 left--; in copy_mount_options()
3937 return ERR_PTR(-EFAULT); in copy_mount_options()
3949 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
3950 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
3953 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
3956 * Pre-0.97 versions of mount() didn't have a flags word.
3958 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
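That legacy magic is still tolerated today: old callers pass flags with 0xC0ED in the top half, which the kernel strips before interpreting anything. The uapi headers spell these MS_MGC_VAL and MS_MGC_MSK; the values are inlined here to keep the sketch header-free:

    static unsigned long strip_mount_magic(unsigned long flags)
    {
        if ((flags & 0xffff0000UL) == 0xC0ED0000UL)  /* MS_MGC_VAL */
            flags &= ~0xffff0000UL;                  /* MS_MGC_MSK */
        return flags;
    }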
3974 ((char *)data_page)[PAGE_SIZE - 1] = 0; in path_mount()
3977 return -EINVAL; in path_mount()
3983 return -EPERM; in path_mount()
3991 /* Separate the per-mountpoint flags */ in path_mount()
4014 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; in path_mount()
4053 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) in inc_mnt_namespaces() argument
4055 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); in inc_mnt_namespaces()
4063 static void free_mnt_ns(struct mnt_namespace *ns) in free_mnt_ns() argument
4065 if (!is_anon_ns(ns)) in free_mnt_ns()
4066 ns_common_free(ns); in free_mnt_ns()
4067 dec_mnt_namespaces(ns->ucounts); in free_mnt_ns()
4068 mnt_ns_tree_remove(ns); in free_mnt_ns()
4079 return ERR_PTR(-ENOSPC); in alloc_mnt_ns()
4084 return ERR_PTR(-ENOMEM); in alloc_mnt_ns()
4097 ns_tree_gen_id(&new_ns->ns); in alloc_mnt_ns()
4098 refcount_set(&new_ns->passive, 1); in alloc_mnt_ns()
4099 new_ns->mounts = RB_ROOT; in alloc_mnt_ns()
4100 init_waitqueue_head(&new_ns->poll); in alloc_mnt_ns()
4101 new_ns->user_ns = get_user_ns(user_ns); in alloc_mnt_ns()
4102 new_ns->ucounts = ucounts; in alloc_mnt_ns()
4107 struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns, in copy_mnt_ns() argument
4118 BUG_ON(!ns); in copy_mnt_ns()
4121 get_mnt_ns(ns); in copy_mnt_ns()
4122 return ns; in copy_mnt_ns()
4125 old = ns->root; in copy_mnt_ns()
4134 if (user_ns != ns->user_ns) in copy_mnt_ns()
4136 new = copy_tree(old, old->mnt.mnt_root, copy_flags); in copy_mnt_ns()
4141 if (user_ns != ns->user_ns) { in copy_mnt_ns()
4145 new_ns->root = new; in copy_mnt_ns()
4148 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts in copy_mnt_ns()
4149 * as belonging to the new namespace. We have already acquired a private in copy_mnt_ns()
4150 * fs_struct, so tsk->fs->lock is not needed. in copy_mnt_ns()
4156 new_ns->nr_mounts++; in copy_mnt_ns()
4158 if (&p->mnt == new_fs->root.mnt) { in copy_mnt_ns()
4159 new_fs->root.mnt = mntget(&q->mnt); in copy_mnt_ns()
4160 rootmnt = &p->mnt; in copy_mnt_ns()
4162 if (&p->mnt == new_fs->pwd.mnt) { in copy_mnt_ns()
4163 new_fs->pwd.mnt = mntget(&q->mnt); in copy_mnt_ns()
4164 pwdmnt = &p->mnt; in copy_mnt_ns()
4171 // an mntns binding we'd skipped? in copy_mnt_ns()
4172 while (p->mnt.mnt_root != q->mnt.mnt_root) in copy_mnt_ns()
4182 struct mnt_namespace *ns; in mount_subtree() local
4187 ns = alloc_mnt_ns(&init_user_ns, true); in mount_subtree()
4188 if (IS_ERR(ns)) { in mount_subtree()
4190 return ERR_CAST(ns); in mount_subtree()
4192 ns->root = mnt; in mount_subtree()
4193 ns->nr_mounts++; in mount_subtree()
4194 mnt_add_to_ns(ns, mnt); in mount_subtree()
4196 err = vfs_path_lookup(m->mnt_root, m, in mount_subtree()
4199 put_mnt_ns(ns); in mount_subtree()
4205 s = path.mnt->mnt_sb; in mount_subtree()
4206 atomic_inc(&s->s_active); in mount_subtree()
4209 down_write(&s->s_umount); in mount_subtree()
4210 /* ... and return the root of (sub)tree on it */ in mount_subtree()
4281 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
4286 struct mnt_namespace *ns; in SYSCALL_DEFINE3() local
4295 return -EPERM; in SYSCALL_DEFINE3()
4298 return -EINVAL; in SYSCALL_DEFINE3()
4301 return -EINVAL; in SYSCALL_DEFINE3()
4315 return -EINVAL; in SYSCALL_DEFINE3()
4320 return -EBADF; in SYSCALL_DEFINE3()
4322 if (fd_file(f)->f_op != &fscontext_fops) in SYSCALL_DEFINE3()
4323 return -EINVAL; in SYSCALL_DEFINE3()
4325 fc = fd_file(f)->private_data; in SYSCALL_DEFINE3()
4327 ret = mutex_lock_interruptible(&fc->uapi_mutex); in SYSCALL_DEFINE3()
4331 /* There must be a valid superblock or we can't mount it */ in SYSCALL_DEFINE3()
4332 ret = -EINVAL; in SYSCALL_DEFINE3()
4333 if (!fc->root) in SYSCALL_DEFINE3()
4336 ret = -EPERM; in SYSCALL_DEFINE3()
4337 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { in SYSCALL_DEFINE3()
4342 ret = -EBUSY; in SYSCALL_DEFINE3()
4343 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) in SYSCALL_DEFINE3()
4346 if (fc->sb_flags & SB_MANDLOCK) in SYSCALL_DEFINE3()
4354 newmount.dentry = dget(fc->root); in SYSCALL_DEFINE3()
4355 newmount.mnt->mnt_flags = mnt_flags; in SYSCALL_DEFINE3()
4357 /* We've done the mount bit - now move the file context into more or in SYSCALL_DEFINE3()
4358 * less the same state as if we'd done an fspick(). We don't want to in SYSCALL_DEFINE3()
4359 * do any memory allocation or anything like that at this point as we in SYSCALL_DEFINE3()
4364 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); in SYSCALL_DEFINE3()
4365 if (IS_ERR(ns)) { in SYSCALL_DEFINE3()
4366 ret = PTR_ERR(ns); in SYSCALL_DEFINE3()
4370 ns->root = mnt; in SYSCALL_DEFINE3()
4371 ns->nr_mounts = 1; in SYSCALL_DEFINE3()
4372 mnt_add_to_ns(ns, mnt); in SYSCALL_DEFINE3()
4375 /* Attach to an apparent O_PATH fd with a note that we need to unmount in SYSCALL_DEFINE3()
4378 file = dentry_open(&newmount, O_PATH, fc->cred); in SYSCALL_DEFINE3()
4384 file->f_mode |= FMODE_NEED_UNMOUNT; in SYSCALL_DEFINE3()
4395 mutex_unlock(&fc->uapi_mutex); in SYSCALL_DEFINE3()
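The usual userspace sequence feeding this syscall: create and configure a filesystem context with fsopen()/fsconfig(), then fsmount() it into the detached, open_tree-like fd described above. A sketch using raw syscalls, since libc wrappers are not universal; error handling is elided:

    #include <linux/mount.h>
    #include <stddef.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int detached_tmpfs(void)
    {
        int fsfd = syscall(SYS_fsopen, "tmpfs", 0);
        syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
        int mntfd = syscall(SYS_fsmount, fsfd, 0, MOUNT_ATTR_NODEV);
        close(fsfd);
        return mntfd;    /* attach somewhere later with move_mount(2) */
    }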
4437 return -EPERM; in SYSCALL_DEFINE5()
4440 return -EINVAL; in SYSCALL_DEFINE5()
4444 return -EINVAL; in SYSCALL_DEFINE5()
4460 return -EBADF; in SYSCALL_DEFINE5()
4462 to_path = fd_file(f_to)->f_path; in SYSCALL_DEFINE5()
4486 return -EBADF; in SYSCALL_DEFINE5()
4488 return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags); in SYSCALL_DEFINE5()
4511 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { in is_path_reachable()
4512 dentry = mnt->mnt_mountpoint; in is_path_reachable()
4513 mnt = mnt->mnt_parent; in is_path_reachable()
4515 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); in is_path_reachable()
4521 return is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); in path_is_under()
4529 * root/cwd of all processes which had them on the current root to new_root.
4532 * The new_root and put_old must be directories, and must not be on the
4534 * underneath new_root, i.e. adding a non-zero number of /.. to the string
4536 * file system may be mounted on put_old. After all, new_root is a mountpoint.
4538 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
4539 * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
4543 * - we don't move root/cwd if they are not at the root (reason: if something
4545 * - it's okay to pick a root that isn't the root of a file system, e.g.
4547 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
4560 return -EPERM; in SYSCALL_DEFINE2()
4576 get_fs_root(current->fs, &root); in SYSCALL_DEFINE2()
4585 ex_parent = new_mnt->mnt_parent; in SYSCALL_DEFINE2()
4586 root_parent = root_mnt->mnt_parent; in SYSCALL_DEFINE2()
4590 return -EINVAL; in SYSCALL_DEFINE2()
4592 return -EINVAL; in SYSCALL_DEFINE2()
4593 if (new_mnt->mnt.mnt_flags & MNT_LOCKED) in SYSCALL_DEFINE2()
4594 return -EINVAL; in SYSCALL_DEFINE2()
4596 return -ENOENT; in SYSCALL_DEFINE2()
4598 return -EBUSY; /* loop, on the same file system */ in SYSCALL_DEFINE2()
4600 return -EINVAL; /* not a mountpoint */ in SYSCALL_DEFINE2()
4602 return -EINVAL; /* absolute root */ in SYSCALL_DEFINE2()
4604 return -EINVAL; /* not a mountpoint */ in SYSCALL_DEFINE2()
4606 return -EINVAL; /* absolute root */ in SYSCALL_DEFINE2()
4607 /* make sure we can reach put_old from new_root */ in SYSCALL_DEFINE2()
4608 if (!is_path_reachable(old_mnt, old_mp.mp->m_dentry, &new)) in SYSCALL_DEFINE2()
4609 return -EINVAL; in SYSCALL_DEFINE2()
4612 return -EINVAL; in SYSCALL_DEFINE2()
4615 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { in SYSCALL_DEFINE2()
4616 new_mnt->mnt.mnt_flags |= MNT_LOCKED; in SYSCALL_DEFINE2()
4617 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; in SYSCALL_DEFINE2()
4619 /* mount new_root on / */ in SYSCALL_DEFINE2()
4620 attach_mnt(new_mnt, root_parent, root_mnt->mnt_mp); in SYSCALL_DEFINE2()
4622 /* mount old root on put_old */ in SYSCALL_DEFINE2()
4624 touch_mnt_namespace(current->nsproxy->mnt_ns); in SYSCALL_DEFINE2()
4626 list_del_init(&new_mnt->mnt_expire); in SYSCALL_DEFINE2()
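/*
 * Editor's sketch (hedged): the classic userspace sequence satisfying
 * the pivot_root() rules enforced above. Needs CAP_SYS_ADMIN (or a
 * user namespace); "/newroot" must already contain an "oldroot"
 * directory. glibc ships no pivot_root wrapper, hence raw syscall().
 */
#define _GNU_SOURCE
#include <sched.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <err.h>

int main(void)
{
	if (unshare(CLONE_NEWNS) < 0)
		err(1, "unshare");
	/* keep mount events from propagating back to the parent namespace */
	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
		err(1, "mount(MS_PRIVATE)");
	/* new_root must itself be a mount point: bind it over itself */
	if (mount("/newroot", "/newroot", NULL, MS_BIND, NULL) < 0)
		err(1, "mount(MS_BIND)");
	if (syscall(SYS_pivot_root, "/newroot", "/newroot/oldroot") < 0)
		err(1, "pivot_root");
	if (chdir("/") < 0)
		err(1, "chdir");
	/* the old root now sits at /oldroot; detach it */
	if (umount2("/oldroot", MNT_DETACH) < 0)
		err(1, "umount2");
	return 0;
}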
4636 unsigned int flags = mnt->mnt.mnt_flags; in recalc_flags()
4639 flags &= ~kattr->attr_clr; in recalc_flags()
4641 flags |= kattr->attr_set; in recalc_flags()
4648 struct vfsmount *m = &mnt->mnt; in can_idmap_mount()
4649 struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; in can_idmap_mount()
4651 if (!kattr->mnt_idmap) in can_idmap_mount()
4656 * doesn't make sense so block that. We don't allow mushy semantics. in can_idmap_mount()
4658 if (kattr->mnt_userns == m->mnt_sb->s_user_ns) in can_idmap_mount()
4659 return -EINVAL; in can_idmap_mount()
4662  * We only allow a mount to change its idmapping if it has in can_idmap_mount()
4665 if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m)) in can_idmap_mount()
4666 return -EPERM; in can_idmap_mount()
4669 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) in can_idmap_mount()
4670 return -EINVAL; in can_idmap_mount()
4673 if (m->mnt_sb->s_iflags & SB_I_NOIDMAP) in can_idmap_mount()
4674 return -EINVAL; in can_idmap_mount()
4676 /* We're not controlling the superblock. */ in can_idmap_mount()
4678 return -EPERM; in can_idmap_mount()
4681 if (!is_anon_ns(mnt->mnt_ns)) in can_idmap_mount()
4682 return -EINVAL; in can_idmap_mount()
4688 * mnt_allow_writers() - check whether the attribute change allows writers
4699 return (!(kattr->attr_set & MNT_READONLY) || in mnt_allow_writers()
4700 (mnt->mnt.mnt_flags & MNT_READONLY)) && in mnt_allow_writers()
4701 !kattr->mnt_idmap; in mnt_allow_writers()
4711 err = -EPERM; in mount_setattr_prepare()
4727 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) in mount_setattr_prepare()
4732 /* undo all mnt_hold_writers() we'd done */ in mount_setattr_prepare()
4743 if (!kattr->mnt_idmap) in do_idmap_mount()
4746 old_idmap = mnt_idmap(&mnt->mnt); in do_idmap_mount()
4749 smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); in do_idmap_mount()
4762 WRITE_ONCE(m->mnt.mnt_flags, flags); in mount_setattr_commit()
4764 /* If we had to hold writers unblock them. */ in mount_setattr_commit()
4767 if (kattr->propagation) in mount_setattr_commit()
4768 change_mnt_propagation(m, kattr->propagation); in mount_setattr_commit()
4769 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) in mount_setattr_commit()
4772 touch_mnt_namespace(mnt->mnt_ns); in mount_setattr_commit()
4777 struct mount *mnt = real_mount(path->mnt); in do_mount_setattr()
4781 return -EINVAL; in do_mount_setattr()
4783 if (kattr->mnt_userns) { in do_mount_setattr()
4786 mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); in do_mount_setattr()
4789 kattr->mnt_idmap = mnt_idmap; in do_mount_setattr()
4792 if (kattr->propagation) { in do_mount_setattr()
4794 * Only take namespace_lock() if we're actually changing in do_mount_setattr()
4798 if (kattr->propagation == MS_SHARED) { in do_mount_setattr()
4799 err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE); in do_mount_setattr()
4807 err = -EINVAL; in do_mount_setattr()
4814  * First, we get the mount tree into a shape where we can change mount in do_mount_setattr()
4815  * properties without failure. If that succeeds we commit all in do_mount_setattr()
4816  * changes; if it fails we clean up. in do_mount_setattr()
4825 if (kattr->propagation) { in do_mount_setattr()
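/*
 * Editor's sketch (hedged): a minimal mount_setattr() caller exercising
 * the prepare/commit flow above. With AT_RECURSIVE the kernel first
 * holds writers on every mount in the subtree, so the switch to
 * read-only either applies to the whole tree or not at all. The target
 * path "/mnt/tree" is illustrative; assumes >= 5.12 headers.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	struct mount_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.attr_set = MOUNT_ATTR_RDONLY;
	if (syscall(SYS_mount_setattr, AT_FDCWD, "/mnt/tree", AT_RECURSIVE,
		    &attr, sizeof(attr)) < 0)
		err(1, "mount_setattr");
	return 0;
}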
4837 struct ns_common *ns; in build_mount_idmapped() local
4840 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) in build_mount_idmapped()
4843 if (attr->attr_clr & MOUNT_ATTR_IDMAP) { in build_mount_idmapped()
4845 * We can only remove an idmapping if it's never been in build_mount_idmapped()
4848 if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE)) in build_mount_idmapped()
4849 return -EINVAL; in build_mount_idmapped()
4855 if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) { in build_mount_idmapped()
4856 kattr->mnt_idmap = &nop_mnt_idmap; in build_mount_idmapped()
4861 if (attr->userns_fd > INT_MAX) in build_mount_idmapped()
4862 return -EINVAL; in build_mount_idmapped()
4864 CLASS(fd, f)(attr->userns_fd); in build_mount_idmapped()
4866 return -EBADF; in build_mount_idmapped()
4869 return -EINVAL; in build_mount_idmapped()
4871 ns = get_proc_ns(file_inode(fd_file(f))); in build_mount_idmapped()
4872 if (ns->ns_type != CLONE_NEWUSER) in build_mount_idmapped()
4873 return -EINVAL; in build_mount_idmapped()
4877 * mount. We use the initial idmapping as an indicator of a mount in build_mount_idmapped()
4883 mnt_userns = container_of(ns, struct user_namespace, ns); in build_mount_idmapped()
4885 return -EPERM; in build_mount_idmapped()
4887 /* We're not controlling the target namespace. */ in build_mount_idmapped()
4889 return -EPERM; in build_mount_idmapped()
4891 kattr->mnt_userns = get_user_ns(mnt_userns); in build_mount_idmapped()
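/*
 * Editor's sketch (hedged): creating an idmapped mount along the
 * build_mount_idmapped() path above. The user namespace fd (here taken
 * from a hypothetical /proc/1234/ns/user with a suitable uid/gid map)
 * and all paths are illustrative; the filesystem must advertise
 * FS_ALLOW_IDMAP or can_idmap_mount() above rejects the request.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	struct mount_attr attr;
	int tree_fd, userns_fd;

	tree_fd = syscall(SYS_open_tree, AT_FDCWD, "/srv/data",
			  OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
	if (tree_fd < 0)
		err(1, "open_tree");
	userns_fd = open("/proc/1234/ns/user", O_RDONLY | O_CLOEXEC);
	if (userns_fd < 0)
		err(1, "open(userns)");
	memset(&attr, 0, sizeof(attr));
	attr.attr_set = MOUNT_ATTR_IDMAP;
	attr.userns_fd = userns_fd;
	/* empty path + AT_EMPTY_PATH targets the detached tree itself */
	if (syscall(SYS_mount_setattr, tree_fd, "", AT_EMPTY_PATH,
		    &attr, sizeof(attr)) < 0)
		err(1, "mount_setattr(IDMAP)");
	if (syscall(SYS_move_mount, tree_fd, "", AT_FDCWD, "/mnt/idmapped",
		    MOVE_MOUNT_F_EMPTY_PATH) < 0)
		err(1, "move_mount");
	return 0;
}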
4898 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) in build_mount_kattr()
4899 return -EINVAL; in build_mount_kattr()
4900 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) in build_mount_kattr()
4901 return -EINVAL; in build_mount_kattr()
4902 kattr->propagation = attr->propagation; in build_mount_kattr()
4904 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) in build_mount_kattr()
4905 return -EINVAL; in build_mount_kattr()
4907 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); in build_mount_kattr()
4908 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); in build_mount_kattr()
4919 if (attr->attr_clr & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4920 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) in build_mount_kattr()
4921 return -EINVAL; in build_mount_kattr()
4927 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; in build_mount_kattr()
4928 switch (attr->attr_set & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4930 kattr->attr_set |= MNT_RELATIME; in build_mount_kattr()
4933 kattr->attr_set |= MNT_NOATIME; in build_mount_kattr()
4938 return -EINVAL; in build_mount_kattr()
4941 if (attr->attr_set & MOUNT_ATTR__ATIME) in build_mount_kattr()
4942 return -EINVAL; in build_mount_kattr()
4950 if (kattr->mnt_userns) { in finish_mount_kattr()
4951 put_user_ns(kattr->mnt_userns); in finish_mount_kattr()
4952 kattr->mnt_userns = NULL; in finish_mount_kattr()
4955 if (kattr->mnt_idmap) in finish_mount_kattr()
4956 mnt_idmap_put(kattr->mnt_idmap); in finish_mount_kattr()
4968 return -E2BIG; in wants_mount_setattr()
4970 return -EINVAL; in wants_mount_setattr()
4973 return -EPERM; in wants_mount_setattr()
5005 return -EINVAL; in SYSCALL_DEFINE5()
5042 return -EINVAL; in SYSCALL_DEFINE5()
5059 ret = do_mount_setattr(&file->f_path, &kattr); in SYSCALL_DEFINE5()
5076 if (root->d_sb->s_op->show_path) in show_path()
5077 return root->d_sb->s_op->show_path(m, root); in show_path()
5083 static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns) in lookup_mnt_in_ns() argument
5085 struct mount *mnt = mnt_find_id_at(ns, id); in lookup_mnt_in_ns()
5087 if (!mnt || mnt->mnt_id_unique != id) in lookup_mnt_in_ns()
5090 return &mnt->mnt; in lookup_mnt_in_ns()
5102 /* Must be last -- ends in a flexible-array member. */
5108 unsigned int mnt_flags = READ_ONCE(mnt->mnt_flags); in mnt_to_attr_flags()
5155 struct super_block *sb = s->mnt->mnt_sb; in statmount_sb_basic()
5157 s->sm.mask |= STATMOUNT_SB_BASIC; in statmount_sb_basic()
5158 s->sm.sb_dev_major = MAJOR(sb->s_dev); in statmount_sb_basic()
5159 s->sm.sb_dev_minor = MINOR(sb->s_dev); in statmount_sb_basic()
5160 s->sm.sb_magic = sb->s_magic; in statmount_sb_basic()
5161 s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME); in statmount_sb_basic()
5166 struct mount *m = real_mount(s->mnt); in statmount_mnt_basic()
5168 s->sm.mask |= STATMOUNT_MNT_BASIC; in statmount_mnt_basic()
5169 s->sm.mnt_id = m->mnt_id_unique; in statmount_mnt_basic()
5170 s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique; in statmount_mnt_basic()
5171 s->sm.mnt_id_old = m->mnt_id; in statmount_mnt_basic()
5172 s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id; in statmount_mnt_basic()
5173 s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt); in statmount_mnt_basic()
5174 s->sm.mnt_propagation = mnt_to_propagation_flags(m); in statmount_mnt_basic()
5175 s->sm.mnt_peer_group = m->mnt_group_id; in statmount_mnt_basic()
5176 s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0; in statmount_mnt_basic()
5181 struct mount *m = real_mount(s->mnt); in statmount_propagate_from()
5183 s->sm.mask |= STATMOUNT_PROPAGATE_FROM; in statmount_propagate_from()
5185 s->sm.propagate_from = get_dominating_id(m, &current->fs->root); in statmount_propagate_from()
5191 size_t start = seq->count; in statmount_mnt_root()
5193 ret = show_path(seq, s->mnt->mnt_root); in statmount_mnt_root()
5198 return -EAGAIN; in statmount_mnt_root()
5204 seq->buf[seq->count] = '\0'; in statmount_mnt_root()
5205 seq->count = start; in statmount_mnt_root()
5206 seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); in statmount_mnt_root()
5212 struct vfsmount *mnt = s->mnt; in statmount_mnt_point()
5213 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; in statmount_mnt_point()
5216 err = seq_path_root(seq, &mnt_path, &s->root, ""); in statmount_mnt_point()
5222 struct super_block *sb = s->mnt->mnt_sb; in statmount_fs_type()
5224 seq_puts(seq, sb->s_type->name); in statmount_fs_type()
5230 struct super_block *sb = s->mnt->mnt_sb; in statmount_fs_subtype()
5232 if (sb->s_subtype) in statmount_fs_subtype()
5233 seq_puts(seq, sb->s_subtype); in statmount_fs_subtype()
5238 struct super_block *sb = s->mnt->mnt_sb; in statmount_sb_source()
5239 struct mount *r = real_mount(s->mnt); in statmount_sb_source()
5241 if (sb->s_op->show_devname) { in statmount_sb_source()
5242 size_t start = seq->count; in statmount_sb_source()
5245 ret = sb->s_op->show_devname(seq, s->mnt->mnt_root); in statmount_sb_source()
5250 return -EAGAIN; in statmount_sb_source()
5253 seq->buf[seq->count] = '\0'; in statmount_sb_source()
5254 seq->count = start; in statmount_sb_source()
5255 seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); in statmount_sb_source()
5257 seq_puts(seq, r->mnt_devname); in statmount_sb_source()
5262 static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) in statmount_mnt_ns_id() argument
5264 s->sm.mask |= STATMOUNT_MNT_NS_ID; in statmount_mnt_ns_id()
5265 s->sm.mnt_ns_id = ns->ns.ns_id; in statmount_mnt_ns_id()
5270 struct vfsmount *mnt = s->mnt; in statmount_mnt_opts()
5271 struct super_block *sb = mnt->mnt_sb; in statmount_mnt_opts()
5272 size_t start = seq->count; in statmount_mnt_opts()
5279 if (sb->s_op->show_options) { in statmount_mnt_opts()
5280 err = sb->s_op->show_options(seq, mnt->mnt_root); in statmount_mnt_opts()
5286 return -EAGAIN; in statmount_mnt_opts()
5288 if (seq->count == start) in statmount_mnt_opts()
5292 memmove(seq->buf + start, seq->buf + start + 1, in statmount_mnt_opts()
5293 seq->count - start - 1); in statmount_mnt_opts()
5294 seq->count--; in statmount_mnt_opts()
5305 return -EAGAIN; in statmount_opt_process()
5307 buf_end = seq->buf + seq->count; in statmount_opt_process()
5308 dst = seq->buf + start; in statmount_opt_process()
5312 seq->count = start; in statmount_opt_process()
5322 return -EOVERFLOW; in statmount_opt_process()
5324 seq->count = dst - 1 - seq->buf; in statmount_opt_process()
5330 struct vfsmount *mnt = s->mnt; in statmount_opt_array()
5331 struct super_block *sb = mnt->mnt_sb; in statmount_opt_array()
5332 size_t start = seq->count; in statmount_opt_array()
5335 if (!sb->s_op->show_options) in statmount_opt_array()
5338 err = sb->s_op->show_options(seq, mnt->mnt_root); in statmount_opt_array()
5346 s->sm.opt_num = err; in statmount_opt_array()
5352 struct vfsmount *mnt = s->mnt; in statmount_opt_sec_array()
5353 struct super_block *sb = mnt->mnt_sb; in statmount_opt_sec_array()
5354 size_t start = seq->count; in statmount_opt_sec_array()
5365 s->sm.opt_sec_num = err; in statmount_opt_sec_array()
5373 ret = statmount_mnt_idmap(s->idmap, seq, true); in statmount_mnt_uidmap()
5377 s->sm.mnt_uidmap_num = ret; in statmount_mnt_uidmap()
5381 * non-idmapped mount and an idmapped mount where none of the in statmount_mnt_uidmap()
5384 if (is_valid_mnt_idmap(s->idmap)) in statmount_mnt_uidmap()
5385 s->sm.mask |= STATMOUNT_MNT_UIDMAP; in statmount_mnt_uidmap()
5393 ret = statmount_mnt_idmap(s->idmap, seq, false); in statmount_mnt_gidmap()
5397 s->sm.mnt_gidmap_num = ret; in statmount_mnt_gidmap()
5401 * non-idmapped mount and an idmapped mount where none of the in statmount_mnt_gidmap()
5404 if (is_valid_mnt_idmap(s->idmap)) in statmount_mnt_gidmap()
5405 s->sm.mask |= STATMOUNT_MNT_GIDMAP; in statmount_mnt_gidmap()
5413 struct seq_file *seq = &s->seq; in statmount_string()
5414 struct statmount *sm = &s->sm; in statmount_string()
5418 if (!seq->count) in statmount_string()
5421 start = seq->count; in statmount_string()
5425 offp = &sm->fs_type; in statmount_string()
5429 offp = &sm->mnt_root; in statmount_string()
5433 offp = &sm->mnt_point; in statmount_string()
5437 offp = &sm->mnt_opts; in statmount_string()
5441 offp = &sm->opt_array; in statmount_string()
5445 offp = &sm->opt_sec_array; in statmount_string()
5449 offp = &sm->fs_subtype; in statmount_string()
5453 offp = &sm->sb_source; in statmount_string()
5457 sm->mnt_uidmap = start; in statmount_string()
5461 sm->mnt_gidmap = start; in statmount_string()
5466 return -EINVAL; in statmount_string()
5473 if (seq->count == start) in statmount_string()
5475 if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize))) in statmount_string()
5476 return -EOVERFLOW; in statmount_string()
5477 if (kbufsize >= s->bufsize) in statmount_string()
5478 return -EOVERFLOW; in statmount_string()
5482 return -EAGAIN; in statmount_string()
5487 seq->buf[seq->count++] = '\0'; in statmount_string()
5488 sm->mask |= flag; in statmount_string()
5495 struct statmount *sm = &s->sm; in copy_statmount_to_user()
5496 struct seq_file *seq = &s->seq; in copy_statmount_to_user()
5497 char __user *str = ((char __user *)s->buf) + sizeof(*sm); in copy_statmount_to_user()
5498 size_t copysize = min_t(size_t, s->bufsize, sizeof(*sm)); in copy_statmount_to_user()
5500 if (seq->count && copy_to_user(str, seq->buf, seq->count)) in copy_statmount_to_user()
5501 return -EFAULT; in copy_statmount_to_user()
5504 sm->size = copysize + seq->count; in copy_statmount_to_user()
5505 if (copy_to_user(s->buf, sm, copysize)) in copy_statmount_to_user()
5506 return -EFAULT; in copy_statmount_to_user()
5516 node = rb_prev(&curr->mnt_node); in listmnt_next()
5518 node = rb_next(&curr->mnt_node); in listmnt_next()
5523 static int grab_requested_root(struct mnt_namespace *ns, struct path *root) in grab_requested_root() argument
5529 /* We're looking at our own ns, just use get_fs_root. */ in grab_requested_root()
5530 if (ns == current->nsproxy->mnt_ns) { in grab_requested_root()
5531 get_fs_root(current->fs, root); in grab_requested_root()
5536 * We have to find the first mount in our ns and use that, however it in grab_requested_root()
5539 if (mnt_ns_empty(ns)) in grab_requested_root()
5540 return -ENOENT; in grab_requested_root()
5542 first = child = ns->root; in grab_requested_root()
5546 return -ENOENT; in grab_requested_root()
5547 if (child->mnt_parent == first) in grab_requested_root()
5551 root->mnt = mntget(&child->mnt); in grab_requested_root()
5552 root->dentry = dget(root->mnt->mnt_root); in grab_requested_root()
5575 struct mnt_namespace *ns) in do_statmount() argument
5581 if (mnt_ns_id && mnt_ns_empty(ns)) in do_statmount()
5582 return -ENOENT; in do_statmount()
5584 s->mnt = lookup_mnt_in_ns(mnt_id, ns); in do_statmount()
5585 if (!s->mnt) in do_statmount()
5586 return -ENOENT; in do_statmount()
5588 err = grab_requested_root(ns, &s->root); in do_statmount()
5593 * Don't trigger audit denials. We just want to determine what in do_statmount()
5596 m = real_mount(s->mnt); in do_statmount()
5597 if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && in do_statmount()
5598 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in do_statmount()
5599 return -EPERM; in do_statmount()
5601 err = security_sb_statfs(s->mnt->mnt_root); in do_statmount()
5606 * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap in do_statmount()
5607 * can change concurrently as we only hold the read-side of the in do_statmount()
5611 * We could sample the mount lock sequence counter to detect in do_statmount()
5613 * happens is that the mnt->mnt_idmap pointer is already changed in do_statmount()
5614  * while mnt->mnt_flags isn't or vice versa. So what. in do_statmount()
5616 * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved in do_statmount()
5618 * torn read/write. That's all we care about right now. in do_statmount()
5620 s->idmap = mnt_idmap(s->mnt); in do_statmount()
5621 if (s->mask & STATMOUNT_MNT_BASIC) in do_statmount()
5624 if (s->mask & STATMOUNT_SB_BASIC) in do_statmount()
5627 if (s->mask & STATMOUNT_PROPAGATE_FROM) in do_statmount()
5630 if (s->mask & STATMOUNT_FS_TYPE) in do_statmount()
5633 if (!err && s->mask & STATMOUNT_MNT_ROOT) in do_statmount()
5636 if (!err && s->mask & STATMOUNT_MNT_POINT) in do_statmount()
5639 if (!err && s->mask & STATMOUNT_MNT_OPTS) in do_statmount()
5642 if (!err && s->mask & STATMOUNT_OPT_ARRAY) in do_statmount()
5645 if (!err && s->mask & STATMOUNT_OPT_SEC_ARRAY) in do_statmount()
5648 if (!err && s->mask & STATMOUNT_FS_SUBTYPE) in do_statmount()
5651 if (!err && s->mask & STATMOUNT_SB_SOURCE) in do_statmount()
5654 if (!err && s->mask & STATMOUNT_MNT_UIDMAP) in do_statmount()
5657 if (!err && s->mask & STATMOUNT_MNT_GIDMAP) in do_statmount()
5660 if (!err && s->mask & STATMOUNT_MNT_NS_ID) in do_statmount()
5661 statmount_mnt_ns_id(s, ns); in do_statmount()
5663 if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) { in do_statmount()
5664 s->sm.mask |= STATMOUNT_SUPPORTED_MASK; in do_statmount()
5665 s->sm.supported_mask = STATMOUNT_SUPPORTED; in do_statmount()
5672 WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask); in do_statmount()
5679 if (likely(ret != -EAGAIN)) in retry_statmount()
5699 return -EFAULT; in prepare_kstatmount()
5702 ks->mask = kreq->param; in prepare_kstatmount()
5703 ks->buf = buf; in prepare_kstatmount()
5704 ks->bufsize = bufsize; in prepare_kstatmount()
5706 if (ks->mask & STATMOUNT_STRING_REQ) { in prepare_kstatmount()
5707 if (bufsize == sizeof(ks->sm)) in prepare_kstatmount()
5708 return -EOVERFLOW; in prepare_kstatmount()
5710 ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); in prepare_kstatmount()
5711 if (!ks->seq.buf) in prepare_kstatmount()
5712 return -ENOMEM; in prepare_kstatmount()
5714 ks->seq.size = seq_size; in prepare_kstatmount()
5728 ret = get_user(usize, &req->size); in copy_mnt_id_req()
5730 return -EFAULT; in copy_mnt_id_req()
5732 return -E2BIG; in copy_mnt_id_req()
5734 return -EINVAL; in copy_mnt_id_req()
5739 if (kreq->spare != 0) in copy_mnt_id_req()
5740 return -EINVAL; in copy_mnt_id_req()
5742 if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) in copy_mnt_id_req()
5743 return -EINVAL; in copy_mnt_id_req()
5749 * that, or if not simply grab a passive reference on our mount namespace and
5756 if (kreq->mnt_ns_id && kreq->spare) in grab_requested_mnt_ns()
5757 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5759 if (kreq->mnt_ns_id) in grab_requested_mnt_ns()
5760 return lookup_mnt_ns(kreq->mnt_ns_id); in grab_requested_mnt_ns()
5762 if (kreq->spare) { in grab_requested_mnt_ns()
5763 struct ns_common *ns; in grab_requested_mnt_ns() local
5765 CLASS(fd, f)(kreq->spare); in grab_requested_mnt_ns()
5767 return ERR_PTR(-EBADF); in grab_requested_mnt_ns()
5770 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5772 ns = get_proc_ns(file_inode(fd_file(f))); in grab_requested_mnt_ns()
5773 if (ns->ns_type != CLONE_NEWNS) in grab_requested_mnt_ns()
5774 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5776 mnt_ns = to_mnt_ns(ns); in grab_requested_mnt_ns()
5778 mnt_ns = current->nsproxy->mnt_ns; in grab_requested_mnt_ns()
5781 refcount_inc(&mnt_ns->passive); in grab_requested_mnt_ns()
5789 struct mnt_namespace *ns __free(mnt_ns_release) = NULL; in SYSCALL_DEFINE4()
5792 /* We currently support retrieval of 3 strings. */ in SYSCALL_DEFINE4()
5797 return -EINVAL; in SYSCALL_DEFINE4()
5803 ns = grab_requested_mnt_ns(&kreq); in SYSCALL_DEFINE4()
5804 if (!ns) in SYSCALL_DEFINE4()
5805 return -ENOENT; in SYSCALL_DEFINE4()
5807 if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && in SYSCALL_DEFINE4()
5808 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in SYSCALL_DEFINE4()
5809 return -ENOENT; in SYSCALL_DEFINE4()
5813 return -ENOMEM; in SYSCALL_DEFINE4()
5821 ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); in SYSCALL_DEFINE4()
5825 kvfree(ks->seq.buf); in SYSCALL_DEFINE4()
5826 path_put(&ks->root); in SYSCALL_DEFINE4()
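/*
 * Editor's sketch (hedged): a minimal statmount() caller matching the
 * kernel path above. It resolves the 64-bit unique mount ID of "/" via
 * statx(STATX_MNT_ID_UNIQUE), then requests the basic fields plus the
 * fs type string. A real caller retries with a bigger buffer on
 * -EOVERFLOW, mirroring retry_statmount(). Assumes >= 6.8 headers.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <linux/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	static char buf[1 << 16] __attribute__((aligned(8)));
	struct statmount *sm = (struct statmount *)buf;
	struct statx sx;
	struct mnt_id_req req;

	if (syscall(SYS_statx, AT_FDCWD, "/", 0, STATX_MNT_ID_UNIQUE, &sx) < 0)
		err(1, "statx");
	memset(&req, 0, sizeof(req));
	req.size = MNT_ID_REQ_SIZE_VER0;
	req.mnt_id = sx.stx_mnt_id;	/* the new-style, unique mount ID */
	req.param = STATMOUNT_MNT_BASIC | STATMOUNT_FS_TYPE;	/* mask */
	if (syscall(SYS_statmount, &req, sm, sizeof(buf), 0) < 0)
		err(1, "statmount");
	printf("mnt_id=%llu parent=%llu\n",
	       (unsigned long long)sm->mnt_id,
	       (unsigned long long)sm->mnt_parent_id);
	if (sm->mask & STATMOUNT_FS_TYPE)	/* strings live past the struct */
		printf("fstype=%s\n", sm->str + sm->fs_type);
	return 0;
}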
5837 struct mnt_namespace *ns; member
5844 struct mnt_namespace *ns = kls->ns; in do_listmount() local
5845 u64 mnt_parent_id = kls->mnt_parent_id; in do_listmount()
5846 u64 last_mnt_id = kls->last_mnt_id; in do_listmount()
5847 u64 *mnt_ids = kls->kmnt_ids; in do_listmount()
5848 size_t nr_mnt_ids = kls->nr_mnt_ids; in do_listmount()
5855 ret = grab_requested_root(ns, &kls->root); in do_listmount()
5860 orig = kls->root; in do_listmount()
5862 orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); in do_listmount()
5864 return -ENOENT; in do_listmount()
5865 orig.dentry = orig.mnt->mnt_root; in do_listmount()
5869 * Don't trigger audit denials. We just want to determine what in do_listmount()
5872 if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &kls->root) && in do_listmount()
5873 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in do_listmount()
5874 return -EPERM; in do_listmount()
5882 first = node_to_mount(ns->mnt_last_node); in do_listmount()
5884 first = node_to_mount(ns->mnt_first_node); in do_listmount()
5887 first = mnt_find_id_at_reverse(ns, last_mnt_id - 1); in do_listmount()
5889 first = mnt_find_id_at(ns, last_mnt_id + 1); in do_listmount()
5893 if (r->mnt_id_unique == mnt_parent_id) in do_listmount()
5895 if (!is_path_reachable(r, r->mnt.mnt_root, &orig)) in do_listmount()
5897 *mnt_ids = r->mnt_id_unique; in do_listmount()
5899 nr_mnt_ids--; in do_listmount()
5907 path_put(&kls->root); in __free_klistmount_free()
5908 kvfree(kls->kmnt_ids); in __free_klistmount_free()
5909 mnt_ns_release(kls->ns); in __free_klistmount_free()
5916 u64 last_mnt_id = kreq->param; in prepare_klistmount()
5920 return -EINVAL; in prepare_klistmount()
5922 kls->last_mnt_id = last_mnt_id; in prepare_klistmount()
5924 kls->nr_mnt_ids = nr_mnt_ids; in prepare_klistmount()
5925 kls->kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kls->kmnt_ids), in prepare_klistmount()
5927 if (!kls->kmnt_ids) in prepare_klistmount()
5928 return -ENOMEM; in prepare_klistmount()
5930 kls->ns = grab_requested_mnt_ns(kreq); in prepare_klistmount()
5931 if (!kls->ns) in prepare_klistmount()
5932 return -ENOENT; in prepare_klistmount()
5934 kls->mnt_parent_id = kreq->mnt_id; in prepare_klistmount()
5947 return -EINVAL; in SYSCALL_DEFINE4()
5955 return -EOVERFLOW; in SYSCALL_DEFINE4()
5958 return -EFAULT; in SYSCALL_DEFINE4()
5968 if (kreq.mnt_ns_id && (kls.ns != current->nsproxy->mnt_ns) && in SYSCALL_DEFINE4()
5969 !ns_capable_noaudit(kls.ns->user_ns, CAP_SYS_ADMIN)) in SYSCALL_DEFINE4()
5970 return -ENOENT; in SYSCALL_DEFINE4()
5973 * We only need to guard against mount topology changes as in SYSCALL_DEFINE4()
5982 return -EFAULT; in SYSCALL_DEFINE4()
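/*
 * Editor's sketch (hedged): enumerating the current namespace with
 * listmount() as implemented above. LSMT_ROOT asks for the whole
 * namespace rather than one parent's children; req.param carries the
 * last mount ID seen when paging through results larger than the
 * array. The returned IDs pair naturally with statmount().
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <err.h>

int main(void)
{
	uint64_t ids[256];
	struct mnt_id_req req;
	long n;

	memset(&req, 0, sizeof(req));
	req.size = MNT_ID_REQ_SIZE_VER0;
	req.mnt_id = LSMT_ROOT;		/* list from the namespace root */
	n = syscall(SYS_listmount, &req, ids, 256, 0);
	if (n < 0)
		err(1, "listmount");
	for (long i = 0; i < n; i++)
		printf("%llu\n", (unsigned long long)ids[i]);
	return 0;
}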
5988 .ns.inum = ns_init_inum(&init_mnt_ns),
5989 .ns.ops = &mntns_operations,
5991 .ns.__ns_ref = REFCOUNT_INIT(1),
5992 .ns.ns_type = ns_common_type(&init_mnt_ns),
6012 init_task.nsproxy->mnt_ns = &init_mnt_ns; in init_mount_tree()
6016 root.dentry = mnt->mnt_root; in init_mount_tree()
6018 set_fs_pwd(current->fs, &root); in init_mount_tree()
6019 set_fs_root(current->fs, &root); in init_mount_tree()
6031 mount_hashtable = alloc_large_system_hash("Mount-cache", in mnt_init()
6036 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", in mnt_init()
6059 void put_mnt_ns(struct mnt_namespace *ns) in put_mnt_ns() argument
6061 if (!ns_ref_put(ns)) in put_mnt_ns()
6064 emptied_ns = ns; in put_mnt_ns()
6066 umount_tree(ns->root, 0); in put_mnt_ns()
6072 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); in kern_mount()
6076  * we unmount before the filesystem is unregistered in kern_mount()
6078 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; in kern_mount()
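/*
 * Editor's sketch (hedged, kernel-side): the usual consumer of
 * kern_mount() above: a subsystem pinning an internal pseudo
 * filesystem at init time so its mount outlives any userspace
 * reference. "example_fs_type" is hypothetical, registered elsewhere.
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/err.h>

extern struct file_system_type example_fs_type;	/* hypothetical */

static struct vfsmount *example_mnt;

static int __init example_init(void)
{
	example_mnt = kern_mount(&example_fs_type);
	if (IS_ERR(example_mnt))
		return PTR_ERR(example_mnt);
	return 0;
}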
6114 /* Does the current process have a non-standard root? */ in current_chrooted()
6118 get_fs_root(current->fs, &fs_root); in current_chrooted()
6124 root = topmost_overmount(current->nsproxy->mnt_ns->root); in current_chrooted()
6126 return fs_root.mnt != &root->mnt || !path_mounted(&fs_root); in current_chrooted()
6129 static bool mnt_already_visible(struct mnt_namespace *ns, in mnt_already_visible() argument
6137 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { in mnt_already_visible()
6141 if (mnt->mnt.mnt_sb->s_type != sb->s_type) in mnt_already_visible()
6147 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) in mnt_already_visible()
6151 mnt_flags = mnt->mnt.mnt_flags; in mnt_already_visible()
6154 if (sb_rdonly(mnt->mnt.mnt_sb)) in mnt_already_visible()
6171 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in mnt_already_visible()
6172 struct inode *inode = child->mnt_mountpoint->d_inode; in mnt_already_visible()
6174 if (!(child->mnt.mnt_flags & MNT_LOCKED)) in mnt_already_visible()
6192 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in mount_too_revealing() local
6195 if (ns->user_ns == &init_user_ns) in mount_too_revealing()
6199 s_iflags = sb->s_iflags; in mount_too_revealing()
6209 return !mnt_already_visible(ns, sb, new_mnt_flags); in mount_too_revealing()
6221 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) && in mnt_may_suid()
6222 current_in_userns(mnt->mnt_sb->s_user_ns); in mnt_may_suid()
6227 struct ns_common *ns = NULL; in mntns_get() local
6231 nsproxy = task->nsproxy; in mntns_get()
6233 ns = &nsproxy->mnt_ns->ns; in mntns_get()
6234 get_mnt_ns(to_mnt_ns(ns)); in mntns_get()
6238 return ns; in mntns_get()
6241 static void mntns_put(struct ns_common *ns) in mntns_put() argument
6243 put_mnt_ns(to_mnt_ns(ns)); in mntns_put()
6246 static int mntns_install(struct nsset *nsset, struct ns_common *ns) in mntns_install() argument
6248 struct nsproxy *nsproxy = nsset->nsproxy; in mntns_install()
6249 struct fs_struct *fs = nsset->fs; in mntns_install()
6250 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; in mntns_install()
6251 struct user_namespace *user_ns = nsset->cred->user_ns; in mntns_install()
6255 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || in mntns_install()
6258 return -EPERM; in mntns_install()
6261 return -EINVAL; in mntns_install()
6263 if (fs->users != 1) in mntns_install()
6264 return -EINVAL; in mntns_install()
6267 old_mnt_ns = nsproxy->mnt_ns; in mntns_install()
6268 nsproxy->mnt_ns = mnt_ns; in mntns_install()
6271 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, in mntns_install()
6275 nsproxy->mnt_ns = old_mnt_ns; in mntns_install()
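/*
 * Editor's sketch (hedged): entering a mount namespace from userspace,
 * which lands in mntns_install() above. The checks there are why this
 * fails with -EINVAL for multithreaded callers or a shared fs_struct
 * and with -EPERM without CAP_SYS_ADMIN over the target namespace's
 * user namespace. PID 1234 is illustrative.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int main(void)
{
	int fd = open("/proc/1234/ns/mnt", O_RDONLY | O_CLOEXEC);

	if (fd < 0)
		err(1, "open");
	if (setns(fd, CLONE_NEWNS) < 0)
		err(1, "setns");
	close(fd);
	/* path resolution now uses the target namespace's mount tree */
	return 0;
}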
6290 static struct user_namespace *mntns_owner(struct ns_common *ns) in mntns_owner() argument
6292 return to_mnt_ns(ns)->user_ns; in mntns_owner()
6306 .procname = "mount-max",