Lines Matching +full:we +full:- +full:on +full:- +full:ns

1 // SPDX-License-Identifier: GPL-2.0-only
7 * Based on code from fs/super.c, copyright Linus Torvalds and others.
137 struct ns_common *ns; in node_to_mnt_ns() local
141 ns = rb_entry(node, struct ns_common, ns_tree_node); in node_to_mnt_ns()
142 return container_of(ns, struct mnt_namespace, ns); in node_to_mnt_ns()
145 static void mnt_ns_release(struct mnt_namespace *ns) in mnt_ns_release() argument
148 if (ns && refcount_dec_and_test(&ns->passive)) { in mnt_ns_release()
149 fsnotify_mntns_delete(ns); in mnt_ns_release()
150 put_user_ns(ns->user_ns); in mnt_ns_release()
151 kfree(ns); in mnt_ns_release()
158 mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu)); in DEFINE_FREE()
161 static void mnt_ns_tree_remove(struct mnt_namespace *ns) in mnt_ns_tree_remove() argument
164 if (ns_tree_active(ns)) in mnt_ns_tree_remove()
165 ns_tree_remove(ns); in mnt_ns_tree_remove()
167 call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu); in mnt_ns_tree_remove()
178 * Note the lookup is lockless, protected by a sequence counter. We only
180 * possible. So if we didn't find a mount namespace and the sequence
181 * counter has changed we need to retry. If the sequence counter is
182 * still the same we know the search actually failed.
187 struct ns_common *ns; in lookup_mnt_ns() local
190 ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS); in lookup_mnt_ns()
191 if (!ns) in lookup_mnt_ns()
195 * The last reference count is put with RCU delay so we can in lookup_mnt_ns()
198 mnt_ns = container_of(ns, struct mnt_namespace, ns); in lookup_mnt_ns()
199 refcount_inc(&mnt_ns->passive); in lookup_mnt_ns()
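The retry discipline described above can be shown in miniature. Below is a hedged userspace sketch, not the kernel's implementation: tree_lookup() and tree_seq are hypothetical stand-ins for ns_tree_lookup_rcu() and the sequence counter guarding the namespace tree.

    #include <stdatomic.h>
    #include <stddef.h>

    extern _Atomic unsigned tree_seq;            /* bumped on every tree change */
    extern void *tree_lookup(unsigned long id);  /* lockless, may race with updates */

    void *lookup_with_retry(unsigned long id)
    {
        for (;;) {
            unsigned seq = atomic_load(&tree_seq);
            void *hit = tree_lookup(id);
            if (hit)
                return hit;        /* a positive result needs no retry */
            if (atomic_load(&tree_seq) == seq)
                return NULL;       /* tree unchanged: the miss is real */
            /* the tree changed under us: the miss may be spurious, retry */
        }
    }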
233 res = __xa_alloc(&mnt_id_xa, &mnt->mnt_id, mnt, XA_LIMIT(1, INT_MAX), GFP_KERNEL); in mnt_alloc_id()
235 mnt->mnt_id_unique = ++mnt_id_ctr; in mnt_alloc_id()
242 xa_erase(&mnt_id_xa, mnt->mnt_id); in mnt_free_id()
254 mnt->mnt_group_id = res; in mnt_alloc_group_id()
263 ida_free(&mnt_group_ida, mnt->mnt_group_id); in mnt_release_group_id()
264 mnt->mnt_group_id = 0; in mnt_release_group_id()
273 this_cpu_add(mnt->mnt_pcp->mnt_count, n); in mnt_add_count()
276 mnt->mnt_count += n; in mnt_add_count()
291 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; in mnt_get_count()
296 return mnt->mnt_count; in mnt_get_count()
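A minimal userspace analogue of this per-CPU counting scheme, assuming a fixed CPU count and C11 atomics in place of the kernel's this_cpu_add()/per_cpu_ptr(): each slot is updated locklessly, individual slots may go negative, and only the sum across all slots is meaningful.

    #include <stdatomic.h>

    #define NR_CPUS 8
    static _Atomic long mnt_count[NR_CPUS];   /* one slot per CPU */

    static void add_count(int cpu, long n)    /* mnt_add_count() analogue */
    {
        atomic_fetch_add_explicit(&mnt_count[cpu], n, memory_order_relaxed);
    }

    static long get_count(void)               /* mnt_get_count() analogue */
    {
        long sum = 0;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            sum += atomic_load_explicit(&mnt_count[cpu],
                                        memory_order_relaxed);
        return sum;    /* only the total is meaningful */
    }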
311 mnt->mnt_devname = kstrdup_const(name, in alloc_vfsmnt()
314 mnt->mnt_devname = "none"; in alloc_vfsmnt()
315 if (!mnt->mnt_devname) in alloc_vfsmnt()
319 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); in alloc_vfsmnt()
320 if (!mnt->mnt_pcp) in alloc_vfsmnt()
323 this_cpu_add(mnt->mnt_pcp->mnt_count, 1); in alloc_vfsmnt()
325 mnt->mnt_count = 1; in alloc_vfsmnt()
326 mnt->mnt_writers = 0; in alloc_vfsmnt()
329 INIT_HLIST_NODE(&mnt->mnt_hash); in alloc_vfsmnt()
330 INIT_LIST_HEAD(&mnt->mnt_child); in alloc_vfsmnt()
331 INIT_LIST_HEAD(&mnt->mnt_mounts); in alloc_vfsmnt()
332 INIT_LIST_HEAD(&mnt->mnt_list); in alloc_vfsmnt()
333 INIT_LIST_HEAD(&mnt->mnt_expire); in alloc_vfsmnt()
334 INIT_LIST_HEAD(&mnt->mnt_share); in alloc_vfsmnt()
335 INIT_HLIST_HEAD(&mnt->mnt_slave_list); in alloc_vfsmnt()
336 INIT_HLIST_NODE(&mnt->mnt_slave); in alloc_vfsmnt()
337 INIT_HLIST_NODE(&mnt->mnt_mp_list); in alloc_vfsmnt()
338 INIT_HLIST_HEAD(&mnt->mnt_stuck_children); in alloc_vfsmnt()
339 RB_CLEAR_NODE(&mnt->mnt_node); in alloc_vfsmnt()
340 mnt->mnt.mnt_idmap = &nop_mnt_idmap; in alloc_vfsmnt()
346 kfree_const(mnt->mnt_devname); in alloc_vfsmnt()
356 * Most r/o checks on a fs are for operations that take
358 * We must keep track of when those operations start
360 * we can determine when writes are able to occur to
364 * __mnt_is_readonly: check whether a mount is read-only
376 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); in __mnt_is_readonly()
383 this_cpu_inc(mnt->mnt_pcp->mnt_writers); in mnt_inc_writers()
385 mnt->mnt_writers++; in mnt_inc_writers()
392 this_cpu_dec(mnt->mnt_pcp->mnt_writers); in mnt_dec_writers()
394 mnt->mnt_writers--; in mnt_dec_writers()
405 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; in mnt_get_writers()
410 return mnt->mnt_writers; in mnt_get_writers()
416 if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) in mnt_is_readonly()
420 * making sure if we don't see s_readonly_remount set yet, we also will in mnt_is_readonly()
423 * assuring that if we see s_readonly_remount already cleared, we will in mnt_is_readonly()
431 * Most r/o & frozen checks on a fs are for operations that take discrete
432 * amounts of time, like a write() or unlink(). We must keep track of when
433 * those operations start (for permission checks) and when they end, so that we
437 * mnt_get_write_access - get write access to a mount without freeze protection
438 * @m: the mount on which to take a write
440 * This tells the low-level filesystem that a write is about to be performed to
441 * it, and makes sure that writes are allowed (the mount is read-write) before
454 * The store to mnt_inc_writers must be visible before we pass in mnt_get_write_access()
460 while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) { in mnt_get_write_access()
466 * setting WRITE_HOLD got preempted on a remote in mnt_get_write_access()
479 * sure that if we see WRITE_HOLD cleared, we will also see in mnt_get_write_access()
481 * mnt_is_readonly() and bail in case we are racing with remount in mnt_get_write_access()
482 * read-only. in mnt_get_write_access()
487 ret = -EROFS; in mnt_get_write_access()
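The writer-side protocol above, reduced to a hedged C11-atomics sketch: the flags stand in for MNT_WRITE_HOLD and MNT_READONLY, and the seq_cst fence stands in for the kernel's smp_mb().

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    extern _Atomic long writers;     /* mnt_writers analogue */
    extern _Atomic bool write_hold;  /* MNT_WRITE_HOLD analogue */
    extern _Atomic bool readonly;    /* MNT_READONLY analogue */

    int get_write_access_demo(void)
    {
        atomic_fetch_add(&writers, 1);              /* announce the write */
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with the holder's fence */
        while (atomic_load(&write_hold))
            ;                                       /* remount r/o in progress, spin */
        if (atomic_load(&readonly)) {               /* lost the race: back out */
            atomic_fetch_sub(&writers, 1);
            return -EROFS;
        }
        return 0;
    }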
496 * mnt_want_write - get write access to a mount
497 * @m: the mount on which to take a write
499 * This tells the low-level filesystem that a write is about to be performed to
500 * it, and makes sure that writes are allowed (mount is read-write, filesystem
508 sb_start_write(m->mnt_sb); in mnt_want_write()
511 sb_end_write(m->mnt_sb); in mnt_want_write()
517 * mnt_get_write_access_file - get write access to a file's mount
518 * @file: the file whose mount is to be written to in mnt_get_write_access_file()
527 if (file->f_mode & FMODE_WRITER) { in mnt_get_write_access_file()
530 * writable fd's, e.g. due to a fs error with errors=remount-ro in mnt_get_write_access_file()
532 if (__mnt_is_readonly(file->f_path.mnt)) in mnt_get_write_access_file()
533 return -EROFS; in mnt_get_write_access_file()
536 return mnt_get_write_access(file->f_path.mnt); in mnt_get_write_access_file()
540 * mnt_want_write_file - get write access to a file's mount
541 * @file: the file whose mount is to be written to in mnt_want_write_file()
552 sb_start_write(file_inode(file)->i_sb); in mnt_want_write_file()
555 sb_end_write(file_inode(file)->i_sb); in mnt_want_write_file()
561 * mnt_put_write_access - give up write access to a mount
562 * @mnt: the mount on which to give up write access
564 * Tells the low-level filesystem that we are done
577 * mnt_drop_write - give up write access to a mount
578 * @mnt: the mount on which to give up write access
580 * Tells the low-level filesystem that we are done performing writes to it and
587 sb_end_write(mnt->mnt_sb); in mnt_drop_write()
593 if (!(file->f_mode & FMODE_WRITER)) in mnt_put_write_access_file()
594 mnt_put_write_access(file->f_path.mnt); in mnt_put_write_access_file()
600 sb_end_write(file_inode(file)->i_sb); in mnt_drop_write_file()
605 * mnt_hold_writers - prevent write access to the given mount
619 * Return: On success 0 is returned.
620 * On error, -EBUSY is returned.
626 * After storing WRITE_HOLD, we'll read the counters. This store in mnt_hold_writers()
627 * should be visible before we do. in mnt_hold_writers()
632 * With writers on hold, if this value is zero, then there are in mnt_hold_writers()
637 * It is OK to have counter incremented on one CPU and decremented on in mnt_hold_writers()
638 * another: the sum will add up correctly. The danger would be when we in mnt_hold_writers()
639 * sum up each counter, if we read a counter before it is incremented, in mnt_hold_writers()
641 * decremented from -- we would see more decrements than we should. in mnt_hold_writers()
643 * mnt_want_write first increments count, then smp_mb, then spins on in mnt_hold_writers()
645 * we're counting up here. in mnt_hold_writers()
648 return -EBUSY; in mnt_hold_writers()
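The holder side of the same handshake, reusing the declarations from the writer sketch above: publish the hold flag, fence, then sum the writer counts. Any writer that got past its own fence before the hold was visible is seen here and makes the transition fail.

    int hold_writers_demo(void)    /* mnt_hold_writers() analogue */
    {
        atomic_store(&write_hold, true);
        atomic_thread_fence(memory_order_seq_cst);  /* pairs with the writer's fence */
        if (atomic_load(&writers) != 0)
            return -EBUSY;         /* an in-flight writer holds us off */
        return 0;                  /* no writers: safe to flip to read-only */
    }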
654 * mnt_unhold_writers - stop preventing write access to the given mount
679 struct mount **p = m->mnt_pprev_for_sb; in mnt_del_instance()
680 struct mount *next = m->mnt_next_for_sb; in mnt_del_instance()
683 next->mnt_pprev_for_sb = p; in mnt_del_instance()
689 struct mount *first = s->s_mounts; in mnt_add_instance()
692 first->mnt_pprev_for_sb = &m->mnt_next_for_sb; in mnt_add_instance()
693 m->mnt_next_for_sb = first; in mnt_add_instance()
694 m->mnt_pprev_for_sb = &s->s_mounts; in mnt_add_instance()
695 s->s_mounts = m; in mnt_add_instance()
704 mnt->mnt.mnt_flags |= MNT_READONLY; in mnt_make_readonly()
714 if (atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
715 return -EBUSY; in sb_prepare_remount_readonly()
719 for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { in sb_prepare_remount_readonly()
720 if (!(m->mnt.mnt_flags & MNT_READONLY)) { in sb_prepare_remount_readonly()
726 if (!err && atomic_long_read(&sb->s_remove_count)) in sb_prepare_remount_readonly()
727 err = -EBUSY; in sb_prepare_remount_readonly()
731 for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { in sb_prepare_remount_readonly()
741 mnt_idmap_put(mnt_idmap(&mnt->mnt)); in free_vfsmnt()
742 kfree_const(mnt->mnt_devname); in free_vfsmnt()
744 free_percpu(mnt->mnt_pcp); in free_vfsmnt()
768 if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { in __legitimize_mnt()
769 mnt_add_count(mnt, -1); in __legitimize_mnt()
775 return -1; in __legitimize_mnt()
793 * __lookup_mnt - mount hash lookup
797 * If @mnt has a child mount @c mounted on @dentry find and return it.
802 * Return: The child of @mnt mounted on @dentry or %NULL.
810 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) in __lookup_mnt()
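A self-contained sketch of the predicate on the line above, with hypothetical types standing in for struct mount and the hash chain: a child matches only when both its parent and its mountpoint dentry match.

    struct mnt_demo {
        struct mnt_demo *next;        /* hash-chain link */
        struct mnt_demo *parent;      /* mount we are attached to */
        const void *mountpoint;       /* dentry we are mounted on */
    };

    static struct mnt_demo *lookup_demo(struct mnt_demo *chain,
                                        const struct mnt_demo *parent,
                                        const void *dentry)
    {
        for (struct mnt_demo *p = chain; p; p = p->next)
            if (p->parent == parent && p->mountpoint == dentry)
                return p;             /* child mounted exactly here */
        return NULL;
    }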
816 * lookup_mnt - Return the child mount mounted at given location
831 child_mnt = __lookup_mnt(path->mnt, path->dentry); in lookup_mnt()
832 m = child_mnt ? &child_mnt->mnt : NULL; in lookup_mnt()
839 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
843 * test is handled inline. For the slow case when we are actually
848 * The mount_hashtable is not usable in this context because we
855 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in __is_local_mountpoint() local
860 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) in __is_local_mountpoint()
861 if (mnt->mnt_mountpoint == dentry) in __is_local_mountpoint()
879 if (mp->m_dentry == dentry) { in lookup_mountpoint()
880 hlist_add_head(&m->node, &mp->m_list); in lookup_mountpoint()
881 m->mp = mp; in lookup_mountpoint()
897 return -ENOENT; in get_mountpoint()
909 return -ENOMEM; in get_mountpoint()
915 if (ret == -EBUSY) in get_mountpoint()
924 mp->m_dentry = dget(dentry); in get_mountpoint()
925 hlist_add_head(&mp->m_hash, mp_hash(dentry)); in get_mountpoint()
926 INIT_HLIST_HEAD(&mp->m_list); in get_mountpoint()
927 hlist_add_head(&m->node, &mp->m_list); in get_mountpoint()
928 m->mp = no_free_ptr(mp); in get_mountpoint()
939 if (hlist_empty(&mp->m_list)) { in maybe_free_mountpoint()
940 struct dentry *dentry = mp->m_dentry; in maybe_free_mountpoint()
941 spin_lock(&dentry->d_lock); in maybe_free_mountpoint()
942 dentry->d_flags &= ~DCACHE_MOUNTED; in maybe_free_mountpoint()
943 spin_unlock(&dentry->d_lock); in maybe_free_mountpoint()
945 hlist_del(&mp->m_hash); in maybe_free_mountpoint()
955 if (m->mp) { in unpin_mountpoint()
956 hlist_del(&m->node); in unpin_mountpoint()
957 maybe_free_mountpoint(m->mp, &ex_mountpoints); in unpin_mountpoint()
963 return mnt->mnt_ns == current->nsproxy->mnt_ns; in check_mnt()
970 if (!is_anon_ns(mnt->mnt_ns)) in check_anonymous_mnt()
973 seq = mnt->mnt_ns->seq_origin; in check_anonymous_mnt()
974 return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id); in check_anonymous_mnt()
980 static void touch_mnt_namespace(struct mnt_namespace *ns) in touch_mnt_namespace() argument
982 if (ns) { in touch_mnt_namespace()
983 ns->event = ++event; in touch_mnt_namespace()
984 wake_up_interruptible(&ns->poll); in touch_mnt_namespace()
991 static void __touch_mnt_namespace(struct mnt_namespace *ns) in __touch_mnt_namespace() argument
993 if (ns && ns->event != event) { in __touch_mnt_namespace()
994 ns->event = event; in __touch_mnt_namespace()
995 wake_up_interruptible(&ns->poll); in __touch_mnt_namespace()
1005 struct mount *parent = mnt->mnt_parent; in __umount_mnt()
1006 if (unlikely(parent->overmount == mnt)) in __umount_mnt()
1007 parent->overmount = NULL; in __umount_mnt()
1008 mnt->mnt_parent = mnt; in __umount_mnt()
1009 mnt->mnt_mountpoint = mnt->mnt.mnt_root; in __umount_mnt()
1010 list_del_init(&mnt->mnt_child); in __umount_mnt()
1011 hlist_del_init_rcu(&mnt->mnt_hash); in __umount_mnt()
1012 hlist_del_init(&mnt->mnt_mp_list); in __umount_mnt()
1013 mp = mnt->mnt_mp; in __umount_mnt()
1014 mnt->mnt_mp = NULL; in __umount_mnt()
1033 child_mnt->mnt_mountpoint = mp->m_dentry; in mnt_set_mountpoint()
1034 child_mnt->mnt_parent = mnt; in mnt_set_mountpoint()
1035 child_mnt->mnt_mp = mp; in mnt_set_mountpoint()
1036 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); in mnt_set_mountpoint()
1041 struct mount *parent = mnt->mnt_parent; in make_visible()
1042 if (unlikely(mnt->mnt_mountpoint == parent->mnt.mnt_root)) in make_visible()
1043 parent->overmount = mnt; in make_visible()
1044 hlist_add_head_rcu(&mnt->mnt_hash, in make_visible()
1045 m_hash(&parent->mnt, mnt->mnt_mountpoint)); in make_visible()
1046 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); in make_visible()
1050 * attach_mnt - mount a mount, attach to @mount_hashtable and parent's
1056 * Mount @mnt at @mp on @parent. Then attach @mnt
1059 * Note, when make_visible() is called @mnt->mnt_parent already points
1074 struct mountpoint *old_mp = mnt->mnt_mp; in mnt_change_mountpoint()
1076 list_del_init(&mnt->mnt_child); in mnt_change_mountpoint()
1077 hlist_del_init(&mnt->mnt_mp_list); in mnt_change_mountpoint()
1078 hlist_del_init_rcu(&mnt->mnt_hash); in mnt_change_mountpoint()
1090 static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) in mnt_add_to_ns() argument
1092 struct rb_node **link = &ns->mounts.rb_node; in mnt_add_to_ns()
1097 mnt->mnt_ns = ns; in mnt_add_to_ns()
1100 if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) { in mnt_add_to_ns()
1101 link = &parent->rb_left; in mnt_add_to_ns()
1104 link = &parent->rb_right; in mnt_add_to_ns()
1110 ns->mnt_last_node = &mnt->mnt_node; in mnt_add_to_ns()
1112 ns->mnt_first_node = &mnt->mnt_node; in mnt_add_to_ns()
1113 rb_link_node(&mnt->mnt_node, parent, link); in mnt_add_to_ns()
1114 rb_insert_color(&mnt->mnt_node, &ns->mounts); in mnt_add_to_ns()
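The insertion above keeps a namespace's mounts in a tree ordered by their unique 64-bit mount id, so an in-order walk enumerates them in id order. A plain binary-search-tree sketch of the same descent (the kernel additionally caches first/last node pointers and rebalances via rb_insert_color()):

    #include <stddef.h>
    #include <stdint.h>

    struct node { uint64_t id; struct node *left, *right; };

    static void insert_by_id(struct node **link, struct node *n)
    {
        while (*link) {                       /* descend to the free slot */
            if (n->id < (*link)->id)
                link = &(*link)->left;
            else
                link = &(*link)->right;
        }
        n->left = n->right = NULL;
        *link = n;                            /* ids are unique, no equal case */
    }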
1121 struct list_head *next = p->mnt_mounts.next; in next_mnt()
1122 if (next == &p->mnt_mounts) { in next_mnt()
1126 next = p->mnt_child.next; in next_mnt()
1127 if (next != &p->mnt_parent->mnt_mounts) in next_mnt()
1129 p = p->mnt_parent; in next_mnt()
1137 struct list_head *prev = p->mnt_mounts.prev; in skip_mnt_tree()
1138 while (prev != &p->mnt_mounts) { in skip_mnt_tree()
1140 prev = p->mnt_mounts.prev; in skip_mnt_tree()
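next_mnt() above walks the mount tree in preorder via child and sibling lists. The same walk with explicit pointers instead of the kernel's list_heads (a sketch, not the kernel's types): descend into the first child, otherwise climb toward the root taking the next sibling at each level.

    struct tnode { struct tnode *parent, *first_child, *next_sibling; };

    static struct tnode *next_preorder(struct tnode *p, struct tnode *root)
    {
        if (p->first_child)
            return p->first_child;            /* descend first */
        while (p != root) {
            if (p->next_sibling)
                return p->next_sibling;       /* then the next sibling */
            p = p->parent;                    /* otherwise climb back up */
        }
        return NULL;                          /* whole subtree visited */
    }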
1150 struct mnt_namespace *n = mnt->mnt_parent->mnt_ns; in commit_tree()
1155 n->nr_mounts += n->pending_mounts; in commit_tree()
1156 n->pending_mounts = 0; in commit_tree()
1165 struct super_block *s = root->d_sb; in setup_mnt()
1167 atomic_inc(&s->s_active); in setup_mnt()
1168 m->mnt.mnt_sb = s; in setup_mnt()
1169 m->mnt.mnt_root = dget(root); in setup_mnt()
1170 m->mnt_mountpoint = m->mnt.mnt_root; in setup_mnt()
1171 m->mnt_parent = m; in setup_mnt()
1178 * vfs_create_mount - Create a mount for a configured superblock
1190 if (!fc->root) in vfs_create_mount()
1191 return ERR_PTR(-EINVAL); in vfs_create_mount()
1193 mnt = alloc_vfsmnt(fc->source); in vfs_create_mount()
1195 return ERR_PTR(-ENOMEM); in vfs_create_mount()
1197 if (fc->sb_flags & SB_KERNMOUNT) in vfs_create_mount()
1198 mnt->mnt.mnt_flags = MNT_INTERNAL; in vfs_create_mount()
1200 setup_mnt(mnt, fc->root); in vfs_create_mount()
1202 return &mnt->mnt; in vfs_create_mount()
1210 up_write(&fc->root->d_sb->s_umount); in fc_mount()
1221 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; in fc_mount_longterm()
1235 return ERR_PTR(-EINVAL); in vfs_kern_mount()
1261 mnt = alloc_vfsmnt(old->mnt_devname); in clone_mnt()
1263 return ERR_PTR(-ENOMEM); in clone_mnt()
1265 mnt->mnt.mnt_flags = READ_ONCE(old->mnt.mnt_flags) & in clone_mnt()
1269 mnt->mnt_group_id = 0; /* not a peer of original */ in clone_mnt()
1271 mnt->mnt_group_id = old->mnt_group_id; in clone_mnt()
1273 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { in clone_mnt()
1279 if (mnt->mnt_group_id) in clone_mnt()
1282 mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); in clone_mnt()
1286 if (flag & CL_PRIVATE) // we are done with it in clone_mnt()
1290 list_add(&mnt->mnt_share, &old->mnt_share); in clone_mnt()
1292 if ((flag & CL_SLAVE) && old->mnt_group_id) { in clone_mnt()
1293 hlist_add_head(&mnt->mnt_slave, &old->mnt_slave_list); in clone_mnt()
1294 mnt->mnt_master = old; in clone_mnt()
1296 hlist_add_behind(&mnt->mnt_slave, &old->mnt_slave); in clone_mnt()
1297 mnt->mnt_master = old->mnt_master; in clone_mnt()
1314 * filesystem was probably unable to make r/w->r/o transitions. in cleanup_mnt()
1319 if (unlikely(mnt->mnt_pins.first)) in cleanup_mnt()
1321 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { in cleanup_mnt()
1322 hlist_del(&m->mnt_umount); in cleanup_mnt()
1323 mntput(&m->mnt); in cleanup_mnt()
1325 fsnotify_vfsmount_delete(&mnt->mnt); in cleanup_mnt()
1326 dput(mnt->mnt.mnt_root); in cleanup_mnt()
1327 deactivate_super(mnt->mnt.mnt_sb); in cleanup_mnt()
1329 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); in cleanup_mnt()
1354 if (likely(READ_ONCE(mnt->mnt_ns))) { in mntput_no_expire()
1356 * Since we don't do lock_mount_hash() here, in mntput_no_expire()
1357 * ->mnt_ns can change under us. However, if it's in mntput_no_expire()
1358 * non-NULL, then there's a reference that won't in mntput_no_expire()
1360 * turning ->mnt_ns NULL. So if we observe it in mntput_no_expire()
1361 * non-NULL under rcu_read_lock(), the reference in mntput_no_expire()
1362 * we are dropping is not the final one. in mntput_no_expire()
1364 mnt_add_count(mnt, -1); in mntput_no_expire()
1371 * mount_lock, we'll see their refcount increment here. in mntput_no_expire()
1374 mnt_add_count(mnt, -1); in mntput_no_expire()
1382 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { in mntput_no_expire()
1387 mnt->mnt.mnt_flags |= MNT_DOOMED; in mntput_no_expire()
1391 if (unlikely(!list_empty(&mnt->mnt_expire))) in mntput_no_expire()
1392 list_del(&mnt->mnt_expire); in mntput_no_expire()
1394 if (unlikely(!list_empty(&mnt->mnt_mounts))) { in mntput_no_expire()
1396 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { in mntput_no_expire()
1398 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); in mntput_no_expire()
1404 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { in mntput_no_expire()
1406 if (likely(!(task->flags & PF_KTHREAD))) { in mntput_no_expire()
1407 init_task_work(&mnt->mnt_rcu, __cleanup_mnt); in mntput_no_expire()
1408 if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) in mntput_no_expire()
1411 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) in mntput_no_expire()
1423 if (unlikely(m->mnt_expiry_mark)) in mntput()
1424 WRITE_ONCE(m->mnt_expiry_mark, 0); in mntput()
1446 real_mount(mnt)->mnt_ns = NULL; in mnt_make_shortterm()
1450 * path_is_mountpoint() - Check if path is a mount in the current namespace.
1465 if (!d_mountpoint(path->dentry)) in path_is_mountpoint()
1482 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); in mnt_clone_internal()
1485 p->mnt.mnt_flags |= MNT_INTERNAL; in mnt_clone_internal()
1486 return &p->mnt; in mnt_clone_internal()
1493 static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) in mnt_find_id_at() argument
1495 struct rb_node *node = ns->mounts.rb_node; in mnt_find_id_at()
1501 if (mnt_id <= m->mnt_id_unique) { in mnt_find_id_at()
1503 if (mnt_id == m->mnt_id_unique) in mnt_find_id_at()
1505 node = node->rb_left; in mnt_find_id_at()
1507 node = node->rb_right; in mnt_find_id_at()
1517 static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id) in mnt_find_id_at_reverse() argument
1519 struct rb_node *node = ns->mounts.rb_node; in mnt_find_id_at_reverse()
1525 if (mnt_id >= m->mnt_id_unique) { in mnt_find_id_at_reverse()
1527 if (mnt_id == m->mnt_id_unique) in mnt_find_id_at_reverse()
1529 node = node->rb_right; in mnt_find_id_at_reverse()
1531 node = node->rb_left; in mnt_find_id_at_reverse()
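Both finders descend the id-sorted tree remembering the best candidate so far, and an exact hit ends the walk early. The forward variant as a sketch, reusing struct node from the insertion sketch above (the reverse variant simply mirrors the comparisons):

    static struct node *find_id_at(struct node *root, uint64_t id)
    {
        struct node *best = NULL;             /* smallest id >= target so far */
        for (struct node *n = root; n; ) {
            if (id <= n->id) {
                best = n;
                if (id == n->id)
                    break;                    /* exact match, done */
                n = n->left;                  /* maybe a smaller id still fits */
            } else {
                n = n->right;
            }
        }
        return best;
    }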
1539 /* iterator; we want it to have access to namespace_sem, thus here... */
1542 struct proc_mounts *p = m->private; in m_start()
1546 return mnt_find_id_at(p->ns, *pos); in m_start()
1552 struct rb_node *node = rb_next(&mnt->mnt_node); in m_next()
1557 *pos = next->mnt_id_unique; in m_next()
1569 struct proc_mounts *p = m->private; in m_show()
1571 return p->show(m, &r->mnt); in m_show()
1584 * may_umount_tree - check if a mount tree is busy
1612 * may_umount - check if a mount point is busy
1621 * give false negatives. The main reason why it's here is that we need
1622 * a non-destructive way to look for easily umountable filesystems.
1641 if (!p->prev_ns && p->mnt_ns) { in mnt_notify()
1642 fsnotify_mnt_attach(p->mnt_ns, &p->mnt); in mnt_notify()
1643 } else if (p->prev_ns && !p->mnt_ns) { in mnt_notify()
1644 fsnotify_mnt_detach(p->prev_ns, &p->mnt); in mnt_notify()
1645 } else if (p->prev_ns == p->mnt_ns) { in mnt_notify()
1646 fsnotify_mnt_move(p->mnt_ns, &p->mnt); in mnt_notify()
1648 fsnotify_mnt_detach(p->prev_ns, &p->mnt); in mnt_notify()
1649 fsnotify_mnt_attach(p->mnt_ns, &p->mnt); in mnt_notify()
1651 p->prev_ns = p->mnt_ns; in mnt_notify()
1663 list_del_init(&m->to_notify); in notify_mnt_list()
1688 struct mnt_namespace *ns = emptied_ns; in namespace_unlock() local
1707 if (unlikely(ns)) { in namespace_unlock()
1708 /* Make sure we notice when we leak mounts. */ in namespace_unlock()
1709 VFS_WARN_ON_ONCE(!mnt_ns_empty(ns)); in namespace_unlock()
1710 free_mnt_ns(ns); in namespace_unlock()
1721 hlist_del(&m->mnt_umount); in namespace_unlock()
1722 mntput(&m->mnt); in namespace_unlock()
1751 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) in disconnect_mount()
1780 p->mnt.mnt_flags |= MNT_UMOUNT; in umount_tree()
1783 list_add_tail(&p->mnt_list, &tmp_list); in umount_tree()
1788 list_del_init(&p->mnt_child); in umount_tree()
1798 struct mnt_namespace *ns; in umount_tree() local
1801 list_del_init(&p->mnt_expire); in umount_tree()
1802 list_del_init(&p->mnt_list); in umount_tree()
1803 ns = p->mnt_ns; in umount_tree()
1804 if (ns) { in umount_tree()
1805 ns->nr_mounts--; in umount_tree()
1806 __touch_mnt_namespace(ns); in umount_tree()
1808 p->mnt_ns = NULL; in umount_tree()
1810 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; in umount_tree()
1816 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); in umount_tree()
1822 hlist_add_head(&p->mnt_umount, &unmounted); in umount_tree()
1825 * At this point p->mnt_ns is NULL, notification will be queued in umount_tree()
1828 * - p->prev_ns is non-NULL *and* in umount_tree()
1829 * - p->prev_ns->n_fsnotify_marks is non-NULL in umount_tree()
1845 down_write(&sb->s_umount); in do_umount_root()
1849 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, in do_umount_root()
1860 up_write(&sb->s_umount); in do_umount_root()
1866 struct super_block *sb = mnt->mnt.mnt_sb; in do_umount()
1869 retval = security_sb_umount(&mnt->mnt, flags); in do_umount()
1880 if (&mnt->mnt == current->fs->root.mnt || in do_umount()
1882 return -EINVAL; in do_umount()
1885 * probably don't strictly need the lock here if we examined in do_umount()
1889 if (!list_empty(&mnt->mnt_mounts) || mnt_get_count(mnt) != 2) { in do_umount()
1891 return -EBUSY; in do_umount()
1895 if (!xchg(&mnt->mnt_expiry_mark, 1)) in do_umount()
1896 return -EAGAIN; in do_umount()
1900 * If we may have to abort operations to get out of this in do_umount()
1901 * mount, and they will themselves hold resources we must in do_umount()
1904 * might fail to complete on the first run through as other tasks in do_umount()
1909 if (flags & MNT_FORCE && sb->s_op->umount_begin) { in do_umount()
1910 sb->s_op->umount_begin(sb); in do_umount()
1916 * Ho-hum... In principle, we might treat that as umount + switch in do_umount()
1919 * /reboot - static binary that would close all descriptors and in do_umount()
1922 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { in do_umount()
1925 * we just try to remount it readonly. in do_umount()
1927 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) in do_umount()
1928 return -EPERM; in do_umount()
1935 /* Repeat the earlier racy checks, now that we are holding the locks */ in do_umount()
1936 retval = -EINVAL; in do_umount()
1940 if (mnt->mnt.mnt_flags & MNT_LOCKED) in do_umount()
1953 retval = -EBUSY; in do_umount()
1966 * __detach_mounts - lazily unmount all mounts on the specified dentry
1973 * The caller may hold dentry->d_inode->i_rwsem.
1989 if (mnt->mnt.mnt_flags & MNT_UMOUNT) { in __detach_mounts()
1991 hlist_add_head(&mnt->mnt_umount, &unmounted); in __detach_mounts()
2003 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); in may_mount()
2017 struct mount *mnt = real_mount(path->mnt); in can_umount()
2018 struct super_block *sb = path->dentry->d_sb; in can_umount()
2021 return -EPERM; in can_umount()
2023 return -EINVAL; in can_umount()
2025 return -EINVAL; in can_umount()
2026 if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ in can_umount()
2027 return -EINVAL; in can_umount()
2028 if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) in can_umount()
2029 return -EPERM; in can_umount()
2036 struct mount *mnt = real_mount(path->mnt); in path_umount()
2043 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ in path_umount()
2044 dput(path->dentry); in path_umount()
2057 return -EINVAL; in ksys_umount()
2086 struct ns_common *ns; in is_mnt_ns_file() local
2089 if (dentry->d_op != &ns_dentry_operations) in is_mnt_ns_file()
2092 ns = d_inode(dentry)->i_private; in is_mnt_ns_file()
2094 return ns->ops == &mntns_operations; in is_mnt_ns_file()
2099 return &mnt->ns; in from_mnt_ns()
2104 struct ns_common *ns; in get_sequential_mnt_ns() local
2109 ns = ns_tree_adjoined_rcu(mntns, previous); in get_sequential_mnt_ns()
2110 if (IS_ERR(ns)) in get_sequential_mnt_ns()
2111 return ERR_CAST(ns); in get_sequential_mnt_ns()
2113 mntns = to_mnt_ns(ns); in get_sequential_mnt_ns()
2120 if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN)) in get_sequential_mnt_ns()
2124 * We need an active reference count as we're persisting in get_sequential_mnt_ns()
2125 * the mount namespace and it might already be on its in get_sequential_mnt_ns()
2140 return to_mnt_ns(get_proc_ns(dentry->d_inode)); in mnt_ns_from_dentry()
2153 return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id; in mnt_ns_loop()
2163 return ERR_PTR(-EINVAL); in copy_tree()
2166 return ERR_PTR(-EINVAL); in copy_tree()
2174 list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) { in copy_tree()
2175 if (!is_subdir(src_root_child->mnt_mountpoint, dentry)) in copy_tree()
2182 if (src_mnt->mnt.mnt_flags & MNT_LOCKED) { in copy_tree()
2184 dst_mnt = ERR_PTR(-EPERM); in copy_tree()
2192 is_mnt_ns_file(src_mnt->mnt.mnt_root)) { in copy_tree()
2196 while (src_parent != src_mnt->mnt_parent) { in copy_tree()
2197 src_parent = src_parent->mnt_parent; in copy_tree()
2198 dst_mnt = dst_mnt->mnt_parent; in copy_tree()
2203 dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag); in copy_tree()
2207 if (src_mnt->mnt.mnt_flags & MNT_LOCKED) in copy_tree()
2208 dst_mnt->mnt.mnt_flags |= MNT_LOCKED; in copy_tree()
2210 /* stick the duplicate mount on the same expiry in copy_tree()
2211 * list as the original if that was on one */ in copy_tree()
2212 if (!list_empty(&src_mnt->mnt_expire)) in copy_tree()
2213 list_add(&dst_mnt->mnt_expire, in copy_tree()
2214 &src_mnt->mnt_expire); in copy_tree()
2216 attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp); in copy_tree()
2250 struct mount *root = real_mount(path->mnt); in collect_paths()
2258 return ERR_PTR(-EINVAL); in collect_paths()
2260 return ERR_PTR(-ENOMEM); in collect_paths()
2262 list_for_each_entry(child, &root->mnt_mounts, mnt_child) { in collect_paths()
2263 if (!is_subdir(child->mnt_mountpoint, path->dentry)) in collect_paths()
2267 return ERR_PTR(-ENOMEM); in collect_paths()
2268 res[n].mnt = &m->mnt; in collect_paths()
2269 res[n].dentry = m->mnt.mnt_root; in collect_paths()
2274 return ERR_PTR(-ENOMEM); in collect_paths()
2275 memset(res + n, 0, (count - n) * sizeof(struct path)); in collect_paths()
2276 for (struct path *p = res; p->mnt; p++) in collect_paths()
2283 for (const struct path *p = paths; p->mnt; p++) in drop_collected_paths()
2297 * we need to dissolve the mount tree and free that namespace. in dissolve_on_fput()
2298 * Let's try to avoid taking namespace_sem if we can determine in dissolve_on_fput()
2299 * that there's nothing to do without it - rcu_read_lock() is in dissolve_on_fput()
2300 * enough to make anon_ns_root() memory-safe and once m has in dissolve_on_fput()
2302 * never become a root of anon ns again. in dissolve_on_fput()
2314 emptied_ns = m->mnt_ns; in dissolve_on_fput()
2326 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in __has_locked_children()
2327 if (!is_subdir(child->mnt_mountpoint, dentry)) in __has_locked_children()
2330 if (child->mnt.mnt_flags & MNT_LOCKED) in __has_locked_children()
2345 * that aren't checked by the mount-cycle checking code, thereby allowing
2353 if (mnt_ns_loop(p->mnt.mnt_root)) in check_for_nsfs_mounts()
2359 * clone_private_mount - create a private clone of a path
2372 struct mount *old_mnt = real_mount(path->mnt); in clone_private_mount()
2378 return ERR_PTR(-EINVAL); in clone_private_mount()
2384 * namespace, and we need to make sure no namespace in clone_private_mount()
2389 return ERR_PTR(-EINVAL); in clone_private_mount()
2392 return ERR_PTR(-EINVAL); in clone_private_mount()
2395 if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) in clone_private_mount()
2396 return ERR_PTR(-EPERM); in clone_private_mount()
2398 if (__has_locked_children(old_mnt, path->dentry)) in clone_private_mount()
2399 return ERR_PTR(-EINVAL); in clone_private_mount()
2401 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); in clone_private_mount()
2403 return ERR_PTR(-EINVAL); in clone_private_mount()
2406 new_mnt->mnt_ns = MNT_NS_INTERNAL; in clone_private_mount()
2407 return &new_mnt->mnt; in clone_private_mount()
2416 int flags = p->mnt.mnt_flags; in lock_mnt_tree()
2432 if (list_empty(&p->mnt_expire) && p != mnt) in lock_mnt_tree()
2434 p->mnt.mnt_flags = flags; in lock_mnt_tree()
2443 if (p->mnt_group_id && !IS_MNT_SHARED(p)) in cleanup_group_ids()
2453 if (!p->mnt_group_id) { in invent_group_ids()
2465 int count_mounts(struct mnt_namespace *ns, struct mount *mnt) in count_mounts() argument
2471 if (ns->nr_mounts >= max) in count_mounts()
2472 return -ENOSPC; in count_mounts()
2473 max -= ns->nr_mounts; in count_mounts()
2474 if (ns->pending_mounts >= max) in count_mounts()
2475 return -ENOSPC; in count_mounts()
2476 max -= ns->pending_mounts; in count_mounts()
2482 return -ENOSPC; in count_mounts()
2484 ns->pending_mounts += mounts; in count_mounts()
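The quota check above subtracts in stages so no comparison can overflow even with pathological pending counts. In isolation, as a sketch with plain unsigned arithmetic (the kernel reads max from a sysctl and counts the incoming tree itself):

    #include <errno.h>

    static int count_mounts_demo(unsigned int max, unsigned int nr_mounts,
                                 unsigned int pending, unsigned int incoming)
    {
        if (nr_mounts >= max)
            return -ENOSPC;
        max -= nr_mounts;                 /* room left after existing mounts */
        if (pending >= max)
            return -ENOSPC;
        max -= pending;                   /* ... and after pending ones */
        if (incoming > max)
            return -ENOSPC;
        return 0;                         /* caller adds incoming to pending */
    }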
2494 * attach_recursive_mnt - attach a source mount tree
2500 * ---------------------------------------------------------------------------
2503 * | source-->| shared | private | slave | unbindable |
2510 * |non-shared| shared (+) | private | slave (*) | invalid |
2512 * A bind operation clones the source mount and mounts the clone on the
2528 * ---------------------------------------------------------------------------
2531 * | source-->| shared | private | slave | unbindable |
2538 * |non-shared| shared (+*) | private | slave (*) | unbindable |
2561 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; in attach_recursive_mnt()
2562 struct mount *dest_mnt = dest->parent; in attach_recursive_mnt()
2563 struct mountpoint *dest_mp = dest->mp; in attach_recursive_mnt()
2565 struct mnt_namespace *ns = dest_mnt->mnt_ns; in attach_recursive_mnt() local
2576 * mounted beneath mounts on the same mountpoint. in attach_recursive_mnt()
2578 for (top = source_mnt; unlikely(top->overmount); top = top->overmount) { in attach_recursive_mnt()
2579 if (!shorter && is_mnt_ns_file(top->mnt.mnt_root)) in attach_recursive_mnt()
2580 shorter = top->mnt_mp; in attach_recursive_mnt()
2582 err = get_mountpoint(top->mnt.mnt_root, &root); in attach_recursive_mnt()
2588 err = count_mounts(ns, source_mnt); in attach_recursive_mnt()
2613 list_del_init(&source_mnt->mnt_expire); in attach_recursive_mnt()
2615 if (source_mnt->mnt_ns) { in attach_recursive_mnt()
2616 /* move from anon - the caller will destroy */ in attach_recursive_mnt()
2617 emptied_ns = source_mnt->mnt_ns; in attach_recursive_mnt()
2625 * Now the original copy is in the same state as the secondaries - in attach_recursive_mnt()
2631 hlist_add_head(&source_mnt->mnt_hash, &tree_list); in attach_recursive_mnt()
2635 hlist_del_init(&child->mnt_hash); in attach_recursive_mnt()
2636 /* Notice when we are propagating across user namespaces */ in attach_recursive_mnt()
2637 if (child->mnt_parent->mnt_ns->user_ns != user_ns) in attach_recursive_mnt()
2639 q = __lookup_mnt(&child->mnt_parent->mnt, in attach_recursive_mnt()
2640 child->mnt_mountpoint); in attach_recursive_mnt()
2659 child->mnt_parent->mnt_ns->pending_mounts = 0; in attach_recursive_mnt()
2665 ns->pending_mounts = 0; in attach_recursive_mnt()
2681 m = topmost_overmount(real_mount(path->mnt)); in where_to_mount()
2682 *dentry = m->mnt_mountpoint; in where_to_mount()
2683 return m->mnt_parent; in where_to_mount()
2685 m = __lookup_mnt(path->mnt, path->dentry); in where_to_mount()
2688 *dentry = m->mnt.mnt_root; in where_to_mount()
2691 *dentry = path->dentry; in where_to_mount()
2692 return real_mount(path->mnt); in where_to_mount()
2696 * do_lock_mount - acquire environment for mounting
2701 * To mount something at a given location, we need
2703 * inode of dentry we are mounting on locked exclusive
2705 * struct mount we are mounting on
2707 * Results are stored in caller-supplied context (pinned_mountpoint);
2708 * on success we have res->parent and res->mp pointing to parent and
2709 * mountpoint respectively and res->node inserted into the ->m_list
2711 * On failure we have res->parent set to ERR_PTR(-E...), res->mp
2712 * left NULL, res->node - empty.
2714 * proper order - inode lock nests outside of namespace_sem).
2716 * A request to mount on an overmounted location is treated as "mount on
2718 * a location - "mount immediately beneath the topmost mount at that
2722 * chosen mountpoint must be allowed to be mounted on. For "beneath"
2723 * case we also require the location to be at the root of a mount
2733 res->parent = ERR_PTR(-EINVAL); in do_lock_mount()
2743 if (&m->mnt != path->mnt) { in do_lock_mount()
2744 mntget(&m->mnt); in do_lock_mount()
2749 inode_lock(dentry->d_inode); in do_lock_mount()
2757 err = -EAGAIN; // something moved, retry in do_lock_mount()
2758 else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt))) in do_lock_mount()
2759 err = -ENOENT; // not to be mounted on in do_lock_mount()
2760 else if (beneath && &m->mnt == path->mnt && !m->overmount) in do_lock_mount()
2761 err = -EINVAL; in do_lock_mount()
2766 res->parent = ERR_PTR(err); in do_lock_mount()
2768 inode_unlock(dentry->d_inode); in do_lock_mount()
2770 res->parent = m; in do_lock_mount()
2773 * Drop the temporary references. This is subtle - on success in do_lock_mount()
2774 * we are doing that under namespace_sem, which would normally in do_lock_mount()
2775 * be forbidden. However, in that case we are guaranteed that in do_lock_mount()
2776 * refcounts won't reach zero, since we know that path->mnt in do_lock_mount()
2780 if (&m->mnt != path->mnt) { in do_lock_mount()
2782 mntput(&m->mnt); in do_lock_mount()
2784 } while (err == -EAGAIN); in do_lock_mount()
2789 inode_unlock(m->mp->m_dentry->d_inode); in __unlock_mount()
2798 if (!IS_ERR(m->parent)) in unlock_mount()
2812 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) in graft_tree()
2813 return -EINVAL; in graft_tree()
2815 if (d_is_dir(mp->mp->m_dentry) != in graft_tree()
2816 d_is_dir(mnt->mnt.mnt_root)) in graft_tree()
2817 return -ENOTDIR; in graft_tree()
2824 struct mnt_namespace *ns = m->mnt_ns; in may_change_propagation() local
2827 if (IS_ERR_OR_NULL(ns)) // is_mounted() in may_change_propagation()
2828 return -EINVAL; in may_change_propagation()
2830 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) in may_change_propagation()
2831 return -EPERM; in may_change_propagation()
2843 /* Fail if any non-propagation flags are set */ in flags_to_propagation_type()
2858 struct mount *mnt = real_mount(path->mnt); in do_change_type()
2864 return -EINVAL; in do_change_type()
2868 return -EINVAL; in do_change_type()
2888 /* may_copy_tree() - check if a mount tree can be copied
2892 * from @path->mnt. The caller may copy the mount tree under the
2917 * The ownership of a non-anonymous mount namespace such as the
2919 * => We know that the caller's mount namespace is stable.
2925 * ==> The earlier capability check on the owning namespace of the
2933 struct mount *mnt = real_mount(path->mnt); in may_copy_tree()
2939 d_op = path->dentry->d_op; in may_copy_tree()
2946 if (!is_mounted(path->mnt)) in may_copy_tree()
2955 struct mount *old = real_mount(old_path->mnt); in __do_loopback()
2958 return ERR_PTR(-EINVAL); in __do_loopback()
2961 return ERR_PTR(-EINVAL); in __do_loopback()
2963 if (!recurse && __has_locked_children(old, old_path->dentry)) in __do_loopback()
2964 return ERR_PTR(-EINVAL); in __do_loopback()
2967 return copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); in __do_loopback()
2969 return clone_mnt(old, old_path->dentry, 0); in __do_loopback()
2982 return -EINVAL; in do_loopback()
2988 return -EINVAL; in do_loopback()
2995 return -EINVAL; in do_loopback()
3012 struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns; in get_detached_copy() local
3013 struct user_namespace *user_ns = mnt_ns->user_ns; in get_detached_copy()
3016 ns = alloc_mnt_ns(user_ns, true); in get_detached_copy()
3017 if (IS_ERR(ns)) in get_detached_copy()
3018 return ns; in get_detached_copy()
3027 if (is_mounted(path->mnt)) { in get_detached_copy()
3028 src_mnt_ns = real_mount(path->mnt)->mnt_ns; in get_detached_copy()
3030 ns->seq_origin = src_mnt_ns->seq_origin; in get_detached_copy()
3032 ns->seq_origin = src_mnt_ns->ns.ns_id; in get_detached_copy()
3037 emptied_ns = ns; in get_detached_copy()
3042 mnt_add_to_ns(ns, p); in get_detached_copy()
3043 ns->nr_mounts++; in get_detached_copy()
3045 ns->root = mnt; in get_detached_copy()
3046 return ns; in get_detached_copy()
3051 struct mnt_namespace *ns = get_detached_copy(path, recursive); in open_detached_copy() local
3054 if (IS_ERR(ns)) in open_detached_copy()
3055 return ERR_CAST(ns); in open_detached_copy()
3057 mntput(path->mnt); in open_detached_copy()
3058 path->mnt = mntget(&ns->root->mnt); in open_detached_copy()
3061 dissolve_on_fput(path->mnt); in open_detached_copy()
3063 file->f_mode |= FMODE_NEED_UNMOUNT; in open_detached_copy()
3079 return ERR_PTR(-EINVAL); in vfs_open_tree()
3082 return ERR_PTR(-EINVAL); in vfs_open_tree()
3092 return ERR_PTR(-EPERM); in vfs_open_tree()
3129 unsigned int fl = mnt->mnt.mnt_flags; in can_change_locked_flags()
3158 if (readonly_request == __mnt_is_readonly(&mnt->mnt)) in change_mount_ro_state()
3164 mnt->mnt.mnt_flags &= ~MNT_READONLY; in change_mount_ro_state()
3170 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; in set_mount_attributes()
3171 mnt->mnt.mnt_flags = mnt_flags; in set_mount_attributes()
3172 touch_mnt_namespace(mnt->mnt_ns); in set_mount_attributes()
3178 struct super_block *sb = mnt->mnt_sb; in mnt_warn_timestamp_expiry()
3181 (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && in mnt_warn_timestamp_expiry()
3182 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { in mnt_warn_timestamp_expiry()
3189 mntpath = ERR_PTR(-ENOMEM); in mnt_warn_timestamp_expiry()
3194 sb->s_type->name, in mnt_warn_timestamp_expiry()
3196 mntpath, &sb->s_time_max, in mnt_warn_timestamp_expiry()
3197 (unsigned long long)sb->s_time_max); in mnt_warn_timestamp_expiry()
3199 sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; in mnt_warn_timestamp_expiry()
3212 struct super_block *sb = path->mnt->mnt_sb; in do_reconfigure_mnt()
3213 struct mount *mnt = real_mount(path->mnt); in do_reconfigure_mnt()
3217 return -EINVAL; in do_reconfigure_mnt()
3220 return -EINVAL; in do_reconfigure_mnt()
3223 return -EPERM; in do_reconfigure_mnt()
3226 * We're only checking whether the superblock is read-only not in do_reconfigure_mnt()
3227 * changing it, so only take down_read(&sb->s_umount). in do_reconfigure_mnt()
3229 down_read(&sb->s_umount); in do_reconfigure_mnt()
3235 up_read(&sb->s_umount); in do_reconfigure_mnt()
3237 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_reconfigure_mnt()
3244 * If you've mounted a non-root directory somewhere and want to do remount
3245 * on it - tough luck.
3251 struct super_block *sb = path->mnt->mnt_sb; in do_remount()
3252 struct mount *mnt = real_mount(path->mnt); in do_remount()
3256 return -EINVAL; in do_remount()
3259 return -EINVAL; in do_remount()
3262 return -EPERM; in do_remount()
3264 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); in do_remount()
3272 fc->oldapi = true; in do_remount()
3276 down_write(&sb->s_umount); in do_remount()
3277 err = -EPERM; in do_remount()
3278 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { in do_remount()
3286 up_write(&sb->s_umount); in do_remount()
3289 mnt_warn_timestamp_expiry(path, &mnt->mnt); in do_remount()
3307 struct mount *from = real_mount(from_path->mnt); in do_set_group()
3308 struct mount *to = real_mount(to_path->mnt); in do_set_group()
3322 return -EINVAL; in do_set_group()
3324 return -EINVAL; in do_set_group()
3327 if (from->mnt.mnt_sb != to->mnt.mnt_sb) in do_set_group()
3328 return -EINVAL; in do_set_group()
3331 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) in do_set_group()
3332 return -EINVAL; in do_set_group()
3335 if (__has_locked_children(from, to->mnt.mnt_root)) in do_set_group()
3336 return -EINVAL; in do_set_group()
3338 /* Setting sharing groups is only allowed on private mounts */ in do_set_group()
3340 return -EINVAL; in do_set_group()
3344 return -EINVAL; in do_set_group()
3347 hlist_add_behind(&to->mnt_slave, &from->mnt_slave); in do_set_group()
3348 to->mnt_master = from->mnt_master; in do_set_group()
3352 to->mnt_group_id = from->mnt_group_id; in do_set_group()
3353 list_add(&to->mnt_share, &from->mnt_share); in do_set_group()
3360 * path_overmounted - check if path is overmounted
3363 * Check if path is overmounted, i.e., if there's a mount on top of
3364 * @path->mnt with @path->dentry as mountpoint.
3377 no_child = !__lookup_mnt(path->mnt, path->dentry); in path_overmounted()
3381 no_child = !__lookup_mnt(path->mnt, path->dentry); in path_overmounted()
3394 p2 = p2->mnt_parent; in mount_is_ancestor()
3399 * can_move_mount_beneath - check that we can mount beneath the top mount
3400 * @mnt_from: mount we are trying to move
3404 * - Make sure that nothing can be mounted beneath the caller's current
3406 * - Make sure that the caller can unmount the topmost mount ensuring
3408 * - Ensure that nothing has been mounted on top of @mnt_from before we
3410 * - Prevent mounting beneath a mount if the propagation relationship
3415 * Return: On success 0, and on error a negative error code is returned.
3421 struct mount *parent_mnt_to = mnt_to->mnt_parent; in can_move_mount_beneath()
3424 return -EINVAL; in can_move_mount_beneath()
3427 if (mnt_from->overmount) in can_move_mount_beneath()
3428 return -EINVAL; in can_move_mount_beneath()
3434 if (&mnt_to->mnt == current->fs->root.mnt) in can_move_mount_beneath()
3435 return -EINVAL; in can_move_mount_beneath()
3436 if (parent_mnt_to == current->nsproxy->mnt_ns->root) in can_move_mount_beneath()
3437 return -EINVAL; in can_move_mount_beneath()
3440 return -EINVAL; in can_move_mount_beneath()
3444 * mean mounting @mnt_from on @mnt_to->mnt_parent and then in can_move_mount_beneath()
3445 * propagating a copy @c of @mnt_from on top of @mnt_to. This in can_move_mount_beneath()
3449 return -EINVAL; in can_move_mount_beneath()
3452 * If @mnt_to->mnt_parent propagates to @mnt_from this would in can_move_mount_beneath()
3453 * mean propagating a copy @c of @mnt_from on top of @mnt_from. in can_move_mount_beneath()
3454 * Afterwards @mnt_from would be mounted on top of in can_move_mount_beneath()
3455 * @mnt_to->mnt_parent and @mnt_to would be unmounted from in can_move_mount_beneath()
3456 * @mnt->mnt_parent and remounted on @mnt_from. But since @c is in can_move_mount_beneath()
3457 * already mounted on @mnt_from, @mnt_to would ultimately be in can_move_mount_beneath()
3458 * remounted on top of @c. Afterwards, @mnt_from would be in can_move_mount_beneath()
3465 return -EINVAL; in can_move_mount_beneath()
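From userspace, the constraints above are exercised through move_mount(2) with MOVE_MOUNT_BENEATH (available since Linux 6.5). A sketch using detached-path fds; error handling is elided:

    #include <linux/mount.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Move the mount at @from_fd beneath the top mount at @to_fd. */
    static int move_beneath(int from_fd, int to_fd)
    {
        return syscall(SYS_move_mount, from_fd, "", to_fd, "",
                       MOVE_MOUNT_BENEATH |
                       MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
    }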
3470 /* may_use_mount() - check if a mount tree can be used
3474 * from @path->mnt. The caller may use the mount tree under the
3488 * The ownership of a non-anonymous mount namespace such as the
3490 * => We know that the caller's mount namespace is stable.
3496 * ==> The earlier capability check on the owning namespace of the
3509 * managed to get their hands on something purely kernel in may_use_mount()
3512 if (!is_mounted(&mnt->mnt)) in may_use_mount()
3522 struct mount *old = real_mount(old_path->mnt); in do_move_mount()
3527 return -EINVAL; in do_move_mount()
3529 if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry)) in do_move_mount()
3530 return -EINVAL; in do_move_mount()
3540 return -EINVAL; in do_move_mount()
3542 if (IS_MNT_SHARED(old->mnt_parent)) in do_move_mount()
3543 return -EINVAL; in do_move_mount()
3546 return -EINVAL; in do_move_mount()
3552 return -EINVAL; in do_move_mount()
3554 * Bail out early if the target is within the same namespace - in do_move_mount()
3556 * some corner cases if we check it early. in do_move_mount()
3558 if (old->mnt_ns == mp.parent->mnt_ns) in do_move_mount()
3559 return -EINVAL; in do_move_mount()
3565 return -EINVAL; in do_move_mount()
3569 struct mount *over = real_mount(new_path->mnt); in do_move_mount()
3571 if (mp.parent != over->mnt_parent) in do_move_mount()
3572 over = mp.parent->overmount; in do_move_mount()
3583 return -EINVAL; in do_move_mount()
3585 return -ELOOP; in do_move_mount()
3587 return -ELOOP; in do_move_mount()
3598 return -EINVAL; in do_move_mount_old()
3613 struct mount *parent = mp->parent; in do_add_mount()
3621 /* that's acceptable only for automounts done in private ns */ in do_add_mount()
3623 return -EINVAL; in do_add_mount()
3624 /* ... and for those we'd better have mountpoint still alive */ in do_add_mount()
3625 if (!parent->mnt_ns) in do_add_mount()
3626 return -EINVAL; in do_add_mount()
3629 /* Refuse the same filesystem on the same mount point */ in do_add_mount()
3630 if (parent->mnt.mnt_sb == newmnt->mnt.mnt_sb && in do_add_mount()
3631 parent->mnt.mnt_root == mp->mp->m_dentry) in do_add_mount()
3632 return -EBUSY; in do_add_mount()
3634 if (d_is_symlink(newmnt->mnt.mnt_root)) in do_add_mount()
3635 return -EINVAL; in do_add_mount()
3637 newmnt->mnt.mnt_flags = mnt_flags; in do_add_mount()
3657 sb = fc->root->d_sb; in do_new_mount_fc()
3664 return -EPERM; in do_new_mount_fc()
3672 retain_and_null_ptr(mnt); // consumed on success in do_new_mount_fc()
3690 return -EINVAL; in do_new_mount()
3694 return -ENODEV; in do_new_mount()
3696 if (type->fs_flags & FS_HAS_SUBTYPE) { in do_new_mount()
3702 return -EINVAL; in do_new_mount()
3716 fc->oldapi = true; in do_new_mount()
3725 err = -EPERM; in do_new_mount()
3736 struct dentry *dentry = path->dentry; in lock_mount_exact()
3739 inode_lock(dentry->d_inode); in lock_mount_exact()
3742 err = -ENOENT; in lock_mount_exact()
3744 err = -EBUSY; in lock_mount_exact()
3749 inode_unlock(dentry->d_inode); in lock_mount_exact()
3750 mp->parent = ERR_PTR(err); in lock_mount_exact()
3752 mp->parent = real_mount(path->mnt); in lock_mount_exact()
3769 if (m->mnt_root == path->dentry) in finish_automount()
3770 return -ELOOP; in finish_automount()
3773 * we don't want to use LOCK_MOUNT() - in this case finding something in finish_automount()
3774 * that overmounts our mountpoint-to-be means "quietly drop what we've in finish_automount()
3775 * got", not "try to mount it on top". in finish_automount()
3778 if (mp.parent == ERR_PTR(-EBUSY)) in finish_automount()
3781 err = do_add_mount(mnt, &mp, path->mnt->mnt_flags | MNT_SHRINKABLE); in finish_automount()
3788 * mnt_set_expiry - Put a mount on an expiration list
3795 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); in mnt_set_expiry()
3801 * mountpoints that aren't in use and haven't been touched since last we came
3817 * - already mounted in mark_mounts_for_expiry()
3818 * - only referenced by its parent vfsmount in mark_mounts_for_expiry()
3819 * - still marked for expiry (marked on the last call here; marks are in mark_mounts_for_expiry()
3823 if (!is_mounted(&mnt->mnt)) in mark_mounts_for_expiry()
3825 if (!xchg(&mnt->mnt_expiry_mark, 1) || in mark_mounts_for_expiry()
3828 list_move(&mnt->mnt_expire, &graveyard); in mark_mounts_for_expiry()
3832 touch_mnt_namespace(mnt->mnt_ns); in mark_mounts_for_expiry()
3852 next = this_parent->mnt_mounts.next; in select_submounts()
3854 while (next != &this_parent->mnt_mounts) { in select_submounts()
3858 next = tmp->next; in select_submounts()
3859 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE)) in select_submounts()
3862 * Descend a level if the mnt_mounts list is non-empty. in select_submounts()
3864 if (!list_empty(&mnt->mnt_mounts)) { in select_submounts()
3870 list_move_tail(&mnt->mnt_expire, graveyard); in select_submounts()
3878 next = this_parent->mnt_child.next; in select_submounts()
3879 this_parent = this_parent->mnt_parent; in select_submounts()
3901 touch_mnt_namespace(m->mnt_ns); in shrink_submounts()
3917 return ERR_PTR(-ENOMEM); in copy_mount_options()
3925 offset = PAGE_SIZE - left; in copy_mount_options()
3931 left--; in copy_mount_options()
3937 return ERR_PTR(-EFAULT); in copy_mount_options()
3949 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
3950 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
3953 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
3956 * Pre-0.97 versions of mount() didn't have a flags word.
3958 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
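That legacy magic is still tolerated today: old callers pass flags with 0xC0ED in the top half, which the kernel strips before interpreting anything. The uapi headers spell these MS_MGC_VAL and MS_MGC_MSK; the values are inlined here to keep the sketch header-free:

    static unsigned long strip_mount_magic(unsigned long flags)
    {
        if ((flags & 0xffff0000UL) == 0xC0ED0000UL)  /* MS_MGC_VAL */
            flags &= ~0xffff0000UL;                  /* MS_MGC_MSK */
        return flags;
    }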
3974 ((char *)data_page)[PAGE_SIZE - 1] = 0; in path_mount()
3977 return -EINVAL; in path_mount()
3983 return -EPERM; in path_mount()
3991 /* Separate the per-mountpoint flags */ in path_mount()
4014 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; in path_mount()
4053 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) in inc_mnt_namespaces() argument
4055 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); in inc_mnt_namespaces()
4063 static void free_mnt_ns(struct mnt_namespace *ns) in free_mnt_ns() argument
4065 if (!is_anon_ns(ns)) in free_mnt_ns()
4066 ns_common_free(ns); in free_mnt_ns()
4067 dec_mnt_namespaces(ns->ucounts); in free_mnt_ns()
4068 mnt_ns_tree_remove(ns); in free_mnt_ns()
4079 return ERR_PTR(-ENOSPC); in alloc_mnt_ns()
4084 return ERR_PTR(-ENOMEM); in alloc_mnt_ns()
4097 ns_tree_gen_id(&new_ns->ns); in alloc_mnt_ns()
4098 refcount_set(&new_ns->passive, 1); in alloc_mnt_ns()
4099 new_ns->mounts = RB_ROOT; in alloc_mnt_ns()
4100 init_waitqueue_head(&new_ns->poll); in alloc_mnt_ns()
4101 new_ns->user_ns = get_user_ns(user_ns); in alloc_mnt_ns()
4102 new_ns->ucounts = ucounts; in alloc_mnt_ns()
4107 struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns, in copy_mnt_ns() argument
4118 BUG_ON(!ns); in copy_mnt_ns()
4121 get_mnt_ns(ns); in copy_mnt_ns()
4122 return ns; in copy_mnt_ns()
4125 old = ns->root; in copy_mnt_ns()
4134 if (user_ns != ns->user_ns) in copy_mnt_ns()
4136 new = copy_tree(old, old->mnt.mnt_root, copy_flags); in copy_mnt_ns()
4141 if (user_ns != ns->user_ns) { in copy_mnt_ns()
4145 new_ns->root = new; in copy_mnt_ns()
4148 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts in copy_mnt_ns()
4149 * as belonging to the new namespace. We have already acquired a private in copy_mnt_ns()
4150 * fs_struct, so tsk->fs->lock is not needed. in copy_mnt_ns()
4156 new_ns->nr_mounts++; in copy_mnt_ns()
4158 if (&p->mnt == new_fs->root.mnt) { in copy_mnt_ns()
4159 new_fs->root.mnt = mntget(&q->mnt); in copy_mnt_ns()
4160 rootmnt = &p->mnt; in copy_mnt_ns()
4162 if (&p->mnt == new_fs->pwd.mnt) { in copy_mnt_ns()
4163 new_fs->pwd.mnt = mntget(&q->mnt); in copy_mnt_ns()
4164 pwdmnt = &p->mnt; in copy_mnt_ns()
4171 // an mntns binding we'd skipped? in copy_mnt_ns()
4172 while (p->mnt.mnt_root != q->mnt.mnt_root) in copy_mnt_ns()
4182 struct mnt_namespace *ns; in mount_subtree() local
4187 ns = alloc_mnt_ns(&init_user_ns, true); in mount_subtree()
4188 if (IS_ERR(ns)) { in mount_subtree()
4190 return ERR_CAST(ns); in mount_subtree()
4192 ns->root = mnt; in mount_subtree()
4193 ns->nr_mounts++; in mount_subtree()
4194 mnt_add_to_ns(ns, mnt); in mount_subtree()
4196 err = vfs_path_lookup(m->mnt_root, m, in mount_subtree()
4199 put_mnt_ns(ns); in mount_subtree()
4205 s = path.mnt->mnt_sb; in mount_subtree()
4206 atomic_inc(&s->s_active); in mount_subtree()
4209 down_write(&s->s_umount); in mount_subtree()
4210 /* ... and return the root of (sub)tree on it */ in mount_subtree()
4281 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
4286 struct mnt_namespace *ns; in SYSCALL_DEFINE3() local
4295 return -EPERM; in SYSCALL_DEFINE3()
4298 return -EINVAL; in SYSCALL_DEFINE3()
4301 return -EINVAL; in SYSCALL_DEFINE3()
4315 return -EINVAL; in SYSCALL_DEFINE3()
4320 return -EBADF; in SYSCALL_DEFINE3()
4322 if (fd_file(f)->f_op != &fscontext_fops) in SYSCALL_DEFINE3()
4323 return -EINVAL; in SYSCALL_DEFINE3()
4325 fc = fd_file(f)->private_data; in SYSCALL_DEFINE3()
4327 ret = mutex_lock_interruptible(&fc->uapi_mutex); in SYSCALL_DEFINE3()
4331 /* There must be a valid superblock or we can't mount it */ in SYSCALL_DEFINE3()
4332 ret = -EINVAL; in SYSCALL_DEFINE3()
4333 if (!fc->root) in SYSCALL_DEFINE3()
4336 ret = -EPERM; in SYSCALL_DEFINE3()
4337 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { in SYSCALL_DEFINE3()
4342 ret = -EBUSY; in SYSCALL_DEFINE3()
4343 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) in SYSCALL_DEFINE3()
4346 if (fc->sb_flags & SB_MANDLOCK) in SYSCALL_DEFINE3()
4354 newmount.dentry = dget(fc->root); in SYSCALL_DEFINE3()
4355 newmount.mnt->mnt_flags = mnt_flags; in SYSCALL_DEFINE3()
4357 /* We've done the mount bit - now move the file context into more or in SYSCALL_DEFINE3()
4358 * less the same state as if we'd done an fspick(). We don't want to in SYSCALL_DEFINE3()
4359 * do any memory allocation or anything like that at this point as we in SYSCALL_DEFINE3()
4364 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); in SYSCALL_DEFINE3()
4365 if (IS_ERR(ns)) { in SYSCALL_DEFINE3()
4366 ret = PTR_ERR(ns); in SYSCALL_DEFINE3()
4370 ns->root = mnt; in SYSCALL_DEFINE3()
4371 ns->nr_mounts = 1; in SYSCALL_DEFINE3()
4372 mnt_add_to_ns(ns, mnt); in SYSCALL_DEFINE3()
4375 /* Attach to an apparent O_PATH fd with a note that we need to unmount in SYSCALL_DEFINE3()
4378 file = dentry_open(&newmount, O_PATH, fc->cred); in SYSCALL_DEFINE3()
4384 file->f_mode |= FMODE_NEED_UNMOUNT; in SYSCALL_DEFINE3()
4395 mutex_unlock(&fc->uapi_mutex); in SYSCALL_DEFINE3()
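The usual userspace sequence feeding this syscall: create and configure a filesystem context with fsopen()/fsconfig(), then fsmount() it into the detached, open_tree-like fd described above. A sketch using raw syscalls, since libc wrappers are not universal; error handling is elided:

    #include <linux/mount.h>
    #include <stddef.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int detached_tmpfs(void)
    {
        int fsfd = syscall(SYS_fsopen, "tmpfs", 0);
        syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
        int mntfd = syscall(SYS_fsmount, fsfd, 0, MOUNT_ATTR_NODEV);
        close(fsfd);
        return mntfd;    /* attach somewhere later with move_mount(2) */
    }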
4437 return -EPERM; in SYSCALL_DEFINE5()
4440 return -EINVAL; in SYSCALL_DEFINE5()
4444 return -EINVAL; in SYSCALL_DEFINE5()
4460 return -EBADF; in SYSCALL_DEFINE5()
4462 to_path = fd_file(f_to)->f_path; in SYSCALL_DEFINE5()
4486 return -EBADF; in SYSCALL_DEFINE5()
4488 return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags); in SYSCALL_DEFINE5()
4511 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { in is_path_reachable()
4512 dentry = mnt->mnt_mountpoint; in is_path_reachable()
4513 mnt = mnt->mnt_parent; in is_path_reachable()
4515 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); in is_path_reachable()
4521 return is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); in path_is_under()
4529 * root/cwd of all processes which had them on the current root to new_root.
4532 * The new_root and put_old must be directories, and must not be on the
4534 * underneath new_root, i.e. adding a non-zero number of /.. to the string
4536 * file system may be mounted on put_old. After all, new_root is a mountpoint.
4538 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
4539 * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
4543 * - we don't move root/cwd if they are not at the root (reason: if something
4545 * - it's okay to pick a root that isn't the root of a file system, e.g.
4547 * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
4560 return -EPERM; in SYSCALL_DEFINE2()
4576 get_fs_root(current->fs, &root); in SYSCALL_DEFINE2()
4585 ex_parent = new_mnt->mnt_parent; in SYSCALL_DEFINE2()
4586 root_parent = root_mnt->mnt_parent; in SYSCALL_DEFINE2()
4590 return -EINVAL; in SYSCALL_DEFINE2()
4592 return -EINVAL; in SYSCALL_DEFINE2()
4593 if (new_mnt->mnt.mnt_flags & MNT_LOCKED) in SYSCALL_DEFINE2()
4594 return -EINVAL; in SYSCALL_DEFINE2()
4596 return -ENOENT; in SYSCALL_DEFINE2()
4598 return -EBUSY; /* loop, on the same file system */ in SYSCALL_DEFINE2()
4600 return -EINVAL; /* not a mountpoint */ in SYSCALL_DEFINE2()
4602 return -EINVAL; /* absolute root */ in SYSCALL_DEFINE2()
4604 return -EINVAL; /* not a mountpoint */ in SYSCALL_DEFINE2()
4606 return -EINVAL; /* absolute root */ in SYSCALL_DEFINE2()
4607 /* make sure we can reach put_old from new_root */ in SYSCALL_DEFINE2()
4608 if (!is_path_reachable(old_mnt, old_mp.mp->m_dentry, &new)) in SYSCALL_DEFINE2()
4609 return -EINVAL; in SYSCALL_DEFINE2()
4612 return -EINVAL; in SYSCALL_DEFINE2()
4615 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { in SYSCALL_DEFINE2()
4616 new_mnt->mnt.mnt_flags |= MNT_LOCKED; in SYSCALL_DEFINE2()
4617 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; in SYSCALL_DEFINE2()
4619 /* mount new_root on / */ in SYSCALL_DEFINE2()
4620 attach_mnt(new_mnt, root_parent, root_mnt->mnt_mp); in SYSCALL_DEFINE2()
4622 /* mount old root on put_old */ in SYSCALL_DEFINE2()
4624 touch_mnt_namespace(current->nsproxy->mnt_ns); in SYSCALL_DEFINE2()
4626 list_del_init(&new_mnt->mnt_expire); in SYSCALL_DEFINE2()
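/*
 * Editor's sketch (hedged): the classic userspace sequence satisfying
 * the pivot_root() rules enforced above. Needs CAP_SYS_ADMIN (or a
 * user namespace); "/newroot" must already contain an "oldroot"
 * directory. glibc ships no pivot_root wrapper, hence raw syscall().
 */
#define _GNU_SOURCE
#include <sched.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <err.h>

int main(void)
{
	if (unshare(CLONE_NEWNS) < 0)
		err(1, "unshare");
	/* keep mount events from propagating back to the parent namespace */
	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
		err(1, "mount(MS_PRIVATE)");
	/* new_root must itself be a mount point: bind it over itself */
	if (mount("/newroot", "/newroot", NULL, MS_BIND, NULL) < 0)
		err(1, "mount(MS_BIND)");
	if (syscall(SYS_pivot_root, "/newroot", "/newroot/oldroot") < 0)
		err(1, "pivot_root");
	if (chdir("/") < 0)
		err(1, "chdir");
	/* the old root now sits at /oldroot; detach it */
	if (umount2("/oldroot", MNT_DETACH) < 0)
		err(1, "umount2");
	return 0;
}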
4636 unsigned int flags = mnt->mnt.mnt_flags; in recalc_flags()
4639 flags &= ~kattr->attr_clr; in recalc_flags()
4641 flags |= kattr->attr_set; in recalc_flags()
4648 struct vfsmount *m = &mnt->mnt; in can_idmap_mount()
4649 struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; in can_idmap_mount()
4651 if (!kattr->mnt_idmap) in can_idmap_mount()
4656 * doesn't make sense so block that. We don't allow mushy semantics. in can_idmap_mount()
4658 if (kattr->mnt_userns == m->mnt_sb->s_user_ns) in can_idmap_mount()
4659 return -EINVAL; in can_idmap_mount()
4662  * We only allow a mount to change its idmapping if it has in can_idmap_mount()
4665 if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m)) in can_idmap_mount()
4666 return -EPERM; in can_idmap_mount()
4669 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) in can_idmap_mount()
4670 return -EINVAL; in can_idmap_mount()
4673 if (m->mnt_sb->s_iflags & SB_I_NOIDMAP) in can_idmap_mount()
4674 return -EINVAL; in can_idmap_mount()
4676 /* We're not controlling the superblock. */ in can_idmap_mount()
4678 return -EPERM; in can_idmap_mount()
4681 if (!is_anon_ns(mnt->mnt_ns)) in can_idmap_mount()
4682 return -EINVAL; in can_idmap_mount()
4688 * mnt_allow_writers() - check whether the attribute change allows writers
4699 return (!(kattr->attr_set & MNT_READONLY) || in mnt_allow_writers()
4700 (mnt->mnt.mnt_flags & MNT_READONLY)) && in mnt_allow_writers()
4701 !kattr->mnt_idmap; in mnt_allow_writers()
4711 err = -EPERM; in mount_setattr_prepare()
4727 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) in mount_setattr_prepare()
4732 /* undo all mnt_hold_writers() we'd done */ in mount_setattr_prepare()
4743 if (!kattr->mnt_idmap) in do_idmap_mount()
4746 old_idmap = mnt_idmap(&mnt->mnt); in do_idmap_mount()
4749 smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); in do_idmap_mount()
4762 WRITE_ONCE(m->mnt.mnt_flags, flags); in mount_setattr_commit()
4764 /* If we had to hold writers unblock them. */ in mount_setattr_commit()
4767 if (kattr->propagation) in mount_setattr_commit()
4768 change_mnt_propagation(m, kattr->propagation); in mount_setattr_commit()
4769 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) in mount_setattr_commit()
4772 touch_mnt_namespace(mnt->mnt_ns); in mount_setattr_commit()
4777 struct mount *mnt = real_mount(path->mnt); in do_mount_setattr()
4781 return -EINVAL; in do_mount_setattr()
4783 if (kattr->mnt_userns) { in do_mount_setattr()
4786 mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); in do_mount_setattr()
4789 kattr->mnt_idmap = mnt_idmap; in do_mount_setattr()
4792 if (kattr->propagation) { in do_mount_setattr()
4794 * Only take namespace_lock() if we're actually changing in do_mount_setattr()
4798 if (kattr->propagation == MS_SHARED) { in do_mount_setattr()
4799 err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE); in do_mount_setattr()
4807 err = -EINVAL; in do_mount_setattr()
4814  * First, we get the mount tree into a shape where we can change mount in do_mount_setattr()
4815  * properties without failure. If that succeeds we commit all in do_mount_setattr()
4816  * changes; if it fails we clean up. in do_mount_setattr()
4825 if (kattr->propagation) { in do_mount_setattr()
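/*
 * Editor's sketch (hedged): a minimal mount_setattr() caller exercising
 * the prepare/commit flow above. With AT_RECURSIVE the kernel first
 * holds writers on every mount in the subtree, so the switch to
 * read-only either applies to the whole tree or not at all. The target
 * path "/mnt/tree" is illustrative; assumes >= 5.12 headers.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	struct mount_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.attr_set = MOUNT_ATTR_RDONLY;
	if (syscall(SYS_mount_setattr, AT_FDCWD, "/mnt/tree", AT_RECURSIVE,
		    &attr, sizeof(attr)) < 0)
		err(1, "mount_setattr");
	return 0;
}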
4837 struct ns_common *ns; in build_mount_idmapped() local
4840 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) in build_mount_idmapped()
4843 if (attr->attr_clr & MOUNT_ATTR_IDMAP) { in build_mount_idmapped()
4845 * We can only remove an idmapping if it's never been in build_mount_idmapped()
4848 if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE)) in build_mount_idmapped()
4849 return -EINVAL; in build_mount_idmapped()
4855 if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) { in build_mount_idmapped()
4856 kattr->mnt_idmap = &nop_mnt_idmap; in build_mount_idmapped()
4861 if (attr->userns_fd > INT_MAX) in build_mount_idmapped()
4862 return -EINVAL; in build_mount_idmapped()
4864 CLASS(fd, f)(attr->userns_fd); in build_mount_idmapped()
4866 return -EBADF; in build_mount_idmapped()
4869 return -EINVAL; in build_mount_idmapped()
4871 ns = get_proc_ns(file_inode(fd_file(f))); in build_mount_idmapped()
4872 if (ns->ns_type != CLONE_NEWUSER) in build_mount_idmapped()
4873 return -EINVAL; in build_mount_idmapped()
4877 * mount. We use the initial idmapping as an indicator of a mount in build_mount_idmapped()
4883 mnt_userns = container_of(ns, struct user_namespace, ns); in build_mount_idmapped()
4885 return -EPERM; in build_mount_idmapped()
4887 /* We're not controlling the target namespace. */ in build_mount_idmapped()
4889 return -EPERM; in build_mount_idmapped()
4891 kattr->mnt_userns = get_user_ns(mnt_userns); in build_mount_idmapped()
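/*
 * Editor's sketch (hedged): creating an idmapped mount along the
 * build_mount_idmapped() path above. The user namespace fd (here taken
 * from a hypothetical /proc/1234/ns/user with a suitable uid/gid map)
 * and all paths are illustrative; the filesystem must advertise
 * FS_ALLOW_IDMAP or can_idmap_mount() above rejects the request.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	struct mount_attr attr;
	int tree_fd, userns_fd;

	tree_fd = syscall(SYS_open_tree, AT_FDCWD, "/srv/data",
			  OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
	if (tree_fd < 0)
		err(1, "open_tree");
	userns_fd = open("/proc/1234/ns/user", O_RDONLY | O_CLOEXEC);
	if (userns_fd < 0)
		err(1, "open(userns)");
	memset(&attr, 0, sizeof(attr));
	attr.attr_set = MOUNT_ATTR_IDMAP;
	attr.userns_fd = userns_fd;
	/* empty path + AT_EMPTY_PATH targets the detached tree itself */
	if (syscall(SYS_mount_setattr, tree_fd, "", AT_EMPTY_PATH,
		    &attr, sizeof(attr)) < 0)
		err(1, "mount_setattr(IDMAP)");
	if (syscall(SYS_move_mount, tree_fd, "", AT_FDCWD, "/mnt/idmapped",
		    MOVE_MOUNT_F_EMPTY_PATH) < 0)
		err(1, "move_mount");
	return 0;
}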
4898 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) in build_mount_kattr()
4899 return -EINVAL; in build_mount_kattr()
4900 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) in build_mount_kattr()
4901 return -EINVAL; in build_mount_kattr()
4902 kattr->propagation = attr->propagation; in build_mount_kattr()
4904 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) in build_mount_kattr()
4905 return -EINVAL; in build_mount_kattr()
4907 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); in build_mount_kattr()
4908 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); in build_mount_kattr()
4919 if (attr->attr_clr & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4920 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) in build_mount_kattr()
4921 return -EINVAL; in build_mount_kattr()
4927 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; in build_mount_kattr()
4928 switch (attr->attr_set & MOUNT_ATTR__ATIME) { in build_mount_kattr()
4930 kattr->attr_set |= MNT_RELATIME; in build_mount_kattr()
4933 kattr->attr_set |= MNT_NOATIME; in build_mount_kattr()
4938 return -EINVAL; in build_mount_kattr()
4941 if (attr->attr_set & MOUNT_ATTR__ATIME) in build_mount_kattr()
4942 return -EINVAL; in build_mount_kattr()
4950 if (kattr->mnt_userns) { in finish_mount_kattr()
4951 put_user_ns(kattr->mnt_userns); in finish_mount_kattr()
4952 kattr->mnt_userns = NULL; in finish_mount_kattr()
4955 if (kattr->mnt_idmap) in finish_mount_kattr()
4956 mnt_idmap_put(kattr->mnt_idmap); in finish_mount_kattr()
4968 return -E2BIG; in wants_mount_setattr()
4970 return -EINVAL; in wants_mount_setattr()
4973 return -EPERM; in wants_mount_setattr()
5005 return -EINVAL; in SYSCALL_DEFINE5()
5042 return -EINVAL; in SYSCALL_DEFINE5()
5059 ret = do_mount_setattr(&file->f_path, &kattr); in SYSCALL_DEFINE5()
5076 if (root->d_sb->s_op->show_path) in show_path()
5077 return root->d_sb->s_op->show_path(m, root); in show_path()
5083 static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns) in lookup_mnt_in_ns() argument
5085 struct mount *mnt = mnt_find_id_at(ns, id); in lookup_mnt_in_ns()
5087 if (!mnt || mnt->mnt_id_unique != id) in lookup_mnt_in_ns()
5090 return &mnt->mnt; in lookup_mnt_in_ns()
5102 /* Must be last -- ends in a flexible-array member. */
5108 unsigned int mnt_flags = READ_ONCE(mnt->mnt_flags); in mnt_to_attr_flags()
5155 struct super_block *sb = s->mnt->mnt_sb; in statmount_sb_basic()
5157 s->sm.mask |= STATMOUNT_SB_BASIC; in statmount_sb_basic()
5158 s->sm.sb_dev_major = MAJOR(sb->s_dev); in statmount_sb_basic()
5159 s->sm.sb_dev_minor = MINOR(sb->s_dev); in statmount_sb_basic()
5160 s->sm.sb_magic = sb->s_magic; in statmount_sb_basic()
5161 s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME); in statmount_sb_basic()
5166 struct mount *m = real_mount(s->mnt); in statmount_mnt_basic()
5168 s->sm.mask |= STATMOUNT_MNT_BASIC; in statmount_mnt_basic()
5169 s->sm.mnt_id = m->mnt_id_unique; in statmount_mnt_basic()
5170 s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique; in statmount_mnt_basic()
5171 s->sm.mnt_id_old = m->mnt_id; in statmount_mnt_basic()
5172 s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id; in statmount_mnt_basic()
5173 s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt); in statmount_mnt_basic()
5174 s->sm.mnt_propagation = mnt_to_propagation_flags(m); in statmount_mnt_basic()
5175 s->sm.mnt_peer_group = m->mnt_group_id; in statmount_mnt_basic()
5176 s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0; in statmount_mnt_basic()
5181 struct mount *m = real_mount(s->mnt); in statmount_propagate_from()
5183 s->sm.mask |= STATMOUNT_PROPAGATE_FROM; in statmount_propagate_from()
5185 s->sm.propagate_from = get_dominating_id(m, &current->fs->root); in statmount_propagate_from()
5191 size_t start = seq->count; in statmount_mnt_root()
5193 ret = show_path(seq, s->mnt->mnt_root); in statmount_mnt_root()
5198 return -EAGAIN; in statmount_mnt_root()
5204 seq->buf[seq->count] = '\0'; in statmount_mnt_root()
5205 seq->count = start; in statmount_mnt_root()
5206 seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); in statmount_mnt_root()
5212 struct vfsmount *mnt = s->mnt; in statmount_mnt_point()
5213 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; in statmount_mnt_point()
5216 err = seq_path_root(seq, &mnt_path, &s->root, ""); in statmount_mnt_point()
5222 struct super_block *sb = s->mnt->mnt_sb; in statmount_fs_type()
5224 seq_puts(seq, sb->s_type->name); in statmount_fs_type()
5230 struct super_block *sb = s->mnt->mnt_sb; in statmount_fs_subtype()
5232 if (sb->s_subtype) in statmount_fs_subtype()
5233 seq_puts(seq, sb->s_subtype); in statmount_fs_subtype()
5238 struct super_block *sb = s->mnt->mnt_sb; in statmount_sb_source()
5239 struct mount *r = real_mount(s->mnt); in statmount_sb_source()
5241 if (sb->s_op->show_devname) { in statmount_sb_source()
5242 size_t start = seq->count; in statmount_sb_source()
5245 ret = sb->s_op->show_devname(seq, s->mnt->mnt_root); in statmount_sb_source()
5250 return -EAGAIN; in statmount_sb_source()
5253 seq->buf[seq->count] = '\0'; in statmount_sb_source()
5254 seq->count = start; in statmount_sb_source()
5255 seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); in statmount_sb_source()
5257 seq_puts(seq, r->mnt_devname); in statmount_sb_source()
5262 static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) in statmount_mnt_ns_id() argument
5264 s->sm.mask |= STATMOUNT_MNT_NS_ID; in statmount_mnt_ns_id()
5265 s->sm.mnt_ns_id = ns->ns.ns_id; in statmount_mnt_ns_id()
5270 struct vfsmount *mnt = s->mnt; in statmount_mnt_opts()
5271 struct super_block *sb = mnt->mnt_sb; in statmount_mnt_opts()
5272 size_t start = seq->count; in statmount_mnt_opts()
5279 if (sb->s_op->show_options) { in statmount_mnt_opts()
5280 err = sb->s_op->show_options(seq, mnt->mnt_root); in statmount_mnt_opts()
5286 return -EAGAIN; in statmount_mnt_opts()
5288 if (seq->count == start) in statmount_mnt_opts()
5292 memmove(seq->buf + start, seq->buf + start + 1, in statmount_mnt_opts()
5293 seq->count - start - 1); in statmount_mnt_opts()
5294 seq->count--; in statmount_mnt_opts()
5305 return -EAGAIN; in statmount_opt_process()
5307 buf_end = seq->buf + seq->count; in statmount_opt_process()
5308 dst = seq->buf + start; in statmount_opt_process()
5312 seq->count = start; in statmount_opt_process()
5322 return -EOVERFLOW; in statmount_opt_process()
5324 seq->count = dst - 1 - seq->buf; in statmount_opt_process()
5330 struct vfsmount *mnt = s->mnt; in statmount_opt_array()
5331 struct super_block *sb = mnt->mnt_sb; in statmount_opt_array()
5332 size_t start = seq->count; in statmount_opt_array()
5335 if (!sb->s_op->show_options) in statmount_opt_array()
5338 err = sb->s_op->show_options(seq, mnt->mnt_root); in statmount_opt_array()
5346 s->sm.opt_num = err; in statmount_opt_array()
5352 struct vfsmount *mnt = s->mnt; in statmount_opt_sec_array()
5353 struct super_block *sb = mnt->mnt_sb; in statmount_opt_sec_array()
5354 size_t start = seq->count; in statmount_opt_sec_array()
5365 s->sm.opt_sec_num = err; in statmount_opt_sec_array()
5373 ret = statmount_mnt_idmap(s->idmap, seq, true); in statmount_mnt_uidmap()
5377 s->sm.mnt_uidmap_num = ret; in statmount_mnt_uidmap()
5381 * non-idmapped mount and an idmapped mount where none of the in statmount_mnt_uidmap()
5384 if (is_valid_mnt_idmap(s->idmap)) in statmount_mnt_uidmap()
5385 s->sm.mask |= STATMOUNT_MNT_UIDMAP; in statmount_mnt_uidmap()
5393 ret = statmount_mnt_idmap(s->idmap, seq, false); in statmount_mnt_gidmap()
5397 s->sm.mnt_gidmap_num = ret; in statmount_mnt_gidmap()
5401 * non-idmapped mount and an idmapped mount where none of the in statmount_mnt_gidmap()
5404 if (is_valid_mnt_idmap(s->idmap)) in statmount_mnt_gidmap()
5405 s->sm.mask |= STATMOUNT_MNT_GIDMAP; in statmount_mnt_gidmap()
5413 struct seq_file *seq = &s->seq; in statmount_string()
5414 struct statmount *sm = &s->sm; in statmount_string()
5418 if (!seq->count) in statmount_string()
5421 start = seq->count; in statmount_string()
5425 offp = &sm->fs_type; in statmount_string()
5429 offp = &sm->mnt_root; in statmount_string()
5433 offp = &sm->mnt_point; in statmount_string()
5437 offp = &sm->mnt_opts; in statmount_string()
5441 offp = &sm->opt_array; in statmount_string()
5445 offp = &sm->opt_sec_array; in statmount_string()
5449 offp = &sm->fs_subtype; in statmount_string()
5453 offp = &sm->sb_source; in statmount_string()
5457 sm->mnt_uidmap = start; in statmount_string()
5461 sm->mnt_gidmap = start; in statmount_string()
5466 return -EINVAL; in statmount_string()
5473 if (seq->count == start) in statmount_string()
5475 if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize))) in statmount_string()
5476 return -EOVERFLOW; in statmount_string()
5477 if (kbufsize >= s->bufsize) in statmount_string()
5478 return -EOVERFLOW; in statmount_string()
5482 return -EAGAIN; in statmount_string()
5487 seq->buf[seq->count++] = '\0'; in statmount_string()
5488 sm->mask |= flag; in statmount_string()
5495 struct statmount *sm = &s->sm; in copy_statmount_to_user()
5496 struct seq_file *seq = &s->seq; in copy_statmount_to_user()
5497 char __user *str = ((char __user *)s->buf) + sizeof(*sm); in copy_statmount_to_user()
5498 size_t copysize = min_t(size_t, s->bufsize, sizeof(*sm)); in copy_statmount_to_user()
5500 if (seq->count && copy_to_user(str, seq->buf, seq->count)) in copy_statmount_to_user()
5501 return -EFAULT; in copy_statmount_to_user()
5504 sm->size = copysize + seq->count; in copy_statmount_to_user()
5505 if (copy_to_user(s->buf, sm, copysize)) in copy_statmount_to_user()
5506 return -EFAULT; in copy_statmount_to_user()
5516 node = rb_prev(&curr->mnt_node); in listmnt_next()
5518 node = rb_next(&curr->mnt_node); in listmnt_next()
5523 static int grab_requested_root(struct mnt_namespace *ns, struct path *root) in grab_requested_root() argument
5529 /* We're looking at our own ns, just use get_fs_root. */ in grab_requested_root()
5530 if (ns == current->nsproxy->mnt_ns) { in grab_requested_root()
5531 get_fs_root(current->fs, root); in grab_requested_root()
5536 * We have to find the first mount in our ns and use that, however it in grab_requested_root()
5539 if (mnt_ns_empty(ns)) in grab_requested_root()
5540 return -ENOENT; in grab_requested_root()
5542 first = child = ns->root; in grab_requested_root()
5546 return -ENOENT; in grab_requested_root()
5547 if (child->mnt_parent == first) in grab_requested_root()
5551 root->mnt = mntget(&child->mnt); in grab_requested_root()
5552 root->dentry = dget(root->mnt->mnt_root); in grab_requested_root()
5575 struct mnt_namespace *ns) in do_statmount() argument
5581 if (mnt_ns_id && mnt_ns_empty(ns)) in do_statmount()
5582 return -ENOENT; in do_statmount()
5584 s->mnt = lookup_mnt_in_ns(mnt_id, ns); in do_statmount()
5585 if (!s->mnt) in do_statmount()
5586 return -ENOENT; in do_statmount()
5588 err = grab_requested_root(ns, &s->root); in do_statmount()
5593 * Don't trigger audit denials. We just want to determine what in do_statmount()
5596 m = real_mount(s->mnt); in do_statmount()
5597 if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && in do_statmount()
5598 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in do_statmount()
5599 return -EPERM; in do_statmount()
5601 err = security_sb_statfs(s->mnt->mnt_root); in do_statmount()
5606 * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap in do_statmount()
5607 * can change concurrently as we only hold the read-side of the in do_statmount()
5611 * We could sample the mount lock sequence counter to detect in do_statmount()
5613 * happens is that the mnt->mnt_idmap pointer is already changed in do_statmount()
5614  * while mnt->mnt_flags isn't or vice versa. So what. in do_statmount()
5616 * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved in do_statmount()
5618 * torn read/write. That's all we care about right now. in do_statmount()
5620 s->idmap = mnt_idmap(s->mnt); in do_statmount()
5621 if (s->mask & STATMOUNT_MNT_BASIC) in do_statmount()
5624 if (s->mask & STATMOUNT_SB_BASIC) in do_statmount()
5627 if (s->mask & STATMOUNT_PROPAGATE_FROM) in do_statmount()
5630 if (s->mask & STATMOUNT_FS_TYPE) in do_statmount()
5633 if (!err && s->mask & STATMOUNT_MNT_ROOT) in do_statmount()
5636 if (!err && s->mask & STATMOUNT_MNT_POINT) in do_statmount()
5639 if (!err && s->mask & STATMOUNT_MNT_OPTS) in do_statmount()
5642 if (!err && s->mask & STATMOUNT_OPT_ARRAY) in do_statmount()
5645 if (!err && s->mask & STATMOUNT_OPT_SEC_ARRAY) in do_statmount()
5648 if (!err && s->mask & STATMOUNT_FS_SUBTYPE) in do_statmount()
5651 if (!err && s->mask & STATMOUNT_SB_SOURCE) in do_statmount()
5654 if (!err && s->mask & STATMOUNT_MNT_UIDMAP) in do_statmount()
5657 if (!err && s->mask & STATMOUNT_MNT_GIDMAP) in do_statmount()
5660 if (!err && s->mask & STATMOUNT_MNT_NS_ID) in do_statmount()
5661 statmount_mnt_ns_id(s, ns); in do_statmount()
5663 if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) { in do_statmount()
5664 s->sm.mask |= STATMOUNT_SUPPORTED_MASK; in do_statmount()
5665 s->sm.supported_mask = STATMOUNT_SUPPORTED; in do_statmount()
5672 WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask); in do_statmount()
5679 if (likely(ret != -EAGAIN)) in retry_statmount()
5699 return -EFAULT; in prepare_kstatmount()
5702 ks->mask = kreq->param; in prepare_kstatmount()
5703 ks->buf = buf; in prepare_kstatmount()
5704 ks->bufsize = bufsize; in prepare_kstatmount()
5706 if (ks->mask & STATMOUNT_STRING_REQ) { in prepare_kstatmount()
5707 if (bufsize == sizeof(ks->sm)) in prepare_kstatmount()
5708 return -EOVERFLOW; in prepare_kstatmount()
5710 ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); in prepare_kstatmount()
5711 if (!ks->seq.buf) in prepare_kstatmount()
5712 return -ENOMEM; in prepare_kstatmount()
5714 ks->seq.size = seq_size; in prepare_kstatmount()
5728 ret = get_user(usize, &req->size); in copy_mnt_id_req()
5730 return -EFAULT; in copy_mnt_id_req()
5732 return -E2BIG; in copy_mnt_id_req()
5734 return -EINVAL; in copy_mnt_id_req()
5739 if (kreq->spare != 0) in copy_mnt_id_req()
5740 return -EINVAL; in copy_mnt_id_req()
5742 if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) in copy_mnt_id_req()
5743 return -EINVAL; in copy_mnt_id_req()
5749 * that, or if not simply grab a passive reference on our mount namespace and
5756 if (kreq->mnt_ns_id && kreq->spare) in grab_requested_mnt_ns()
5757 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5759 if (kreq->mnt_ns_id) in grab_requested_mnt_ns()
5760 return lookup_mnt_ns(kreq->mnt_ns_id); in grab_requested_mnt_ns()
5762 if (kreq->spare) { in grab_requested_mnt_ns()
5763 struct ns_common *ns; in grab_requested_mnt_ns() local
5765 CLASS(fd, f)(kreq->spare); in grab_requested_mnt_ns()
5767 return ERR_PTR(-EBADF); in grab_requested_mnt_ns()
5770 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5772 ns = get_proc_ns(file_inode(fd_file(f))); in grab_requested_mnt_ns()
5773 if (ns->ns_type != CLONE_NEWNS) in grab_requested_mnt_ns()
5774 return ERR_PTR(-EINVAL); in grab_requested_mnt_ns()
5776 mnt_ns = to_mnt_ns(ns); in grab_requested_mnt_ns()
5778 mnt_ns = current->nsproxy->mnt_ns; in grab_requested_mnt_ns()
5781 refcount_inc(&mnt_ns->passive); in grab_requested_mnt_ns()
5789 struct mnt_namespace *ns __free(mnt_ns_release) = NULL; in SYSCALL_DEFINE4()
5792 /* We currently support retrieval of 3 strings. */ in SYSCALL_DEFINE4()
5797 return -EINVAL; in SYSCALL_DEFINE4()
5803 ns = grab_requested_mnt_ns(&kreq); in SYSCALL_DEFINE4()
5804 if (!ns) in SYSCALL_DEFINE4()
5805 return -ENOENT; in SYSCALL_DEFINE4()
5807 if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && in SYSCALL_DEFINE4()
5808 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in SYSCALL_DEFINE4()
5809 return -ENOENT; in SYSCALL_DEFINE4()
5813 return -ENOMEM; in SYSCALL_DEFINE4()
5821 ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); in SYSCALL_DEFINE4()
5825 kvfree(ks->seq.buf); in SYSCALL_DEFINE4()
5826 path_put(&ks->root); in SYSCALL_DEFINE4()
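/*
 * Editor's sketch (hedged): a minimal statmount() caller matching the
 * kernel path above. It resolves the 64-bit unique mount ID of "/" via
 * statx(STATX_MNT_ID_UNIQUE), then requests the basic fields plus the
 * fs type string. A real caller retries with a bigger buffer on
 * -EOVERFLOW, mirroring retry_statmount(). Assumes >= 6.8 headers.
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <linux/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <err.h>

int main(void)
{
	static char buf[1 << 16] __attribute__((aligned(8)));
	struct statmount *sm = (struct statmount *)buf;
	struct statx sx;
	struct mnt_id_req req;

	if (syscall(SYS_statx, AT_FDCWD, "/", 0, STATX_MNT_ID_UNIQUE, &sx) < 0)
		err(1, "statx");
	memset(&req, 0, sizeof(req));
	req.size = MNT_ID_REQ_SIZE_VER0;
	req.mnt_id = sx.stx_mnt_id;	/* the new-style, unique mount ID */
	req.param = STATMOUNT_MNT_BASIC | STATMOUNT_FS_TYPE;	/* mask */
	if (syscall(SYS_statmount, &req, sm, sizeof(buf), 0) < 0)
		err(1, "statmount");
	printf("mnt_id=%llu parent=%llu\n",
	       (unsigned long long)sm->mnt_id,
	       (unsigned long long)sm->mnt_parent_id);
	if (sm->mask & STATMOUNT_FS_TYPE)	/* strings live past the struct */
		printf("fstype=%s\n", sm->str + sm->fs_type);
	return 0;
}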
5837 struct mnt_namespace *ns; member
5844 struct mnt_namespace *ns = kls->ns; in do_listmount() local
5845 u64 mnt_parent_id = kls->mnt_parent_id; in do_listmount()
5846 u64 last_mnt_id = kls->last_mnt_id; in do_listmount()
5847 u64 *mnt_ids = kls->kmnt_ids; in do_listmount()
5848 size_t nr_mnt_ids = kls->nr_mnt_ids; in do_listmount()
5855 ret = grab_requested_root(ns, &kls->root); in do_listmount()
5860 orig = kls->root; in do_listmount()
5862 orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); in do_listmount()
5864 return -ENOENT; in do_listmount()
5865 orig.dentry = orig.mnt->mnt_root; in do_listmount()
5869 * Don't trigger audit denials. We just want to determine what in do_listmount()
5872 if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &kls->root) && in do_listmount()
5873 !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) in do_listmount()
5874 return -EPERM; in do_listmount()
5882 first = node_to_mount(ns->mnt_last_node); in do_listmount()
5884 first = node_to_mount(ns->mnt_first_node); in do_listmount()
5887 first = mnt_find_id_at_reverse(ns, last_mnt_id - 1); in do_listmount()
5889 first = mnt_find_id_at(ns, last_mnt_id + 1); in do_listmount()
5893 if (r->mnt_id_unique == mnt_parent_id) in do_listmount()
5895 if (!is_path_reachable(r, r->mnt.mnt_root, &orig)) in do_listmount()
5897 *mnt_ids = r->mnt_id_unique; in do_listmount()
5899 nr_mnt_ids--; in do_listmount()
5907 path_put(&kls->root); in __free_klistmount_free()
5908 kvfree(kls->kmnt_ids); in __free_klistmount_free()
5909 mnt_ns_release(kls->ns); in __free_klistmount_free()
5916 u64 last_mnt_id = kreq->param; in prepare_klistmount()
5920 return -EINVAL; in prepare_klistmount()
5922 kls->last_mnt_id = last_mnt_id; in prepare_klistmount()
5924 kls->nr_mnt_ids = nr_mnt_ids; in prepare_klistmount()
5925 kls->kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kls->kmnt_ids), in prepare_klistmount()
5927 if (!kls->kmnt_ids) in prepare_klistmount()
5928 return -ENOMEM; in prepare_klistmount()
5930 kls->ns = grab_requested_mnt_ns(kreq); in prepare_klistmount()
5931 if (!kls->ns) in prepare_klistmount()
5932 return -ENOENT; in prepare_klistmount()
5934 kls->mnt_parent_id = kreq->mnt_id; in prepare_klistmount()
5947 return -EINVAL; in SYSCALL_DEFINE4()
5955 return -EOVERFLOW; in SYSCALL_DEFINE4()
5958 return -EFAULT; in SYSCALL_DEFINE4()
5968 if (kreq.mnt_ns_id && (kls.ns != current->nsproxy->mnt_ns) && in SYSCALL_DEFINE4()
5969 !ns_capable_noaudit(kls.ns->user_ns, CAP_SYS_ADMIN)) in SYSCALL_DEFINE4()
5970 return -ENOENT; in SYSCALL_DEFINE4()
5973 * We only need to guard against mount topology changes as in SYSCALL_DEFINE4()
5982 return -EFAULT; in SYSCALL_DEFINE4()
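/*
 * Editor's sketch (hedged): enumerating the current namespace with
 * listmount() as implemented above. LSMT_ROOT asks for the whole
 * namespace rather than one parent's children; req.param carries the
 * last mount ID seen when paging through results larger than the
 * array. The returned IDs pair naturally with statmount().
 */
#define _GNU_SOURCE
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <err.h>

int main(void)
{
	uint64_t ids[256];
	struct mnt_id_req req;
	long n;

	memset(&req, 0, sizeof(req));
	req.size = MNT_ID_REQ_SIZE_VER0;
	req.mnt_id = LSMT_ROOT;		/* list from the namespace root */
	n = syscall(SYS_listmount, &req, ids, 256, 0);
	if (n < 0)
		err(1, "listmount");
	for (long i = 0; i < n; i++)
		printf("%llu\n", (unsigned long long)ids[i]);
	return 0;
}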
5988 .ns.inum = ns_init_inum(&init_mnt_ns),
5989 .ns.ops = &mntns_operations,
5991 .ns.__ns_ref = REFCOUNT_INIT(1),
5992 .ns.ns_type = ns_common_type(&init_mnt_ns),
6012 init_task.nsproxy->mnt_ns = &init_mnt_ns; in init_mount_tree()
6016 root.dentry = mnt->mnt_root; in init_mount_tree()
6018 set_fs_pwd(current->fs, &root); in init_mount_tree()
6019 set_fs_root(current->fs, &root); in init_mount_tree()
6031 mount_hashtable = alloc_large_system_hash("Mount-cache", in mnt_init()
6036 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", in mnt_init()
6059 void put_mnt_ns(struct mnt_namespace *ns) in put_mnt_ns() argument
6061 if (!ns_ref_put(ns)) in put_mnt_ns()
6064 emptied_ns = ns; in put_mnt_ns()
6066 umount_tree(ns->root, 0); in put_mnt_ns()
6072 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); in kern_mount()
6076  * we unmount before the filesystem is unregistered in kern_mount()
6078 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; in kern_mount()
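/*
 * Editor's sketch (hedged, kernel-side): the usual consumer of
 * kern_mount() above: a subsystem pinning an internal pseudo
 * filesystem at init time so its mount outlives any userspace
 * reference. "example_fs_type" is hypothetical, registered elsewhere.
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/err.h>

extern struct file_system_type example_fs_type;	/* hypothetical */

static struct vfsmount *example_mnt;

static int __init example_init(void)
{
	example_mnt = kern_mount(&example_fs_type);
	if (IS_ERR(example_mnt))
		return PTR_ERR(example_mnt);
	return 0;
}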
6114 /* Does the current process have a non-standard root? */ in current_chrooted()
6118 get_fs_root(current->fs, &fs_root); in current_chrooted()
6124 root = topmost_overmount(current->nsproxy->mnt_ns->root); in current_chrooted()
6126 return fs_root.mnt != &root->mnt || !path_mounted(&fs_root); in current_chrooted()
6129 static bool mnt_already_visible(struct mnt_namespace *ns, in mnt_already_visible() argument
6137 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { in mnt_already_visible()
6141 if (mnt->mnt.mnt_sb->s_type != sb->s_type) in mnt_already_visible()
6147 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) in mnt_already_visible()
6151 mnt_flags = mnt->mnt.mnt_flags; in mnt_already_visible()
6154 if (sb_rdonly(mnt->mnt.mnt_sb)) in mnt_already_visible()
6171 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { in mnt_already_visible()
6172 struct inode *inode = child->mnt_mountpoint->d_inode; in mnt_already_visible()
6174 if (!(child->mnt.mnt_flags & MNT_LOCKED)) in mnt_already_visible()
6192 struct mnt_namespace *ns = current->nsproxy->mnt_ns; in mount_too_revealing() local
6195 if (ns->user_ns == &init_user_ns) in mount_too_revealing()
6199 s_iflags = sb->s_iflags; in mount_too_revealing()
6209 return !mnt_already_visible(ns, sb, new_mnt_flags); in mount_too_revealing()
6221 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) && in mnt_may_suid()
6222 current_in_userns(mnt->mnt_sb->s_user_ns); in mnt_may_suid()
6227 struct ns_common *ns = NULL; in mntns_get() local
6231 nsproxy = task->nsproxy; in mntns_get()
6233 ns = &nsproxy->mnt_ns->ns; in mntns_get()
6234 get_mnt_ns(to_mnt_ns(ns)); in mntns_get()
6238 return ns; in mntns_get()
6241 static void mntns_put(struct ns_common *ns) in mntns_put() argument
6243 put_mnt_ns(to_mnt_ns(ns)); in mntns_put()
6246 static int mntns_install(struct nsset *nsset, struct ns_common *ns) in mntns_install() argument
6248 struct nsproxy *nsproxy = nsset->nsproxy; in mntns_install()
6249 struct fs_struct *fs = nsset->fs; in mntns_install()
6250 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; in mntns_install()
6251 struct user_namespace *user_ns = nsset->cred->user_ns; in mntns_install()
6255 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || in mntns_install()
6258 return -EPERM; in mntns_install()
6261 return -EINVAL; in mntns_install()
6263 if (fs->users != 1) in mntns_install()
6264 return -EINVAL; in mntns_install()
6267 old_mnt_ns = nsproxy->mnt_ns; in mntns_install()
6268 nsproxy->mnt_ns = mnt_ns; in mntns_install()
6271 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, in mntns_install()
6275 nsproxy->mnt_ns = old_mnt_ns; in mntns_install()
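/*
 * Editor's sketch (hedged): entering a mount namespace from userspace,
 * which lands in mntns_install() above. The checks there are why this
 * fails with -EINVAL for multithreaded callers or a shared fs_struct
 * and with -EPERM without CAP_SYS_ADMIN over the target namespace's
 * user namespace. PID 1234 is illustrative.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int main(void)
{
	int fd = open("/proc/1234/ns/mnt", O_RDONLY | O_CLOEXEC);

	if (fd < 0)
		err(1, "open");
	if (setns(fd, CLONE_NEWNS) < 0)
		err(1, "setns");
	close(fd);
	/* path resolution now uses the target namespace's mount tree */
	return 0;
}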
6290 static struct user_namespace *mntns_owner(struct ns_common *ns) in mntns_owner() argument
6292 return to_mnt_ns(ns)->user_ns; in mntns_owner()
6306 .procname = "mount-max",