xref: /linux/fs/pnode.c (revision 34bec35cbbb23e5fd18100f2a2b217ebb6cb129c)
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
static inline struct mount *next_peer(struct mount *p)
{
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
}

static inline struct mount *first_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}

static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}

static inline struct mount *next_slave(struct mount *p)
{
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
}
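
/*
 * Quick reference for the intrusive lists the helpers above walk (an
 * informal summary; the authoritative definitions are the struct mount
 * fields in fs/mount.h):
 *
 *   mnt_share      - circular list linking all members of a shared
 *                    peer group
 *   mnt_slave_list - head of the list of this mount's slaves
 *   mnt_slave      - linkage of this mount into its master's
 *                    mnt_slave_list
 *   mnt_master     - the mount this one receives propagation from
 *
 * A mount may be a member of a peer group and a slave of another group
 * at the same time.
 */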

static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
{
	struct mount *m = mnt;

	do {
		/* Check the namespace first for optimization */
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
			return m;

		m = next_peer(m);
	} while (m != mnt);

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
int get_dominating_id(struct mount *mnt, const struct path *root)
{
	struct mount *m;

	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
		if (d)
			return d->mnt_group_id;
	}

	return 0;
}
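
/*
 * Illustrative example (hypothetical topology): if @mnt is a slave of
 * peer group 3, group 3 is in turn a slave of group 2, and only group 2
 * has a member whose root is reachable from @root (say, inside a
 * chroot), get_dominating_id() returns 2.  The mountinfo code in
 * fs/proc_namespace.c uses this for the "propagate_from:<id>" optional
 * field of /proc/<pid>/mountinfo.
 */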

static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}
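
/*
 * Illustrative example for do_make_slave() (hypothetical topology): let
 * A, B and C form one shared peer group and let S be a slave whose
 * master is A.  After do_make_slave(A):
 *   - A has left the peer group and is no longer shared,
 *   - B and C remain peers of each other,
 *   - A becomes a slave of one of its former peers (preferring one with
 *     the same root dentry),
 *   - S, and any other former slaves of A, now receive propagation from
 *     that former peer instead.
 */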

/*
 * vfsmount lock must be held for write
 */
void change_mnt_propagation(struct mount *mnt, int type)
{
	if (type == MS_SHARED) {
		set_mnt_shared(mnt);
		return;
	}
	do_make_slave(mnt);
	if (type != MS_SLAVE) {
		list_del_init(&mnt->mnt_slave);
		mnt->mnt_master = NULL;
		if (type == MS_UNBINDABLE)
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
		else
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
	}
}
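
/*
 * change_mnt_propagation() is the backend for the propagation-type
 * changes requested through mount(2); do_change_type() in fs/namespace.c
 * calls it for every mount affected.  A minimal userspace sketch
 * (illustrative only, error handling omitted) that ends up here:
 *
 *	#include <sys/mount.h>
 *
 *	// equivalent of "mount --make-rslave /mnt"
 *	mount(NULL, "/mnt", NULL, MS_REC | MS_SLAVE, NULL);
 *
 * MS_PRIVATE and MS_UNBINDABLE take the branch above that detaches the
 * mount from any master; MS_UNBINDABLE additionally sets MNT_UNBINDABLE.
 */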

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from which the tree walk was initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in propagate_one() to be able to find out if a
 * mount found while iterating with propagation_next() is a peer
 * of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			return next_slave(m);

		/* back at master */
		m = master;
	}
}
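
/*
 * Illustrative walk order (hypothetical topology): let A and B be peers,
 * let S1 and S2 be slaves of A (in that order on A's mnt_slave_list),
 * and let T be a slave of S1.  Starting from A, successive calls to
 * propagation_next(m, A) yield
 *
 *	S1 -> T -> S2 -> B -> NULL
 *
 * i.e. each mount's slaves are visited before moving on to the next
 * peer, and the walk terminates once it comes back around to the origin.
 */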

static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}

static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}

/* all accesses are serialized by namespace_sem */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;

static inline bool peers(struct mount *m1, struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (peers(m, last_dest)) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		bool done;
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p))
				break;
		}
		do {
			struct mount *parent = last_source->mnt_parent;
			if (last_source == first_source)
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	read_seqlock_excl(&mount_lock);
	mnt_set_mountpoint(m, mp, child);
	if (m->mnt_master != dest_master)
		SET_MNT_MARK(m->mnt_master);
	read_sequnlock_excl(&mount_lock);
	last_dest = m;
	last_source = child;
	hlist_add_head(&child->mnt_hash, list);
	return count_mounts(m->mnt_ns, child);
}
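
/*
 * Rough sketch of what propagate_one() above is doing (informal, see the
 * code for the authoritative details): last_dest/last_source are kept in
 * step so that each new copy can be cloned from the copy made for the
 * previously visited mount.  Peers of last_dest receive a peer copy
 * (CL_MAKE_SHARED); for a mount in a slave group we first walk up its
 * master chain to the nearest group that has already been handled
 * (dest_master or a marked master), rewind last_source to the copy made
 * for that group, and clone a CL_SLAVE copy from it, adding
 * CL_MAKE_SHARED if the destination is itself shared.
 */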

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * mountpoint 'dest_mp'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		    struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}
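
/*
 * Illustrative scenario: if /mnt is a shared mount that has been
 * propagated into other namespaces, a new mount under /mnt in one of
 * them reaches propagate_mnt() via attach_recursive_mnt() in
 * fs/namespace.c; a copy of the new mount is created for every peer and
 * slave of the destination, collected on @tree_list, and attached by
 * the caller afterwards.
 */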

static struct mount *find_topper(struct mount *mnt)
{
	/* If there is exactly one mount covering mnt completely return it. */
	struct mount *child;

	if (!list_is_singular(&mnt->mnt_mounts))
		return NULL;

	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
		return NULL;

	return child;
}
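
/*
 * Illustrative example: a "topper" arises when a propagated mount was
 * later covered by another mount placed exactly on its root (e.g. by a
 * subsequent propagation to the same mountpoint).  Unmount propagation
 * treats it specially below: its reference does not make the covered
 * mount busy, and instead of being unmounted it is reparented onto the
 * surviving ancestor.
 */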

/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child, *topper;
	struct mount *parent = mnt->mnt_parent;

	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
	}
	return 0;
}
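
/*
 * Example (illustrative): a plain "umount /mnt" propagates the unmount
 * to every mount that /mnt's parent propagates to.  If one of those
 * propagated copies is pinned in another mount namespace (say it is
 * some process's working directory) and has no submounts of its own,
 * propagate_mount_busy() reports it busy and the umount fails, unless a
 * lazy/detached unmount is used.
 */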

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}

static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}

static void restore_mounts(struct list_head *to_restore)
{
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
		struct mount *mnt, *parent;
		struct mountpoint *mp;

		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);

		/* Should this mount be reparented? */
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
	}
}

static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited, it is known that its
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint have already been visited and there is
		 * no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited, it is
				 * known that its entire peer group and all of
				 * their slaves in the propagation tree for the
				 * mountpoint have already been visited and there
				 * is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in the list that has not been visited
				 * yet. Remember it has been visited and continue
				 * about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}
603