xref: /linux/fs/filesystems.c (revision 056a5087d87ead77dedbe9cf5bde53b7cd4b4651)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/filesystems.c
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  *
7  *  table of configured filesystems
8  */
9 
10 #include <linux/syscalls.h>
11 #include <linux/fs.h>
12 #include <linux/proc_fs.h>
13 #include <linux/seq_file.h>
14 #include <linux/kmod.h>
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/slab.h>
18 #include <linux/uaccess.h>
19 #include <linux/fs_parser.h>
20 #include <linux/rculist.h>
21 
/*
 * Read-mostly list of registered filesystem drivers.
 *
 * Readers walk the list under rcu_read_lock(); writers take
 * file_systems_lock and publish changes via the _rcu hlist primitives.
 * unregister_filesystem() calls synchronize_rcu() after dropping the
 * lock so that the embedded file_system_type can't go away under a
 * reader.  To keep using a filesystem after the RCU section ends, take
 * a module reference via try_module_get().
 */
static HLIST_HEAD(file_systems);
static DEFINE_SPINLOCK(file_systems_lock);
33 
34 #ifdef CONFIG_PROC_FS
35 /*
36  * Cache a stringified version of the filesystem list.
37  *
38  * The fs list gets queried a lot by userspace because of libselinux, including
39  * rather surprising programs (would you guess *sed* is on the list?). In order
40  * to reduce the overhead we cache the resulting string, which normally hangs
41  * around below 512 bytes in size.
42  *
43  * As the list almost never changes, its creation is not particularly optimized
44  * to keep things simple.
45  *
46  * We sort it out on read in order to not introduce a failure point for fs
47  * registration (in principle we may be unable to alloc memory for the list).
48  */
struct file_systems_string {
	struct rcu_head rcu;	/* lets the object be released with kfree_rcu() */
	unsigned long gen;	/* file_systems_gen value the text was built for */
	size_t len;		/* length of @string, not counting the trailing NUL */
	char string[];		/* NUL-terminated listing text */
};
55 
/* Bumped by invalidate_filesystems_string() each time the fs list changes. */
static unsigned long file_systems_gen;
/* Currently published cache; NULL after an invalidation until regenerated. */
static struct file_systems_string __read_mostly __rcu *file_systems_string;

static void invalidate_filesystems_string(void);
#else
/* Without CONFIG_PROC_FS there is no cached string to invalidate. */
static inline void invalidate_filesystems_string(void) { }
62 #endif
63 
64 /* WARNING: This can be used only if we _already_ own a reference */
65 struct file_system_type *get_filesystem(struct file_system_type *fs)
66 {
67 	__module_get(fs->owner);
68 	return fs;
69 }
70 
71 void put_filesystem(struct file_system_type *fs)
72 {
73 	module_put(fs->owner);
74 }
75 
76 static struct file_system_type *find_filesystem(const char *name, unsigned len)
77 {
78 	struct file_system_type *fs;
79 
80 	hlist_for_each_entry_rcu(fs, &file_systems, list,
81 				 lockdep_is_held(&file_systems_lock))
82 		if (strncmp(fs->name, name, len) == 0 && !fs->name[len])
83 			return fs;
84 	return NULL;
85 }
86 
87 /**
88  *	register_filesystem - register a new filesystem
89  *	@fs: the file system structure
90  *
91  *	Adds the file system passed to the list of file systems the kernel
92  *	is aware of for mount and other syscalls. Returns 0 on success,
93  *	or a negative errno code on an error.
94  *
95  *	The &struct file_system_type that is passed is linked into the kernel
96  *	structures and must not be freed until the file system has been
97  *	unregistered.
98  */
99 int register_filesystem(struct file_system_type *fs)
100 {
101 	if (fs->parameters &&
102 	    !fs_validate_description(fs->name, fs->parameters))
103 		return -EINVAL;
104 
105 	BUG_ON(strchr(fs->name, '.'));
106 	if (!hlist_unhashed_lockless(&fs->list))
107 		return -EBUSY;
108 
109 	guard(spinlock)(&file_systems_lock);
110 	if (find_filesystem(fs->name, strlen(fs->name)))
111 		return -EBUSY;
112 	hlist_add_tail_rcu(&fs->list, &file_systems);
113 	invalidate_filesystems_string();
114 	return 0;
115 }
116 EXPORT_SYMBOL(register_filesystem);
117 
118 /**
119  *	unregister_filesystem - unregister a file system
120  *	@fs: filesystem to unregister
121  *
122  *	Remove a file system that was previously successfully registered
123  *	with the kernel. An error is returned if the file system is not found.
124  *	Zero is returned on a success.
125  *
126  *	Once this function has returned the &struct file_system_type structure
127  *	may be freed or reused.
128  */
129 int unregister_filesystem(struct file_system_type *fs)
130 {
131 	scoped_guard(spinlock, &file_systems_lock) {
132 		if (hlist_unhashed(&fs->list))
133 			return -EINVAL;
134 		hlist_del_init_rcu(&fs->list);
135 		invalidate_filesystems_string();
136 	}
137 	synchronize_rcu();
138 	return 0;
139 }
140 EXPORT_SYMBOL(unregister_filesystem);
141 
142 #ifdef CONFIG_SYSFS_SYSCALL
143 static int fs_index(const char __user *__name)
144 {
145 	struct file_system_type *p;
146 	char *name __free(kfree) = strndup_user(__name, PATH_MAX);
147 	int index = 0;
148 
149 	if (IS_ERR(name))
150 		return PTR_ERR(name);
151 
152 	guard(rcu)();
153 	hlist_for_each_entry_rcu(p, &file_systems, list) {
154 		if (strcmp(p->name, name) == 0)
155 			return index;
156 		index++;
157 	}
158 	return -EINVAL;
159 }
160 
161 static int fs_name(unsigned int index, char __user *buf)
162 {
163 	struct file_system_type *p, *found = NULL;
164 	int len, res;
165 
166 	scoped_guard(rcu) {
167 		hlist_for_each_entry_rcu(p, &file_systems, list) {
168 			if (index--)
169 				continue;
170 			if (try_module_get(p->owner))
171 				found = p;
172 			break;
173 		}
174 	}
175 	if (!found)
176 		return -EINVAL;
177 
178 	/* OK, we got the reference, so we can safely block */
179 	len = strlen(found->name) + 1;
180 	res = copy_to_user(buf, found->name, len) ? -EFAULT : 0;
181 	put_filesystem(found);
182 	return res;
183 }
184 
185 static int fs_maxindex(void)
186 {
187 	struct file_system_type *p;
188 	int index = 0;
189 
190 	guard(rcu)();
191 	hlist_for_each_entry_rcu(p, &file_systems, list)
192 		index++;
193 	return index;
194 }
195 
196 /*
197  * Whee.. Weird sysv syscall.
198  */
199 SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
200 {
201 	int retval = -EINVAL;
202 
203 	switch (option) {
204 		case 1:
205 			retval = fs_index((const char __user *) arg1);
206 			break;
207 
208 		case 2:
209 			retval = fs_name(arg1, (char __user *) arg2);
210 			break;
211 
212 		case 3:
213 			retval = fs_maxindex();
214 			break;
215 	}
216 	return retval;
217 }
218 #endif
219 
220 int __init list_bdev_fs_names(char *buf, size_t size)
221 {
222 	struct file_system_type *p;
223 	size_t len;
224 	int count = 0;
225 
226 	guard(rcu)();
227 	hlist_for_each_entry_rcu(p, &file_systems, list) {
228 		if (!(p->fs_flags & FS_REQUIRES_DEV))
229 			continue;
230 		len = strlen(p->name) + 1;
231 		if (len > size) {
232 			pr_warn("%s: truncating file system list\n", __func__);
233 			break;
234 		}
235 		memcpy(buf, p->name, len);
236 		buf += len;
237 		size -= len;
238 		count++;
239 	}
240 	return count;
241 }
242 
243 #ifdef CONFIG_PROC_FS
244 static void invalidate_filesystems_string(void)
245 {
246 	struct file_systems_string *old;
247 
248 	lockdep_assert_held_write(&file_systems_lock);
249 	file_systems_gen++;
250 	old = rcu_replace_pointer(file_systems_string, NULL,
251 			   lockdep_is_held(&file_systems_lock));
252 	if (old)
253 		kfree_rcu(old, rcu);
254 }
255 
256 static __cold noinline int regen_filesystems_string(void)
257 {
258 	struct file_system_type *p;
259 	struct file_systems_string *old, *new;
260 	size_t newlen, usedlen;
261 	unsigned long gen;
262 
263 retry:
264 	newlen = 0;
265 
266 	/* pre-calc space for each fs */
267 	spin_lock(&file_systems_lock);
268 	gen = file_systems_gen;
269 	hlist_for_each_entry_rcu(p, &file_systems, list) {
270 		if (!(p->fs_flags & FS_REQUIRES_DEV))
271 			newlen += strlen("nodev");
272 		newlen += strlen("\t") + strlen(p->name) + strlen("\n");
273 	}
274 	spin_unlock(&file_systems_lock);
275 
276 	new = kmalloc(offsetof(struct file_systems_string, string) + newlen + 1,
277 		      GFP_KERNEL);
278 	if (!new)
279 		return -ENOMEM;
280 
281 	new->gen = gen;
282 	new->len = newlen;
283 	new->string[newlen] = '\0';
284 
285 	spin_lock(&file_systems_lock);
286 	old = file_systems_string;
287 
288 	/*
289 	 * Did someone beat us to it?
290 	 */
291 	if (old && old->gen == file_systems_gen) {
292 		spin_unlock(&file_systems_lock);
293 		kfree(new);
294 		return 0;
295 	}
296 
297 	/*
298 	 * Did the list change in the meantime?
299 	 */
300 	if (gen != file_systems_gen) {
301 		spin_unlock(&file_systems_lock);
302 		kfree(new);
303 		goto retry;
304 	}
305 
306 	/*
307 	 * Populate the string.
308 	 *
309 	 * We know we have just enough space because we calculated the right
310 	 * size the previous time we had the lock and confirmed the list has
311 	 * not changed after reacquiring it.
312 	 */
313 	usedlen = 0;
314 	hlist_for_each_entry_rcu(p, &file_systems, list) {
315 		usedlen += sprintf(&new->string[usedlen], "%s\t%s\n",
316 				   (p->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
317 				   p->name);
318 	}
319 
320 	if (WARN_ON_ONCE(new->len != strlen(new->string))) {
321 		/*
322 		 * Should never happen of course, keep this in case someone changes string
323 		 * generation above and messes it up.
324 		 */
325 		spin_unlock(&file_systems_lock);
326 		kfree(new);
327 		return -EINVAL;
328 	}
329 
330 	rcu_assign_pointer(file_systems_string, new);
331 	spin_unlock(&file_systems_lock);
332 	if (old)
333 		kfree_rcu(old, rcu);
334 	return 0;
335 }
336 
337 static __cold noinline int filesystems_proc_show_fallback(struct seq_file *m, void *v)
338 {
339 	struct file_system_type *p;
340 
341 	guard(rcu)();
342 	hlist_for_each_entry_rcu(p, &file_systems, list) {
343 		seq_printf(m, "%s\t%s\n",
344 			   (p->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
345 			   p->name);
346 	}
347 	return 0;
348 }
349 
350 static int filesystems_proc_show(struct seq_file *m, void *v)
351 {
352 	struct file_systems_string *fss;
353 
354 	for (;;) {
355 		scoped_guard(rcu) {
356 			fss = rcu_dereference(file_systems_string);
357 			if (likely(fss)) {
358 				seq_write(m, fss->string, fss->len);
359 				return 0;
360 			}
361 		}
362 
363 		int err = regen_filesystems_string();
364 		if (unlikely(err))
365 			return filesystems_proc_show_fallback(m, v);
366 	}
367 }
368 
369 static int __init proc_filesystems_init(void)
370 {
371 	struct proc_dir_entry *pde;
372 
373 	pde = proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
374 	if (!pde)
375 		return -ENOMEM;
376 	proc_make_permanent(pde);
377 	return 0;
378 }
379 module_init(proc_filesystems_init);
380 #endif
381 
382 static struct file_system_type *__get_fs_type(const char *name, int len)
383 {
384 	struct file_system_type *fs;
385 
386 	guard(rcu)();
387 	fs = find_filesystem(name, len);
388 	if (fs && !try_module_get(fs->owner))
389 		fs = NULL;
390 	return fs;
391 }
392 
393 struct file_system_type *get_fs_type(const char *name)
394 {
395 	struct file_system_type *fs;
396 	const char *dot = strchr(name, '.');
397 	int len = dot ? dot - name : strlen(name);
398 
399 	fs = __get_fs_type(name, len);
400 	if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
401 		fs = __get_fs_type(name, len);
402 		if (!fs)
403 			pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n",
404 				     len, name);
405 	}
406 
407 	if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
408 		put_filesystem(fs);
409 		fs = NULL;
410 	}
411 	return fs;
412 }
413 EXPORT_SYMBOL(get_fs_type);
414