xref: /linux/fs/readdir.c (revision af0bc3ac9a9e830cb52b718ecb237c4e76a466be)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/readdir.c
4  *
5  *  Copyright (C) 1995  Linus Torvalds
6  */
7 
8 #include <linux/stddef.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/time.h>
12 #include <linux/mm.h>
13 #include <linux/errno.h>
14 #include <linux/stat.h>
15 #include <linux/file.h>
16 #include <linux/fs.h>
17 #include <linux/fsnotify.h>
18 #include <linux/dirent.h>
19 #include <linux/security.h>
20 #include <linux/syscalls.h>
21 #include <linux/unistd.h>
22 #include <linux/compat.h>
23 #include <linux/uaccess.h>
24 
25 /*
26  * Some filesystems were never converted to '->iterate_shared()'
27  * and their directory iterators want the inode lock held for
28  * writing. This wrapper allows for converting from the shared
29  * semantics to the exclusive inode use.
30  */
31 int wrap_directory_iterator(struct file *file,
32 			    struct dir_context *ctx,
33 			    int (*iter)(struct file *, struct dir_context *))
34 {
35 	struct inode *inode = file_inode(file);
36 	int ret;
37 
38 	/*
39 	 * We'd love to have an 'inode_upgrade_trylock()' operation,
40 	 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
41 	 *
42 	 * But considering this is for "filesystems that never got
43 	 * converted", it really doesn't matter.
44 	 *
45 	 * Also note that since we have to return with the lock held
46 	 * for reading, we can't use the "killable()" locking here,
47 	 * since we do need to get the lock even if we're dying.
48 	 *
49 	 * We could do the write part killably and then get the read
50 	 * lock unconditionally if it mattered, but see above on why
51 	 * this does the very simplistic conversion.
52 	 */
53 	up_read(&inode->i_rwsem);
54 	down_write(&inode->i_rwsem);
55 
56 	/*
57 	 * Since we dropped the inode lock, we should do the
58 	 * DEADDIR test again. See 'iterate_dir()' below.
59 	 *
60 	 * Note that we don't need to re-do the f_pos games,
61 	 * since the file must be locked wrt f_pos anyway.
62 	 */
63 	ret = -ENOENT;
64 	if (!IS_DEADDIR(inode))
65 		ret = iter(file, ctx);
66 
67 	downgrade_write(&inode->i_rwsem);
68 	return ret;
69 }
70 EXPORT_SYMBOL(wrap_directory_iterator);
71 
/*
 * Copy a directory entry name of _len bytes from kernel memory _src to
 * user memory _dst and store a terminating zero byte at _dst[_len].
 *
 * Follows the "unsafe_put_user()" convention: there is no return
 * value, a failed access jumps to 'label' instead.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
	char __user *uname = (_dst);				\
	const char *kname = (_src);				\
	size_t nbytes = (_len);					\
	unsafe_put_user(0, uname + nbytes, label);		\
	unsafe_copy_to_user(uname, kname, nbytes, label);	\
} while (0)
83 
84 
85 int iterate_dir(struct file *file, struct dir_context *ctx)
86 {
87 	struct inode *inode = file_inode(file);
88 	int res = -ENOTDIR;
89 
90 	if (!file->f_op->iterate_shared)
91 		goto out;
92 
93 	res = security_file_permission(file, MAY_READ);
94 	if (res)
95 		goto out;
96 
97 	res = fsnotify_file_perm(file, MAY_READ);
98 	if (res)
99 		goto out;
100 
101 	res = down_read_killable(&inode->i_rwsem);
102 	if (res)
103 		goto out;
104 
105 	res = -ENOENT;
106 	if (!IS_DEADDIR(inode)) {
107 		ctx->pos = file->f_pos;
108 		res = file->f_op->iterate_shared(file, ctx);
109 		file->f_pos = ctx->pos;
110 		fsnotify_access(file);
111 		file_accessed(file);
112 	}
113 	inode_unlock_shared(inode);
114 out:
115 	return res;
116 }
117 EXPORT_SYMBOL(iterate_dir);
118 
119 /*
 * POSIX says that a dirent name cannot contain NUL or a '/'.
121  *
122  * It's not 100% clear what we should really do in this case.
123  * The filesystem is clearly corrupted, but returning a hard
124  * error means that you now don't see any of the other names
125  * either, so that isn't a perfect alternative.
126  *
127  * And if you return an error, what error do you use? Several
128  * filesystems seem to have decided on EUCLEAN being the error
129  * code for EFSCORRUPTED, and that may be the error to use. Or
130  * just EIO, which is perhaps more obvious to users.
131  *
132  * In order to see the other file names in the directory, the
133  * caller might want to make this a "soft" error: skip the
134  * entry, and return the error at the end instead.
135  *
136  * Note that this should likely do a "memchr(name, 0, len)"
137  * check too, since that would be filesystem corruption as
138  * well. However, that case can't actually confuse user space,
139  * which has to do a strlen() on the name anyway to find the
140  * filename length, and the above "soft error" worry means
141  * that it's probably better left alone until we have that
142  * issue clarified.
143  *
144  * Note the PATH_MAX check - it's arbitrary but the real
145  * kernel limit on a possible path component, not NAME_MAX,
146  * which is the technical standard limit.
147  */
/*
 * Sanity-check a directory entry name supplied by a filesystem.
 *
 * Returns 0 if the name is acceptable, or -EIO if @len is not in the
 * range 1..PATH_MAX-1 or if any of the first @len bytes is a '/'.
 */
static int verify_dirent_name(const char *name, int len)
{
	/* Reject empty, negative and oversized lengths before reading @name. */
	if (len < 1 || len > PATH_MAX - 1)
		return -EIO;

	return memchr(name, '/', len) ? -EIO : 0;
}
156 
157 /*
158  * Traditional linux readdir() handling..
159  *
160  * "count=1" is a special case, meaning that the buffer is one
161  * dirent-structure in size and that the code can't handle more
162  * anyway. Thus the special "fillonedir()" function for that
163  * case (the low-level handlers don't need to care about this).
164  */
165 
166 #ifdef __ARCH_WANT_OLD_READDIR
167 
/* User-space record filled in by fillonedir() for the old readdir call. */
struct old_linux_dirent {
	unsigned long d_ino;		/* inode number of the entry */
	unsigned long d_offset;		/* offset the iterator passed for it */
	unsigned short d_namlen;	/* name length, without the terminator */
	char d_name[];			/* name followed by a zero byte */
};
174 
175 struct readdir_callback {
176 	struct dir_context ctx;
177 	struct old_linux_dirent __user * dirent;
178 	int result;
179 };
180 
181 static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
182 		      loff_t offset, u64 ino, unsigned int d_type)
183 {
184 	struct readdir_callback *buf =
185 		container_of(ctx, struct readdir_callback, ctx);
186 	struct old_linux_dirent __user * dirent;
187 	unsigned long d_ino;
188 
189 	if (buf->result)
190 		return false;
191 	buf->result = verify_dirent_name(name, namlen);
192 	if (buf->result)
193 		return false;
194 	d_ino = ino;
195 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
196 		buf->result = -EOVERFLOW;
197 		return false;
198 	}
199 	buf->result++;
200 	dirent = buf->dirent;
201 	if (!user_write_access_begin(dirent,
202 			(unsigned long)(dirent->d_name + namlen + 1) -
203 				(unsigned long)dirent))
204 		goto efault;
205 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
206 	unsafe_put_user(offset, &dirent->d_offset, efault_end);
207 	unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
208 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
209 	user_write_access_end();
210 	return true;
211 efault_end:
212 	user_write_access_end();
213 efault:
214 	buf->result = -EFAULT;
215 	return false;
216 }
217 
218 SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
219 		struct old_linux_dirent __user *, dirent, unsigned int, count)
220 {
221 	int error;
222 	CLASS(fd_pos, f)(fd);
223 	struct readdir_callback buf = {
224 		.ctx.actor = fillonedir,
225 		.ctx.count = 1, /* Hint to fs: just one entry. */
226 		.dirent = dirent
227 	};
228 
229 	if (fd_empty(f))
230 		return -EBADF;
231 
232 	error = iterate_dir(fd_file(f), &buf.ctx);
233 	if (buf.result)
234 		error = buf.result;
235 
236 	return error;
237 }
238 
239 #endif /* __ARCH_WANT_OLD_READDIR */
240 
241 /*
242  * New, all-improved, singing, dancing, iBCS2-compliant getdents()
243  * interface.
244  */
/*
 * User-space record written by filldir(). Records are packed back to
 * back; the last byte of each record (offset d_reclen - 1) holds the
 * entry type.
 */
struct linux_dirent {
	unsigned long d_ino;		/* inode number of the entry */
	unsigned long d_off;		/* offset supplied with the following entry */
	unsigned short d_reclen;	/* total, aligned length of this record */
	char d_name[];			/* name followed by a zero byte */
};
251 
252 struct getdents_callback {
253 	struct dir_context ctx;
254 	struct linux_dirent __user * current_dir;
255 	int prev_reclen;
256 	int error;
257 };
258 
259 static bool filldir(struct dir_context *ctx, const char *name, int namlen,
260 		   loff_t offset, u64 ino, unsigned int d_type)
261 {
262 	struct linux_dirent __user *dirent, *prev;
263 	struct getdents_callback *buf =
264 		container_of(ctx, struct getdents_callback, ctx);
265 	unsigned long d_ino;
266 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
267 		sizeof(long));
268 	int prev_reclen;
269 	unsigned int flags = d_type;
270 
271 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
272 	d_type &= S_DT_MASK;
273 
274 	buf->error = verify_dirent_name(name, namlen);
275 	if (unlikely(buf->error))
276 		return false;
277 	buf->error = -EINVAL;	/* only used if we fail.. */
278 	if (reclen > ctx->count)
279 		return false;
280 	d_ino = ino;
281 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
282 		buf->error = -EOVERFLOW;
283 		return false;
284 	}
285 	prev_reclen = buf->prev_reclen;
286 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
287 		return false;
288 	dirent = buf->current_dir;
289 	prev = (void __user *) dirent - prev_reclen;
290 	if (!user_write_access_begin(prev, reclen + prev_reclen))
291 		goto efault;
292 
293 	/* This might be 'dirent->d_off', but if so it will get overwritten */
294 	unsafe_put_user(offset, &prev->d_off, efault_end);
295 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
296 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
297 	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
298 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
299 	user_write_access_end();
300 
301 	buf->current_dir = (void __user *)dirent + reclen;
302 	buf->prev_reclen = reclen;
303 	ctx->count -= reclen;
304 	return true;
305 efault_end:
306 	user_write_access_end();
307 efault:
308 	buf->error = -EFAULT;
309 	return false;
310 }
311 
312 SYSCALL_DEFINE3(getdents, unsigned int, fd,
313 		struct linux_dirent __user *, dirent, unsigned int, count)
314 {
315 	CLASS(fd_pos, f)(fd);
316 	struct getdents_callback buf = {
317 		.ctx.actor = filldir,
318 		.ctx.count = count,
319 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
320 		.current_dir = dirent
321 	};
322 	int error;
323 
324 	if (fd_empty(f))
325 		return -EBADF;
326 
327 	error = iterate_dir(fd_file(f), &buf.ctx);
328 	if (error >= 0)
329 		error = buf.error;
330 	if (buf.prev_reclen) {
331 		struct linux_dirent __user * lastdirent;
332 		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
333 
334 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
335 			error = -EFAULT;
336 		else
337 			error = count - buf.ctx.count;
338 	}
339 	return error;
340 }
341 
342 struct getdents_callback64 {
343 	struct dir_context ctx;
344 	struct linux_dirent64 __user * current_dir;
345 	int prev_reclen;
346 	int error;
347 };
348 
349 static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
350 		     loff_t offset, u64 ino, unsigned int d_type)
351 {
352 	struct linux_dirent64 __user *dirent, *prev;
353 	struct getdents_callback64 *buf =
354 		container_of(ctx, struct getdents_callback64, ctx);
355 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
356 		sizeof(u64));
357 	int prev_reclen;
358 	unsigned int flags = d_type;
359 
360 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
361 	d_type &= S_DT_MASK;
362 
363 	buf->error = verify_dirent_name(name, namlen);
364 	if (unlikely(buf->error))
365 		return false;
366 	buf->error = -EINVAL;	/* only used if we fail.. */
367 	if (reclen > ctx->count)
368 		return false;
369 	prev_reclen = buf->prev_reclen;
370 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
371 		return false;
372 	dirent = buf->current_dir;
373 	prev = (void __user *)dirent - prev_reclen;
374 	if (!user_write_access_begin(prev, reclen + prev_reclen))
375 		goto efault;
376 
377 	/* This might be 'dirent->d_off', but if so it will get overwritten */
378 	unsafe_put_user(offset, &prev->d_off, efault_end);
379 	unsafe_put_user(ino, &dirent->d_ino, efault_end);
380 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
381 	unsafe_put_user(d_type, &dirent->d_type, efault_end);
382 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
383 	user_write_access_end();
384 
385 	buf->prev_reclen = reclen;
386 	buf->current_dir = (void __user *)dirent + reclen;
387 	ctx->count -= reclen;
388 	return true;
389 
390 efault_end:
391 	user_write_access_end();
392 efault:
393 	buf->error = -EFAULT;
394 	return false;
395 }
396 
397 SYSCALL_DEFINE3(getdents64, unsigned int, fd,
398 		struct linux_dirent64 __user *, dirent, unsigned int, count)
399 {
400 	CLASS(fd_pos, f)(fd);
401 	struct getdents_callback64 buf = {
402 		.ctx.actor = filldir64,
403 		.ctx.count = count,
404 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
405 		.current_dir = dirent
406 	};
407 	int error;
408 
409 	if (fd_empty(f))
410 		return -EBADF;
411 
412 	error = iterate_dir(fd_file(f), &buf.ctx);
413 	if (error >= 0)
414 		error = buf.error;
415 	if (buf.prev_reclen) {
416 		struct linux_dirent64 __user * lastdirent;
417 		typeof(lastdirent->d_off) d_off = buf.ctx.pos;
418 
419 		lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
420 		if (put_user(d_off, &lastdirent->d_off))
421 			error = -EFAULT;
422 		else
423 			error = count - buf.ctx.count;
424 	}
425 	return error;
426 }
427 
428 #ifdef CONFIG_COMPAT
429 struct compat_old_linux_dirent {
430 	compat_ulong_t	d_ino;
431 	compat_ulong_t	d_offset;
432 	unsigned short	d_namlen;
433 	char		d_name[];
434 };
435 
436 struct compat_readdir_callback {
437 	struct dir_context ctx;
438 	struct compat_old_linux_dirent __user *dirent;
439 	int result;
440 };
441 
442 static bool compat_fillonedir(struct dir_context *ctx, const char *name,
443 			     int namlen, loff_t offset, u64 ino,
444 			     unsigned int d_type)
445 {
446 	struct compat_readdir_callback *buf =
447 		container_of(ctx, struct compat_readdir_callback, ctx);
448 	struct compat_old_linux_dirent __user *dirent;
449 	compat_ulong_t d_ino;
450 
451 	if (buf->result)
452 		return false;
453 	buf->result = verify_dirent_name(name, namlen);
454 	if (buf->result)
455 		return false;
456 	d_ino = ino;
457 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
458 		buf->result = -EOVERFLOW;
459 		return false;
460 	}
461 	buf->result++;
462 	dirent = buf->dirent;
463 	if (!user_write_access_begin(dirent,
464 			(unsigned long)(dirent->d_name + namlen + 1) -
465 				(unsigned long)dirent))
466 		goto efault;
467 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
468 	unsafe_put_user(offset, &dirent->d_offset, efault_end);
469 	unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
470 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
471 	user_write_access_end();
472 	return true;
473 efault_end:
474 	user_write_access_end();
475 efault:
476 	buf->result = -EFAULT;
477 	return false;
478 }
479 
480 COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
481 		struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
482 {
483 	int error;
484 	CLASS(fd_pos, f)(fd);
485 	struct compat_readdir_callback buf = {
486 		.ctx.actor = compat_fillonedir,
487 		.ctx.count = 1, /* Hint to fs: just one entry. */
488 		.dirent = dirent
489 	};
490 
491 	if (fd_empty(f))
492 		return -EBADF;
493 
494 	error = iterate_dir(fd_file(f), &buf.ctx);
495 	if (buf.result)
496 		error = buf.result;
497 
498 	return error;
499 }
500 
501 struct compat_linux_dirent {
502 	compat_ulong_t	d_ino;
503 	compat_ulong_t	d_off;
504 	unsigned short	d_reclen;
505 	char		d_name[];
506 };
507 
508 struct compat_getdents_callback {
509 	struct dir_context ctx;
510 	struct compat_linux_dirent __user *current_dir;
511 	int prev_reclen;
512 	int error;
513 };
514 
515 static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
516 		loff_t offset, u64 ino, unsigned int d_type)
517 {
518 	struct compat_linux_dirent __user *dirent, *prev;
519 	struct compat_getdents_callback *buf =
520 		container_of(ctx, struct compat_getdents_callback, ctx);
521 	compat_ulong_t d_ino;
522 	int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
523 		namlen + 2, sizeof(compat_long_t));
524 	int prev_reclen;
525 	unsigned int flags = d_type;
526 
527 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
528 	d_type &= S_DT_MASK;
529 
530 	buf->error = verify_dirent_name(name, namlen);
531 	if (unlikely(buf->error))
532 		return false;
533 	buf->error = -EINVAL;	/* only used if we fail.. */
534 	if (reclen > ctx->count)
535 		return false;
536 	d_ino = ino;
537 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
538 		buf->error = -EOVERFLOW;
539 		return false;
540 	}
541 	prev_reclen = buf->prev_reclen;
542 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
543 		return false;
544 	dirent = buf->current_dir;
545 	prev = (void __user *) dirent - prev_reclen;
546 	if (!user_write_access_begin(prev, reclen + prev_reclen))
547 		goto efault;
548 
549 	unsafe_put_user(offset, &prev->d_off, efault_end);
550 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
551 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
552 	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
553 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
554 	user_write_access_end();
555 
556 	buf->prev_reclen = reclen;
557 	buf->current_dir = (void __user *)dirent + reclen;
558 	ctx->count -= reclen;
559 	return true;
560 efault_end:
561 	user_write_access_end();
562 efault:
563 	buf->error = -EFAULT;
564 	return false;
565 }
566 
567 COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
568 		struct compat_linux_dirent __user *, dirent, unsigned int, count)
569 {
570 	CLASS(fd_pos, f)(fd);
571 	struct compat_getdents_callback buf = {
572 		.ctx.actor = compat_filldir,
573 		.ctx.count = count,
574 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
575 		.current_dir = dirent,
576 	};
577 	int error;
578 
579 	if (fd_empty(f))
580 		return -EBADF;
581 
582 	error = iterate_dir(fd_file(f), &buf.ctx);
583 	if (error >= 0)
584 		error = buf.error;
585 	if (buf.prev_reclen) {
586 		struct compat_linux_dirent __user * lastdirent;
587 		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
588 
589 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
590 			error = -EFAULT;
591 		else
592 			error = count - buf.ctx.count;
593 	}
594 	return error;
595 }
596 #endif
597