xref: /linux/fs/readdir.c (revision b98f7363f72ff83b9f5194d26e7f9fe74f45b46a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/readdir.c
4  *
5  *  Copyright (C) 1995  Linus Torvalds
6  */
7 
8 #include <linux/stddef.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/time.h>
12 #include <linux/mm.h>
13 #include <linux/errno.h>
14 #include <linux/stat.h>
15 #include <linux/file.h>
16 #include <linux/fs.h>
17 #include <linux/fsnotify.h>
18 #include <linux/dirent.h>
19 #include <linux/security.h>
20 #include <linux/syscalls.h>
21 #include <linux/unistd.h>
22 #include <linux/compat.h>
23 #include <linux/uaccess.h>
24 
/*
 * Size in bytes of a dirent record whose name area holds 'len' bytes,
 * i.e. the offset of d_name[len] inside the structure that 'dirent'
 * points to. Only the type of 'dirent' is consulted (through typeof),
 * which is why callers below can use it on a pointer that has not been
 * assigned yet.
 */
#define dirent_size(dirent, len) offsetof(typeof(*(dirent)), d_name[len])
26 
27 /*
28  * Some filesystems were never converted to '->iterate_shared()'
29  * and their directory iterators want the inode lock held for
30  * writing. This wrapper allows for converting from the shared
31  * semantics to the exclusive inode use.
32  */
33 int wrap_directory_iterator(struct file *file,
34 			    struct dir_context *ctx,
35 			    int (*iter)(struct file *, struct dir_context *))
36 {
37 	struct inode *inode = file_inode(file);
38 	int ret;
39 
40 	/*
41 	 * We'd love to have an 'inode_upgrade_trylock()' operation,
42 	 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
43 	 *
44 	 * But considering this is for "filesystems that never got
45 	 * converted", it really doesn't matter.
46 	 *
47 	 * Also note that since we have to return with the lock held
48 	 * for reading, we can't use the "killable()" locking here,
49 	 * since we do need to get the lock even if we're dying.
50 	 *
51 	 * We could do the write part killably and then get the read
52 	 * lock unconditionally if it mattered, but see above on why
53 	 * this does the very simplistic conversion.
54 	 */
55 	up_read(&inode->i_rwsem);
56 	down_write(&inode->i_rwsem);
57 
58 	/*
59 	 * Since we dropped the inode lock, we should do the
60 	 * DEADDIR test again. See 'iterate_dir()' below.
61 	 *
62 	 * Note that we don't need to re-do the f_pos games,
63 	 * since the file must be locked wrt f_pos anyway.
64 	 */
65 	ret = -ENOENT;
66 	if (!IS_DEADDIR(inode))
67 		ret = iter(file, ctx);
68 
69 	downgrade_write(&inode->i_rwsem);
70 	return ret;
71 }
72 EXPORT_SYMBOL(wrap_directory_iterator);
73 
/*
 * Copy a directory entry name of '_len' bytes from '_src' to the user
 * pointer '_dst', and store a terminating 0 byte at '_dst + _len'.
 * Each argument is evaluated once into a local.
 *
 * Note the "unsafe_put_user()" semantics: we goto a
 * label for errors.
 *
 * Every user in this file calls it from inside a
 * scoped_user_write_access_size() region whose size covers both the
 * name and the terminating byte.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
	char __user *dst = (_dst);				\
	const char *src = (_src);				\
	size_t len = (_len);					\
	unsafe_put_user(0, dst+len, label);			\
	unsafe_copy_to_user(dst, src, len, label);		\
} while (0)
85 
86 
87 int iterate_dir(struct file *file, struct dir_context *ctx)
88 {
89 	struct inode *inode = file_inode(file);
90 	int res = -ENOTDIR;
91 
92 	if (!file->f_op->iterate_shared)
93 		goto out;
94 
95 	res = security_file_permission(file, MAY_READ);
96 	if (res)
97 		goto out;
98 
99 	res = fsnotify_file_perm(file, MAY_READ);
100 	if (res)
101 		goto out;
102 
103 	res = down_read_killable(&inode->i_rwsem);
104 	if (res)
105 		goto out;
106 
107 	res = -ENOENT;
108 	if (!IS_DEADDIR(inode)) {
109 		ctx->pos = file->f_pos;
110 		res = file->f_op->iterate_shared(file, ctx);
111 		file->f_pos = ctx->pos;
112 		fsnotify_access(file);
113 		file_accessed(file);
114 	}
115 	inode_unlock_shared(inode);
116 out:
117 	return res;
118 }
119 EXPORT_SYMBOL(iterate_dir);
120 
121 /*
122  * POSIX says that a dirent name cannot contain NULL or a '/'.
123  *
124  * It's not 100% clear what we should really do in this case.
125  * The filesystem is clearly corrupted, but returning a hard
126  * error means that you now don't see any of the other names
127  * either, so that isn't a perfect alternative.
128  *
129  * And if you return an error, what error do you use? Several
130  * filesystems seem to have decided on EUCLEAN being the error
131  * code for EFSCORRUPTED, and that may be the error to use. Or
132  * just EIO, which is perhaps more obvious to users.
133  *
134  * In order to see the other file names in the directory, the
135  * caller might want to make this a "soft" error: skip the
136  * entry, and return the error at the end instead.
137  *
138  * Note that this should likely do a "memchr(name, 0, len)"
139  * check too, since that would be filesystem corruption as
140  * well. However, that case can't actually confuse user space,
141  * which has to do a strlen() on the name anyway to find the
142  * filename length, and the above "soft error" worry means
143  * that it's probably better left alone until we have that
144  * issue clarified.
145  *
146  * Note the PATH_MAX check - it's arbitrary but the real
147  * kernel limit on a possible path component, not NAME_MAX,
148  * which is the technical standard limit.
149  */
static int verify_dirent_name(const char *name, int len)
{
	/* Empty, negative or PATH_MAX-and-longer lengths are rejected outright. */
	if (len < 1 || len > PATH_MAX - 1)
		return -EIO;

	/* A '/' anywhere within the first 'len' bytes is not a valid name. */
	return memchr(name, '/', len) ? -EIO : 0;
}
158 
159 /*
160  * Traditional linux readdir() handling..
161  *
162  * "count=1" is a special case, meaning that the buffer is one
163  * dirent-structure in size and that the code can't handle more
164  * anyway. Thus the special "fillonedir()" function for that
165  * case (the low-level handlers don't need to care about this).
166  */
167 
168 #ifdef __ARCH_WANT_OLD_READDIR
169 
/* Single-entry record that fillonedir() writes to user space. */
struct old_linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	unsigned long	d_offset;	/* 'offset' value handed to the actor */
	unsigned short	d_namlen;	/* name length, terminating 0 not counted */
	char		d_name[];	/* name bytes followed by a 0 byte */
};
176 
/* State shared between the old_readdir() syscall and fillonedir(). */
struct readdir_callback {
	struct dir_context ctx;		/* embedded context; recovered with container_of() */
	struct old_linux_dirent __user * dirent;	/* user buffer for the one entry */
	/*
	 * 0 until fillonedir() runs; afterwards 1 if an entry was
	 * accepted or a negative errno on failure. Any non-zero value
	 * makes later fillonedir() calls return false immediately.
	 */
	int result;
};
182 
183 static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
184 		      loff_t offset, u64 ino, unsigned int d_type)
185 {
186 	struct readdir_callback *buf =
187 		container_of(ctx, struct readdir_callback, ctx);
188 	struct old_linux_dirent __user * dirent;
189 	unsigned long d_ino;
190 
191 	if (buf->result)
192 		return false;
193 	buf->result = verify_dirent_name(name, namlen);
194 	if (buf->result)
195 		return false;
196 	d_ino = ino;
197 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
198 		buf->result = -EOVERFLOW;
199 		return false;
200 	}
201 	buf->result++;
202 	dirent = buf->dirent;
203 	scoped_user_write_access_size(dirent, dirent_size(dirent, namlen + 1), efault) {
204 		unsafe_put_user(d_ino, &dirent->d_ino, efault);
205 		unsafe_put_user(offset, &dirent->d_offset, efault);
206 		unsafe_put_user(namlen, &dirent->d_namlen, efault);
207 		unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
208 	}
209 	return true;
210 efault:
211 	buf->result = -EFAULT;
212 	return false;
213 }
214 
215 SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
216 		struct old_linux_dirent __user *, dirent, unsigned int, count)
217 {
218 	int error;
219 	CLASS(fd_pos, f)(fd);
220 	struct readdir_callback buf = {
221 		.ctx.actor = fillonedir,
222 		.ctx.count = 1, /* Hint to fs: just one entry. */
223 		.dirent = dirent
224 	};
225 
226 	if (fd_empty(f))
227 		return -EBADF;
228 
229 	error = iterate_dir(fd_file(f), &buf.ctx);
230 	if (buf.result)
231 		error = buf.result;
232 
233 	return error;
234 }
235 
236 #endif /* __ARCH_WANT_OLD_READDIR */
237 
238 /*
239  * New, all-improved, singing, dancing, iBCS2-compliant getdents()
240  * interface.
241  */
struct linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	unsigned long	d_off;		/* offset value; written when the next record (or the syscall end) is processed */
	unsigned short	d_reclen;	/* total length of this record in bytes */
	char		d_name[];	/* name + 0 byte; filldir() puts d_type in the record's last byte */
};
248 
/* State shared between the getdents() syscall and filldir(). */
struct getdents_callback {
	struct dir_context ctx;		/* embedded context; ctx.count tracks the bytes left in the user buffer */
	struct linux_dirent __user * current_dir;	/* where the next record will be written */
	int prev_reclen;		/* length of the record written last, 0 if none yet */
	int error;			/* status left behind by the most recent filldir() call */
};
255 
256 static bool filldir(struct dir_context *ctx, const char *name, int namlen,
257 		   loff_t offset, u64 ino, unsigned int d_type)
258 {
259 	struct linux_dirent __user *dirent, *prev;
260 	struct getdents_callback *buf =
261 		container_of(ctx, struct getdents_callback, ctx);
262 	unsigned long d_ino;
263 	int reclen = ALIGN(dirent_size(dirent, namlen + 2), sizeof(long));
264 	int prev_reclen;
265 	unsigned int flags = d_type;
266 
267 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
268 	d_type &= S_DT_MASK;
269 
270 	buf->error = verify_dirent_name(name, namlen);
271 	if (unlikely(buf->error))
272 		return false;
273 	buf->error = -EINVAL;	/* only used if we fail.. */
274 	if (reclen > ctx->count)
275 		return false;
276 	d_ino = ino;
277 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
278 		buf->error = -EOVERFLOW;
279 		return false;
280 	}
281 	prev_reclen = buf->prev_reclen;
282 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
283 		return false;
284 	dirent = buf->current_dir;
285 	prev = (void __user *) dirent - prev_reclen;
286 	scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
287 		/* This might be 'dirent->d_off', but if so it will get overwritten */
288 		unsafe_put_user(offset, &prev->d_off, efault);
289 		unsafe_put_user(d_ino, &dirent->d_ino, efault);
290 		unsafe_put_user(reclen, &dirent->d_reclen, efault);
291 		unsafe_put_user(d_type, (char __user *)dirent + reclen - 1, efault);
292 		unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
293 	}
294 
295 	buf->current_dir = (void __user *)dirent + reclen;
296 	buf->prev_reclen = reclen;
297 	ctx->count -= reclen;
298 	return true;
299 efault:
300 	buf->error = -EFAULT;
301 	return false;
302 }
303 
304 SYSCALL_DEFINE3(getdents, unsigned int, fd,
305 		struct linux_dirent __user *, dirent, unsigned int, count)
306 {
307 	CLASS(fd_pos, f)(fd);
308 	struct getdents_callback buf = {
309 		.ctx.actor = filldir,
310 		.ctx.count = count,
311 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
312 		.current_dir = dirent
313 	};
314 	int error;
315 
316 	if (fd_empty(f))
317 		return -EBADF;
318 
319 	error = iterate_dir(fd_file(f), &buf.ctx);
320 	if (error >= 0)
321 		error = buf.error;
322 	if (buf.prev_reclen) {
323 		struct linux_dirent __user * lastdirent;
324 		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
325 
326 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
327 			error = -EFAULT;
328 		else
329 			error = count - buf.ctx.count;
330 	}
331 	return error;
332 }
333 
/* State shared between the getdents64() syscall and filldir64(). */
struct getdents_callback64 {
	struct dir_context ctx;		/* embedded context; ctx.count tracks the bytes left in the user buffer */
	struct linux_dirent64 __user * current_dir;	/* where the next record will be written */
	int prev_reclen;		/* length of the record written last, 0 if none yet */
	int error;			/* status left behind by the most recent filldir64() call */
};
340 
341 static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
342 		     loff_t offset, u64 ino, unsigned int d_type)
343 {
344 	struct linux_dirent64 __user *dirent, *prev;
345 	struct getdents_callback64 *buf =
346 		container_of(ctx, struct getdents_callback64, ctx);
347 	int reclen = ALIGN(dirent_size(dirent, namlen + 1), sizeof(u64));
348 	int prev_reclen;
349 	unsigned int flags = d_type;
350 
351 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
352 	d_type &= S_DT_MASK;
353 
354 	buf->error = verify_dirent_name(name, namlen);
355 	if (unlikely(buf->error))
356 		return false;
357 	buf->error = -EINVAL;	/* only used if we fail.. */
358 	if (reclen > ctx->count)
359 		return false;
360 	prev_reclen = buf->prev_reclen;
361 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
362 		return false;
363 	dirent = buf->current_dir;
364 	prev = (void __user *)dirent - prev_reclen;
365 	scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
366 		/* This might be 'dirent->d_off', but if so it will get overwritten */
367 		unsafe_put_user(offset, &prev->d_off, efault);
368 		unsafe_put_user(ino, &dirent->d_ino, efault);
369 		unsafe_put_user(reclen, &dirent->d_reclen, efault);
370 		unsafe_put_user(d_type, &dirent->d_type, efault);
371 		unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
372 	}
373 
374 	buf->prev_reclen = reclen;
375 	buf->current_dir = (void __user *)dirent + reclen;
376 	ctx->count -= reclen;
377 	return true;
378 
379 efault:
380 	buf->error = -EFAULT;
381 	return false;
382 }
383 
384 SYSCALL_DEFINE3(getdents64, unsigned int, fd,
385 		struct linux_dirent64 __user *, dirent, unsigned int, count)
386 {
387 	CLASS(fd_pos, f)(fd);
388 	struct getdents_callback64 buf = {
389 		.ctx.actor = filldir64,
390 		.ctx.count = count,
391 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
392 		.current_dir = dirent
393 	};
394 	int error;
395 
396 	if (fd_empty(f))
397 		return -EBADF;
398 
399 	error = iterate_dir(fd_file(f), &buf.ctx);
400 	if (error >= 0)
401 		error = buf.error;
402 	if (buf.prev_reclen) {
403 		struct linux_dirent64 __user * lastdirent;
404 		typeof(lastdirent->d_off) d_off = buf.ctx.pos;
405 
406 		lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
407 		if (put_user(d_off, &lastdirent->d_off))
408 			error = -EFAULT;
409 		else
410 			error = count - buf.ctx.count;
411 	}
412 	return error;
413 }
414 
415 #ifdef CONFIG_COMPAT
/* Single-entry record that compat_fillonedir() writes to user space. */
struct compat_old_linux_dirent {
	compat_ulong_t	d_ino;		/* inode number of the entry */
	compat_ulong_t	d_offset;	/* 'offset' value handed to the actor */
	unsigned short	d_namlen;	/* name length, terminating 0 not counted */
	char		d_name[];	/* name bytes followed by a 0 byte */
};
422 
/* State shared between the compat old_readdir() syscall and compat_fillonedir(). */
struct compat_readdir_callback {
	struct dir_context ctx;		/* embedded context; recovered with container_of() */
	struct compat_old_linux_dirent __user *dirent;	/* user buffer for the one entry */
	/*
	 * 0 until compat_fillonedir() runs; afterwards 1 if an entry
	 * was accepted or a negative errno on failure. Any non-zero
	 * value makes later actor calls return false immediately.
	 */
	int result;
};
428 
429 static bool compat_fillonedir(struct dir_context *ctx, const char *name,
430 			     int namlen, loff_t offset, u64 ino,
431 			     unsigned int d_type)
432 {
433 	struct compat_readdir_callback *buf =
434 		container_of(ctx, struct compat_readdir_callback, ctx);
435 	struct compat_old_linux_dirent __user *dirent;
436 	compat_ulong_t d_ino;
437 
438 	if (buf->result)
439 		return false;
440 	buf->result = verify_dirent_name(name, namlen);
441 	if (buf->result)
442 		return false;
443 	d_ino = ino;
444 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
445 		buf->result = -EOVERFLOW;
446 		return false;
447 	}
448 	buf->result++;
449 	dirent = buf->dirent;
450 	scoped_user_write_access_size(dirent, dirent_size(dirent, namlen + 1), efault) {
451 		unsafe_put_user(d_ino, &dirent->d_ino, efault);
452 		unsafe_put_user(offset, &dirent->d_offset, efault);
453 		unsafe_put_user(namlen, &dirent->d_namlen, efault);
454 		unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
455 	}
456 	return true;
457 efault:
458 	buf->result = -EFAULT;
459 	return false;
460 }
461 
462 COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
463 		struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
464 {
465 	int error;
466 	CLASS(fd_pos, f)(fd);
467 	struct compat_readdir_callback buf = {
468 		.ctx.actor = compat_fillonedir,
469 		.ctx.count = 1, /* Hint to fs: just one entry. */
470 		.dirent = dirent
471 	};
472 
473 	if (fd_empty(f))
474 		return -EBADF;
475 
476 	error = iterate_dir(fd_file(f), &buf.ctx);
477 	if (buf.result)
478 		error = buf.result;
479 
480 	return error;
481 }
482 
/* Record layout that compat_filldir() writes to user space. */
struct compat_linux_dirent {
	compat_ulong_t	d_ino;		/* inode number of the entry */
	compat_ulong_t	d_off;		/* offset value; written when the next record (or the syscall end) is processed */
	unsigned short	d_reclen;	/* total length of this record in bytes */
	char		d_name[];	/* name + 0 byte; compat_filldir() puts d_type in the record's last byte */
};
489 
/* State shared between the compat getdents() syscall and compat_filldir(). */
struct compat_getdents_callback {
	struct dir_context ctx;		/* embedded context; ctx.count tracks the bytes left in the user buffer */
	struct compat_linux_dirent __user *current_dir;	/* where the next record will be written */
	int prev_reclen;		/* length of the record written last, 0 if none yet */
	int error;			/* status left behind by the most recent compat_filldir() call */
};
496 
497 static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
498 		loff_t offset, u64 ino, unsigned int d_type)
499 {
500 	struct compat_linux_dirent __user *dirent, *prev;
501 	struct compat_getdents_callback *buf =
502 		container_of(ctx, struct compat_getdents_callback, ctx);
503 	compat_ulong_t d_ino;
504 	int reclen = ALIGN(dirent_size(dirent, namlen + 2), sizeof(compat_long_t));
505 	int prev_reclen;
506 	unsigned int flags = d_type;
507 
508 	BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
509 	d_type &= S_DT_MASK;
510 
511 	buf->error = verify_dirent_name(name, namlen);
512 	if (unlikely(buf->error))
513 		return false;
514 	buf->error = -EINVAL;	/* only used if we fail.. */
515 	if (reclen > ctx->count)
516 		return false;
517 	d_ino = ino;
518 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
519 		buf->error = -EOVERFLOW;
520 		return false;
521 	}
522 	prev_reclen = buf->prev_reclen;
523 	if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
524 		return false;
525 	dirent = buf->current_dir;
526 	prev = (void __user *) dirent - prev_reclen;
527 	scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
528 		unsafe_put_user(offset, &prev->d_off, efault);
529 		unsafe_put_user(d_ino, &dirent->d_ino, efault);
530 		unsafe_put_user(reclen, &dirent->d_reclen, efault);
531 		unsafe_put_user(d_type, (char __user *)dirent + reclen - 1, efault);
532 		unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
533 	}
534 
535 	buf->prev_reclen = reclen;
536 	buf->current_dir = (void __user *)dirent + reclen;
537 	ctx->count -= reclen;
538 	return true;
539 efault:
540 	buf->error = -EFAULT;
541 	return false;
542 }
543 
544 COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
545 		struct compat_linux_dirent __user *, dirent, unsigned int, count)
546 {
547 	CLASS(fd_pos, f)(fd);
548 	struct compat_getdents_callback buf = {
549 		.ctx.actor = compat_filldir,
550 		.ctx.count = count,
551 		.ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
552 		.current_dir = dirent,
553 	};
554 	int error;
555 
556 	if (fd_empty(f))
557 		return -EBADF;
558 
559 	error = iterate_dir(fd_file(f), &buf.ctx);
560 	if (error >= 0)
561 		error = buf.error;
562 	if (buf.prev_reclen) {
563 		struct compat_linux_dirent __user * lastdirent;
564 		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
565 
566 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
567 			error = -EFAULT;
568 		else
569 			error = count - buf.ctx.count;
570 	}
571 	return error;
572 }
573 #endif
574