1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/readdir.c 4 * 5 * Copyright (C) 1995 Linus Torvalds 6 */ 7 8 #include <linux/stddef.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/time.h> 12 #include <linux/mm.h> 13 #include <linux/errno.h> 14 #include <linux/stat.h> 15 #include <linux/file.h> 16 #include <linux/fs.h> 17 #include <linux/fsnotify.h> 18 #include <linux/dirent.h> 19 #include <linux/security.h> 20 #include <linux/syscalls.h> 21 #include <linux/unistd.h> 22 #include <linux/compat.h> 23 #include <linux/uaccess.h> 24 25 /* 26 * Some filesystems were never converted to '->iterate_shared()' 27 * and their directory iterators want the inode lock held for 28 * writing. This wrapper allows for converting from the shared 29 * semantics to the exclusive inode use. 30 */ 31 int wrap_directory_iterator(struct file *file, 32 struct dir_context *ctx, 33 int (*iter)(struct file *, struct dir_context *)) 34 { 35 struct inode *inode = file_inode(file); 36 int ret; 37 38 /* 39 * We'd love to have an 'inode_upgrade_trylock()' operation, 40 * see the comment in mmap_upgrade_trylock() in mm/memory.c. 41 * 42 * But considering this is for "filesystems that never got 43 * converted", it really doesn't matter. 44 * 45 * Also note that since we have to return with the lock held 46 * for reading, we can't use the "killable()" locking here, 47 * since we do need to get the lock even if we're dying. 48 * 49 * We could do the write part killably and then get the read 50 * lock unconditionally if it mattered, but see above on why 51 * this does the very simplistic conversion. 52 */ 53 up_read(&inode->i_rwsem); 54 down_write(&inode->i_rwsem); 55 56 /* 57 * Since we dropped the inode lock, we should do the 58 * DEADDIR test again. See 'iterate_dir()' below. 59 * 60 * Note that we don't need to re-do the f_pos games, 61 * since the file must be locked wrt f_pos anyway. 62 */ 63 ret = -ENOENT; 64 if (!IS_DEADDIR(inode)) 65 ret = iter(file, ctx); 66 67 downgrade_write(&inode->i_rwsem); 68 return ret; 69 } 70 EXPORT_SYMBOL(wrap_directory_iterator); 71 72 /* 73 * Note the "unsafe_put_user()" semantics: we goto a 74 * label for errors. 75 */ 76 #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ 77 char __user *dst = (_dst); \ 78 const char *src = (_src); \ 79 size_t len = (_len); \ 80 unsafe_put_user(0, dst+len, label); \ 81 unsafe_copy_to_user(dst, src, len, label); \ 82 } while (0) 83 84 85 int iterate_dir(struct file *file, struct dir_context *ctx) 86 { 87 struct inode *inode = file_inode(file); 88 int res = -ENOTDIR; 89 90 if (!file->f_op->iterate_shared) 91 goto out; 92 93 res = security_file_permission(file, MAY_READ); 94 if (res) 95 goto out; 96 97 res = fsnotify_file_perm(file, MAY_READ); 98 if (res) 99 goto out; 100 101 res = down_read_killable(&inode->i_rwsem); 102 if (res) 103 goto out; 104 105 res = -ENOENT; 106 if (!IS_DEADDIR(inode)) { 107 ctx->pos = file->f_pos; 108 res = file->f_op->iterate_shared(file, ctx); 109 file->f_pos = ctx->pos; 110 fsnotify_access(file); 111 file_accessed(file); 112 } 113 inode_unlock_shared(inode); 114 out: 115 return res; 116 } 117 EXPORT_SYMBOL(iterate_dir); 118 119 /* 120 * POSIX says that a dirent name cannot contain NULL or a '/'. 121 * 122 * It's not 100% clear what we should really do in this case. 123 * The filesystem is clearly corrupted, but returning a hard 124 * error means that you now don't see any of the other names 125 * either, so that isn't a perfect alternative. 126 * 127 * And if you return an error, what error do you use? Several 128 * filesystems seem to have decided on EUCLEAN being the error 129 * code for EFSCORRUPTED, and that may be the error to use. Or 130 * just EIO, which is perhaps more obvious to users. 131 * 132 * In order to see the other file names in the directory, the 133 * caller might want to make this a "soft" error: skip the 134 * entry, and return the error at the end instead. 135 * 136 * Note that this should likely do a "memchr(name, 0, len)" 137 * check too, since that would be filesystem corruption as 138 * well. However, that case can't actually confuse user space, 139 * which has to do a strlen() on the name anyway to find the 140 * filename length, and the above "soft error" worry means 141 * that it's probably better left alone until we have that 142 * issue clarified. 143 * 144 * Note the PATH_MAX check - it's arbitrary but the real 145 * kernel limit on a possible path component, not NAME_MAX, 146 * which is the technical standard limit. 147 */ 148 static int verify_dirent_name(const char *name, int len) 149 { 150 if (len <= 0 || len >= PATH_MAX) 151 return -EIO; 152 if (memchr(name, '/', len)) 153 return -EIO; 154 return 0; 155 } 156 157 /* 158 * Traditional linux readdir() handling.. 159 * 160 * "count=1" is a special case, meaning that the buffer is one 161 * dirent-structure in size and that the code can't handle more 162 * anyway. Thus the special "fillonedir()" function for that 163 * case (the low-level handlers don't need to care about this). 164 */ 165 166 #ifdef __ARCH_WANT_OLD_READDIR 167 168 struct old_linux_dirent { 169 unsigned long d_ino; 170 unsigned long d_offset; 171 unsigned short d_namlen; 172 char d_name[]; 173 }; 174 175 struct readdir_callback { 176 struct dir_context ctx; 177 struct old_linux_dirent __user * dirent; 178 int result; 179 }; 180 181 static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, 182 loff_t offset, u64 ino, unsigned int d_type) 183 { 184 struct readdir_callback *buf = 185 container_of(ctx, struct readdir_callback, ctx); 186 struct old_linux_dirent __user * dirent; 187 unsigned long d_ino; 188 189 if (buf->result) 190 return false; 191 buf->result = verify_dirent_name(name, namlen); 192 if (buf->result) 193 return false; 194 d_ino = ino; 195 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 196 buf->result = -EOVERFLOW; 197 return false; 198 } 199 buf->result++; 200 dirent = buf->dirent; 201 if (!user_write_access_begin(dirent, 202 (unsigned long)(dirent->d_name + namlen + 1) - 203 (unsigned long)dirent)) 204 goto efault; 205 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 206 unsafe_put_user(offset, &dirent->d_offset, efault_end); 207 unsafe_put_user(namlen, &dirent->d_namlen, efault_end); 208 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 209 user_write_access_end(); 210 return true; 211 efault_end: 212 user_write_access_end(); 213 efault: 214 buf->result = -EFAULT; 215 return false; 216 } 217 218 SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 219 struct old_linux_dirent __user *, dirent, unsigned int, count) 220 { 221 int error; 222 CLASS(fd_pos, f)(fd); 223 struct readdir_callback buf = { 224 .ctx.actor = fillonedir, 225 .ctx.count = 1, /* Hint to fs: just one entry. */ 226 .dirent = dirent 227 }; 228 229 if (fd_empty(f)) 230 return -EBADF; 231 232 error = iterate_dir(fd_file(f), &buf.ctx); 233 if (buf.result) 234 error = buf.result; 235 236 return error; 237 } 238 239 #endif /* __ARCH_WANT_OLD_READDIR */ 240 241 /* 242 * New, all-improved, singing, dancing, iBCS2-compliant getdents() 243 * interface. 244 */ 245 struct linux_dirent { 246 unsigned long d_ino; 247 unsigned long d_off; 248 unsigned short d_reclen; 249 char d_name[]; 250 }; 251 252 struct getdents_callback { 253 struct dir_context ctx; 254 struct linux_dirent __user * current_dir; 255 int prev_reclen; 256 int error; 257 }; 258 259 static bool filldir(struct dir_context *ctx, const char *name, int namlen, 260 loff_t offset, u64 ino, unsigned int d_type) 261 { 262 struct linux_dirent __user *dirent, *prev; 263 struct getdents_callback *buf = 264 container_of(ctx, struct getdents_callback, ctx); 265 unsigned long d_ino; 266 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, 267 sizeof(long)); 268 int prev_reclen; 269 unsigned int flags = d_type; 270 271 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 272 d_type &= S_DT_MASK; 273 274 buf->error = verify_dirent_name(name, namlen); 275 if (unlikely(buf->error)) 276 return false; 277 buf->error = -EINVAL; /* only used if we fail.. */ 278 if (reclen > ctx->count) 279 return false; 280 d_ino = ino; 281 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 282 buf->error = -EOVERFLOW; 283 return false; 284 } 285 prev_reclen = buf->prev_reclen; 286 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 287 return false; 288 dirent = buf->current_dir; 289 prev = (void __user *) dirent - prev_reclen; 290 if (!user_write_access_begin(prev, reclen + prev_reclen)) 291 goto efault; 292 293 /* This might be 'dirent->d_off', but if so it will get overwritten */ 294 unsafe_put_user(offset, &prev->d_off, efault_end); 295 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 296 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 297 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); 298 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 299 user_write_access_end(); 300 301 buf->current_dir = (void __user *)dirent + reclen; 302 buf->prev_reclen = reclen; 303 ctx->count -= reclen; 304 return true; 305 efault_end: 306 user_write_access_end(); 307 efault: 308 buf->error = -EFAULT; 309 return false; 310 } 311 312 SYSCALL_DEFINE3(getdents, unsigned int, fd, 313 struct linux_dirent __user *, dirent, unsigned int, count) 314 { 315 CLASS(fd_pos, f)(fd); 316 struct getdents_callback buf = { 317 .ctx.actor = filldir, 318 .ctx.count = count, 319 .current_dir = dirent 320 }; 321 int error; 322 323 if (fd_empty(f)) 324 return -EBADF; 325 326 error = iterate_dir(fd_file(f), &buf.ctx); 327 if (error >= 0) 328 error = buf.error; 329 if (buf.prev_reclen) { 330 struct linux_dirent __user * lastdirent; 331 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; 332 333 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 334 error = -EFAULT; 335 else 336 error = count - buf.ctx.count; 337 } 338 return error; 339 } 340 341 struct getdents_callback64 { 342 struct dir_context ctx; 343 struct linux_dirent64 __user * current_dir; 344 int prev_reclen; 345 int error; 346 }; 347 348 static bool filldir64(struct dir_context *ctx, const char *name, int namlen, 349 loff_t offset, u64 ino, unsigned int d_type) 350 { 351 struct linux_dirent64 __user *dirent, *prev; 352 struct getdents_callback64 *buf = 353 container_of(ctx, struct getdents_callback64, ctx); 354 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, 355 sizeof(u64)); 356 int prev_reclen; 357 unsigned int flags = d_type; 358 359 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 360 d_type &= S_DT_MASK; 361 362 buf->error = verify_dirent_name(name, namlen); 363 if (unlikely(buf->error)) 364 return false; 365 buf->error = -EINVAL; /* only used if we fail.. */ 366 if (reclen > ctx->count) 367 return false; 368 prev_reclen = buf->prev_reclen; 369 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 370 return false; 371 dirent = buf->current_dir; 372 prev = (void __user *)dirent - prev_reclen; 373 if (!user_write_access_begin(prev, reclen + prev_reclen)) 374 goto efault; 375 376 /* This might be 'dirent->d_off', but if so it will get overwritten */ 377 unsafe_put_user(offset, &prev->d_off, efault_end); 378 unsafe_put_user(ino, &dirent->d_ino, efault_end); 379 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 380 unsafe_put_user(d_type, &dirent->d_type, efault_end); 381 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 382 user_write_access_end(); 383 384 buf->prev_reclen = reclen; 385 buf->current_dir = (void __user *)dirent + reclen; 386 ctx->count -= reclen; 387 return true; 388 389 efault_end: 390 user_write_access_end(); 391 efault: 392 buf->error = -EFAULT; 393 return false; 394 } 395 396 SYSCALL_DEFINE3(getdents64, unsigned int, fd, 397 struct linux_dirent64 __user *, dirent, unsigned int, count) 398 { 399 CLASS(fd_pos, f)(fd); 400 struct getdents_callback64 buf = { 401 .ctx.actor = filldir64, 402 .ctx.count = count, 403 .current_dir = dirent 404 }; 405 int error; 406 407 if (fd_empty(f)) 408 return -EBADF; 409 410 error = iterate_dir(fd_file(f), &buf.ctx); 411 if (error >= 0) 412 error = buf.error; 413 if (buf.prev_reclen) { 414 struct linux_dirent64 __user * lastdirent; 415 typeof(lastdirent->d_off) d_off = buf.ctx.pos; 416 417 lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; 418 if (put_user(d_off, &lastdirent->d_off)) 419 error = -EFAULT; 420 else 421 error = count - buf.ctx.count; 422 } 423 return error; 424 } 425 426 #ifdef CONFIG_COMPAT 427 struct compat_old_linux_dirent { 428 compat_ulong_t d_ino; 429 compat_ulong_t d_offset; 430 unsigned short d_namlen; 431 char d_name[]; 432 }; 433 434 struct compat_readdir_callback { 435 struct dir_context ctx; 436 struct compat_old_linux_dirent __user *dirent; 437 int result; 438 }; 439 440 static bool compat_fillonedir(struct dir_context *ctx, const char *name, 441 int namlen, loff_t offset, u64 ino, 442 unsigned int d_type) 443 { 444 struct compat_readdir_callback *buf = 445 container_of(ctx, struct compat_readdir_callback, ctx); 446 struct compat_old_linux_dirent __user *dirent; 447 compat_ulong_t d_ino; 448 449 if (buf->result) 450 return false; 451 buf->result = verify_dirent_name(name, namlen); 452 if (buf->result) 453 return false; 454 d_ino = ino; 455 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 456 buf->result = -EOVERFLOW; 457 return false; 458 } 459 buf->result++; 460 dirent = buf->dirent; 461 if (!user_write_access_begin(dirent, 462 (unsigned long)(dirent->d_name + namlen + 1) - 463 (unsigned long)dirent)) 464 goto efault; 465 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 466 unsafe_put_user(offset, &dirent->d_offset, efault_end); 467 unsafe_put_user(namlen, &dirent->d_namlen, efault_end); 468 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 469 user_write_access_end(); 470 return true; 471 efault_end: 472 user_write_access_end(); 473 efault: 474 buf->result = -EFAULT; 475 return false; 476 } 477 478 COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 479 struct compat_old_linux_dirent __user *, dirent, unsigned int, count) 480 { 481 int error; 482 CLASS(fd_pos, f)(fd); 483 struct compat_readdir_callback buf = { 484 .ctx.actor = compat_fillonedir, 485 .ctx.count = 1, /* Hint to fs: just one entry. */ 486 .dirent = dirent 487 }; 488 489 if (fd_empty(f)) 490 return -EBADF; 491 492 error = iterate_dir(fd_file(f), &buf.ctx); 493 if (buf.result) 494 error = buf.result; 495 496 return error; 497 } 498 499 struct compat_linux_dirent { 500 compat_ulong_t d_ino; 501 compat_ulong_t d_off; 502 unsigned short d_reclen; 503 char d_name[]; 504 }; 505 506 struct compat_getdents_callback { 507 struct dir_context ctx; 508 struct compat_linux_dirent __user *current_dir; 509 int prev_reclen; 510 int error; 511 }; 512 513 static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen, 514 loff_t offset, u64 ino, unsigned int d_type) 515 { 516 struct compat_linux_dirent __user *dirent, *prev; 517 struct compat_getdents_callback *buf = 518 container_of(ctx, struct compat_getdents_callback, ctx); 519 compat_ulong_t d_ino; 520 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + 521 namlen + 2, sizeof(compat_long_t)); 522 int prev_reclen; 523 unsigned int flags = d_type; 524 525 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 526 d_type &= S_DT_MASK; 527 528 buf->error = verify_dirent_name(name, namlen); 529 if (unlikely(buf->error)) 530 return false; 531 buf->error = -EINVAL; /* only used if we fail.. */ 532 if (reclen > ctx->count) 533 return false; 534 d_ino = ino; 535 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 536 buf->error = -EOVERFLOW; 537 return false; 538 } 539 prev_reclen = buf->prev_reclen; 540 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 541 return false; 542 dirent = buf->current_dir; 543 prev = (void __user *) dirent - prev_reclen; 544 if (!user_write_access_begin(prev, reclen + prev_reclen)) 545 goto efault; 546 547 unsafe_put_user(offset, &prev->d_off, efault_end); 548 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 549 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 550 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); 551 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 552 user_write_access_end(); 553 554 buf->prev_reclen = reclen; 555 buf->current_dir = (void __user *)dirent + reclen; 556 ctx->count -= reclen; 557 return true; 558 efault_end: 559 user_write_access_end(); 560 efault: 561 buf->error = -EFAULT; 562 return false; 563 } 564 565 COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, 566 struct compat_linux_dirent __user *, dirent, unsigned int, count) 567 { 568 CLASS(fd_pos, f)(fd); 569 struct compat_getdents_callback buf = { 570 .ctx.actor = compat_filldir, 571 .ctx.count = count, 572 .current_dir = dirent, 573 }; 574 int error; 575 576 if (fd_empty(f)) 577 return -EBADF; 578 579 error = iterate_dir(fd_file(f), &buf.ctx); 580 if (error >= 0) 581 error = buf.error; 582 if (buf.prev_reclen) { 583 struct compat_linux_dirent __user * lastdirent; 584 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; 585 586 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 587 error = -EFAULT; 588 else 589 error = count - buf.ctx.count; 590 } 591 return error; 592 } 593 #endif 594