1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 FUSE: Filesystem in Userspace 4 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu> 5 */ 6 7 8 #include "fuse_i.h" 9 #include <linux/iversion.h> 10 #include <linux/posix_acl.h> 11 #include <linux/pagemap.h> 12 #include <linux/highmem.h> 13 #include <linux/vmalloc.h> 14 15 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) 16 { 17 struct fuse_conn *fc = get_fuse_conn(dir); 18 struct fuse_inode *fi = get_fuse_inode(dir); 19 20 if (!fc->do_readdirplus) 21 return false; 22 if (!fc->readdirplus_auto) 23 return true; 24 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) 25 return true; 26 if (ctx->pos == 0) 27 return true; 28 return false; 29 } 30 31 static void fuse_add_dirent_to_cache(struct file *file, 32 struct fuse_dirent *dirent, loff_t pos) 33 { 34 struct fuse_inode *fi = get_fuse_inode(file_inode(file)); 35 size_t reclen = FUSE_DIRENT_SIZE(dirent); 36 pgoff_t index; 37 struct page *page; 38 loff_t size; 39 u64 version; 40 unsigned int offset; 41 void *addr; 42 43 /* Dirent doesn't fit in readdir cache page? Skip caching. */ 44 if (reclen > PAGE_SIZE) 45 return; 46 47 spin_lock(&fi->rdc.lock); 48 /* 49 * Is cache already completed? Or this entry does not go at the end of 50 * cache? 51 */ 52 if (fi->rdc.cached || pos != fi->rdc.pos) { 53 spin_unlock(&fi->rdc.lock); 54 return; 55 } 56 version = fi->rdc.version; 57 size = fi->rdc.size; 58 offset = offset_in_page(size); 59 index = size >> PAGE_SHIFT; 60 /* Dirent doesn't fit in current page? Jump to next page. */ 61 if (offset + reclen > PAGE_SIZE) { 62 index++; 63 offset = 0; 64 } 65 spin_unlock(&fi->rdc.lock); 66 67 if (offset) { 68 page = find_lock_page(file->f_mapping, index); 69 } else { 70 page = find_or_create_page(file->f_mapping, index, 71 mapping_gfp_mask(file->f_mapping)); 72 } 73 if (!page) 74 return; 75 76 spin_lock(&fi->rdc.lock); 77 /* Raced with another readdir */ 78 if (fi->rdc.version != version || fi->rdc.size != size || 79 WARN_ON(fi->rdc.pos != pos)) 80 goto unlock; 81 82 addr = kmap_local_page(page); 83 if (!offset) { 84 clear_page(addr); 85 SetPageUptodate(page); 86 } 87 memcpy(addr + offset, dirent, reclen); 88 kunmap_local(addr); 89 fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; 90 fi->rdc.pos = dirent->off; 91 unlock: 92 spin_unlock(&fi->rdc.lock); 93 unlock_page(page); 94 put_page(page); 95 } 96 97 static void fuse_readdir_cache_end(struct file *file, loff_t pos) 98 { 99 struct fuse_inode *fi = get_fuse_inode(file_inode(file)); 100 loff_t end; 101 102 spin_lock(&fi->rdc.lock); 103 /* does cache end position match current position? */ 104 if (fi->rdc.pos != pos) { 105 spin_unlock(&fi->rdc.lock); 106 return; 107 } 108 109 fi->rdc.cached = true; 110 end = ALIGN(fi->rdc.size, PAGE_SIZE); 111 spin_unlock(&fi->rdc.lock); 112 113 /* truncate unused tail of cache */ 114 truncate_inode_pages(file->f_mapping, end); 115 } 116 117 static bool fuse_emit(struct file *file, struct dir_context *ctx, 118 struct fuse_dirent *dirent) 119 { 120 struct fuse_file *ff = file->private_data; 121 122 if (ff->open_flags & FOPEN_CACHE_DIR) 123 fuse_add_dirent_to_cache(file, dirent, ctx->pos); 124 125 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, 126 dirent->type | FILLDIR_FLAG_NOINTR); 127 } 128 129 static int parse_dirfile(char *buf, size_t nbytes, struct file *file, 130 struct dir_context *ctx) 131 { 132 while (nbytes >= FUSE_NAME_OFFSET) { 133 struct fuse_dirent *dirent = (struct fuse_dirent *) buf; 134 size_t reclen = FUSE_DIRENT_SIZE(dirent); 135 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) 136 return -EIO; 137 if (reclen > nbytes) 138 break; 139 if (memchr(dirent->name, '/', dirent->namelen) != NULL) 140 return -EIO; 141 142 if (!fuse_emit(file, ctx, dirent)) 143 break; 144 145 buf += reclen; 146 nbytes -= reclen; 147 ctx->pos = dirent->off; 148 } 149 150 return 0; 151 } 152 153 static int fuse_direntplus_link(struct file *file, 154 struct fuse_direntplus *direntplus, 155 u64 attr_version, u64 evict_ctr) 156 { 157 struct fuse_entry_out *o = &direntplus->entry_out; 158 struct fuse_dirent *dirent = &direntplus->dirent; 159 struct dentry *parent = file->f_path.dentry; 160 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); 161 struct dentry *dentry; 162 struct dentry *alias; 163 struct inode *dir = d_inode(parent); 164 struct fuse_conn *fc; 165 struct inode *inode; 166 int epoch; 167 168 if (!o->nodeid) { 169 /* 170 * Unlike in the case of fuse_lookup, zero nodeid does not mean 171 * ENOENT. Instead, it only means the userspace filesystem did 172 * not want to return attributes/handle for this entry. 173 * 174 * So do nothing. 175 */ 176 return 0; 177 } 178 179 if (name.name[0] == '.') { 180 /* 181 * We could potentially refresh the attributes of the directory 182 * and its parent? 183 */ 184 if (name.len == 1) 185 return 0; 186 if (name.name[1] == '.' && name.len == 2) 187 return 0; 188 } 189 190 if (invalid_nodeid(o->nodeid)) 191 return -EIO; 192 if (fuse_invalid_attr(&o->attr)) 193 return -EIO; 194 195 fc = get_fuse_conn(dir); 196 epoch = atomic_read(&fc->epoch); 197 198 name.hash = full_name_hash(parent, name.name, name.len); 199 dentry = d_lookup(parent, &name); 200 if (!dentry) { 201 retry: 202 dentry = d_alloc_parallel(parent, &name); 203 if (IS_ERR(dentry)) 204 return PTR_ERR(dentry); 205 } 206 if (!d_in_lookup(dentry)) { 207 struct fuse_inode *fi; 208 inode = d_inode(dentry); 209 if (inode && get_node_id(inode) != o->nodeid) 210 inode = NULL; 211 if (!inode || 212 fuse_stale_inode(inode, o->generation, &o->attr)) { 213 if (inode) 214 fuse_make_bad(inode); 215 d_invalidate(dentry); 216 dput(dentry); 217 goto retry; 218 } 219 if (fuse_is_bad(inode)) { 220 dput(dentry); 221 return -EIO; 222 } 223 224 fi = get_fuse_inode(inode); 225 spin_lock(&fi->lock); 226 fi->nlookup++; 227 spin_unlock(&fi->lock); 228 229 forget_all_cached_acls(inode); 230 fuse_change_attributes(inode, &o->attr, NULL, 231 ATTR_TIMEOUT(o), 232 attr_version); 233 /* 234 * The other branch comes via fuse_iget() 235 * which bumps nlookup inside 236 */ 237 } else { 238 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, 239 &o->attr, ATTR_TIMEOUT(o), 240 attr_version, evict_ctr); 241 if (!inode) 242 inode = ERR_PTR(-ENOMEM); 243 244 alias = d_splice_alias(inode, dentry); 245 d_lookup_done(dentry); 246 if (alias) { 247 dput(dentry); 248 dentry = alias; 249 } 250 if (IS_ERR(dentry)) { 251 if (!IS_ERR(inode)) { 252 struct fuse_inode *fi = get_fuse_inode(inode); 253 254 spin_lock(&fi->lock); 255 fi->nlookup--; 256 spin_unlock(&fi->lock); 257 } 258 return PTR_ERR(dentry); 259 } 260 } 261 if (fc->readdirplus_auto) 262 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); 263 dentry->d_time = epoch; 264 fuse_change_entry_timeout(dentry, o); 265 266 dput(dentry); 267 return 0; 268 } 269 270 static void fuse_force_forget(struct file *file, u64 nodeid) 271 { 272 struct inode *inode = file_inode(file); 273 struct fuse_mount *fm = get_fuse_mount(inode); 274 struct fuse_forget_in inarg; 275 FUSE_ARGS(args); 276 277 memset(&inarg, 0, sizeof(inarg)); 278 inarg.nlookup = 1; 279 args.opcode = FUSE_FORGET; 280 args.nodeid = nodeid; 281 args.in_numargs = 1; 282 args.in_args[0].size = sizeof(inarg); 283 args.in_args[0].value = &inarg; 284 args.force = true; 285 args.noreply = true; 286 287 fuse_simple_request(fm, &args); 288 /* ignore errors */ 289 } 290 291 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, 292 struct dir_context *ctx, u64 attr_version, 293 u64 evict_ctr) 294 { 295 struct fuse_direntplus *direntplus; 296 struct fuse_dirent *dirent; 297 size_t reclen; 298 int over = 0; 299 int ret; 300 301 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { 302 direntplus = (struct fuse_direntplus *) buf; 303 dirent = &direntplus->dirent; 304 reclen = FUSE_DIRENTPLUS_SIZE(direntplus); 305 306 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) 307 return -EIO; 308 if (reclen > nbytes) 309 break; 310 if (memchr(dirent->name, '/', dirent->namelen) != NULL) 311 return -EIO; 312 313 if (!over) { 314 /* We fill entries into dstbuf only as much as 315 it can hold. But we still continue iterating 316 over remaining entries to link them. If not, 317 we need to send a FORGET for each of those 318 which we did not link. 319 */ 320 over = !fuse_emit(file, ctx, dirent); 321 if (!over) 322 ctx->pos = dirent->off; 323 } 324 325 buf += reclen; 326 nbytes -= reclen; 327 328 ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr); 329 if (ret) 330 fuse_force_forget(file, direntplus->entry_out.nodeid); 331 } 332 333 return 0; 334 } 335 336 static struct page **fuse_readdir_alloc_buf(struct fuse_args_pages *ap, size_t *bufsize) 337 { 338 unsigned int i, nr_alloc, nr_pages = DIV_ROUND_UP(*bufsize, PAGE_SIZE); 339 struct page **pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); 340 341 if (!pages) 342 return NULL; 343 344 nr_alloc = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); 345 if (!nr_alloc) 346 goto free_array; 347 348 if (nr_alloc < nr_pages) { 349 nr_pages = nr_alloc; 350 *bufsize = (size_t) nr_pages << PAGE_SHIFT; 351 } 352 353 ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs); 354 if (!ap->folios) 355 goto release_pages; 356 357 for (i = 0; i < nr_pages; i++) { 358 ap->folios[i] = page_folio(pages[i]); 359 ap->descs[i].length = min_t(size_t, *bufsize - (size_t)i * PAGE_SIZE, PAGE_SIZE); 360 } 361 ap->num_folios = nr_pages; 362 ap->args.out_pages = true; 363 364 return pages; 365 366 release_pages: 367 release_pages(pages, nr_pages); 368 free_array: 369 kfree(pages); 370 return NULL; 371 } 372 373 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) 374 { 375 int plus; 376 ssize_t res; 377 struct inode *inode = file_inode(file); 378 struct fuse_mount *fm = get_fuse_mount(inode); 379 struct fuse_conn *fc = fm->fc; 380 struct fuse_io_args ia = {}; 381 struct fuse_args_pages *ap = &ia.ap; 382 void *buf; 383 size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT); 384 u64 attr_version = 0, evict_ctr = 0; 385 bool locked; 386 struct page **pages = fuse_readdir_alloc_buf(ap, &bufsize); 387 388 if (!pages) 389 return -ENOMEM; 390 391 plus = fuse_use_readdirplus(inode, ctx); 392 if (plus) { 393 attr_version = fuse_get_attr_version(fm->fc); 394 evict_ctr = fuse_get_evict_ctr(fm->fc); 395 fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS); 396 } else { 397 fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR); 398 } 399 locked = fuse_lock_inode(inode); 400 res = fuse_simple_request(fm, &ap->args); 401 fuse_unlock_inode(inode, locked); 402 if (res < 0) 403 goto out; 404 405 if (!res) { 406 struct fuse_file *ff = file->private_data; 407 408 if (ff->open_flags & FOPEN_CACHE_DIR) 409 fuse_readdir_cache_end(file, ctx->pos); 410 goto out; 411 } 412 413 buf = vm_map_ram(pages, ap->num_folios, -1); 414 if (!buf) { 415 res = -ENOMEM; 416 } else { 417 if (plus) 418 res = parse_dirplusfile(buf, res, file, ctx, attr_version, evict_ctr); 419 else 420 res = parse_dirfile(buf, res, file, ctx); 421 422 vm_unmap_ram(buf, ap->num_folios); 423 } 424 out: 425 kfree(ap->folios); 426 release_pages(pages, ap->num_folios); 427 kfree(pages); 428 429 fuse_invalidate_atime(inode); 430 431 return res; 432 } 433 434 enum fuse_parse_result { 435 FOUND_ERR = -1, 436 FOUND_NONE = 0, 437 FOUND_SOME, 438 FOUND_ALL, 439 }; 440 441 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, 442 void *addr, unsigned int size, 443 struct dir_context *ctx) 444 { 445 unsigned int offset = offset_in_page(ff->readdir.cache_off); 446 enum fuse_parse_result res = FOUND_NONE; 447 448 WARN_ON(offset >= size); 449 450 for (;;) { 451 struct fuse_dirent *dirent = addr + offset; 452 unsigned int nbytes = size - offset; 453 size_t reclen; 454 455 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) 456 break; 457 458 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */ 459 460 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) 461 return FOUND_ERR; 462 if (WARN_ON(reclen > nbytes)) 463 return FOUND_ERR; 464 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL)) 465 return FOUND_ERR; 466 467 if (ff->readdir.pos == ctx->pos) { 468 res = FOUND_SOME; 469 if (!dir_emit(ctx, dirent->name, dirent->namelen, 470 dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR)) 471 return FOUND_ALL; 472 ctx->pos = dirent->off; 473 } 474 ff->readdir.pos = dirent->off; 475 ff->readdir.cache_off += reclen; 476 477 offset += reclen; 478 } 479 480 return res; 481 } 482 483 static void fuse_rdc_reset(struct inode *inode) 484 { 485 struct fuse_inode *fi = get_fuse_inode(inode); 486 487 fi->rdc.cached = false; 488 fi->rdc.version++; 489 fi->rdc.size = 0; 490 fi->rdc.pos = 0; 491 fi->rdc.epoch = 0; 492 } 493 494 #define UNCACHED 1 495 496 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) 497 { 498 struct fuse_file *ff = file->private_data; 499 struct inode *inode = file_inode(file); 500 struct fuse_conn *fc = get_fuse_conn(inode); 501 struct fuse_inode *fi = get_fuse_inode(inode); 502 enum fuse_parse_result res; 503 pgoff_t index; 504 unsigned int size; 505 struct page *page; 506 void *addr; 507 508 /* Seeked? If so, reset the cache stream */ 509 if (ff->readdir.pos != ctx->pos) { 510 ff->readdir.pos = 0; 511 ff->readdir.cache_off = 0; 512 } 513 514 /* 515 * We're just about to start reading into the cache or reading the 516 * cache; both cases require an up-to-date mtime value. 517 */ 518 if (!ctx->pos && fc->auto_inval_data) { 519 int err = fuse_update_attributes(inode, file, STATX_MTIME); 520 521 if (err) 522 return err; 523 } 524 525 retry: 526 spin_lock(&fi->rdc.lock); 527 retry_locked: 528 if (!fi->rdc.cached) { 529 /* Starting cache? Set cache mtime. */ 530 if (!ctx->pos && !fi->rdc.size) { 531 fi->rdc.mtime = inode_get_mtime(inode); 532 fi->rdc.iversion = inode_query_iversion(inode); 533 fi->rdc.epoch = atomic_read(&fc->epoch); 534 } 535 spin_unlock(&fi->rdc.lock); 536 return UNCACHED; 537 } 538 /* 539 * When at the beginning of the directory (i.e. just after opendir(3) or 540 * rewinddir(3)), then need to check whether directory contents have 541 * changed, and reset the cache if so. 542 */ 543 if (!ctx->pos) { 544 struct timespec64 mtime = inode_get_mtime(inode); 545 546 if (inode_peek_iversion(inode) != fi->rdc.iversion || 547 !timespec64_equal(&fi->rdc.mtime, &mtime) || 548 fi->rdc.epoch != atomic_read(&fc->epoch)) { 549 fuse_rdc_reset(inode); 550 goto retry_locked; 551 } 552 } 553 554 /* 555 * If cache version changed since the last getdents() call, then reset 556 * the cache stream. 557 */ 558 if (ff->readdir.version != fi->rdc.version) { 559 ff->readdir.pos = 0; 560 ff->readdir.cache_off = 0; 561 } 562 /* 563 * If at the beginning of the cache, than reset version to 564 * current. 565 */ 566 if (ff->readdir.pos == 0) 567 ff->readdir.version = fi->rdc.version; 568 569 WARN_ON(fi->rdc.size < ff->readdir.cache_off); 570 571 index = ff->readdir.cache_off >> PAGE_SHIFT; 572 573 if (index == (fi->rdc.size >> PAGE_SHIFT)) 574 size = offset_in_page(fi->rdc.size); 575 else 576 size = PAGE_SIZE; 577 spin_unlock(&fi->rdc.lock); 578 579 /* EOF? */ 580 if (offset_in_page(ff->readdir.cache_off) == size) 581 return 0; 582 583 page = find_get_page_flags(file->f_mapping, index, 584 FGP_ACCESSED | FGP_LOCK); 585 /* Page gone missing, then re-added to cache, but not initialized? */ 586 if (page && !PageUptodate(page)) { 587 unlock_page(page); 588 put_page(page); 589 page = NULL; 590 } 591 spin_lock(&fi->rdc.lock); 592 if (!page) { 593 /* 594 * Uh-oh: page gone missing, cache is useless 595 */ 596 if (fi->rdc.version == ff->readdir.version) 597 fuse_rdc_reset(inode); 598 goto retry_locked; 599 } 600 601 /* Make sure it's still the same version after getting the page. */ 602 if (ff->readdir.version != fi->rdc.version) { 603 spin_unlock(&fi->rdc.lock); 604 unlock_page(page); 605 put_page(page); 606 goto retry; 607 } 608 spin_unlock(&fi->rdc.lock); 609 610 /* 611 * Contents of the page are now protected against changing by holding 612 * the page lock. 613 */ 614 addr = kmap_local_page(page); 615 res = fuse_parse_cache(ff, addr, size, ctx); 616 kunmap_local(addr); 617 unlock_page(page); 618 put_page(page); 619 620 if (res == FOUND_ERR) 621 return -EIO; 622 623 if (res == FOUND_ALL) 624 return 0; 625 626 if (size == PAGE_SIZE) { 627 /* We hit end of page: skip to next page. */ 628 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE); 629 goto retry; 630 } 631 632 /* 633 * End of cache reached. If found position, then we are done, otherwise 634 * need to fall back to uncached, since the position we were looking for 635 * wasn't in the cache. 636 */ 637 return res == FOUND_SOME ? 0 : UNCACHED; 638 } 639 640 int fuse_readdir(struct file *file, struct dir_context *ctx) 641 { 642 struct fuse_file *ff = file->private_data; 643 struct inode *inode = file_inode(file); 644 int err; 645 646 if (fuse_is_bad(inode)) 647 return -EIO; 648 649 err = UNCACHED; 650 if (ff->open_flags & FOPEN_CACHE_DIR) 651 err = fuse_readdir_cached(file, ctx); 652 if (err == UNCACHED) 653 err = fuse_readdir_uncached(file, ctx); 654 655 return err; 656 } 657