/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/


#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>

/*
 * Decide whether this getdents call should use FUSE_READDIRPLUS rather
 * than plain FUSE_READDIR.
 *
 * READDIRPLUS is used when the server supports it (do_readdirplus) and
 * either it is unconditional (!readdirplus_auto), a prior lookup advised
 * it for this directory (FUSE_I_ADVISE_RDPLUS, consumed here), or we are
 * at the start of the directory stream (pos == 0).
 */
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct fuse_inode *fi = get_fuse_inode(dir);

	if (!fc->do_readdirplus)
		return false;
	if (!fc->readdirplus_auto)
		return true;
	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
		return true;
	if (ctx->pos == 0)
		return true;
	return false;
}

/*
 * Append one directory entry to the readdir cache kept in the
 * directory's page cache (file->f_mapping).
 *
 * The entry is only stored if the cache is still being built
 * (!fi->rdc.cached) and @pos matches the current end of the cached
 * stream (fi->rdc.pos); otherwise the call is a no-op.  fi->rdc.lock
 * protects the cache state, but must be dropped while locking or
 * allocating the page, so the state is re-validated afterwards via
 * rdc.version/rdc.size before the entry is written.  Caching is
 * best-effort: any failure simply leaves the entry uncached.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	if (offset) {
		/* Appending mid-page: the page must already exist */
		page = find_lock_page(file->f_mapping, index);
	} else {
		/* Starting a fresh page: create it if necessary */
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	if (!offset) {
		/* New page: zero the tail so a short final page is clean */
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	/* Next expected stream position is this entry's offset cookie */
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}

/*
 * The server reported EOF at stream position @pos.  If that matches the
 * end of the cache being built, mark the cache complete and trim any
 * page-cache pages beyond the cached data.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t end;

	spin_lock(&fi->rdc.lock);
	/* does cache end position match current position? */
	if (fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	fi->rdc.cached = true;
	end = ALIGN(fi->rdc.size, PAGE_SIZE);
	spin_unlock(&fi->rdc.lock);

	/* truncate unused tail of cache */
	truncate_inode_pages(file->f_mapping, end);
}

/*
 * Emit one entry to userspace via dir_emit(), mirroring it into the
 * readdir cache first when caching is enabled for this open file.
 * Returns dir_emit()'s result (false when the caller's buffer is full).
 */
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;

	if (ff->open_flags & FOPEN_CACHE_DIR)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type);
}

/*
 * Parse a raw FUSE_READDIR reply in @buf (@nbytes long) and feed each
 * entry to fuse_emit().
 *
 * Returns 0 on success (including when the caller's buffer fills up or
 * a trailing partial record is left for the next request), or -EIO if
 * the server sent a malformed entry (empty/oversized name, or a name
 * containing '/').
 */
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	while (nbytes >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
		size_t reclen = FUSE_DIRENT_SIZE(dirent);
		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!fuse_emit(file, ctx, dirent))
			break;

		buf += reclen;
		nbytes -= reclen;
		ctx->pos = dirent->off;
	}

	return 0;
}

/*
 * Instantiate or refresh the dentry/inode for one READDIRPLUS entry,
 * performing the equivalent of a lookup without a round trip to the
 * server.
 *
 * On success the server-side lookup count for the node ends up bumped
 * by one (either explicitly here or inside fuse_iget()); the caller
 * sends a FORGET on failure to balance the count the server already
 * took for this entry.  Returns 0 on success or a negative errno.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT.  Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		/* No cached dentry: allocate one in in-lookup state */
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/*
			 * Existing dentry doesn't match what the server
			 * reported: invalidate it and retry with a fresh
			 * in-lookup dentry.
			 */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Account for the lookup the server performed for us */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			if (!IS_ERR(inode)) {
				/* Undo the nlookup bump done by fuse_iget() */
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}

/*
 * Send a fire-and-forget FUSE_FORGET for @nodeid, dropping the single
 * lookup reference the server took for a READDIRPLUS entry that we
 * failed to link.  Errors are deliberately ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_forget_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;
	args.noreply = true;

	fuse_simple_request(fm, &args);
	/* ignore errors */
}

/*
 * Parse a raw FUSE_READDIRPLUS reply in @buf (@nbytes long): emit each
 * entry to userspace (until the caller's buffer is full) and link every
 * entry's dentry/inode via fuse_direntplus_link().
 *
 * Returns 0 on success or -EIO for a malformed entry.  Entries that
 * fail to link have their server-side lookup reference dropped with a
 * FORGET so the count stays balanced.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version,
			     u64 evict_ctr)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold.  But we still continue iterating
			   over remaining entries to link them.  If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}

/*
 * Read one page worth of directory entries from the server with
 * FUSE_READDIR or FUSE_READDIRPLUS (chosen by fuse_use_readdirplus())
 * and emit them to userspace.
 *
 * A zero-length reply means EOF, which also finalizes the readdir
 * cache when FOPEN_CACHE_DIR is set.  Returns a negative errno on
 * failure, otherwise the (non-negative) result of parsing the reply.
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct folio *folio;
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_io_args ia = {};
	struct fuse_args_pages *ap = &ia.ap;
	struct fuse_folio_desc desc = { .length = PAGE_SIZE };
	u64 attr_version = 0, evict_ctr = 0;
	bool locked;

	folio = folio_alloc(GFP_KERNEL, 0);
	if (!folio)
		return -ENOMEM;

	plus = fuse_use_readdirplus(inode, ctx);
	ap->args.out_pages = true;
	ap->num_folios = 1;
	ap->folios = &folio;
	ap->descs = &desc;
	if (plus) {
		/* Snapshot versions before the request for later validation */
		attr_version = fuse_get_attr_version(fm->fc);
		evict_ctr = fuse_get_evict_ctr(fm->fc);
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIR);
	}
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fm, &ap->args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			/* Empty reply: EOF */
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(folio_address(folio), res,
						file, ctx, attr_version,
						evict_ctr);
		} else {
			res = parse_dirfile(folio_address(folio), res, file,
					    ctx);
		}
	}

	folio_put(folio);
	fuse_invalidate_atime(inode);
	return res;
}

enum fuse_parse_result {
	FOUND_ERR = -1,
	FOUND_NONE = 0,
	FOUND_SOME,
	FOUND_ALL,
};

/*
 * Emit entries from one page of the readdir cache, starting at the
 * stream offset tracked in ff->readdir.
 *
 * Entries are skipped (position-wise) until ff->readdir.pos catches up
 * with ctx->pos; from then on each entry is emitted.  Returns:
 *   FOUND_ERR  - cache contents look corrupted
 *   FOUND_NONE - the requested position was not found in this page
 *   FOUND_SOME - at least one entry was emitted
 *   FOUND_ALL  - the caller's buffer filled up (done for this call)
 */
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
	enum fuse_parse_result res = FOUND_NONE;

	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		/* A zero namelen marks the unused (zeroed) page tail */
		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}

/*
 * Invalidate the readdir cache: bump the version so concurrent readers
 * notice, and restart building from scratch.  Caller must hold
 * fi->rdc.lock.
 */
static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->rdc.cached = false;
	fi->rdc.version++;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
}

/* Tell fuse_readdir() that it must fall back to an uncached read */
#define UNCACHED 1

/*
 * Serve a getdents request from the readdir cache if possible.
 *
 * Returns 0 when done (entries emitted and/or EOF), UNCACHED when the
 * caller must fall back to fuse_readdir_uncached() (cache incomplete,
 * invalidated, or the requested position is not cached), or a negative
 * errno.
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache?  Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, than reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* Last page may be partially filled; earlier pages are full */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}

/*
 * ->iterate_shared() entry point: try the readdir cache first when
 * FOPEN_CACHE_DIR is set, falling back to a server round trip when the
 * cache cannot satisfy the request.
 */
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	return err;
}