1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 10 #include "fuse_i.h" 11 #include <linux/iversion.h> 12 #include <linux/posix_acl.h> 13 #include <linux/pagemap.h> 14 #include <linux/highmem.h> 15 16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) 17 { 18 struct fuse_conn *fc = get_fuse_conn(dir); 19 struct fuse_inode *fi = get_fuse_inode(dir); 20 21 if (!fc->do_readdirplus) 22 return false; 23 if (!fc->readdirplus_auto) 24 return true; 25 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) 26 return true; 27 if (ctx->pos == 0) 28 return true; 29 return false; 30 } 31 32 static void fuse_add_dirent_to_cache(struct file *file, 33 struct fuse_dirent *dirent, loff_t pos) 34 { 35 struct fuse_inode *fi = get_fuse_inode(file_inode(file)); 36 size_t reclen = FUSE_DIRENT_SIZE(dirent); 37 pgoff_t index; 38 struct page *page; 39 loff_t size; 40 u64 version; 41 unsigned int offset; 42 void *addr; 43 44 /* Dirent doesn't fit in readdir cache page? Skip caching. */ 45 if (reclen > PAGE_SIZE) 46 return; 47 48 spin_lock(&fi->rdc.lock); 49 /* 50 * Is cache already completed? Or this entry does not go at the end of 51 * cache? 52 */ 53 if (fi->rdc.cached || pos != fi->rdc.pos) { 54 spin_unlock(&fi->rdc.lock); 55 return; 56 } 57 version = fi->rdc.version; 58 size = fi->rdc.size; 59 offset = offset_in_page(size); 60 index = size >> PAGE_SHIFT; 61 /* Dirent doesn't fit in current page? Jump to next page. */ 62 if (offset + reclen > PAGE_SIZE) { 63 index++; 64 offset = 0; 65 } 66 spin_unlock(&fi->rdc.lock); 67 68 if (offset) { 69 page = find_lock_page(file->f_mapping, index); 70 } else { 71 page = find_or_create_page(file->f_mapping, index, 72 mapping_gfp_mask(file->f_mapping)); 73 } 74 if (!page) 75 return; 76 77 spin_lock(&fi->rdc.lock); 78 /* Raced with another readdir */ 79 if (fi->rdc.version != version || fi->rdc.size != size || 80 WARN_ON(fi->rdc.pos != pos)) 81 goto unlock; 82 83 addr = kmap_local_page(page); 84 if (!offset) { 85 clear_page(addr); 86 SetPageUptodate(page); 87 } 88 memcpy(addr + offset, dirent, reclen); 89 kunmap_local(addr); 90 fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; 91 fi->rdc.pos = dirent->off; 92 unlock: 93 spin_unlock(&fi->rdc.lock); 94 unlock_page(page); 95 put_page(page); 96 } 97 98 static void fuse_readdir_cache_end(struct file *file, loff_t pos) 99 { 100 struct fuse_inode *fi = get_fuse_inode(file_inode(file)); 101 loff_t end; 102 103 spin_lock(&fi->rdc.lock); 104 /* does cache end position match current position? */ 105 if (fi->rdc.pos != pos) { 106 spin_unlock(&fi->rdc.lock); 107 return; 108 } 109 110 fi->rdc.cached = true; 111 end = ALIGN(fi->rdc.size, PAGE_SIZE); 112 spin_unlock(&fi->rdc.lock); 113 114 /* truncate unused tail of cache */ 115 truncate_inode_pages(file->f_mapping, end); 116 } 117 118 static bool fuse_emit(struct file *file, struct dir_context *ctx, 119 struct fuse_dirent *dirent) 120 { 121 struct fuse_file *ff = file->private_data; 122 123 if (ff->open_flags & FOPEN_CACHE_DIR) 124 fuse_add_dirent_to_cache(file, dirent, ctx->pos); 125 126 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, 127 dirent->type | FILLDIR_FLAG_NOINTR); 128 } 129 130 static int parse_dirfile(char *buf, size_t nbytes, struct file *file, 131 struct dir_context *ctx) 132 { 133 while (nbytes >= FUSE_NAME_OFFSET) { 134 struct fuse_dirent *dirent = (struct fuse_dirent *) buf; 135 size_t reclen = FUSE_DIRENT_SIZE(dirent); 136 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) 137 return -EIO; 138 if (reclen > nbytes) 139 break; 140 if (memchr(dirent->name, '/', dirent->namelen) != NULL) 141 return -EIO; 142 143 if (!fuse_emit(file, ctx, dirent)) 144 break; 145 146 buf += reclen; 147 nbytes -= reclen; 148 ctx->pos = dirent->off; 149 } 150 151 return 0; 152 } 153 154 static int fuse_direntplus_link(struct file *file, 155 struct fuse_direntplus *direntplus, 156 u64 attr_version, u64 evict_ctr) 157 { 158 struct fuse_entry_out *o = &direntplus->entry_out; 159 struct fuse_dirent *dirent = &direntplus->dirent; 160 struct dentry *parent = file->f_path.dentry; 161 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); 162 struct dentry *dentry; 163 struct dentry *alias; 164 struct inode *dir = d_inode(parent); 165 struct fuse_conn *fc; 166 struct inode *inode; 167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 168 int epoch; 169 170 if (!o->nodeid) { 171 /* 172 * Unlike in the case of fuse_lookup, zero nodeid does not mean 173 * ENOENT. Instead, it only means the userspace filesystem did 174 * not want to return attributes/handle for this entry. 175 * 176 * So do nothing. 177 */ 178 return 0; 179 } 180 181 if (name.name[0] == '.') { 182 /* 183 * We could potentially refresh the attributes of the directory 184 * and its parent? 185 */ 186 if (name.len == 1) 187 return 0; 188 if (name.name[1] == '.' && name.len == 2) 189 return 0; 190 } 191 192 if (invalid_nodeid(o->nodeid)) 193 return -EIO; 194 if (fuse_invalid_attr(&o->attr)) 195 return -EIO; 196 197 fc = get_fuse_conn(dir); 198 epoch = atomic_read(&fc->epoch); 199 200 name.hash = full_name_hash(parent, name.name, name.len); 201 dentry = d_lookup(parent, &name); 202 if (!dentry) { 203 retry: 204 dentry = d_alloc_parallel(parent, &name, &wq); 205 if (IS_ERR(dentry)) 206 return PTR_ERR(dentry); 207 } 208 if (!d_in_lookup(dentry)) { 209 struct fuse_inode *fi; 210 inode = d_inode(dentry); 211 if (inode && get_node_id(inode) != o->nodeid) 212 inode = NULL; 213 if (!inode || 214 fuse_stale_inode(inode, o->generation, &o->attr)) { 215 if (inode) 216 fuse_make_bad(inode); 217 d_invalidate(dentry); 218 dput(dentry); 219 goto retry; 220 } 221 if (fuse_is_bad(inode)) { 222 dput(dentry); 223 return -EIO; 224 } 225 226 fi = get_fuse_inode(inode); 227 spin_lock(&fi->lock); 228 fi->nlookup++; 229 spin_unlock(&fi->lock); 230 231 forget_all_cached_acls(inode); 232 fuse_change_attributes(inode, &o->attr, NULL, 233 ATTR_TIMEOUT(o), 234 attr_version); 235 /* 236 * The other branch comes via fuse_iget() 237 * which bumps nlookup inside 238 */ 239 } else { 240 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, 241 &o->attr, ATTR_TIMEOUT(o), 242 attr_version, evict_ctr); 243 if (!inode) 244 inode = ERR_PTR(-ENOMEM); 245 246 alias = d_splice_alias(inode, dentry); 247 d_lookup_done(dentry); 248 if (alias) { 249 dput(dentry); 250 dentry = alias; 251 } 252 if (IS_ERR(dentry)) { 253 if (!IS_ERR(inode)) { 254 struct fuse_inode *fi = get_fuse_inode(inode); 255 256 spin_lock(&fi->lock); 257 fi->nlookup--; 258 spin_unlock(&fi->lock); 259 } 260 return PTR_ERR(dentry); 261 } 262 } 263 if (fc->readdirplus_auto) 264 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); 265 dentry->d_time = epoch; 266 fuse_change_entry_timeout(dentry, o); 267 268 dput(dentry); 269 return 0; 270 } 271 272 static void fuse_force_forget(struct file *file, u64 nodeid) 273 { 274 struct inode *inode = file_inode(file); 275 struct fuse_mount *fm = get_fuse_mount(inode); 276 struct fuse_forget_in inarg; 277 FUSE_ARGS(args); 278 279 memset(&inarg, 0, sizeof(inarg)); 280 inarg.nlookup = 1; 281 args.opcode = FUSE_FORGET; 282 args.nodeid = nodeid; 283 args.in_numargs = 1; 284 args.in_args[0].size = sizeof(inarg); 285 args.in_args[0].value = &inarg; 286 args.force = true; 287 args.noreply = true; 288 289 fuse_simple_request(fm, &args); 290 /* ignore errors */ 291 } 292 293 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, 294 struct dir_context *ctx, u64 attr_version, 295 u64 evict_ctr) 296 { 297 struct fuse_direntplus *direntplus; 298 struct fuse_dirent *dirent; 299 size_t reclen; 300 int over = 0; 301 int ret; 302 303 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { 304 direntplus = (struct fuse_direntplus *) buf; 305 dirent = &direntplus->dirent; 306 reclen = FUSE_DIRENTPLUS_SIZE(direntplus); 307 308 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) 309 return -EIO; 310 if (reclen > nbytes) 311 break; 312 if (memchr(dirent->name, '/', dirent->namelen) != NULL) 313 return -EIO; 314 315 if (!over) { 316 /* We fill entries into dstbuf only as much as 317 it can hold. But we still continue iterating 318 over remaining entries to link them. If not, 319 we need to send a FORGET for each of those 320 which we did not link. 321 */ 322 over = !fuse_emit(file, ctx, dirent); 323 if (!over) 324 ctx->pos = dirent->off; 325 } 326 327 buf += reclen; 328 nbytes -= reclen; 329 330 ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr); 331 if (ret) 332 fuse_force_forget(file, direntplus->entry_out.nodeid); 333 } 334 335 return 0; 336 } 337 338 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) 339 { 340 int plus; 341 ssize_t res; 342 struct inode *inode = file_inode(file); 343 struct fuse_mount *fm = get_fuse_mount(inode); 344 struct fuse_conn *fc = fm->fc; 345 struct fuse_io_args ia = {}; 346 struct fuse_args *args = &ia.ap.args; 347 void *buf; 348 size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT); 349 u64 attr_version = 0, evict_ctr = 0; 350 bool locked; 351 352 buf = kvmalloc(bufsize, GFP_KERNEL); 353 if (!buf) 354 return -ENOMEM; 355 356 args->out_args[0].value = buf; 357 358 plus = fuse_use_readdirplus(inode, ctx); 359 if (plus) { 360 attr_version = fuse_get_attr_version(fm->fc); 361 evict_ctr = fuse_get_evict_ctr(fm->fc); 362 fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS); 363 } else { 364 fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR); 365 } 366 locked = fuse_lock_inode(inode); 367 res = fuse_simple_request(fm, args); 368 fuse_unlock_inode(inode, locked); 369 if (res >= 0) { 370 if (!res) { 371 struct fuse_file *ff = file->private_data; 372 373 if (ff->open_flags & FOPEN_CACHE_DIR) 374 fuse_readdir_cache_end(file, ctx->pos); 375 } else if (plus) { 376 res = parse_dirplusfile(buf, res, file, ctx, attr_version, 377 evict_ctr); 378 } else { 379 res = parse_dirfile(buf, res, file, ctx); 380 } 381 } 382 383 kvfree(buf); 384 fuse_invalidate_atime(inode); 385 return res; 386 } 387 388 enum fuse_parse_result { 389 FOUND_ERR = -1, 390 FOUND_NONE = 0, 391 FOUND_SOME, 392 FOUND_ALL, 393 }; 394 395 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, 396 void *addr, unsigned int size, 397 struct dir_context *ctx) 398 { 399 unsigned int offset = offset_in_page(ff->readdir.cache_off); 400 enum fuse_parse_result res = FOUND_NONE; 401 402 WARN_ON(offset >= size); 403 404 for (;;) { 405 struct fuse_dirent *dirent = addr + offset; 406 unsigned int nbytes = size - offset; 407 size_t reclen; 408 409 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) 410 break; 411 412 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */ 413 414 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) 415 return FOUND_ERR; 416 if (WARN_ON(reclen > nbytes)) 417 return FOUND_ERR; 418 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL)) 419 return FOUND_ERR; 420 421 if (ff->readdir.pos == ctx->pos) { 422 res = FOUND_SOME; 423 if (!dir_emit(ctx, dirent->name, dirent->namelen, 424 dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR)) 425 return FOUND_ALL; 426 ctx->pos = dirent->off; 427 } 428 ff->readdir.pos = dirent->off; 429 ff->readdir.cache_off += reclen; 430 431 offset += reclen; 432 } 433 434 return res; 435 } 436 437 static void fuse_rdc_reset(struct inode *inode) 438 { 439 struct fuse_inode *fi = get_fuse_inode(inode); 440 441 fi->rdc.cached = false; 442 fi->rdc.version++; 443 fi->rdc.size = 0; 444 fi->rdc.pos = 0; 445 } 446 447 #define UNCACHED 1 448 449 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) 450 { 451 struct fuse_file *ff = file->private_data; 452 struct inode *inode = file_inode(file); 453 struct fuse_conn *fc = get_fuse_conn(inode); 454 struct fuse_inode *fi = get_fuse_inode(inode); 455 enum fuse_parse_result res; 456 pgoff_t index; 457 unsigned int size; 458 struct page *page; 459 void *addr; 460 461 /* Seeked? If so, reset the cache stream */ 462 if (ff->readdir.pos != ctx->pos) { 463 ff->readdir.pos = 0; 464 ff->readdir.cache_off = 0; 465 } 466 467 /* 468 * We're just about to start reading into the cache or reading the 469 * cache; both cases require an up-to-date mtime value. 470 */ 471 if (!ctx->pos && fc->auto_inval_data) { 472 int err = fuse_update_attributes(inode, file, STATX_MTIME); 473 474 if (err) 475 return err; 476 } 477 478 retry: 479 spin_lock(&fi->rdc.lock); 480 retry_locked: 481 if (!fi->rdc.cached) { 482 /* Starting cache? Set cache mtime. */ 483 if (!ctx->pos && !fi->rdc.size) { 484 fi->rdc.mtime = inode_get_mtime(inode); 485 fi->rdc.iversion = inode_query_iversion(inode); 486 } 487 spin_unlock(&fi->rdc.lock); 488 return UNCACHED; 489 } 490 /* 491 * When at the beginning of the directory (i.e. just after opendir(3) or 492 * rewinddir(3)), then need to check whether directory contents have 493 * changed, and reset the cache if so. 494 */ 495 if (!ctx->pos) { 496 struct timespec64 mtime = inode_get_mtime(inode); 497 498 if (inode_peek_iversion(inode) != fi->rdc.iversion || 499 !timespec64_equal(&fi->rdc.mtime, &mtime)) { 500 fuse_rdc_reset(inode); 501 goto retry_locked; 502 } 503 } 504 505 /* 506 * If cache version changed since the last getdents() call, then reset 507 * the cache stream. 508 */ 509 if (ff->readdir.version != fi->rdc.version) { 510 ff->readdir.pos = 0; 511 ff->readdir.cache_off = 0; 512 } 513 /* 514 * If at the beginning of the cache, than reset version to 515 * current. 516 */ 517 if (ff->readdir.pos == 0) 518 ff->readdir.version = fi->rdc.version; 519 520 WARN_ON(fi->rdc.size < ff->readdir.cache_off); 521 522 index = ff->readdir.cache_off >> PAGE_SHIFT; 523 524 if (index == (fi->rdc.size >> PAGE_SHIFT)) 525 size = offset_in_page(fi->rdc.size); 526 else 527 size = PAGE_SIZE; 528 spin_unlock(&fi->rdc.lock); 529 530 /* EOF? */ 531 if (offset_in_page(ff->readdir.cache_off) == size) 532 return 0; 533 534 page = find_get_page_flags(file->f_mapping, index, 535 FGP_ACCESSED | FGP_LOCK); 536 /* Page gone missing, then re-added to cache, but not initialized? */ 537 if (page && !PageUptodate(page)) { 538 unlock_page(page); 539 put_page(page); 540 page = NULL; 541 } 542 spin_lock(&fi->rdc.lock); 543 if (!page) { 544 /* 545 * Uh-oh: page gone missing, cache is useless 546 */ 547 if (fi->rdc.version == ff->readdir.version) 548 fuse_rdc_reset(inode); 549 goto retry_locked; 550 } 551 552 /* Make sure it's still the same version after getting the page. */ 553 if (ff->readdir.version != fi->rdc.version) { 554 spin_unlock(&fi->rdc.lock); 555 unlock_page(page); 556 put_page(page); 557 goto retry; 558 } 559 spin_unlock(&fi->rdc.lock); 560 561 /* 562 * Contents of the page are now protected against changing by holding 563 * the page lock. 564 */ 565 addr = kmap_local_page(page); 566 res = fuse_parse_cache(ff, addr, size, ctx); 567 kunmap_local(addr); 568 unlock_page(page); 569 put_page(page); 570 571 if (res == FOUND_ERR) 572 return -EIO; 573 574 if (res == FOUND_ALL) 575 return 0; 576 577 if (size == PAGE_SIZE) { 578 /* We hit end of page: skip to next page. */ 579 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE); 580 goto retry; 581 } 582 583 /* 584 * End of cache reached. If found position, then we are done, otherwise 585 * need to fall back to uncached, since the position we were looking for 586 * wasn't in the cache. 587 */ 588 return res == FOUND_SOME ? 0 : UNCACHED; 589 } 590 591 int fuse_readdir(struct file *file, struct dir_context *ctx) 592 { 593 struct fuse_file *ff = file->private_data; 594 struct inode *inode = file_inode(file); 595 int err; 596 597 if (fuse_is_bad(inode)) 598 return -EIO; 599 600 err = UNCACHED; 601 if (ff->open_flags & FOPEN_CACHE_DIR) 602 err = fuse_readdir_cached(file, ctx); 603 if (err == UNCACHED) 604 err = fuse_readdir_uncached(file, ctx); 605 606 return err; 607 } 608