/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

/*
 * Directory reading for FUSE.
 *
 * This file implements fuse_readdir() on top of two paths:
 *
 *  - an "uncached" path that sends FUSE_READDIR or FUSE_READDIRPLUS to the
 *    server and parses the reply buffer, and
 *  - a "cached" path, used for files opened with FOPEN_CACHE_DIR, that
 *    replays dirents previously stored in the directory's page cache
 *    (file->f_mapping).  The cache state lives in fuse_inode::rdc and the
 *    per-open stream state in fuse_file::readdir.
 */

#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>

/*
 * Decide whether the next directory read should use READDIRPLUS.
 *
 * Returns false if the connection has do_readdirplus clear, and true if
 * readdirplus_auto is clear.  In auto mode, returns true when the
 * FUSE_I_ADVISE_RDPLUS bit was set on the directory inode (the bit is
 * cleared as a side effect of the test) or when reading starts at position
 * 0; otherwise returns false.
 */
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct fuse_inode *fi = get_fuse_inode(dir);

	if (!fc->do_readdirplus)
		return false;
	if (!fc->readdirplus_auto)
		return true;
	/* Note: this consumes the advice bit, it is not just a test. */
	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
		return true;
	if (ctx->pos == 0)
		return true;
	return false;
}

/*
 * Append one dirent to the readdir cache of the directory, if it is the next
 * entry the cache is waiting for.
 *
 * @file:   open directory; its f_mapping holds the cache pages
 * @dirent: entry to store (copied verbatim, FUSE_DIRENT_SIZE(dirent) bytes)
 * @pos:    directory position this entry was read at
 *
 * Nothing is cached when the entry is larger than a page, the cache is
 * already marked complete, @pos is not the current end position of the cache,
 * the page cannot be obtained, or the cache changed while the lock was
 * dropped.  On success rdc.size and rdc.pos are advanced.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	/* Dirent doesn't fit in readdir cache page?  Skip caching. */
	if (reclen > PAGE_SIZE)
		return;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/*
	 * Snapshot version and size so that changes made while the lock is
	 * dropped below can be detected.
	 */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = offset_in_page(size);
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * A non-zero offset means the page was already started by an earlier
	 * entry, so it is only looked up; a zero offset starts a new page,
	 * which is created if missing.  Either way the page comes back locked.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	if (!offset) {
		/* First entry in this page: zero it and mark it valid. */
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	/* New end of cache; may have skipped the unused tail of the previous page. */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}

/*
 * Mark the readdir cache as complete.
 *
 * Called when the server returned an empty reply at @pos.  If @pos is the
 * position the cache currently ends at, rdc.cached is set and any pages
 * beyond the page-aligned end of the cached data are dropped from the
 * mapping.  Otherwise nothing is done.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t end;

	spin_lock(&fi->rdc.lock);
	/* does cache end position match current position? */
	if (fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	fi->rdc.cached = true;
	end = ALIGN(fi->rdc.size, PAGE_SIZE);
	spin_unlock(&fi->rdc.lock);

	/* truncate unused tail of cache */
	truncate_inode_pages(file->f_mapping, end);
}

/*
 * Hand one dirent to the caller's dir_context.
 *
 * If the directory was opened with FOPEN_CACHE_DIR the entry is first offered
 * to the readdir cache (this happens whether or not dir_emit() then accepts
 * the entry).  Returns the result of dir_emit().
 */
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;

	if (ff->open_flags & FOPEN_CACHE_DIR)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type | FILLDIR_FLAG_NOINTR);
}

/*
 * Parse a FUSE_READDIR reply buffer and emit its entries.
 *
 * @buf/@nbytes: reply data, a sequence of struct fuse_dirent records
 *
 * Returns -EIO if an entry has a zero-length or over-long name, or a name
 * containing '/'.  Otherwise returns 0.  Parsing stops early, without error,
 * at a record that does not fit in the remaining bytes or when fuse_emit()
 * returns false.  ctx->pos is advanced to dirent->off after each entry that
 * was emitted.
 */
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	while (nbytes >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
		size_t reclen = FUSE_DIRENT_SIZE(dirent);
		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* Truncated trailing record: ignore it. */
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!fuse_emit(file, ctx, dirent))
			break;

		buf += reclen;
		nbytes -= reclen;
		ctx->pos = dirent->off;
	}

	return 0;
}

/*
 * Instantiate or refresh the dentry/inode for one READDIRPLUS entry.
 *
 * @file:         the directory being read (parent of the entry)
 * @direntplus:   entry from the reply; entry_out carries nodeid, generation,
 *                attributes and timeouts
 * @attr_version: attribute version sampled before the request was sent
 * @evict_ctr:    evict counter sampled before the request was sent
 *
 * Returns 0 on success, and also when nothing needs doing (zero nodeid, or
 * the entry is "." or "..").  Returns -EIO for an invalid nodeid, invalid
 * attributes or a bad inode, or the error from d_alloc_parallel() /
 * d_splice_alias().
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	int epoch;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);
	/* Sampled here and stored into dentry->d_time once the dentry is set up. */
	epoch = atomic_read(&fc->epoch);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/*
		 * The dentry is not in the in-lookup state: validate the
		 * inode attached to it against the reply.
		 */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/*
			 * Negative dentry, different node, or stale inode:
			 * invalidate the dentry and allocate a fresh one.
			 */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			/* Continue with the alias (which may be an ERR_PTR). */
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				/* Undo the nlookup taken by fuse_iget(). */
				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	dentry->d_time = epoch;
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}

/*
 * Send a FUSE_FORGET with nlookup == 1 for @nodeid.
 *
 * The request is marked force and noreply, and the return value of
 * fuse_simple_request() is deliberately ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_forget_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;
	args.noreply = true;

	fuse_simple_request(fm, &args);
	/* ignore errors */
}

/*
 * Parse a FUSE_READDIRPLUS reply buffer: emit entries and link each one into
 * the dcache.
 *
 * Validation mirrors parse_dirfile(): -EIO for a zero-length or over-long
 * name or a name containing '/'; a record that does not fit in the remaining
 * bytes ends parsing.  Unlike parse_dirfile(), the loop keeps going after
 * fuse_emit() first returns false: remaining entries are no longer emitted
 * (and ctx->pos no longer advances) but are still passed to
 * fuse_direntplus_link().  If linking fails, a FORGET is sent for that
 * entry's nodeid.  Returns 0 unless a malformed entry was found.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version,
			     u64 evict_ctr)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold. But we still continue iterating
			   over remaining entries to link them. If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}

/*
 * Read directory entries from the server, bypassing the readdir cache for
 * lookup (entries may still be added to the cache via fuse_emit()).
 *
 * A reply buffer of ctx->count bytes, clamped to
 * [PAGE_SIZE, fc->max_pages << PAGE_SHIFT], is allocated and a FUSE_READDIR
 * or FUSE_READDIRPLUS request (see fuse_use_readdirplus()) is sent for
 * ctx->pos.
 *
 * Returns -ENOMEM if the buffer cannot be allocated, the negative result of
 * fuse_simple_request() on failure, 0 for an empty reply, and otherwise the
 * result of the reply parser (0 or -EIO).
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	struct fuse_io_args ia = {};
	struct fuse_args *args = &ia.ap.args;
	void *buf;
	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
	u64 attr_version = 0, evict_ctr = 0;
	bool locked;

	buf = kvmalloc(bufsize, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	args->out_args[0].value = buf;

	plus = fuse_use_readdirplus(inode, ctx);
	if (plus) {
		/*
		 * Sample the counters before the request is sent; they are
		 * passed on to fuse_direntplus_link() with the reply.
		 */
		attr_version = fuse_get_attr_version(fm->fc);
		evict_ctr = fuse_get_evict_ctr(fm->fc);
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
	}
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fm, args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			/* Empty reply: end of directory, try to finalize the cache. */
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(buf, res, file, ctx, attr_version,
						evict_ctr);
		} else {
			res = parse_dirfile(buf, res, file, ctx);
		}
	}

	kvfree(buf);
	fuse_invalidate_atime(inode);
	return res;
}

/*
 * Result of scanning one cache page in fuse_parse_cache():
 *
 * FOUND_ERR:  a cached entry failed validation
 * FOUND_NONE: no entry at the requested position was seen in this page
 * FOUND_SOME: at least one entry was at the requested position and every
 *             such entry was accepted by dir_emit()
 * FOUND_ALL:  dir_emit() refused an entry (the caller's buffer is full)
 */
enum fuse_parse_result {
	FOUND_ERR = -1,
	FOUND_NONE = 0,
	FOUND_SOME,
	FOUND_ALL,
};

/*
 * Scan cached dirents in one mapped cache page, emitting those at or after
 * ctx->pos.
 *
 * @ff:   per-open state; readdir.pos and readdir.cache_off are advanced past
 *        every entry consumed
 * @addr: kernel mapping of the cache page
 * @size: number of valid bytes in the page
 * @ctx:  destination for emitted entries
 *
 * Scanning starts at the in-page offset of ff->readdir.cache_off and stops
 * when fewer than FUSE_NAME_OFFSET bytes remain or an entry with a zero
 * namelen (the zeroed tail of a page) is reached.  Entries are emitted only
 * while ff->readdir.pos equals ctx->pos; entries before that are skipped.
 * When dir_emit() refuses an entry, neither position is advanced past it.
 */
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = offset_in_page(ff->readdir.cache_off);
	enum fuse_parse_result res = FOUND_NONE;

	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}

/*
 * Discard the readdir cache state of @inode: mark it incomplete, bump the
 * version and rewind size and position to 0.
 *
 * Takes no lock itself; both callers in this file hold fi->rdc.lock.
 */
static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->rdc.cached = false;
	fi->rdc.version++;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
}

/*
 * Returned by fuse_readdir_cached() to tell fuse_readdir() to fall back to
 * fuse_readdir_uncached().
 */
#define UNCACHED 1

/*
 * Try to satisfy a readdir from the directory's readdir cache.
 *
 * Returns 0 when entries were emitted from the cache, the caller's buffer is
 * full, or the end of the cached data was reached exactly; UNCACHED when the
 * cache is not (yet) complete or the requested position was not found in it;
 * -EIO when a cached entry is malformed; or the error from
 * fuse_update_attributes().
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	/* Entered with fi->rdc.lock held. */
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* The last cache page is only partially filled; all others are full. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = offset_in_page(fi->rdc.size);
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if (offset_in_page(ff->readdir.cache_off) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		/* Reset only if no one else has reset the cache meanwhile. */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}

/*
 * Read entries of a FUSE directory into @ctx.
 *
 * Returns -EIO if the inode has been marked bad.  For files opened with
 * FOPEN_CACHE_DIR the cached path is tried first; if it is not used or it
 * returns UNCACHED, the entries are requested from the server.  Returns 0 on
 * success or a negative errno.
 */
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	return err;
}