xref: /linux/fs/fuse/readdir.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3   FUSE: Filesystem in Userspace
4   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
5 */
6 
7 
8 #include "fuse_i.h"
9 #include <linux/iversion.h>
10 #include <linux/posix_acl.h>
11 #include <linux/pagemap.h>
12 #include <linux/highmem.h>
13 #include <linux/vmalloc.h>
14 
15 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
16 {
17 	struct fuse_conn *fc = get_fuse_conn(dir);
18 	struct fuse_inode *fi = get_fuse_inode(dir);
19 
20 	if (!fc->do_readdirplus)
21 		return false;
22 	if (!fc->readdirplus_auto)
23 		return true;
24 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
25 		return true;
26 	if (ctx->pos == 0)
27 		return true;
28 	return false;
29 }
30 
31 static void fuse_add_dirent_to_cache(struct file *file,
32 				     struct fuse_dirent *dirent, loff_t pos)
33 {
34 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
35 	size_t reclen = FUSE_DIRENT_SIZE(dirent);
36 	pgoff_t index;
37 	struct page *page;
38 	loff_t size;
39 	u64 version;
40 	unsigned int offset;
41 	void *addr;
42 
43 	/* Dirent doesn't fit in readdir cache page?  Skip caching. */
44 	if (reclen > PAGE_SIZE)
45 		return;
46 
47 	spin_lock(&fi->rdc.lock);
48 	/*
49 	 * Is cache already completed?  Or this entry does not go at the end of
50 	 * cache?
51 	 */
52 	if (fi->rdc.cached || pos != fi->rdc.pos) {
53 		spin_unlock(&fi->rdc.lock);
54 		return;
55 	}
56 	version = fi->rdc.version;
57 	size = fi->rdc.size;
58 	offset = offset_in_page(size);
59 	index = size >> PAGE_SHIFT;
60 	/* Dirent doesn't fit in current page?  Jump to next page. */
61 	if (offset + reclen > PAGE_SIZE) {
62 		index++;
63 		offset = 0;
64 	}
65 	spin_unlock(&fi->rdc.lock);
66 
67 	if (offset) {
68 		page = find_lock_page(file->f_mapping, index);
69 	} else {
70 		page = find_or_create_page(file->f_mapping, index,
71 					   mapping_gfp_mask(file->f_mapping));
72 	}
73 	if (!page)
74 		return;
75 
76 	spin_lock(&fi->rdc.lock);
77 	/* Raced with another readdir */
78 	if (fi->rdc.version != version || fi->rdc.size != size ||
79 	    WARN_ON(fi->rdc.pos != pos))
80 		goto unlock;
81 
82 	addr = kmap_local_page(page);
83 	if (!offset) {
84 		clear_page(addr);
85 		SetPageUptodate(page);
86 	}
87 	memcpy(addr + offset, dirent, reclen);
88 	kunmap_local(addr);
89 	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
90 	fi->rdc.pos = dirent->off;
91 unlock:
92 	spin_unlock(&fi->rdc.lock);
93 	unlock_page(page);
94 	put_page(page);
95 }
96 
97 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
98 {
99 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
100 	loff_t end;
101 
102 	spin_lock(&fi->rdc.lock);
103 	/* does cache end position match current position? */
104 	if (fi->rdc.pos != pos) {
105 		spin_unlock(&fi->rdc.lock);
106 		return;
107 	}
108 
109 	fi->rdc.cached = true;
110 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
111 	spin_unlock(&fi->rdc.lock);
112 
113 	/* truncate unused tail of cache */
114 	truncate_inode_pages(file->f_mapping, end);
115 }
116 
117 static bool fuse_emit(struct file *file, struct dir_context *ctx,
118 		      struct fuse_dirent *dirent)
119 {
120 	struct fuse_file *ff = file->private_data;
121 
122 	if (ff->open_flags & FOPEN_CACHE_DIR)
123 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
124 
125 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
126 			dirent->type | FILLDIR_FLAG_NOINTR);
127 }
128 
129 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
130 			 struct dir_context *ctx)
131 {
132 	while (nbytes >= FUSE_NAME_OFFSET) {
133 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
134 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
135 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
136 			return -EIO;
137 		if (reclen > nbytes)
138 			break;
139 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
140 			return -EIO;
141 
142 		if (!fuse_emit(file, ctx, dirent))
143 			break;
144 
145 		buf += reclen;
146 		nbytes -= reclen;
147 		ctx->pos = dirent->off;
148 	}
149 
150 	return 0;
151 }
152 
153 static int fuse_direntplus_link(struct file *file,
154 				struct fuse_direntplus *direntplus,
155 				u64 attr_version, u64 evict_ctr)
156 {
157 	struct fuse_entry_out *o = &direntplus->entry_out;
158 	struct fuse_dirent *dirent = &direntplus->dirent;
159 	struct dentry *parent = file->f_path.dentry;
160 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
161 	struct dentry *dentry;
162 	struct dentry *alias;
163 	struct inode *dir = d_inode(parent);
164 	struct fuse_conn *fc;
165 	struct inode *inode;
166 	int epoch;
167 
168 	if (!o->nodeid) {
169 		/*
170 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
171 		 * ENOENT. Instead, it only means the userspace filesystem did
172 		 * not want to return attributes/handle for this entry.
173 		 *
174 		 * So do nothing.
175 		 */
176 		return 0;
177 	}
178 
179 	if (name.name[0] == '.') {
180 		/*
181 		 * We could potentially refresh the attributes of the directory
182 		 * and its parent?
183 		 */
184 		if (name.len == 1)
185 			return 0;
186 		if (name.name[1] == '.' && name.len == 2)
187 			return 0;
188 	}
189 
190 	if (invalid_nodeid(o->nodeid))
191 		return -EIO;
192 	if (fuse_invalid_attr(&o->attr))
193 		return -EIO;
194 
195 	fc = get_fuse_conn(dir);
196 	epoch = atomic_read(&fc->epoch);
197 
198 	name.hash = full_name_hash(parent, name.name, name.len);
199 	dentry = d_lookup(parent, &name);
200 	if (!dentry) {
201 retry:
202 		dentry = d_alloc_parallel(parent, &name);
203 		if (IS_ERR(dentry))
204 			return PTR_ERR(dentry);
205 	}
206 	if (!d_in_lookup(dentry)) {
207 		struct fuse_inode *fi;
208 		inode = d_inode(dentry);
209 		if (inode && get_node_id(inode) != o->nodeid)
210 			inode = NULL;
211 		if (!inode ||
212 		    fuse_stale_inode(inode, o->generation, &o->attr)) {
213 			if (inode)
214 				fuse_make_bad(inode);
215 			d_invalidate(dentry);
216 			dput(dentry);
217 			goto retry;
218 		}
219 		if (fuse_is_bad(inode)) {
220 			dput(dentry);
221 			return -EIO;
222 		}
223 
224 		fi = get_fuse_inode(inode);
225 		spin_lock(&fi->lock);
226 		fi->nlookup++;
227 		spin_unlock(&fi->lock);
228 
229 		forget_all_cached_acls(inode);
230 		fuse_change_attributes(inode, &o->attr, NULL,
231 				       ATTR_TIMEOUT(o),
232 				       attr_version);
233 		/*
234 		 * The other branch comes via fuse_iget()
235 		 * which bumps nlookup inside
236 		 */
237 	} else {
238 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
239 				  &o->attr, ATTR_TIMEOUT(o),
240 				  attr_version, evict_ctr);
241 		if (!inode)
242 			inode = ERR_PTR(-ENOMEM);
243 
244 		alias = d_splice_alias(inode, dentry);
245 		d_lookup_done(dentry);
246 		if (alias) {
247 			dput(dentry);
248 			dentry = alias;
249 		}
250 		if (IS_ERR(dentry)) {
251 			if (!IS_ERR(inode)) {
252 				struct fuse_inode *fi = get_fuse_inode(inode);
253 
254 				spin_lock(&fi->lock);
255 				fi->nlookup--;
256 				spin_unlock(&fi->lock);
257 			}
258 			return PTR_ERR(dentry);
259 		}
260 	}
261 	if (fc->readdirplus_auto)
262 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
263 	dentry->d_time = epoch;
264 	fuse_change_entry_timeout(dentry, o);
265 
266 	dput(dentry);
267 	return 0;
268 }
269 
270 static void fuse_force_forget(struct file *file, u64 nodeid)
271 {
272 	struct inode *inode = file_inode(file);
273 	struct fuse_mount *fm = get_fuse_mount(inode);
274 	struct fuse_forget_in inarg;
275 	FUSE_ARGS(args);
276 
277 	memset(&inarg, 0, sizeof(inarg));
278 	inarg.nlookup = 1;
279 	args.opcode = FUSE_FORGET;
280 	args.nodeid = nodeid;
281 	args.in_numargs = 1;
282 	args.in_args[0].size = sizeof(inarg);
283 	args.in_args[0].value = &inarg;
284 	args.force = true;
285 	args.noreply = true;
286 
287 	fuse_simple_request(fm, &args);
288 	/* ignore errors */
289 }
290 
291 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
292 			     struct dir_context *ctx, u64 attr_version,
293 			     u64 evict_ctr)
294 {
295 	struct fuse_direntplus *direntplus;
296 	struct fuse_dirent *dirent;
297 	size_t reclen;
298 	int over = 0;
299 	int ret;
300 
301 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
302 		direntplus = (struct fuse_direntplus *) buf;
303 		dirent = &direntplus->dirent;
304 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
305 
306 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
307 			return -EIO;
308 		if (reclen > nbytes)
309 			break;
310 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
311 			return -EIO;
312 
313 		if (!over) {
314 			/* We fill entries into dstbuf only as much as
315 			   it can hold. But we still continue iterating
316 			   over remaining entries to link them. If not,
317 			   we need to send a FORGET for each of those
318 			   which we did not link.
319 			*/
320 			over = !fuse_emit(file, ctx, dirent);
321 			if (!over)
322 				ctx->pos = dirent->off;
323 		}
324 
325 		buf += reclen;
326 		nbytes -= reclen;
327 
328 		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
329 		if (ret)
330 			fuse_force_forget(file, direntplus->entry_out.nodeid);
331 	}
332 
333 	return 0;
334 }
335 
336 static struct page **fuse_readdir_alloc_buf(struct fuse_args_pages *ap, size_t *bufsize)
337 {
338 	unsigned int i, nr_alloc, nr_pages = DIV_ROUND_UP(*bufsize, PAGE_SIZE);
339 	struct page **pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
340 
341 	if (!pages)
342 		return NULL;
343 
344 	nr_alloc = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages);
345 	if (!nr_alloc)
346 		goto free_array;
347 
348 	if (nr_alloc < nr_pages) {
349 		nr_pages = nr_alloc;
350 		*bufsize = (size_t) nr_pages << PAGE_SHIFT;
351 	}
352 
353 	ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs);
354 	if (!ap->folios)
355 		goto release_pages;
356 
357 	for (i = 0; i < nr_pages; i++) {
358 		ap->folios[i] = page_folio(pages[i]);
359 		ap->descs[i].length = min_t(size_t, *bufsize - (size_t)i * PAGE_SIZE, PAGE_SIZE);
360 	}
361 	ap->num_folios = nr_pages;
362 	ap->args.out_pages = true;
363 
364 	return pages;
365 
366 release_pages:
367 	release_pages(pages, nr_pages);
368 free_array:
369 	kfree(pages);
370 	return NULL;
371 }
372 
373 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
374 {
375 	int plus;
376 	ssize_t res;
377 	struct inode *inode = file_inode(file);
378 	struct fuse_mount *fm = get_fuse_mount(inode);
379 	struct fuse_conn *fc = fm->fc;
380 	struct fuse_io_args ia = {};
381 	struct fuse_args_pages *ap = &ia.ap;
382 	void *buf;
383 	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
384 	u64 attr_version = 0, evict_ctr = 0;
385 	bool locked;
386 	struct page **pages = fuse_readdir_alloc_buf(ap, &bufsize);
387 
388 	if (!pages)
389 		return -ENOMEM;
390 
391 	plus = fuse_use_readdirplus(inode, ctx);
392 	if (plus) {
393 		attr_version = fuse_get_attr_version(fm->fc);
394 		evict_ctr = fuse_get_evict_ctr(fm->fc);
395 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
396 	} else {
397 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
398 	}
399 	locked = fuse_lock_inode(inode);
400 	res = fuse_simple_request(fm, &ap->args);
401 	fuse_unlock_inode(inode, locked);
402 	if (res < 0)
403 		goto out;
404 
405 	if (!res) {
406 		struct fuse_file *ff = file->private_data;
407 
408 		if (ff->open_flags & FOPEN_CACHE_DIR)
409 			fuse_readdir_cache_end(file, ctx->pos);
410 		goto out;
411 	}
412 
413 	buf = vm_map_ram(pages, ap->num_folios, -1);
414 	if (!buf) {
415 		res = -ENOMEM;
416 	} else {
417 		if (plus)
418 			res = parse_dirplusfile(buf, res, file, ctx, attr_version, evict_ctr);
419 		else
420 			res = parse_dirfile(buf, res, file, ctx);
421 
422 		vm_unmap_ram(buf, ap->num_folios);
423 	}
424 out:
425 	kfree(ap->folios);
426 	release_pages(pages, ap->num_folios);
427 	kfree(pages);
428 
429 	fuse_invalidate_atime(inode);
430 
431 	return res;
432 }
433 
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* No entry at the requested position was seen. */
	FOUND_NONE = 0,
	/* At least one entry at the requested position was emitted. */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry: nothing more can be emitted. */
	FOUND_ALL = 2,
};
440 
441 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
442 					       void *addr, unsigned int size,
443 					       struct dir_context *ctx)
444 {
445 	unsigned int offset = offset_in_page(ff->readdir.cache_off);
446 	enum fuse_parse_result res = FOUND_NONE;
447 
448 	WARN_ON(offset >= size);
449 
450 	for (;;) {
451 		struct fuse_dirent *dirent = addr + offset;
452 		unsigned int nbytes = size - offset;
453 		size_t reclen;
454 
455 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
456 			break;
457 
458 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
459 
460 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
461 			return FOUND_ERR;
462 		if (WARN_ON(reclen > nbytes))
463 			return FOUND_ERR;
464 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
465 			return FOUND_ERR;
466 
467 		if (ff->readdir.pos == ctx->pos) {
468 			res = FOUND_SOME;
469 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
470 				      dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
471 				return FOUND_ALL;
472 			ctx->pos = dirent->off;
473 		}
474 		ff->readdir.pos = dirent->off;
475 		ff->readdir.cache_off += reclen;
476 
477 		offset += reclen;
478 	}
479 
480 	return res;
481 }
482 
483 static void fuse_rdc_reset(struct inode *inode)
484 {
485 	struct fuse_inode *fi = get_fuse_inode(inode);
486 
487 	fi->rdc.cached = false;
488 	fi->rdc.version++;
489 	fi->rdc.size = 0;
490 	fi->rdc.pos = 0;
491 	fi->rdc.epoch = 0;
492 }
493 
/*
 * Positive pseudo return value of fuse_readdir_cached(): the request could
 * not be served from the cache and fuse_readdir() must use the uncached path.
 */
#define UNCACHED 1
495 
496 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
497 {
498 	struct fuse_file *ff = file->private_data;
499 	struct inode *inode = file_inode(file);
500 	struct fuse_conn *fc = get_fuse_conn(inode);
501 	struct fuse_inode *fi = get_fuse_inode(inode);
502 	enum fuse_parse_result res;
503 	pgoff_t index;
504 	unsigned int size;
505 	struct page *page;
506 	void *addr;
507 
508 	/* Seeked?  If so, reset the cache stream */
509 	if (ff->readdir.pos != ctx->pos) {
510 		ff->readdir.pos = 0;
511 		ff->readdir.cache_off = 0;
512 	}
513 
514 	/*
515 	 * We're just about to start reading into the cache or reading the
516 	 * cache; both cases require an up-to-date mtime value.
517 	 */
518 	if (!ctx->pos && fc->auto_inval_data) {
519 		int err = fuse_update_attributes(inode, file, STATX_MTIME);
520 
521 		if (err)
522 			return err;
523 	}
524 
525 retry:
526 	spin_lock(&fi->rdc.lock);
527 retry_locked:
528 	if (!fi->rdc.cached) {
529 		/* Starting cache? Set cache mtime. */
530 		if (!ctx->pos && !fi->rdc.size) {
531 			fi->rdc.mtime = inode_get_mtime(inode);
532 			fi->rdc.iversion = inode_query_iversion(inode);
533 			fi->rdc.epoch = atomic_read(&fc->epoch);
534 		}
535 		spin_unlock(&fi->rdc.lock);
536 		return UNCACHED;
537 	}
538 	/*
539 	 * When at the beginning of the directory (i.e. just after opendir(3) or
540 	 * rewinddir(3)), then need to check whether directory contents have
541 	 * changed, and reset the cache if so.
542 	 */
543 	if (!ctx->pos) {
544 		struct timespec64 mtime = inode_get_mtime(inode);
545 
546 		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
547 		    !timespec64_equal(&fi->rdc.mtime, &mtime) ||
548 		    fi->rdc.epoch != atomic_read(&fc->epoch)) {
549 			fuse_rdc_reset(inode);
550 			goto retry_locked;
551 		}
552 	}
553 
554 	/*
555 	 * If cache version changed since the last getdents() call, then reset
556 	 * the cache stream.
557 	 */
558 	if (ff->readdir.version != fi->rdc.version) {
559 		ff->readdir.pos = 0;
560 		ff->readdir.cache_off = 0;
561 	}
562 	/*
563 	 * If at the beginning of the cache, than reset version to
564 	 * current.
565 	 */
566 	if (ff->readdir.pos == 0)
567 		ff->readdir.version = fi->rdc.version;
568 
569 	WARN_ON(fi->rdc.size < ff->readdir.cache_off);
570 
571 	index = ff->readdir.cache_off >> PAGE_SHIFT;
572 
573 	if (index == (fi->rdc.size >> PAGE_SHIFT))
574 		size = offset_in_page(fi->rdc.size);
575 	else
576 		size = PAGE_SIZE;
577 	spin_unlock(&fi->rdc.lock);
578 
579 	/* EOF? */
580 	if (offset_in_page(ff->readdir.cache_off) == size)
581 		return 0;
582 
583 	page = find_get_page_flags(file->f_mapping, index,
584 				   FGP_ACCESSED | FGP_LOCK);
585 	/* Page gone missing, then re-added to cache, but not initialized? */
586 	if (page && !PageUptodate(page)) {
587 		unlock_page(page);
588 		put_page(page);
589 		page = NULL;
590 	}
591 	spin_lock(&fi->rdc.lock);
592 	if (!page) {
593 		/*
594 		 * Uh-oh: page gone missing, cache is useless
595 		 */
596 		if (fi->rdc.version == ff->readdir.version)
597 			fuse_rdc_reset(inode);
598 		goto retry_locked;
599 	}
600 
601 	/* Make sure it's still the same version after getting the page. */
602 	if (ff->readdir.version != fi->rdc.version) {
603 		spin_unlock(&fi->rdc.lock);
604 		unlock_page(page);
605 		put_page(page);
606 		goto retry;
607 	}
608 	spin_unlock(&fi->rdc.lock);
609 
610 	/*
611 	 * Contents of the page are now protected against changing by holding
612 	 * the page lock.
613 	 */
614 	addr = kmap_local_page(page);
615 	res = fuse_parse_cache(ff, addr, size, ctx);
616 	kunmap_local(addr);
617 	unlock_page(page);
618 	put_page(page);
619 
620 	if (res == FOUND_ERR)
621 		return -EIO;
622 
623 	if (res == FOUND_ALL)
624 		return 0;
625 
626 	if (size == PAGE_SIZE) {
627 		/* We hit end of page: skip to next page. */
628 		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
629 		goto retry;
630 	}
631 
632 	/*
633 	 * End of cache reached.  If found position, then we are done, otherwise
634 	 * need to fall back to uncached, since the position we were looking for
635 	 * wasn't in the cache.
636 	 */
637 	return res == FOUND_SOME ? 0 : UNCACHED;
638 }
639 
640 int fuse_readdir(struct file *file, struct dir_context *ctx)
641 {
642 	struct fuse_file *ff = file->private_data;
643 	struct inode *inode = file_inode(file);
644 	int err;
645 
646 	if (fuse_is_bad(inode))
647 		return -EIO;
648 
649 	err = UNCACHED;
650 	if (ff->open_flags & FOPEN_CACHE_DIR)
651 		err = fuse_readdir_cached(file, ctx);
652 	if (err == UNCACHED)
653 		err = fuse_readdir_uncached(file, ctx);
654 
655 	return err;
656 }
657