xref: /linux/fs/fuse/readdir.c (revision 572af9f284669d31d9175122bbef9bc62cea8ded)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
32 static void fuse_add_dirent_to_cache(struct file *file,
33 				     struct fuse_dirent *dirent, loff_t pos)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 	size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 	pgoff_t index;
38 	struct page *page;
39 	loff_t size;
40 	u64 version;
41 	unsigned int offset;
42 	void *addr;
43 
44 	spin_lock(&fi->rdc.lock);
45 	/*
46 	 * Is cache already completed?  Or this entry does not go at the end of
47 	 * cache?
48 	 */
49 	if (fi->rdc.cached || pos != fi->rdc.pos) {
50 		spin_unlock(&fi->rdc.lock);
51 		return;
52 	}
53 	version = fi->rdc.version;
54 	size = fi->rdc.size;
55 	offset = size & ~PAGE_MASK;
56 	index = size >> PAGE_SHIFT;
57 	/* Dirent doesn't fit in current page?  Jump to next page. */
58 	if (offset + reclen > PAGE_SIZE) {
59 		index++;
60 		offset = 0;
61 	}
62 	spin_unlock(&fi->rdc.lock);
63 
64 	if (offset) {
65 		page = find_lock_page(file->f_mapping, index);
66 	} else {
67 		page = find_or_create_page(file->f_mapping, index,
68 					   mapping_gfp_mask(file->f_mapping));
69 	}
70 	if (!page)
71 		return;
72 
73 	spin_lock(&fi->rdc.lock);
74 	/* Raced with another readdir */
75 	if (fi->rdc.version != version || fi->rdc.size != size ||
76 	    WARN_ON(fi->rdc.pos != pos))
77 		goto unlock;
78 
79 	addr = kmap_local_page(page);
80 	if (!offset) {
81 		clear_page(addr);
82 		SetPageUptodate(page);
83 	}
84 	memcpy(addr + offset, dirent, reclen);
85 	kunmap_local(addr);
86 	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87 	fi->rdc.pos = dirent->off;
88 unlock:
89 	spin_unlock(&fi->rdc.lock);
90 	unlock_page(page);
91 	put_page(page);
92 }
93 
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 	loff_t end;
98 
99 	spin_lock(&fi->rdc.lock);
100 	/* does cache end position match current position? */
101 	if (fi->rdc.pos != pos) {
102 		spin_unlock(&fi->rdc.lock);
103 		return;
104 	}
105 
106 	fi->rdc.cached = true;
107 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
108 	spin_unlock(&fi->rdc.lock);
109 
110 	/* truncate unused tail of cache */
111 	truncate_inode_pages(file->f_mapping, end);
112 }
113 
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115 		      struct fuse_dirent *dirent)
116 {
117 	struct fuse_file *ff = file->private_data;
118 
119 	if (ff->open_flags & FOPEN_CACHE_DIR)
120 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121 
122 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123 			dirent->type);
124 }
125 
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127 			 struct dir_context *ctx)
128 {
129 	while (nbytes >= FUSE_NAME_OFFSET) {
130 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
132 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133 			return -EIO;
134 		if (reclen > nbytes)
135 			break;
136 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 			return -EIO;
138 
139 		if (!fuse_emit(file, ctx, dirent))
140 			break;
141 
142 		buf += reclen;
143 		nbytes -= reclen;
144 		ctx->pos = dirent->off;
145 	}
146 
147 	return 0;
148 }
149 
/*
 * Instantiate or refresh the dentry and inode described by one
 * fuse_direntplus record.
 *
 * @file:         open directory the record was read from
 * @direntplus:   the record (name plus entry_out lookup data)
 * @attr_version: attribute version passed on to fuse_change_attributes() /
 *                fuse_iget()
 * @evict_ctr:    eviction counter passed on to fuse_iget()
 *
 * Records with a zero nodeid and the "." / ".." entries are skipped.
 * For everything else the dentry is looked up (or allocated) under the
 * directory, the inode is attached or its attributes are updated, and the
 * dentry timeout is refreshed.  On success the inode's nlookup has been
 * raised by one.
 *
 * Returns 0 on success or skip, -EIO for an invalid nodeid/attr or a bad
 * inode, or the error from d_alloc_parallel() / d_splice_alias().
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	/* Wait queue handed to d_alloc_parallel() below. */
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(parent, name.name, name.len);
	/* Try an existing dentry first; allocate one only if none is found. */
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* Dentry is not in the in-lookup state: reuse it if it matches. */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/* An inode with a different nodeid is treated as no inode. */
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/*
			 * No usable inode: mark a stale one bad, invalidate
			 * the dentry and go back to allocate a new one.
			 */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* Dentry is in the in-lookup state: get the inode and splice it in. */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/* If d_splice_alias() returned something, use that instead. */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/* Undo the nlookup bump if an inode had been obtained. */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
264 
265 static void fuse_force_forget(struct file *file, u64 nodeid)
266 {
267 	struct inode *inode = file_inode(file);
268 	struct fuse_mount *fm = get_fuse_mount(inode);
269 	struct fuse_forget_in inarg;
270 	FUSE_ARGS(args);
271 
272 	memset(&inarg, 0, sizeof(inarg));
273 	inarg.nlookup = 1;
274 	args.opcode = FUSE_FORGET;
275 	args.nodeid = nodeid;
276 	args.in_numargs = 1;
277 	args.in_args[0].size = sizeof(inarg);
278 	args.in_args[0].value = &inarg;
279 	args.force = true;
280 	args.noreply = true;
281 
282 	fuse_simple_request(fm, &args);
283 	/* ignore errors */
284 }
285 
286 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
287 			     struct dir_context *ctx, u64 attr_version,
288 			     u64 evict_ctr)
289 {
290 	struct fuse_direntplus *direntplus;
291 	struct fuse_dirent *dirent;
292 	size_t reclen;
293 	int over = 0;
294 	int ret;
295 
296 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
297 		direntplus = (struct fuse_direntplus *) buf;
298 		dirent = &direntplus->dirent;
299 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
300 
301 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
302 			return -EIO;
303 		if (reclen > nbytes)
304 			break;
305 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
306 			return -EIO;
307 
308 		if (!over) {
309 			/* We fill entries into dstbuf only as much as
310 			   it can hold. But we still continue iterating
311 			   over remaining entries to link them. If not,
312 			   we need to send a FORGET for each of those
313 			   which we did not link.
314 			*/
315 			over = !fuse_emit(file, ctx, dirent);
316 			if (!over)
317 				ctx->pos = dirent->off;
318 		}
319 
320 		buf += reclen;
321 		nbytes -= reclen;
322 
323 		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
324 		if (ret)
325 			fuse_force_forget(file, direntplus->entry_out.nodeid);
326 	}
327 
328 	return 0;
329 }
330 
331 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
332 {
333 	int plus;
334 	ssize_t res;
335 	struct folio *folio;
336 	struct inode *inode = file_inode(file);
337 	struct fuse_mount *fm = get_fuse_mount(inode);
338 	struct fuse_io_args ia = {};
339 	struct fuse_args_pages *ap = &ia.ap;
340 	struct fuse_folio_desc desc = { .length = PAGE_SIZE };
341 	u64 attr_version = 0, evict_ctr = 0;
342 	bool locked;
343 
344 	folio = folio_alloc(GFP_KERNEL, 0);
345 	if (!folio)
346 		return -ENOMEM;
347 
348 	plus = fuse_use_readdirplus(inode, ctx);
349 	ap->args.out_pages = true;
350 	ap->num_folios = 1;
351 	ap->folios = &folio;
352 	ap->descs = &desc;
353 	if (plus) {
354 		attr_version = fuse_get_attr_version(fm->fc);
355 		evict_ctr = fuse_get_evict_ctr(fm->fc);
356 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
357 				    FUSE_READDIRPLUS);
358 	} else {
359 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
360 				    FUSE_READDIR);
361 	}
362 	locked = fuse_lock_inode(inode);
363 	res = fuse_simple_request(fm, &ap->args);
364 	fuse_unlock_inode(inode, locked);
365 	if (res >= 0) {
366 		if (!res) {
367 			struct fuse_file *ff = file->private_data;
368 
369 			if (ff->open_flags & FOPEN_CACHE_DIR)
370 				fuse_readdir_cache_end(file, ctx->pos);
371 		} else if (plus) {
372 			res = parse_dirplusfile(folio_address(folio), res,
373 						file, ctx, attr_version,
374 						evict_ctr);
375 		} else {
376 			res = parse_dirfile(folio_address(folio), res, file,
377 					    ctx);
378 		}
379 	}
380 
381 	folio_put(folio);
382 	fuse_invalidate_atime(inode);
383 	return res;
384 }
385 
/* Outcome of scanning one cached page in fuse_parse_cache(). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached record failed validation */
	FOUND_NONE = 0,		/* no record matched the requested position */
	FOUND_SOME = 1,		/* at least one record was passed to dir_emit() */
	FOUND_ALL  = 2,		/* dir_emit() refused a record; stop reading */
};
392 
393 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
394 					       void *addr, unsigned int size,
395 					       struct dir_context *ctx)
396 {
397 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
398 	enum fuse_parse_result res = FOUND_NONE;
399 
400 	WARN_ON(offset >= size);
401 
402 	for (;;) {
403 		struct fuse_dirent *dirent = addr + offset;
404 		unsigned int nbytes = size - offset;
405 		size_t reclen;
406 
407 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
408 			break;
409 
410 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
411 
412 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
413 			return FOUND_ERR;
414 		if (WARN_ON(reclen > nbytes))
415 			return FOUND_ERR;
416 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
417 			return FOUND_ERR;
418 
419 		if (ff->readdir.pos == ctx->pos) {
420 			res = FOUND_SOME;
421 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
422 				      dirent->ino, dirent->type))
423 				return FOUND_ALL;
424 			ctx->pos = dirent->off;
425 		}
426 		ff->readdir.pos = dirent->off;
427 		ff->readdir.cache_off += reclen;
428 
429 		offset += reclen;
430 	}
431 
432 	return res;
433 }
434 
435 static void fuse_rdc_reset(struct inode *inode)
436 {
437 	struct fuse_inode *fi = get_fuse_inode(inode);
438 
439 	fi->rdc.cached = false;
440 	fi->rdc.version++;
441 	fi->rdc.size = 0;
442 	fi->rdc.pos = 0;
443 }
444 
/*
 * Positive return value of fuse_readdir_cached() telling fuse_readdir() to
 * fall back to fuse_readdir_uncached().
 */
#define UNCACHED 1
446 
/*
 * Try to serve a directory read from the readdir cache.
 *
 * @file: open directory (opened with FOPEN_CACHE_DIR by the caller's check)
 * @ctx:  directory context; ctx->pos is the position to read from
 *
 * Walks the cached pages starting at the per-open stream offset
 * (ff->readdir.cache_off), emitting records once the stream reaches
 * ctx->pos.  The cache is reset when the directory's mtime/iversion no
 * longer match at position zero, or when a cache page has gone missing.
 *
 * Returns 0 when entries were served from the cache or the cached end of
 * the directory was reached, UNCACHED when the caller must read from the
 * server instead, -EIO if cached data failed validation, or the error from
 * fuse_update_attributes().
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	/* Reached with rdc.lock held, both by fall-through and by goto. */
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* The last cache page is only partially filled; earlier ones are full. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
588 
589 int fuse_readdir(struct file *file, struct dir_context *ctx)
590 {
591 	struct fuse_file *ff = file->private_data;
592 	struct inode *inode = file_inode(file);
593 	int err;
594 
595 	if (fuse_is_bad(inode))
596 		return -EIO;
597 
598 	err = UNCACHED;
599 	if (ff->open_flags & FOPEN_CACHE_DIR)
600 		err = fuse_readdir_cached(file, ctx);
601 	if (err == UNCACHED)
602 		err = fuse_readdir_uncached(file, ctx);
603 
604 	return err;
605 }
606