xref: /linux/fs/fuse/readdir.c (revision 87e801e1678342fc23b1eb92c0eecedf5dca79cb)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
/*
 * Append one directory entry to the per-inode readdir cache.
 *
 * @file:   open directory; the cache pages live in file->f_mapping
 * @dirent: entry to copy into the cache
 * @pos:    directory position this entry was read at
 *
 * The entry is only added if the cache is not yet complete and @pos matches
 * the position at which the cache currently ends (fi->rdc.pos).  Entries never
 * straddle a page boundary: if the record does not fit in the remainder of
 * the current page, it is placed at the start of the next one.
 *
 * Failures (record too large, page not available, lost race) are silent; the
 * entry is simply not cached.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	/* Dirent doesn't fit in readdir cache page?  Skip caching. */
	if (reclen > PAGE_SIZE)
		return;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/*
	 * Snapshot version and size so that a concurrent change can be
	 * detected after the lock has been dropped and re-taken below.
	 */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = offset_in_page(size);
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Page lookup/creation is done without holding rdc.lock.  A non-zero
	 * offset means we continue in a page that must already exist, so only
	 * look it up; at offset zero a fresh page may have to be created.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	/* First record in this page: zero it and mark it valid. */
	if (!offset) {
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	/* Advance the cache end: byte size and the position of the next entry. */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
97 
98 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
99 {
100 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
101 	loff_t end;
102 
103 	spin_lock(&fi->rdc.lock);
104 	/* does cache end position match current position? */
105 	if (fi->rdc.pos != pos) {
106 		spin_unlock(&fi->rdc.lock);
107 		return;
108 	}
109 
110 	fi->rdc.cached = true;
111 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
112 	spin_unlock(&fi->rdc.lock);
113 
114 	/* truncate unused tail of cache */
115 	truncate_inode_pages(file->f_mapping, end);
116 }
117 
118 static bool fuse_emit(struct file *file, struct dir_context *ctx,
119 		      struct fuse_dirent *dirent)
120 {
121 	struct fuse_file *ff = file->private_data;
122 
123 	if (ff->open_flags & FOPEN_CACHE_DIR)
124 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
125 
126 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
127 			dirent->type | FILLDIR_FLAG_NOINTR);
128 }
129 
130 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
131 			 struct dir_context *ctx)
132 {
133 	while (nbytes >= FUSE_NAME_OFFSET) {
134 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
135 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
136 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
137 			return -EIO;
138 		if (reclen > nbytes)
139 			break;
140 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
141 			return -EIO;
142 
143 		if (!fuse_emit(file, ctx, dirent))
144 			break;
145 
146 		buf += reclen;
147 		nbytes -= reclen;
148 		ctx->pos = dirent->off;
149 	}
150 
151 	return 0;
152 }
153 
/*
 * Instantiate or refresh the dentry/inode for one READDIRPLUS entry.
 *
 * @file:         the open directory being read
 * @direntplus:   entry as returned by userspace (dirent + lookup reply)
 * @attr_version: attribute version sampled before the request was sent,
 *                passed on to fuse_change_attributes()/fuse_iget()
 * @evict_ctr:    eviction counter sampled before the request was sent,
 *                passed on to fuse_iget()
 *
 * Entries with a zero nodeid and the "." / ".." entries are ignored.
 *
 * Returns 0 on success (including the ignored cases) or a negative errno.
 * The caller in this file, parse_dirplusfile(), sends a FORGET for the
 * entry's nodeid whenever a non-zero value is returned.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	int epoch;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);
	/* Sample the epoch now; it is stored in dentry->d_time at the end. */
	epoch = atomic_read(&fc->epoch);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* A dentry for this name already exists in the dcache. */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/* Treat an inode with a different nodeid like no inode at all. */
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		/*
		 * Negative, mismatching or stale: invalidate the old dentry
		 * and start over with a freshly allocated one.
		 */
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Account for the lookup reference carried by this entry. */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* New in-lookup dentry: get the inode and attach it. */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/* From here on work with whatever d_splice_alias() returned. */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/*
			 * Splicing failed although the inode was obtained:
			 * take back the nlookup count, since the caller sends
			 * a FORGET for this nodeid on error.
			 */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	dentry->d_time = epoch;
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
270 
271 static void fuse_force_forget(struct file *file, u64 nodeid)
272 {
273 	struct inode *inode = file_inode(file);
274 	struct fuse_mount *fm = get_fuse_mount(inode);
275 	struct fuse_forget_in inarg;
276 	FUSE_ARGS(args);
277 
278 	memset(&inarg, 0, sizeof(inarg));
279 	inarg.nlookup = 1;
280 	args.opcode = FUSE_FORGET;
281 	args.nodeid = nodeid;
282 	args.in_numargs = 1;
283 	args.in_args[0].size = sizeof(inarg);
284 	args.in_args[0].value = &inarg;
285 	args.force = true;
286 	args.noreply = true;
287 
288 	fuse_simple_request(fm, &args);
289 	/* ignore errors */
290 }
291 
292 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
293 			     struct dir_context *ctx, u64 attr_version,
294 			     u64 evict_ctr)
295 {
296 	struct fuse_direntplus *direntplus;
297 	struct fuse_dirent *dirent;
298 	size_t reclen;
299 	int over = 0;
300 	int ret;
301 
302 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
303 		direntplus = (struct fuse_direntplus *) buf;
304 		dirent = &direntplus->dirent;
305 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
306 
307 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
308 			return -EIO;
309 		if (reclen > nbytes)
310 			break;
311 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
312 			return -EIO;
313 
314 		if (!over) {
315 			/* We fill entries into dstbuf only as much as
316 			   it can hold. But we still continue iterating
317 			   over remaining entries to link them. If not,
318 			   we need to send a FORGET for each of those
319 			   which we did not link.
320 			*/
321 			over = !fuse_emit(file, ctx, dirent);
322 			if (!over)
323 				ctx->pos = dirent->off;
324 		}
325 
326 		buf += reclen;
327 		nbytes -= reclen;
328 
329 		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
330 		if (ret)
331 			fuse_force_forget(file, direntplus->entry_out.nodeid);
332 	}
333 
334 	return 0;
335 }
336 
337 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
338 {
339 	int plus;
340 	ssize_t res;
341 	struct inode *inode = file_inode(file);
342 	struct fuse_mount *fm = get_fuse_mount(inode);
343 	struct fuse_conn *fc = fm->fc;
344 	struct fuse_io_args ia = {};
345 	struct fuse_args *args = &ia.ap.args;
346 	void *buf;
347 	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
348 	u64 attr_version = 0, evict_ctr = 0;
349 	bool locked;
350 
351 	buf = kvmalloc(bufsize, GFP_KERNEL);
352 	if (!buf)
353 		return -ENOMEM;
354 
355 	args->out_args[0].value = buf;
356 
357 	plus = fuse_use_readdirplus(inode, ctx);
358 	if (plus) {
359 		attr_version = fuse_get_attr_version(fm->fc);
360 		evict_ctr = fuse_get_evict_ctr(fm->fc);
361 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
362 	} else {
363 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
364 	}
365 	locked = fuse_lock_inode(inode);
366 	res = fuse_simple_request(fm, args);
367 	fuse_unlock_inode(inode, locked);
368 	if (res >= 0) {
369 		if (!res) {
370 			struct fuse_file *ff = file->private_data;
371 
372 			if (ff->open_flags & FOPEN_CACHE_DIR)
373 				fuse_readdir_cache_end(file, ctx->pos);
374 		} else if (plus) {
375 			res = parse_dirplusfile(buf, res, file, ctx, attr_version,
376 						evict_ctr);
377 		} else {
378 			res = parse_dirfile(buf, res, file, ctx);
379 		}
380 	}
381 
382 	kvfree(buf);
383 	fuse_invalidate_atime(inode);
384 	return res;
385 }
386 
/* Outcome of scanning one page of the readdir cache in fuse_parse_cache(). */
enum fuse_parse_result {
	/* A cached record failed validation. */
	FOUND_ERR = -1,
	/* The requested position was not reached in this page. */
	FOUND_NONE = 0,
	/* At least one entry was passed to dir_emit(). */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry: the consumer is full. */
	FOUND_ALL = 2,
};
393 
394 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
395 					       void *addr, unsigned int size,
396 					       struct dir_context *ctx)
397 {
398 	unsigned int offset = offset_in_page(ff->readdir.cache_off);
399 	enum fuse_parse_result res = FOUND_NONE;
400 
401 	WARN_ON(offset >= size);
402 
403 	for (;;) {
404 		struct fuse_dirent *dirent = addr + offset;
405 		unsigned int nbytes = size - offset;
406 		size_t reclen;
407 
408 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
409 			break;
410 
411 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
412 
413 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
414 			return FOUND_ERR;
415 		if (WARN_ON(reclen > nbytes))
416 			return FOUND_ERR;
417 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
418 			return FOUND_ERR;
419 
420 		if (ff->readdir.pos == ctx->pos) {
421 			res = FOUND_SOME;
422 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
423 				      dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
424 				return FOUND_ALL;
425 			ctx->pos = dirent->off;
426 		}
427 		ff->readdir.pos = dirent->off;
428 		ff->readdir.cache_off += reclen;
429 
430 		offset += reclen;
431 	}
432 
433 	return res;
434 }
435 
436 static void fuse_rdc_reset(struct inode *inode)
437 {
438 	struct fuse_inode *fi = get_fuse_inode(inode);
439 
440 	fi->rdc.cached = false;
441 	fi->rdc.version++;
442 	fi->rdc.size = 0;
443 	fi->rdc.pos = 0;
444 }
445 
/*
 * Positive pseudo return value of fuse_readdir_cached(): the request could
 * not be served from the readdir cache, so fuse_readdir() falls back to
 * fuse_readdir_uncached().
 */
#define UNCACHED 1
447 
/*
 * Try to serve a readdir request from the per-inode readdir cache.
 *
 * The per-open state in ff->readdir (pos, cache_off, version) tracks how far
 * this file has progressed through the cache; it is reset whenever the caller
 * seeked or the cache version changed.
 *
 * Returns 0 when entries were emitted or end of directory was reached,
 * UNCACHED when the caller must fall back to an uncached read, and a negative
 * errno on failure (-EIO for a corrupted cache page, or the error from
 * fuse_update_attributes()).
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	/* Cache not complete yet: the entries have to come from userspace. */
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* The last cache page is only partially filled; all others are full. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = offset_in_page(fi->rdc.size);
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if (offset_in_page(ff->readdir.cache_off) == size)
		return 0;

	/* Look up and lock the cache page with rdc.lock dropped. */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		/*
		 * Reset only if the cache is still the version this file was
		 * reading, then re-evaluate with the lock held.
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	/* The consumer's buffer is full: nothing more to do in this call. */
	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
589 
590 int fuse_readdir(struct file *file, struct dir_context *ctx)
591 {
592 	struct fuse_file *ff = file->private_data;
593 	struct inode *inode = file_inode(file);
594 	int err;
595 
596 	if (fuse_is_bad(inode))
597 		return -EIO;
598 
599 	err = UNCACHED;
600 	if (ff->open_flags & FOPEN_CACHE_DIR)
601 		err = fuse_readdir_cached(file, ctx);
602 	if (err == UNCACHED)
603 		err = fuse_readdir_uncached(file, ctx);
604 
605 	return err;
606 }
607