xref: /linux/fs/fuse/readdir.c (revision 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
/*
 * Append one dirent to the per-inode readdir cache kept in the
 * directory's page cache.
 *
 * The entry is only added if it extends the cache exactly at its
 * current end (@pos == fi->rdc.pos); otherwise a different stream is
 * filling the cache, and this entry is silently skipped.  The rdc
 * lock is dropped while the page is looked up/created, so the cache
 * state is re-validated afterwards.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	/* Dirent doesn't fit in readdir cache page?  Skip caching. */
	if (reclen > PAGE_SIZE)
		return;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/* Snapshot version/size to detect races while unlocked below */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = offset_in_page(size);
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Mid-page append: page must already exist in the cache.
	 * Page-boundary append: create the page if necessary.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	if (!offset) {
		/* Fresh page: zero-fill so the unused tail parses as EOF */
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	/* The next entry must continue from this entry's offset cookie */
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
97 
98 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
99 {
100 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
101 	loff_t end;
102 
103 	spin_lock(&fi->rdc.lock);
104 	/* does cache end position match current position? */
105 	if (fi->rdc.pos != pos) {
106 		spin_unlock(&fi->rdc.lock);
107 		return;
108 	}
109 
110 	fi->rdc.cached = true;
111 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
112 	spin_unlock(&fi->rdc.lock);
113 
114 	/* truncate unused tail of cache */
115 	truncate_inode_pages(file->f_mapping, end);
116 }
117 
/*
 * Emit one dirent to the VFS via dir_emit(), first appending it to
 * the readdir cache when the server enabled FOPEN_CACHE_DIR on open.
 * Returns false when the caller's buffer is full.
 */
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;

	if (ff->open_flags & FOPEN_CACHE_DIR)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type | FILLDIR_FLAG_NOINTR);
}
129 
130 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
131 			 struct dir_context *ctx)
132 {
133 	while (nbytes >= FUSE_NAME_OFFSET) {
134 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
135 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
136 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
137 			return -EIO;
138 		if (reclen > nbytes)
139 			break;
140 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
141 			return -EIO;
142 
143 		if (!fuse_emit(file, ctx, dirent))
144 			break;
145 
146 		buf += reclen;
147 		nbytes -= reclen;
148 		ctx->pos = dirent->off;
149 	}
150 
151 	return 0;
152 }
153 
/*
 * Instantiate or refresh a dentry/inode pair for one READDIRPLUS entry.
 *
 * On success the server's lookup count for o->nodeid has been consumed
 * (nlookup bumped either explicitly here or inside fuse_iget()); on
 * failure the caller must balance it with a FORGET.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	int epoch;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	/* "." and ".." are never linked into the dcache */
	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);
	/* Sample the epoch before instantiation, for d_time below */
	epoch = atomic_read(&fc->epoch);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* Existing (or concurrently found) dentry: validate inode */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			/* Wrong or stale inode: invalidate and start over */
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			if (!IS_ERR(inode)) {
				/* Undo the nlookup taken by fuse_iget() */
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	dentry->d_time = epoch;
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
271 
272 static void fuse_force_forget(struct file *file, u64 nodeid)
273 {
274 	struct inode *inode = file_inode(file);
275 	struct fuse_mount *fm = get_fuse_mount(inode);
276 	struct fuse_forget_in inarg;
277 	FUSE_ARGS(args);
278 
279 	memset(&inarg, 0, sizeof(inarg));
280 	inarg.nlookup = 1;
281 	args.opcode = FUSE_FORGET;
282 	args.nodeid = nodeid;
283 	args.in_numargs = 1;
284 	args.in_args[0].size = sizeof(inarg);
285 	args.in_args[0].value = &inarg;
286 	args.force = true;
287 	args.noreply = true;
288 
289 	fuse_simple_request(fm, &args);
290 	/* ignore errors */
291 }
292 
/*
 * Walk a FUSE_READDIRPLUS reply buffer.
 *
 * Each well-formed entry is emitted to the VFS (until the caller's
 * buffer fills up) and then linked into the dcache via
 * fuse_direntplus_link().  Iteration continues even after the buffer
 * is full so the server's per-nodeid lookup counts stay balanced; a
 * failed link is compensated with a FORGET.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version,
			     u64 evict_ctr)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		/* Malformed name from the server: reject the reply */
		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* Truncated trailing record: stop parsing */
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold. But we still continue iterating
			   over remaining entries to link them. If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}
337 
/*
 * Fetch one batch of directory entries from the server.
 *
 * Sends FUSE_READDIR or FUSE_READDIRPLUS (per fuse_use_readdirplus())
 * starting at ctx->pos and parses the reply into the caller's
 * dir_context.  An empty reply means end-of-directory, at which point
 * the readdir cache (if enabled) is marked complete.
 *
 * Returns 0 on success or a negative errno.
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	struct fuse_io_args ia = {};
	struct fuse_args *args = &ia.ap.args;
	void *buf;
	/* Reply buffer: caller's count clamped to [PAGE_SIZE, max request] */
	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
	u64 attr_version = 0, evict_ctr = 0;
	bool locked;

	buf = kvmalloc(bufsize, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	args->out_args[0].value = buf;

	plus = fuse_use_readdirplus(inode, ctx);
	if (plus) {
		/* Snapshot counters so stale attrs/inodes can be detected */
		attr_version = fuse_get_attr_version(fm->fc);
		evict_ctr = fuse_get_evict_ctr(fm->fc);
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
	}
	/* Serialize with other readdirs on this directory if required */
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fm, args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			/* Zero-length reply: end of directory */
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(buf, res, file, ctx, attr_version,
						evict_ctr);
		} else {
			res = parse_dirfile(buf, res, file, ctx);
		}
	}

	kvfree(buf);
	fuse_invalidate_atime(inode);
	return res;
}
387 
/* Outcome of scanning one readdir cache page in fuse_parse_cache() */
enum fuse_parse_result {
	FOUND_ERR = -1,		/* malformed cached dirent */
	FOUND_NONE = 0,		/* no entry matched ctx->pos in this page */
	FOUND_SOME,		/* emitted at least one entry */
	FOUND_ALL,		/* caller's buffer filled; done for now */
};
394 
/*
 * Scan one page of the readdir cache, emitting entries to the VFS
 * once the per-file stream position (ff->readdir.pos) catches up with
 * ctx->pos.
 *
 * @size is the number of valid bytes in @addr (PAGE_SIZE except
 * possibly on the last page).  A zero namelen terminates the page
 * early, since pages are zero-filled when first added to the cache.
 */
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = offset_in_page(ff->readdir.cache_off);
	enum fuse_parse_result res = FOUND_NONE;

	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		/* Cached data was validated on insertion; WARN if corrupt */
		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		/* Advance the per-file cursor past this entry */
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}
436 
437 static void fuse_rdc_reset(struct inode *inode)
438 {
439 	struct fuse_inode *fi = get_fuse_inode(inode);
440 
441 	fi->rdc.cached = false;
442 	fi->rdc.version++;
443 	fi->rdc.size = 0;
444 	fi->rdc.pos = 0;
445 }
446 
/* Sentinel: the cached path could not serve the request; fall back */
#define UNCACHED 1
448 
/*
 * Serve a readdir request from the page-cache-backed readdir cache.
 *
 * Returns 0 when entries were emitted (or EOF was reached), UNCACHED
 * when the cache cannot serve this request and the caller must fall
 * back to fuse_readdir_uncached(), or a negative errno.
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, than reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* Only the final page may hold fewer than PAGE_SIZE valid bytes */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = offset_in_page(fi->rdc.size);
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if (offset_in_page(ff->readdir.cache_off) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
590 
591 int fuse_readdir(struct file *file, struct dir_context *ctx)
592 {
593 	struct fuse_file *ff = file->private_data;
594 	struct inode *inode = file_inode(file);
595 	int err;
596 
597 	if (fuse_is_bad(inode))
598 		return -EIO;
599 
600 	err = UNCACHED;
601 	if (ff->open_flags & FOPEN_CACHE_DIR)
602 		err = fuse_readdir_cached(file, ctx);
603 	if (err == UNCACHED)
604 		err = fuse_readdir_uncached(file, ctx);
605 
606 	return err;
607 }
608