xref: /linux/fs/fuse/readdir.c (revision bfb4a6c721517a11b277e8841f8a7a64b1b14b72)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
/*
 * Append one directory entry to the directory's readdir cache, which is kept
 * in the pages of file->f_mapping.
 *
 * @file:   open directory the entry was read through
 * @dirent: entry to store; FUSE_DIRENT_SIZE(dirent) bytes are copied
 * @pos:    directory position the entry was read at; the entry is stored only
 *          if this equals the position the cache currently ends at
 *
 * The entry is silently dropped when the cache is already complete, when @pos
 * is not the cache's end position, when the target page cannot be obtained,
 * or when the cache state changed while the page was being looked up.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/*
	 * Snapshot version and size: the lock is dropped for the page lookup
	 * below, and these are compared again afterwards to detect a race.
	 */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * A non-zero offset means the page already holds earlier entries, so
	 * it is only looked up.  At offset zero a new page is started and may
	 * have to be created.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	/* First entry in this page: zero it and mark it up to date. */
	if (!offset) {
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	/*
	 * Advance the end of the cache: size now covers this record, and pos
	 * becomes dirent->off, which the next appended entry's @pos has to
	 * match (see the check at the top).
	 */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
93 
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 	loff_t end;
98 
99 	spin_lock(&fi->rdc.lock);
100 	/* does cache end position match current position? */
101 	if (fi->rdc.pos != pos) {
102 		spin_unlock(&fi->rdc.lock);
103 		return;
104 	}
105 
106 	fi->rdc.cached = true;
107 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
108 	spin_unlock(&fi->rdc.lock);
109 
110 	/* truncate unused tail of cache */
111 	truncate_inode_pages(file->f_mapping, end);
112 }
113 
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115 		      struct fuse_dirent *dirent)
116 {
117 	struct fuse_file *ff = file->private_data;
118 
119 	if (ff->open_flags & FOPEN_CACHE_DIR)
120 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121 
122 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123 			dirent->type | FILLDIR_FLAG_NOINTR);
124 }
125 
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127 			 struct dir_context *ctx)
128 {
129 	while (nbytes >= FUSE_NAME_OFFSET) {
130 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
132 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133 			return -EIO;
134 		if (reclen > nbytes)
135 			break;
136 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 			return -EIO;
138 
139 		if (!fuse_emit(file, ctx, dirent))
140 			break;
141 
142 		buf += reclen;
143 		nbytes -= reclen;
144 		ctx->pos = dirent->off;
145 	}
146 
147 	return 0;
148 }
149 
/*
 * Instantiate or refresh the dentry and inode for one READDIRPLUS entry.
 *
 * @file:         open directory being read; its dentry is used as the parent
 * @direntplus:   entry taken from the READDIRPLUS reply buffer
 * @attr_version: passed through to fuse_change_attributes() / fuse_iget()
 * @evict_ctr:    passed through to fuse_iget()
 *
 * Returns 0 on success and also when the entry is deliberately skipped (zero
 * nodeid, "." or ".."); otherwise a negative errno.  The caller in this file
 * sends a FORGET for the entry's nodeid on any non-zero return.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	int epoch;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);
	/* Sampled before the lookup; stored in dentry->d_time further down. */
	epoch = atomic_read(&fc->epoch);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	/*
	 * The dentry is either one that already existed (from d_lookup() or
	 * d_alloc_parallel()) or a new one still in lookup; d_in_lookup()
	 * tells the two cases apart.
	 */
	if (!d_in_lookup(dentry)) {
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/* An inode with a different nodeid is treated like no inode. */
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		/*
		 * No usable inode, or the inode is stale: mark a stale inode
		 * bad, invalidate the dentry and start over with a new one.
		 */
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		/* Count this entry as one more lookup of the inode. */
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		/* A NULL inode is handed to d_splice_alias() as -ENOMEM. */
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/* Continue with the alias, which may itself be an ERR_PTR. */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/*
			 * Splicing failed: if an inode was obtained, take back
			 * the nlookup count added for it.
			 */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	dentry->d_time = epoch;
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
267 
268 static void fuse_force_forget(struct file *file, u64 nodeid)
269 {
270 	struct inode *inode = file_inode(file);
271 	struct fuse_mount *fm = get_fuse_mount(inode);
272 	struct fuse_forget_in inarg;
273 	FUSE_ARGS(args);
274 
275 	memset(&inarg, 0, sizeof(inarg));
276 	inarg.nlookup = 1;
277 	args.opcode = FUSE_FORGET;
278 	args.nodeid = nodeid;
279 	args.in_numargs = 1;
280 	args.in_args[0].size = sizeof(inarg);
281 	args.in_args[0].value = &inarg;
282 	args.force = true;
283 	args.noreply = true;
284 
285 	fuse_simple_request(fm, &args);
286 	/* ignore errors */
287 }
288 
289 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
290 			     struct dir_context *ctx, u64 attr_version,
291 			     u64 evict_ctr)
292 {
293 	struct fuse_direntplus *direntplus;
294 	struct fuse_dirent *dirent;
295 	size_t reclen;
296 	int over = 0;
297 	int ret;
298 
299 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
300 		direntplus = (struct fuse_direntplus *) buf;
301 		dirent = &direntplus->dirent;
302 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
303 
304 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
305 			return -EIO;
306 		if (reclen > nbytes)
307 			break;
308 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
309 			return -EIO;
310 
311 		if (!over) {
312 			/* We fill entries into dstbuf only as much as
313 			   it can hold. But we still continue iterating
314 			   over remaining entries to link them. If not,
315 			   we need to send a FORGET for each of those
316 			   which we did not link.
317 			*/
318 			over = !fuse_emit(file, ctx, dirent);
319 			if (!over)
320 				ctx->pos = dirent->off;
321 		}
322 
323 		buf += reclen;
324 		nbytes -= reclen;
325 
326 		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
327 		if (ret)
328 			fuse_force_forget(file, direntplus->entry_out.nodeid);
329 	}
330 
331 	return 0;
332 }
333 
334 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
335 {
336 	int plus;
337 	ssize_t res;
338 	struct inode *inode = file_inode(file);
339 	struct fuse_mount *fm = get_fuse_mount(inode);
340 	struct fuse_conn *fc = fm->fc;
341 	struct fuse_io_args ia = {};
342 	struct fuse_args *args = &ia.ap.args;
343 	void *buf;
344 	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
345 	u64 attr_version = 0, evict_ctr = 0;
346 	bool locked;
347 
348 	buf = kvmalloc(bufsize, GFP_KERNEL);
349 	if (!buf)
350 		return -ENOMEM;
351 
352 	args->out_args[0].value = buf;
353 
354 	plus = fuse_use_readdirplus(inode, ctx);
355 	if (plus) {
356 		attr_version = fuse_get_attr_version(fm->fc);
357 		evict_ctr = fuse_get_evict_ctr(fm->fc);
358 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
359 	} else {
360 		fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
361 	}
362 	locked = fuse_lock_inode(inode);
363 	res = fuse_simple_request(fm, args);
364 	fuse_unlock_inode(inode, locked);
365 	if (res >= 0) {
366 		if (!res) {
367 			struct fuse_file *ff = file->private_data;
368 
369 			if (ff->open_flags & FOPEN_CACHE_DIR)
370 				fuse_readdir_cache_end(file, ctx->pos);
371 		} else if (plus) {
372 			res = parse_dirplusfile(buf, res, file, ctx, attr_version,
373 						evict_ctr);
374 		} else {
375 			res = parse_dirfile(buf, res, file, ctx);
376 		}
377 	}
378 
379 	kvfree(buf);
380 	fuse_invalidate_atime(inode);
381 	return res;
382 }
383 
/* Outcome of parsing one page of the readdir cache in fuse_parse_cache(). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* No entry at the requested position was encountered. */
	FOUND_NONE = 0,
	/* The requested position was reached and entries were handed out. */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry, so no more can be emitted. */
	FOUND_ALL = 2,
};
390 
391 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
392 					       void *addr, unsigned int size,
393 					       struct dir_context *ctx)
394 {
395 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
396 	enum fuse_parse_result res = FOUND_NONE;
397 
398 	WARN_ON(offset >= size);
399 
400 	for (;;) {
401 		struct fuse_dirent *dirent = addr + offset;
402 		unsigned int nbytes = size - offset;
403 		size_t reclen;
404 
405 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
406 			break;
407 
408 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
409 
410 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
411 			return FOUND_ERR;
412 		if (WARN_ON(reclen > nbytes))
413 			return FOUND_ERR;
414 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
415 			return FOUND_ERR;
416 
417 		if (ff->readdir.pos == ctx->pos) {
418 			res = FOUND_SOME;
419 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
420 				      dirent->ino, dirent->type | FILLDIR_FLAG_NOINTR))
421 				return FOUND_ALL;
422 			ctx->pos = dirent->off;
423 		}
424 		ff->readdir.pos = dirent->off;
425 		ff->readdir.cache_off += reclen;
426 
427 		offset += reclen;
428 	}
429 
430 	return res;
431 }
432 
433 static void fuse_rdc_reset(struct inode *inode)
434 {
435 	struct fuse_inode *fi = get_fuse_inode(inode);
436 
437 	fi->rdc.cached = false;
438 	fi->rdc.version++;
439 	fi->rdc.size = 0;
440 	fi->rdc.pos = 0;
441 }
442 
/*
 * Positive return value of fuse_readdir_cached() telling fuse_readdir() that
 * the request could not be served from the cache and the uncached path has
 * to be used.
 */
#define UNCACHED 1
444 
/*
 * Try to satisfy a readdir from the per-inode directory cache.
 *
 * Returns 0 when entries were emitted from the cache or the end of a
 * complete cache was reached, UNCACHED when the caller has to fall back to
 * the uncached path, or a negative errno (the error from
 * fuse_update_attributes(), or -EIO when cached data fails validation).
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	/* Cache not complete: the uncached path reads (and fills) it. */
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/*
	 * Valid bytes in the page about to be read: the last cache page is
	 * only filled up to rdc.size, every earlier page counts as full.
	 */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/* Look the page up and lock it, without holding rdc.lock. */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		/*
		 * Reset only if the cache is still the version this stream was
		 * reading; either way re-evaluate with the lock held.
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	/* The destination took all it can: done for this call. */
	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
586 
587 int fuse_readdir(struct file *file, struct dir_context *ctx)
588 {
589 	struct fuse_file *ff = file->private_data;
590 	struct inode *inode = file_inode(file);
591 	int err;
592 
593 	if (fuse_is_bad(inode))
594 		return -EIO;
595 
596 	err = UNCACHED;
597 	if (ff->open_flags & FOPEN_CACHE_DIR)
598 		err = fuse_readdir_cached(file, ctx);
599 	if (err == UNCACHED)
600 		err = fuse_readdir_uncached(file, ctx);
601 
602 	return err;
603 }
604