xref: /linux/fs/fuse/readdir.c (revision d09560435cb712c9ec1e62b8a43a79b0af69fe77)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
/*
 * Append one directory entry to the readdir cache kept in the pages of
 * file->f_mapping.
 *
 * @file:   open directory the entry was read through
 * @dirent: entry to copy into the cache
 * @pos:    directory position the entry was read at; the entry is only added
 *          if this equals the position the cache currently ends at
 *
 * The entry is silently dropped if the cache is already complete, if it would
 * not extend the cache contiguously, if no page could be obtained, or if the
 * cache state changed while the page was being looked up.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/* Snapshot the cache state so it can be re-validated after relocking. */
	version = fi->rdc.version;
	size = fi->rdc.size;
	/* Split the current cache size into page index and in-page offset. */
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	/* The spinlock is not held across the page lookup below. */
	spin_unlock(&fi->rdc.lock);

	if (offset) {
		/* Appending in the middle of a page: only look up, never create. */
		page = find_lock_page(file->f_mapping, index);
	} else {
		/* Starting a page: look it up, creating it if it is absent. */
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	/*
	 * NOTE(review): when offset is 0 and the page was just created above,
	 * bailing out here leaves that page in the mapping without it ever
	 * having been cleared -- confirm that readers cannot observe it.
	 */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_atomic(page);
	/* First entry of a page: zero the whole page before writing into it. */
	if (!offset)
		clear_page(addr);
	memcpy(addr + offset, dirent, reclen);
	kunmap_atomic(addr);
	/* Advance the cache end (in bytes) and the directory position it ends at. */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
91 
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95 	loff_t end;
96 
97 	spin_lock(&fi->rdc.lock);
98 	/* does cache end position match current position? */
99 	if (fi->rdc.pos != pos) {
100 		spin_unlock(&fi->rdc.lock);
101 		return;
102 	}
103 
104 	fi->rdc.cached = true;
105 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
106 	spin_unlock(&fi->rdc.lock);
107 
108 	/* truncate unused tail of cache */
109 	truncate_inode_pages(file->f_mapping, end);
110 }
111 
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113 		      struct fuse_dirent *dirent)
114 {
115 	struct fuse_file *ff = file->private_data;
116 
117 	if (ff->open_flags & FOPEN_CACHE_DIR)
118 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119 
120 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121 			dirent->type);
122 }
123 
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125 			 struct dir_context *ctx)
126 {
127 	while (nbytes >= FUSE_NAME_OFFSET) {
128 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
130 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131 			return -EIO;
132 		if (reclen > nbytes)
133 			break;
134 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135 			return -EIO;
136 
137 		if (!fuse_emit(file, ctx, dirent))
138 			break;
139 
140 		buf += reclen;
141 		nbytes -= reclen;
142 		ctx->pos = dirent->off;
143 	}
144 
145 	return 0;
146 }
147 
148 static int fuse_direntplus_link(struct file *file,
149 				struct fuse_direntplus *direntplus,
150 				u64 attr_version)
151 {
152 	struct fuse_entry_out *o = &direntplus->entry_out;
153 	struct fuse_dirent *dirent = &direntplus->dirent;
154 	struct dentry *parent = file->f_path.dentry;
155 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156 	struct dentry *dentry;
157 	struct dentry *alias;
158 	struct inode *dir = d_inode(parent);
159 	struct fuse_conn *fc;
160 	struct inode *inode;
161 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162 
163 	if (!o->nodeid) {
164 		/*
165 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
166 		 * ENOENT. Instead, it only means the userspace filesystem did
167 		 * not want to return attributes/handle for this entry.
168 		 *
169 		 * So do nothing.
170 		 */
171 		return 0;
172 	}
173 
174 	if (name.name[0] == '.') {
175 		/*
176 		 * We could potentially refresh the attributes of the directory
177 		 * and its parent?
178 		 */
179 		if (name.len == 1)
180 			return 0;
181 		if (name.name[1] == '.' && name.len == 2)
182 			return 0;
183 	}
184 
185 	if (invalid_nodeid(o->nodeid))
186 		return -EIO;
187 	if (fuse_invalid_attr(&o->attr))
188 		return -EIO;
189 
190 	fc = get_fuse_conn(dir);
191 
192 	name.hash = full_name_hash(parent, name.name, name.len);
193 	dentry = d_lookup(parent, &name);
194 	if (!dentry) {
195 retry:
196 		dentry = d_alloc_parallel(parent, &name, &wq);
197 		if (IS_ERR(dentry))
198 			return PTR_ERR(dentry);
199 	}
200 	if (!d_in_lookup(dentry)) {
201 		struct fuse_inode *fi;
202 		inode = d_inode(dentry);
203 		if (inode && get_node_id(inode) != o->nodeid)
204 			inode = NULL;
205 		if (!inode ||
206 		    fuse_stale_inode(inode, o->generation, &o->attr)) {
207 			if (inode)
208 				fuse_make_bad(inode);
209 			d_invalidate(dentry);
210 			dput(dentry);
211 			goto retry;
212 		}
213 		if (fuse_is_bad(inode)) {
214 			dput(dentry);
215 			return -EIO;
216 		}
217 
218 		fi = get_fuse_inode(inode);
219 		spin_lock(&fi->lock);
220 		fi->nlookup++;
221 		spin_unlock(&fi->lock);
222 
223 		forget_all_cached_acls(inode);
224 		fuse_change_attributes(inode, &o->attr,
225 				       entry_attr_timeout(o),
226 				       attr_version);
227 		/*
228 		 * The other branch comes via fuse_iget()
229 		 * which bumps nlookup inside
230 		 */
231 	} else {
232 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
233 				  &o->attr, entry_attr_timeout(o),
234 				  attr_version);
235 		if (!inode)
236 			inode = ERR_PTR(-ENOMEM);
237 
238 		alias = d_splice_alias(inode, dentry);
239 		d_lookup_done(dentry);
240 		if (alias) {
241 			dput(dentry);
242 			dentry = alias;
243 		}
244 		if (IS_ERR(dentry))
245 			return PTR_ERR(dentry);
246 	}
247 	if (fc->readdirplus_auto)
248 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
249 	fuse_change_entry_timeout(dentry, o);
250 
251 	dput(dentry);
252 	return 0;
253 }
254 
255 static void fuse_force_forget(struct file *file, u64 nodeid)
256 {
257 	struct inode *inode = file_inode(file);
258 	struct fuse_mount *fm = get_fuse_mount(inode);
259 	struct fuse_forget_in inarg;
260 	FUSE_ARGS(args);
261 
262 	memset(&inarg, 0, sizeof(inarg));
263 	inarg.nlookup = 1;
264 	args.opcode = FUSE_FORGET;
265 	args.nodeid = nodeid;
266 	args.in_numargs = 1;
267 	args.in_args[0].size = sizeof(inarg);
268 	args.in_args[0].value = &inarg;
269 	args.force = true;
270 	args.noreply = true;
271 
272 	fuse_simple_request(fm, &args);
273 	/* ignore errors */
274 }
275 
276 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
277 			     struct dir_context *ctx, u64 attr_version)
278 {
279 	struct fuse_direntplus *direntplus;
280 	struct fuse_dirent *dirent;
281 	size_t reclen;
282 	int over = 0;
283 	int ret;
284 
285 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
286 		direntplus = (struct fuse_direntplus *) buf;
287 		dirent = &direntplus->dirent;
288 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
289 
290 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
291 			return -EIO;
292 		if (reclen > nbytes)
293 			break;
294 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
295 			return -EIO;
296 
297 		if (!over) {
298 			/* We fill entries into dstbuf only as much as
299 			   it can hold. But we still continue iterating
300 			   over remaining entries to link them. If not,
301 			   we need to send a FORGET for each of those
302 			   which we did not link.
303 			*/
304 			over = !fuse_emit(file, ctx, dirent);
305 			if (!over)
306 				ctx->pos = dirent->off;
307 		}
308 
309 		buf += reclen;
310 		nbytes -= reclen;
311 
312 		ret = fuse_direntplus_link(file, direntplus, attr_version);
313 		if (ret)
314 			fuse_force_forget(file, direntplus->entry_out.nodeid);
315 	}
316 
317 	return 0;
318 }
319 
320 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
321 {
322 	int plus;
323 	ssize_t res;
324 	struct page *page;
325 	struct inode *inode = file_inode(file);
326 	struct fuse_mount *fm = get_fuse_mount(inode);
327 	struct fuse_io_args ia = {};
328 	struct fuse_args_pages *ap = &ia.ap;
329 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
330 	u64 attr_version = 0;
331 	bool locked;
332 
333 	page = alloc_page(GFP_KERNEL);
334 	if (!page)
335 		return -ENOMEM;
336 
337 	plus = fuse_use_readdirplus(inode, ctx);
338 	ap->args.out_pages = true;
339 	ap->num_pages = 1;
340 	ap->pages = &page;
341 	ap->descs = &desc;
342 	if (plus) {
343 		attr_version = fuse_get_attr_version(fm->fc);
344 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
345 				    FUSE_READDIRPLUS);
346 	} else {
347 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
348 				    FUSE_READDIR);
349 	}
350 	locked = fuse_lock_inode(inode);
351 	res = fuse_simple_request(fm, &ap->args);
352 	fuse_unlock_inode(inode, locked);
353 	if (res >= 0) {
354 		if (!res) {
355 			struct fuse_file *ff = file->private_data;
356 
357 			if (ff->open_flags & FOPEN_CACHE_DIR)
358 				fuse_readdir_cache_end(file, ctx->pos);
359 		} else if (plus) {
360 			res = parse_dirplusfile(page_address(page), res,
361 						file, ctx, attr_version);
362 		} else {
363 			res = parse_dirfile(page_address(page), res, file,
364 					    ctx);
365 		}
366 	}
367 
368 	__free_page(page);
369 	fuse_invalidate_atime(inode);
370 	return res;
371 }
372 
/* Outcome of scanning one page of the readdir cache. */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* no entry was emitted from this page */
	FOUND_SOME = 1,		/* at least one entry was emitted */
	FOUND_ALL  = 2,		/* emitting stopped: dir_emit() refused an entry */
};
379 
380 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
381 					       void *addr, unsigned int size,
382 					       struct dir_context *ctx)
383 {
384 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
385 	enum fuse_parse_result res = FOUND_NONE;
386 
387 	WARN_ON(offset >= size);
388 
389 	for (;;) {
390 		struct fuse_dirent *dirent = addr + offset;
391 		unsigned int nbytes = size - offset;
392 		size_t reclen;
393 
394 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
395 			break;
396 
397 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
398 
399 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
400 			return FOUND_ERR;
401 		if (WARN_ON(reclen > nbytes))
402 			return FOUND_ERR;
403 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
404 			return FOUND_ERR;
405 
406 		if (ff->readdir.pos == ctx->pos) {
407 			res = FOUND_SOME;
408 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
409 				      dirent->ino, dirent->type))
410 				return FOUND_ALL;
411 			ctx->pos = dirent->off;
412 		}
413 		ff->readdir.pos = dirent->off;
414 		ff->readdir.cache_off += reclen;
415 
416 		offset += reclen;
417 	}
418 
419 	return res;
420 }
421 
422 static void fuse_rdc_reset(struct inode *inode)
423 {
424 	struct fuse_inode *fi = get_fuse_inode(inode);
425 
426 	fi->rdc.cached = false;
427 	fi->rdc.version++;
428 	fi->rdc.size = 0;
429 	fi->rdc.pos = 0;
430 }
431 
/*
 * Positive return value of fuse_readdir_cached() telling fuse_readdir() to
 * fall back to fuse_readdir_uncached().
 */
#define UNCACHED 1

/*
 * Try to serve a readdir request from the readdir cache.
 *
 * Returns 0 when entries were emitted from the cache or the end of the cached
 * directory was reached, UNCACHED when the request has to be served by
 * fuse_readdir_uncached(), -EIO when a cached entry fails validation, or the
 * error from fuse_update_attributes().
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	/* Entered with fi->rdc.lock held. */
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/*
	 * The page holding the end of the cache is only valid up to the
	 * in-page part of rdc.size; every other page is treated as full.
	 */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/*
	 * Look up the cache page and lock it (FGP_LOCK).
	 *
	 * NOTE(review): the page is used below without a PageUptodate() check;
	 * confirm that a page which is present in the mapping but was never
	 * filled cannot be returned here.
	 */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
567 
568 int fuse_readdir(struct file *file, struct dir_context *ctx)
569 {
570 	struct fuse_file *ff = file->private_data;
571 	struct inode *inode = file_inode(file);
572 	int err;
573 
574 	if (fuse_is_bad(inode))
575 		return -EIO;
576 
577 	mutex_lock(&ff->readdir.lock);
578 
579 	err = UNCACHED;
580 	if (ff->open_flags & FOPEN_CACHE_DIR)
581 		err = fuse_readdir_cached(file, ctx);
582 	if (err == UNCACHED)
583 		err = fuse_readdir_uncached(file, ctx);
584 
585 	mutex_unlock(&ff->readdir.lock);
586 
587 	return err;
588 }
589