xref: /linux/fs/fuse/readdir.c (revision 4eb7ae7a301d3586c3351e81d5c3cfe2304a1a6a)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
32 static void fuse_add_dirent_to_cache(struct file *file,
33 				     struct fuse_dirent *dirent, loff_t pos)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 	size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 	pgoff_t index;
38 	struct page *page;
39 	loff_t size;
40 	u64 version;
41 	unsigned int offset;
42 	void *addr;
43 
44 	spin_lock(&fi->rdc.lock);
45 	/*
46 	 * Is cache already completed?  Or this entry does not go at the end of
47 	 * cache?
48 	 */
49 	if (fi->rdc.cached || pos != fi->rdc.pos) {
50 		spin_unlock(&fi->rdc.lock);
51 		return;
52 	}
53 	version = fi->rdc.version;
54 	size = fi->rdc.size;
55 	offset = size & ~PAGE_MASK;
56 	index = size >> PAGE_SHIFT;
57 	/* Dirent doesn't fit in current page?  Jump to next page. */
58 	if (offset + reclen > PAGE_SIZE) {
59 		index++;
60 		offset = 0;
61 	}
62 	spin_unlock(&fi->rdc.lock);
63 
64 	if (offset) {
65 		page = find_lock_page(file->f_mapping, index);
66 	} else {
67 		page = find_or_create_page(file->f_mapping, index,
68 					   mapping_gfp_mask(file->f_mapping));
69 	}
70 	if (!page)
71 		return;
72 
73 	spin_lock(&fi->rdc.lock);
74 	/* Raced with another readdir */
75 	if (fi->rdc.version != version || fi->rdc.size != size ||
76 	    WARN_ON(fi->rdc.pos != pos))
77 		goto unlock;
78 
79 	addr = kmap_atomic(page);
80 	if (!offset)
81 		clear_page(addr);
82 	memcpy(addr + offset, dirent, reclen);
83 	kunmap_atomic(addr);
84 	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85 	fi->rdc.pos = dirent->off;
86 unlock:
87 	spin_unlock(&fi->rdc.lock);
88 	unlock_page(page);
89 	put_page(page);
90 }
91 
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95 	loff_t end;
96 
97 	spin_lock(&fi->rdc.lock);
98 	/* does cache end position match current position? */
99 	if (fi->rdc.pos != pos) {
100 		spin_unlock(&fi->rdc.lock);
101 		return;
102 	}
103 
104 	fi->rdc.cached = true;
105 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
106 	spin_unlock(&fi->rdc.lock);
107 
108 	/* truncate unused tail of cache */
109 	truncate_inode_pages(file->f_mapping, end);
110 }
111 
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113 		      struct fuse_dirent *dirent)
114 {
115 	struct fuse_file *ff = file->private_data;
116 
117 	if (ff->open_flags & FOPEN_CACHE_DIR)
118 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119 
120 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121 			dirent->type);
122 }
123 
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125 			 struct dir_context *ctx)
126 {
127 	while (nbytes >= FUSE_NAME_OFFSET) {
128 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
130 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131 			return -EIO;
132 		if (reclen > nbytes)
133 			break;
134 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135 			return -EIO;
136 
137 		if (!fuse_emit(file, ctx, dirent))
138 			break;
139 
140 		buf += reclen;
141 		nbytes -= reclen;
142 		ctx->pos = dirent->off;
143 	}
144 
145 	return 0;
146 }
147 
148 static int fuse_direntplus_link(struct file *file,
149 				struct fuse_direntplus *direntplus,
150 				u64 attr_version)
151 {
152 	struct fuse_entry_out *o = &direntplus->entry_out;
153 	struct fuse_dirent *dirent = &direntplus->dirent;
154 	struct dentry *parent = file->f_path.dentry;
155 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156 	struct dentry *dentry;
157 	struct dentry *alias;
158 	struct inode *dir = d_inode(parent);
159 	struct fuse_conn *fc;
160 	struct inode *inode;
161 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162 
163 	if (!o->nodeid) {
164 		/*
165 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
166 		 * ENOENT. Instead, it only means the userspace filesystem did
167 		 * not want to return attributes/handle for this entry.
168 		 *
169 		 * So do nothing.
170 		 */
171 		return 0;
172 	}
173 
174 	if (name.name[0] == '.') {
175 		/*
176 		 * We could potentially refresh the attributes of the directory
177 		 * and its parent?
178 		 */
179 		if (name.len == 1)
180 			return 0;
181 		if (name.name[1] == '.' && name.len == 2)
182 			return 0;
183 	}
184 
185 	if (invalid_nodeid(o->nodeid))
186 		return -EIO;
187 	if (fuse_invalid_attr(&o->attr))
188 		return -EIO;
189 
190 	fc = get_fuse_conn(dir);
191 
192 	name.hash = full_name_hash(parent, name.name, name.len);
193 	dentry = d_lookup(parent, &name);
194 	if (!dentry) {
195 retry:
196 		dentry = d_alloc_parallel(parent, &name, &wq);
197 		if (IS_ERR(dentry))
198 			return PTR_ERR(dentry);
199 	}
200 	if (!d_in_lookup(dentry)) {
201 		struct fuse_inode *fi;
202 		inode = d_inode(dentry);
203 		if (!inode ||
204 		    get_node_id(inode) != o->nodeid ||
205 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206 			d_invalidate(dentry);
207 			dput(dentry);
208 			goto retry;
209 		}
210 		if (is_bad_inode(inode)) {
211 			dput(dentry);
212 			return -EIO;
213 		}
214 
215 		fi = get_fuse_inode(inode);
216 		spin_lock(&fi->lock);
217 		fi->nlookup++;
218 		spin_unlock(&fi->lock);
219 
220 		forget_all_cached_acls(inode);
221 		fuse_change_attributes(inode, &o->attr,
222 				       entry_attr_timeout(o),
223 				       attr_version);
224 		/*
225 		 * The other branch comes via fuse_iget()
226 		 * which bumps nlookup inside
227 		 */
228 	} else {
229 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230 				  &o->attr, entry_attr_timeout(o),
231 				  attr_version);
232 		if (!inode)
233 			inode = ERR_PTR(-ENOMEM);
234 
235 		alias = d_splice_alias(inode, dentry);
236 		d_lookup_done(dentry);
237 		if (alias) {
238 			dput(dentry);
239 			dentry = alias;
240 		}
241 		if (IS_ERR(dentry))
242 			return PTR_ERR(dentry);
243 	}
244 	if (fc->readdirplus_auto)
245 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
246 	fuse_change_entry_timeout(dentry, o);
247 
248 	dput(dentry);
249 	return 0;
250 }
251 
252 static void fuse_force_forget(struct file *file, u64 nodeid)
253 {
254 	struct inode *inode = file_inode(file);
255 	struct fuse_conn *fc = get_fuse_conn(inode);
256 	struct fuse_forget_in inarg;
257 	FUSE_ARGS(args);
258 
259 	memset(&inarg, 0, sizeof(inarg));
260 	inarg.nlookup = 1;
261 	args.opcode = FUSE_FORGET;
262 	args.nodeid = nodeid;
263 	args.in_numargs = 1;
264 	args.in_args[0].size = sizeof(inarg);
265 	args.in_args[0].value = &inarg;
266 	args.force = true;
267 	args.noreply = true;
268 
269 	fuse_simple_request(fc, &args);
270 	/* ignore errors */
271 }
272 
273 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
274 			     struct dir_context *ctx, u64 attr_version)
275 {
276 	struct fuse_direntplus *direntplus;
277 	struct fuse_dirent *dirent;
278 	size_t reclen;
279 	int over = 0;
280 	int ret;
281 
282 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
283 		direntplus = (struct fuse_direntplus *) buf;
284 		dirent = &direntplus->dirent;
285 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
286 
287 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
288 			return -EIO;
289 		if (reclen > nbytes)
290 			break;
291 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
292 			return -EIO;
293 
294 		if (!over) {
295 			/* We fill entries into dstbuf only as much as
296 			   it can hold. But we still continue iterating
297 			   over remaining entries to link them. If not,
298 			   we need to send a FORGET for each of those
299 			   which we did not link.
300 			*/
301 			over = !fuse_emit(file, ctx, dirent);
302 			if (!over)
303 				ctx->pos = dirent->off;
304 		}
305 
306 		buf += reclen;
307 		nbytes -= reclen;
308 
309 		ret = fuse_direntplus_link(file, direntplus, attr_version);
310 		if (ret)
311 			fuse_force_forget(file, direntplus->entry_out.nodeid);
312 	}
313 
314 	return 0;
315 }
316 
317 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
318 {
319 	int plus;
320 	ssize_t res;
321 	struct page *page;
322 	struct inode *inode = file_inode(file);
323 	struct fuse_conn *fc = get_fuse_conn(inode);
324 	struct fuse_io_args ia = {};
325 	struct fuse_args_pages *ap = &ia.ap;
326 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
327 	u64 attr_version = 0;
328 	bool locked;
329 
330 	page = alloc_page(GFP_KERNEL);
331 	if (!page)
332 		return -ENOMEM;
333 
334 	plus = fuse_use_readdirplus(inode, ctx);
335 	ap->args.out_pages = 1;
336 	ap->num_pages = 1;
337 	ap->pages = &page;
338 	ap->descs = &desc;
339 	if (plus) {
340 		attr_version = fuse_get_attr_version(fc);
341 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
342 				    FUSE_READDIRPLUS);
343 	} else {
344 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
345 				    FUSE_READDIR);
346 	}
347 	locked = fuse_lock_inode(inode);
348 	res = fuse_simple_request(fc, &ap->args);
349 	fuse_unlock_inode(inode, locked);
350 	if (res >= 0) {
351 		if (!res) {
352 			struct fuse_file *ff = file->private_data;
353 
354 			if (ff->open_flags & FOPEN_CACHE_DIR)
355 				fuse_readdir_cache_end(file, ctx->pos);
356 		} else if (plus) {
357 			res = parse_dirplusfile(page_address(page), res,
358 						file, ctx, attr_version);
359 		} else {
360 			res = parse_dirfile(page_address(page), res, file,
361 					    ctx);
362 		}
363 	}
364 
365 	__free_page(page);
366 	fuse_invalidate_atime(inode);
367 	return res;
368 }
369 
/* Outcome of scanning one page of the readdir cache in fuse_parse_cache(). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* no entry at the wanted position was reached */
	FOUND_SOME = 1,		/* at least one entry was handed to dir_emit() */
	FOUND_ALL  = 2,		/* dir_emit() declined an entry; stop reading */
};
376 
377 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
378 					       void *addr, unsigned int size,
379 					       struct dir_context *ctx)
380 {
381 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
382 	enum fuse_parse_result res = FOUND_NONE;
383 
384 	WARN_ON(offset >= size);
385 
386 	for (;;) {
387 		struct fuse_dirent *dirent = addr + offset;
388 		unsigned int nbytes = size - offset;
389 		size_t reclen;
390 
391 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
392 			break;
393 
394 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
395 
396 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
397 			return FOUND_ERR;
398 		if (WARN_ON(reclen > nbytes))
399 			return FOUND_ERR;
400 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
401 			return FOUND_ERR;
402 
403 		if (ff->readdir.pos == ctx->pos) {
404 			res = FOUND_SOME;
405 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
406 				      dirent->ino, dirent->type))
407 				return FOUND_ALL;
408 			ctx->pos = dirent->off;
409 		}
410 		ff->readdir.pos = dirent->off;
411 		ff->readdir.cache_off += reclen;
412 
413 		offset += reclen;
414 	}
415 
416 	return res;
417 }
418 
419 static void fuse_rdc_reset(struct inode *inode)
420 {
421 	struct fuse_inode *fi = get_fuse_inode(inode);
422 
423 	fi->rdc.cached = false;
424 	fi->rdc.version++;
425 	fi->rdc.size = 0;
426 	fi->rdc.pos = 0;
427 }
428 
429 #define UNCACHED 1
430 
431 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
432 {
433 	struct fuse_file *ff = file->private_data;
434 	struct inode *inode = file_inode(file);
435 	struct fuse_conn *fc = get_fuse_conn(inode);
436 	struct fuse_inode *fi = get_fuse_inode(inode);
437 	enum fuse_parse_result res;
438 	pgoff_t index;
439 	unsigned int size;
440 	struct page *page;
441 	void *addr;
442 
443 	/* Seeked?  If so, reset the cache stream */
444 	if (ff->readdir.pos != ctx->pos) {
445 		ff->readdir.pos = 0;
446 		ff->readdir.cache_off = 0;
447 	}
448 
449 	/*
450 	 * We're just about to start reading into the cache or reading the
451 	 * cache; both cases require an up-to-date mtime value.
452 	 */
453 	if (!ctx->pos && fc->auto_inval_data) {
454 		int err = fuse_update_attributes(inode, file);
455 
456 		if (err)
457 			return err;
458 	}
459 
460 retry:
461 	spin_lock(&fi->rdc.lock);
462 retry_locked:
463 	if (!fi->rdc.cached) {
464 		/* Starting cache? Set cache mtime. */
465 		if (!ctx->pos && !fi->rdc.size) {
466 			fi->rdc.mtime = inode->i_mtime;
467 			fi->rdc.iversion = inode_query_iversion(inode);
468 		}
469 		spin_unlock(&fi->rdc.lock);
470 		return UNCACHED;
471 	}
472 	/*
473 	 * When at the beginning of the directory (i.e. just after opendir(3) or
474 	 * rewinddir(3)), then need to check whether directory contents have
475 	 * changed, and reset the cache if so.
476 	 */
477 	if (!ctx->pos) {
478 		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
479 		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
480 			fuse_rdc_reset(inode);
481 			goto retry_locked;
482 		}
483 	}
484 
485 	/*
486 	 * If cache version changed since the last getdents() call, then reset
487 	 * the cache stream.
488 	 */
489 	if (ff->readdir.version != fi->rdc.version) {
490 		ff->readdir.pos = 0;
491 		ff->readdir.cache_off = 0;
492 	}
493 	/*
494 	 * If at the beginning of the cache, than reset version to
495 	 * current.
496 	 */
497 	if (ff->readdir.pos == 0)
498 		ff->readdir.version = fi->rdc.version;
499 
500 	WARN_ON(fi->rdc.size < ff->readdir.cache_off);
501 
502 	index = ff->readdir.cache_off >> PAGE_SHIFT;
503 
504 	if (index == (fi->rdc.size >> PAGE_SHIFT))
505 		size = fi->rdc.size & ~PAGE_MASK;
506 	else
507 		size = PAGE_SIZE;
508 	spin_unlock(&fi->rdc.lock);
509 
510 	/* EOF? */
511 	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
512 		return 0;
513 
514 	page = find_get_page_flags(file->f_mapping, index,
515 				   FGP_ACCESSED | FGP_LOCK);
516 	spin_lock(&fi->rdc.lock);
517 	if (!page) {
518 		/*
519 		 * Uh-oh: page gone missing, cache is useless
520 		 */
521 		if (fi->rdc.version == ff->readdir.version)
522 			fuse_rdc_reset(inode);
523 		goto retry_locked;
524 	}
525 
526 	/* Make sure it's still the same version after getting the page. */
527 	if (ff->readdir.version != fi->rdc.version) {
528 		spin_unlock(&fi->rdc.lock);
529 		unlock_page(page);
530 		put_page(page);
531 		goto retry;
532 	}
533 	spin_unlock(&fi->rdc.lock);
534 
535 	/*
536 	 * Contents of the page are now protected against changing by holding
537 	 * the page lock.
538 	 */
539 	addr = kmap(page);
540 	res = fuse_parse_cache(ff, addr, size, ctx);
541 	kunmap(page);
542 	unlock_page(page);
543 	put_page(page);
544 
545 	if (res == FOUND_ERR)
546 		return -EIO;
547 
548 	if (res == FOUND_ALL)
549 		return 0;
550 
551 	if (size == PAGE_SIZE) {
552 		/* We hit end of page: skip to next page. */
553 		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
554 		goto retry;
555 	}
556 
557 	/*
558 	 * End of cache reached.  If found position, then we are done, otherwise
559 	 * need to fall back to uncached, since the position we were looking for
560 	 * wasn't in the cache.
561 	 */
562 	return res == FOUND_SOME ? 0 : UNCACHED;
563 }
564 
565 int fuse_readdir(struct file *file, struct dir_context *ctx)
566 {
567 	struct fuse_file *ff = file->private_data;
568 	struct inode *inode = file_inode(file);
569 	int err;
570 
571 	if (is_bad_inode(inode))
572 		return -EIO;
573 
574 	mutex_lock(&ff->readdir.lock);
575 
576 	err = UNCACHED;
577 	if (ff->open_flags & FOPEN_CACHE_DIR)
578 		err = fuse_readdir_cached(file, ctx);
579 	if (err == UNCACHED)
580 		err = fuse_readdir_uncached(file, ctx);
581 
582 	mutex_unlock(&ff->readdir.lock);
583 
584 	return err;
585 }
586