xref: /linux/fs/fuse/dir.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 
17 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18 {
19 	struct fuse_conn *fc = get_fuse_conn(dir);
20 	struct fuse_inode *fi = get_fuse_inode(dir);
21 
22 	if (!fc->do_readdirplus)
23 		return false;
24 	if (!fc->readdirplus_auto)
25 		return true;
26 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 		return true;
28 	if (ctx->pos == 0)
29 		return true;
30 	return false;
31 }
32 
33 static void fuse_advise_use_readdirplus(struct inode *dir)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(dir);
36 
37 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38 }
39 
40 #if BITS_PER_LONG >= 64
41 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42 {
43 	entry->d_time = time;
44 }
45 
46 static inline u64 fuse_dentry_time(struct dentry *entry)
47 {
48 	return entry->d_time;
49 }
50 #else
51 /*
52  * On 32 bit archs store the high 32 bits of time in d_fsdata
53  */
54 static void fuse_dentry_settime(struct dentry *entry, u64 time)
55 {
56 	entry->d_time = time;
57 	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58 }
59 
60 static u64 fuse_dentry_time(struct dentry *entry)
61 {
62 	return (u64) entry->d_time +
63 		((u64) (unsigned long) entry->d_fsdata << 32);
64 }
65 #endif
66 
67 /*
68  * FUSE caches dentries and attributes with separate timeouts.  The
69  * time (in jiffies) up to which the dentry/attributes remain valid is
70  * stored in dentry->d_time and fuse_inode->i_time respectively.
71  */
72 
73 /*
74  * Calculate the time in jiffies until a dentry/attributes are valid
75  */
76 static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77 {
78 	if (sec || nsec) {
79 		struct timespec ts = {sec, nsec};
80 		return get_jiffies_64() + timespec_to_jiffies(&ts);
81 	} else
82 		return 0;
83 }
84 
85 /*
86  * Set dentry and possibly attribute timeouts from the lookup/mk*
87  * replies
88  */
89 static void fuse_change_entry_timeout(struct dentry *entry,
90 				      struct fuse_entry_out *o)
91 {
92 	fuse_dentry_settime(entry,
93 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94 }
95 
96 static u64 attr_timeout(struct fuse_attr_out *o)
97 {
98 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99 }
100 
101 static u64 entry_attr_timeout(struct fuse_entry_out *o)
102 {
103 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104 }
105 
106 /*
107  * Mark the attributes as stale, so that at the next call to
108  * ->getattr() they will be fetched from userspace
109  */
110 void fuse_invalidate_attr(struct inode *inode)
111 {
112 	get_fuse_inode(inode)->i_time = 0;
113 }
114 
/*
 * Mark the attributes as stale because of an atime change.  Nothing is
 * invalidated when IS_RDONLY(inode) is true, i.e. when atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124 
125 /*
126  * Just mark the entry as stale, so that a next attempt to look it up
127  * will result in a new lookup call to userspace
128  *
129  * This is called when a dentry is about to become negative and the
130  * timeout is unknown (unlink, rmdir, rename and in some cases
131  * lookup)
132  */
133 void fuse_invalidate_entry_cache(struct dentry *entry)
134 {
135 	fuse_dentry_settime(entry, 0);
136 }
137 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry (d_invalidate()), which tries to remove it from
 * the hash.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	/* Drop from the dcache first, then reset the cached timeout */
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
147 
148 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149 			     u64 nodeid, struct qstr *name,
150 			     struct fuse_entry_out *outarg)
151 {
152 	memset(outarg, 0, sizeof(struct fuse_entry_out));
153 	args->in.h.opcode = FUSE_LOOKUP;
154 	args->in.h.nodeid = nodeid;
155 	args->in.numargs = 1;
156 	args->in.args[0].size = name->len + 1;
157 	args->in.args[0].value = name->name;
158 	args->out.numargs = 1;
159 	args->out.args[0].size = sizeof(struct fuse_entry_out);
160 	args->out.args[0].value = outarg;
161 }
162 
163 u64 fuse_get_attr_version(struct fuse_conn *fc)
164 {
165 	u64 curr_version;
166 
167 	/*
168 	 * The spin lock isn't actually needed on 64bit archs, but we
169 	 * don't yet care too much about such optimizations.
170 	 */
171 	spin_lock(&fc->lock);
172 	curr_version = fc->attr_version;
173 	spin_unlock(&fc->lock);
174 
175 	return curr_version;
176 }
177 
178 /*
179  * Check whether the dentry is still valid
180  *
181  * If the entry validity timeout has expired and the dentry is
182  * positive, try to redo the lookup.  If the lookup results in a
183  * different inode, then let the VFS invalidate the dentry and redo
184  * the lookup once more.  If the lookup results in the same inode,
185  * then refresh the attributes, timeouts and mark the dentry valid.
186  */
187 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
188 {
189 	struct inode *inode;
190 	struct dentry *parent;
191 	struct fuse_conn *fc;
192 	struct fuse_inode *fi;
193 	int ret;
194 
195 	inode = d_inode_rcu(entry);
196 	if (inode && is_bad_inode(inode))
197 		goto invalid;
198 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
199 		 (flags & LOOKUP_REVAL)) {
200 		struct fuse_entry_out outarg;
201 		FUSE_ARGS(args);
202 		struct fuse_forget_link *forget;
203 		u64 attr_version;
204 
205 		/* For negative dentries, always do a fresh lookup */
206 		if (!inode)
207 			goto invalid;
208 
209 		ret = -ECHILD;
210 		if (flags & LOOKUP_RCU)
211 			goto out;
212 
213 		fc = get_fuse_conn(inode);
214 
215 		forget = fuse_alloc_forget();
216 		ret = -ENOMEM;
217 		if (!forget)
218 			goto out;
219 
220 		attr_version = fuse_get_attr_version(fc);
221 
222 		parent = dget_parent(entry);
223 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
224 				 &entry->d_name, &outarg);
225 		ret = fuse_simple_request(fc, &args);
226 		dput(parent);
227 		/* Zero nodeid is same as -ENOENT */
228 		if (!ret && !outarg.nodeid)
229 			ret = -ENOENT;
230 		if (!ret) {
231 			fi = get_fuse_inode(inode);
232 			if (outarg.nodeid != get_node_id(inode)) {
233 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
234 				goto invalid;
235 			}
236 			spin_lock(&fc->lock);
237 			fi->nlookup++;
238 			spin_unlock(&fc->lock);
239 		}
240 		kfree(forget);
241 		if (ret == -ENOMEM)
242 			goto out;
243 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
244 			goto invalid;
245 
246 		fuse_change_attributes(inode, &outarg.attr,
247 				       entry_attr_timeout(&outarg),
248 				       attr_version);
249 		fuse_change_entry_timeout(entry, &outarg);
250 	} else if (inode) {
251 		fi = get_fuse_inode(inode);
252 		if (flags & LOOKUP_RCU) {
253 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
254 				return -ECHILD;
255 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
256 			parent = dget_parent(entry);
257 			fuse_advise_use_readdirplus(d_inode(parent));
258 			dput(parent);
259 		}
260 	}
261 	ret = 1;
262 out:
263 	return ret;
264 
265 invalid:
266 	ret = 0;
267 	goto out;
268 }
269 
270 static int invalid_nodeid(u64 nodeid)
271 {
272 	return !nodeid || nodeid == FUSE_ROOT_ID;
273 }
274 
275 const struct dentry_operations fuse_dentry_operations = {
276 	.d_revalidate	= fuse_dentry_revalidate,
277 };
278 
279 int fuse_valid_type(int m)
280 {
281 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
282 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
283 }
284 
285 int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286 		     struct fuse_entry_out *outarg, struct inode **inode)
287 {
288 	struct fuse_conn *fc = get_fuse_conn_super(sb);
289 	FUSE_ARGS(args);
290 	struct fuse_forget_link *forget;
291 	u64 attr_version;
292 	int err;
293 
294 	*inode = NULL;
295 	err = -ENAMETOOLONG;
296 	if (name->len > FUSE_NAME_MAX)
297 		goto out;
298 
299 
300 	forget = fuse_alloc_forget();
301 	err = -ENOMEM;
302 	if (!forget)
303 		goto out;
304 
305 	attr_version = fuse_get_attr_version(fc);
306 
307 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308 	err = fuse_simple_request(fc, &args);
309 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310 	if (err || !outarg->nodeid)
311 		goto out_put_forget;
312 
313 	err = -EIO;
314 	if (!outarg->nodeid)
315 		goto out_put_forget;
316 	if (!fuse_valid_type(outarg->attr.mode))
317 		goto out_put_forget;
318 
319 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320 			   &outarg->attr, entry_attr_timeout(outarg),
321 			   attr_version);
322 	err = -ENOMEM;
323 	if (!*inode) {
324 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325 		goto out;
326 	}
327 	err = 0;
328 
329  out_put_forget:
330 	kfree(forget);
331  out:
332 	return err;
333 }
334 
335 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336 				  unsigned int flags)
337 {
338 	int err;
339 	struct fuse_entry_out outarg;
340 	struct inode *inode;
341 	struct dentry *newent;
342 	bool outarg_valid = true;
343 
344 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
345 			       &outarg, &inode);
346 	if (err == -ENOENT) {
347 		outarg_valid = false;
348 		err = 0;
349 	}
350 	if (err)
351 		goto out_err;
352 
353 	err = -EIO;
354 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
355 		goto out_iput;
356 
357 	newent = d_splice_alias(inode, entry);
358 	err = PTR_ERR(newent);
359 	if (IS_ERR(newent))
360 		goto out_err;
361 
362 	entry = newent ? newent : entry;
363 	if (outarg_valid)
364 		fuse_change_entry_timeout(entry, &outarg);
365 	else
366 		fuse_invalidate_entry_cache(entry);
367 
368 	fuse_advise_use_readdirplus(dir);
369 	return newent;
370 
371  out_iput:
372 	iput(inode);
373  out_err:
374 	return ERR_PTR(err);
375 }
376 
377 /*
378  * Atomic create+open operation
379  *
380  * If the filesystem doesn't support this, then fall back to separate
381  * 'mknod' + 'open' requests.
382  */
383 static int fuse_create_open(struct inode *dir, struct dentry *entry,
384 			    struct file *file, unsigned flags,
385 			    umode_t mode, int *opened)
386 {
387 	int err;
388 	struct inode *inode;
389 	struct fuse_conn *fc = get_fuse_conn(dir);
390 	FUSE_ARGS(args);
391 	struct fuse_forget_link *forget;
392 	struct fuse_create_in inarg;
393 	struct fuse_open_out outopen;
394 	struct fuse_entry_out outentry;
395 	struct fuse_file *ff;
396 
397 	/* Userspace expects S_IFREG in create mode */
398 	BUG_ON((mode & S_IFMT) != S_IFREG);
399 
400 	forget = fuse_alloc_forget();
401 	err = -ENOMEM;
402 	if (!forget)
403 		goto out_err;
404 
405 	err = -ENOMEM;
406 	ff = fuse_file_alloc(fc);
407 	if (!ff)
408 		goto out_put_forget_req;
409 
410 	if (!fc->dont_mask)
411 		mode &= ~current_umask();
412 
413 	flags &= ~O_NOCTTY;
414 	memset(&inarg, 0, sizeof(inarg));
415 	memset(&outentry, 0, sizeof(outentry));
416 	inarg.flags = flags;
417 	inarg.mode = mode;
418 	inarg.umask = current_umask();
419 	args.in.h.opcode = FUSE_CREATE;
420 	args.in.h.nodeid = get_node_id(dir);
421 	args.in.numargs = 2;
422 	args.in.args[0].size = sizeof(inarg);
423 	args.in.args[0].value = &inarg;
424 	args.in.args[1].size = entry->d_name.len + 1;
425 	args.in.args[1].value = entry->d_name.name;
426 	args.out.numargs = 2;
427 	args.out.args[0].size = sizeof(outentry);
428 	args.out.args[0].value = &outentry;
429 	args.out.args[1].size = sizeof(outopen);
430 	args.out.args[1].value = &outopen;
431 	err = fuse_simple_request(fc, &args);
432 	if (err)
433 		goto out_free_ff;
434 
435 	err = -EIO;
436 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
437 		goto out_free_ff;
438 
439 	ff->fh = outopen.fh;
440 	ff->nodeid = outentry.nodeid;
441 	ff->open_flags = outopen.open_flags;
442 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
443 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
444 	if (!inode) {
445 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
446 		fuse_sync_release(ff, flags);
447 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
448 		err = -ENOMEM;
449 		goto out_err;
450 	}
451 	kfree(forget);
452 	d_instantiate(entry, inode);
453 	fuse_change_entry_timeout(entry, &outentry);
454 	fuse_invalidate_attr(dir);
455 	err = finish_open(file, entry, generic_file_open, opened);
456 	if (err) {
457 		fuse_sync_release(ff, flags);
458 	} else {
459 		file->private_data = fuse_file_get(ff);
460 		fuse_finish_open(inode, file);
461 	}
462 	return err;
463 
464 out_free_ff:
465 	fuse_file_free(ff);
466 out_put_forget_req:
467 	kfree(forget);
468 out_err:
469 	return err;
470 }
471 
472 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
473 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
474 			    struct file *file, unsigned flags,
475 			    umode_t mode, int *opened)
476 {
477 	int err;
478 	struct fuse_conn *fc = get_fuse_conn(dir);
479 	struct dentry *res = NULL;
480 
481 	if (d_unhashed(entry)) {
482 		res = fuse_lookup(dir, entry, 0);
483 		if (IS_ERR(res))
484 			return PTR_ERR(res);
485 
486 		if (res)
487 			entry = res;
488 	}
489 
490 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
491 		goto no_open;
492 
493 	/* Only creates */
494 	*opened |= FILE_CREATED;
495 
496 	if (fc->no_create)
497 		goto mknod;
498 
499 	err = fuse_create_open(dir, entry, file, flags, mode, opened);
500 	if (err == -ENOSYS) {
501 		fc->no_create = 1;
502 		goto mknod;
503 	}
504 out_dput:
505 	dput(res);
506 	return err;
507 
508 mknod:
509 	err = fuse_mknod(dir, entry, mode, 0);
510 	if (err)
511 		goto out_dput;
512 no_open:
513 	return finish_no_open(file, res);
514 }
515 
516 /*
517  * Code shared between mknod, mkdir, symlink and link
518  */
519 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
520 			    struct inode *dir, struct dentry *entry,
521 			    umode_t mode)
522 {
523 	struct fuse_entry_out outarg;
524 	struct inode *inode;
525 	int err;
526 	struct fuse_forget_link *forget;
527 
528 	forget = fuse_alloc_forget();
529 	if (!forget)
530 		return -ENOMEM;
531 
532 	memset(&outarg, 0, sizeof(outarg));
533 	args->in.h.nodeid = get_node_id(dir);
534 	args->out.numargs = 1;
535 	args->out.args[0].size = sizeof(outarg);
536 	args->out.args[0].value = &outarg;
537 	err = fuse_simple_request(fc, args);
538 	if (err)
539 		goto out_put_forget_req;
540 
541 	err = -EIO;
542 	if (invalid_nodeid(outarg.nodeid))
543 		goto out_put_forget_req;
544 
545 	if ((outarg.attr.mode ^ mode) & S_IFMT)
546 		goto out_put_forget_req;
547 
548 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
549 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
550 	if (!inode) {
551 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
552 		return -ENOMEM;
553 	}
554 	kfree(forget);
555 
556 	err = d_instantiate_no_diralias(entry, inode);
557 	if (err)
558 		return err;
559 
560 	fuse_change_entry_timeout(entry, &outarg);
561 	fuse_invalidate_attr(dir);
562 	return 0;
563 
564  out_put_forget_req:
565 	kfree(forget);
566 	return err;
567 }
568 
569 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
570 		      dev_t rdev)
571 {
572 	struct fuse_mknod_in inarg;
573 	struct fuse_conn *fc = get_fuse_conn(dir);
574 	FUSE_ARGS(args);
575 
576 	if (!fc->dont_mask)
577 		mode &= ~current_umask();
578 
579 	memset(&inarg, 0, sizeof(inarg));
580 	inarg.mode = mode;
581 	inarg.rdev = new_encode_dev(rdev);
582 	inarg.umask = current_umask();
583 	args.in.h.opcode = FUSE_MKNOD;
584 	args.in.numargs = 2;
585 	args.in.args[0].size = sizeof(inarg);
586 	args.in.args[0].value = &inarg;
587 	args.in.args[1].size = entry->d_name.len + 1;
588 	args.in.args[1].value = entry->d_name.name;
589 	return create_new_entry(fc, &args, dir, entry, mode);
590 }
591 
592 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
593 		       bool excl)
594 {
595 	return fuse_mknod(dir, entry, mode, 0);
596 }
597 
598 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
599 {
600 	struct fuse_mkdir_in inarg;
601 	struct fuse_conn *fc = get_fuse_conn(dir);
602 	FUSE_ARGS(args);
603 
604 	if (!fc->dont_mask)
605 		mode &= ~current_umask();
606 
607 	memset(&inarg, 0, sizeof(inarg));
608 	inarg.mode = mode;
609 	inarg.umask = current_umask();
610 	args.in.h.opcode = FUSE_MKDIR;
611 	args.in.numargs = 2;
612 	args.in.args[0].size = sizeof(inarg);
613 	args.in.args[0].value = &inarg;
614 	args.in.args[1].size = entry->d_name.len + 1;
615 	args.in.args[1].value = entry->d_name.name;
616 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
617 }
618 
619 static int fuse_symlink(struct inode *dir, struct dentry *entry,
620 			const char *link)
621 {
622 	struct fuse_conn *fc = get_fuse_conn(dir);
623 	unsigned len = strlen(link) + 1;
624 	FUSE_ARGS(args);
625 
626 	args.in.h.opcode = FUSE_SYMLINK;
627 	args.in.numargs = 2;
628 	args.in.args[0].size = entry->d_name.len + 1;
629 	args.in.args[0].value = entry->d_name.name;
630 	args.in.args[1].size = len;
631 	args.in.args[1].value = link;
632 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
633 }
634 
635 static inline void fuse_update_ctime(struct inode *inode)
636 {
637 	if (!IS_NOCMTIME(inode)) {
638 		inode->i_ctime = current_fs_time(inode->i_sb);
639 		mark_inode_dirty_sync(inode);
640 	}
641 }
642 
643 static int fuse_unlink(struct inode *dir, struct dentry *entry)
644 {
645 	int err;
646 	struct fuse_conn *fc = get_fuse_conn(dir);
647 	FUSE_ARGS(args);
648 
649 	args.in.h.opcode = FUSE_UNLINK;
650 	args.in.h.nodeid = get_node_id(dir);
651 	args.in.numargs = 1;
652 	args.in.args[0].size = entry->d_name.len + 1;
653 	args.in.args[0].value = entry->d_name.name;
654 	err = fuse_simple_request(fc, &args);
655 	if (!err) {
656 		struct inode *inode = d_inode(entry);
657 		struct fuse_inode *fi = get_fuse_inode(inode);
658 
659 		spin_lock(&fc->lock);
660 		fi->attr_version = ++fc->attr_version;
661 		/*
662 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
663 		 * happen if userspace filesystem is careless.  It would be
664 		 * difficult to enforce correct nlink usage so just ignore this
665 		 * condition here
666 		 */
667 		if (inode->i_nlink > 0)
668 			drop_nlink(inode);
669 		spin_unlock(&fc->lock);
670 		fuse_invalidate_attr(inode);
671 		fuse_invalidate_attr(dir);
672 		fuse_invalidate_entry_cache(entry);
673 		fuse_update_ctime(inode);
674 	} else if (err == -EINTR)
675 		fuse_invalidate_entry(entry);
676 	return err;
677 }
678 
679 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
680 {
681 	int err;
682 	struct fuse_conn *fc = get_fuse_conn(dir);
683 	FUSE_ARGS(args);
684 
685 	args.in.h.opcode = FUSE_RMDIR;
686 	args.in.h.nodeid = get_node_id(dir);
687 	args.in.numargs = 1;
688 	args.in.args[0].size = entry->d_name.len + 1;
689 	args.in.args[0].value = entry->d_name.name;
690 	err = fuse_simple_request(fc, &args);
691 	if (!err) {
692 		clear_nlink(d_inode(entry));
693 		fuse_invalidate_attr(dir);
694 		fuse_invalidate_entry_cache(entry);
695 	} else if (err == -EINTR)
696 		fuse_invalidate_entry(entry);
697 	return err;
698 }
699 
700 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
701 			      struct inode *newdir, struct dentry *newent,
702 			      unsigned int flags, int opcode, size_t argsize)
703 {
704 	int err;
705 	struct fuse_rename2_in inarg;
706 	struct fuse_conn *fc = get_fuse_conn(olddir);
707 	FUSE_ARGS(args);
708 
709 	memset(&inarg, 0, argsize);
710 	inarg.newdir = get_node_id(newdir);
711 	inarg.flags = flags;
712 	args.in.h.opcode = opcode;
713 	args.in.h.nodeid = get_node_id(olddir);
714 	args.in.numargs = 3;
715 	args.in.args[0].size = argsize;
716 	args.in.args[0].value = &inarg;
717 	args.in.args[1].size = oldent->d_name.len + 1;
718 	args.in.args[1].value = oldent->d_name.name;
719 	args.in.args[2].size = newent->d_name.len + 1;
720 	args.in.args[2].value = newent->d_name.name;
721 	err = fuse_simple_request(fc, &args);
722 	if (!err) {
723 		/* ctime changes */
724 		fuse_invalidate_attr(d_inode(oldent));
725 		fuse_update_ctime(d_inode(oldent));
726 
727 		if (flags & RENAME_EXCHANGE) {
728 			fuse_invalidate_attr(d_inode(newent));
729 			fuse_update_ctime(d_inode(newent));
730 		}
731 
732 		fuse_invalidate_attr(olddir);
733 		if (olddir != newdir)
734 			fuse_invalidate_attr(newdir);
735 
736 		/* newent will end up negative */
737 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
738 			fuse_invalidate_attr(d_inode(newent));
739 			fuse_invalidate_entry_cache(newent);
740 			fuse_update_ctime(d_inode(newent));
741 		}
742 	} else if (err == -EINTR) {
743 		/* If request was interrupted, DEITY only knows if the
744 		   rename actually took place.  If the invalidation
745 		   fails (e.g. some process has CWD under the renamed
746 		   directory), then there can be inconsistency between
747 		   the dcache and the real filesystem.  Tough luck. */
748 		fuse_invalidate_entry(oldent);
749 		if (d_really_is_positive(newent))
750 			fuse_invalidate_entry(newent);
751 	}
752 
753 	return err;
754 }
755 
756 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
757 			struct inode *newdir, struct dentry *newent,
758 			unsigned int flags)
759 {
760 	struct fuse_conn *fc = get_fuse_conn(olddir);
761 	int err;
762 
763 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
764 		return -EINVAL;
765 
766 	if (flags) {
767 		if (fc->no_rename2 || fc->minor < 23)
768 			return -EINVAL;
769 
770 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
771 					 FUSE_RENAME2,
772 					 sizeof(struct fuse_rename2_in));
773 		if (err == -ENOSYS) {
774 			fc->no_rename2 = 1;
775 			err = -EINVAL;
776 		}
777 	} else {
778 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
779 					 FUSE_RENAME,
780 					 sizeof(struct fuse_rename_in));
781 	}
782 
783 	return err;
784 }
785 
786 static int fuse_link(struct dentry *entry, struct inode *newdir,
787 		     struct dentry *newent)
788 {
789 	int err;
790 	struct fuse_link_in inarg;
791 	struct inode *inode = d_inode(entry);
792 	struct fuse_conn *fc = get_fuse_conn(inode);
793 	FUSE_ARGS(args);
794 
795 	memset(&inarg, 0, sizeof(inarg));
796 	inarg.oldnodeid = get_node_id(inode);
797 	args.in.h.opcode = FUSE_LINK;
798 	args.in.numargs = 2;
799 	args.in.args[0].size = sizeof(inarg);
800 	args.in.args[0].value = &inarg;
801 	args.in.args[1].size = newent->d_name.len + 1;
802 	args.in.args[1].value = newent->d_name.name;
803 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
804 	/* Contrary to "normal" filesystems it can happen that link
805 	   makes two "logical" inodes point to the same "physical"
806 	   inode.  We invalidate the attributes of the old one, so it
807 	   will reflect changes in the backing inode (link count,
808 	   etc.)
809 	*/
810 	if (!err) {
811 		struct fuse_inode *fi = get_fuse_inode(inode);
812 
813 		spin_lock(&fc->lock);
814 		fi->attr_version = ++fc->attr_version;
815 		inc_nlink(inode);
816 		spin_unlock(&fc->lock);
817 		fuse_invalidate_attr(inode);
818 		fuse_update_ctime(inode);
819 	} else if (err == -EINTR) {
820 		fuse_invalidate_attr(inode);
821 	}
822 	return err;
823 }
824 
825 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
826 			  struct kstat *stat)
827 {
828 	unsigned int blkbits;
829 	struct fuse_conn *fc = get_fuse_conn(inode);
830 
831 	/* see the comment in fuse_change_attributes() */
832 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
833 		attr->size = i_size_read(inode);
834 		attr->mtime = inode->i_mtime.tv_sec;
835 		attr->mtimensec = inode->i_mtime.tv_nsec;
836 		attr->ctime = inode->i_ctime.tv_sec;
837 		attr->ctimensec = inode->i_ctime.tv_nsec;
838 	}
839 
840 	stat->dev = inode->i_sb->s_dev;
841 	stat->ino = attr->ino;
842 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
843 	stat->nlink = attr->nlink;
844 	stat->uid = make_kuid(&init_user_ns, attr->uid);
845 	stat->gid = make_kgid(&init_user_ns, attr->gid);
846 	stat->rdev = inode->i_rdev;
847 	stat->atime.tv_sec = attr->atime;
848 	stat->atime.tv_nsec = attr->atimensec;
849 	stat->mtime.tv_sec = attr->mtime;
850 	stat->mtime.tv_nsec = attr->mtimensec;
851 	stat->ctime.tv_sec = attr->ctime;
852 	stat->ctime.tv_nsec = attr->ctimensec;
853 	stat->size = attr->size;
854 	stat->blocks = attr->blocks;
855 
856 	if (attr->blksize != 0)
857 		blkbits = ilog2(attr->blksize);
858 	else
859 		blkbits = inode->i_sb->s_blocksize_bits;
860 
861 	stat->blksize = 1 << blkbits;
862 }
863 
864 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
865 			   struct file *file)
866 {
867 	int err;
868 	struct fuse_getattr_in inarg;
869 	struct fuse_attr_out outarg;
870 	struct fuse_conn *fc = get_fuse_conn(inode);
871 	FUSE_ARGS(args);
872 	u64 attr_version;
873 
874 	attr_version = fuse_get_attr_version(fc);
875 
876 	memset(&inarg, 0, sizeof(inarg));
877 	memset(&outarg, 0, sizeof(outarg));
878 	/* Directories have separate file-handle space */
879 	if (file && S_ISREG(inode->i_mode)) {
880 		struct fuse_file *ff = file->private_data;
881 
882 		inarg.getattr_flags |= FUSE_GETATTR_FH;
883 		inarg.fh = ff->fh;
884 	}
885 	args.in.h.opcode = FUSE_GETATTR;
886 	args.in.h.nodeid = get_node_id(inode);
887 	args.in.numargs = 1;
888 	args.in.args[0].size = sizeof(inarg);
889 	args.in.args[0].value = &inarg;
890 	args.out.numargs = 1;
891 	args.out.args[0].size = sizeof(outarg);
892 	args.out.args[0].value = &outarg;
893 	err = fuse_simple_request(fc, &args);
894 	if (!err) {
895 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
896 			make_bad_inode(inode);
897 			err = -EIO;
898 		} else {
899 			fuse_change_attributes(inode, &outarg.attr,
900 					       attr_timeout(&outarg),
901 					       attr_version);
902 			if (stat)
903 				fuse_fillattr(inode, &outarg.attr, stat);
904 		}
905 	}
906 	return err;
907 }
908 
909 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
910 			   struct file *file, bool *refreshed)
911 {
912 	struct fuse_inode *fi = get_fuse_inode(inode);
913 	int err;
914 	bool r;
915 
916 	if (time_before64(fi->i_time, get_jiffies_64())) {
917 		r = true;
918 		err = fuse_do_getattr(inode, stat, file);
919 	} else {
920 		r = false;
921 		err = 0;
922 		if (stat) {
923 			generic_fillattr(inode, stat);
924 			stat->mode = fi->orig_i_mode;
925 			stat->ino = fi->orig_ino;
926 		}
927 	}
928 
929 	if (refreshed != NULL)
930 		*refreshed = r;
931 
932 	return err;
933 }
934 
935 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
936 			     u64 child_nodeid, struct qstr *name)
937 {
938 	int err = -ENOTDIR;
939 	struct inode *parent;
940 	struct dentry *dir;
941 	struct dentry *entry;
942 
943 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
944 	if (!parent)
945 		return -ENOENT;
946 
947 	inode_lock(parent);
948 	if (!S_ISDIR(parent->i_mode))
949 		goto unlock;
950 
951 	err = -ENOENT;
952 	dir = d_find_alias(parent);
953 	if (!dir)
954 		goto unlock;
955 
956 	entry = d_lookup(dir, name);
957 	dput(dir);
958 	if (!entry)
959 		goto unlock;
960 
961 	fuse_invalidate_attr(parent);
962 	fuse_invalidate_entry(entry);
963 
964 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
965 		inode_lock(d_inode(entry));
966 		if (get_node_id(d_inode(entry)) != child_nodeid) {
967 			err = -ENOENT;
968 			goto badentry;
969 		}
970 		if (d_mountpoint(entry)) {
971 			err = -EBUSY;
972 			goto badentry;
973 		}
974 		if (d_is_dir(entry)) {
975 			shrink_dcache_parent(entry);
976 			if (!simple_empty(entry)) {
977 				err = -ENOTEMPTY;
978 				goto badentry;
979 			}
980 			d_inode(entry)->i_flags |= S_DEAD;
981 		}
982 		dont_mount(entry);
983 		clear_nlink(d_inode(entry));
984 		err = 0;
985  badentry:
986 		inode_unlock(d_inode(entry));
987 		if (!err)
988 			d_delete(entry);
989 	} else {
990 		err = 0;
991 	}
992 	dput(entry);
993 
994  unlock:
995 	inode_unlock(parent);
996 	iput(parent);
997 	return err;
998 }
999 
1000 /*
1001  * Calling into a user-controlled filesystem gives the filesystem
1002  * daemon ptrace-like capabilities over the current process.  This
1003  * means, that the filesystem daemon is able to record the exact
1004  * filesystem operations performed, and can also control the behavior
1005  * of the requester process in otherwise impossible ways.  For example
1006  * it can delay the operation for arbitrary length of time allowing
1007  * DoS against the requester.
1008  *
1009  * For this reason only those processes can call into the filesystem,
1010  * for which the owner of the mount has ptrace privilege.  This
1011  * excludes processes started by other users, suid or sgid processes.
1012  */
1013 int fuse_allow_current_process(struct fuse_conn *fc)
1014 {
1015 	const struct cred *cred;
1016 
1017 	if (fc->flags & FUSE_ALLOW_OTHER)
1018 		return 1;
1019 
1020 	cred = current_cred();
1021 	if (uid_eq(cred->euid, fc->user_id) &&
1022 	    uid_eq(cred->suid, fc->user_id) &&
1023 	    uid_eq(cred->uid,  fc->user_id) &&
1024 	    gid_eq(cred->egid, fc->group_id) &&
1025 	    gid_eq(cred->sgid, fc->group_id) &&
1026 	    gid_eq(cred->gid,  fc->group_id))
1027 		return 1;
1028 
1029 	return 0;
1030 }
1031 
1032 static int fuse_access(struct inode *inode, int mask)
1033 {
1034 	struct fuse_conn *fc = get_fuse_conn(inode);
1035 	FUSE_ARGS(args);
1036 	struct fuse_access_in inarg;
1037 	int err;
1038 
1039 	BUG_ON(mask & MAY_NOT_BLOCK);
1040 
1041 	if (fc->no_access)
1042 		return 0;
1043 
1044 	memset(&inarg, 0, sizeof(inarg));
1045 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1046 	args.in.h.opcode = FUSE_ACCESS;
1047 	args.in.h.nodeid = get_node_id(inode);
1048 	args.in.numargs = 1;
1049 	args.in.args[0].size = sizeof(inarg);
1050 	args.in.args[0].value = &inarg;
1051 	err = fuse_simple_request(fc, &args);
1052 	if (err == -ENOSYS) {
1053 		fc->no_access = 1;
1054 		err = 0;
1055 	}
1056 	return err;
1057 }
1058 
1059 static int fuse_perm_getattr(struct inode *inode, int mask)
1060 {
1061 	if (mask & MAY_NOT_BLOCK)
1062 		return -ECHILD;
1063 
1064 	return fuse_do_getattr(inode, NULL, NULL);
1065 }
1066 
1067 /*
1068  * Check permission.  The two basic access models of FUSE are:
1069  *
1070  * 1) Local access checking ('default_permissions' mount option) based
1071  * on file mode.  This is the plain old disk filesystem permission
1072  * modell.
1073  *
1074  * 2) "Remote" access checking, where server is responsible for
1075  * checking permission in each inode operation.  An exception to this
1076  * is if ->permission() was invoked from sys_access() in which case an
1077  * access request is sent.  Execute permission is still checked
1078  * locally based on file mode.
1079  */
1080 static int fuse_permission(struct inode *inode, int mask)
1081 {
1082 	struct fuse_conn *fc = get_fuse_conn(inode);
1083 	bool refreshed = false;
1084 	int err = 0;
1085 
1086 	if (!fuse_allow_current_process(fc))
1087 		return -EACCES;
1088 
1089 	/*
1090 	 * If attributes are needed, refresh them before proceeding
1091 	 */
1092 	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1093 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1094 		struct fuse_inode *fi = get_fuse_inode(inode);
1095 
1096 		if (time_before64(fi->i_time, get_jiffies_64())) {
1097 			refreshed = true;
1098 
1099 			err = fuse_perm_getattr(inode, mask);
1100 			if (err)
1101 				return err;
1102 		}
1103 	}
1104 
1105 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1106 		err = generic_permission(inode, mask);
1107 
1108 		/* If permission is denied, try to refresh file
1109 		   attributes.  This is also needed, because the root
1110 		   node will at first have no permissions */
1111 		if (err == -EACCES && !refreshed) {
1112 			err = fuse_perm_getattr(inode, mask);
1113 			if (!err)
1114 				err = generic_permission(inode, mask);
1115 		}
1116 
1117 		/* Note: the opposite of the above test does not
1118 		   exist.  So if permissions are revoked this won't be
1119 		   noticed immediately, only after the attribute
1120 		   timeout has expired */
1121 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1122 		err = fuse_access(inode, mask);
1123 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1124 		if (!(inode->i_mode & S_IXUGO)) {
1125 			if (refreshed)
1126 				return -EACCES;
1127 
1128 			err = fuse_perm_getattr(inode, mask);
1129 			if (!err && !(inode->i_mode & S_IXUGO))
1130 				return -EACCES;
1131 		}
1132 	}
1133 	return err;
1134 }
1135 
1136 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1137 			 struct dir_context *ctx)
1138 {
1139 	while (nbytes >= FUSE_NAME_OFFSET) {
1140 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1141 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1142 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1143 			return -EIO;
1144 		if (reclen > nbytes)
1145 			break;
1146 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1147 			return -EIO;
1148 
1149 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1150 			       dirent->ino, dirent->type))
1151 			break;
1152 
1153 		buf += reclen;
1154 		nbytes -= reclen;
1155 		ctx->pos = dirent->off;
1156 	}
1157 
1158 	return 0;
1159 }
1160 
1161 static int fuse_direntplus_link(struct file *file,
1162 				struct fuse_direntplus *direntplus,
1163 				u64 attr_version)
1164 {
1165 	struct fuse_entry_out *o = &direntplus->entry_out;
1166 	struct fuse_dirent *dirent = &direntplus->dirent;
1167 	struct dentry *parent = file->f_path.dentry;
1168 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1169 	struct dentry *dentry;
1170 	struct dentry *alias;
1171 	struct inode *dir = d_inode(parent);
1172 	struct fuse_conn *fc;
1173 	struct inode *inode;
1174 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1175 
1176 	if (!o->nodeid) {
1177 		/*
1178 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1179 		 * ENOENT. Instead, it only means the userspace filesystem did
1180 		 * not want to return attributes/handle for this entry.
1181 		 *
1182 		 * So do nothing.
1183 		 */
1184 		return 0;
1185 	}
1186 
1187 	if (name.name[0] == '.') {
1188 		/*
1189 		 * We could potentially refresh the attributes of the directory
1190 		 * and its parent?
1191 		 */
1192 		if (name.len == 1)
1193 			return 0;
1194 		if (name.name[1] == '.' && name.len == 2)
1195 			return 0;
1196 	}
1197 
1198 	if (invalid_nodeid(o->nodeid))
1199 		return -EIO;
1200 	if (!fuse_valid_type(o->attr.mode))
1201 		return -EIO;
1202 
1203 	fc = get_fuse_conn(dir);
1204 
1205 	name.hash = full_name_hash(name.name, name.len);
1206 	dentry = d_lookup(parent, &name);
1207 	if (!dentry) {
1208 retry:
1209 		dentry = d_alloc_parallel(parent, &name, &wq);
1210 		if (IS_ERR(dentry))
1211 			return PTR_ERR(dentry);
1212 	}
1213 	if (!d_in_lookup(dentry)) {
1214 		struct fuse_inode *fi;
1215 		inode = d_inode(dentry);
1216 		if (!inode ||
1217 		    get_node_id(inode) != o->nodeid ||
1218 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1219 			d_invalidate(dentry);
1220 			dput(dentry);
1221 			goto retry;
1222 		}
1223 		if (is_bad_inode(inode)) {
1224 			dput(dentry);
1225 			return -EIO;
1226 		}
1227 
1228 		fi = get_fuse_inode(inode);
1229 		spin_lock(&fc->lock);
1230 		fi->nlookup++;
1231 		spin_unlock(&fc->lock);
1232 
1233 		fuse_change_attributes(inode, &o->attr,
1234 				       entry_attr_timeout(o),
1235 				       attr_version);
1236 		/*
1237 		 * The other branch comes via fuse_iget()
1238 		 * which bumps nlookup inside
1239 		 */
1240 	} else {
1241 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1242 				  &o->attr, entry_attr_timeout(o),
1243 				  attr_version);
1244 		if (!inode)
1245 			inode = ERR_PTR(-ENOMEM);
1246 
1247 		alias = d_splice_alias(inode, dentry);
1248 		d_lookup_done(dentry);
1249 		if (alias) {
1250 			dput(dentry);
1251 			dentry = alias;
1252 		}
1253 		if (IS_ERR(dentry))
1254 			return PTR_ERR(dentry);
1255 	}
1256 	if (fc->readdirplus_auto)
1257 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1258 	fuse_change_entry_timeout(dentry, o);
1259 
1260 	dput(dentry);
1261 	return 0;
1262 }
1263 
1264 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1265 			     struct dir_context *ctx, u64 attr_version)
1266 {
1267 	struct fuse_direntplus *direntplus;
1268 	struct fuse_dirent *dirent;
1269 	size_t reclen;
1270 	int over = 0;
1271 	int ret;
1272 
1273 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1274 		direntplus = (struct fuse_direntplus *) buf;
1275 		dirent = &direntplus->dirent;
1276 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1277 
1278 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1279 			return -EIO;
1280 		if (reclen > nbytes)
1281 			break;
1282 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1283 			return -EIO;
1284 
1285 		if (!over) {
1286 			/* We fill entries into dstbuf only as much as
1287 			   it can hold. But we still continue iterating
1288 			   over remaining entries to link them. If not,
1289 			   we need to send a FORGET for each of those
1290 			   which we did not link.
1291 			*/
1292 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1293 				       dirent->ino, dirent->type);
1294 			ctx->pos = dirent->off;
1295 		}
1296 
1297 		buf += reclen;
1298 		nbytes -= reclen;
1299 
1300 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1301 		if (ret)
1302 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1303 	}
1304 
1305 	return 0;
1306 }
1307 
1308 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1309 {
1310 	int plus, err;
1311 	size_t nbytes;
1312 	struct page *page;
1313 	struct inode *inode = file_inode(file);
1314 	struct fuse_conn *fc = get_fuse_conn(inode);
1315 	struct fuse_req *req;
1316 	u64 attr_version = 0;
1317 
1318 	if (is_bad_inode(inode))
1319 		return -EIO;
1320 
1321 	req = fuse_get_req(fc, 1);
1322 	if (IS_ERR(req))
1323 		return PTR_ERR(req);
1324 
1325 	page = alloc_page(GFP_KERNEL);
1326 	if (!page) {
1327 		fuse_put_request(fc, req);
1328 		return -ENOMEM;
1329 	}
1330 
1331 	plus = fuse_use_readdirplus(inode, ctx);
1332 	req->out.argpages = 1;
1333 	req->num_pages = 1;
1334 	req->pages[0] = page;
1335 	req->page_descs[0].length = PAGE_SIZE;
1336 	if (plus) {
1337 		attr_version = fuse_get_attr_version(fc);
1338 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1339 			       FUSE_READDIRPLUS);
1340 	} else {
1341 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1342 			       FUSE_READDIR);
1343 	}
1344 	fuse_request_send(fc, req);
1345 	nbytes = req->out.args[0].size;
1346 	err = req->out.h.error;
1347 	fuse_put_request(fc, req);
1348 	if (!err) {
1349 		if (plus) {
1350 			err = parse_dirplusfile(page_address(page), nbytes,
1351 						file, ctx,
1352 						attr_version);
1353 		} else {
1354 			err = parse_dirfile(page_address(page), nbytes, file,
1355 					    ctx);
1356 		}
1357 	}
1358 
1359 	__free_page(page);
1360 	fuse_invalidate_atime(inode);
1361 	return err;
1362 }
1363 
1364 static const char *fuse_get_link(struct dentry *dentry,
1365 				 struct inode *inode,
1366 				 struct delayed_call *done)
1367 {
1368 	struct fuse_conn *fc = get_fuse_conn(inode);
1369 	FUSE_ARGS(args);
1370 	char *link;
1371 	ssize_t ret;
1372 
1373 	if (!dentry)
1374 		return ERR_PTR(-ECHILD);
1375 
1376 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1377 	if (!link)
1378 		return ERR_PTR(-ENOMEM);
1379 
1380 	args.in.h.opcode = FUSE_READLINK;
1381 	args.in.h.nodeid = get_node_id(inode);
1382 	args.out.argvar = 1;
1383 	args.out.numargs = 1;
1384 	args.out.args[0].size = PAGE_SIZE - 1;
1385 	args.out.args[0].value = link;
1386 	ret = fuse_simple_request(fc, &args);
1387 	if (ret < 0) {
1388 		kfree(link);
1389 		link = ERR_PTR(ret);
1390 	} else {
1391 		link[ret] = '\0';
1392 		set_delayed_call(done, kfree_link, link);
1393 	}
1394 	fuse_invalidate_atime(inode);
1395 	return link;
1396 }
1397 
1398 static int fuse_dir_open(struct inode *inode, struct file *file)
1399 {
1400 	return fuse_open_common(inode, file, true);
1401 }
1402 
1403 static int fuse_dir_release(struct inode *inode, struct file *file)
1404 {
1405 	fuse_release_common(file, FUSE_RELEASEDIR);
1406 
1407 	return 0;
1408 }
1409 
1410 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1411 			  int datasync)
1412 {
1413 	return fuse_fsync_common(file, start, end, datasync, 1);
1414 }
1415 
1416 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1417 			    unsigned long arg)
1418 {
1419 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1420 
1421 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1422 	if (fc->minor < 18)
1423 		return -ENOTTY;
1424 
1425 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1426 }
1427 
1428 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1429 				   unsigned long arg)
1430 {
1431 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1432 
1433 	if (fc->minor < 18)
1434 		return -ENOTTY;
1435 
1436 	return fuse_ioctl_common(file, cmd, arg,
1437 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1438 }
1439 
1440 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1441 {
1442 	/* Always update if mtime is explicitly set  */
1443 	if (ivalid & ATTR_MTIME_SET)
1444 		return true;
1445 
1446 	/* Or if kernel i_mtime is the official one */
1447 	if (trust_local_mtime)
1448 		return true;
1449 
1450 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1451 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1452 		return false;
1453 
1454 	/* In all other cases update */
1455 	return true;
1456 }
1457 
1458 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1459 			   bool trust_local_cmtime)
1460 {
1461 	unsigned ivalid = iattr->ia_valid;
1462 
1463 	if (ivalid & ATTR_MODE)
1464 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1465 	if (ivalid & ATTR_UID)
1466 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1467 	if (ivalid & ATTR_GID)
1468 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1469 	if (ivalid & ATTR_SIZE)
1470 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1471 	if (ivalid & ATTR_ATIME) {
1472 		arg->valid |= FATTR_ATIME;
1473 		arg->atime = iattr->ia_atime.tv_sec;
1474 		arg->atimensec = iattr->ia_atime.tv_nsec;
1475 		if (!(ivalid & ATTR_ATIME_SET))
1476 			arg->valid |= FATTR_ATIME_NOW;
1477 	}
1478 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1479 		arg->valid |= FATTR_MTIME;
1480 		arg->mtime = iattr->ia_mtime.tv_sec;
1481 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1482 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1483 			arg->valid |= FATTR_MTIME_NOW;
1484 	}
1485 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1486 		arg->valid |= FATTR_CTIME;
1487 		arg->ctime = iattr->ia_ctime.tv_sec;
1488 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1489 	}
1490 }
1491 
1492 /*
1493  * Prevent concurrent writepages on inode
1494  *
1495  * This is done by adding a negative bias to the inode write counter
1496  * and waiting for all pending writes to finish.
1497  */
1498 void fuse_set_nowrite(struct inode *inode)
1499 {
1500 	struct fuse_conn *fc = get_fuse_conn(inode);
1501 	struct fuse_inode *fi = get_fuse_inode(inode);
1502 
1503 	BUG_ON(!inode_is_locked(inode));
1504 
1505 	spin_lock(&fc->lock);
1506 	BUG_ON(fi->writectr < 0);
1507 	fi->writectr += FUSE_NOWRITE;
1508 	spin_unlock(&fc->lock);
1509 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1510 }
1511 
1512 /*
1513  * Allow writepages on inode
1514  *
1515  * Remove the bias from the writecounter and send any queued
1516  * writepages.
1517  */
1518 static void __fuse_release_nowrite(struct inode *inode)
1519 {
1520 	struct fuse_inode *fi = get_fuse_inode(inode);
1521 
1522 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1523 	fi->writectr = 0;
1524 	fuse_flush_writepages(inode);
1525 }
1526 
1527 void fuse_release_nowrite(struct inode *inode)
1528 {
1529 	struct fuse_conn *fc = get_fuse_conn(inode);
1530 
1531 	spin_lock(&fc->lock);
1532 	__fuse_release_nowrite(inode);
1533 	spin_unlock(&fc->lock);
1534 }
1535 
1536 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1537 			      struct inode *inode,
1538 			      struct fuse_setattr_in *inarg_p,
1539 			      struct fuse_attr_out *outarg_p)
1540 {
1541 	args->in.h.opcode = FUSE_SETATTR;
1542 	args->in.h.nodeid = get_node_id(inode);
1543 	args->in.numargs = 1;
1544 	args->in.args[0].size = sizeof(*inarg_p);
1545 	args->in.args[0].value = inarg_p;
1546 	args->out.numargs = 1;
1547 	args->out.args[0].size = sizeof(*outarg_p);
1548 	args->out.args[0].value = outarg_p;
1549 }
1550 
1551 /*
1552  * Flush inode->i_mtime to the server
1553  */
1554 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1555 {
1556 	struct fuse_conn *fc = get_fuse_conn(inode);
1557 	FUSE_ARGS(args);
1558 	struct fuse_setattr_in inarg;
1559 	struct fuse_attr_out outarg;
1560 
1561 	memset(&inarg, 0, sizeof(inarg));
1562 	memset(&outarg, 0, sizeof(outarg));
1563 
1564 	inarg.valid = FATTR_MTIME;
1565 	inarg.mtime = inode->i_mtime.tv_sec;
1566 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1567 	if (fc->minor >= 23) {
1568 		inarg.valid |= FATTR_CTIME;
1569 		inarg.ctime = inode->i_ctime.tv_sec;
1570 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1571 	}
1572 	if (ff) {
1573 		inarg.valid |= FATTR_FH;
1574 		inarg.fh = ff->fh;
1575 	}
1576 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1577 
1578 	return fuse_simple_request(fc, &args);
1579 }
1580 
1581 /*
1582  * Set attributes, and at the same time refresh them.
1583  *
1584  * Truncation is slightly complicated, because the 'truncate' request
1585  * may fail, in which case we don't want to touch the mapping.
1586  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1587  * and the actual truncation by hand.
1588  */
1589 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1590 		    struct file *file)
1591 {
1592 	struct fuse_conn *fc = get_fuse_conn(inode);
1593 	struct fuse_inode *fi = get_fuse_inode(inode);
1594 	FUSE_ARGS(args);
1595 	struct fuse_setattr_in inarg;
1596 	struct fuse_attr_out outarg;
1597 	bool is_truncate = false;
1598 	bool is_wb = fc->writeback_cache;
1599 	loff_t oldsize;
1600 	int err;
1601 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1602 
1603 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1604 		attr->ia_valid |= ATTR_FORCE;
1605 
1606 	err = inode_change_ok(inode, attr);
1607 	if (err)
1608 		return err;
1609 
1610 	if (attr->ia_valid & ATTR_OPEN) {
1611 		if (fc->atomic_o_trunc)
1612 			return 0;
1613 		file = NULL;
1614 	}
1615 
1616 	if (attr->ia_valid & ATTR_SIZE)
1617 		is_truncate = true;
1618 
1619 	if (is_truncate) {
1620 		fuse_set_nowrite(inode);
1621 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1622 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1623 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1624 	}
1625 
1626 	memset(&inarg, 0, sizeof(inarg));
1627 	memset(&outarg, 0, sizeof(outarg));
1628 	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1629 	if (file) {
1630 		struct fuse_file *ff = file->private_data;
1631 		inarg.valid |= FATTR_FH;
1632 		inarg.fh = ff->fh;
1633 	}
1634 	if (attr->ia_valid & ATTR_SIZE) {
1635 		/* For mandatory locking in truncate */
1636 		inarg.valid |= FATTR_LOCKOWNER;
1637 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1638 	}
1639 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1640 	err = fuse_simple_request(fc, &args);
1641 	if (err) {
1642 		if (err == -EINTR)
1643 			fuse_invalidate_attr(inode);
1644 		goto error;
1645 	}
1646 
1647 	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1648 		make_bad_inode(inode);
1649 		err = -EIO;
1650 		goto error;
1651 	}
1652 
1653 	spin_lock(&fc->lock);
1654 	/* the kernel maintains i_mtime locally */
1655 	if (trust_local_cmtime) {
1656 		if (attr->ia_valid & ATTR_MTIME)
1657 			inode->i_mtime = attr->ia_mtime;
1658 		if (attr->ia_valid & ATTR_CTIME)
1659 			inode->i_ctime = attr->ia_ctime;
1660 		/* FIXME: clear I_DIRTY_SYNC? */
1661 	}
1662 
1663 	fuse_change_attributes_common(inode, &outarg.attr,
1664 				      attr_timeout(&outarg));
1665 	oldsize = inode->i_size;
1666 	/* see the comment in fuse_change_attributes() */
1667 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1668 		i_size_write(inode, outarg.attr.size);
1669 
1670 	if (is_truncate) {
1671 		/* NOTE: this may release/reacquire fc->lock */
1672 		__fuse_release_nowrite(inode);
1673 	}
1674 	spin_unlock(&fc->lock);
1675 
1676 	/*
1677 	 * Only call invalidate_inode_pages2() after removing
1678 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1679 	 */
1680 	if ((is_truncate || !is_wb) &&
1681 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1682 		truncate_pagecache(inode, outarg.attr.size);
1683 		invalidate_inode_pages2(inode->i_mapping);
1684 	}
1685 
1686 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1687 	return 0;
1688 
1689 error:
1690 	if (is_truncate)
1691 		fuse_release_nowrite(inode);
1692 
1693 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1694 	return err;
1695 }
1696 
1697 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1698 {
1699 	struct inode *inode = d_inode(entry);
1700 
1701 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1702 		return -EACCES;
1703 
1704 	if (attr->ia_valid & ATTR_FILE)
1705 		return fuse_do_setattr(inode, attr, attr->ia_file);
1706 	else
1707 		return fuse_do_setattr(inode, attr, NULL);
1708 }
1709 
1710 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1711 			struct kstat *stat)
1712 {
1713 	struct inode *inode = d_inode(entry);
1714 	struct fuse_conn *fc = get_fuse_conn(inode);
1715 
1716 	if (!fuse_allow_current_process(fc))
1717 		return -EACCES;
1718 
1719 	return fuse_update_attributes(inode, stat, NULL, NULL);
1720 }
1721 
1722 static int fuse_setxattr(struct dentry *unused, struct inode *inode,
1723 			 const char *name, const void *value,
1724 			 size_t size, int flags)
1725 {
1726 	struct fuse_conn *fc = get_fuse_conn(inode);
1727 	FUSE_ARGS(args);
1728 	struct fuse_setxattr_in inarg;
1729 	int err;
1730 
1731 	if (fc->no_setxattr)
1732 		return -EOPNOTSUPP;
1733 
1734 	memset(&inarg, 0, sizeof(inarg));
1735 	inarg.size = size;
1736 	inarg.flags = flags;
1737 	args.in.h.opcode = FUSE_SETXATTR;
1738 	args.in.h.nodeid = get_node_id(inode);
1739 	args.in.numargs = 3;
1740 	args.in.args[0].size = sizeof(inarg);
1741 	args.in.args[0].value = &inarg;
1742 	args.in.args[1].size = strlen(name) + 1;
1743 	args.in.args[1].value = name;
1744 	args.in.args[2].size = size;
1745 	args.in.args[2].value = value;
1746 	err = fuse_simple_request(fc, &args);
1747 	if (err == -ENOSYS) {
1748 		fc->no_setxattr = 1;
1749 		err = -EOPNOTSUPP;
1750 	}
1751 	if (!err) {
1752 		fuse_invalidate_attr(inode);
1753 		fuse_update_ctime(inode);
1754 	}
1755 	return err;
1756 }
1757 
1758 static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode,
1759 			     const char *name, void *value, size_t size)
1760 {
1761 	struct fuse_conn *fc = get_fuse_conn(inode);
1762 	FUSE_ARGS(args);
1763 	struct fuse_getxattr_in inarg;
1764 	struct fuse_getxattr_out outarg;
1765 	ssize_t ret;
1766 
1767 	if (fc->no_getxattr)
1768 		return -EOPNOTSUPP;
1769 
1770 	memset(&inarg, 0, sizeof(inarg));
1771 	inarg.size = size;
1772 	args.in.h.opcode = FUSE_GETXATTR;
1773 	args.in.h.nodeid = get_node_id(inode);
1774 	args.in.numargs = 2;
1775 	args.in.args[0].size = sizeof(inarg);
1776 	args.in.args[0].value = &inarg;
1777 	args.in.args[1].size = strlen(name) + 1;
1778 	args.in.args[1].value = name;
1779 	/* This is really two different operations rolled into one */
1780 	args.out.numargs = 1;
1781 	if (size) {
1782 		args.out.argvar = 1;
1783 		args.out.args[0].size = size;
1784 		args.out.args[0].value = value;
1785 	} else {
1786 		args.out.args[0].size = sizeof(outarg);
1787 		args.out.args[0].value = &outarg;
1788 	}
1789 	ret = fuse_simple_request(fc, &args);
1790 	if (!ret && !size)
1791 		ret = outarg.size;
1792 	if (ret == -ENOSYS) {
1793 		fc->no_getxattr = 1;
1794 		ret = -EOPNOTSUPP;
1795 	}
1796 	return ret;
1797 }
1798 
1799 static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1800 {
1801 	struct inode *inode = d_inode(entry);
1802 	struct fuse_conn *fc = get_fuse_conn(inode);
1803 	FUSE_ARGS(args);
1804 	struct fuse_getxattr_in inarg;
1805 	struct fuse_getxattr_out outarg;
1806 	ssize_t ret;
1807 
1808 	if (!fuse_allow_current_process(fc))
1809 		return -EACCES;
1810 
1811 	if (fc->no_listxattr)
1812 		return -EOPNOTSUPP;
1813 
1814 	memset(&inarg, 0, sizeof(inarg));
1815 	inarg.size = size;
1816 	args.in.h.opcode = FUSE_LISTXATTR;
1817 	args.in.h.nodeid = get_node_id(inode);
1818 	args.in.numargs = 1;
1819 	args.in.args[0].size = sizeof(inarg);
1820 	args.in.args[0].value = &inarg;
1821 	/* This is really two different operations rolled into one */
1822 	args.out.numargs = 1;
1823 	if (size) {
1824 		args.out.argvar = 1;
1825 		args.out.args[0].size = size;
1826 		args.out.args[0].value = list;
1827 	} else {
1828 		args.out.args[0].size = sizeof(outarg);
1829 		args.out.args[0].value = &outarg;
1830 	}
1831 	ret = fuse_simple_request(fc, &args);
1832 	if (!ret && !size)
1833 		ret = outarg.size;
1834 	if (ret == -ENOSYS) {
1835 		fc->no_listxattr = 1;
1836 		ret = -EOPNOTSUPP;
1837 	}
1838 	return ret;
1839 }
1840 
1841 static int fuse_removexattr(struct dentry *entry, const char *name)
1842 {
1843 	struct inode *inode = d_inode(entry);
1844 	struct fuse_conn *fc = get_fuse_conn(inode);
1845 	FUSE_ARGS(args);
1846 	int err;
1847 
1848 	if (fc->no_removexattr)
1849 		return -EOPNOTSUPP;
1850 
1851 	args.in.h.opcode = FUSE_REMOVEXATTR;
1852 	args.in.h.nodeid = get_node_id(inode);
1853 	args.in.numargs = 1;
1854 	args.in.args[0].size = strlen(name) + 1;
1855 	args.in.args[0].value = name;
1856 	err = fuse_simple_request(fc, &args);
1857 	if (err == -ENOSYS) {
1858 		fc->no_removexattr = 1;
1859 		err = -EOPNOTSUPP;
1860 	}
1861 	if (!err) {
1862 		fuse_invalidate_attr(inode);
1863 		fuse_update_ctime(inode);
1864 	}
1865 	return err;
1866 }
1867 
1868 static const struct inode_operations fuse_dir_inode_operations = {
1869 	.lookup		= fuse_lookup,
1870 	.mkdir		= fuse_mkdir,
1871 	.symlink	= fuse_symlink,
1872 	.unlink		= fuse_unlink,
1873 	.rmdir		= fuse_rmdir,
1874 	.rename2	= fuse_rename2,
1875 	.link		= fuse_link,
1876 	.setattr	= fuse_setattr,
1877 	.create		= fuse_create,
1878 	.atomic_open	= fuse_atomic_open,
1879 	.mknod		= fuse_mknod,
1880 	.permission	= fuse_permission,
1881 	.getattr	= fuse_getattr,
1882 	.setxattr	= fuse_setxattr,
1883 	.getxattr	= fuse_getxattr,
1884 	.listxattr	= fuse_listxattr,
1885 	.removexattr	= fuse_removexattr,
1886 };
1887 
1888 static const struct file_operations fuse_dir_operations = {
1889 	.llseek		= generic_file_llseek,
1890 	.read		= generic_read_dir,
1891 	.iterate_shared	= fuse_readdir,
1892 	.open		= fuse_dir_open,
1893 	.release	= fuse_dir_release,
1894 	.fsync		= fuse_dir_fsync,
1895 	.unlocked_ioctl	= fuse_dir_ioctl,
1896 	.compat_ioctl	= fuse_dir_compat_ioctl,
1897 };
1898 
1899 static const struct inode_operations fuse_common_inode_operations = {
1900 	.setattr	= fuse_setattr,
1901 	.permission	= fuse_permission,
1902 	.getattr	= fuse_getattr,
1903 	.setxattr	= fuse_setxattr,
1904 	.getxattr	= fuse_getxattr,
1905 	.listxattr	= fuse_listxattr,
1906 	.removexattr	= fuse_removexattr,
1907 };
1908 
1909 static const struct inode_operations fuse_symlink_inode_operations = {
1910 	.setattr	= fuse_setattr,
1911 	.get_link	= fuse_get_link,
1912 	.readlink	= generic_readlink,
1913 	.getattr	= fuse_getattr,
1914 	.setxattr	= fuse_setxattr,
1915 	.getxattr	= fuse_getxattr,
1916 	.listxattr	= fuse_listxattr,
1917 	.removexattr	= fuse_removexattr,
1918 };
1919 
1920 void fuse_init_common(struct inode *inode)
1921 {
1922 	inode->i_op = &fuse_common_inode_operations;
1923 }
1924 
1925 void fuse_init_dir(struct inode *inode)
1926 {
1927 	inode->i_op = &fuse_dir_inode_operations;
1928 	inode->i_fop = &fuse_dir_operations;
1929 }
1930 
1931 void fuse_init_symlink(struct inode *inode)
1932 {
1933 	inode->i_op = &fuse_symlink_inode_operations;
1934 }
1935