xref: /linux/fs/nfsd/vfs.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
1 #define MSNFS	/* HACK HACK */
2 /*
3  * linux/fs/nfsd/vfs.c
4  *
5  * File operations used by nfsd. Some of these have been ripped from
6  * other parts of the kernel because they weren't exported, others
7  * are partial duplicates with added or changed functionality.
8  *
9  * Note that several functions dget() the dentry upon which they want
10  * to act, most notably those that create directory entries. Response
11  * dentry's are dput()'d if necessary in the release callback.
12  * So if you notice code paths that apparently fail to dput() the
13  * dentry, don't worry--they have been taken care of.
14  *
15  * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
16  * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17  */
18 
19 #include <linux/string.h>
20 #include <linux/time.h>
21 #include <linux/errno.h>
22 #include <linux/fs.h>
23 #include <linux/file.h>
24 #include <linux/mount.h>
25 #include <linux/major.h>
26 #include <linux/splice.h>
27 #include <linux/proc_fs.h>
28 #include <linux/stat.h>
29 #include <linux/fcntl.h>
30 #include <linux/net.h>
31 #include <linux/unistd.h>
32 #include <linux/slab.h>
33 #include <linux/pagemap.h>
34 #include <linux/in.h>
35 #include <linux/module.h>
36 #include <linux/namei.h>
37 #include <linux/vfs.h>
38 #include <linux/delay.h>
39 #include <linux/sunrpc/svc.h>
40 #include <linux/nfsd/nfsd.h>
41 #ifdef CONFIG_NFSD_V3
42 #include <linux/nfs3.h>
43 #include <linux/nfsd/xdr3.h>
44 #endif /* CONFIG_NFSD_V3 */
45 #include <linux/nfsd/nfsfh.h>
46 #include <linux/quotaops.h>
47 #include <linux/fsnotify.h>
48 #include <linux/posix_acl.h>
49 #include <linux/posix_acl_xattr.h>
50 #include <linux/xattr.h>
51 #ifdef CONFIG_NFSD_V4
52 #include <linux/nfs4.h>
53 #include <linux/nfs4_acl.h>
54 #include <linux/nfsd_idmap.h>
55 #include <linux/security.h>
56 #endif /* CONFIG_NFSD_V4 */
57 #include <linux/jhash.h>
58 
59 #include <asm/uaccess.h>
60 
61 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
62 
63 
64 /* We must ignore files (but only files) which might have mandatory
65  * locks on them because there is no way to know if the accesser has
66  * the lock.
67  */
68 #define IS_ISMNDLK(i)	(S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
69 
70 /*
71  * This is a cache of readahead params that help us choose the proper
72  * readahead strategy. Initially, we set all readahead parameters to 0
73  * and let the VFS handle things.
74  * If you increase the number of cached files very much, you'll need to
75  * add a hash table here.
76  */
77 struct raparms {
78 	struct raparms		*p_next;
79 	unsigned int		p_count;
80 	ino_t			p_ino;
81 	dev_t			p_dev;
82 	int			p_set;
83 	struct file_ra_state	p_ra;
84 	unsigned int		p_hindex;
85 };
86 
87 struct raparm_hbucket {
88 	struct raparms		*pb_head;
89 	spinlock_t		pb_lock;
90 } ____cacheline_aligned_in_smp;
91 
92 static struct raparms *		raparml;
93 #define RAPARM_HASH_BITS	4
94 #define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
95 #define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
96 static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
97 
98 /*
99  * Called from nfsd_lookup and encode_dirent. Check if we have crossed
100  * a mount point.
101  * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
102  *  or nfs_ok having possibly changed *dpp and *expp
103  */
104 int
105 nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
106 		        struct svc_export **expp)
107 {
108 	struct svc_export *exp = *expp, *exp2 = NULL;
109 	struct dentry *dentry = *dpp;
110 	struct vfsmount *mnt = mntget(exp->ex_mnt);
111 	struct dentry *mounts = dget(dentry);
112 	int err = 0;
113 
114 	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
115 
116 	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
117 	if (IS_ERR(exp2)) {
118 		if (PTR_ERR(exp2) != -ENOENT)
119 			err = PTR_ERR(exp2);
120 		dput(mounts);
121 		mntput(mnt);
122 		goto out;
123 	}
124 	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
125 		/* successfully crossed mount point */
126 		exp_put(exp);
127 		*expp = exp2;
128 		dput(dentry);
129 		*dpp = mounts;
130 	} else {
131 		exp_put(exp2);
132 		dput(mounts);
133 	}
134 	mntput(mnt);
135 out:
136 	return err;
137 }
138 
139 __be32
140 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
141 		   const char *name, int len,
142 		   struct svc_export **exp_ret, struct dentry **dentry_ret)
143 {
144 	struct svc_export	*exp;
145 	struct dentry		*dparent;
146 	struct dentry		*dentry;
147 	__be32			err;
148 	int			host_err;
149 
150 	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
151 
152 	/* Obtain dentry and export. */
153 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
154 	if (err)
155 		return err;
156 
157 	dparent = fhp->fh_dentry;
158 	exp  = fhp->fh_export;
159 	exp_get(exp);
160 
161 	/* Lookup the name, but don't follow links */
162 	if (isdotent(name, len)) {
163 		if (len==1)
164 			dentry = dget(dparent);
165 		else if (dparent != exp->ex_dentry) {
166 			dentry = dget_parent(dparent);
167 		} else  if (!EX_NOHIDE(exp))
168 			dentry = dget(dparent); /* .. == . just like at / */
169 		else {
170 			/* checking mountpoint crossing is very different when stepping up */
171 			struct svc_export *exp2 = NULL;
172 			struct dentry *dp;
173 			struct vfsmount *mnt = mntget(exp->ex_mnt);
174 			dentry = dget(dparent);
175 			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
176 				;
177 			dp = dget_parent(dentry);
178 			dput(dentry);
179 			dentry = dp;
180 
181 			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
182 			if (PTR_ERR(exp2) == -ENOENT) {
183 				dput(dentry);
184 				dentry = dget(dparent);
185 			} else if (IS_ERR(exp2)) {
186 				host_err = PTR_ERR(exp2);
187 				dput(dentry);
188 				mntput(mnt);
189 				goto out_nfserr;
190 			} else {
191 				exp_put(exp);
192 				exp = exp2;
193 			}
194 			mntput(mnt);
195 		}
196 	} else {
197 		fh_lock(fhp);
198 		dentry = lookup_one_len(name, dparent, len);
199 		host_err = PTR_ERR(dentry);
200 		if (IS_ERR(dentry))
201 			goto out_nfserr;
202 		/*
203 		 * check if we have crossed a mount point ...
204 		 */
205 		if (d_mountpoint(dentry)) {
206 			if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
207 				dput(dentry);
208 				goto out_nfserr;
209 			}
210 		}
211 	}
212 	*dentry_ret = dentry;
213 	*exp_ret = exp;
214 	return 0;
215 
216 out_nfserr:
217 	exp_put(exp);
218 	return nfserrno(host_err);
219 }
220 
221 /*
222  * Look up one component of a pathname.
223  * N.B. After this call _both_ fhp and resfh need an fh_put
224  *
225  * If the lookup would cross a mountpoint, and the mounted filesystem
226  * is exported to the client with NFSEXP_NOHIDE, then the lookup is
227  * accepted as it stands and the mounted directory is
228  * returned. Otherwise the covered directory is returned.
229  * NOTE: this mountpoint crossing is not supported properly by all
230  *   clients and is explicitly disallowed for NFSv3
231  *      NeilBrown <neilb@cse.unsw.edu.au>
232  */
233 __be32
234 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
235 					int len, struct svc_fh *resfh)
236 {
237 	struct svc_export	*exp;
238 	struct dentry		*dentry;
239 	__be32 err;
240 
241 	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
242 	if (err)
243 		return err;
244 	err = check_nfsd_access(exp, rqstp);
245 	if (err)
246 		goto out;
247 	/*
248 	 * Note: we compose the file handle now, but as the
249 	 * dentry may be negative, it may need to be updated.
250 	 */
251 	err = fh_compose(resfh, exp, dentry, fhp);
252 	if (!err && !dentry->d_inode)
253 		err = nfserr_noent;
254 out:
255 	dput(dentry);
256 	exp_put(exp);
257 	return err;
258 }
259 
260 
261 /*
262  * Set various file attributes.
263  * N.B. After this call fhp needs an fh_put
264  */
265 __be32
266 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
267 	     int check_guard, time_t guardtime)
268 {
269 	struct dentry	*dentry;
270 	struct inode	*inode;
271 	int		accmode = MAY_SATTR;
272 	int		ftype = 0;
273 	int		imode;
274 	__be32		err;
275 	int		host_err;
276 	int		size_change = 0;
277 
278 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
279 		accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
280 	if (iap->ia_valid & ATTR_SIZE)
281 		ftype = S_IFREG;
282 
283 	/* Get inode */
284 	err = fh_verify(rqstp, fhp, ftype, accmode);
285 	if (err)
286 		goto out;
287 
288 	dentry = fhp->fh_dentry;
289 	inode = dentry->d_inode;
290 
291 	/* Ignore any mode updates on symlinks */
292 	if (S_ISLNK(inode->i_mode))
293 		iap->ia_valid &= ~ATTR_MODE;
294 
295 	if (!iap->ia_valid)
296 		goto out;
297 
298 	/* NFSv2 does not differentiate between "set-[ac]time-to-now"
299 	 * which only requires access, and "set-[ac]time-to-X" which
300 	 * requires ownership.
301 	 * So if it looks like it might be "set both to the same time which
302 	 * is close to now", and if inode_change_ok fails, then we
303 	 * convert to "set to now" instead of "set to explicit time"
304 	 *
305 	 * We only call inode_change_ok as the last test as technically
306 	 * it is not an interface that we should be using.  It is only
307 	 * valid if the filesystem does not define it's own i_op->setattr.
308 	 */
309 #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
310 #define	MAX_TOUCH_TIME_ERROR (30*60)
311 	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
312 	    && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec
313 	    ) {
314 	    /* Looks probable.  Now just make sure time is in the right ballpark.
315 	     * Solaris, at least, doesn't seem to care what the time request is.
316 	     * We require it be within 30 minutes of now.
317 	     */
318 	    time_t delta = iap->ia_atime.tv_sec - get_seconds();
319 	    if (delta<0) delta = -delta;
320 	    if (delta < MAX_TOUCH_TIME_ERROR &&
321 		inode_change_ok(inode, iap) != 0) {
322 		/* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
323 		 * this will cause notify_change to set these times to "now"
324 		 */
325 		iap->ia_valid &= ~BOTH_TIME_SET;
326 	    }
327 	}
328 
329 	/* The size case is special. It changes the file as well as the attributes.  */
330 	if (iap->ia_valid & ATTR_SIZE) {
331 		if (iap->ia_size < inode->i_size) {
332 			err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
333 			if (err)
334 				goto out;
335 		}
336 
337 		/*
338 		 * If we are changing the size of the file, then
339 		 * we need to break all leases.
340 		 */
341 		host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
342 		if (host_err == -EWOULDBLOCK)
343 			host_err = -ETIMEDOUT;
344 		if (host_err) /* ENOMEM or EWOULDBLOCK */
345 			goto out_nfserr;
346 
347 		host_err = get_write_access(inode);
348 		if (host_err)
349 			goto out_nfserr;
350 
351 		size_change = 1;
352 		host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
353 		if (host_err) {
354 			put_write_access(inode);
355 			goto out_nfserr;
356 		}
357 		DQUOT_INIT(inode);
358 	}
359 
360 	imode = inode->i_mode;
361 	if (iap->ia_valid & ATTR_MODE) {
362 		iap->ia_mode &= S_IALLUGO;
363 		imode = iap->ia_mode |= (imode & ~S_IALLUGO);
364 	}
365 
366 	/* Revoke setuid/setgid bit on chown/chgrp */
367 	if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
368 		iap->ia_valid |= ATTR_KILL_SUID;
369 	if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
370 		iap->ia_valid |= ATTR_KILL_SGID;
371 
372 	/* Change the attributes. */
373 
374 	iap->ia_valid |= ATTR_CTIME;
375 
376 	err = nfserr_notsync;
377 	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
378 		fh_lock(fhp);
379 		host_err = notify_change(dentry, iap);
380 		err = nfserrno(host_err);
381 		fh_unlock(fhp);
382 	}
383 	if (size_change)
384 		put_write_access(inode);
385 	if (!err)
386 		if (EX_ISSYNC(fhp->fh_export))
387 			write_inode_now(inode, 1);
388 out:
389 	return err;
390 
391 out_nfserr:
392 	err = nfserrno(host_err);
393 	goto out;
394 }
395 
396 #if defined(CONFIG_NFSD_V2_ACL) || \
397     defined(CONFIG_NFSD_V3_ACL) || \
398     defined(CONFIG_NFSD_V4)
399 static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
400 {
401 	ssize_t buflen;
402 
403 	buflen = vfs_getxattr(dentry, key, NULL, 0);
404 	if (buflen <= 0)
405 		return buflen;
406 
407 	*buf = kmalloc(buflen, GFP_KERNEL);
408 	if (!*buf)
409 		return -ENOMEM;
410 
411 	return vfs_getxattr(dentry, key, *buf, buflen);
412 }
413 #endif
414 
415 #if defined(CONFIG_NFSD_V4)
416 static int
417 set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
418 {
419 	int len;
420 	size_t buflen;
421 	char *buf = NULL;
422 	int error = 0;
423 
424 	buflen = posix_acl_xattr_size(pacl->a_count);
425 	buf = kmalloc(buflen, GFP_KERNEL);
426 	error = -ENOMEM;
427 	if (buf == NULL)
428 		goto out;
429 
430 	len = posix_acl_to_xattr(pacl, buf, buflen);
431 	if (len < 0) {
432 		error = len;
433 		goto out;
434 	}
435 
436 	error = vfs_setxattr(dentry, key, buf, len, 0);
437 out:
438 	kfree(buf);
439 	return error;
440 }
441 
442 __be32
443 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
444     struct nfs4_acl *acl)
445 {
446 	__be32 error;
447 	int host_error;
448 	struct dentry *dentry;
449 	struct inode *inode;
450 	struct posix_acl *pacl = NULL, *dpacl = NULL;
451 	unsigned int flags = 0;
452 
453 	/* Get inode */
454 	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
455 	if (error)
456 		return error;
457 
458 	dentry = fhp->fh_dentry;
459 	inode = dentry->d_inode;
460 	if (S_ISDIR(inode->i_mode))
461 		flags = NFS4_ACL_DIR;
462 
463 	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
464 	if (host_error == -EINVAL) {
465 		return nfserr_attrnotsupp;
466 	} else if (host_error < 0)
467 		goto out_nfserr;
468 
469 	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
470 	if (host_error < 0)
471 		goto out_release;
472 
473 	if (S_ISDIR(inode->i_mode))
474 		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
475 
476 out_release:
477 	posix_acl_release(pacl);
478 	posix_acl_release(dpacl);
479 out_nfserr:
480 	if (host_error == -EOPNOTSUPP)
481 		return nfserr_attrnotsupp;
482 	else
483 		return nfserrno(host_error);
484 }
485 
486 static struct posix_acl *
487 _get_posix_acl(struct dentry *dentry, char *key)
488 {
489 	void *buf = NULL;
490 	struct posix_acl *pacl = NULL;
491 	int buflen;
492 
493 	buflen = nfsd_getxattr(dentry, key, &buf);
494 	if (!buflen)
495 		buflen = -ENODATA;
496 	if (buflen <= 0)
497 		return ERR_PTR(buflen);
498 
499 	pacl = posix_acl_from_xattr(buf, buflen);
500 	kfree(buf);
501 	return pacl;
502 }
503 
504 int
505 nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
506 {
507 	struct inode *inode = dentry->d_inode;
508 	int error = 0;
509 	struct posix_acl *pacl = NULL, *dpacl = NULL;
510 	unsigned int flags = 0;
511 
512 	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
513 	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
514 		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
515 	if (IS_ERR(pacl)) {
516 		error = PTR_ERR(pacl);
517 		pacl = NULL;
518 		goto out;
519 	}
520 
521 	if (S_ISDIR(inode->i_mode)) {
522 		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
523 		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
524 			dpacl = NULL;
525 		else if (IS_ERR(dpacl)) {
526 			error = PTR_ERR(dpacl);
527 			dpacl = NULL;
528 			goto out;
529 		}
530 		flags = NFS4_ACL_DIR;
531 	}
532 
533 	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
534 	if (IS_ERR(*acl)) {
535 		error = PTR_ERR(*acl);
536 		*acl = NULL;
537 	}
538  out:
539 	posix_acl_release(pacl);
540 	posix_acl_release(dpacl);
541 	return error;
542 }
543 
#endif /* defined(CONFIG_NFSD_V4) */
545 
546 #ifdef CONFIG_NFSD_V3
547 /*
548  * Check server access rights to a file system object
549  */
550 struct accessmap {
551 	u32		access;
552 	int		how;
553 };
554 static struct accessmap	nfs3_regaccess[] = {
555     {	NFS3_ACCESS_READ,	MAY_READ			},
556     {	NFS3_ACCESS_EXECUTE,	MAY_EXEC			},
557     {	NFS3_ACCESS_MODIFY,	MAY_WRITE|MAY_TRUNC		},
558     {	NFS3_ACCESS_EXTEND,	MAY_WRITE			},
559 
560     {	0,			0				}
561 };
562 
563 static struct accessmap	nfs3_diraccess[] = {
564     {	NFS3_ACCESS_READ,	MAY_READ			},
565     {	NFS3_ACCESS_LOOKUP,	MAY_EXEC			},
566     {	NFS3_ACCESS_MODIFY,	MAY_EXEC|MAY_WRITE|MAY_TRUNC	},
567     {	NFS3_ACCESS_EXTEND,	MAY_EXEC|MAY_WRITE		},
568     {	NFS3_ACCESS_DELETE,	MAY_REMOVE			},
569 
570     {	0,			0				}
571 };
572 
573 static struct accessmap	nfs3_anyaccess[] = {
574 	/* Some clients - Solaris 2.6 at least, make an access call
575 	 * to the server to check for access for things like /dev/null
576 	 * (which really, the server doesn't care about).  So
577 	 * We provide simple access checking for them, looking
578 	 * mainly at mode bits, and we make sure to ignore read-only
579 	 * filesystem checks
580 	 */
581     {	NFS3_ACCESS_READ,	MAY_READ			},
582     {	NFS3_ACCESS_EXECUTE,	MAY_EXEC			},
583     {	NFS3_ACCESS_MODIFY,	MAY_WRITE|MAY_LOCAL_ACCESS	},
584     {	NFS3_ACCESS_EXTEND,	MAY_WRITE|MAY_LOCAL_ACCESS	},
585 
586     {	0,			0				}
587 };
588 
589 __be32
590 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
591 {
592 	struct accessmap	*map;
593 	struct svc_export	*export;
594 	struct dentry		*dentry;
595 	u32			query, result = 0, sresult = 0;
596 	__be32			error;
597 
598 	error = fh_verify(rqstp, fhp, 0, MAY_NOP);
599 	if (error)
600 		goto out;
601 
602 	export = fhp->fh_export;
603 	dentry = fhp->fh_dentry;
604 
605 	if (S_ISREG(dentry->d_inode->i_mode))
606 		map = nfs3_regaccess;
607 	else if (S_ISDIR(dentry->d_inode->i_mode))
608 		map = nfs3_diraccess;
609 	else
610 		map = nfs3_anyaccess;
611 
612 
613 	query = *access;
614 	for  (; map->access; map++) {
615 		if (map->access & query) {
616 			__be32 err2;
617 
618 			sresult |= map->access;
619 
620 			err2 = nfsd_permission(rqstp, export, dentry, map->how);
621 			switch (err2) {
622 			case nfs_ok:
623 				result |= map->access;
624 				break;
625 
626 			/* the following error codes just mean the access was not allowed,
627 			 * rather than an error occurred */
628 			case nfserr_rofs:
629 			case nfserr_acces:
630 			case nfserr_perm:
631 				/* simply don't "or" in the access bit. */
632 				break;
633 			default:
634 				error = err2;
635 				goto out;
636 			}
637 		}
638 	}
639 	*access = result;
640 	if (supported)
641 		*supported = sresult;
642 
643  out:
644 	return error;
645 }
646 #endif /* CONFIG_NFSD_V3 */
647 
648 
649 
650 /*
651  * Open an existing file or directory.
652  * The access argument indicates the type of open (read/write/lock)
653  * N.B. After this call fhp needs an fh_put
654  */
655 __be32
656 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
657 			int access, struct file **filp)
658 {
659 	struct dentry	*dentry;
660 	struct inode	*inode;
661 	int		flags = O_RDONLY|O_LARGEFILE;
662 	__be32		err;
663 	int		host_err;
664 
665 	/*
666 	 * If we get here, then the client has already done an "open",
667 	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
668 	 * in case a chmod has now revoked permission.
669 	 */
670 	err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
671 	if (err)
672 		goto out;
673 
674 	dentry = fhp->fh_dentry;
675 	inode = dentry->d_inode;
676 
677 	/* Disallow write access to files with the append-only bit set
678 	 * or any access when mandatory locking enabled
679 	 */
680 	err = nfserr_perm;
681 	if (IS_APPEND(inode) && (access & MAY_WRITE))
682 		goto out;
683 	if (IS_ISMNDLK(inode))
684 		goto out;
685 
686 	if (!inode->i_fop)
687 		goto out;
688 
689 	/*
690 	 * Check to see if there are any leases on this file.
691 	 * This may block while leases are broken.
692 	 */
693 	host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
694 	if (host_err == -EWOULDBLOCK)
695 		host_err = -ETIMEDOUT;
696 	if (host_err) /* NOMEM or WOULDBLOCK */
697 		goto out_nfserr;
698 
699 	if (access & MAY_WRITE) {
700 		if (access & MAY_READ)
701 			flags = O_RDWR|O_LARGEFILE;
702 		else
703 			flags = O_WRONLY|O_LARGEFILE;
704 
705 		DQUOT_INIT(inode);
706 	}
707 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
708 	if (IS_ERR(*filp))
709 		host_err = PTR_ERR(*filp);
710 out_nfserr:
711 	err = nfserrno(host_err);
712 out:
713 	return err;
714 }
715 
/*
 * Close a file: drop the reference held on filp.
 */
void nfsd_close(struct file *filp)
{
	fput(filp);
}
724 
725 /*
726  * Sync a file
727  * As this calls fsync (not fdatasync) there is no need for a write_inode
728  * after it.
729  */
730 static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
731 			      const struct file_operations *fop)
732 {
733 	struct inode *inode = dp->d_inode;
734 	int (*fsync) (struct file *, struct dentry *, int);
735 	int err;
736 
737 	err = filemap_fdatawrite(inode->i_mapping);
738 	if (err == 0 && fop && (fsync = fop->fsync))
739 		err = fsync(filp, dp, 0);
740 	if (err == 0)
741 		err = filemap_fdatawait(inode->i_mapping);
742 
743 	return err;
744 }
745 
746 
747 static int
748 nfsd_sync(struct file *filp)
749 {
750         int err;
751 	struct inode *inode = filp->f_path.dentry->d_inode;
752 	dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
753 	mutex_lock(&inode->i_mutex);
754 	err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
755 	mutex_unlock(&inode->i_mutex);
756 
757 	return err;
758 }
759 
760 int
761 nfsd_sync_dir(struct dentry *dp)
762 {
763 	return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
764 }
765 
766 /*
767  * Obtain the readahead parameters for the file
768  * specified by (dev, ino).
769  */
770 
771 static inline struct raparms *
772 nfsd_get_raparms(dev_t dev, ino_t ino)
773 {
774 	struct raparms	*ra, **rap, **frap = NULL;
775 	int depth = 0;
776 	unsigned int hash;
777 	struct raparm_hbucket *rab;
778 
779 	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
780 	rab = &raparm_hash[hash];
781 
782 	spin_lock(&rab->pb_lock);
783 	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
784 		if (ra->p_ino == ino && ra->p_dev == dev)
785 			goto found;
786 		depth++;
787 		if (ra->p_count == 0)
788 			frap = rap;
789 	}
790 	depth = nfsdstats.ra_size*11/10;
791 	if (!frap) {
792 		spin_unlock(&rab->pb_lock);
793 		return NULL;
794 	}
795 	rap = frap;
796 	ra = *frap;
797 	ra->p_dev = dev;
798 	ra->p_ino = ino;
799 	ra->p_set = 0;
800 	ra->p_hindex = hash;
801 found:
802 	if (rap != &rab->pb_head) {
803 		*rap = ra->p_next;
804 		ra->p_next   = rab->pb_head;
805 		rab->pb_head = ra;
806 	}
807 	ra->p_count++;
808 	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
809 	spin_unlock(&rab->pb_lock);
810 	return ra;
811 }
812 
813 /*
814  * Grab and keep cached pages associated with a file in the svc_rqst
815  * so that they can be passed to the network sendmsg/sendpage routines
816  * directly. They will be released after the sending has completed.
817  */
818 static int
819 nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
820 		  struct splice_desc *sd)
821 {
822 	struct svc_rqst *rqstp = sd->u.data;
823 	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
824 	struct page *page = buf->page;
825 	size_t size;
826 	int ret;
827 
828 	ret = buf->ops->confirm(pipe, buf);
829 	if (unlikely(ret))
830 		return ret;
831 
832 	size = sd->len;
833 
834 	if (rqstp->rq_res.page_len == 0) {
835 		get_page(page);
836 		put_page(*pp);
837 		*pp = page;
838 		rqstp->rq_resused++;
839 		rqstp->rq_res.page_base = buf->offset;
840 		rqstp->rq_res.page_len = size;
841 	} else if (page != pp[-1]) {
842 		get_page(page);
843 		if (*pp)
844 			put_page(*pp);
845 		*pp = page;
846 		rqstp->rq_resused++;
847 		rqstp->rq_res.page_len += size;
848 	} else
849 		rqstp->rq_res.page_len += size;
850 
851 	return size;
852 }
853 
854 static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
855 				    struct splice_desc *sd)
856 {
857 	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
858 }
859 
860 static __be32
861 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
862               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
863 {
864 	struct inode *inode;
865 	struct raparms	*ra;
866 	mm_segment_t	oldfs;
867 	__be32		err;
868 	int		host_err;
869 
870 	err = nfserr_perm;
871 	inode = file->f_path.dentry->d_inode;
872 #ifdef MSNFS
873 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
874 		(!lock_may_read(inode, offset, *count)))
875 		goto out;
876 #endif
877 
878 	/* Get readahead parameters */
879 	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
880 
881 	if (ra && ra->p_set)
882 		file->f_ra = ra->p_ra;
883 
884 	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
885 		struct splice_desc sd = {
886 			.len		= 0,
887 			.total_len	= *count,
888 			.pos		= offset,
889 			.u.data		= rqstp,
890 		};
891 
892 		rqstp->rq_resused = 1;
893 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
894 	} else {
895 		oldfs = get_fs();
896 		set_fs(KERNEL_DS);
897 		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
898 		set_fs(oldfs);
899 	}
900 
901 	/* Write back readahead params */
902 	if (ra) {
903 		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
904 		spin_lock(&rab->pb_lock);
905 		ra->p_ra = file->f_ra;
906 		ra->p_set = 1;
907 		ra->p_count--;
908 		spin_unlock(&rab->pb_lock);
909 	}
910 
911 	if (host_err >= 0) {
912 		nfsdstats.io_read += host_err;
913 		*count = host_err;
914 		err = 0;
915 		fsnotify_access(file->f_path.dentry);
916 	} else
917 		err = nfserrno(host_err);
918 out:
919 	return err;
920 }
921 
922 static void kill_suid(struct dentry *dentry)
923 {
924 	struct iattr	ia;
925 	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
926 
927 	mutex_lock(&dentry->d_inode->i_mutex);
928 	notify_change(dentry, &ia);
929 	mutex_unlock(&dentry->d_inode->i_mutex);
930 }
931 
932 static __be32
933 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
934 				loff_t offset, struct kvec *vec, int vlen,
935 	   			unsigned long cnt, int *stablep)
936 {
937 	struct svc_export	*exp;
938 	struct dentry		*dentry;
939 	struct inode		*inode;
940 	mm_segment_t		oldfs;
941 	__be32			err = 0;
942 	int			host_err;
943 	int			stable = *stablep;
944 
945 #ifdef MSNFS
946 	err = nfserr_perm;
947 
948 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
949 		(!lock_may_write(file->f_path.dentry->d_inode, offset, cnt)))
950 		goto out;
951 #endif
952 
953 	dentry = file->f_path.dentry;
954 	inode = dentry->d_inode;
955 	exp   = fhp->fh_export;
956 
957 	/*
958 	 * Request sync writes if
959 	 *  -	the sync export option has been set, or
960 	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
961 	 *  -   The file system doesn't support fsync().
962 	 * When gathered writes have been configured for this volume,
963 	 * flushing the data to disk is handled separately below.
964 	 */
965 
966 	if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */
967 	       stable = 2;
968 	       *stablep = 2; /* FILE_SYNC */
969 	}
970 
971 	if (!EX_ISSYNC(exp))
972 		stable = 0;
973 	if (stable && !EX_WGATHER(exp))
974 		file->f_flags |= O_SYNC;
975 
976 	/* Write the data. */
977 	oldfs = get_fs(); set_fs(KERNEL_DS);
978 	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
979 	set_fs(oldfs);
980 	if (host_err >= 0) {
981 		nfsdstats.io_write += cnt;
982 		fsnotify_modify(file->f_path.dentry);
983 	}
984 
985 	/* clear setuid/setgid flag after write */
986 	if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
987 		kill_suid(dentry);
988 
989 	if (host_err >= 0 && stable) {
990 		static ino_t	last_ino;
991 		static dev_t	last_dev;
992 
993 		/*
994 		 * Gathered writes: If another process is currently
995 		 * writing to the file, there's a high chance
996 		 * this is another nfsd (triggered by a bulk write
997 		 * from a client's biod). Rather than syncing the
998 		 * file with each write request, we sleep for 10 msec.
999 		 *
1000 		 * I don't know if this roughly approximates
1001 		 * C. Juszak's idea of gathered writes, but it's a
1002 		 * nice and simple solution (IMHO), and it seems to
1003 		 * work:-)
1004 		 */
1005 		if (EX_WGATHER(exp)) {
1006 			if (atomic_read(&inode->i_writecount) > 1
1007 			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
1008 				dprintk("nfsd: write defer %d\n", current->pid);
1009 				msleep(10);
1010 				dprintk("nfsd: write resume %d\n", current->pid);
1011 			}
1012 
1013 			if (inode->i_state & I_DIRTY) {
1014 				dprintk("nfsd: write sync %d\n", current->pid);
1015 				host_err=nfsd_sync(file);
1016 			}
1017 #if 0
1018 			wake_up(&inode->i_wait);
1019 #endif
1020 		}
1021 		last_ino = inode->i_ino;
1022 		last_dev = inode->i_sb->s_dev;
1023 	}
1024 
1025 	dprintk("nfsd: write complete host_err=%d\n", host_err);
1026 	if (host_err >= 0)
1027 		err = 0;
1028 	else
1029 		err = nfserrno(host_err);
1030 out:
1031 	return err;
1032 }
1033 
1034 /*
1035  * Read data from a file. count must contain the requested read count
1036  * on entry. On return, *count contains the number of bytes actually read.
1037  * N.B. After this call fhp needs an fh_put
1038  */
1039 __be32
1040 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1041 		loff_t offset, struct kvec *vec, int vlen,
1042 		unsigned long *count)
1043 {
1044 	__be32		err;
1045 
1046 	if (file) {
1047 		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1048 				MAY_READ|MAY_OWNER_OVERRIDE);
1049 		if (err)
1050 			goto out;
1051 		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1052 	} else {
1053 		err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
1054 		if (err)
1055 			goto out;
1056 		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1057 		nfsd_close(file);
1058 	}
1059 out:
1060 	return err;
1061 }
1062 
1063 /*
1064  * Write data to a file.
1065  * The stable flag requests synchronous writes.
1066  * N.B. After this call fhp needs an fh_put
1067  */
1068 __be32
1069 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1070 		loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
1071 		int *stablep)
1072 {
1073 	__be32			err = 0;
1074 
1075 	if (file) {
1076 		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1077 				MAY_WRITE|MAY_OWNER_OVERRIDE);
1078 		if (err)
1079 			goto out;
1080 		err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
1081 				stablep);
1082 	} else {
1083 		err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
1084 		if (err)
1085 			goto out;
1086 
1087 		if (cnt)
1088 			err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
1089 					     cnt, stablep);
1090 		nfsd_close(file);
1091 	}
1092 out:
1093 	return err;
1094 }
1095 
1096 #ifdef CONFIG_NFSD_V3
1097 /*
1098  * Commit all pending writes to stable storage.
1099  * Strictly speaking, we could sync just the indicated file region here,
1100  * but there's currently no way we can ask the VFS to do so.
1101  *
1102  * Unfortunately we cannot lock the file to make sure we return full WCC
1103  * data to the client, as locking happens lower down in the filesystem.
1104  */
1105 __be32
1106 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1107                loff_t offset, unsigned long count)
1108 {
1109 	struct file	*file;
1110 	__be32		err;
1111 
1112 	if ((u64)count > ~(u64)offset)
1113 		return nfserr_inval;
1114 
1115 	if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
1116 		return err;
1117 	if (EX_ISSYNC(fhp->fh_export)) {
1118 		if (file->f_op && file->f_op->fsync) {
1119 			err = nfserrno(nfsd_sync(file));
1120 		} else {
1121 			err = nfserr_notsupp;
1122 		}
1123 	}
1124 
1125 	nfsd_close(file);
1126 	return err;
1127 }
1128 #endif /* CONFIG_NFSD_V3 */
1129 
1130 /*
1131  * Create a file (regular, directory, device, fifo); UNIX sockets
1132  * not yet implemented.
1133  * If the response fh has been verified, the parent directory should
1134  * already be locked. Note that the parent directory is left locked.
1135  *
1136  * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
1137  */
1138 __be32
1139 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1140 		char *fname, int flen, struct iattr *iap,
1141 		int type, dev_t rdev, struct svc_fh *resfhp)
1142 {
1143 	struct dentry	*dentry, *dchild = NULL;
1144 	struct inode	*dirp;
1145 	__be32		err;
1146 	int		host_err;
1147 
1148 	err = nfserr_perm;
1149 	if (!flen)
1150 		goto out;
1151 	err = nfserr_exist;
1152 	if (isdotent(fname, flen))
1153 		goto out;
1154 
1155 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
1156 	if (err)
1157 		goto out;
1158 
1159 	dentry = fhp->fh_dentry;
1160 	dirp = dentry->d_inode;
1161 
1162 	err = nfserr_notdir;
1163 	if(!dirp->i_op || !dirp->i_op->lookup)
1164 		goto out;
1165 	/*
1166 	 * Check whether the response file handle has been verified yet.
1167 	 * If it has, the parent directory should already be locked.
1168 	 */
1169 	if (!resfhp->fh_dentry) {
1170 		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1171 		fh_lock_nested(fhp, I_MUTEX_PARENT);
1172 		dchild = lookup_one_len(fname, dentry, flen);
1173 		host_err = PTR_ERR(dchild);
1174 		if (IS_ERR(dchild))
1175 			goto out_nfserr;
1176 		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1177 		if (err)
1178 			goto out;
1179 	} else {
1180 		/* called from nfsd_proc_create */
1181 		dchild = dget(resfhp->fh_dentry);
1182 		if (!fhp->fh_locked) {
1183 			/* not actually possible */
1184 			printk(KERN_ERR
1185 				"nfsd_create: parent %s/%s not locked!\n",
1186 				dentry->d_parent->d_name.name,
1187 				dentry->d_name.name);
1188 			err = nfserr_io;
1189 			goto out;
1190 		}
1191 	}
1192 	/*
1193 	 * Make sure the child dentry is still negative ...
1194 	 */
1195 	err = nfserr_exist;
1196 	if (dchild->d_inode) {
1197 		dprintk("nfsd_create: dentry %s/%s not negative!\n",
1198 			dentry->d_name.name, dchild->d_name.name);
1199 		goto out;
1200 	}
1201 
1202 	if (!(iap->ia_valid & ATTR_MODE))
1203 		iap->ia_mode = 0;
1204 	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
1205 
1206 	/*
1207 	 * Get the dir op function pointer.
1208 	 */
1209 	err = 0;
1210 	switch (type) {
1211 	case S_IFREG:
1212 		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1213 		break;
1214 	case S_IFDIR:
1215 		host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1216 		break;
1217 	case S_IFCHR:
1218 	case S_IFBLK:
1219 	case S_IFIFO:
1220 	case S_IFSOCK:
1221 		host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1222 		break;
1223 	default:
1224 	        printk("nfsd: bad file type %o in nfsd_create\n", type);
1225 		host_err = -EINVAL;
1226 	}
1227 	if (host_err < 0)
1228 		goto out_nfserr;
1229 
1230 	if (EX_ISSYNC(fhp->fh_export)) {
1231 		err = nfserrno(nfsd_sync_dir(dentry));
1232 		write_inode_now(dchild->d_inode, 1);
1233 	}
1234 
1235 
1236 	/* Set file attributes. Mode has already been set and
1237 	 * setting uid/gid works only for root. Irix appears to
1238 	 * send along the gid when it tries to implement setgid
1239 	 * directories via NFS.
1240 	 */
1241 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
1242 		__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1243 		if (err2)
1244 			err = err2;
1245 	}
1246 	/*
1247 	 * Update the file handle to get the new inode info.
1248 	 */
1249 	if (!err)
1250 		err = fh_update(resfhp);
1251 out:
1252 	if (dchild && !IS_ERR(dchild))
1253 		dput(dchild);
1254 	return err;
1255 
1256 out_nfserr:
1257 	err = nfserrno(host_err);
1258 	goto out;
1259 }
1260 
1261 #ifdef CONFIG_NFSD_V3
1262 /*
1263  * NFSv3 version of nfsd_create
1264  */
1265 __be32
1266 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1267 		char *fname, int flen, struct iattr *iap,
1268 		struct svc_fh *resfhp, int createmode, u32 *verifier,
1269 	        int *truncp, int *created)
1270 {
1271 	struct dentry	*dentry, *dchild = NULL;
1272 	struct inode	*dirp;
1273 	__be32		err;
1274 	int		host_err;
1275 	__u32		v_mtime=0, v_atime=0;
1276 
1277 	err = nfserr_perm;
1278 	if (!flen)
1279 		goto out;
1280 	err = nfserr_exist;
1281 	if (isdotent(fname, flen))
1282 		goto out;
1283 	if (!(iap->ia_valid & ATTR_MODE))
1284 		iap->ia_mode = 0;
1285 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
1286 	if (err)
1287 		goto out;
1288 
1289 	dentry = fhp->fh_dentry;
1290 	dirp = dentry->d_inode;
1291 
1292 	/* Get all the sanity checks out of the way before
1293 	 * we lock the parent. */
1294 	err = nfserr_notdir;
1295 	if(!dirp->i_op || !dirp->i_op->lookup)
1296 		goto out;
1297 	fh_lock_nested(fhp, I_MUTEX_PARENT);
1298 
1299 	/*
1300 	 * Compose the response file handle.
1301 	 */
1302 	dchild = lookup_one_len(fname, dentry, flen);
1303 	host_err = PTR_ERR(dchild);
1304 	if (IS_ERR(dchild))
1305 		goto out_nfserr;
1306 
1307 	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1308 	if (err)
1309 		goto out;
1310 
1311 	if (createmode == NFS3_CREATE_EXCLUSIVE) {
1312 		/* solaris7 gets confused (bugid 4218508) if these have
1313 		 * the high bit set, so just clear the high bits. If this is
1314 		 * ever changed to use different attrs for storing the
1315 		 * verifier, then do_open_lookup() will also need to be fixed
1316 		 * accordingly.
1317 		 */
1318 		v_mtime = verifier[0]&0x7fffffff;
1319 		v_atime = verifier[1]&0x7fffffff;
1320 	}
1321 
1322 	if (dchild->d_inode) {
1323 		err = 0;
1324 
1325 		switch (createmode) {
1326 		case NFS3_CREATE_UNCHECKED:
1327 			if (! S_ISREG(dchild->d_inode->i_mode))
1328 				err = nfserr_exist;
1329 			else if (truncp) {
1330 				/* in nfsv4, we need to treat this case a little
1331 				 * differently.  we don't want to truncate the
1332 				 * file now; this would be wrong if the OPEN
1333 				 * fails for some other reason.  furthermore,
1334 				 * if the size is nonzero, we should ignore it
1335 				 * according to spec!
1336 				 */
1337 				*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
1338 			}
1339 			else {
1340 				iap->ia_valid &= ATTR_SIZE;
1341 				goto set_attr;
1342 			}
1343 			break;
1344 		case NFS3_CREATE_EXCLUSIVE:
1345 			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
1346 			    && dchild->d_inode->i_atime.tv_sec == v_atime
1347 			    && dchild->d_inode->i_size  == 0 )
1348 				break;
1349 			 /* fallthru */
1350 		case NFS3_CREATE_GUARDED:
1351 			err = nfserr_exist;
1352 		}
1353 		goto out;
1354 	}
1355 
1356 	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1357 	if (host_err < 0)
1358 		goto out_nfserr;
1359 	if (created)
1360 		*created = 1;
1361 
1362 	if (EX_ISSYNC(fhp->fh_export)) {
1363 		err = nfserrno(nfsd_sync_dir(dentry));
1364 		/* setattr will sync the child (or not) */
1365 	}
1366 
1367 	if (createmode == NFS3_CREATE_EXCLUSIVE) {
1368 		/* Cram the verifier into atime/mtime */
1369 		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
1370 			| ATTR_MTIME_SET|ATTR_ATIME_SET;
1371 		/* XXX someone who knows this better please fix it for nsec */
1372 		iap->ia_mtime.tv_sec = v_mtime;
1373 		iap->ia_atime.tv_sec = v_atime;
1374 		iap->ia_mtime.tv_nsec = 0;
1375 		iap->ia_atime.tv_nsec = 0;
1376 	}
1377 
1378 	/* Set file attributes.
1379 	 * Irix appears to send along the gid when it tries to
1380 	 * implement setgid directories via NFS. Clear out all that cruft.
1381 	 */
1382  set_attr:
1383 	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
1384  		__be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1385 		if (err2)
1386 			err = err2;
1387 	}
1388 
1389 	/*
1390 	 * Update the filehandle to get the new inode info.
1391 	 */
1392 	if (!err)
1393 		err = fh_update(resfhp);
1394 
1395  out:
1396 	fh_unlock(fhp);
1397 	if (dchild && !IS_ERR(dchild))
1398 		dput(dchild);
1399  	return err;
1400 
1401  out_nfserr:
1402 	err = nfserrno(host_err);
1403 	goto out;
1404 }
1405 #endif /* CONFIG_NFSD_V3 */
1406 
1407 /*
1408  * Read a symlink. On entry, *lenp must contain the maximum path length that
1409  * fits into the buffer. On return, it contains the true length.
1410  * N.B. After this call fhp needs an fh_put
1411  */
1412 __be32
1413 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1414 {
1415 	struct dentry	*dentry;
1416 	struct inode	*inode;
1417 	mm_segment_t	oldfs;
1418 	__be32		err;
1419 	int		host_err;
1420 
1421 	err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
1422 	if (err)
1423 		goto out;
1424 
1425 	dentry = fhp->fh_dentry;
1426 	inode = dentry->d_inode;
1427 
1428 	err = nfserr_inval;
1429 	if (!inode->i_op || !inode->i_op->readlink)
1430 		goto out;
1431 
1432 	touch_atime(fhp->fh_export->ex_mnt, dentry);
1433 	/* N.B. Why does this call need a get_fs()??
1434 	 * Remove the set_fs and watch the fireworks:-) --okir
1435 	 */
1436 
1437 	oldfs = get_fs(); set_fs(KERNEL_DS);
1438 	host_err = inode->i_op->readlink(dentry, buf, *lenp);
1439 	set_fs(oldfs);
1440 
1441 	if (host_err < 0)
1442 		goto out_nfserr;
1443 	*lenp = host_err;
1444 	err = 0;
1445 out:
1446 	return err;
1447 
1448 out_nfserr:
1449 	err = nfserrno(host_err);
1450 	goto out;
1451 }
1452 
1453 /*
1454  * Create a symlink and look up its inode
1455  * N.B. After this call _both_ fhp and resfhp need an fh_put
1456  */
1457 __be32
1458 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1459 				char *fname, int flen,
1460 				char *path,  int plen,
1461 				struct svc_fh *resfhp,
1462 				struct iattr *iap)
1463 {
1464 	struct dentry	*dentry, *dnew;
1465 	__be32		err, cerr;
1466 	int		host_err;
1467 	umode_t		mode;
1468 
1469 	err = nfserr_noent;
1470 	if (!flen || !plen)
1471 		goto out;
1472 	err = nfserr_exist;
1473 	if (isdotent(fname, flen))
1474 		goto out;
1475 
1476 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
1477 	if (err)
1478 		goto out;
1479 	fh_lock(fhp);
1480 	dentry = fhp->fh_dentry;
1481 	dnew = lookup_one_len(fname, dentry, flen);
1482 	host_err = PTR_ERR(dnew);
1483 	if (IS_ERR(dnew))
1484 		goto out_nfserr;
1485 
1486 	mode = S_IALLUGO;
1487 	/* Only the MODE ATTRibute is even vaguely meaningful */
1488 	if (iap && (iap->ia_valid & ATTR_MODE))
1489 		mode = iap->ia_mode & S_IALLUGO;
1490 
1491 	if (unlikely(path[plen] != 0)) {
1492 		char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1493 		if (path_alloced == NULL)
1494 			host_err = -ENOMEM;
1495 		else {
1496 			strncpy(path_alloced, path, plen);
1497 			path_alloced[plen] = 0;
1498 			host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
1499 			kfree(path_alloced);
1500 		}
1501 	} else
1502 		host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
1503 
1504 	if (!host_err) {
1505 		if (EX_ISSYNC(fhp->fh_export))
1506 			host_err = nfsd_sync_dir(dentry);
1507 	}
1508 	err = nfserrno(host_err);
1509 	fh_unlock(fhp);
1510 
1511 	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1512 	dput(dnew);
1513 	if (err==0) err = cerr;
1514 out:
1515 	return err;
1516 
1517 out_nfserr:
1518 	err = nfserrno(host_err);
1519 	goto out;
1520 }
1521 
1522 /*
1523  * Create a hardlink
1524  * N.B. After this call _both_ ffhp and tfhp need an fh_put
1525  */
1526 __be32
1527 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1528 				char *name, int len, struct svc_fh *tfhp)
1529 {
1530 	struct dentry	*ddir, *dnew, *dold;
1531 	struct inode	*dirp, *dest;
1532 	__be32		err;
1533 	int		host_err;
1534 
1535 	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
1536 	if (err)
1537 		goto out;
1538 	err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
1539 	if (err)
1540 		goto out;
1541 
1542 	err = nfserr_perm;
1543 	if (!len)
1544 		goto out;
1545 	err = nfserr_exist;
1546 	if (isdotent(name, len))
1547 		goto out;
1548 
1549 	fh_lock_nested(ffhp, I_MUTEX_PARENT);
1550 	ddir = ffhp->fh_dentry;
1551 	dirp = ddir->d_inode;
1552 
1553 	dnew = lookup_one_len(name, ddir, len);
1554 	host_err = PTR_ERR(dnew);
1555 	if (IS_ERR(dnew))
1556 		goto out_nfserr;
1557 
1558 	dold = tfhp->fh_dentry;
1559 	dest = dold->d_inode;
1560 
1561 	host_err = vfs_link(dold, dirp, dnew);
1562 	if (!host_err) {
1563 		if (EX_ISSYNC(ffhp->fh_export)) {
1564 			err = nfserrno(nfsd_sync_dir(ddir));
1565 			write_inode_now(dest, 1);
1566 		}
1567 		err = 0;
1568 	} else {
1569 		if (host_err == -EXDEV && rqstp->rq_vers == 2)
1570 			err = nfserr_acces;
1571 		else
1572 			err = nfserrno(host_err);
1573 	}
1574 
1575 	dput(dnew);
1576 out_unlock:
1577 	fh_unlock(ffhp);
1578 out:
1579 	return err;
1580 
1581 out_nfserr:
1582 	err = nfserrno(host_err);
1583 	goto out_unlock;
1584 }
1585 
1586 /*
1587  * Rename a file
1588  * N.B. After this call _both_ ffhp and tfhp need an fh_put
1589  */
1590 __be32
1591 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1592 			    struct svc_fh *tfhp, char *tname, int tlen)
1593 {
1594 	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
1595 	struct inode	*fdir, *tdir;
1596 	__be32		err;
1597 	int		host_err;
1598 
1599 	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
1600 	if (err)
1601 		goto out;
1602 	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
1603 	if (err)
1604 		goto out;
1605 
1606 	fdentry = ffhp->fh_dentry;
1607 	fdir = fdentry->d_inode;
1608 
1609 	tdentry = tfhp->fh_dentry;
1610 	tdir = tdentry->d_inode;
1611 
1612 	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
1613 	if (ffhp->fh_export != tfhp->fh_export)
1614 		goto out;
1615 
1616 	err = nfserr_perm;
1617 	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
1618 		goto out;
1619 
1620 	/* cannot use fh_lock as we need deadlock protective ordering
1621 	 * so do it by hand */
1622 	trap = lock_rename(tdentry, fdentry);
1623 	ffhp->fh_locked = tfhp->fh_locked = 1;
1624 	fill_pre_wcc(ffhp);
1625 	fill_pre_wcc(tfhp);
1626 
1627 	odentry = lookup_one_len(fname, fdentry, flen);
1628 	host_err = PTR_ERR(odentry);
1629 	if (IS_ERR(odentry))
1630 		goto out_nfserr;
1631 
1632 	host_err = -ENOENT;
1633 	if (!odentry->d_inode)
1634 		goto out_dput_old;
1635 	host_err = -EINVAL;
1636 	if (odentry == trap)
1637 		goto out_dput_old;
1638 
1639 	ndentry = lookup_one_len(tname, tdentry, tlen);
1640 	host_err = PTR_ERR(ndentry);
1641 	if (IS_ERR(ndentry))
1642 		goto out_dput_old;
1643 	host_err = -ENOTEMPTY;
1644 	if (ndentry == trap)
1645 		goto out_dput_new;
1646 
1647 #ifdef MSNFS
1648 	if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1649 		((atomic_read(&odentry->d_count) > 1)
1650 		 || (atomic_read(&ndentry->d_count) > 1))) {
1651 			host_err = -EPERM;
1652 	} else
1653 #endif
1654 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1655 	if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
1656 		host_err = nfsd_sync_dir(tdentry);
1657 		if (!host_err)
1658 			host_err = nfsd_sync_dir(fdentry);
1659 	}
1660 
1661  out_dput_new:
1662 	dput(ndentry);
1663  out_dput_old:
1664 	dput(odentry);
1665  out_nfserr:
1666 	err = nfserrno(host_err);
1667 
1668 	/* we cannot reply on fh_unlock on the two filehandles,
1669 	 * as that would do the wrong thing if the two directories
1670 	 * were the same, so again we do it by hand
1671 	 */
1672 	fill_post_wcc(ffhp);
1673 	fill_post_wcc(tfhp);
1674 	unlock_rename(tdentry, fdentry);
1675 	ffhp->fh_locked = tfhp->fh_locked = 0;
1676 
1677 out:
1678 	return err;
1679 }
1680 
1681 /*
1682  * Unlink a file or directory
1683  * N.B. After this call fhp needs an fh_put
1684  */
1685 __be32
1686 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1687 				char *fname, int flen)
1688 {
1689 	struct dentry	*dentry, *rdentry;
1690 	struct inode	*dirp;
1691 	__be32		err;
1692 	int		host_err;
1693 
1694 	err = nfserr_acces;
1695 	if (!flen || isdotent(fname, flen))
1696 		goto out;
1697 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
1698 	if (err)
1699 		goto out;
1700 
1701 	fh_lock_nested(fhp, I_MUTEX_PARENT);
1702 	dentry = fhp->fh_dentry;
1703 	dirp = dentry->d_inode;
1704 
1705 	rdentry = lookup_one_len(fname, dentry, flen);
1706 	host_err = PTR_ERR(rdentry);
1707 	if (IS_ERR(rdentry))
1708 		goto out_nfserr;
1709 
1710 	if (!rdentry->d_inode) {
1711 		dput(rdentry);
1712 		err = nfserr_noent;
1713 		goto out;
1714 	}
1715 
1716 	if (!type)
1717 		type = rdentry->d_inode->i_mode & S_IFMT;
1718 
1719 	if (type != S_IFDIR) { /* It's UNLINK */
1720 #ifdef MSNFS
1721 		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1722 			(atomic_read(&rdentry->d_count) > 1)) {
1723 			host_err = -EPERM;
1724 		} else
1725 #endif
1726 		host_err = vfs_unlink(dirp, rdentry);
1727 	} else { /* It's RMDIR */
1728 		host_err = vfs_rmdir(dirp, rdentry);
1729 	}
1730 
1731 	dput(rdentry);
1732 
1733 	if (host_err)
1734 		goto out_nfserr;
1735 	if (EX_ISSYNC(fhp->fh_export))
1736 		host_err = nfsd_sync_dir(dentry);
1737 
1738 out_nfserr:
1739 	err = nfserrno(host_err);
1740 out:
1741 	return err;
1742 }
1743 
1744 /*
1745  * Read entries from a directory.
1746  * The  NFSv3/4 verifier we ignore for now.
1747  */
1748 __be32
1749 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1750 	     struct readdir_cd *cdp, filldir_t func)
1751 {
1752 	__be32		err;
1753 	int 		host_err;
1754 	struct file	*file;
1755 	loff_t		offset = *offsetp;
1756 
1757 	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
1758 	if (err)
1759 		goto out;
1760 
1761 	offset = vfs_llseek(file, offset, 0);
1762 	if (offset < 0) {
1763 		err = nfserrno((int)offset);
1764 		goto out_close;
1765 	}
1766 
1767 	/*
1768 	 * Read the directory entries. This silly loop is necessary because
1769 	 * readdir() is not guaranteed to fill up the entire buffer, but
1770 	 * may choose to do less.
1771 	 */
1772 
1773 	do {
1774 		cdp->err = nfserr_eof; /* will be cleared on successful read */
1775 		host_err = vfs_readdir(file, func, cdp);
1776 	} while (host_err >=0 && cdp->err == nfs_ok);
1777 	if (host_err)
1778 		err = nfserrno(host_err);
1779 	else
1780 		err = cdp->err;
1781 	*offsetp = vfs_llseek(file, 0, 1);
1782 
1783 	if (err == nfserr_eof || err == nfserr_toosmall)
1784 		err = nfs_ok; /* can still be found in ->err */
1785 out_close:
1786 	nfsd_close(file);
1787 out:
1788 	return err;
1789 }
1790 
1791 /*
1792  * Get file system stats
1793  * N.B. After this call fhp needs an fh_put
1794  */
1795 __be32
1796 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
1797 {
1798 	__be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP);
1799 	if (!err && vfs_statfs(fhp->fh_dentry,stat))
1800 		err = nfserr_io;
1801 	return err;
1802 }
1803 
1804 static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp)
1805 {
1806 	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
1807 }
1808 
1809 /*
1810  * Check for a user's access permissions to this inode.
1811  */
1812 __be32
1813 nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1814 					struct dentry *dentry, int acc)
1815 {
1816 	struct inode	*inode = dentry->d_inode;
1817 	int		err;
1818 
1819 	if (acc == MAY_NOP)
1820 		return 0;
1821 #if 0
1822 	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
1823 		acc,
1824 		(acc & MAY_READ)?	" read"  : "",
1825 		(acc & MAY_WRITE)?	" write" : "",
1826 		(acc & MAY_EXEC)?	" exec"  : "",
1827 		(acc & MAY_SATTR)?	" sattr" : "",
1828 		(acc & MAY_TRUNC)?	" trunc" : "",
1829 		(acc & MAY_LOCK)?	" lock"  : "",
1830 		(acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
1831 		inode->i_mode,
1832 		IS_IMMUTABLE(inode)?	" immut" : "",
1833 		IS_APPEND(inode)?	" append" : "",
1834 		IS_RDONLY(inode)?	" ro" : "");
1835 	dprintk("      owner %d/%d user %d/%d\n",
1836 		inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
1837 #endif
1838 
1839 	/* Normally we reject any write/sattr etc access on a read-only file
1840 	 * system.  But if it is IRIX doing check on write-access for a
1841 	 * device special file, we ignore rofs.
1842 	 */
1843 	if (!(acc & MAY_LOCAL_ACCESS))
1844 		if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
1845 			if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode))
1846 				return nfserr_rofs;
1847 			if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
1848 				return nfserr_perm;
1849 		}
1850 	if ((acc & MAY_TRUNC) && IS_APPEND(inode))
1851 		return nfserr_perm;
1852 
1853 	if (acc & MAY_LOCK) {
1854 		/* If we cannot rely on authentication in NLM requests,
1855 		 * just allow locks, otherwise require read permission, or
1856 		 * ownership
1857 		 */
1858 		if (exp->ex_flags & NFSEXP_NOAUTHNLM)
1859 			return 0;
1860 		else
1861 			acc = MAY_READ | MAY_OWNER_OVERRIDE;
1862 	}
1863 	/*
1864 	 * The file owner always gets access permission for accesses that
1865 	 * would normally be checked at open time. This is to make
1866 	 * file access work even when the client has done a fchmod(fd, 0).
1867 	 *
1868 	 * However, `cp foo bar' should fail nevertheless when bar is
1869 	 * readonly. A sensible way to do this might be to reject all
1870 	 * attempts to truncate a read-only file, because a creat() call
1871 	 * always implies file truncation.
1872 	 * ... but this isn't really fair.  A process may reasonably call
1873 	 * ftruncate on an open file descriptor on a file with perm 000.
1874 	 * We must trust the client to do permission checking - using "ACCESS"
1875 	 * with NFSv3.
1876 	 */
1877 	if ((acc & MAY_OWNER_OVERRIDE) &&
1878 	    inode->i_uid == current->fsuid)
1879 		return 0;
1880 
1881 	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
1882 
1883 	/* Allow read access to binaries even when mode 111 */
1884 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
1885 	    acc == (MAY_READ | MAY_OWNER_OVERRIDE))
1886 		err = permission(inode, MAY_EXEC, NULL);
1887 
1888 	return err? nfserrno(err) : 0;
1889 }
1890 
1891 void
1892 nfsd_racache_shutdown(void)
1893 {
1894 	if (!raparml)
1895 		return;
1896 	dprintk("nfsd: freeing readahead buffers.\n");
1897 	kfree(raparml);
1898 	raparml = NULL;
1899 }
1900 /*
1901  * Initialize readahead param cache
1902  */
1903 int
1904 nfsd_racache_init(int cache_size)
1905 {
1906 	int	i;
1907 	int	j = 0;
1908 	int	nperbucket;
1909 
1910 
1911 	if (raparml)
1912 		return 0;
1913 	if (cache_size < 2*RAPARM_HASH_SIZE)
1914 		cache_size = 2*RAPARM_HASH_SIZE;
1915 	raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL);
1916 
1917 	if (!raparml) {
1918 		printk(KERN_WARNING
1919 			"nfsd: Could not allocate memory read-ahead cache.\n");
1920 		return -ENOMEM;
1921 	}
1922 
1923 	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
1924 	for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
1925 		raparm_hash[i].pb_head = NULL;
1926 		spin_lock_init(&raparm_hash[i].pb_lock);
1927 	}
1928 	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
1929 	for (i = 0; i < cache_size - 1; i++) {
1930 		if (i % nperbucket == 0)
1931 			raparm_hash[j++].pb_head = raparml + i;
1932 		if (i % nperbucket < nperbucket-1)
1933 			raparml[i].p_next = raparml + i + 1;
1934 	}
1935 
1936 	nfsdstats.ra_size = cache_size;
1937 	return 0;
1938 }
1939 
1940 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
1941 struct posix_acl *
1942 nfsd_get_posix_acl(struct svc_fh *fhp, int type)
1943 {
1944 	struct inode *inode = fhp->fh_dentry->d_inode;
1945 	char *name;
1946 	void *value = NULL;
1947 	ssize_t size;
1948 	struct posix_acl *acl;
1949 
1950 	if (!IS_POSIXACL(inode))
1951 		return ERR_PTR(-EOPNOTSUPP);
1952 
1953 	switch (type) {
1954 	case ACL_TYPE_ACCESS:
1955 		name = POSIX_ACL_XATTR_ACCESS;
1956 		break;
1957 	case ACL_TYPE_DEFAULT:
1958 		name = POSIX_ACL_XATTR_DEFAULT;
1959 		break;
1960 	default:
1961 		return ERR_PTR(-EOPNOTSUPP);
1962 	}
1963 
1964 	size = nfsd_getxattr(fhp->fh_dentry, name, &value);
1965 	if (size < 0)
1966 		return ERR_PTR(size);
1967 
1968 	acl = posix_acl_from_xattr(value, size);
1969 	kfree(value);
1970 	return acl;
1971 }
1972 
1973 int
1974 nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
1975 {
1976 	struct inode *inode = fhp->fh_dentry->d_inode;
1977 	char *name;
1978 	void *value = NULL;
1979 	size_t size;
1980 	int error;
1981 
1982 	if (!IS_POSIXACL(inode) || !inode->i_op ||
1983 	    !inode->i_op->setxattr || !inode->i_op->removexattr)
1984 		return -EOPNOTSUPP;
1985 	switch(type) {
1986 		case ACL_TYPE_ACCESS:
1987 			name = POSIX_ACL_XATTR_ACCESS;
1988 			break;
1989 		case ACL_TYPE_DEFAULT:
1990 			name = POSIX_ACL_XATTR_DEFAULT;
1991 			break;
1992 		default:
1993 			return -EOPNOTSUPP;
1994 	}
1995 
1996 	if (acl && acl->a_count) {
1997 		size = posix_acl_xattr_size(acl->a_count);
1998 		value = kmalloc(size, GFP_KERNEL);
1999 		if (!value)
2000 			return -ENOMEM;
2001 		error = posix_acl_to_xattr(acl, value, size);
2002 		if (error < 0)
2003 			goto getout;
2004 		size = error;
2005 	} else
2006 		size = 0;
2007 
2008 	if (size)
2009 		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2010 	else {
2011 		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
2012 			error = 0;
2013 		else {
2014 			error = vfs_removexattr(fhp->fh_dentry, name);
2015 			if (error == -ENODATA)
2016 				error = 0;
2017 		}
2018 	}
2019 
2020 getout:
2021 	kfree(value);
2022 	return error;
2023 }
2024 #endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
2025