xref: /freebsd/sys/fs/p9fs/p9fs_vnops.c (revision 7937bfbc0ca53fe7cdd0d54414f9296e273a518e)
1 /*
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9 *	notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *	notice, this list of conditions and the following disclaimer in the
12  *	documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
/*
 * This file contains the VFS vnode operations for the 9P protocol.
 * It forms the upper layer of the p9fs driver: these functions sit between
 * the VFS layer and the lower layer of the p9fs driver, 9Pnet. All user
 * file operations are handled here.
 */
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/buf.h>
36 #include <sys/dirent.h>
37 #include <sys/fcntl.h>
38 #include <sys/namei.h>
39 #include <sys/priv.h>
40 #include <sys/stat.h>
41 #include <sys/vnode.h>
42 #include <sys/rwlock.h>
43 #include <sys/vmmeter.h>
44 
45 #include <vm/vm.h>
46 #include <vm/vm_extern.h>
47 #include <vm/vm_object.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_pager.h>
50 #include <vm/vnode_pager.h>
51 
52 #include <fs/p9fs/p9_client.h>
53 #include <fs/p9fs/p9_debug.h>
54 #include <fs/p9fs/p9fs.h>
55 #include <fs/p9fs/p9fs_proto.h>
56 
/*
 * File permission and special mode bits (octal). ISVTX, ISGID and ISUID
 * are tested by the lookup, chown and chmod paths later in this file.
 */
#define IEXEC		0000100 /* Executable. */
#define IWRITE		0000200 /* Writeable. */
#define IREAD		0000400 /* Readable. */
#define ISVTX		0001000 /* Sticky bit. */
#define ISGID		0002000 /* Set-gid. */
#define ISUID		0004000 /* Set-uid. */
64 
/* malloc type for the UIOV structures used by the strategy code. */
static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
/* UMA zones; only declared here (extern), not defined in this file. */
extern uma_zone_t p9fs_io_buffer_zone;
/* Backs the p9_stat_dotl buffer allocated in p9fs_reload_stats_dotl(). */
extern uma_zone_t p9fs_getattr_zone;
extern uma_zone_t p9fs_setattr_zone;
extern uma_zone_t p9fs_pbuf_zone;
/* For the root vnode's vnops. */
struct vop_vector p9fs_vnops;

/* Forward declaration: the create VOPs call this before its definition. */
static uint32_t p9fs_unix2p9_mode(uint32_t mode);
74 
75 static void
76 p9fs_itimes(struct vnode *vp)
77 {
78 	struct p9fs_node *node;
79 	struct timespec ts;
80 	struct p9fs_inode *inode;
81 
82 	node = P9FS_VTON(vp);
83 	inode = &node->inode;
84 
85 	vfs_timestamp(&ts);
86 	inode->i_mtime = ts.tv_sec;
87 }
88 
89 /*
90  * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
91  * The cleanup includes invalidating all cache entries for the vnode,
92  * destroying the vobject, removing vnode from hashlist, removing p9fs node
93  * from the list of session p9fs nodes, and disposing of the p9fs node.
94  * Basically it is doing a reverse of what a create/vget does.
95  */
96 void
97 p9fs_cleanup(struct p9fs_node *np)
98 {
99 	struct vnode *vp;
100 	struct p9fs_session *vses;
101 
102 	if (np == NULL)
103 		return;
104 
105 	vp = P9FS_NTOV(np);
106 	vses = np->p9fs_ses;
107 
108 	/* Remove the vnode from hash list if vnode is not already deleted */
109 	if ((np->flags & P9FS_NODE_DELETED) == 0)
110 		vfs_hash_remove(vp);
111 
112 	P9FS_LOCK(vses);
113 	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
114 		np->flags &= ~P9FS_NODE_IN_SESSION;
115 		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
116 	} else {
117 		P9FS_UNLOCK(vses);
118 		return;
119 	}
120 	P9FS_UNLOCK(vses);
121 
122 	/* Invalidate all entries to a particular vnode. */
123 	cache_purge(vp);
124 
125 	/* Destroy the vm object and flush associated pages. */
126 	vnode_destroy_vobject(vp);
127 
128 	/* Remove all the FID */
129 	p9fs_fid_remove_all(np, FALSE);
130 
131 	/* Dispose all node knowledge.*/
132 	p9fs_destroy_node(&np);
133 }
134 
135 /*
136  * Reclaim VOP is defined to be called for every vnode. This starts off
137  * the cleanup by clunking(remove the fid on the server) and calls
138  * p9fs_cleanup to free all the resources allocated for p9fs node.
139  */
140 static int
141 p9fs_reclaim(struct vop_reclaim_args *ap)
142 {
143 	struct vnode *vp;
144 	struct p9fs_node *np;
145 
146 	vp = ap->a_vp;
147 	np = P9FS_VTON(vp);
148 
149 	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
150 	p9fs_cleanup(np);
151 
152 	return (0);
153 }
154 
155 /*
156  * recycle vnodes which are no longer referenced i.e, their usecount is zero
157  */
158 static int
159 p9fs_inactive(struct vop_inactive_args *ap)
160 {
161 	struct vnode *vp;
162 	struct p9fs_node *np;
163 
164 	vp = ap->a_vp;
165 	np = P9FS_VTON(vp);
166 
167 	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
168 	if (np->flags & P9FS_NODE_DELETED)
169 		vrecycle(vp);
170 
171 	return (0);
172 }
173 
/* Argument bundle handed to p9fs_lookup_alloc() through vn_vget_ino_gen(). */
struct p9fs_lookup_alloc_arg {
	struct componentname *cnp;	/* Component being looked up. */
	struct p9fs_node *dnp;		/* p9fs node of the directory searched. */
	struct p9_fid *newfid;		/* Fid obtained by walking to the component. */
};
179 
180 /* Callback for vn_get_ino */
181 static int
182 p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
183 {
184 	struct p9fs_lookup_alloc_arg *p9aa = arg;
185 
186 	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
187 		p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
188 }
189 
/*
 * p9fs_lookup is called for every component name that is being searched for.
 *
 * I. If component is found on the server, we look for the in-memory
 *    representation(vnode) of this component in namecache.
 *    A. If the node is found in the namecache, we check if the vnode is still
 *	 valid.
 *	 1. If it is still valid, return vnode.
 *	 2. If it is not valid, we remove this vnode from the name cache and
 *	    create a new vnode for the component and return that vnode.
 *    B. If the vnode is not found in the namecache, we look for it in the
 *       hash list.
 *       1. If the vnode is in the hash list, we check if the vnode is still
 *	    valid.
 *	    a. If it is still valid, we add that vnode to the namecache for
 *	       future lookups and return the vnode.
 *	    b. If it is not valid, create a new vnode and p9fs node,
 *	       initialize them and return the vnode.
 *	 2. If the vnode is not found in the hash list, we create a new vnode
 *	    and p9fs node, initialize them and return the vnode.
 * II. If the component is not found on the server, an error code is returned.
 *     A. For the creation case, we return EJUSTRETURN so VFS can handle it.
 *     B. For all other cases, ENOENT is returned.
 */
static int
p9fs_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp;
	struct vnode **vpp, *vp;
	struct componentname *cnp;
	struct p9fs_node *dnp; /*dir p9_node */
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct mount *mp; /* Get the mount point */
	struct p9_fid *dvfid, *newfid;
	int error;
	struct vattr vattr;
	int flags;
	char tmpchr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	dnp = P9FS_VTON(dvp);
	error = 0;
	flags = cnp->cn_flags;
	*vpp = NULLVP;

	if (dnp == NULL)
		return (ENOENT);

	/* "." resolves to the directory itself; just take another reference. */
	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
		vref(dvp);
		*vpp = dvp;
		return (0);
	}

	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;

	/* Do the cache part ourselves */
	/* Deleting or renaming the last component needs a writable mount. */
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Searching a directory requires execute permission on it. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
	if (error)
		return (error);

	/* Do the directory walk on host to check if file exist */
	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error)
		return (error);

	/*
	 * Save the character present at namelen in nameptr string and
	 * null terminate the character to get the search name for p9_dir_walk
	 * This is done to handle when lookup is for "a" and component
	 * name contains a/b/c
	 */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * If the client_walk fails, it means the file we are looking for does
	 * not exist. Create the file if the flags are set or just return the
	 * error.
	 */
	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);

	/* Put the saved character back now that the walk is done. */
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	if (error != 0 || newfid == NULL) {
		/* Clunk the newfid if it is not NULL */
		if (newfid != NULL)
			p9_client_clunk(newfid);

		if (error != ENOENT)
			return (error);

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {

			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);

			/* Creating an entry needs write access to the directory. */
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (!error) {
				return (EJUSTRETURN);
			}
		}
		return (error);
	}

	/* Look for the entry in the component cache*/
	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
	if (error > 0 && error != ENOENT) {
		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
		goto out;
	}

	/* -1 is handled as a positive cache hit: *vpp holds the cached vnode. */
	if (error == -1) {
		vp = *vpp;
		/* Check if the entry in cache is stale or not */
		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
			/* Cached vnode is still good; newfid is clunked at out. */
			goto out;
		}
		/*
		 * The cached vnode does not match the qid returned by the
		 * walk, or getattr on it failed: purge it from the name
		 * cache and release it so a fresh vnode is set up below.
		 */
		cache_purge(vp);
		if (dvp != vp)
			vput(vp);
		else
			vrele(vp);

		*vpp = NULLVP;
	} else if (error == ENOENT) {
		/*
		 * The name cache answered ENOENT although the walk above
		 * found the component.
		 */
		if (VN_IS_DOOMED(dvp))
			goto out;
		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
			error = ENOENT;
			goto out;
		}
		cache_purge_negative(dvp);
	}
	/* Reset values */
	error = 0;
	vp = NULLVP;

	/* NUL-terminate the component again for p9fs_vget_common(). */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * Looks like we have found an entry. Now take care of all other cases.
	 */
	if (flags & ISDOTDOT) {
		/* ".." goes through vn_vget_ino_gen() with our alloc callback. */
		struct p9fs_lookup_alloc_arg p9aa;
		p9aa.cnp = cnp;
		p9aa.dnp = dnp;
		p9aa.newfid = newfid;
		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
		if (error)
			goto out;
		*vpp = vp;
	} else {
		/*
		 * client_walk is equivalent to searching a component name in a
		 * directory(fid) here. If new fid is returned, we have found an
		 * entry for this component name so, go and create the rest of
		 * the vnode infra(vget_common) for the returned newfid.
		 */
		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		    && (flags & ISLASTCN)) {
			/* Removing or renaming needs write access to the dir. */
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (error)
				goto out;

			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;

			*vpp = vp;
			np = P9FS_VTON(vp);
			/*
			 * Sticky directory: only root, the owner of the
			 * directory or the owner of the entry may proceed.
			 */
			if ((dnp->inode.i_mode & ISVTX) &&
			    cnp->cn_cred->cr_uid != 0 &&
			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
				vput(*vpp);
				*vpp = NULL;
				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
				return (EPERM);
			}
		} else {
			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;
			*vpp = vp;
		}
	}

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	/* Store the result in the cache if MAKEENTRY is specified in flags */
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, *vpp, cnp);
	return (error);
out:
	/* Common exit: restore the name buffer and clunk the walked fid. */
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	p9_client_clunk(newfid);
	return (error);
}
411 
412 /*
413  * Common creation function for file/directory with respective flags. We first
414  * open the parent directory in order to create the file under it. For this,
415  * as 9P protocol suggests, we need to call client_walk to create the open fid.
416  * Once we have the open fid, the file_create function creates the direntry with
417  * the name and perm specified under the parent dir. If this succeeds (an entry
418  * is created for the new file on the server), we create our metadata for this
419  * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
420  * fid of the parent directory.
421  */
422 static int
423 create_common(struct p9fs_node *dnp, struct componentname *cnp,
424     char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
425 {
426 	char tmpchr;
427 	struct p9_fid *dvfid, *ofid, *newfid;
428 	struct p9fs_session *vses;
429 	struct mount *mp;
430 	int error;
431 
432 	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
433 
434 	vses = dnp->p9fs_ses;
435 	mp = vses->p9fs_mount;
436 	newfid = NULL;
437 	error = 0;
438 
439 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
440 	if (error != 0)
441 		return (error);
442 
443 	/* Clone the directory fid to create the new file */
444 	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
445 	if (error != 0)
446 		return (error);
447 
448 	/*
449 	 * Save the character present at namelen in nameptr string and
450 	 * null terminate the character to get the search name for p9_dir_walk
451 	 */
452 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
453 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
454 
455 	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
456 		    extension);
457 	if (error != 0) {
458 		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
459 		goto out;
460 	}
461 
462 	/* If its not hardlink only then do the walk, else we are done. */
463 	if (!(perm & P9PROTO_DMLINK)) {
464 		/*
465 		 * Do the lookup part and add the vnode, p9fs node. Note that vpp
466 		 * is filled in here.
467 		 */
468 		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
469 		if (newfid != NULL) {
470 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
471 			    dnp, newfid, vpp, cnp->cn_nameptr);
472 			if (error != 0)
473 				goto out;
474 		} else {
475 			/* Not found return NOENTRY.*/
476 			goto out;
477 		}
478 
479 		if ((cnp->cn_flags & MAKEENTRY) != 0)
480 			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
481 	}
482 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
483 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
484 	/* Clunk the open ofid. */
485 	if (ofid != NULL)
486 		(void)p9_client_clunk(ofid);
487 
488 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
489 	return (0);
490 out:
491 	if (ofid != NULL)
492 		(void)p9_client_clunk(ofid);
493 
494 	if (newfid != NULL)
495 		(void)p9_client_clunk(newfid);
496 
497 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
498 	return (error);
499 }
500 
501 /*
502  * This is the main file creation VOP. Make the permissions of the new
503  * file and call the create_common common code to complete the create.
504  */
505 static int
506 p9fs_create(struct vop_create_args *ap)
507 {
508 	struct vnode *dvp;
509 	struct vnode **vpp;
510 	struct componentname *cnp;
511 	uint32_t mode;
512 	struct p9fs_node *dnp;
513 	struct p9fs_inode *dinode;
514 	uint32_t perm;
515 	int ret;
516 
517 	dvp = ap->a_dvp;
518 	vpp = ap->a_vpp;
519 	cnp = ap->a_cnp;
520 	dnp = P9FS_VTON(dvp);
521 	dinode = &dnp->inode;
522 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
523 	perm = p9fs_unix2p9_mode(mode);
524 
525 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
526 
527 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
528 	if (ret == 0) {
529 		P9FS_INCR_LINKS(dinode);
530 	}
531 
532 	return (ret);
533 }
534 
535 /*
536  * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
537  * and call the create_common common code to complete the create.
538  */
539 static int
540 p9fs_mkdir(struct vop_mkdir_args *ap)
541 {
542 	struct vnode *dvp;
543 	struct vnode **vpp;
544 	struct componentname *cnp;
545 	uint32_t mode;
546 	struct p9fs_node *dnp;
547 	struct p9fs_inode *dinode;
548 	uint32_t perm;
549 	int ret;
550 
551 	dvp = ap->a_dvp;
552 	vpp = ap->a_vpp;
553 	cnp = ap->a_cnp;
554 	dnp = P9FS_VTON(dvp);
555 	dinode = &dnp->inode;
556 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
557 	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
558 
559 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
560 
561 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
562 	if (ret == 0)
563 		P9FS_INCR_LINKS(dinode);
564 
565 	return (ret);
566 }
567 
568 /*
569  * p9fs_mknod is the main node creation vop. Make the permissions of the new node
570  * and call the create_common common code to complete the create.
571  */
572 static int
573 p9fs_mknod(struct vop_mknod_args *ap)
574 {
575 	struct vnode *dvp;
576 	struct vnode **vpp;
577 	struct componentname *cnp;
578 	uint32_t mode;
579 	struct p9fs_node *dnp;
580 	struct p9fs_inode *dinode;
581 	uint32_t perm;
582 	int ret;
583 
584 	dvp = ap->a_dvp;
585 	vpp = ap->a_vpp;
586 	cnp = ap->a_cnp;
587 	dnp = P9FS_VTON(dvp);
588 	dinode = &dnp->inode;
589 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
590 	perm = p9fs_unix2p9_mode(mode);
591 
592 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
593 
594 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp);
595 	if (ret == 0) {
596 		P9FS_INCR_LINKS(dinode);
597 	}
598 
599 	return (ret);
600 }
601 
602 /* Convert open mode permissions to P9 */
603 static int
604 p9fs_uflags_mode(int uflags, int extended)
605 {
606 	uint32_t ret;
607 
608 	/* Convert first to O flags.*/
609 	uflags = OFLAGS(uflags);
610 
611 	switch (uflags & 3) {
612 
613 	case O_RDONLY:
614 	    ret = P9PROTO_OREAD;
615 	    break;
616 
617 	case O_WRONLY:
618 	    ret = P9PROTO_OWRITE;
619 	    break;
620 
621 	case O_RDWR:
622 	    ret = P9PROTO_ORDWR;
623 	    break;
624 	}
625 
626 	if (extended) {
627 		if (uflags & O_EXCL)
628 			ret |= P9PROTO_OEXCL;
629 
630 		if (uflags & O_APPEND)
631 			ret |= P9PROTO_OAPPEND;
632 	}
633 
634 	return (ret);
635 }
636 
637 /*
638  * This is the main open VOP for every file open. If the file is already
639  * open, then increment and return. If there is no open fid for this file,
640  * there needs to be a client_walk which creates a new open fid for this file.
641  * Once we have a open fid, call the open on this file with the mode creating
642  * the vobject.
643  */
644 static int
645 p9fs_open(struct vop_open_args *ap)
646 {
647 	int error;
648 	struct vnode *vp;
649 	struct p9fs_node *np;
650 	struct p9fs_session *vses;
651 	struct p9_fid *vofid, *vfid;
652 	size_t filesize;
653 	uint32_t mode;
654 
655 	error = 0;
656 	vp = ap->a_vp;
657 	np = P9FS_VTON(vp);
658 	vses = np->p9fs_ses;
659 
660 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
661 
662 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
663 		return (EOPNOTSUPP);
664 
665 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
666 	if (error != 0)
667 		return (error);
668 
669 	ASSERT_VOP_LOCKED(vp, __func__);
670 	/*
671 	 * Invalidate the pages of the vm_object cache if the file is modified
672 	 * based on the flag set in reload stats
673 	 */
674 	if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
675 		error = vinvalbuf(vp, 0, 0, 0);
676 		if (error != 0)
677 			return (error);
678 		np->flags &= ~P9FS_NODE_MODIFIED;
679 	}
680 
681 	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
682 	if (error != 0)
683 		return (error);
684 
685 	/*
686 	 * Translate kernel fflags to 9p mode
687 	 */
688 	mode = p9fs_uflags_mode(ap->a_mode, 1);
689 
690 	/*
691 	 * Search the fid in vofid_list for current user. If found increase the open
692 	 * count and return. If not found clone a new fid and open the file using
693 	 * that cloned fid.
694 	 */
695 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
696 	if (vofid != NULL) {
697 		vofid->v_opens++;
698 		return (0);
699 	} else {
700 		/*vofid is the open fid for this file.*/
701 		vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
702 		if (error != 0)
703 			return (error);
704 	}
705 
706 	error = p9_client_open(vofid, mode);
707 	if (error != 0)
708 		p9_client_clunk(vofid);
709 	else {
710 		vofid->v_opens = 1;
711 		filesize = np->inode.i_size;
712 		vnode_create_vobject(vp, filesize, ap->a_td);
713 		p9fs_fid_add(np, vofid, VOFID);
714 	}
715 
716 	return (error);
717 }
718 
719 /*
720  * Close the open references. Just reduce the open count on vofid and return.
721  * Let clunking of VOFID happen in p9fs_reclaim.
722  */
723 static int
724 p9fs_close(struct vop_close_args *ap)
725 {
726 	struct vnode *vp;
727 	struct p9fs_node *np;
728 	struct p9fs_session *vses;
729 	struct p9_fid *vofid;
730 	int error;
731 
732 	vp = ap->a_vp;
733 	np = P9FS_VTON(vp);
734 
735 	if (np == NULL)
736 		return (0);
737 
738 	vses = np->p9fs_ses;
739 	error = 0;
740 
741 	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
742 
743 	/*
744 	 * Translate kernel fflags to 9p mode
745 	 */
746 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID,
747 	    p9fs_uflags_mode(ap->a_fflag, 1), &error);
748 	if (vofid == NULL)
749 		return (0);
750 
751 	vofid->v_opens--;
752 
753 	return (0);
754 }
755 
756 /* Helper routine for checking if fileops are possible on this file */
757 static int
758 p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
759 {
760 
761 	/* Check if we are allowed to write */
762 	switch (vap->va_type) {
763 	case VDIR:
764 	case VLNK:
765 	case VREG:
766 		/*
767 		 * Normal nodes: check if we're on a read-only mounted
768 		 * file system and bail out if we're trying to write.
769 		 */
770 		if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
771 			return (EROFS);
772 		break;
773 	case VBLK:
774 	case VCHR:
775 	case VSOCK:
776 	case VFIFO:
777 		/*
778 		 * Special nodes: even on read-only mounted file systems
779 		 * these are allowed to be written to if permissions allow.
780 		 */
781 		break;
782 	default:
783 		/* No idea what this is */
784 		return (EINVAL);
785 	}
786 
787 	return (0);
788 }
789 
790 /* Check the access permissions of the file. */
791 static int
792 p9fs_access(struct vop_access_args *ap)
793 {
794 	struct vnode *vp;
795 	accmode_t accmode;
796 	struct ucred *cred;
797 	struct vattr vap;
798 	int error;
799 
800 	vp = ap->a_vp;
801 	accmode = ap->a_accmode;
802 	cred = ap->a_cred;
803 
804 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
805 
806 	/* make sure getattr is working correctly and is defined.*/
807 	error = VOP_GETATTR(vp, &vap, cred);
808 	if (error != 0)
809 		return (error);
810 
811 	error = p9fs_check_possible(vp, &vap, accmode);
812 	if (error != 0)
813 		return (error);
814 
815 	/* Call the Generic Access check in VOPS*/
816 	error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
817 	    cred);
818 
819 
820 	return (error);
821 }
822 
823 /*
824  * Reload the file stats from the server and update the inode structure present
825  * in p9fs node.
826  */
827 int
828 p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
829 {
830 	struct p9_stat_dotl *stat;
831 	int error;
832 	struct p9fs_node *node;
833 	struct p9fs_session *vses;
834 	struct p9_fid *vfid;
835 
836 	error = 0;
837 	node = P9FS_VTON(vp);
838 	vses = node->p9fs_ses;
839 
840 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
841 	if (vfid == NULL) {
842 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
843 		if (error)
844 			return (error);
845 	}
846 
847 	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
848 
849 	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
850 	if (error != 0) {
851 		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
852 		goto out;
853 	}
854 
855 	/* Init the vnode with the disk info */
856 	p9fs_stat_vnode_dotl(stat, vp);
857 out:
858 	if (stat != NULL) {
859 		uma_zfree(p9fs_getattr_zone, stat);
860 	}
861 
862 	return (error);
863 }
864 
865 /*
866  * Read the current inode values into the vap attr. We reload the stats from
867  * the server.
868  */
869 static int
870 p9fs_getattr_dotl(struct vop_getattr_args *ap)
871 {
872 	struct vnode *vp;
873 	struct vattr *vap;
874 	struct p9fs_node *node;
875 	struct p9fs_inode *inode;
876 	int error;
877 
878 	vp = ap->a_vp;
879 	vap = ap->a_vap;
880 	node = P9FS_VTON(vp);
881 
882 	if (node == NULL)
883 		return (ENOENT);
884 
885 	inode = &node->inode;
886 
887 	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));
888 
889 	/* Reload our stats once to get the right values.*/
890 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
891 	if (error != 0) {
892 		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
893 		return (error);
894 	}
895 
896 	/* Basic info */
897 	VATTR_NULL(vap);
898 
899 	vap->va_atime.tv_sec = inode->i_atime;
900 	vap->va_mtime.tv_sec = inode->i_mtime;
901 	vap->va_ctime.tv_sec = inode->i_ctime;
902 	vap->va_atime.tv_nsec = inode->i_atime_nsec;
903 	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
904 	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
905 	vap->va_type = IFTOVT(inode->i_mode);
906 	vap->va_mode = inode->i_mode;
907 	vap->va_uid = inode->n_uid;
908 	vap->va_gid = inode->n_gid;
909 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
910 	vap->va_size = inode->i_size;
911 	vap->va_nlink = inode->i_links_count;
912 	vap->va_blocksize = inode->blksize;
913 	vap->va_fileid = inode->i_qid_path;
914 	vap->va_flags = inode->i_flags;
915 	vap->va_gen = inode->gen;
916 	vap->va_filerev = inode->data_version;
917 	vap->va_vaflags = 0;
918 	vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;
919 
920 	return (0);
921 }
922 
923 /* Convert a standard FreeBSD permission to P9. */
924 static uint32_t
925 p9fs_unix2p9_mode(uint32_t mode)
926 {
927 	uint32_t res;
928 
929 	res = mode & 0777;
930 	if (S_ISDIR(mode))
931 		res |= P9PROTO_DMDIR;
932 	if (S_ISSOCK(mode))
933 		res |= P9PROTO_DMSOCKET;
934 	if (S_ISLNK(mode))
935 		res |= P9PROTO_DMSYMLINK;
936 	if (S_ISFIFO(mode))
937 		res |= P9PROTO_DMNAMEDPIPE;
938 	if ((mode & S_ISUID) == S_ISUID)
939 		res |= P9PROTO_DMSETUID;
940 	if ((mode & S_ISGID) == S_ISGID)
941 		res |= P9PROTO_DMSETGID;
942 	if ((mode & S_ISVTX) == S_ISVTX)
943 		res |= P9PROTO_DMSETVTX;
944 
945 	return (res);
946 }
947 
948 /* Update inode with the stats read from server.(9P2000.L version) */
949 int
950 p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
951 {
952 	struct p9fs_node *np;
953 	struct p9fs_inode *inode;
954 
955 	np = P9FS_VTON(vp);
956 	inode = &np->inode;
957 
958 	ASSERT_VOP_LOCKED(vp, __func__);
959 	/* Update the pager size if file size changes on host */
960 	if (inode->i_size != stat->st_size) {
961 		inode->i_size = stat->st_size;
962 		if (vp->v_type == VREG)
963 			vnode_pager_setsize(vp, inode->i_size);
964 	}
965 
966 	inode->i_mtime = stat->st_mtime_sec;
967 	inode->i_atime = stat->st_atime_sec;
968 	inode->i_ctime = stat->st_ctime_sec;
969 	inode->i_mtime_nsec = stat->st_mtime_nsec;
970 	inode->i_atime_nsec = stat->st_atime_nsec;
971 	inode->i_ctime_nsec = stat->st_ctime_nsec;
972 	inode->n_uid = stat->st_uid;
973 	inode->n_gid = stat->st_gid;
974 	inode->i_mode = stat->st_mode;
975 	vp->v_type = IFTOVT(inode->i_mode);
976 	inode->i_links_count = stat->st_nlink;
977 	inode->blksize = stat->st_blksize;
978 	inode->blocks = stat->st_blocks;
979 	inode->gen = stat->st_gen;
980 	inode->data_version = stat->st_data_version;
981 
982 	ASSERT_VOP_LOCKED(vp, __func__);
983 	/* Setting a flag if file changes based on qid version */
984 	if (np->vqid.qid_version != stat->qid.version)
985 		np->flags |= P9FS_NODE_MODIFIED;
986 	memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
987 
988 	return (0);
989 }
990 
991 /*
992  * Write the current in memory inode stats into persistent stats structure
993  * to write to the server(for linux version).
994  */
995 static int
996 p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
997 {
998 	p9attr->size = inode->i_size;
999 	p9attr->mode = inode->i_mode;
1000 	p9attr->uid = inode->n_uid;
1001 	p9attr->gid = inode->n_gid;
1002 	p9attr->atime_sec = inode->i_atime;
1003 	p9attr->atime_nsec = inode->i_atime_nsec;
1004 	p9attr->mtime_sec = inode->i_mtime;
1005 	p9attr->mtime_nsec = inode->i_mtime_nsec;
1006 
1007 	return (0);
1008 }
1009 
1010 /*
1011  * Modify the ownership of a file whenever the chown is called on the
1012  * file.
1013  */
1014 static int
1015 p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
1016     struct thread *td)
1017 {
1018 	struct p9fs_node *np;
1019 	struct p9fs_inode *inode;
1020 	uid_t ouid;
1021 	gid_t ogid;
1022 	int error;
1023 
1024 	np = P9FS_VTON(vp);
1025 	inode = &np->inode;
1026 
1027 	if (uid == (uid_t)VNOVAL)
1028 		uid = inode->n_uid;
1029 	if (gid == (gid_t)VNOVAL)
1030 		gid = inode->n_gid;
1031 	/*
1032 	 * To modify the ownership of a file, must possess VADMIN for that
1033 	 * file.
1034 	 */
1035 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
1036 		return (error);
1037 	/*
1038 	 * To change the owner of a file, or change the group of a file to a
1039 	 * group of which we are not a member, the caller must have
1040 	 * privilege.
1041 	 */
1042 	if (((uid != inode->n_uid && uid != cred->cr_uid) ||
1043 	    (gid != inode->n_gid && !groupmember(gid, cred))) &&
1044 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
1045 		return (error);
1046 
1047 	ogid = inode->n_gid;
1048 	ouid = inode->n_uid;
1049 
1050 	inode->n_gid = gid;
1051 	inode->n_uid = uid;
1052 
1053 	if ((inode->i_mode & (ISUID | ISGID)) &&
1054 	    (ouid != uid || ogid != gid)) {
1055 
1056 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
1057 			inode->i_mode &= ~(ISUID | ISGID);
1058 	}
1059 	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
1060 
1061 	return (0);
1062 }
1063 
1064 /*
1065  * Update the in memory inode with all chmod new permissions/mode. Typically a
1066  * setattr is called to update it to server.
1067  */
1068 static int
1069 p9fs_chmod(struct vnode *vp, uint32_t  mode, struct ucred *cred, struct thread *td)
1070 {
1071 	struct p9fs_node *np;
1072 	struct p9fs_inode *inode;
1073 	uint32_t nmode;
1074 	int error;
1075 
1076 	np = P9FS_VTON(vp);
1077 	inode = &np->inode;
1078 
1079 	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n",  __func__, vp, mode, cred, td);
1080 	/*
1081 	 * To modify the permissions on a file, must possess VADMIN
1082 	 * for that file.
1083 	 */
1084 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
1085 		return (error);
1086 
1087 	/*
1088 	 * Privileged processes may set the sticky bit on non-directories,
1089 	 * as well as set the setgid bit on a file with a group that the
1090 	 * process is not a member of. Both of these are allowed in
1091 	 * jail(8).
1092 	 */
1093 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
1094 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
1095 			return (EFTYPE);
1096 	}
1097 	if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) {
1098 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
1099 		if (error != 0)
1100 			return (error);
1101 	}
1102 
1103 	/*
1104 	 * Deny setting setuid if we are not the file owner.
1105 	 */
1106 	if ((mode & ISUID) && inode->n_uid != cred->cr_uid) {
1107 		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
1108 		if (error != 0)
1109 			return (error);
1110 	}
1111 	nmode = inode->i_mode;
1112 	nmode &= ~ALLPERMS;
1113 	nmode |= (mode & ALLPERMS);
1114 	inode->i_mode = nmode;
1115 
1116 	P9_DEBUG(VOPS, "%s: to mode %x  %d \n ", __func__, nmode, error);
1117 
1118 	return (error);
1119 }
1120 
1121 /*
1122  * Set the attributes of a file referenced by fid. A valid bitmask is sent
1123  * in request selecting which fields to set
1124  */
1125 static int
1126 p9fs_setattr_dotl(struct vop_setattr_args *ap)
1127 {
1128 	struct vnode *vp;
1129 	struct vattr *vap;
1130 	struct p9fs_node *node;
1131 	struct p9fs_inode *inode;
1132 	struct ucred *cred;
1133 	struct thread *td;
1134 	struct p9_iattr_dotl *p9attr;
1135 	struct p9fs_session *vses;
1136 	struct p9_fid *vfid;
1137 	uint64_t oldfilesize;
1138 	int error;
1139 
1140 	vp = ap->a_vp;
1141 	vap = ap->a_vap;
1142 	node = P9FS_VTON(vp);
1143 	inode = &node->inode;
1144 	cred = ap->a_cred;
1145 	td = curthread;
1146 	vses = node->p9fs_ses;
1147 	error = 0;
1148 
1149 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
1150 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
1151 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
1152 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
1153 		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
1154 		return (EINVAL);
1155 	}
1156 	/* Disallow write attempts on read only filesystem */
1157 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1158 		return (EROFS);
1159 
1160 	/* Setting of flags is not supported */
1161 	if (vap->va_flags != VNOVAL)
1162 		return (EOPNOTSUPP);
1163 
1164 	/* Allocate p9attr struct */
1165 	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
1166 	if (p9attr == NULL)
1167 		return (ENOMEM);
1168 
1169 	/* Check if we need to change the ownership of the file*/
1170 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1171 		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
1172 		    vp, td, vap->va_uid, vap->va_gid);
1173 
1174 		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
1175 		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
1176 			P9PROTO_SETATTR_MODE;
1177 		if (error)
1178 			goto out;
1179 	}
1180 
1181 	/* Check for mode changes */
1182 	if (vap->va_mode != (mode_t)VNOVAL) {
1183 		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
1184 		    vap->va_mode);
1185 
1186 		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
1187 		p9attr->valid |= P9PROTO_SETATTR_MODE;
1188 		if (error)
1189 			goto out;
1190 	}
1191 
1192 	/* Update the size of the file and update mtime */
1193 	if (vap->va_size != (uint64_t)VNOVAL) {
1194 		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
1195 		    vp, td, (uintmax_t)vap->va_size);
1196 		switch (vp->v_type) {
1197 			case VDIR:
1198 				error = EISDIR;
1199 				goto out;
1200 			case VLNK:
1201 			case VREG:
1202 				/* Invalidate cached pages of vp */
1203 				error = vinvalbuf(vp, 0, 0, 0);
1204 				if (error)
1205 					goto out;
1206 				oldfilesize = inode->i_size;
1207 				inode->i_size = vap->va_size;
1208 				/* Update the p9fs_inode time */
1209 				p9fs_itimes(vp);
1210 				p9attr->valid |= P9PROTO_SETATTR_SIZE |
1211 				    P9PROTO_SETATTR_ATIME |
1212 				    P9PROTO_SETATTR_MTIME |
1213 				    P9PROTO_SETATTR_ATIME_SET |
1214 				    P9PROTO_SETATTR_MTIME_SET ;
1215 				break;
1216 			default:
1217 				goto out;
1218 		}
1219 	} else if (vap->va_atime.tv_sec != VNOVAL ||
1220 		    vap->va_mtime.tv_sec != VNOVAL) {
1221 		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
1222 		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
1223 		    (uintmax_t)vap->va_mtime.tv_sec);
1224 		/* Update the p9fs_inode times */
1225 		p9fs_itimes(vp);
1226 		p9attr->valid |= P9PROTO_SETATTR_ATIME |
1227 			P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
1228 			P9PROTO_SETATTR_MTIME_SET;
1229 	}
1230 
1231 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
1232 	if (vfid == NULL) {
1233 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
1234 		if (error)
1235 			goto out;
1236 	}
1237 
1238 	/* Write the inode structure values into p9attr */
1239 	p9fs_inode_to_iattr(inode, p9attr);
1240 	error = p9_client_setattr(vfid, p9attr);
1241 	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
1242 		if (error)
1243 			inode->i_size = oldfilesize;
1244 		else
1245 			vnode_pager_setsize(vp, inode->i_size);
1246 	}
1247 out:
1248 	if (p9attr) {
1249 		uma_zfree(p9fs_setattr_zone, p9attr);
1250 	}
1251 	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
1252 	return (error);
1253 }
1254 
/*
 * Result of p9fs_get_open_fid(), later handed to p9fs_release_open_fid().
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid to perform the I/O on */
	int fflags;		/* flags given to VOP_OPEN; only set when opened */
	int opened;		/* TRUE if p9fs_get_open_fid() called VOP_OPEN */
};
1260 
1261 /*
1262  * TODO: change this to take P9PROTO_* mode and avoid routing through
1263  * VOP_OPEN, factoring out implementation of p9fs_open.
1264  */
1265 static int
1266 p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
1267 {
1268 	struct p9fs_node *np;
1269 	struct p9fs_session *vses;
1270 	struct p9_fid *vofid;
1271 	int mode = p9fs_uflags_mode(fflags, TRUE);
1272 	int error = 0;
1273 
1274 	statep->opened = FALSE;
1275 
1276 	np = P9FS_VTON(vp);
1277 	vses = np->p9fs_ses;
1278 	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1279 	if (vofid == NULL) {
1280 		error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
1281 		if (error) {
1282 			return (error);
1283 		}
1284 		vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1285 		if (vofid == NULL) {
1286 			return (EBADF);
1287 		}
1288 		statep->fflags = fflags;
1289 		statep->opened = TRUE;
1290 	}
1291 	statep->vofid = vofid;
1292 	return (0);
1293 }
1294 
1295 static void
1296 p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
1297 {
1298 	if (statep->opened) {
1299 		(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
1300 	}
1301 }
1302 
1303 /*
1304  * An I/O buffer is used to to do any transfer. The uio is the vfs structure we
1305  * need to copy data into. As long as resid is greater than zero, we call
1306  * client_read to read data from offset(offset into the file) in the open fid
1307  * for the file into the I/O buffer. The data is read into the user data buffer.
1308  */
1309 static int
1310 p9fs_read(struct vop_read_args *ap)
1311 {
1312 	struct vnode *vp;
1313 	struct uio *uio;
1314 	struct p9fs_node *np;
1315 	uint64_t offset;
1316 	int64_t ret;
1317 	uint64_t resid;
1318 	uint32_t count;
1319 	int error;
1320 	char *io_buffer = NULL;
1321 	uint64_t filesize;
1322 	struct open_fid_state ostate;
1323 
1324 	vp = ap->a_vp;
1325 	uio = ap->a_uio;
1326 	np = P9FS_VTON(vp);
1327 	error = 0;
1328 
1329 	if (vp->v_type == VCHR || vp->v_type == VBLK)
1330 		return (EOPNOTSUPP);
1331 	if (vp->v_type != VREG)
1332 		return (EISDIR);
1333 	if (uio->uio_resid == 0)
1334 		return (0);
1335 	if (uio->uio_offset < 0)
1336 		return (EINVAL);
1337 
1338 	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
1339 	if (error)
1340 		return (error);
1341 
1342 	/* where in the file are we to start reading */
1343 	offset = uio->uio_offset;
1344 	filesize = np->inode.i_size;
1345 	if (uio->uio_offset >= filesize)
1346 		goto out;
1347 
1348 	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
1349 	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
1350 
1351 	/* Work with a local buffer from the pool for this vop */
1352 
1353 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1354 	while ((resid = uio->uio_resid) > 0) {
1355 		if (offset >= filesize)
1356 			break;
1357 		count = MIN(filesize - uio->uio_offset , resid);
1358 		if (count == 0)
1359 			break;
1360 
1361 		/* Copy count bytes into the uio */
1362 		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
1363 		/*
1364 		 * This is the only place in the entire p9fs where we check the
1365 		 * error for < 0 as p9_client_read/write return the number of
1366 		 * bytes instead of an error code. In this case if ret is < 0,
1367 		 * it means there is an IO error.
1368 		 */
1369 		if (ret < 0) {
1370 			error = -ret;
1371 			goto out;
1372 		}
1373 		error = uiomove(io_buffer, ret, uio);
1374 		if (error != 0)
1375 			goto out;
1376 
1377 		offset += ret;
1378 	}
1379 	uio->uio_offset = offset;
1380 out:
1381 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1382 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1383 
1384 	return (error);
1385 }
1386 
1387 /*
1388  * The user buffer contains the data to be written. This data is copied first
1389  * from uio into I/O buffer. This I/O  buffer is used to do the client_write to
1390  * the fid of the file starting from the offset given upto count bytes. The
1391  * number of bytes written is returned to the caller.
1392  */
1393 static int
1394 p9fs_write(struct vop_write_args *ap)
1395 {
1396 	struct vnode *vp;
1397 	struct uio *uio;
1398 	struct p9fs_node *np;
1399 	uint64_t off, offset;
1400 	int64_t ret;
1401 	uint64_t resid, bytes_written;
1402 	uint32_t count;
1403 	int error, ioflag;
1404 	uint64_t file_size;
1405 	char *io_buffer = NULL;
1406 	struct open_fid_state ostate;
1407 
1408 	vp = ap->a_vp;
1409 	uio = ap->a_uio;
1410 	np = P9FS_VTON(vp);
1411 	error = 0;
1412 	ioflag = ap->a_ioflag;
1413 
1414 	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
1415 	if (error)
1416 		return (error);
1417 
1418 	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
1419 	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1420 
1421 	if (uio->uio_offset < 0) {
1422 		error = EINVAL;
1423 		goto out;
1424 	}
1425 	if (uio->uio_resid == 0)
1426 		goto out;
1427 
1428 	file_size = np->inode.i_size;
1429 
1430 	switch (vp->v_type) {
1431 	case VREG:
1432 		if (ioflag & IO_APPEND)
1433 			uio->uio_offset = file_size;
1434 		break;
1435 	case VDIR:
1436 		return (EISDIR);
1437 	case VLNK:
1438 		break;
1439 	default:
1440 		panic("%s: bad file type vp: %p", __func__, vp);
1441 	}
1442 
1443 	resid = uio->uio_resid;
1444 	offset = uio->uio_offset;
1445 	bytes_written = 0;
1446 	error = 0;
1447 
1448 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1449 	while ((resid = uio->uio_resid) > 0) {
1450                 off = 0;
1451 		count = MIN(resid, P9FS_IOUNIT);
1452 		error = uiomove(io_buffer, count, uio);
1453 
1454 		if (error != 0) {
1455 			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
1456 			goto out;
1457 		}
1458 
1459 		/* While count still exists, keep writing.*/
1460 		while (count > 0) {
1461 			/* Copy count bytes from the uio */
1462 			ret = p9_client_write(ostate.vofid, offset, count,
1463                                 io_buffer + off);
1464 			if (ret < 0) {
1465 				if (bytes_written == 0) {
1466 					error = -ret;
1467 					goto out;
1468 				} else {
1469 					break;
1470 				}
1471 			}
1472 			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
1473 			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1474 
1475                         off += ret;
1476 			offset += ret;
1477 			bytes_written += ret;
1478 			count -= ret;
1479 		}
1480 	}
1481 	/* Update the fields in the node to reflect the change*/
1482 	if (file_size < uio->uio_offset + uio->uio_resid) {
1483 		np->inode.i_size = uio->uio_offset + uio->uio_resid;
1484 		vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
1485 	}
1486 out:
1487 	if (io_buffer)
1488 		uma_zfree(p9fs_io_buffer_zone, io_buffer);
1489 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1490 
1491 	return (error);
1492 }
1493 
1494 /*
1495  * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the
1496  * client_remove op to send messages to remove the node's fid on the server.
1497  * After that, does a node metadata cleanup on client side.
1498  */
1499 static int
1500 remove_common(struct p9fs_node *np, struct ucred *cred)
1501 {
1502 	int error;
1503 	struct p9fs_session *vses;
1504 	struct vnode *vp;
1505 	struct p9_fid *vfid;
1506 
1507 	error = 0;
1508 	vses = np->p9fs_ses;
1509 	vp = P9FS_NTOV(np);
1510 
1511 	vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error);
1512 	if (error != 0)
1513 		return (error);
1514 
1515 	error = p9_client_remove(vfid);
1516 	if (error != 0)
1517 		return (error);
1518 
1519 	/* Remove all non-open fids associated with the vp */
1520 	p9fs_fid_remove_all(np, TRUE);
1521 
1522 	/* Invalidate all entries of vnode from name cache and hash list. */
1523 	cache_purge(vp);
1524 
1525 	vfs_hash_remove(vp);
1526 	np->flags |= P9FS_NODE_DELETED;
1527 
1528 	return (error);
1529 }
1530 
1531 /* Remove vop for all files. Call common code for remove and adjust links */
1532 static int
1533 p9fs_remove(struct vop_remove_args *ap)
1534 {
1535 	struct vnode *vp;
1536 	struct p9fs_node *np;
1537 	struct vnode *dvp;
1538 	struct p9fs_node *dnp;
1539 	struct p9fs_inode *dinode;
1540 	int error;
1541 
1542 	vp = ap->a_vp;
1543 	np = P9FS_VTON(vp);
1544 	dvp = ap->a_dvp;
1545 	dnp = P9FS_VTON(dvp);
1546 	dinode = &dnp->inode;
1547 
1548 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1549 
1550 	if (vp->v_type == VDIR)
1551 		return (EISDIR);
1552 
1553 	error = remove_common(np, ap->a_cnp->cn_cred);
1554 	if (error == 0)
1555 		P9FS_DECR_LINKS(dinode);
1556 
1557 	return (error);
1558 }
1559 
1560 /* Remove vop for all directories. Call common code for remove and adjust links */
1561 static int
1562 p9fs_rmdir(struct vop_rmdir_args *ap)
1563 {
1564 	struct vnode *vp;
1565 	struct p9fs_node *np;
1566 	struct vnode *dvp;
1567 	struct p9fs_node *dnp;
1568 	struct p9fs_inode *dinode;
1569 	int error;
1570 
1571 	vp = ap->a_vp;
1572 	np = P9FS_VTON(vp);
1573 	dvp = ap->a_dvp;
1574 	dnp = P9FS_VTON(dvp);
1575 	dinode = &dnp->inode;
1576 
1577 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1578 
1579 	error = remove_common(np, ap->a_cnp->cn_cred);
1580 	if (error == 0)
1581 		P9FS_DECR_LINKS(dinode);
1582 
1583 	return (error);
1584 }
1585 
1586 /*
1587  * Create symlinks. Make the permissions and call create_common code
1588  * for Soft links.
1589  */
1590 static int
1591 p9fs_symlink(struct vop_symlink_args *ap)
1592 {
1593 	struct vnode *dvp;
1594 	struct vnode **vpp;
1595 	struct vattr *vap;
1596 	struct componentname *cnp;
1597 	char *symtgt;
1598 	struct p9fs_node *dnp;
1599 	struct p9fs_session *vses;
1600 	struct mount *mp;
1601 	struct p9_fid *dvfid, *newfid;
1602 	int error;
1603 	char tmpchr;
1604 	gid_t gid;
1605 
1606 	dvp = ap->a_dvp;
1607 	vpp = ap->a_vpp;
1608 	vap = ap->a_vap;
1609 	cnp = ap->a_cnp;
1610 	symtgt = (char*)(uintptr_t) ap->a_target;
1611 	dnp = P9FS_VTON(dvp);
1612 	vses = dnp->p9fs_ses;
1613 	mp = vses->p9fs_mount;
1614 	newfid = NULL;
1615 	error = 0;
1616 	gid = vap->va_gid;
1617 
1618 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
1619 
1620 	/*
1621 	 * Save the character present at namelen in nameptr string and
1622 	 * null terminate the character to get the search name for p9_dir_walk
1623 	 */
1624 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
1625 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
1626 
1627 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1628 	if (error != 0)
1629 		goto out;
1630 
1631 	error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
1632 	if (error != 0)
1633 		goto out;
1634 
1635 	/*create vnode for symtgt */
1636 	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
1637 	if (newfid != NULL) {
1638 		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
1639 		    dnp, newfid, vpp, cnp->cn_nameptr);
1640 		if (error != 0)
1641 			goto out;
1642 	} else
1643 		goto out;
1644 
1645 	if ((cnp->cn_flags & MAKEENTRY) != 0) {
1646 		cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
1647 	}
1648 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
1649 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
1650 
1651 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1652 	return (error);
1653 
1654 out:
1655 	if (newfid != NULL)
1656 		p9_client_clunk(newfid);
1657 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1658 	return (error);
1659 }
1660 
1661 /* Create hard link */
1662 static int
1663 p9fs_link(struct vop_link_args *ap)
1664 {
1665 	struct vnode *vp;
1666 	struct vnode *tdvp;
1667 	struct componentname *cnp;
1668 	struct p9fs_node *dnp;
1669 	struct p9fs_node *np;
1670 	struct p9fs_inode *inode;
1671 	struct p9fs_session *vses;
1672 	struct p9_fid *dvfid, *oldvfid;
1673 	int error;
1674 
1675 	vp = ap->a_vp;
1676 	tdvp = ap->a_tdvp;
1677 	cnp = ap->a_cnp;
1678 	dnp = P9FS_VTON(tdvp);
1679 	np = P9FS_VTON(vp);
1680 	inode = &np->inode;
1681 	vses = np->p9fs_ses;
1682 	error = 0;
1683 
1684 	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
1685 
1686 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1687 	if (error != 0)
1688 		return (error);
1689 	oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
1690 	if (error != 0)
1691 		return (error);
1692 
1693 	error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
1694 	if (error != 0)
1695 		return (error);
1696 	/* Increment ref count on the inode */
1697 	P9FS_INCR_LINKS(inode);
1698 
1699 	return (0);
1700 }
1701 
1702 /* Read contents of the symbolic link */
1703 static int
1704 p9fs_readlink(struct vop_readlink_args *ap)
1705 {
1706 	struct vnode *vp;
1707 	struct uio *uio;
1708 	struct p9fs_node *dnp;
1709 	struct p9fs_session *vses;
1710 	struct p9_fid *dvfid;
1711 	int error, len;
1712 	char *target;
1713 
1714 	vp = ap->a_vp;
1715 	uio = ap->a_uio;
1716 	dnp = P9FS_VTON(vp);
1717 	vses = dnp->p9fs_ses;
1718 	error = 0;
1719 
1720 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
1721 
1722 	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
1723 	if (error != 0)
1724 		return (error);
1725 
1726 	error = p9_readlink(dvfid, &target);
1727 	if (error != 0)
1728 		return (error);
1729 
1730 	len = strlen(target);
1731 	error = uiomove(target, len, uio);
1732 
1733 	return (0);
1734 }
1735 
1736 /*
1737  * Iterate through a directory. An entire 8k data is read into the I/O buffer.
1738  * This buffer is parsed to make dir entries and fed to the user buffer to
1739  * complete it to the VFS.
1740  */
1741 static int
1742 p9fs_readdir(struct vop_readdir_args *ap)
1743 {
1744 	struct uio *uio;
1745 	struct vnode *vp;
1746 	struct dirent cde;
1747 	int64_t offset;
1748 	uint64_t diroffset;
1749 	struct p9fs_node *np;
1750 	int error;
1751 	int32_t count;
1752 	struct p9_client *clnt;
1753 	struct p9_dirent dent;
1754 	char *io_buffer;
1755 	struct p9_fid *vofid;
1756 
1757 	uio = ap->a_uio;
1758 	vp = ap->a_vp;
1759 	np = P9FS_VTON(ap->a_vp);
1760 	offset = 0;
1761 	diroffset = 0;
1762 	error = 0;
1763 	count = 0;
1764 	clnt = np->p9fs_ses->clnt;
1765 
1766 	P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);
1767 
1768 	if (ap->a_uio->uio_iov->iov_len <= 0)
1769 		return (EINVAL);
1770 
1771 	if (vp->v_type != VDIR)
1772 		return (ENOTDIR);
1773 
1774 	vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
1775 	if (vofid == NULL) {
1776 		P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
1777 		return (EBADF);
1778 	}
1779 
1780 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);
1781 
1782 	/* We haven't reached the end yet. read more. */
1783 	diroffset = uio->uio_offset;
1784 	while (uio->uio_resid >= sizeof(struct dirent)) {
1785 		/*
1786 		 * We need to read more data as what is indicated by filesize because
1787 		 * filesize is based on data stored in struct dirent structure but
1788 		 * we read data in struct p9_dirent format which has different size.
1789 		 * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
1790 		 * it into struct dirent structure and send it back.
1791 		 */
1792 		count = P9FS_IOUNIT;
1793 		bzero(io_buffer, P9FS_MTU);
1794 		count = p9_client_readdir(vofid, (char *)io_buffer,
1795 		    diroffset, count);
1796 
1797 		if (count == 0)
1798 			break;
1799 
1800 		if (count < 0) {
1801 			error = EIO;
1802 			goto out;
1803 		}
1804 
1805 		offset = 0;
1806 		while (offset + QEMU_DIRENTRY_SZ <= count) {
1807 
1808 			/*
1809 			 * Read and make sense out of the buffer in one dirent
1810 			 * This is part of 9p protocol read. This reads one p9_dirent,
1811 			 * appends it to dirent(FREEBSD specifc) and continues to parse the buffer.
1812 			 */
1813 			bzero(&dent, sizeof(dent));
1814 			offset = p9_dirent_read(clnt, io_buffer, offset, count,
1815 				&dent);
1816 			if (offset < 0 || offset > count) {
1817 				error = EIO;
1818 				goto out;
1819 			}
1820 
1821 			bzero(&cde, sizeof(cde));
1822 			strncpy(cde.d_name, dent.d_name, dent.len);
1823 			cde.d_fileno = dent.qid.path;
1824 			cde.d_type = dent.d_type;
1825 			cde.d_namlen = dent.len;
1826 			cde.d_reclen = GENERIC_DIRSIZ(&cde);
1827 
1828                         /*
1829                          * If there isn't enough space in the uio to return a
1830                          * whole dirent, break off read
1831                          */
1832                         if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
1833                                 break;
1834 
1835 			/* Transfer */
1836 			error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
1837 			if (error != 0) {
1838 				error = EIO;
1839 				goto out;
1840 			}
1841 			diroffset = dent.d_off;
1842 		}
1843 	}
1844 	/* Pass on last transferred offset */
1845 	uio->uio_offset = diroffset;
1846 
1847 out:
1848 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1849 
1850 	return (error);
1851 }
1852 
1853 static void
1854 p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
1855 {
1856 	struct uio *uiov;
1857 	struct iovec io;
1858 	int error;
1859 	uint64_t off, offset;
1860 	uint64_t filesize;
1861 	uint64_t resid;
1862 	uint32_t count;
1863 	int64_t ret;
1864 	struct p9fs_node *np;
1865 	char *io_buffer;
1866 
1867 	error = 0;
1868 	np = P9FS_VTON(vp);
1869 
1870 	filesize = np->inode.i_size;
1871 	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
1872 	uiov->uio_iov = &io;
1873 	uiov->uio_iovcnt = 1;
1874 	uiov->uio_segflg = UIO_SYSSPACE;
1875 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1876 
1877 	if (bp->b_iocmd == BIO_READ) {
1878 		io.iov_len = uiov->uio_resid = bp->b_bcount;
1879 		io.iov_base = bp->b_data;
1880 		uiov->uio_rw = UIO_READ;
1881 
1882 		switch (vp->v_type) {
1883 
1884 		case VREG:
1885 		{
1886 			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1887 
1888 			if (uiov->uio_resid) {
1889 				int left = uiov->uio_resid;
1890 				int nread = bp->b_bcount - left;
1891 
1892 				if (left > 0)
1893 					bzero((char *)bp->b_data + nread, left);
1894 			}
1895 			/* where in the file are we to start reading */
1896 			offset = uiov->uio_offset;
1897 			if (uiov->uio_offset >= filesize)
1898 				goto out;
1899 
1900 			while ((resid = uiov->uio_resid) > 0) {
1901 				if (offset >= filesize)
1902 					break;
1903 				count = min(filesize - uiov->uio_offset, resid);
1904 				if (count == 0)
1905 					break;
1906 
1907 				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
1908 				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1909 
1910 				/* Copy count bytes into the uio */
1911 				ret = p9_client_read(vofid, offset, count, io_buffer);
1912 				error = uiomove(io_buffer, ret, uiov);
1913 
1914 				if (error != 0)
1915 					goto out;
1916 				offset += ret;
1917 			}
1918 			break;
1919 		}
1920 		default:
1921 			printf("vfs:  type %x unexpected\n", vp->v_type);
1922 			break;
1923 		}
1924 	} else {
1925 		if (bp->b_dirtyend > bp->b_dirtyoff) {
1926 			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
1927 			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
1928 			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1929 			uiov->uio_rw = UIO_WRITE;
1930 
1931 			if (uiov->uio_offset < 0) {
1932 				error = EINVAL;
1933 				goto out;
1934 			}
1935 
1936 			if (uiov->uio_resid == 0)
1937 				goto out;
1938 
1939 			resid = uiov->uio_resid;
1940 			offset = uiov->uio_offset;
1941 			error = 0;
1942 
1943 			while ((resid = uiov->uio_resid) > 0) {
1944                                 off = 0;
1945 				count = MIN(resid, P9FS_IOUNIT);
1946 				error = uiomove(io_buffer, count, uiov);
1947 				if (error != 0) {
1948 					goto out;
1949 				}
1950 
1951 				while (count > 0) {
1952 					/* Copy count bytes from the uio */
1953 					ret = p9_client_write(vofid, offset, count,
1954                                                 io_buffer + off);
1955 					if (ret < 0)
1956 						goto out;
1957 
1958 					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
1959 					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1960                                         off += ret;
1961 					offset += ret;
1962 					count -= ret;
1963 				}
1964 			}
1965 
1966 			/* Update the fields in the node to reflect the change */
1967 			if (filesize < uiov->uio_offset + uiov->uio_resid) {
1968 				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
1969 				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
1970 				/* update the modified timers. */
1971 				p9fs_itimes(vp);
1972 			}
1973 		} else {
1974 			 bp->b_resid = 0;
1975 			 goto out1;
1976 		}
1977 	}
1978 out:
1979 	/* Set the error */
1980 	if (error != 0) {
1981 		bp->b_error = error;
1982 		bp->b_ioflags |= BIO_ERROR;
1983 	}
1984 	bp->b_resid = uiov->uio_resid;
1985 out1:
1986 	bufdone(bp);
1987 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1988 	free(uiov, M_P9UIOV);
1989 }
1990 
1991 /*
1992  * The I/O buffer is mapped to a uio and a client_write/client_read is performed
1993  * the same way as p9fs_read and p9fs_write.
1994  */
1995 static int
1996 p9fs_strategy(struct vop_strategy_args *ap)
1997 {
1998 	struct vnode *vp;
1999 	struct buf *bp;
2000 	struct ucred *cr;
2001 	int error;
2002 	struct open_fid_state ostate;
2003 
2004 	vp = ap->a_vp;
2005 	bp = ap->a_bp;
2006 	error = 0;
2007 
2008 	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
2009 
2010 	if (bp->b_iocmd == BIO_READ)
2011 		cr = bp->b_rcred;
2012 	else
2013 		cr = bp->b_wcred;
2014 
2015 	error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate);
2016 	if (error) {
2017 		P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
2018 		bp->b_error = error;
2019 		bp->b_ioflags |= BIO_ERROR;
2020 		bufdone(bp);
2021 		return (0);
2022 	}
2023 
2024 	p9fs_doio(vp, bp, ostate.vofid, cr);
2025 	p9fs_release_open_fid(vp, cr, &ostate);
2026 
2027 	return (0);
2028 }
2029 
2030 /* Rename a file */
2031 static int
2032 p9fs_rename(struct vop_rename_args *ap)
2033 {
2034 	struct vnode *tvp;
2035 	struct vnode *tdvp;
2036 	struct vnode *fvp;
2037 	struct vnode *fdvp;
2038 	struct componentname *tcnp;
2039 	struct componentname *fcnp;
2040 	struct p9fs_node *tdnode;
2041 	struct p9fs_node *fdnode;
2042 	struct p9fs_inode *fdinode;
2043 	struct p9fs_node *fnode;
2044 	struct p9fs_inode *finode;
2045 	struct p9fs_session *vses;
2046 	struct p9fs_node *tnode;
2047 	struct p9fs_inode *tinode;
2048 	struct p9_fid *olddirvfid, *newdirvfid ;
2049 	int error;
2050 
2051 	tvp = ap->a_tvp;
2052 	tdvp = ap->a_tdvp;
2053 	fvp = ap->a_fvp;
2054 	fdvp = ap->a_fdvp;
2055 	tcnp = ap->a_tcnp;
2056 	fcnp = ap->a_fcnp;
2057 	tdnode = P9FS_VTON(tdvp);
2058 	fdnode = P9FS_VTON(fdvp);
2059 	fdinode = &fdnode->inode;
2060 	fnode = P9FS_VTON(fvp);
2061 	finode = &fnode->inode;
2062 	vses = fnode->p9fs_ses;
2063 	error = 0;
2064 
2065 	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);
2066 
2067 	/* Check for cross mount operation */
2068 	if (fvp->v_mount != tdvp->v_mount ||
2069 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
2070 		error = EXDEV;
2071 		goto out;
2072 	}
2073 
2074 	/* warning  if you are renaming to the same name */
2075 	if (fvp == tvp)
2076 		error = 0;
2077 
2078 	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
2079 	if (error != 0)
2080 		goto out;
2081 	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
2082 	if (error != 0)
2083 		goto out;
2084 
2085 	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
2086 	if (error != 0)
2087 		goto out;
2088 
2089 	/*
2090 	 * decrement the link count on the "from" file whose name is going
2091 	 * to be changed if its a directory
2092 	 */
2093 	if (fvp->v_type == VDIR) {
2094 		if (tvp && tvp->v_type == VDIR)
2095 			cache_purge(tdvp);
2096 		P9FS_DECR_LINKS(fdinode);
2097 		cache_purge(fdvp);
2098 	}
2099 
2100 	/* Taking exclusive lock on the from node before decrementing the link count */
2101 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
2102 		goto out;
2103 	P9FS_DECR_LINKS(finode);
2104 	VOP_UNLOCK(fvp);
2105 
2106 	if (tvp) {
2107 		tnode = P9FS_VTON(tvp);
2108 		tinode = &tnode->inode;
2109 		P9FS_DECR_LINKS(tinode);
2110 	}
2111 
2112 out:
2113 	if (tdvp == tvp)
2114 		vrele(tdvp);
2115 	else
2116 		vput(tdvp);
2117 	if (tvp)
2118 		vput(tvp);
2119 	vrele(fdvp);
2120 	vrele(fvp);
2121 	return (error);
2122 }
2123 
2124 /*
2125  * Put VM pages, synchronously.
2126  * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
2127  */
2128 static int
2129 p9fs_putpages(struct vop_putpages_args *ap)
2130 {
2131 	struct uio uio;
2132 	struct iovec iov;
2133 	int i, error, npages, count;
2134 	off_t offset;
2135 	int *rtvals;
2136 	struct vnode *vp;
2137 	struct thread *td;
2138 	struct ucred *cred;
2139 	struct p9fs_node *np;
2140 	vm_page_t *pages;
2141 	vm_offset_t kva;
2142 	struct buf *bp;
2143 
2144 	vp = ap->a_vp;
2145 	np = P9FS_VTON(vp);
2146 	td = curthread;
2147 	cred = curthread->td_ucred;
2148 	pages = ap->a_m;
2149 	count = ap->a_count;
2150 	rtvals = ap->a_rtvals;
2151 	npages = btoc(count);
2152 	offset = IDX_TO_OFF(pages[0]->pindex);
2153 
2154 	/*
2155 	 * When putting pages, do not extend file past EOF.
2156 	 */
2157 	if (offset + count > np->inode.i_size) {
2158 		count = np->inode.i_size - offset;
2159 		if (count < 0)
2160 			count = 0;
2161 	}
2162 
2163 	for (i = 0; i < npages; i++)
2164 		rtvals[i] = VM_PAGER_ERROR;
2165 
2166 	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
2167 	kva = (vm_offset_t) bp->b_data;
2168 	pmap_qenter(kva, pages, npages);
2169 
2170 	VM_CNT_INC(v_vnodeout);
2171 	VM_CNT_ADD(v_vnodepgsout, count);
2172 
2173 	iov.iov_base = (caddr_t) kva;
2174 	iov.iov_len = count;
2175 	uio.uio_iov = &iov;
2176 	uio.uio_iovcnt = 1;
2177 	uio.uio_offset = offset;
2178 	uio.uio_resid = count;
2179 	uio.uio_segflg = UIO_SYSSPACE;
2180 	uio.uio_rw = UIO_WRITE;
2181 	uio.uio_td = td;
2182 
2183 	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);
2184 
2185 	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
2186 	    cred);
2187 
2188 	pmap_qremove(kva, npages);
2189 	uma_zfree(p9fs_pbuf_zone, bp);
2190 
2191 	if (error == 0)
2192 		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
2193 		    np->inode.i_size - offset, npages * PAGE_SIZE);
2194 
2195 	return (rtvals[0]);
2196 }
2197 
2198 struct vop_vector p9fs_vnops = {
2199 	.vop_default =		&default_vnodeops,
2200 	.vop_lookup =		p9fs_lookup,
2201 	.vop_open =		p9fs_open,
2202 	.vop_close =		p9fs_close,
2203 	.vop_access =		p9fs_access,
2204 	.vop_getattr =		p9fs_getattr_dotl,
2205 	.vop_setattr =		p9fs_setattr_dotl,
2206 	.vop_reclaim =		p9fs_reclaim,
2207 	.vop_inactive =		p9fs_inactive,
2208 	.vop_readdir =		p9fs_readdir,
2209 	.vop_create =		p9fs_create,
2210 	.vop_mknod =		p9fs_mknod,
2211 	.vop_read =		p9fs_read,
2212 	.vop_write =		p9fs_write,
2213 	.vop_remove =		p9fs_remove,
2214 	.vop_mkdir =		p9fs_mkdir,
2215 	.vop_rmdir =		p9fs_rmdir,
2216 	.vop_strategy =		p9fs_strategy,
2217 	.vop_symlink =		p9fs_symlink,
2218 	.vop_rename =           p9fs_rename,
2219 	.vop_link =		p9fs_link,
2220 	.vop_readlink =		p9fs_readlink,
2221 	.vop_putpages =		p9fs_putpages,
2222 };
2223 VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
2224