xref: /freebsd/sys/fs/p9fs/p9fs_vnops.c (revision 9cbf1de7e34a6fced041388fad5d9180cb7705fe)
1 /*
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9 *	notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *	notice, this list of conditions and the following disclaimer in the
12  *	documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
27 /* This file contains VFS file ops for the 9P protocol.
28  * This makes the upper layer of the p9fs driver. These functions interact
29  * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
30  * the user file operations are handled here.
31  */
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/buf.h>
36 #include <sys/dirent.h>
37 #include <sys/fcntl.h>
38 #include <sys/namei.h>
39 #include <sys/priv.h>
40 #include <sys/stat.h>
41 #include <sys/vnode.h>
42 #include <sys/rwlock.h>
43 #include <sys/vmmeter.h>
44 
45 #include <vm/vm.h>
46 #include <vm/vm_extern.h>
47 #include <vm/vm_object.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_pager.h>
50 #include <vm/vnode_pager.h>
51 
52 #include <fs/p9fs/p9_client.h>
53 #include <fs/p9fs/p9_debug.h>
54 #include <fs/p9fs/p9fs.h>
55 #include <fs/p9fs/p9fs_proto.h>
56 
/*
 * File permissions.
 * Octal mode bits; ISVTX, ISGID and ISUID are tested against the inode's
 * i_mode by the lookup, chown and chmod code in this file.
 */
#define IEXEC		0000100 /* Executable. */
#define IWRITE		0000200 /* Writeable. */
#define IREAD		0000400 /* Readable. */
#define ISVTX		0001000 /* Sticky bit. */
#define ISGID		0002000 /* Set-gid. */
#define ISUID		0004000 /* Set-uid. */

/* Malloc type for the uio structures used by the strategy code. */
static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
/* UMA zones defined outside this file. */
extern uma_zone_t p9fs_io_buffer_zone;
extern uma_zone_t p9fs_getattr_zone;
extern uma_zone_t p9fs_setattr_zone;
extern uma_zone_t p9fs_pbuf_zone;
/* For the root vnode's vnops. */
struct vop_vector p9fs_vnops;

/* Forward declaration: used by the create VOPs before its definition. */
static uint32_t p9fs_unix2p9_mode(uint32_t mode);
74 
75 static void
76 p9fs_itimes(struct vnode *vp)
77 {
78 	struct p9fs_node *node;
79 	struct timespec ts;
80 	struct p9fs_inode *inode;
81 
82 	node = P9FS_VTON(vp);
83 	inode = &node->inode;
84 
85 	vfs_timestamp(&ts);
86 	inode->i_mtime = ts.tv_sec;
87 }
88 
89 /*
90  * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
91  * The cleanup includes invalidating all cache entries for the vnode,
92  * destroying the vobject, removing vnode from hashlist, removing p9fs node
93  * from the list of session p9fs nodes, and disposing of the p9fs node.
94  * Basically it is doing a reverse of what a create/vget does.
95  */
96 void
97 p9fs_cleanup(struct p9fs_node *np)
98 {
99 	struct vnode *vp;
100 	struct p9fs_session *vses;
101 
102 	if (np == NULL)
103 		return;
104 
105 	vp = P9FS_NTOV(np);
106 	vses = np->p9fs_ses;
107 
108 	/* Remove the vnode from hash list if vnode is not already deleted */
109 	if ((np->flags & P9FS_NODE_DELETED) == 0)
110 		vfs_hash_remove(vp);
111 
112 	P9FS_LOCK(vses);
113 	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
114 		np->flags &= ~P9FS_NODE_IN_SESSION;
115 		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
116 	} else {
117 		P9FS_UNLOCK(vses);
118 		return;
119 	}
120 	P9FS_UNLOCK(vses);
121 
122 	/* Invalidate all entries to a particular vnode. */
123 	cache_purge(vp);
124 
125 	/* Destroy the vm object and flush associated pages. */
126 	vnode_destroy_vobject(vp);
127 
128 	/* Remove the vnode from hash list if vnode is not already deleted */
129 	if ((np->flags & P9FS_NODE_DELETED) == 0)
130 		vfs_hash_remove(vp);
131 
132 	/* Invalidate all entries to a particular vnode. */
133 	cache_purge(vp);
134 
135 	/* Destroy the vm object and flush associated pages. */
136 	vnode_destroy_vobject(vp);
137 
138 	/* Remove all the FID */
139 	p9fs_fid_remove_all(np, FALSE);
140 
141 	/* Dispose all node knowledge.*/
142 	p9fs_destroy_node(&np);
143 }
144 
145 /*
146  * Reclaim VOP is defined to be called for every vnode. This starts off
147  * the cleanup by clunking(remove the fid on the server) and calls
148  * p9fs_cleanup to free all the resources allocated for p9fs node.
149  */
150 static int
151 p9fs_reclaim(struct vop_reclaim_args *ap)
152 {
153 	struct vnode *vp;
154 	struct p9fs_node *np;
155 
156 	vp = ap->a_vp;
157 	np = P9FS_VTON(vp);
158 
159 	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
160 	p9fs_cleanup(np);
161 
162 	return (0);
163 }
164 
165 /*
166  * recycle vnodes which are no longer referenced i.e, their usecount is zero
167  */
168 static int
169 p9fs_inactive(struct vop_inactive_args *ap)
170 {
171 	struct vnode *vp;
172 	struct p9fs_node *np;
173 
174 	vp = ap->a_vp;
175 	np = P9FS_VTON(vp);
176 
177 	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
178 	if (np->flags & P9FS_NODE_DELETED)
179 		vrecycle(vp);
180 
181 	return (0);
182 }
183 
/*
 * Argument bundle passed by p9fs_lookup through vn_vget_ino_gen() to the
 * p9fs_lookup_alloc callback.
 */
struct p9fs_lookup_alloc_arg {
	struct componentname *cnp;	/* component being looked up */
	struct p9fs_node *dnp;		/* p9fs node of the directory */
	struct p9_fid *newfid;		/* fid returned by the walk */
};
189 
190 /* Callback for vn_get_ino */
191 static int
192 p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
193 {
194 	struct p9fs_lookup_alloc_arg *p9aa = arg;
195 
196 	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
197 		p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
198 }
199 
200 /*
201  * p9fs_lookup is called for every component name that is being searched for.
202  *
203  * I. If component is found on the server, we look for the in-memory
204  *    repesentation(vnode) of this component in namecache.
205  *    A. If the node is found in the namecache, we check is the vnode is still
206  *	 valid.
207  *	 1. If it is still valid, return vnode.
208  *	 2. If it is not valid, we remove this vnode from the name cache and
209  *	    create a new vnode for the component and return that vnode.
210  *    B. If the vnode is not found in the namecache, we look for it in the
211  *       hash list.
212  *       1. If the vnode is in the hash list, we check if the vnode is still
213  *	    valid.
214  *	    a. If it is still valid, we add that vnode to the namecache for
215  *	       future lookups and return the vnode.
216  *	    b. If it is not valid, create a new vnode and p9fs node,
217  *	       initialize them and return the vnode.
218  *	 2. If the vnode is not found in the hash list, we create a new vnode
219  *	    and p9fs node, initialize them and return the vnode.
220  * II. If the component is not found on the server, an error code is returned.
221  *     A. For the creation case, we return EJUSTRETURN so VFS can handle it.
222  *     B. For all other cases, ENOENT is returned.
223  */
224 static int
225 p9fs_lookup(struct vop_lookup_args *ap)
226 {
227 	struct vnode *dvp;
228 	struct vnode **vpp, *vp;
229 	struct componentname *cnp;
230 	struct p9fs_node *dnp; /*dir p9_node */
231 	struct p9fs_node *np;
232 	struct p9fs_session *vses;
233 	struct mount *mp; /* Get the mount point */
234 	struct p9_fid *dvfid, *newfid;
235 	int error;
236 	struct vattr vattr;
237 	int flags;
238 	char tmpchr;
239 
240 	dvp = ap->a_dvp;
241 	vpp = ap->a_vpp;
242 	cnp = ap->a_cnp;
243 	dnp = P9FS_VTON(dvp);
244 	error = 0;
245 	flags = cnp->cn_flags;
246 	*vpp = NULLVP;
247 
248 	if (dnp == NULL)
249 		return (ENOENT);
250 
251 	if (cnp->cn_nameptr[0] == '.' && strlen(cnp->cn_nameptr) == 1) {
252 		vref(dvp);
253 		*vpp = dvp;
254 		return (0);
255 	}
256 
257 	vses = dnp->p9fs_ses;
258 	mp = vses->p9fs_mount;
259 
260 	/* Do the cache part ourselves */
261 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
262 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
263 		return (EROFS);
264 
265 	if (dvp->v_type != VDIR)
266 		return (ENOTDIR);
267 
268 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
269 	if (error)
270 		return (error);
271 
272 	/* Do the directory walk on host to check if file exist */
273 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
274 	if (error)
275 		return (error);
276 
277 	/*
278 	 * Save the character present at namelen in nameptr string and
279 	 * null terminate the character to get the search name for p9_dir_walk
280 	 * This is done to handle when lookup is for "a" and component
281 	 * name contains a/b/c
282 	 */
283 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
284 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
285 
286 	/*
287 	 * If the client_walk fails, it means the file looking for doesnt exist.
288 	 * Create the file is the flags are set or just return the error
289 	 */
290 	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
291 
292 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
293 
294 	if (error != 0 || newfid == NULL) {
295 		/* Clunk the newfid if it is not NULL */
296 		if (newfid != NULL)
297 			p9_client_clunk(newfid);
298 
299 		if (error != ENOENT)
300 			return (error);
301 
302 		/* The requested file was not found. */
303 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
304 		    (flags & ISLASTCN)) {
305 
306 			if (mp->mnt_flag & MNT_RDONLY)
307 				return (EROFS);
308 
309 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
310 			    curthread);
311 			if (!error) {
312 				return (EJUSTRETURN);
313 			}
314 		}
315 		return (error);
316 	}
317 
318 	/* Look for the entry in the component cache*/
319 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
320 	if (error > 0 && error != ENOENT) {
321 		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
322 		goto out;
323 	}
324 
325 	if (error == -1) {
326 		vp = *vpp;
327 		/* Check if the entry in cache is stale or not */
328 		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
329 		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
330 			goto out;
331 		}
332 		/*
333 		 * This case, we have an error coming from getattr,
334 		 * act accordingly.
335 		 */
336 		cache_purge(vp);
337 		if (dvp != vp)
338 			vput(vp);
339 		else
340 			vrele(vp);
341 
342 		*vpp = NULLVP;
343 	} else if (error == ENOENT) {
344 		if (VN_IS_DOOMED(dvp))
345 			goto out;
346 		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
347 			error = ENOENT;
348 			goto out;
349 		}
350 		cache_purge_negative(dvp);
351 	}
352 	/* Reset values */
353 	error = 0;
354 	vp = NULLVP;
355 
356 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
357 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
358 
359 	/*
360 	 * Looks like we have found an entry. Now take care of all other cases.
361 	 */
362 	if (flags & ISDOTDOT) {
363 		struct p9fs_lookup_alloc_arg p9aa;
364 		p9aa.cnp = cnp;
365 		p9aa.dnp = dnp;
366 		p9aa.newfid = newfid;
367 		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
368 		if (error)
369 			goto out;
370 		*vpp = vp;
371 	} else {
372 		/*
373 		 * client_walk is equivalent to searching a component name in a
374 		 * directory(fid) here. If new fid is returned, we have found an
375 		 * entry for this component name so, go and create the rest of
376 		 * the vnode infra(vget_common) for the returned newfid.
377 		 */
378 		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
379 		    && (flags & ISLASTCN)) {
380 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
381 			    curthread);
382 			if (error)
383 				goto out;
384 
385 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
386 			    dnp, newfid, &vp, cnp->cn_nameptr);
387 			if (error)
388 				goto out;
389 
390 			*vpp = vp;
391 			np = P9FS_VTON(vp);
392 			if ((dnp->inode.i_mode & ISVTX) &&
393 			    cnp->cn_cred->cr_uid != 0 &&
394 			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
395 			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
396 				vput(*vpp);
397 				*vpp = NULL;
398 				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
399 				return (EPERM);
400 			}
401 		} else {
402 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
403 			    dnp, newfid, &vp, cnp->cn_nameptr);
404 			if (error)
405 				goto out;
406 			*vpp = vp;
407 		}
408 	}
409 
410 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
411 
412 	/* Store the result the cache if MAKEENTRY is specified in flags */
413 	if ((cnp->cn_flags & MAKEENTRY) != 0)
414 		cache_enter(dvp, *vpp, cnp);
415 	return (error);
416 out:
417 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
418 	p9_client_clunk(newfid);
419 	return (error);
420 }
421 
422 /*
423  * Common creation function for file/directory with respective flags. We first
424  * open the parent directory in order to create the file under it. For this,
425  * as 9P protocol suggests, we need to call client_walk to create the open fid.
426  * Once we have the open fid, the file_create function creates the direntry with
427  * the name and perm specified under the parent dir. If this succeeds (an entry
428  * is created for the new file on the server), we create our metadata for this
429  * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
430  * fid of the parent directory.
431  */
432 static int
433 create_common(struct p9fs_node *dnp, struct componentname *cnp,
434     char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
435 {
436 	char tmpchr;
437 	struct p9_fid *dvfid, *ofid, *newfid;
438 	struct p9fs_session *vses;
439 	struct mount *mp;
440 	int error;
441 
442 	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
443 
444 	vses = dnp->p9fs_ses;
445 	mp = vses->p9fs_mount;
446 	newfid = NULL;
447 	error = 0;
448 
449 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
450 	if (error != 0)
451 		return (error);
452 
453 	/* Clone the directory fid to create the new file */
454 	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
455 	if (error != 0)
456 		return (error);
457 
458 	/*
459 	 * Save the character present at namelen in nameptr string and
460 	 * null terminate the character to get the search name for p9_dir_walk
461 	 */
462 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
463 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
464 
465 	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
466 		    extension);
467 	if (error != 0) {
468 		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
469 		goto out;
470 	}
471 
472 	/* If its not hardlink only then do the walk, else we are done. */
473 	if (!(perm & P9PROTO_DMLINK)) {
474 		/*
475 		 * Do the lookup part and add the vnode, p9fs node. Note that vpp
476 		 * is filled in here.
477 		 */
478 		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
479 		if (newfid != NULL) {
480 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
481 			    dnp, newfid, vpp, cnp->cn_nameptr);
482 			if (error != 0)
483 				goto out;
484 		} else {
485 			/* Not found return NOENTRY.*/
486 			goto out;
487 		}
488 
489 		if ((cnp->cn_flags & MAKEENTRY) != 0)
490 			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
491 	}
492 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
493 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
494 	/* Clunk the open ofid. */
495 	if (ofid != NULL)
496 		(void)p9_client_clunk(ofid);
497 
498 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
499 	return (0);
500 out:
501 	if (ofid != NULL)
502 		(void)p9_client_clunk(ofid);
503 
504 	if (newfid != NULL)
505 		(void)p9_client_clunk(newfid);
506 
507 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
508 	return (error);
509 }
510 
511 /*
512  * This is the main file creation VOP. Make the permissions of the new
513  * file and call the create_common common code to complete the create.
514  */
515 static int
516 p9fs_create(struct vop_create_args *ap)
517 {
518 	struct vnode *dvp;
519 	struct vnode **vpp;
520 	struct componentname *cnp;
521 	uint32_t mode;
522 	struct p9fs_node *dnp;
523 	struct p9fs_inode *dinode;
524 	uint32_t perm;
525 	int ret;
526 
527 	dvp = ap->a_dvp;
528 	vpp = ap->a_vpp;
529 	cnp = ap->a_cnp;
530 	dnp = P9FS_VTON(dvp);
531 	dinode = &dnp->inode;
532 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
533 	perm = p9fs_unix2p9_mode(mode);
534 
535 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
536 
537 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
538 	if (ret == 0) {
539 		P9FS_INCR_LINKS(dinode);
540 	}
541 
542 	return (ret);
543 }
544 
545 /*
546  * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
547  * and call the create_common common code to complete the create.
548  */
549 static int
550 p9fs_mkdir(struct vop_mkdir_args *ap)
551 {
552 	struct vnode *dvp;
553 	struct vnode **vpp;
554 	struct componentname *cnp;
555 	uint32_t mode;
556 	struct p9fs_node *dnp;
557 	struct p9fs_inode *dinode;
558 	uint32_t perm;
559 	int ret;
560 
561 	dvp = ap->a_dvp;
562 	vpp = ap->a_vpp;
563 	cnp = ap->a_cnp;
564 	dnp = P9FS_VTON(dvp);
565 	dinode = &dnp->inode;
566 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
567 	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
568 
569 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
570 
571 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
572 	if (ret == 0)
573 		P9FS_INCR_LINKS(dinode);
574 
575 	return (ret);
576 }
577 
578 /*
579  * p9fs_mknod is the main node creation vop. Make the permissions of the new node
580  * and call the create_common common code to complete the create.
581  */
582 static int
583 p9fs_mknod(struct vop_mknod_args *ap)
584 {
585 	struct vnode *dvp;
586 	struct vnode **vpp;
587 	struct componentname *cnp;
588 	uint32_t mode;
589 	struct p9fs_node *dnp;
590 	struct p9fs_inode *dinode;
591 	uint32_t perm;
592 	int ret;
593 
594 	dvp = ap->a_dvp;
595 	vpp = ap->a_vpp;
596 	cnp = ap->a_cnp;
597 	dnp = P9FS_VTON(dvp);
598 	dinode = &dnp->inode;
599 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
600 	perm = p9fs_unix2p9_mode(mode);
601 
602 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
603 
604 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp);
605 	if (ret == 0) {
606 		P9FS_INCR_LINKS(dinode);
607 	}
608 
609 	return (ret);
610 }
611 
612 /* Convert open mode permissions to P9 */
613 static int
614 p9fs_uflags_mode(int uflags, int extended)
615 {
616 	uint32_t ret;
617 
618 	/* Convert first to O flags.*/
619 	uflags = OFLAGS(uflags);
620 
621 	switch (uflags & 3) {
622 
623 	case O_RDONLY:
624 	    ret = P9PROTO_OREAD;
625 	    break;
626 
627 	case O_WRONLY:
628 	    ret = P9PROTO_OWRITE;
629 	    break;
630 
631 	case O_RDWR:
632 	    ret = P9PROTO_ORDWR;
633 	    break;
634 	}
635 
636 	if (extended) {
637 		if (uflags & O_EXCL)
638 			ret |= P9PROTO_OEXCL;
639 
640 		if (uflags & O_APPEND)
641 			ret |= P9PROTO_OAPPEND;
642 	}
643 
644 	return (ret);
645 }
646 
647 /*
648  * This is the main open VOP for every file open. If the file is already
649  * open, then increment and return. If there is no open fid for this file,
650  * there needs to be a client_walk which creates a new open fid for this file.
651  * Once we have a open fid, call the open on this file with the mode creating
652  * the vobject.
653  */
654 static int
655 p9fs_open(struct vop_open_args *ap)
656 {
657 	int error;
658 	struct vnode *vp;
659 	struct p9fs_node *np;
660 	struct p9fs_session *vses;
661 	struct p9_fid *vofid, *vfid;
662 	size_t filesize;
663 	uint32_t mode;
664 
665 	error = 0;
666 	vp = ap->a_vp;
667 	np = P9FS_VTON(vp);
668 	vses = np->p9fs_ses;
669 
670 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
671 
672 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
673 		return (EOPNOTSUPP);
674 
675 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
676 	if (error != 0)
677 		return (error);
678 
679 	ASSERT_VOP_LOCKED(vp, __func__);
680 	/*
681 	 * Invalidate the pages of the vm_object cache if the file is modified
682 	 * based on the flag set in reload stats
683 	 */
684 	if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
685 		error = vinvalbuf(vp, 0, 0, 0);
686 		if (error != 0)
687 			return (error);
688 		np->flags &= ~P9FS_NODE_MODIFIED;
689 	}
690 
691 	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
692 	if (error != 0)
693 		return (error);
694 
695 	/*
696 	 * Translate kernel fflags to 9p mode
697 	 */
698 	mode = p9fs_uflags_mode(ap->a_mode, 1);
699 
700 	/*
701 	 * Search the fid in vofid_list for current user. If found increase the open
702 	 * count and return. If not found clone a new fid and open the file using
703 	 * that cloned fid.
704 	 */
705 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
706 	if (vofid != NULL) {
707 		vofid->v_opens++;
708 		return (0);
709 	} else {
710 		/*vofid is the open fid for this file.*/
711 		vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
712 		if (error != 0)
713 			return (error);
714 	}
715 
716 	error = p9_client_open(vofid, mode);
717 	if (error != 0)
718 		p9_client_clunk(vofid);
719 	else {
720 		vofid->v_opens = 1;
721 		filesize = np->inode.i_size;
722 		vnode_create_vobject(vp, filesize, ap->a_td);
723 		p9fs_fid_add(np, vofid, VOFID);
724 	}
725 
726 	return (error);
727 }
728 
729 /*
730  * Close the open references. Just reduce the open count on vofid and return.
731  * Let clunking of VOFID happen in p9fs_reclaim.
732  */
733 static int
734 p9fs_close(struct vop_close_args *ap)
735 {
736 	struct vnode *vp;
737 	struct p9fs_node *np;
738 	struct p9fs_session *vses;
739 	struct p9_fid *vofid;
740 	int error;
741 
742 	vp = ap->a_vp;
743 	np = P9FS_VTON(vp);
744 
745 	if (np == NULL)
746 		return (0);
747 
748 	vses = np->p9fs_ses;
749 	error = 0;
750 
751 	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
752 
753 	/*
754 	 * Translate kernel fflags to 9p mode
755 	 */
756 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID,
757 	    p9fs_uflags_mode(ap->a_fflag, 1), &error);
758 	if (vofid == NULL)
759 		return (0);
760 
761 	vofid->v_opens--;
762 
763 	return (0);
764 }
765 
766 /* Helper routine for checking if fileops are possible on this file */
767 static int
768 p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
769 {
770 
771 	/* Check if we are allowed to write */
772 	switch (vap->va_type) {
773 	case VDIR:
774 	case VLNK:
775 	case VREG:
776 		/*
777 		 * Normal nodes: check if we're on a read-only mounted
778 		 * file system and bail out if we're trying to write.
779 		 */
780 		if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
781 			return (EROFS);
782 		break;
783 	case VBLK:
784 	case VCHR:
785 	case VSOCK:
786 	case VFIFO:
787 		/*
788 		 * Special nodes: even on read-only mounted file systems
789 		 * these are allowed to be written to if permissions allow.
790 		 */
791 		break;
792 	default:
793 		/* No idea what this is */
794 		return (EINVAL);
795 	}
796 
797 	return (0);
798 }
799 
800 /* Check the access permissions of the file. */
801 static int
802 p9fs_access(struct vop_access_args *ap)
803 {
804 	struct vnode *vp;
805 	accmode_t accmode;
806 	struct ucred *cred;
807 	struct vattr vap;
808 	int error;
809 
810 	vp = ap->a_vp;
811 	accmode = ap->a_accmode;
812 	cred = ap->a_cred;
813 
814 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
815 
816 	/* make sure getattr is working correctly and is defined.*/
817 	error = VOP_GETATTR(vp, &vap, cred);
818 	if (error != 0)
819 		return (error);
820 
821 	error = p9fs_check_possible(vp, &vap, accmode);
822 	if (error != 0)
823 		return (error);
824 
825 	/* Call the Generic Access check in VOPS*/
826 	error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
827 	    cred);
828 
829 
830 	return (error);
831 }
832 
833 /*
834  * Reload the file stats from the server and update the inode structure present
835  * in p9fs node.
836  */
837 int
838 p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
839 {
840 	struct p9_stat_dotl *stat;
841 	int error;
842 	struct p9fs_node *node;
843 	struct p9fs_session *vses;
844 	struct p9_fid *vfid;
845 
846 	error = 0;
847 	node = P9FS_VTON(vp);
848 	vses = node->p9fs_ses;
849 
850 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
851 	if (vfid == NULL) {
852 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
853 		if (error)
854 			return (error);
855 	}
856 
857 	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
858 
859 	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
860 	if (error != 0) {
861 		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
862 		goto out;
863 	}
864 
865 	/* Init the vnode with the disk info */
866 	p9fs_stat_vnode_dotl(stat, vp);
867 out:
868 	if (stat != NULL) {
869 		uma_zfree(p9fs_getattr_zone, stat);
870 	}
871 
872 	return (error);
873 }
874 
875 /*
876  * Read the current inode values into the vap attr. We reload the stats from
877  * the server.
878  */
879 static int
880 p9fs_getattr_dotl(struct vop_getattr_args *ap)
881 {
882 	struct vnode *vp;
883 	struct vattr *vap;
884 	struct p9fs_node *node;
885 	struct p9fs_inode *inode;
886 	int error;
887 
888 	vp = ap->a_vp;
889 	vap = ap->a_vap;
890 	node = P9FS_VTON(vp);
891 
892 	if (node == NULL)
893 		return (ENOENT);
894 
895 	inode = &node->inode;
896 
897 	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));
898 
899 	/* Reload our stats once to get the right values.*/
900 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
901 	if (error != 0) {
902 		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
903 		return (error);
904 	}
905 
906 	/* Basic info */
907 	VATTR_NULL(vap);
908 
909 	vap->va_atime.tv_sec = inode->i_atime;
910 	vap->va_mtime.tv_sec = inode->i_mtime;
911 	vap->va_ctime.tv_sec = inode->i_ctime;
912 	vap->va_atime.tv_nsec = inode->i_atime_nsec;
913 	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
914 	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
915 	vap->va_type = IFTOVT(inode->i_mode);
916 	vap->va_mode = inode->i_mode;
917 	vap->va_uid = inode->n_uid;
918 	vap->va_gid = inode->n_gid;
919 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
920 	vap->va_size = inode->i_size;
921 	vap->va_nlink = inode->i_links_count;
922 	vap->va_blocksize = inode->blksize;
923 	vap->va_fileid = inode->i_qid_path;
924 	vap->va_flags = inode->i_flags;
925 	vap->va_gen = inode->gen;
926 	vap->va_filerev = inode->data_version;
927 	vap->va_vaflags = 0;
928 	vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;
929 
930 	return (0);
931 }
932 
933 /* Convert a standard FreeBSD permission to P9. */
934 static uint32_t
935 p9fs_unix2p9_mode(uint32_t mode)
936 {
937 	uint32_t res;
938 
939 	res = mode & 0777;
940 	if (S_ISDIR(mode))
941 		res |= P9PROTO_DMDIR;
942 	if (S_ISSOCK(mode))
943 		res |= P9PROTO_DMSOCKET;
944 	if (S_ISLNK(mode))
945 		res |= P9PROTO_DMSYMLINK;
946 	if (S_ISFIFO(mode))
947 		res |= P9PROTO_DMNAMEDPIPE;
948 	if ((mode & S_ISUID) == S_ISUID)
949 		res |= P9PROTO_DMSETUID;
950 	if ((mode & S_ISGID) == S_ISGID)
951 		res |= P9PROTO_DMSETGID;
952 	if ((mode & S_ISVTX) == S_ISVTX)
953 		res |= P9PROTO_DMSETVTX;
954 
955 	return (res);
956 }
957 
958 /* Update inode with the stats read from server.(9P2000.L version) */
959 int
960 p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
961 {
962 	struct p9fs_node *np;
963 	struct p9fs_inode *inode;
964 
965 	np = P9FS_VTON(vp);
966 	inode = &np->inode;
967 
968 	ASSERT_VOP_LOCKED(vp, __func__);
969 	/* Update the pager size if file size changes on host */
970 	if (inode->i_size != stat->st_size) {
971 		inode->i_size = stat->st_size;
972 		if (vp->v_type == VREG)
973 			vnode_pager_setsize(vp, inode->i_size);
974 	}
975 
976 	inode->i_mtime = stat->st_mtime_sec;
977 	inode->i_atime = stat->st_atime_sec;
978 	inode->i_ctime = stat->st_ctime_sec;
979 	inode->i_mtime_nsec = stat->st_mtime_nsec;
980 	inode->i_atime_nsec = stat->st_atime_nsec;
981 	inode->i_ctime_nsec = stat->st_ctime_nsec;
982 	inode->n_uid = stat->st_uid;
983 	inode->n_gid = stat->st_gid;
984 	inode->i_mode = stat->st_mode;
985 	vp->v_type = IFTOVT(inode->i_mode);
986 	inode->i_links_count = stat->st_nlink;
987 	inode->blksize = stat->st_blksize;
988 	inode->blocks = stat->st_blocks;
989 	inode->gen = stat->st_gen;
990 	inode->data_version = stat->st_data_version;
991 
992 	ASSERT_VOP_LOCKED(vp, __func__);
993 	/* Setting a flag if file changes based on qid version */
994 	if (np->vqid.qid_version != stat->qid.version)
995 		np->flags |= P9FS_NODE_MODIFIED;
996 	memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
997 
998 	return (0);
999 }
1000 
1001 /*
1002  * Write the current in memory inode stats into persistent stats structure
1003  * to write to the server(for linux version).
1004  */
1005 static int
1006 p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
1007 {
1008 	p9attr->size = inode->i_size;
1009 	p9attr->mode = inode->i_mode;
1010 	p9attr->uid = inode->n_uid;
1011 	p9attr->gid = inode->n_gid;
1012 	p9attr->atime_sec = inode->i_atime;
1013 	p9attr->atime_nsec = inode->i_atime_nsec;
1014 	p9attr->mtime_sec = inode->i_mtime;
1015 	p9attr->mtime_nsec = inode->i_mtime_nsec;
1016 
1017 	return (0);
1018 }
1019 
1020 /*
1021  * Modify the ownership of a file whenever the chown is called on the
1022  * file.
1023  */
1024 static int
1025 p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
1026     struct thread *td)
1027 {
1028 	struct p9fs_node *np;
1029 	struct p9fs_inode *inode;
1030 	uid_t ouid;
1031 	gid_t ogid;
1032 	int error;
1033 
1034 	np = P9FS_VTON(vp);
1035 	inode = &np->inode;
1036 
1037 	if (uid == (uid_t)VNOVAL)
1038 		uid = inode->n_uid;
1039 	if (gid == (gid_t)VNOVAL)
1040 		gid = inode->n_gid;
1041 	/*
1042 	 * To modify the ownership of a file, must possess VADMIN for that
1043 	 * file.
1044 	 */
1045 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
1046 		return (error);
1047 	/*
1048 	 * To change the owner of a file, or change the group of a file to a
1049 	 * group of which we are not a member, the caller must have
1050 	 * privilege.
1051 	 */
1052 	if (((uid != inode->n_uid && uid != cred->cr_uid) ||
1053 	    (gid != inode->n_gid && !groupmember(gid, cred))) &&
1054 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
1055 		return (error);
1056 
1057 	ogid = inode->n_gid;
1058 	ouid = inode->n_uid;
1059 
1060 	inode->n_gid = gid;
1061 	inode->n_uid = uid;
1062 
1063 	if ((inode->i_mode & (ISUID | ISGID)) &&
1064 	    (ouid != uid || ogid != gid)) {
1065 
1066 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
1067 			inode->i_mode &= ~(ISUID | ISGID);
1068 	}
1069 	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
1070 
1071 	return (0);
1072 }
1073 
1074 /*
1075  * Update the in memory inode with all chmod new permissions/mode. Typically a
1076  * setattr is called to update it to server.
1077  */
1078 static int
1079 p9fs_chmod(struct vnode *vp, uint32_t  mode, struct ucred *cred, struct thread *td)
1080 {
1081 	struct p9fs_node *np;
1082 	struct p9fs_inode *inode;
1083 	uint32_t nmode;
1084 	int error;
1085 
1086 	np = P9FS_VTON(vp);
1087 	inode = &np->inode;
1088 
1089 	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n",  __func__, vp, mode, cred, td);
1090 	/*
1091 	 * To modify the permissions on a file, must possess VADMIN
1092 	 * for that file.
1093 	 */
1094 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
1095 		return (error);
1096 
1097 	/*
1098 	 * Privileged processes may set the sticky bit on non-directories,
1099 	 * as well as set the setgid bit on a file with a group that the
1100 	 * process is not a member of. Both of these are allowed in
1101 	 * jail(8).
1102 	 */
1103 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
1104 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
1105 			return (EFTYPE);
1106 	}
1107 	if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) {
1108 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
1109 		if (error != 0)
1110 			return (error);
1111 	}
1112 
1113 	/*
1114 	 * Deny setting setuid if we are not the file owner.
1115 	 */
1116 	if ((mode & ISUID) && inode->n_uid != cred->cr_uid) {
1117 		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
1118 		if (error != 0)
1119 			return (error);
1120 	}
1121 	nmode = inode->i_mode;
1122 	nmode &= ~ALLPERMS;
1123 	nmode |= (mode & ALLPERMS);
1124 	inode->i_mode = nmode;
1125 
1126 	P9_DEBUG(VOPS, "%s: to mode %x  %d \n ", __func__, nmode, error);
1127 
1128 	return (error);
1129 }
1130 
1131 /*
1132  * Set the attributes of a file referenced by fid. A valid bitmask is sent
1133  * in request selecting which fields to set
1134  */
1135 static int
1136 p9fs_setattr_dotl(struct vop_setattr_args *ap)
1137 {
1138 	struct vnode *vp;
1139 	struct vattr *vap;
1140 	struct p9fs_node *node;
1141 	struct p9fs_inode *inode;
1142 	struct ucred *cred;
1143 	struct thread *td;
1144 	struct p9_iattr_dotl *p9attr;
1145 	struct p9fs_session *vses;
1146 	struct p9_fid *vfid;
1147 	uint64_t oldfilesize;
1148 	int error;
1149 
1150 	vp = ap->a_vp;
1151 	vap = ap->a_vap;
1152 	node = P9FS_VTON(vp);
1153 	inode = &node->inode;
1154 	cred = ap->a_cred;
1155 	td = curthread;
1156 	vses = node->p9fs_ses;
1157 	error = 0;
1158 
1159 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
1160 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
1161 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
1162 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
1163 		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
1164 		return (EINVAL);
1165 	}
1166 	/* Disallow write attempts on read only filesystem */
1167 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1168 		return (EROFS);
1169 
1170 	/* Setting of flags is not supported */
1171 	if (vap->va_flags != VNOVAL)
1172 		return (EOPNOTSUPP);
1173 
1174 	/* Allocate p9attr struct */
1175 	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
1176 	if (p9attr == NULL)
1177 		return (ENOMEM);
1178 
1179 	/* Check if we need to change the ownership of the file*/
1180 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1181 		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
1182 		    vp, td, vap->va_uid, vap->va_gid);
1183 
1184 		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
1185 		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
1186 			P9PROTO_SETATTR_MODE;
1187 		if (error)
1188 			goto out;
1189 	}
1190 
1191 	/* Check for mode changes */
1192 	if (vap->va_mode != (mode_t)VNOVAL) {
1193 		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
1194 		    vap->va_mode);
1195 
1196 		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
1197 		p9attr->valid |= P9PROTO_SETATTR_MODE;
1198 		if (error)
1199 			goto out;
1200 	}
1201 
1202 	/* Update the size of the file and update mtime */
1203 	if (vap->va_size != (uint64_t)VNOVAL) {
1204 		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
1205 		    vp, td, (uintmax_t)vap->va_size);
1206 		switch (vp->v_type) {
1207 			case VDIR:
1208 				error = EISDIR;
1209 				goto out;
1210 			case VLNK:
1211 			case VREG:
1212 				/* Invalidate cached pages of vp */
1213 				error = vinvalbuf(vp, 0, 0, 0);
1214 				if (error)
1215 					goto out;
1216 				oldfilesize = inode->i_size;
1217 				inode->i_size = vap->va_size;
1218 				/* Update the p9fs_inode time */
1219 				p9fs_itimes(vp);
1220 				p9attr->valid |= P9PROTO_SETATTR_SIZE |
1221 				    P9PROTO_SETATTR_ATIME |
1222 				    P9PROTO_SETATTR_MTIME |
1223 				    P9PROTO_SETATTR_ATIME_SET |
1224 				    P9PROTO_SETATTR_MTIME_SET ;
1225 				break;
1226 			default:
1227 				goto out;
1228 		}
1229 	} else if (vap->va_atime.tv_sec != VNOVAL ||
1230 		    vap->va_mtime.tv_sec != VNOVAL) {
1231 		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
1232 		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
1233 		    (uintmax_t)vap->va_mtime.tv_sec);
1234 		/* Update the p9fs_inode times */
1235 		p9fs_itimes(vp);
1236 		p9attr->valid |= P9PROTO_SETATTR_ATIME |
1237 			P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
1238 			P9PROTO_SETATTR_MTIME_SET;
1239 	}
1240 
1241 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
1242 	if (vfid == NULL) {
1243 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
1244 		if (error)
1245 			goto out;
1246 	}
1247 
1248 	/* Write the inode structure values into p9attr */
1249 	p9fs_inode_to_iattr(inode, p9attr);
1250 	error = p9_client_setattr(vfid, p9attr);
1251 	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
1252 		if (error)
1253 			inode->i_size = oldfilesize;
1254 		else
1255 			vnode_pager_setsize(vp, inode->i_size);
1256 	}
1257 out:
1258 	if (p9attr) {
1259 		uma_zfree(p9fs_setattr_zone, p9attr);
1260 	}
1261 	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
1262 	return (error);
1263 }
1264 
/*
 * State filled in by p9fs_get_open_fid() and consumed by
 * p9fs_release_open_fid() so that an open performed on behalf of a single
 * I/O operation can be undone afterwards.
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid located by p9fs_get_open_fid() */
	int fflags;		/* flags passed to VOP_OPEN, reused for VOP_CLOSE */
	int opened;		/* TRUE when p9fs_get_open_fid() had to call VOP_OPEN */
};
1270 
1271 /*
1272  * TODO: change this to take P9PROTO_* mode and avoid routing through
1273  * VOP_OPEN, factoring out implementation of p9fs_open.
1274  */
1275 static int
1276 p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
1277 {
1278 	struct p9fs_node *np;
1279 	struct p9fs_session *vses;
1280 	struct p9_fid *vofid;
1281 	int mode = p9fs_uflags_mode(fflags, TRUE);
1282 	int error = 0;
1283 
1284 	statep->opened = FALSE;
1285 
1286 	np = P9FS_VTON(vp);
1287 	vses = np->p9fs_ses;
1288 	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1289 	if (vofid == NULL) {
1290 		error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
1291 		if (error) {
1292 			return (error);
1293 		}
1294 		vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1295 		if (vofid == NULL) {
1296 			return (EBADF);
1297 		}
1298 		statep->fflags = fflags;
1299 		statep->opened = TRUE;
1300 	}
1301 	statep->vofid = vofid;
1302 	return (0);
1303 }
1304 
1305 static void
1306 p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
1307 {
1308 	if (statep->opened) {
1309 		(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
1310 	}
1311 }
1312 
1313 /*
1314  * An I/O buffer is used to to do any transfer. The uio is the vfs structure we
1315  * need to copy data into. As long as resid is greater than zero, we call
1316  * client_read to read data from offset(offset into the file) in the open fid
1317  * for the file into the I/O buffer. The data is read into the user data buffer.
1318  */
1319 static int
1320 p9fs_read(struct vop_read_args *ap)
1321 {
1322 	struct vnode *vp;
1323 	struct uio *uio;
1324 	struct p9fs_node *np;
1325 	uint64_t offset;
1326 	int64_t ret;
1327 	uint64_t resid;
1328 	uint32_t count;
1329 	int error;
1330 	char *io_buffer = NULL;
1331 	uint64_t filesize;
1332 	struct open_fid_state ostate;
1333 
1334 	vp = ap->a_vp;
1335 	uio = ap->a_uio;
1336 	np = P9FS_VTON(vp);
1337 	error = 0;
1338 
1339 	if (vp->v_type == VCHR || vp->v_type == VBLK)
1340 		return (EOPNOTSUPP);
1341 	if (vp->v_type != VREG)
1342 		return (EISDIR);
1343 	if (uio->uio_resid == 0)
1344 		return (0);
1345 	if (uio->uio_offset < 0)
1346 		return (EINVAL);
1347 
1348 	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
1349 	if (error)
1350 		return (error);
1351 
1352 	/* where in the file are we to start reading */
1353 	offset = uio->uio_offset;
1354 	filesize = np->inode.i_size;
1355 	if (uio->uio_offset >= filesize)
1356 		goto out;
1357 
1358 	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
1359 	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
1360 
1361 	/* Work with a local buffer from the pool for this vop */
1362 
1363 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1364 	while ((resid = uio->uio_resid) > 0) {
1365 		if (offset >= filesize)
1366 			break;
1367 		count = MIN(filesize - uio->uio_offset , resid);
1368 		if (count == 0)
1369 			break;
1370 
1371 		/* Copy count bytes into the uio */
1372 		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
1373 		/*
1374 		 * This is the only place in the entire p9fs where we check the
1375 		 * error for < 0 as p9_client_read/write return the number of
1376 		 * bytes instead of an error code. In this case if ret is < 0,
1377 		 * it means there is an IO error.
1378 		 */
1379 		if (ret < 0) {
1380 			error = -ret;
1381 			goto out;
1382 		}
1383 		error = uiomove(io_buffer, ret, uio);
1384 		if (error != 0)
1385 			goto out;
1386 
1387 		offset += ret;
1388 	}
1389 	uio->uio_offset = offset;
1390 out:
1391 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1392 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1393 
1394 	return (error);
1395 }
1396 
1397 /*
1398  * The user buffer contains the data to be written. This data is copied first
1399  * from uio into I/O buffer. This I/O  buffer is used to do the client_write to
1400  * the fid of the file starting from the offset given upto count bytes. The
1401  * number of bytes written is returned to the caller.
1402  */
1403 static int
1404 p9fs_write(struct vop_write_args *ap)
1405 {
1406 	struct vnode *vp;
1407 	struct uio *uio;
1408 	struct p9fs_node *np;
1409 	uint64_t off, offset;
1410 	int64_t ret;
1411 	uint64_t resid, bytes_written;
1412 	uint32_t count;
1413 	int error, ioflag;
1414 	uint64_t file_size;
1415 	char *io_buffer = NULL;
1416 	struct open_fid_state ostate;
1417 
1418 	vp = ap->a_vp;
1419 	uio = ap->a_uio;
1420 	np = P9FS_VTON(vp);
1421 	error = 0;
1422 	ioflag = ap->a_ioflag;
1423 
1424 	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
1425 	if (error)
1426 		return (error);
1427 
1428 	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
1429 	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1430 
1431 	if (uio->uio_offset < 0) {
1432 		error = EINVAL;
1433 		goto out;
1434 	}
1435 	if (uio->uio_resid == 0)
1436 		goto out;
1437 
1438 	file_size = np->inode.i_size;
1439 
1440 	switch (vp->v_type) {
1441 	case VREG:
1442 		if (ioflag & IO_APPEND)
1443 			uio->uio_offset = file_size;
1444 		break;
1445 	case VDIR:
1446 		return (EISDIR);
1447 	case VLNK:
1448 		break;
1449 	default:
1450 		panic("%s: bad file type vp: %p", __func__, vp);
1451 	}
1452 
1453 	resid = uio->uio_resid;
1454 	offset = uio->uio_offset;
1455 	bytes_written = 0;
1456 	error = 0;
1457 
1458 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1459 	while ((resid = uio->uio_resid) > 0) {
1460                 off = 0;
1461 		count = MIN(resid, P9FS_IOUNIT);
1462 		error = uiomove(io_buffer, count, uio);
1463 
1464 		if (error != 0) {
1465 			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
1466 			goto out;
1467 		}
1468 
1469 		/* While count still exists, keep writing.*/
1470 		while (count > 0) {
1471 			/* Copy count bytes from the uio */
1472 			ret = p9_client_write(ostate.vofid, offset, count,
1473                                 io_buffer + off);
1474 			if (ret < 0) {
1475 				if (bytes_written == 0) {
1476 					error = -ret;
1477 					goto out;
1478 				} else {
1479 					break;
1480 				}
1481 			}
1482 			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
1483 			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1484 
1485                         off += ret;
1486 			offset += ret;
1487 			bytes_written += ret;
1488 			count -= ret;
1489 		}
1490 	}
1491 	/* Update the fields in the node to reflect the change*/
1492 	if (file_size < uio->uio_offset + uio->uio_resid) {
1493 		np->inode.i_size = uio->uio_offset + uio->uio_resid;
1494 		vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
1495 	}
1496 out:
1497 	if (io_buffer)
1498 		uma_zfree(p9fs_io_buffer_zone, io_buffer);
1499 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1500 
1501 	return (error);
1502 }
1503 
1504 /*
1505  * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the
1506  * client_remove op to send messages to remove the node's fid on the server.
1507  * After that, does a node metadata cleanup on client side.
1508  */
1509 static int
1510 remove_common(struct p9fs_node *np, struct ucred *cred)
1511 {
1512 	int error;
1513 	struct p9fs_session *vses;
1514 	struct vnode *vp;
1515 	struct p9_fid *vfid;
1516 
1517 	error = 0;
1518 	vses = np->p9fs_ses;
1519 	vp = P9FS_NTOV(np);
1520 
1521 	vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error);
1522 	if (error != 0)
1523 		return (error);
1524 
1525 	error = p9_client_remove(vfid);
1526 	if (error != 0)
1527 		return (error);
1528 
1529 	/* Remove all non-open fids associated with the vp */
1530 	p9fs_fid_remove_all(np, TRUE);
1531 
1532 	/* Invalidate all entries of vnode from name cache and hash list. */
1533 	cache_purge(vp);
1534 
1535 	vfs_hash_remove(vp);
1536 	np->flags |= P9FS_NODE_DELETED;
1537 
1538 	return (error);
1539 }
1540 
1541 /* Remove vop for all files. Call common code for remove and adjust links */
1542 static int
1543 p9fs_remove(struct vop_remove_args *ap)
1544 {
1545 	struct vnode *vp;
1546 	struct p9fs_node *np;
1547 	struct vnode *dvp;
1548 	struct p9fs_node *dnp;
1549 	struct p9fs_inode *dinode;
1550 	int error;
1551 
1552 	vp = ap->a_vp;
1553 	np = P9FS_VTON(vp);
1554 	dvp = ap->a_dvp;
1555 	dnp = P9FS_VTON(dvp);
1556 	dinode = &dnp->inode;
1557 
1558 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1559 
1560 	if (vp->v_type == VDIR)
1561 		return (EISDIR);
1562 
1563 	error = remove_common(np, ap->a_cnp->cn_cred);
1564 	if (error == 0)
1565 		P9FS_DECR_LINKS(dinode);
1566 
1567 	return (error);
1568 }
1569 
1570 /* Remove vop for all directories. Call common code for remove and adjust links */
1571 static int
1572 p9fs_rmdir(struct vop_rmdir_args *ap)
1573 {
1574 	struct vnode *vp;
1575 	struct p9fs_node *np;
1576 	struct vnode *dvp;
1577 	struct p9fs_node *dnp;
1578 	struct p9fs_inode *dinode;
1579 	int error;
1580 
1581 	vp = ap->a_vp;
1582 	np = P9FS_VTON(vp);
1583 	dvp = ap->a_dvp;
1584 	dnp = P9FS_VTON(dvp);
1585 	dinode = &dnp->inode;
1586 
1587 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1588 
1589 	error = remove_common(np, ap->a_cnp->cn_cred);
1590 	if (error == 0)
1591 		P9FS_DECR_LINKS(dinode);
1592 
1593 	return (error);
1594 }
1595 
1596 /*
1597  * Create symlinks. Make the permissions and call create_common code
1598  * for Soft links.
1599  */
1600 static int
1601 p9fs_symlink(struct vop_symlink_args *ap)
1602 {
1603 	struct vnode *dvp;
1604 	struct vnode **vpp;
1605 	struct vattr *vap;
1606 	struct componentname *cnp;
1607 	char *symtgt;
1608 	struct p9fs_node *dnp;
1609 	struct p9fs_session *vses;
1610 	struct mount *mp;
1611 	struct p9_fid *dvfid, *newfid;
1612 	int error;
1613 	char tmpchr;
1614 	gid_t gid;
1615 
1616 	dvp = ap->a_dvp;
1617 	vpp = ap->a_vpp;
1618 	vap = ap->a_vap;
1619 	cnp = ap->a_cnp;
1620 	symtgt = (char*)(uintptr_t) ap->a_target;
1621 	dnp = P9FS_VTON(dvp);
1622 	vses = dnp->p9fs_ses;
1623 	mp = vses->p9fs_mount;
1624 	newfid = NULL;
1625 	error = 0;
1626 	gid = vap->va_gid;
1627 
1628 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
1629 
1630 	/*
1631 	 * Save the character present at namelen in nameptr string and
1632 	 * null terminate the character to get the search name for p9_dir_walk
1633 	 */
1634 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
1635 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
1636 
1637 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1638 	if (error != 0)
1639 		goto out;
1640 
1641 	error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
1642 	if (error != 0)
1643 		goto out;
1644 
1645 	/*create vnode for symtgt */
1646 	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
1647 	if (newfid != NULL) {
1648 		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
1649 		    dnp, newfid, vpp, cnp->cn_nameptr);
1650 		if (error != 0)
1651 			goto out;
1652 	} else
1653 		goto out;
1654 
1655 	if ((cnp->cn_flags & MAKEENTRY) != 0) {
1656 		cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
1657 	}
1658 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
1659 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
1660 
1661 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1662 	return (error);
1663 
1664 out:
1665 	if (newfid != NULL)
1666 		p9_client_clunk(newfid);
1667 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1668 	return (error);
1669 }
1670 
1671 /* Create hard link */
1672 static int
1673 p9fs_link(struct vop_link_args *ap)
1674 {
1675 	struct vnode *vp;
1676 	struct vnode *tdvp;
1677 	struct componentname *cnp;
1678 	struct p9fs_node *dnp;
1679 	struct p9fs_node *np;
1680 	struct p9fs_inode *inode;
1681 	struct p9fs_session *vses;
1682 	struct p9_fid *dvfid, *oldvfid;
1683 	int error;
1684 
1685 	vp = ap->a_vp;
1686 	tdvp = ap->a_tdvp;
1687 	cnp = ap->a_cnp;
1688 	dnp = P9FS_VTON(tdvp);
1689 	np = P9FS_VTON(vp);
1690 	inode = &np->inode;
1691 	vses = np->p9fs_ses;
1692 	error = 0;
1693 
1694 	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
1695 
1696 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1697 	if (error != 0)
1698 		return (error);
1699 	oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
1700 	if (error != 0)
1701 		return (error);
1702 
1703 	error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
1704 	if (error != 0)
1705 		return (error);
1706 	/* Increment ref count on the inode */
1707 	P9FS_INCR_LINKS(inode);
1708 
1709 	return (0);
1710 }
1711 
1712 /* Read contents of the symbolic link */
1713 static int
1714 p9fs_readlink(struct vop_readlink_args *ap)
1715 {
1716 	struct vnode *vp;
1717 	struct uio *uio;
1718 	struct p9fs_node *dnp;
1719 	struct p9fs_session *vses;
1720 	struct p9_fid *dvfid;
1721 	int error, len;
1722 	char *target;
1723 
1724 	vp = ap->a_vp;
1725 	uio = ap->a_uio;
1726 	dnp = P9FS_VTON(vp);
1727 	vses = dnp->p9fs_ses;
1728 	error = 0;
1729 
1730 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
1731 
1732 	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
1733 	if (error != 0)
1734 		return (error);
1735 
1736 	error = p9_readlink(dvfid, &target);
1737 	if (error != 0)
1738 		return (error);
1739 
1740 	len = strlen(target);
1741 	error = uiomove(target, len, uio);
1742 
1743 	return (0);
1744 }
1745 
1746 /*
1747  * Iterate through a directory. An entire 8k data is read into the I/O buffer.
1748  * This buffer is parsed to make dir entries and fed to the user buffer to
1749  * complete it to the VFS.
1750  */
1751 static int
1752 p9fs_readdir(struct vop_readdir_args *ap)
1753 {
1754 	struct uio *uio;
1755 	struct vnode *vp;
1756 	struct dirent cde;
1757 	int64_t offset;
1758 	uint64_t diroffset;
1759 	struct p9fs_node *np;
1760 	int error;
1761 	int32_t count;
1762 	struct p9_client *clnt;
1763 	struct p9_dirent dent;
1764 	char *io_buffer;
1765 	struct p9_fid *vofid;
1766 
1767 	uio = ap->a_uio;
1768 	vp = ap->a_vp;
1769 	np = P9FS_VTON(ap->a_vp);
1770 	offset = 0;
1771 	diroffset = 0;
1772 	error = 0;
1773 	count = 0;
1774 	clnt = np->p9fs_ses->clnt;
1775 
1776 	P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);
1777 
1778 	if (ap->a_uio->uio_iov->iov_len <= 0)
1779 		return (EINVAL);
1780 
1781 	if (vp->v_type != VDIR)
1782 		return (ENOTDIR);
1783 
1784 	vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
1785 	if (vofid == NULL) {
1786 		P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
1787 		return (EBADF);
1788 	}
1789 
1790 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);
1791 
1792 	/* We haven't reached the end yet. read more. */
1793 	diroffset = uio->uio_offset;
1794 	while (uio->uio_resid >= sizeof(struct dirent)) {
1795 		/*
1796 		 * We need to read more data as what is indicated by filesize because
1797 		 * filesize is based on data stored in struct dirent structure but
1798 		 * we read data in struct p9_dirent format which has different size.
1799 		 * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
1800 		 * it into struct dirent structure and send it back.
1801 		 */
1802 		count = P9FS_IOUNIT;
1803 		bzero(io_buffer, P9FS_MTU);
1804 		count = p9_client_readdir(vofid, (char *)io_buffer,
1805 		    diroffset, count);
1806 
1807 		if (count == 0)
1808 			break;
1809 
1810 		if (count < 0) {
1811 			error = EIO;
1812 			goto out;
1813 		}
1814 
1815 		offset = 0;
1816 		while (offset + QEMU_DIRENTRY_SZ <= count) {
1817 
1818 			/*
1819 			 * Read and make sense out of the buffer in one dirent
1820 			 * This is part of 9p protocol read. This reads one p9_dirent,
1821 			 * appends it to dirent(FREEBSD specifc) and continues to parse the buffer.
1822 			 */
1823 			bzero(&dent, sizeof(dent));
1824 			offset = p9_dirent_read(clnt, io_buffer, offset, count,
1825 				&dent);
1826 			if (offset < 0 || offset > count) {
1827 				error = EIO;
1828 				goto out;
1829 			}
1830 
1831 			bzero(&cde, sizeof(cde));
1832 			strncpy(cde.d_name, dent.d_name, dent.len);
1833 			cde.d_fileno = dent.qid.path;
1834 			cde.d_type = dent.d_type;
1835 			cde.d_namlen = dent.len;
1836 			cde.d_reclen = GENERIC_DIRSIZ(&cde);
1837 
1838                         /*
1839                          * If there isn't enough space in the uio to return a
1840                          * whole dirent, break off read
1841                          */
1842                         if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
1843                                 break;
1844 
1845 			/* Transfer */
1846 			error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
1847 			if (error != 0) {
1848 				error = EIO;
1849 				goto out;
1850 			}
1851 			diroffset = dent.d_off;
1852 		}
1853 	}
1854 	/* Pass on last transferred offset */
1855 	uio->uio_offset = diroffset;
1856 
1857 out:
1858 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1859 
1860 	return (error);
1861 }
1862 
1863 static void
1864 p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
1865 {
1866 	struct uio *uiov;
1867 	struct iovec io;
1868 	int error;
1869 	uint64_t off, offset;
1870 	uint64_t filesize;
1871 	uint64_t resid;
1872 	uint32_t count;
1873 	int64_t ret;
1874 	struct p9fs_node *np;
1875 	char *io_buffer;
1876 
1877 	error = 0;
1878 	np = P9FS_VTON(vp);
1879 
1880 	filesize = np->inode.i_size;
1881 	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
1882 	uiov->uio_iov = &io;
1883 	uiov->uio_iovcnt = 1;
1884 	uiov->uio_segflg = UIO_SYSSPACE;
1885 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1886 
1887 	if (bp->b_iocmd == BIO_READ) {
1888 		io.iov_len = uiov->uio_resid = bp->b_bcount;
1889 		io.iov_base = bp->b_data;
1890 		uiov->uio_rw = UIO_READ;
1891 
1892 		switch (vp->v_type) {
1893 
1894 		case VREG:
1895 		{
1896 			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1897 
1898 			if (uiov->uio_resid) {
1899 				int left = uiov->uio_resid;
1900 				int nread = bp->b_bcount - left;
1901 
1902 				if (left > 0)
1903 					bzero((char *)bp->b_data + nread, left);
1904 			}
1905 			/* where in the file are we to start reading */
1906 			offset = uiov->uio_offset;
1907 			if (uiov->uio_offset >= filesize)
1908 				goto out;
1909 
1910 			while ((resid = uiov->uio_resid) > 0) {
1911 				if (offset >= filesize)
1912 					break;
1913 				count = min(filesize - uiov->uio_offset, resid);
1914 				if (count == 0)
1915 					break;
1916 
1917 				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
1918 				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1919 
1920 				/* Copy count bytes into the uio */
1921 				ret = p9_client_read(vofid, offset, count, io_buffer);
1922 				error = uiomove(io_buffer, ret, uiov);
1923 
1924 				if (error != 0)
1925 					goto out;
1926 				offset += ret;
1927 			}
1928 			break;
1929 		}
1930 		default:
1931 			printf("vfs:  type %x unexpected\n", vp->v_type);
1932 			break;
1933 		}
1934 	} else {
1935 		if (bp->b_dirtyend > bp->b_dirtyoff) {
1936 			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
1937 			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
1938 			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1939 			uiov->uio_rw = UIO_WRITE;
1940 
1941 			if (uiov->uio_offset < 0) {
1942 				error = EINVAL;
1943 				goto out;
1944 			}
1945 
1946 			if (uiov->uio_resid == 0)
1947 				goto out;
1948 
1949 			resid = uiov->uio_resid;
1950 			offset = uiov->uio_offset;
1951 			error = 0;
1952 
1953 			while ((resid = uiov->uio_resid) > 0) {
1954                                 off = 0;
1955 				count = MIN(resid, P9FS_IOUNIT);
1956 				error = uiomove(io_buffer, count, uiov);
1957 				if (error != 0) {
1958 					goto out;
1959 				}
1960 
1961 				while (count > 0) {
1962 					/* Copy count bytes from the uio */
1963 					ret = p9_client_write(vofid, offset, count,
1964                                                 io_buffer + off);
1965 					if (ret < 0)
1966 						goto out;
1967 
1968 					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
1969 					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1970                                         off += ret;
1971 					offset += ret;
1972 					count -= ret;
1973 				}
1974 			}
1975 
1976 			/* Update the fields in the node to reflect the change */
1977 			if (filesize < uiov->uio_offset + uiov->uio_resid) {
1978 				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
1979 				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
1980 				/* update the modified timers. */
1981 				p9fs_itimes(vp);
1982 			}
1983 		} else {
1984 			 bp->b_resid = 0;
1985 			 goto out1;
1986 		}
1987 	}
1988 out:
1989 	/* Set the error */
1990 	if (error != 0) {
1991 		bp->b_error = error;
1992 		bp->b_ioflags |= BIO_ERROR;
1993 	}
1994 	bp->b_resid = uiov->uio_resid;
1995 out1:
1996 	bufdone(bp);
1997 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1998 	free(uiov, M_P9UIOV);
1999 }
2000 
2001 /*
2002  * The I/O buffer is mapped to a uio and a client_write/client_read is performed
2003  * the same way as p9fs_read and p9fs_write.
2004  */
2005 static int
2006 p9fs_strategy(struct vop_strategy_args *ap)
2007 {
2008 	struct vnode *vp;
2009 	struct buf *bp;
2010 	struct ucred *cr;
2011 	int error;
2012 	struct open_fid_state ostate;
2013 
2014 	vp = ap->a_vp;
2015 	bp = ap->a_bp;
2016 	error = 0;
2017 
2018 	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
2019 
2020 	if (bp->b_iocmd == BIO_READ)
2021 		cr = bp->b_rcred;
2022 	else
2023 		cr = bp->b_wcred;
2024 
2025 	error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate);
2026 	if (error) {
2027 		P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
2028 		bp->b_error = error;
2029 		bp->b_ioflags |= BIO_ERROR;
2030 		bufdone(bp);
2031 		return (0);
2032 	}
2033 
2034 	p9fs_doio(vp, bp, ostate.vofid, cr);
2035 	p9fs_release_open_fid(vp, cr, &ostate);
2036 
2037 	return (0);
2038 }
2039 
2040 /* Rename a file */
2041 static int
2042 p9fs_rename(struct vop_rename_args *ap)
2043 {
2044 	struct vnode *tvp;
2045 	struct vnode *tdvp;
2046 	struct vnode *fvp;
2047 	struct vnode *fdvp;
2048 	struct componentname *tcnp;
2049 	struct componentname *fcnp;
2050 	struct p9fs_node *tdnode;
2051 	struct p9fs_node *fdnode;
2052 	struct p9fs_inode *fdinode;
2053 	struct p9fs_node *fnode;
2054 	struct p9fs_inode *finode;
2055 	struct p9fs_session *vses;
2056 	struct p9fs_node *tnode;
2057 	struct p9fs_inode *tinode;
2058 	struct p9_fid *olddirvfid, *newdirvfid ;
2059 	int error;
2060 
2061 	tvp = ap->a_tvp;
2062 	tdvp = ap->a_tdvp;
2063 	fvp = ap->a_fvp;
2064 	fdvp = ap->a_fdvp;
2065 	tcnp = ap->a_tcnp;
2066 	fcnp = ap->a_fcnp;
2067 	tdnode = P9FS_VTON(tdvp);
2068 	fdnode = P9FS_VTON(fdvp);
2069 	fdinode = &fdnode->inode;
2070 	fnode = P9FS_VTON(fvp);
2071 	finode = &fnode->inode;
2072 	vses = fnode->p9fs_ses;
2073 	error = 0;
2074 
2075 	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);
2076 
2077 	/* Check for cross mount operation */
2078 	if (fvp->v_mount != tdvp->v_mount ||
2079 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
2080 		error = EXDEV;
2081 		goto out;
2082 	}
2083 
2084 	/* warning  if you are renaming to the same name */
2085 	if (fvp == tvp)
2086 		error = 0;
2087 
2088 	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
2089 	if (error != 0)
2090 		goto out;
2091 	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
2092 	if (error != 0)
2093 		goto out;
2094 
2095 	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
2096 	if (error != 0)
2097 		goto out;
2098 
2099 	/*
2100 	 * decrement the link count on the "from" file whose name is going
2101 	 * to be changed if its a directory
2102 	 */
2103 	if (fvp->v_type == VDIR) {
2104 		if (tvp && tvp->v_type == VDIR)
2105 			cache_purge(tdvp);
2106 		P9FS_DECR_LINKS(fdinode);
2107 		cache_purge(fdvp);
2108 	}
2109 
2110 	/* Taking exclusive lock on the from node before decrementing the link count */
2111 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
2112 		goto out;
2113 	P9FS_DECR_LINKS(finode);
2114 	VOP_UNLOCK(fvp);
2115 
2116 	if (tvp) {
2117 		tnode = P9FS_VTON(tvp);
2118 		tinode = &tnode->inode;
2119 		P9FS_DECR_LINKS(tinode);
2120 	}
2121 
2122 out:
2123 	if (tdvp == tvp)
2124 		vrele(tdvp);
2125 	else
2126 		vput(tdvp);
2127 	if (tvp)
2128 		vput(tvp);
2129 	vrele(fdvp);
2130 	vrele(fvp);
2131 	return (error);
2132 }
2133 
2134 /*
2135  * Put VM pages, synchronously.
2136  * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
2137  */
2138 static int
2139 p9fs_putpages(struct vop_putpages_args *ap)
2140 {
2141 	struct uio uio;
2142 	struct iovec iov;
2143 	int i, error, npages, count;
2144 	off_t offset;
2145 	int *rtvals;
2146 	struct vnode *vp;
2147 	struct thread *td;
2148 	struct ucred *cred;
2149 	struct p9fs_node *np;
2150 	vm_page_t *pages;
2151 	vm_offset_t kva;
2152 	struct buf *bp;
2153 
2154 	vp = ap->a_vp;
2155 	np = P9FS_VTON(vp);
2156 	td = curthread;
2157 	cred = curthread->td_ucred;
2158 	pages = ap->a_m;
2159 	count = ap->a_count;
2160 	rtvals = ap->a_rtvals;
2161 	npages = btoc(count);
2162 	offset = IDX_TO_OFF(pages[0]->pindex);
2163 
2164 	/*
2165 	 * When putting pages, do not extend file past EOF.
2166 	 */
2167 	if (offset + count > np->inode.i_size) {
2168 		count = np->inode.i_size - offset;
2169 		if (count < 0)
2170 			count = 0;
2171 	}
2172 
2173 	for (i = 0; i < npages; i++)
2174 		rtvals[i] = VM_PAGER_ERROR;
2175 
2176 	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
2177 	kva = (vm_offset_t) bp->b_data;
2178 	pmap_qenter(kva, pages, npages);
2179 
2180 	VM_CNT_INC(v_vnodeout);
2181 	VM_CNT_ADD(v_vnodepgsout, count);
2182 
2183 	iov.iov_base = (caddr_t) kva;
2184 	iov.iov_len = count;
2185 	uio.uio_iov = &iov;
2186 	uio.uio_iovcnt = 1;
2187 	uio.uio_offset = offset;
2188 	uio.uio_resid = count;
2189 	uio.uio_segflg = UIO_SYSSPACE;
2190 	uio.uio_rw = UIO_WRITE;
2191 	uio.uio_td = td;
2192 
2193 	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);
2194 
2195 	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
2196 	    cred);
2197 
2198 	pmap_qremove(kva, npages);
2199 	uma_zfree(p9fs_pbuf_zone, bp);
2200 
2201 	if (error == 0)
2202 		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
2203 		    np->inode.i_size - offset, npages * PAGE_SIZE);
2204 
2205 	return (rtvals[0]);
2206 }
2207 
2208 struct vop_vector p9fs_vnops = {
2209 	.vop_default =		&default_vnodeops,
2210 	.vop_lookup =		p9fs_lookup,
2211 	.vop_open =		p9fs_open,
2212 	.vop_close =		p9fs_close,
2213 	.vop_access =		p9fs_access,
2214 	.vop_getattr =		p9fs_getattr_dotl,
2215 	.vop_setattr =		p9fs_setattr_dotl,
2216 	.vop_reclaim =		p9fs_reclaim,
2217 	.vop_inactive =		p9fs_inactive,
2218 	.vop_readdir =		p9fs_readdir,
2219 	.vop_create =		p9fs_create,
2220 	.vop_mknod =		p9fs_mknod,
2221 	.vop_read =		p9fs_read,
2222 	.vop_write =		p9fs_write,
2223 	.vop_remove =		p9fs_remove,
2224 	.vop_mkdir =		p9fs_mkdir,
2225 	.vop_rmdir =		p9fs_rmdir,
2226 	.vop_strategy =		p9fs_strategy,
2227 	.vop_symlink =		p9fs_symlink,
2228 	.vop_rename =           p9fs_rename,
2229 	.vop_link =		p9fs_link,
2230 	.vop_readlink =		p9fs_readlink,
2231 	.vop_putpages =		p9fs_putpages,
2232 };
2233 VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
2234