xref: /freebsd/sys/fs/p9fs/p9fs_vnops.c (revision 40d59ee35dc106cda88d66e37527975a32596cd7)
1 /*
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9 *	notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *	notice, this list of conditions and the following disclaimer in the
12  *	documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
27 /* This file contains VFS file ops for the 9P protocol.
28  * This makes the upper layer of the p9fs driver. These functions interact
29  * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
30  * the user file operations are handled here.
31  */
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/buf.h>
36 #include <sys/dirent.h>
37 #include <sys/fcntl.h>
38 #include <sys/namei.h>
39 #include <sys/priv.h>
40 #include <sys/rwlock.h>
41 #include <sys/stat.h>
42 #include <sys/syslimits.h>
43 #include <sys/unistd.h>
44 #include <sys/vmmeter.h>
45 #include <sys/vnode.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_extern.h>
49 #include <vm/vm_object.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_pager.h>
52 #include <vm/vnode_pager.h>
53 
54 #include <fs/p9fs/p9_client.h>
55 #include <fs/p9fs/p9_debug.h>
56 #include <fs/p9fs/p9fs.h>
57 #include <fs/p9fs/p9fs_proto.h>
58 
59 /* File permissions. */
60 #define IEXEC		0000100 /* Executable. */
61 #define IWRITE		0000200 /* Writeable. */
62 #define IREAD		0000400 /* Readable. */
63 #define ISVTX		0001000 /* Sticky bit. */
64 #define ISGID		0002000 /* Set-gid. */
65 #define ISUID		0004000 /* Set-uid. */
66 
67 static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
68 extern uma_zone_t p9fs_io_buffer_zone;
69 extern uma_zone_t p9fs_getattr_zone;
70 extern uma_zone_t p9fs_setattr_zone;
71 extern uma_zone_t p9fs_pbuf_zone;
72 /* For the root vnode's vnops. */
73 struct vop_vector p9fs_vnops;
74 
75 static uint32_t p9fs_unix2p9_mode(uint32_t mode);
76 
77 static void
p9fs_itimes(struct vnode * vp)78 p9fs_itimes(struct vnode *vp)
79 {
80 	struct p9fs_node *node;
81 	struct timespec ts;
82 	struct p9fs_inode *inode;
83 
84 	node = P9FS_VTON(vp);
85 	inode = &node->inode;
86 
87 	vfs_timestamp(&ts);
88 	inode->i_mtime = ts.tv_sec;
89 }
90 
91 /*
92  * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
93  * The cleanup includes invalidating all cache entries for the vnode,
94  * destroying the vobject, removing vnode from hashlist, removing p9fs node
95  * from the list of session p9fs nodes, and disposing of the p9fs node.
96  * Basically it is doing a reverse of what a create/vget does.
97  */
98 void
p9fs_cleanup(struct p9fs_node * np)99 p9fs_cleanup(struct p9fs_node *np)
100 {
101 	struct vnode *vp;
102 	struct p9fs_session *vses;
103 
104 	if (np == NULL)
105 		return;
106 
107 	vp = P9FS_NTOV(np);
108 	vses = np->p9fs_ses;
109 
110 	/* Remove the vnode from hash list if vnode is not already deleted */
111 	if ((np->flags & P9FS_NODE_DELETED) == 0)
112 		vfs_hash_remove(vp);
113 
114 	P9FS_LOCK(vses);
115 	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
116 		P9FS_NODE_CLRF(np, P9FS_NODE_IN_SESSION);
117 		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
118 	} else {
119 		P9FS_UNLOCK(vses);
120 		return;
121 	}
122 	P9FS_UNLOCK(vses);
123 
124 	/* Invalidate all entries to a particular vnode. */
125 	cache_purge(vp);
126 
127 	/* Destroy the vm object and flush associated pages. */
128 	vnode_destroy_vobject(vp);
129 
130 	/* Remove all the FID */
131 	p9fs_fid_remove_all(np, FALSE);
132 
133 	/* Dispose all node knowledge.*/
134 	p9fs_destroy_node(&np);
135 }
136 
137 /*
138  * Reclaim VOP is defined to be called for every vnode. This starts off
139  * the cleanup by clunking(remove the fid on the server) and calls
140  * p9fs_cleanup to free all the resources allocated for p9fs node.
141  */
142 static int
p9fs_reclaim(struct vop_reclaim_args * ap)143 p9fs_reclaim(struct vop_reclaim_args *ap)
144 {
145 	struct vnode *vp;
146 	struct p9fs_node *np;
147 
148 	vp = ap->a_vp;
149 	np = P9FS_VTON(vp);
150 
151 	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
152 	p9fs_cleanup(np);
153 
154 	return (0);
155 }
156 
157 /*
158  * recycle vnodes which are no longer referenced i.e, their usecount is zero
159  */
160 static int
p9fs_inactive(struct vop_inactive_args * ap)161 p9fs_inactive(struct vop_inactive_args *ap)
162 {
163 	struct vnode *vp;
164 	struct p9fs_node *np;
165 
166 	vp = ap->a_vp;
167 	np = P9FS_VTON(vp);
168 
169 	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
170 	if (np->flags & P9FS_NODE_DELETED)
171 		vrecycle(vp);
172 
173 	return (0);
174 }
175 
176 struct p9fs_lookup_alloc_arg {
177 	struct componentname *cnp;
178 	struct p9fs_node *dnp;
179 	struct p9_fid *newfid;
180 };
181 
182 /* Callback for vn_get_ino */
183 static int
p9fs_lookup_alloc(struct mount * mp,void * arg,int lkflags,struct vnode ** vpp)184 p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
185 {
186 	struct p9fs_lookup_alloc_arg *p9aa = arg;
187 
188 	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
189 		p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
190 }
191 
192 /*
193  * p9fs_lookup is called for every component name that is being searched for.
194  *
195  * I. If component is found on the server, we look for the in-memory
196  *    repesentation(vnode) of this component in namecache.
197  *    A. If the node is found in the namecache, we check is the vnode is still
198  *	 valid.
199  *	 1. If it is still valid, return vnode.
200  *	 2. If it is not valid, we remove this vnode from the name cache and
201  *	    create a new vnode for the component and return that vnode.
202  *    B. If the vnode is not found in the namecache, we look for it in the
203  *       hash list.
204  *       1. If the vnode is in the hash list, we check if the vnode is still
205  *	    valid.
206  *	    a. If it is still valid, we add that vnode to the namecache for
207  *	       future lookups and return the vnode.
208  *	    b. If it is not valid, create a new vnode and p9fs node,
209  *	       initialize them and return the vnode.
210  *	 2. If the vnode is not found in the hash list, we create a new vnode
211  *	    and p9fs node, initialize them and return the vnode.
212  * II. If the component is not found on the server, an error code is returned.
213  *     A. For the creation case, we return EJUSTRETURN so VFS can handle it.
214  *     B. For all other cases, ENOENT is returned.
215  */
216 static int
p9fs_lookup(struct vop_lookup_args * ap)217 p9fs_lookup(struct vop_lookup_args *ap)
218 {
219 	struct vnode *dvp;
220 	struct vnode **vpp, *vp;
221 	struct componentname *cnp;
222 	struct p9fs_node *dnp; /*dir p9_node */
223 	struct p9fs_node *np;
224 	struct p9fs_session *vses;
225 	struct mount *mp; /* Get the mount point */
226 	struct p9_fid *dvfid, *newfid;
227 	uint64_t flags;
228 	int error;
229 	struct vattr vattr;
230 	char tmpchr;
231 
232 	dvp = ap->a_dvp;
233 	vpp = ap->a_vpp;
234 	cnp = ap->a_cnp;
235 	dnp = P9FS_VTON(dvp);
236 	error = 0;
237 	flags = cnp->cn_flags;
238 	*vpp = NULL;
239 
240 	if (dnp == NULL)
241 		return (ENOENT);
242 
243 	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
244 		vref(dvp);
245 		*vpp = dvp;
246 		return (0);
247 	}
248 
249 	vses = dnp->p9fs_ses;
250 	mp = vses->p9fs_mount;
251 
252 	/* Do the cache part ourselves */
253 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
254 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
255 		return (EROFS);
256 
257 	if (dvp->v_type != VDIR)
258 		return (ENOTDIR);
259 
260 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
261 	if (error)
262 		return (error);
263 
264 	/* Do the directory walk on host to check if file exist */
265 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
266 	if (error)
267 		return (error);
268 
269 	/*
270 	 * Save the character present at namelen in nameptr string and
271 	 * null terminate the character to get the search name for p9_dir_walk
272 	 * This is done to handle when lookup is for "a" and component
273 	 * name contains a/b/c
274 	 */
275 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
276 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
277 
278 	/*
279 	 * If the client_walk fails, it means the file looking for doesnt exist.
280 	 * Create the file is the flags are set or just return the error
281 	 */
282 	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
283 
284 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
285 
286 	if (error != 0 || newfid == NULL) {
287 		/* Clunk the newfid if it is not NULL */
288 		if (newfid != NULL)
289 			p9_client_clunk(newfid);
290 
291 		if (error != ENOENT)
292 			return (error);
293 
294 		/* The requested file was not found. */
295 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
296 		    (flags & ISLASTCN)) {
297 
298 			if (mp->mnt_flag & MNT_RDONLY)
299 				return (EROFS);
300 
301 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
302 			    curthread);
303 			if (!error) {
304 				return (EJUSTRETURN);
305 			}
306 		}
307 		return (error);
308 	}
309 
310 	/* Look for the entry in the component cache*/
311 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
312 	if (error > 0 && error != ENOENT) {
313 		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
314 		goto out;
315 	}
316 
317 	if (error == -1) {
318 		vp = *vpp;
319 		/* Check if the entry in cache is stale or not */
320 		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
321 		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
322 			goto out;
323 		}
324 		/*
325 		 * This case, we have an error coming from getattr,
326 		 * act accordingly.
327 		 */
328 		cache_purge(vp);
329 		if (dvp != vp)
330 			vput(vp);
331 		else
332 			vrele(vp);
333 
334 		*vpp = NULL;
335 	} else if (error == ENOENT) {
336 		if (VN_IS_DOOMED(dvp))
337 			goto out;
338 		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
339 			error = ENOENT;
340 			goto out;
341 		}
342 		cache_purge_negative(dvp);
343 	}
344 	/* Reset values */
345 	error = 0;
346 	vp = NULL;
347 
348 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
349 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
350 
351 	/*
352 	 * Looks like we have found an entry. Now take care of all other cases.
353 	 */
354 	if (flags & ISDOTDOT) {
355 		struct p9fs_lookup_alloc_arg p9aa;
356 		p9aa.cnp = cnp;
357 		p9aa.dnp = dnp;
358 		p9aa.newfid = newfid;
359 		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
360 		if (error)
361 			goto out;
362 		*vpp = vp;
363 	} else {
364 		/*
365 		 * client_walk is equivalent to searching a component name in a
366 		 * directory(fid) here. If new fid is returned, we have found an
367 		 * entry for this component name so, go and create the rest of
368 		 * the vnode infra(vget_common) for the returned newfid.
369 		 */
370 		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
371 		    && (flags & ISLASTCN)) {
372 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
373 			    curthread);
374 			if (error)
375 				goto out;
376 
377 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
378 			    dnp, newfid, &vp, cnp->cn_nameptr);
379 			if (error)
380 				goto out;
381 
382 			*vpp = vp;
383 			np = P9FS_VTON(vp);
384 			if ((dnp->inode.i_mode & ISVTX) &&
385 			    cnp->cn_cred->cr_uid != 0 &&
386 			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
387 			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
388 				vput(*vpp);
389 				*vpp = NULL;
390 				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
391 				return (EPERM);
392 			}
393 		} else {
394 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
395 			    dnp, newfid, &vp, cnp->cn_nameptr);
396 			if (error)
397 				goto out;
398 			*vpp = vp;
399 		}
400 	}
401 
402 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
403 
404 	/* Store the result the cache if MAKEENTRY is specified in flags */
405 	if ((cnp->cn_flags & MAKEENTRY) != 0)
406 		cache_enter(dvp, *vpp, cnp);
407 	return (error);
408 out:
409 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
410 	p9_client_clunk(newfid);
411 	return (error);
412 }
413 
414 /*
415  * Common creation function for file/directory with respective flags. We first
416  * open the parent directory in order to create the file under it. For this,
417  * as 9P protocol suggests, we need to call client_walk to create the open fid.
418  * Once we have the open fid, the file_create function creates the direntry with
419  * the name and perm specified under the parent dir. If this succeeds (an entry
420  * is created for the new file on the server), we create our metadata for this
421  * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
422  * fid of the parent directory if it was not retained.
423  */
424 static int
create_common(struct p9fs_node * dnp,struct componentname * cnp,char * extension,uint32_t perm,uint8_t mode,struct vnode ** vpp)425 create_common(struct p9fs_node *dnp, struct componentname *cnp,
426     char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
427 {
428 	char tmpchr;
429 	struct p9_fid *dvfid, *ofid, *newfid;
430 	struct p9fs_session *vses;
431 	struct mount *mp;
432 	int error;
433 
434 	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
435 
436 	vses = dnp->p9fs_ses;
437 	mp = vses->p9fs_mount;
438 	newfid = NULL;
439 	error = 0;
440 
441 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
442 	if (error != 0)
443 		return (error);
444 
445 	/* Clone the directory fid to create the new file */
446 	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
447 	if (error != 0)
448 		return (error);
449 
450 	/*
451 	 * Save the character present at namelen in nameptr string and
452 	 * null terminate the character to get the search name for p9_dir_walk
453 	 */
454 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
455 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
456 
457 	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
458 		    extension);
459 	if (error != 0) {
460 		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
461 		goto out;
462 	}
463 
464 	/* If its not hardlink only then do the walk, else we are done. */
465 	if (!(perm & P9PROTO_DMLINK)) {
466 		/*
467 		 * Do the lookup part and add the vnode, p9fs node. Note that vpp
468 		 * is filled in here.
469 		 */
470 		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
471 		if (newfid != NULL) {
472 			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
473 			    dnp, newfid, vpp, cnp->cn_nameptr);
474 			if (error != 0)
475 				goto out;
476 
477 			if (ofid != NULL) {
478 				struct p9fs_node *np = P9FS_VTON(*vpp);
479 				ofid->v_opens = 0;
480 				/*
481 				 * The 9P file creation request natively opens
482 				 * the file as part of the create operation and
483 				 * gives us a writable file handle (ofid).
484 				 * We retain this open descriptor by adding it
485 				 * to the VOFID list of the new vnode. This
486 				 * guarantees that a subsequent VOP_OPEN call
487 				 * does not need to send a redundant TOPEN
488 				 * request. This is particularly important
489 				 * because if a file was requested to be created
490 				 * with 000 permissions, the host will reject
491 				 * subsequent TOPEN requests due to insufficient
492 				 * permissions, which would cause an overall
493 				 * open() failure.
494 				 */
495 				p9fs_fid_add(np, ofid, VOFID);
496 				ofid = NULL; /* prevent closing handle below */
497 			}
498 		} else {
499 			/* Not found return NOENTRY.*/
500 			goto out;
501 		}
502 
503 		if ((cnp->cn_flags & MAKEENTRY) != 0)
504 			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
505 	}
506 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
507 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
508 	/* Clunk the open ofid. */
509 	if (ofid != NULL)
510 		(void)p9_client_clunk(ofid);
511 
512 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
513 	return (0);
514 out:
515 	if (ofid != NULL)
516 		(void)p9_client_clunk(ofid);
517 
518 	if (newfid != NULL)
519 		(void)p9_client_clunk(newfid);
520 
521 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
522 	return (error);
523 }
524 
525 /*
526  * This is the main file creation VOP. Make the permissions of the new
527  * file and call the create_common common code to complete the create.
528  */
529 static int
p9fs_create(struct vop_create_args * ap)530 p9fs_create(struct vop_create_args *ap)
531 {
532 	struct vnode *dvp;
533 	struct vnode **vpp;
534 	struct componentname *cnp;
535 	uint32_t mode;
536 	struct p9fs_node *dnp;
537 	struct p9fs_inode *dinode;
538 	uint32_t perm;
539 	int ret;
540 
541 	dvp = ap->a_dvp;
542 	vpp = ap->a_vpp;
543 	cnp = ap->a_cnp;
544 	dnp = P9FS_VTON(dvp);
545 	dinode = &dnp->inode;
546 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
547 	perm = p9fs_unix2p9_mode(mode);
548 
549 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
550 
551 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
552 	if (ret == 0) {
553 		P9FS_INCR_LINKS(dinode);
554 	}
555 
556 	return (ret);
557 }
558 
559 /*
560  * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
561  * and call the create_common common code to complete the create.
562  */
563 static int
p9fs_mkdir(struct vop_mkdir_args * ap)564 p9fs_mkdir(struct vop_mkdir_args *ap)
565 {
566 	struct vnode *dvp;
567 	struct vnode **vpp;
568 	struct componentname *cnp;
569 	uint32_t mode;
570 	struct p9fs_node *dnp;
571 	struct p9fs_inode *dinode;
572 	uint32_t perm;
573 	int ret;
574 
575 	dvp = ap->a_dvp;
576 	vpp = ap->a_vpp;
577 	cnp = ap->a_cnp;
578 	dnp = P9FS_VTON(dvp);
579 	dinode = &dnp->inode;
580 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
581 	perm = p9fs_unix2p9_mode(mode | S_IFDIR);
582 
583 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
584 
585 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
586 	if (ret == 0)
587 		P9FS_INCR_LINKS(dinode);
588 
589 	return (ret);
590 }
591 
592 /*
593  * p9fs_mknod is the main node creation vop. Make the permissions of the new node
594  * and call the create_common common code to complete the create.
595  */
596 static int
p9fs_mknod(struct vop_mknod_args * ap)597 p9fs_mknod(struct vop_mknod_args *ap)
598 {
599 	struct vnode *dvp;
600 	struct vnode **vpp;
601 	struct componentname *cnp;
602 	uint32_t mode;
603 	struct p9fs_node *dnp;
604 	struct p9fs_inode *dinode;
605 	uint32_t perm;
606 	int ret;
607 
608 	dvp = ap->a_dvp;
609 	vpp = ap->a_vpp;
610 	cnp = ap->a_cnp;
611 	dnp = P9FS_VTON(dvp);
612 	dinode = &dnp->inode;
613 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
614 	perm = p9fs_unix2p9_mode(mode);
615 
616 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
617 
618 	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp);
619 	if (ret == 0) {
620 		P9FS_INCR_LINKS(dinode);
621 	}
622 
623 	return (ret);
624 }
625 
626 /* Convert open mode permissions to P9 */
627 static int
p9fs_uflags_mode(int uflags,int extended)628 p9fs_uflags_mode(int uflags, int extended)
629 {
630 	uint32_t ret;
631 
632 	/* Convert first to O flags.*/
633 	uflags = OFLAGS(uflags);
634 
635 	switch (uflags & 3) {
636 
637 	case O_RDONLY:
638 	    ret = P9PROTO_OREAD;
639 	    break;
640 
641 	case O_WRONLY:
642 	    ret = P9PROTO_OWRITE;
643 	    break;
644 
645 	case O_RDWR:
646 	    ret = P9PROTO_ORDWR;
647 	    break;
648 	}
649 
650 	if (extended) {
651 		if (uflags & O_EXCL)
652 			ret |= P9PROTO_OEXCL;
653 
654 		if (uflags & O_APPEND)
655 			ret |= P9PROTO_OAPPEND;
656 	}
657 
658 	return (ret);
659 }
660 
661 /*
662  * This is the main open VOP for every file open. If the file is already
663  * open, then increment and return. If there is no open fid for this file,
664  * there needs to be a client_walk which creates a new open fid for this file.
665  * Once we have a open fid, call the open on this file with the mode creating
666  * the vobject.
667  */
668 static int
p9fs_open(struct vop_open_args * ap)669 p9fs_open(struct vop_open_args *ap)
670 {
671 	int error;
672 	struct vnode *vp;
673 	struct p9fs_node *np;
674 	struct p9fs_session *vses;
675 	struct p9_fid *vofid, *vfid;
676 	size_t filesize;
677 	uint32_t mode;
678 
679 	error = 0;
680 	vp = ap->a_vp;
681 	np = P9FS_VTON(vp);
682 	vses = np->p9fs_ses;
683 
684 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
685 
686 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
687 		return (EOPNOTSUPP);
688 
689 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
690 	if (error != 0)
691 		return (error);
692 
693 	ASSERT_VOP_LOCKED(vp, __func__);
694 	/*
695 	 * Invalidate the pages of the vm_object cache if the file is modified
696 	 * based on the flag set in reload stats
697 	 */
698 	if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
699 		error = vinvalbuf(vp, 0, 0, 0);
700 		if (error != 0)
701 			return (error);
702 		P9FS_NODE_CLRF(np, P9FS_NODE_MODIFIED);
703 	}
704 
705 	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
706 	if (error != 0)
707 		return (error);
708 
709 	/*
710 	 * Translate kernel fflags to 9p mode
711 	 */
712 	mode = p9fs_uflags_mode(ap->a_mode, 1);
713 
714 	/*
715 	 * Search the fid in vofid_list for current user. If found increase the open
716 	 * count and return. If not found clone a new fid and open the file using
717 	 * that cloned fid.
718 	 */
719 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
720 	if (vofid != NULL) {
721 		vofid->v_opens++;
722 		return (0);
723 	} else {
724 		/*vofid is the open fid for this file.*/
725 		vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
726 		if (error != 0)
727 			return (error);
728 	}
729 
730 	error = p9_client_open(vofid, mode);
731 	if (error != 0)
732 		p9_client_clunk(vofid);
733 	else {
734 		vofid->v_opens = 1;
735 		filesize = np->inode.i_size;
736 		vnode_create_vobject(vp, filesize, ap->a_td);
737 		p9fs_fid_add(np, vofid, VOFID);
738 	}
739 
740 	return (error);
741 }
742 
743 /*
744  * Close the open references. Just reduce the open count on vofid and return.
745  * Let clunking of VOFID happen in p9fs_reclaim.
746  */
747 static int
p9fs_close(struct vop_close_args * ap)748 p9fs_close(struct vop_close_args *ap)
749 {
750 	struct vnode *vp;
751 	struct p9fs_node *np;
752 	struct p9fs_session *vses;
753 	struct p9_fid *vofid;
754 	int error;
755 
756 	vp = ap->a_vp;
757 	np = P9FS_VTON(vp);
758 
759 	if (np == NULL)
760 		return (0);
761 
762 	vses = np->p9fs_ses;
763 	error = 0;
764 
765 	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
766 
767 	/*
768 	 * Translate kernel fflags to 9p mode
769 	 */
770 	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID,
771 	    p9fs_uflags_mode(ap->a_fflag, 1), &error);
772 	if (vofid == NULL)
773 		return (0);
774 
775 	vofid->v_opens--;
776 
777 	return (0);
778 }
779 
780 /* Helper routine for checking if fileops are possible on this file */
781 static int
p9fs_check_possible(struct vnode * vp,struct vattr * vap,mode_t mode)782 p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
783 {
784 
785 	/* Check if we are allowed to write */
786 	switch (vap->va_type) {
787 	case VDIR:
788 	case VLNK:
789 	case VREG:
790 		/*
791 		 * Normal nodes: check if we're on a read-only mounted
792 		 * file system and bail out if we're trying to write.
793 		 */
794 		if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
795 			return (EROFS);
796 		break;
797 	case VBLK:
798 	case VCHR:
799 	case VSOCK:
800 	case VFIFO:
801 		/*
802 		 * Special nodes: even on read-only mounted file systems
803 		 * these are allowed to be written to if permissions allow.
804 		 */
805 		break;
806 	default:
807 		/* No idea what this is */
808 		return (EINVAL);
809 	}
810 
811 	return (0);
812 }
813 
814 /* Check the access permissions of the file. */
815 static int
p9fs_access(struct vop_access_args * ap)816 p9fs_access(struct vop_access_args *ap)
817 {
818 	struct vnode *vp;
819 	accmode_t accmode;
820 	struct ucred *cred;
821 	struct vattr vap;
822 	int error;
823 
824 	vp = ap->a_vp;
825 	accmode = ap->a_accmode;
826 	cred = ap->a_cred;
827 
828 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
829 
830 	/* make sure getattr is working correctly and is defined.*/
831 	error = VOP_GETATTR(vp, &vap, cred);
832 	if (error != 0)
833 		return (error);
834 
835 	error = p9fs_check_possible(vp, &vap, accmode);
836 	if (error != 0)
837 		return (error);
838 
839 	/* Call the Generic Access check in VOPS*/
840 	error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
841 	    cred);
842 
843 
844 	return (error);
845 }
846 
847 /*
848  * Reload the file stats from the server and update the inode structure present
849  * in p9fs node.
850  */
851 int
p9fs_reload_stats_dotl(struct vnode * vp,struct ucred * cred)852 p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
853 {
854 	struct p9_stat_dotl *stat;
855 	int error;
856 	struct p9fs_node *node;
857 	struct p9fs_session *vses;
858 	struct p9_fid *vfid;
859 
860 	error = 0;
861 	node = P9FS_VTON(vp);
862 	vses = node->p9fs_ses;
863 
864 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
865 	if (vfid == NULL) {
866 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
867 		if (error)
868 			return (error);
869 	}
870 
871 	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
872 
873 	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
874 	if (error != 0) {
875 		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
876 		goto out;
877 	}
878 
879 	/* Init the vnode with the disk info */
880 	p9fs_stat_vnode_dotl(stat, vp);
881 out:
882 	if (stat != NULL) {
883 		uma_zfree(p9fs_getattr_zone, stat);
884 	}
885 
886 	return (error);
887 }
888 
889 /*
890  * Read the current inode values into the vap attr. We reload the stats from
891  * the server.
892  */
893 static int
p9fs_getattr_dotl(struct vop_getattr_args * ap)894 p9fs_getattr_dotl(struct vop_getattr_args *ap)
895 {
896 	struct vnode *vp;
897 	struct vattr *vap;
898 	struct p9fs_node *node;
899 	struct p9fs_inode *inode;
900 	int error;
901 
902 	vp = ap->a_vp;
903 	vap = ap->a_vap;
904 	node = P9FS_VTON(vp);
905 
906 	if (node == NULL)
907 		return (ENOENT);
908 
909 	inode = &node->inode;
910 
911 	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));
912 
913 	/* Reload our stats once to get the right values.*/
914 	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
915 	if (error != 0) {
916 		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
917 		return (error);
918 	}
919 
920 	/* Basic info */
921 	VATTR_NULL(vap);
922 
923 	VI_LOCK(vp);
924 	vap->va_atime.tv_sec = inode->i_atime;
925 	vap->va_mtime.tv_sec = inode->i_mtime;
926 	vap->va_ctime.tv_sec = inode->i_ctime;
927 	vap->va_atime.tv_nsec = inode->i_atime_nsec;
928 	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
929 	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
930 	vap->va_type = IFTOVT(inode->i_mode);
931 	vap->va_mode = inode->i_mode;
932 	vap->va_uid = inode->n_uid;
933 	vap->va_gid = inode->n_gid;
934 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
935 	vap->va_size = inode->i_size;
936 	vap->va_nlink = inode->i_links_count;
937 	vap->va_blocksize = inode->blksize;
938 	vap->va_fileid = inode->i_qid_path;
939 	vap->va_flags = inode->i_flags;
940 	vap->va_gen = inode->gen;
941 	vap->va_filerev = inode->data_version;
942 	vap->va_vaflags = 0;
943 	vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;
944 	VI_UNLOCK(vp);
945 
946 	return (0);
947 }
948 
949 /* Convert a standard FreeBSD permission to P9. */
950 static uint32_t
p9fs_unix2p9_mode(uint32_t mode)951 p9fs_unix2p9_mode(uint32_t mode)
952 {
953 	uint32_t res;
954 
955 	res = mode & 0777;
956 	if (S_ISDIR(mode))
957 		res |= P9PROTO_DMDIR;
958 	if (S_ISSOCK(mode))
959 		res |= P9PROTO_DMSOCKET;
960 	if (S_ISLNK(mode))
961 		res |= P9PROTO_DMSYMLINK;
962 	if (S_ISFIFO(mode))
963 		res |= P9PROTO_DMNAMEDPIPE;
964 	if ((mode & S_ISUID) == S_ISUID)
965 		res |= P9PROTO_DMSETUID;
966 	if ((mode & S_ISGID) == S_ISGID)
967 		res |= P9PROTO_DMSETGID;
968 	if ((mode & S_ISVTX) == S_ISVTX)
969 		res |= P9PROTO_DMSETVTX;
970 
971 	return (res);
972 }
973 
974 /* Update inode with the stats read from server.(9P2000.L version) */
975 int
p9fs_stat_vnode_dotl(struct p9_stat_dotl * stat,struct vnode * vp)976 p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
977 {
978 	struct p9fs_node *np;
979 	struct p9fs_inode *inode;
980 	bool excl_locked;
981 
982 	np = P9FS_VTON(vp);
983 	inode = &np->inode;
984 
985 	/*
986 	 * This function might be called with the vnode only shared
987 	 * locked.  Then, interlock the vnode to ensure the exclusive
988 	 * access to the inode fields: the thread either owns
989 	 * exclusive vnode lock, or shared vnode lock plus interlock.
990 	 *
991 	 * If the vnode is locked exclusive, do not take the
992 	 * interlock.  We directly call vnode_pager_setsize(), which
993 	 * needs the vm_object lock, and that lock is before vnode
994 	 * interlock in the lock order.
995 	 */
996 	ASSERT_VOP_LOCKED(vp, __func__);
997 	excl_locked = VOP_ISLOCKED(vp) == LK_EXCLUSIVE;
998 	if (!excl_locked)
999 		VI_LOCK(vp);
1000 
1001 	/* Update the pager size if file size changes on host */
1002 	if (inode->i_size != stat->st_size) {
1003 		inode->i_size = stat->st_size;
1004 		if (vp->v_type == VREG) {
1005 			if (excl_locked)
1006 				vnode_pager_setsize(vp, inode->i_size);
1007 			else
1008 				vn_delayed_setsize_locked(vp);
1009 		}
1010 	}
1011 
1012 	inode->i_mtime = stat->st_mtime_sec;
1013 	inode->i_atime = stat->st_atime_sec;
1014 	inode->i_ctime = stat->st_ctime_sec;
1015 	inode->i_mtime_nsec = stat->st_mtime_nsec;
1016 	inode->i_atime_nsec = stat->st_atime_nsec;
1017 	inode->i_ctime_nsec = stat->st_ctime_nsec;
1018 	inode->n_uid = stat->st_uid;
1019 	inode->n_gid = stat->st_gid;
1020 	inode->i_mode = stat->st_mode;
1021 	vp->v_type = IFTOVT(inode->i_mode);
1022 	inode->i_links_count = stat->st_nlink;
1023 	inode->blksize = stat->st_blksize;
1024 	inode->blocks = stat->st_blocks;
1025 	inode->gen = stat->st_gen;
1026 	inode->data_version = stat->st_data_version;
1027 
1028 	/* Setting a flag if file changes based on qid version */
1029 	if (np->vqid.qid_version != stat->qid.version)
1030 		P9FS_NODE_SETF(np, P9FS_NODE_MODIFIED);
1031 	memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
1032 	if (!excl_locked)
1033 		VI_UNLOCK(vp);
1034 
1035 	return (0);
1036 }
1037 
1038 /*
1039  * Write the current in memory inode stats into persistent stats structure
1040  * to write to the server(for linux version).
1041  */
1042 static int
p9fs_inode_to_iattr(struct p9fs_inode * inode,struct p9_iattr_dotl * p9attr)1043 p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
1044 {
1045 	p9attr->size = inode->i_size;
1046 	p9attr->mode = inode->i_mode;
1047 	p9attr->uid = inode->n_uid;
1048 	p9attr->gid = inode->n_gid;
1049 	p9attr->atime_sec = inode->i_atime;
1050 	p9attr->atime_nsec = inode->i_atime_nsec;
1051 	p9attr->mtime_sec = inode->i_mtime;
1052 	p9attr->mtime_nsec = inode->i_mtime_nsec;
1053 
1054 	return (0);
1055 }
1056 
1057 /*
1058  * Modify the ownership of a file whenever the chown is called on the
1059  * file.
1060  */
1061 static int
p9fs_chown(struct vnode * vp,uid_t uid,gid_t gid,struct ucred * cred,struct thread * td)1062 p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
1063     struct thread *td)
1064 {
1065 	struct p9fs_node *np;
1066 	struct p9fs_inode *inode;
1067 	uid_t ouid;
1068 	gid_t ogid;
1069 	int error;
1070 
1071 	np = P9FS_VTON(vp);
1072 	inode = &np->inode;
1073 
1074 	if (uid == (uid_t)VNOVAL)
1075 		uid = inode->n_uid;
1076 	if (gid == (gid_t)VNOVAL)
1077 		gid = inode->n_gid;
1078 	/*
1079 	 * To modify the ownership of a file, must possess VADMIN for that
1080 	 * file.
1081 	 */
1082 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
1083 		return (error);
1084 	/*
1085 	 * To change the owner of a file, or change the group of a file to a
1086 	 * group of which we are not a member, the caller must have
1087 	 * privilege.
1088 	 */
1089 	if (((uid != inode->n_uid && uid != cred->cr_uid) ||
1090 	    (gid != inode->n_gid && !groupmember(gid, cred))) &&
1091 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
1092 		return (error);
1093 
1094 	ogid = inode->n_gid;
1095 	ouid = inode->n_uid;
1096 
1097 	inode->n_gid = gid;
1098 	inode->n_uid = uid;
1099 
1100 	if ((inode->i_mode & (ISUID | ISGID)) &&
1101 	    (ouid != uid || ogid != gid)) {
1102 
1103 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
1104 			inode->i_mode &= ~(ISUID | ISGID);
1105 	}
1106 	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
1107 
1108 	return (0);
1109 }
1110 
1111 /*
1112  * Update the in memory inode with all chmod new permissions/mode. Typically a
1113  * setattr is called to update it to server.
1114  */
1115 static int
p9fs_chmod(struct vnode * vp,uint32_t mode,struct ucred * cred,struct thread * td)1116 p9fs_chmod(struct vnode *vp, uint32_t  mode, struct ucred *cred, struct thread *td)
1117 {
1118 	struct p9fs_node *np;
1119 	struct p9fs_inode *inode;
1120 	uint32_t nmode;
1121 	int error;
1122 
1123 	np = P9FS_VTON(vp);
1124 	inode = &np->inode;
1125 
1126 	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n",  __func__, vp, mode, cred, td);
1127 	/*
1128 	 * To modify the permissions on a file, must possess VADMIN
1129 	 * for that file.
1130 	 */
1131 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
1132 		return (error);
1133 
1134 	/*
1135 	 * Privileged processes may set the sticky bit on non-directories,
1136 	 * as well as set the setgid bit on a file with a group that the
1137 	 * process is not a member of. Both of these are allowed in
1138 	 * jail(8).
1139 	 */
1140 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
1141 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
1142 			return (EFTYPE);
1143 	}
1144 	if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) {
1145 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
1146 		if (error != 0)
1147 			return (error);
1148 	}
1149 
1150 	/*
1151 	 * Deny setting setuid if we are not the file owner.
1152 	 */
1153 	if ((mode & ISUID) && inode->n_uid != cred->cr_uid) {
1154 		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
1155 		if (error != 0)
1156 			return (error);
1157 	}
1158 	nmode = inode->i_mode;
1159 	nmode &= ~ALLPERMS;
1160 	nmode |= (mode & ALLPERMS);
1161 	inode->i_mode = nmode;
1162 
1163 	P9_DEBUG(VOPS, "%s: to mode %x  %d \n ", __func__, nmode, error);
1164 
1165 	return (error);
1166 }
1167 
1168 /*
1169  * Set the attributes of a file referenced by fid. A valid bitmask is sent
1170  * in request selecting which fields to set
1171  */
1172 static int
p9fs_setattr_dotl(struct vop_setattr_args * ap)1173 p9fs_setattr_dotl(struct vop_setattr_args *ap)
1174 {
1175 	struct vnode *vp;
1176 	struct vattr *vap;
1177 	struct p9fs_node *node;
1178 	struct p9fs_inode *inode;
1179 	struct ucred *cred;
1180 	struct thread *td;
1181 	struct p9_iattr_dotl *p9attr;
1182 	struct p9fs_session *vses;
1183 	struct p9_fid *vfid;
1184 	uint64_t oldfilesize;
1185 	int error;
1186 
1187 	vp = ap->a_vp;
1188 	vap = ap->a_vap;
1189 	node = P9FS_VTON(vp);
1190 	inode = &node->inode;
1191 	cred = ap->a_cred;
1192 	td = curthread;
1193 	vses = node->p9fs_ses;
1194 	error = 0;
1195 
1196 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
1197 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
1198 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
1199 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
1200 		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
1201 		return (EINVAL);
1202 	}
1203 	/* Disallow write attempts on read only filesystem */
1204 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
1205 		return (EROFS);
1206 
1207 	/* Setting of flags is not supported */
1208 	if (vap->va_flags != VNOVAL)
1209 		return (EOPNOTSUPP);
1210 
1211 	/* Allocate p9attr struct */
1212 	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
1213 	if (p9attr == NULL)
1214 		return (ENOMEM);
1215 
1216 	/* Check if we need to change the ownership of the file*/
1217 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1218 		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
1219 		    vp, td, vap->va_uid, vap->va_gid);
1220 
1221 		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
1222 		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
1223 			P9PROTO_SETATTR_MODE;
1224 		if (error)
1225 			goto out;
1226 	}
1227 
1228 	/* Check for mode changes */
1229 	if (vap->va_mode != (mode_t)VNOVAL) {
1230 		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
1231 		    vap->va_mode);
1232 
1233 		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
1234 		p9attr->valid |= P9PROTO_SETATTR_MODE;
1235 		if (error)
1236 			goto out;
1237 	}
1238 
1239 	/* Update the size of the file and update mtime */
1240 	if (vap->va_size != (uint64_t)VNOVAL) {
1241 		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
1242 		    vp, td, (uintmax_t)vap->va_size);
1243 		switch (vp->v_type) {
1244 			case VDIR:
1245 				error = EISDIR;
1246 				goto out;
1247 			case VLNK:
1248 			case VREG:
1249 				/* Invalidate cached pages of vp */
1250 				error = vinvalbuf(vp, 0, 0, 0);
1251 				if (error)
1252 					goto out;
1253 				oldfilesize = inode->i_size;
1254 				inode->i_size = vap->va_size;
1255 				/* Update the p9fs_inode time */
1256 				p9fs_itimes(vp);
1257 				p9attr->valid |= P9PROTO_SETATTR_SIZE |
1258 				    P9PROTO_SETATTR_ATIME |
1259 				    P9PROTO_SETATTR_MTIME |
1260 				    P9PROTO_SETATTR_ATIME_SET |
1261 				    P9PROTO_SETATTR_MTIME_SET ;
1262 				break;
1263 			default:
1264 				goto out;
1265 		}
1266 	} else if (vap->va_atime.tv_sec != VNOVAL ||
1267 		    vap->va_mtime.tv_sec != VNOVAL) {
1268 		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
1269 		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
1270 		    (uintmax_t)vap->va_mtime.tv_sec);
1271 		/* Update the p9fs_inode times */
1272 		p9fs_itimes(vp);
1273 		p9attr->valid |= P9PROTO_SETATTR_ATIME |
1274 			P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
1275 			P9PROTO_SETATTR_MTIME_SET;
1276 	}
1277 
1278 	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
1279 	if (vfid == NULL) {
1280 		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
1281 		if (error)
1282 			goto out;
1283 	}
1284 
1285 	/* Write the inode structure values into p9attr */
1286 	p9fs_inode_to_iattr(inode, p9attr);
1287 	error = p9_client_setattr(vfid, p9attr);
1288 	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
1289 		if (error)
1290 			inode->i_size = oldfilesize;
1291 		else
1292 			vnode_pager_setsize(vp, inode->i_size);
1293 	}
1294 out:
1295 	if (p9attr) {
1296 		uma_zfree(p9fs_setattr_zone, p9attr);
1297 	}
1298 	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
1299 	return (error);
1300 }
1301 
/*
 * Per-operation state filled in by p9fs_get_open_fid() and consumed by
 * p9fs_release_open_fid().
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid to perform the I/O on */
	int fflags;		/* flags given to VOP_OPEN; set only when opened */
	int opened;		/* TRUE when p9fs_get_open_fid() called VOP_OPEN */
};
1307 
1308 /*
1309  * TODO: change this to take P9PROTO_* mode and avoid routing through
1310  * VOP_OPEN, factoring out implementation of p9fs_open.
1311  */
1312 static int
p9fs_get_open_fid(struct vnode * vp,int fflags,struct ucred * cr,struct open_fid_state * statep)1313 p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
1314 {
1315 	struct p9fs_node *np;
1316 	struct p9fs_session *vses;
1317 	struct p9_fid *vofid;
1318 	int mode = p9fs_uflags_mode(fflags, TRUE);
1319 	int error = 0;
1320 
1321 	statep->opened = FALSE;
1322 
1323 	np = P9FS_VTON(vp);
1324 	vses = np->p9fs_ses;
1325 	vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1326 	if (vofid == NULL) {
1327 		error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
1328 		if (error) {
1329 			return (error);
1330 		}
1331 		vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1332 		if (vofid == NULL) {
1333 			return (EBADF);
1334 		}
1335 		statep->fflags = fflags;
1336 		statep->opened = TRUE;
1337 	}
1338 	statep->vofid = vofid;
1339 	return (0);
1340 }
1341 
1342 static void
p9fs_release_open_fid(struct vnode * vp,struct ucred * cr,struct open_fid_state * statep)1343 p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
1344 {
1345 	if (statep->opened) {
1346 		(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
1347 	}
1348 }
1349 
1350 /*
1351  * An I/O buffer is used to to do any transfer. The uio is the vfs structure we
1352  * need to copy data into. As long as resid is greater than zero, we call
1353  * client_read to read data from offset(offset into the file) in the open fid
1354  * for the file into the I/O buffer. The data is read into the user data buffer.
1355  */
1356 static int
p9fs_read(struct vop_read_args * ap)1357 p9fs_read(struct vop_read_args *ap)
1358 {
1359 	struct vnode *vp;
1360 	struct uio *uio;
1361 	struct p9fs_node *np;
1362 	uint64_t offset;
1363 	int64_t ret;
1364 	uint64_t resid;
1365 	uint32_t count;
1366 	int error;
1367 	char *io_buffer = NULL;
1368 	uint64_t filesize;
1369 	struct open_fid_state ostate;
1370 
1371 	vp = ap->a_vp;
1372 	uio = ap->a_uio;
1373 	np = P9FS_VTON(vp);
1374 	error = 0;
1375 
1376 	if (VN_ISDEV(vp))
1377 		return (EOPNOTSUPP);
1378 	if (vp->v_type != VREG)
1379 		return (EISDIR);
1380 	if (uio->uio_resid == 0)
1381 		return (0);
1382 	if (uio->uio_offset < 0)
1383 		return (EINVAL);
1384 
1385 	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
1386 	if (error)
1387 		return (error);
1388 
1389 	/* where in the file are we to start reading */
1390 	offset = uio->uio_offset;
1391 	filesize = np->inode.i_size;
1392 	if (uio->uio_offset >= filesize)
1393 		goto out;
1394 
1395 	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
1396 	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
1397 
1398 	/* Work with a local buffer from the pool for this vop */
1399 
1400 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1401 	while ((resid = uio->uio_resid) > 0) {
1402 		if (offset >= filesize)
1403 			break;
1404 		count = MIN(filesize - uio->uio_offset , resid);
1405 		if (count == 0)
1406 			break;
1407 
1408 		/* Copy count bytes into the uio */
1409 		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
1410 		/*
1411 		 * This is the only place in the entire p9fs where we check the
1412 		 * error for < 0 as p9_client_read/write return the number of
1413 		 * bytes instead of an error code. In this case if ret is < 0,
1414 		 * it means there is an IO error.
1415 		 */
1416 		if (ret < 0) {
1417 			error = -ret;
1418 			goto out;
1419 		}
1420 		error = uiomove(io_buffer, ret, uio);
1421 		if (error != 0)
1422 			goto out;
1423 
1424 		offset += ret;
1425 	}
1426 	uio->uio_offset = offset;
1427 out:
1428 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
1429 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1430 
1431 	return (error);
1432 }
1433 
1434 /*
1435  * The user buffer contains the data to be written. This data is copied first
1436  * from uio into I/O buffer. This I/O  buffer is used to do the client_write to
1437  * the fid of the file starting from the offset given upto count bytes. The
1438  * number of bytes written is returned to the caller.
1439  */
1440 static int
p9fs_write(struct vop_write_args * ap)1441 p9fs_write(struct vop_write_args *ap)
1442 {
1443 	struct vnode *vp;
1444 	struct uio *uio;
1445 	struct p9fs_node *np;
1446 	uint64_t off, offset;
1447 	int64_t ret;
1448 	uint64_t resid, bytes_written;
1449 	uint32_t count;
1450 	int error, ioflag;
1451 	uint64_t file_size;
1452 	char *io_buffer = NULL;
1453 	struct open_fid_state ostate;
1454 
1455 	vp = ap->a_vp;
1456 	uio = ap->a_uio;
1457 	np = P9FS_VTON(vp);
1458 	error = 0;
1459 	ioflag = ap->a_ioflag;
1460 
1461 	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
1462 	if (error)
1463 		return (error);
1464 
1465 	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
1466 	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1467 
1468 	if (uio->uio_offset < 0) {
1469 		error = EINVAL;
1470 		goto out;
1471 	}
1472 	if (uio->uio_resid == 0)
1473 		goto out;
1474 
1475 	file_size = np->inode.i_size;
1476 
1477 	switch (vp->v_type) {
1478 	case VREG:
1479 		if (ioflag & IO_APPEND)
1480 			uio->uio_offset = file_size;
1481 		break;
1482 	case VDIR:
1483 		return (EISDIR);
1484 	case VLNK:
1485 		break;
1486 	default:
1487 		panic("%s: bad file type vp: %p", __func__, vp);
1488 	}
1489 
1490 	resid = uio->uio_resid;
1491 	offset = uio->uio_offset;
1492 	bytes_written = 0;
1493 	error = 0;
1494 
1495 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1496 	while ((resid = uio->uio_resid) > 0) {
1497                 off = 0;
1498 		count = MIN(resid, P9FS_IOUNIT);
1499 		error = uiomove(io_buffer, count, uio);
1500 
1501 		if (error != 0) {
1502 			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
1503 			goto out;
1504 		}
1505 
1506 		/* While count still exists, keep writing.*/
1507 		while (count > 0) {
1508 			/* Copy count bytes from the uio */
1509 			ret = p9_client_write(ostate.vofid, offset, count,
1510                                 io_buffer + off);
1511 			if (ret < 0) {
1512 				if (bytes_written == 0) {
1513 					error = -ret;
1514 					goto out;
1515 				} else {
1516 					break;
1517 				}
1518 			}
1519 			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
1520 			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1521 
1522                         off += ret;
1523 			offset += ret;
1524 			bytes_written += ret;
1525 			count -= ret;
1526 		}
1527 	}
1528 	/* Update the fields in the node to reflect the change*/
1529 	if (file_size < uio->uio_offset + uio->uio_resid) {
1530 		np->inode.i_size = uio->uio_offset + uio->uio_resid;
1531 		vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
1532 	}
1533 out:
1534 	if (io_buffer)
1535 		uma_zfree(p9fs_io_buffer_zone, io_buffer);
1536 	p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1537 
1538 	return (error);
1539 }
1540 
1541 /*
1542  * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the
1543  * client_remove op to send messages to remove the node's fid on the server.
1544  * After that, does a node metadata cleanup on client side.
1545  */
1546 static int
remove_common(struct p9fs_node * dnp,struct p9fs_node * np,const char * name,struct ucred * cred)1547 remove_common(struct p9fs_node *dnp, struct p9fs_node *np, const char *name,
1548     struct ucred *cred)
1549 {
1550 	int error;
1551 	struct p9fs_session *vses;
1552 	struct vnode *vp;
1553 	struct p9_fid *vfid;
1554 
1555 	error = 0;
1556 	vses = np->p9fs_ses;
1557 	vp = P9FS_NTOV(np);
1558 
1559 	vfid = p9fs_get_fid(vses->clnt, dnp, cred, VFID, -1, &error);
1560 	if (error != 0)
1561 		return (error);
1562 
1563 	error = p9_client_unlink(vfid, name,
1564 	    np->v_node->v_type == VDIR ? P9PROTO_UNLINKAT_REMOVEDIR : 0);
1565 	if (error != 0)
1566 		return (error);
1567 
1568 	/* Remove all non-open fids associated with the vp */
1569 	if (np->inode.i_links_count == 1)
1570 		p9fs_fid_remove_all(np, TRUE);
1571 
1572 	/* Invalidate all entries of vnode from name cache and hash list. */
1573 	cache_purge(vp);
1574 	vfs_hash_remove(vp);
1575 
1576 	P9FS_NODE_SETF(np, P9FS_NODE_DELETED);
1577 
1578 	return (error);
1579 }
1580 
1581 /* Remove vop for all files. Call common code for remove and adjust links */
1582 static int
p9fs_remove(struct vop_remove_args * ap)1583 p9fs_remove(struct vop_remove_args *ap)
1584 {
1585 	struct vnode *vp;
1586 	struct p9fs_node *np;
1587 	struct vnode *dvp;
1588 	struct p9fs_node *dnp;
1589 	struct p9fs_inode *dinode;
1590 	struct componentname *cnp;
1591 	int error;
1592 
1593 	cnp = ap->a_cnp;
1594 	vp = ap->a_vp;
1595 	np = P9FS_VTON(vp);
1596 	dvp = ap->a_dvp;
1597 	dnp = P9FS_VTON(dvp);
1598 	dinode = &dnp->inode;
1599 
1600 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1601 
1602 	if (vp->v_type == VDIR)
1603 		return (EISDIR);
1604 
1605 	error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
1606 	if (error == 0)
1607 		P9FS_DECR_LINKS(dinode);
1608 
1609 	return (error);
1610 }
1611 
1612 /* Remove vop for all directories. Call common code for remove and adjust links */
1613 static int
p9fs_rmdir(struct vop_rmdir_args * ap)1614 p9fs_rmdir(struct vop_rmdir_args *ap)
1615 {
1616 	struct vnode *vp;
1617 	struct p9fs_node *np;
1618 	struct vnode *dvp;
1619 	struct p9fs_node *dnp;
1620 	struct p9fs_inode *dinode;
1621 	struct componentname *cnp;
1622 	int error;
1623 
1624 	cnp = ap->a_cnp;
1625 	vp = ap->a_vp;
1626 	np = P9FS_VTON(vp);
1627 	dvp = ap->a_dvp;
1628 	dnp = P9FS_VTON(dvp);
1629 	dinode = &dnp->inode;
1630 
1631 	P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1632 
1633 	error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
1634 	if (error == 0)
1635 		P9FS_DECR_LINKS(dinode);
1636 
1637 	return (error);
1638 }
1639 
1640 /*
1641  * Create symlinks. Make the permissions and call create_common code
1642  * for Soft links.
1643  */
1644 static int
p9fs_symlink(struct vop_symlink_args * ap)1645 p9fs_symlink(struct vop_symlink_args *ap)
1646 {
1647 	struct vnode *dvp;
1648 	struct vnode **vpp;
1649 	struct vattr *vap;
1650 	struct componentname *cnp;
1651 	char *symtgt;
1652 	struct p9fs_node *dnp;
1653 	struct p9fs_session *vses;
1654 	struct mount *mp;
1655 	struct p9_fid *dvfid, *newfid;
1656 	int error;
1657 	char tmpchr;
1658 	gid_t gid;
1659 
1660 	dvp = ap->a_dvp;
1661 	vpp = ap->a_vpp;
1662 	vap = ap->a_vap;
1663 	cnp = ap->a_cnp;
1664 	symtgt = (char*)(uintptr_t) ap->a_target;
1665 	dnp = P9FS_VTON(dvp);
1666 	vses = dnp->p9fs_ses;
1667 	mp = vses->p9fs_mount;
1668 	newfid = NULL;
1669 	error = 0;
1670 	gid = vap->va_gid;
1671 
1672 	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
1673 
1674 	/*
1675 	 * Save the character present at namelen in nameptr string and
1676 	 * null terminate the character to get the search name for p9_dir_walk
1677 	 */
1678 	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
1679 	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
1680 
1681 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1682 	if (error != 0)
1683 		goto out;
1684 
1685 	error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
1686 	if (error != 0)
1687 		goto out;
1688 
1689 	/*create vnode for symtgt */
1690 	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
1691 	if (newfid != NULL) {
1692 		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
1693 		    dnp, newfid, vpp, cnp->cn_nameptr);
1694 		if (error != 0)
1695 			goto out;
1696 	} else
1697 		goto out;
1698 
1699 	if ((cnp->cn_flags & MAKEENTRY) != 0) {
1700 		cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
1701 	}
1702 	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
1703 	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
1704 
1705 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1706 	return (error);
1707 
1708 out:
1709 	if (newfid != NULL)
1710 		p9_client_clunk(newfid);
1711 	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1712 	return (error);
1713 }
1714 
1715 /* Create hard link */
1716 static int
p9fs_link(struct vop_link_args * ap)1717 p9fs_link(struct vop_link_args *ap)
1718 {
1719 	struct vnode *vp;
1720 	struct vnode *tdvp;
1721 	struct componentname *cnp;
1722 	struct p9fs_node *dnp;
1723 	struct p9fs_node *np;
1724 	struct p9fs_inode *inode;
1725 	struct p9fs_session *vses;
1726 	struct p9_fid *dvfid, *oldvfid;
1727 	int error;
1728 
1729 	vp = ap->a_vp;
1730 	tdvp = ap->a_tdvp;
1731 	cnp = ap->a_cnp;
1732 	dnp = P9FS_VTON(tdvp);
1733 	np = P9FS_VTON(vp);
1734 	inode = &np->inode;
1735 	vses = np->p9fs_ses;
1736 	error = 0;
1737 
1738 	P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
1739 
1740 	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1741 	if (error != 0)
1742 		return (error);
1743 	oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
1744 	if (error != 0)
1745 		return (error);
1746 
1747 	error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
1748 	if (error != 0)
1749 		return (error);
1750 	/* Increment ref count on the inode */
1751 	P9FS_INCR_LINKS(inode);
1752 
1753 	return (0);
1754 }
1755 
1756 /* Read contents of the symbolic link */
1757 static int
p9fs_readlink(struct vop_readlink_args * ap)1758 p9fs_readlink(struct vop_readlink_args *ap)
1759 {
1760 	struct vnode *vp;
1761 	struct uio *uio;
1762 	struct p9fs_node *dnp;
1763 	struct p9fs_session *vses;
1764 	struct p9_fid *dvfid;
1765 	int error, len;
1766 	char *target;
1767 
1768 	vp = ap->a_vp;
1769 	uio = ap->a_uio;
1770 	dnp = P9FS_VTON(vp);
1771 	vses = dnp->p9fs_ses;
1772 	error = 0;
1773 
1774 	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
1775 
1776 	dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
1777 	if (error != 0)
1778 		return (error);
1779 
1780 	error = p9_readlink(dvfid, &target);
1781 	if (error != 0)
1782 		return (error);
1783 
1784 	len = strlen(target);
1785 	error = uiomove(target, len, uio);
1786 
1787 	return (0);
1788 }
1789 
/*
 * VOP_READDIR: iterate through a directory.
 *
 * Each pass asks the server for up to P9FS_IOUNIT bytes of directory data,
 * starting at the current directory offset, into a bounce buffer.  The
 * buffer is parsed one p9_dirent at a time; each entry is converted into a
 * struct dirent and copied to the caller's uio.
 *
 * Returns:
 *   EINVAL   the first iovec of the uio has a length <= 0
 *   ENOTDIR  vp is not a directory
 *   EBADF    no fid open for reading was found for the directory
 *   EIO      the server read failed, an entry could not be parsed, or the
 *            copy to the uio failed
 *   0        otherwise; uio_offset is then set to the d_off of the last
 *            entry transferred (unchanged if none was), and *a_eofflag,
 *            when supplied, is 1 if the server returned no more data.
 * On an error detected after the buffer is allocated, uio_offset is not
 * updated.
 */
static int
p9fs_readdir(struct vop_readdir_args *ap)
{
	struct uio *uio;
	struct vnode *vp;
	struct dirent cde;		/* entry in the format handed to the caller */
	int64_t offset;			/* parse position inside io_buffer */
	uint64_t diroffset;		/* directory offset for the next server read */
	struct p9fs_node *np;
	int error;
	int32_t count;			/* bytes returned by the last server read */
	struct p9_client *clnt;
	struct p9_dirent dent;		/* entry as parsed from the 9P reply */
	char *io_buffer;
	struct p9_fid *vofid;

	uio = ap->a_uio;
	vp = ap->a_vp;
	np = P9FS_VTON(ap->a_vp);
	offset = 0;
	diroffset = 0;
	error = 0;
	count = 0;
	clnt = np->p9fs_ses->clnt;

	P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);

	if (ap->a_uio->uio_iov->iov_len <= 0)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* The directory must already have a fid open for reading. */
	vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
	if (vofid == NULL) {
		P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
		return (EBADF);
	}

	if (ap->a_eofflag != NULL)
		*ap->a_eofflag = 0;

	/* Not zeroed here; the buffer is cleared before every server read. */
	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);

	/* Start from the caller's offset and read until the uio is full. */
	diroffset = uio->uio_offset;
	while (uio->uio_resid >= sizeof(struct dirent)) {
		/*
		 * The amount to read cannot be derived from the file size:
		 * that size is expressed in terms of struct dirent, whereas
		 * the server returns entries in struct p9_dirent format,
		 * which has a different size.  So always request the maximum
		 * (P9FS_IOUNIT) from the host, convert what comes back into
		 * struct dirent and pass that on.
		 */
		count = P9FS_IOUNIT;
		bzero(io_buffer, P9FS_MTU);
		count = p9_client_readdir(vofid, (char *)io_buffer,
		    diroffset, count);

		/* Nothing more to read: report end of directory. */
		if (count == 0) {
			if (ap->a_eofflag != NULL)
				*ap->a_eofflag = 1;
			break;
		}

		if (count < 0) {
			error = EIO;
			goto out;
		}

		offset = 0;
		while (offset + QEMU_DIRENTRY_SZ <= count) {

			/*
			 * Parse one p9_dirent out of the buffer (9P protocol
			 * format).  p9_dirent_read() returns the buffer
			 * position following the entry; a value that is
			 * negative or beyond the data read is treated as a
			 * parse failure.
			 */
			bzero(&dent, sizeof(dent));
			offset = p9_dirent_read(clnt, io_buffer, offset, count,
				&dent);
			if (offset < 0 || offset > count) {
				error = EIO;
				goto out;
			}

			/*
			 * Convert to the FreeBSD struct dirent.  cde is zeroed
			 * first, so the copied name ends up NUL-terminated.
			 * NOTE(review): dent.len is not checked against the
			 * size of cde.d_name here -- confirm that
			 * p9_dirent_read() bounds it.
			 */
			bzero(&cde, sizeof(cde));
			strncpy(cde.d_name, dent.d_name, dent.len);
			cde.d_fileno = dent.qid.path;
			cde.d_type = dent.d_type;
			cde.d_namlen = dent.len;
			cde.d_reclen = GENERIC_DIRSIZ(&cde);

                        /*
                         * If there isn't enough space in the uio to return a
                         * whole dirent, break off read.  diroffset still
                         * refers to the previous entry, so this entry is not
                         * skipped by the offset handed back to the caller.
                         */
                        if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
                                break;

			/* Transfer */
			error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
			if (error != 0) {
				error = EIO;
				goto out;
			}
			/* Resume after this entry on the next read. */
			diroffset = dent.d_off;
		}
	}
	/* Pass on last transferred offset */
	uio->uio_offset = diroffset;

out:
	uma_zfree(p9fs_io_buffer_zone, io_buffer);

	return (error);
}
1912 
1913 static void
p9fs_doio(struct vnode * vp,struct buf * bp,struct p9_fid * vofid,struct ucred * cr)1914 p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
1915 {
1916 	struct uio *uiov;
1917 	struct iovec io;
1918 	int error;
1919 	uint64_t off, offset;
1920 	uint64_t filesize;
1921 	uint64_t resid;
1922 	uint32_t count;
1923 	int64_t ret;
1924 	struct p9fs_node *np;
1925 	char *io_buffer;
1926 
1927 	error = 0;
1928 	np = P9FS_VTON(vp);
1929 
1930 	filesize = np->inode.i_size;
1931 	uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
1932 	uiov->uio_iov = &io;
1933 	uiov->uio_iovcnt = 1;
1934 	uiov->uio_segflg = UIO_SYSSPACE;
1935 	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1936 
1937 	if (bp->b_iocmd == BIO_READ) {
1938 		io.iov_len = uiov->uio_resid = bp->b_bcount;
1939 		io.iov_base = bp->b_data;
1940 		uiov->uio_rw = UIO_READ;
1941 
1942 		switch (vp->v_type) {
1943 
1944 		case VREG:
1945 		{
1946 			uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1947 
1948 			if (uiov->uio_resid) {
1949 				int left = uiov->uio_resid;
1950 				int nread = bp->b_bcount - left;
1951 
1952 				if (left > 0)
1953 					bzero((char *)bp->b_data + nread, left);
1954 			}
1955 			/* where in the file are we to start reading */
1956 			offset = uiov->uio_offset;
1957 			if (uiov->uio_offset >= filesize)
1958 				goto out;
1959 
1960 			while ((resid = uiov->uio_resid) > 0) {
1961 				if (offset >= filesize)
1962 					break;
1963 				count = min(filesize - uiov->uio_offset, resid);
1964 				if (count == 0)
1965 					break;
1966 
1967 				P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
1968 				    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1969 
1970 				/* Copy count bytes into the uio */
1971 				ret = p9_client_read(vofid, offset, count, io_buffer);
1972 				error = uiomove(io_buffer, ret, uiov);
1973 
1974 				if (error != 0)
1975 					goto out;
1976 				offset += ret;
1977 			}
1978 			break;
1979 		}
1980 		default:
1981 			printf("vfs:  type %x unexpected\n", vp->v_type);
1982 			break;
1983 		}
1984 	} else {
1985 		if (bp->b_dirtyend > bp->b_dirtyoff) {
1986 			io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
1987 			uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
1988 			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1989 			uiov->uio_rw = UIO_WRITE;
1990 
1991 			if (uiov->uio_offset < 0) {
1992 				error = EINVAL;
1993 				goto out;
1994 			}
1995 
1996 			if (uiov->uio_resid == 0)
1997 				goto out;
1998 
1999 			resid = uiov->uio_resid;
2000 			offset = uiov->uio_offset;
2001 			error = 0;
2002 
2003 			while ((resid = uiov->uio_resid) > 0) {
2004                                 off = 0;
2005 				count = MIN(resid, P9FS_IOUNIT);
2006 				error = uiomove(io_buffer, count, uiov);
2007 				if (error != 0) {
2008 					goto out;
2009 				}
2010 
2011 				while (count > 0) {
2012 					/* Copy count bytes from the uio */
2013 					ret = p9_client_write(vofid, offset, count,
2014                                                 io_buffer + off);
2015 					if (ret < 0)
2016 						goto out;
2017 
2018 					P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
2019 					    __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
2020                                         off += ret;
2021 					offset += ret;
2022 					count -= ret;
2023 				}
2024 			}
2025 
2026 			/* Update the fields in the node to reflect the change */
2027 			if (filesize < uiov->uio_offset + uiov->uio_resid) {
2028 				np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
2029 				vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
2030 				/* update the modified timers. */
2031 				p9fs_itimes(vp);
2032 			}
2033 		} else {
2034 			 bp->b_resid = 0;
2035 			 goto out1;
2036 		}
2037 	}
2038 out:
2039 	/* Set the error */
2040 	if (error != 0) {
2041 		bp->b_error = error;
2042 		bp->b_ioflags |= BIO_ERROR;
2043 	}
2044 	bp->b_resid = uiov->uio_resid;
2045 out1:
2046 	bufdone(bp);
2047 	uma_zfree(p9fs_io_buffer_zone, io_buffer);
2048 	free(uiov, M_P9UIOV);
2049 }
2050 
2051 /*
2052  * The I/O buffer is mapped to a uio and a client_write/client_read is performed
2053  * the same way as p9fs_read and p9fs_write.
2054  */
2055 static int
p9fs_strategy(struct vop_strategy_args * ap)2056 p9fs_strategy(struct vop_strategy_args *ap)
2057 {
2058 	struct vnode *vp;
2059 	struct buf *bp;
2060 	struct ucred *cr;
2061 	int error;
2062 	struct open_fid_state ostate;
2063 
2064 	vp = ap->a_vp;
2065 	bp = ap->a_bp;
2066 	error = 0;
2067 
2068 	P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
2069 
2070 	if (bp->b_iocmd == BIO_READ)
2071 		cr = bp->b_rcred;
2072 	else
2073 		cr = bp->b_wcred;
2074 
2075 	error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate);
2076 	if (error) {
2077 		P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
2078 		bp->b_error = error;
2079 		bp->b_ioflags |= BIO_ERROR;
2080 		bufdone(bp);
2081 		return (0);
2082 	}
2083 
2084 	p9fs_doio(vp, bp, ostate.vofid, cr);
2085 	p9fs_release_open_fid(vp, cr, &ostate);
2086 
2087 	return (0);
2088 }
2089 
2090 /* Rename a file */
2091 static int
p9fs_rename(struct vop_rename_args * ap)2092 p9fs_rename(struct vop_rename_args *ap)
2093 {
2094 	struct vnode *tvp;
2095 	struct vnode *tdvp;
2096 	struct vnode *fvp;
2097 	struct vnode *fdvp;
2098 	struct componentname *tcnp;
2099 	struct componentname *fcnp;
2100 	struct p9fs_node *tdnode;
2101 	struct p9fs_node *fdnode;
2102 	struct p9fs_inode *fdinode;
2103 	struct p9fs_node *fnode;
2104 	struct p9fs_inode *finode;
2105 	struct p9fs_session *vses;
2106 	struct p9fs_node *tnode;
2107 	struct p9fs_inode *tinode;
2108 	struct p9_fid *olddirvfid, *newdirvfid ;
2109 	int error;
2110 
2111 	tvp = ap->a_tvp;
2112 	tdvp = ap->a_tdvp;
2113 	fvp = ap->a_fvp;
2114 	fdvp = ap->a_fdvp;
2115 	tcnp = ap->a_tcnp;
2116 	fcnp = ap->a_fcnp;
2117 	tdnode = P9FS_VTON(tdvp);
2118 	fdnode = P9FS_VTON(fdvp);
2119 	fdinode = &fdnode->inode;
2120 	fnode = P9FS_VTON(fvp);
2121 	finode = &fnode->inode;
2122 	vses = fnode->p9fs_ses;
2123 	error = 0;
2124 
2125 	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);
2126 
2127 	/* Check for cross mount operation */
2128 	if (fvp->v_mount != tdvp->v_mount ||
2129 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
2130 		error = EXDEV;
2131 		goto out;
2132 	}
2133 
2134 	if (ap->a_flags != 0) {
2135 		error = EOPNOTSUPP;
2136 		goto out;
2137 	}
2138 
2139 	/* warning  if you are renaming to the same name */
2140 	if (fvp == tvp)
2141 		error = 0;
2142 
2143 	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
2144 	if (error != 0)
2145 		goto out;
2146 	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
2147 	if (error != 0)
2148 		goto out;
2149 
2150 	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
2151 	if (error != 0)
2152 		goto out;
2153 
2154 	/*
2155 	 * decrement the link count on the "from" file whose name is going
2156 	 * to be changed if its a directory
2157 	 */
2158 	if (fvp->v_type == VDIR) {
2159 		if (tvp && tvp->v_type == VDIR)
2160 			cache_purge(tdvp);
2161 		P9FS_DECR_LINKS(fdinode);
2162 		cache_purge(fdvp);
2163 	}
2164 
2165 	/* Taking exclusive lock on the from node before decrementing the link count */
2166 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
2167 		goto out;
2168 	P9FS_DECR_LINKS(finode);
2169 	VOP_UNLOCK(fvp);
2170 
2171 	if (tvp) {
2172 		tnode = P9FS_VTON(tvp);
2173 		tinode = &tnode->inode;
2174 		P9FS_DECR_LINKS(tinode);
2175 	}
2176 
2177 out:
2178 	if (tdvp == tvp)
2179 		vrele(tdvp);
2180 	else
2181 		vput(tdvp);
2182 	if (tvp)
2183 		vput(tvp);
2184 	vrele(fdvp);
2185 	vrele(fvp);
2186 	return (error);
2187 }
2188 
2189 /*
2190  * Put VM pages, synchronously.
2191  * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
2192  */
2193 static int
p9fs_putpages(struct vop_putpages_args * ap)2194 p9fs_putpages(struct vop_putpages_args *ap)
2195 {
2196 	struct uio uio;
2197 	struct iovec iov;
2198 	int i, error, npages, count;
2199 	off_t offset;
2200 	int *rtvals;
2201 	struct vnode *vp;
2202 	struct thread *td;
2203 	struct ucred *cred;
2204 	struct p9fs_node *np;
2205 	vm_page_t *pages;
2206 	void *kva;
2207 	struct buf *bp;
2208 
2209 	vp = ap->a_vp;
2210 	np = P9FS_VTON(vp);
2211 	td = curthread;
2212 	cred = curthread->td_ucred;
2213 	pages = ap->a_m;
2214 	count = ap->a_count;
2215 	rtvals = ap->a_rtvals;
2216 	npages = btoc(count);
2217 	offset = IDX_TO_OFF(pages[0]->pindex);
2218 
2219 	/*
2220 	 * When putting pages, do not extend file past EOF.
2221 	 */
2222 	if (offset + count > np->inode.i_size) {
2223 		count = np->inode.i_size - offset;
2224 		if (count < 0)
2225 			count = 0;
2226 	}
2227 
2228 	for (i = 0; i < npages; i++)
2229 		rtvals[i] = VM_PAGER_ERROR;
2230 
2231 	bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
2232 	kva = bp->b_data;
2233 	pmap_qenter(kva, pages, npages);
2234 
2235 	VM_CNT_INC(v_vnodeout);
2236 	VM_CNT_ADD(v_vnodepgsout, count);
2237 
2238 	iov.iov_base = kva;
2239 	iov.iov_len = count;
2240 	uio.uio_iov = &iov;
2241 	uio.uio_iovcnt = 1;
2242 	uio.uio_offset = offset;
2243 	uio.uio_resid = count;
2244 	uio.uio_segflg = UIO_SYSSPACE;
2245 	uio.uio_rw = UIO_WRITE;
2246 	uio.uio_td = td;
2247 
2248 	P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);
2249 
2250 	error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
2251 	    cred);
2252 
2253 	pmap_qremove(kva, npages);
2254 	uma_zfree(p9fs_pbuf_zone, bp);
2255 
2256 	if (error == 0)
2257 		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
2258 		    np->inode.i_size - offset, npages * PAGE_SIZE);
2259 
2260 	return (rtvals[0]);
2261 }
2262 
2263 static int
p9fs_delayed_setsize(struct vop_delayed_setsize_args * ap)2264 p9fs_delayed_setsize(struct vop_delayed_setsize_args *ap)
2265 {
2266 	struct vnode *vp;
2267 	struct p9fs_node *np;
2268 
2269 	vp = ap->a_vp;
2270 	np = P9FS_VTON(vp);
2271 	vnode_pager_setsize(vp, np->inode.i_size);
2272 	return (0);
2273 }
2274 
2275 static unsigned int
p9fs_get_name_max(struct p9fs_node * np)2276 p9fs_get_name_max(struct p9fs_node *np)
2277 {
2278 	struct p9fs_session *vses = np->p9fs_ses;
2279 	struct p9_statfs statfs;
2280 	struct p9_fid *vfid;
2281 	unsigned int name_max;
2282 	int error = 0;
2283 
2284 	name_max = atomic_load_int(&vses->name_max);
2285 	if (name_max != 0)
2286 		return (name_max);
2287 
2288 	P9_DEBUG(VOPS, "%s: querying _PC_NAME_MAX\n", __func__);
2289 	vfid = p9fs_get_fid(vses->clnt, np, NULL, VFID, -1, &error);
2290 	if (vfid != NULL) {
2291 		error = p9_client_statfs(vfid, &statfs);
2292 		if (error == 0) {
2293 			/*
2294 			 * Note that this is not strictly correct if you have
2295 			 * nested mounts on the host (e.g. when using qemu with
2296 			 * multidevs=remap), but is a better estimate than just
2297 			 * returning 255.
2298 			 */
2299 			name_max = statfs.namelen;
2300 		}
2301 	}
2302 	P9_DEBUG(VOPS, "%s: max_name=%u error=%d\n", __func__, name_max, error);
2303 	if (error != 0 || name_max == 0) {
2304 		printf("p9fs: warning: failed to query name_max (error %d), "
2305 		    "using fallback %d\n", error, NAME_MAX);
2306 		name_max = NAME_MAX; /* fallback and prevent retrying */
2307 	}
2308 	atomic_store_int(&vses->name_max, name_max);
2309 	return (name_max);
2310 }
2311 
2312 /*
2313  * Return POSIX pathconf information applicable to p9fs filesystems.
2314  */
2315 static int
p9fs_pathconf(struct vop_pathconf_args * ap)2316 p9fs_pathconf(struct vop_pathconf_args *ap)
2317 {
2318 	int error = 0;
2319 	struct vnode *vp = ap->a_vp;
2320 	struct p9fs_node *np = P9FS_VTON(vp);
2321 
2322 	switch (ap->a_name) {
2323 	case _PC_NAME_MAX:
2324 		*ap->a_retval = p9fs_get_name_max(np);
2325 		break;
2326 	case _PC_SYMLINK_MAX:
2327 	case _PC_PATH_MAX:
2328 		/*
2329 		 * These are conservative estimates, the real value depends on
2330 		 * the host file system.
2331 		 */
2332 		*ap->a_retval = MAXPATHLEN;
2333 		break;
2334 	default:
2335 		error = vop_stdpathconf(ap);
2336 		break;
2337 	}
2338 	return (error);
2339 }
2340 
2341 struct vop_vector p9fs_vnops = {
2342 	.vop_default =		&default_vnodeops,
2343 	.vop_lookup =		p9fs_lookup,
2344 	.vop_open =		p9fs_open,
2345 	.vop_close =		p9fs_close,
2346 	.vop_access =		p9fs_access,
2347 	.vop_delayed_setsize =	p9fs_delayed_setsize,
2348 	.vop_getattr =		p9fs_getattr_dotl,
2349 	.vop_setattr =		p9fs_setattr_dotl,
2350 	.vop_pathconf =		p9fs_pathconf,
2351 	.vop_reclaim =		p9fs_reclaim,
2352 	.vop_inactive =		p9fs_inactive,
2353 	.vop_readdir =		p9fs_readdir,
2354 	.vop_create =		p9fs_create,
2355 	.vop_mknod =		p9fs_mknod,
2356 	.vop_read =		p9fs_read,
2357 	.vop_write =		p9fs_write,
2358 	.vop_remove =		p9fs_remove,
2359 	.vop_mkdir =		p9fs_mkdir,
2360 	.vop_rmdir =		p9fs_rmdir,
2361 	.vop_strategy =		p9fs_strategy,
2362 	.vop_symlink =		p9fs_symlink,
2363 	.vop_rename =           p9fs_rename,
2364 	.vop_link =		p9fs_link,
2365 	.vop_readlink =		p9fs_readlink,
2366 	.vop_putpages =		p9fs_putpages,
2367 };
2368 VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
2369