xref: /freebsd/sys/fs/p9fs/p9fs_vfsops.c (revision 1ed2ef42e01771f5d8ca9be61e07dcf0fd47feba)
1 /*-
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
27 /*
28  * This file consists of all the VFS interactions of VFS ops which include
29  * mount, unmount, initialize etc. for p9fs.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/fnv_hash.h>
35 #include <sys/mount.h>
36 #include <sys/sysctl.h>
37 #include <sys/vnode.h>
38 #include <sys/buf.h>
39 #include <vm/uma.h>
40 
41 #include <fs/p9fs/p9fs_proto.h>
42 #include <fs/p9fs/p9_client.h>
43 #include <fs/p9fs/p9_debug.h>
44 #include <fs/p9fs/p9fs.h>
45 
46 SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47     "Plan 9 filesystem");
48 
49 /* This count is static now. Can be made tunable later */
50 #define P9FS_FLUSH_RETRIES 10
51 
52 static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
53 static uma_zone_t p9fs_node_zone;
54 uma_zone_t p9fs_io_buffer_zone;
55 uma_zone_t p9fs_getattr_zone;
56 uma_zone_t p9fs_setattr_zone;
57 uma_zone_t p9fs_pbuf_zone;
58 extern struct vop_vector p9fs_vnops;
59 
60 /* option parsing */
61 static const char *p9fs_opts[] = {
62         "from", "trans", "access", NULL
63 };
64 
65 /* Dispose p9fs node, freeing it to the UMA zone */
66 void
p9fs_dispose_node(struct p9fs_node ** npp)67 p9fs_dispose_node(struct p9fs_node **npp)
68 {
69 	struct p9fs_node *node;
70 	struct vnode *vp;
71 
72 	node = *npp;
73 
74 	if (node == NULL)
75 		return;
76 
77 	if (node->parent && node->parent != node) {
78 		vrele(P9FS_NTOV(node->parent));
79 	}
80 
81 	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp);
82 
83 	vp = P9FS_NTOV(node);
84 	vp->v_data = NULL;
85 
86 	/* Free our associated memory */
87 	if (!(vp->v_vflag & VV_ROOT)) {
88 		free(node->inode.i_name, M_TEMP);
89 		uma_zfree(p9fs_node_zone, node);
90 	}
91 
92 	*npp = NULL;
93 }
94 
95 /* Initialize memory allocation */
96 static int
p9fs_init(struct vfsconf * vfsp)97 p9fs_init(struct vfsconf *vfsp)
98 {
99 
100 	p9fs_node_zone = uma_zcreate("p9fs node zone",
101 	    sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
102 
103 	/* Create the getattr_dotl zone */
104 	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
105 	    sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
106 
107 	/* Create the setattr_dotl zone */
108 	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
109 	    sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
110 
111 	/* Create the putpages zone */
112 	p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
113 
114 	/*
115 	 * Create the io_buffer zone pool to keep things simpler in case of
116 	 * multiple threads. Each thread works with its own so there is no
117 	 * contention.
118 	 */
119 	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
120 	    P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
121 
122 	p9_init_zones();
123 
124 	return (0);
125 }
126 
127 /* Destroy all the allocated memory */
128 static int
p9fs_uninit(struct vfsconf * vfsp)129 p9fs_uninit(struct vfsconf *vfsp)
130 {
131 
132 	p9_destroy_zones();
133 
134 	uma_zdestroy(p9fs_node_zone);
135 	uma_zdestroy(p9fs_io_buffer_zone);
136 	uma_zdestroy(p9fs_getattr_zone);
137 	uma_zdestroy(p9fs_setattr_zone);
138 	uma_zdestroy(p9fs_pbuf_zone);
139 
140 	return (0);
141 }
142 
143 /* Function to umount p9fs */
144 static int
p9fs_unmount(struct mount * mp,int mntflags)145 p9fs_unmount(struct mount *mp, int mntflags)
146 {
147 	struct p9fs_mount *vmp;
148 	struct p9fs_session *vses;
149 	int error, flags, i;
150 
151 	error = 0;
152 	flags = 0;
153 	vmp = VFSTOP9(mp);
154 	if (vmp == NULL)
155 		return (0);
156 
157 	vses = &vmp->p9fs_session;
158 	if (mntflags & MNT_FORCE)
159 		flags |= FORCECLOSE;
160 
161 	p9fs_prepare_to_close(mp);
162 	for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {
163 
164 		/* Flush everything on this mount point.*/
165 		error = vflush(mp, 1, flags, curthread);
166 
167 		if (error == 0 || (mntflags & MNT_FORCE) == 0)
168 			break;
169 		/* Sleep until interrupted or 1 tick expires. */
170 		error = tsleep(&error, PSOCK, "p9unmnt", 1);
171 		if (error == EINTR)
172 			break;
173 		error = EBUSY;
174 	}
175 
176 	if (error != 0)
177 		goto out;
178 	p9fs_close_session(mp);
179 	/* Cleanup the mount structure. */
180 	free(vmp, M_P9MNT);
181 	mp->mnt_data = NULL;
182 	return (error);
183 out:
184 	/* Restore the flag in case of error */
185 	vses->clnt->trans_status = P9FS_CONNECT;
186 	return (error);
187 }
188 
189 /*
190  * Compare qid stored in p9fs node
191  * Return 1 if does not match otherwise return 0
192  */
193 int
p9fs_node_cmp(struct vnode * vp,void * arg)194 p9fs_node_cmp(struct vnode *vp, void *arg)
195 {
196 	struct p9fs_node *np;
197 	struct p9_qid *qid;
198 
199 	np = vp->v_data;
200 	qid = (struct p9_qid *)arg;
201 
202 	if (np == NULL)
203 		return (1);
204 
205 	if (np->vqid.qid_path == qid->path) {
206 		if (vp->v_vflag & VV_ROOT)
207 			return (0);
208 		else if (np->vqid.qid_mode == qid->type &&
209 			    np->vqid.qid_version == qid->version)
210 			return (0);
211 	}
212 
213 	return (1);
214 }
215 
216 /*
217  * Cleanup p9fs node
218  *  - Destroy the FID LIST locks
219  *  - Dispose all node knowledge
220  */
221 void
p9fs_destroy_node(struct p9fs_node ** npp)222 p9fs_destroy_node(struct p9fs_node **npp)
223 {
224 	struct p9fs_node *np;
225 
226 	np = *npp;
227 
228 	if (np == NULL)
229 		return;
230 
231 	/* Destroy the FID LIST locks */
232 	P9FS_VFID_LOCK_DESTROY(np);
233 	P9FS_VOFID_LOCK_DESTROY(np);
234 
235 	/* Dispose all node knowledge.*/
236 	p9fs_dispose_node(&np);
237 }
238 
239 /*
240  * Common code used across p9fs to return vnode for the file represented
241  * by the fid.
242  * Lookup for the vnode in hash_list. This lookup is based on the qid path
243  * which is unique to a file. p9fs_node_cmp is called in this lookup process.
244  * I. If the vnode we are looking for is found in the hash list
245  *    1. Check if the vnode is a valid vnode by reloading its stats
246  *       a. if the reloading of the vnode stats returns error then remove the
247  *          vnode from hash list and return
248  *       b. If reloading of vnode stats returns without any error then, clunk the
249  *          new fid which was created for the vnode as we know that the vnode
250  *          already has a fid associated with it and return the vnode.
251  *          This is to avoid fid leaks
252  * II. If vnode is not found in the hash list then, create new vnode, p9fs
253  *     node and return the vnode
254  */
255 int
p9fs_vget_common(struct mount * mp,struct p9fs_node * np,int flags,struct p9fs_node * parent,struct p9_fid * fid,struct vnode ** vpp,char * name)256 p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
257     struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
258     char *name)
259 {
260 	struct p9fs_mount *vmp;
261 	struct p9fs_session *vses;
262 	struct vnode *vp;
263 	struct p9fs_node *node;
264 	struct thread *td;
265 	uint32_t hash;
266 	int error, error_reload = 0;
267 	struct p9fs_inode *inode;
268 
269 	td = curthread;
270 	vmp = VFSTOP9(mp);
271 	vses = &vmp->p9fs_session;
272 
273 	/* Look for vp in the hash_list */
274 	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
275 	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
276 	    &fid->qid);
277 	if (error != 0)
278 		return (error);
279 	else if (vp != NULL) {
280 		if (vp->v_vflag & VV_ROOT) {
281 			if (np == NULL)
282 				p9_client_clunk(fid);
283 			*vpp = vp;
284 			return (0);
285 		}
286 		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
287 		if (error != 0) {
288 			node = vp->v_data;
289 			/* Remove stale vnode from hash list */
290 			vfs_hash_remove(vp);
291 			P9FS_NODE_SETF(node, P9FS_NODE_DELETED);
292 
293 			vput(vp);
294 			*vpp = NULL;
295 			vp = NULL;
296 		} else {
297 			*vpp = vp;
298 			/* Clunk the new fid if not root */
299 			p9_client_clunk(fid);
300 			return (0);
301 		}
302 	}
303 
304 	/*
305 	 * We must promote to an exclusive lock for vnode creation.  This
306 	 * can happen if lookup is passed LOCKSHARED.
307 	 */
308 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
309 		flags &= ~LK_TYPE_MASK;
310 		flags |= LK_EXCLUSIVE;
311 	}
312 
313 	/* Allocate a new vnode. */
314 	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
315 		*vpp = NULL;
316 		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
317 		return (error);
318 	}
319 
320 	/* If we dont have it, create one. */
321 	if (np == NULL) {
322 		np =  uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
323 		/* Initialize the VFID list */
324 		P9FS_VFID_LOCK_INIT(np);
325 		STAILQ_INIT(&np->vfid_list);
326 		p9fs_fid_add(np, fid, VFID);
327 
328 		/* Initialize the VOFID list */
329 		P9FS_VOFID_LOCK_INIT(np);
330 		STAILQ_INIT(&np->vofid_list);
331 
332 		vref(P9FS_NTOV(parent));
333 		np->parent = parent;
334 		np->p9fs_ses = vses; /* Map the current session */
335 		inode = &np->inode;
336 		/*Fill the name of the file in inode */
337 		inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO);
338 		strlcpy(inode->i_name, name, strlen(name)+1);
339 	} else {
340 		vp->v_type = VDIR; /* root vp is a directory */
341 		vp->v_vflag |= VV_ROOT;
342 		vref(vp); /* Increment a reference on root vnode during mount */
343 	}
344 
345 	vp->v_data = np;
346 	np->v_node = vp;
347 	inode = &np->inode;
348 	inode->i_qid_path = fid->qid.path;
349 	P9FS_SET_LINKS(inode);
350 
351 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
352 	if (vp->v_type != VFIFO)
353 		VN_LOCK_ASHARE(vp);
354 	error = insmntque(vp, mp);
355 	if (error != 0) {
356 		/*
357 		 * vput(vp) is already called from insmntque_stddtr().
358 		 * Just goto 'out' to dispose the node.
359 		 */
360 		goto out;
361 	}
362 
363 	/* Init the vnode with the disk info*/
364 	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
365 	if (error != 0) {
366 		error_reload = 1;
367 		goto out;
368 	}
369 
370 	error = vfs_hash_insert(vp, hash, flags, td, vpp,
371 	    p9fs_node_cmp, &fid->qid);
372 	if (error != 0) {
373 		goto out;
374 	}
375 
376 	if (*vpp == NULL) {
377 		P9FS_LOCK(vses);
378 		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
379 		P9FS_NODE_SETF(np, P9FS_NODE_IN_SESSION);
380 		P9FS_UNLOCK(vses);
381 		vn_set_state(vp, VSTATE_CONSTRUCTED);
382 		*vpp = vp;
383 	} else {
384 		/*
385 		 * Returning matching vp found in hashlist.
386 		 * So cleanup the np allocated above in this context.
387 		 */
388 		if (!IS_ROOT(np)) {
389 			p9fs_destroy_node(&np);
390 		}
391 	}
392 
393 	return (0);
394 out:
395 	/* Something went wrong, dispose the node */
396 	if (!IS_ROOT(np)) {
397 		p9fs_destroy_node(&np);
398 	}
399 
400 	if (error_reload) {
401 		vput(vp);
402 	}
403 
404 	*vpp = NULL;
405 	return (error);
406 }
407 
408 /* Main mount function for 9pfs */
409 static int
p9_mount(struct mount * mp)410 p9_mount(struct mount *mp)
411 {
412 	struct p9_fid *fid;
413 	struct p9fs_mount *vmp;
414 	struct p9fs_session *vses;
415 	struct p9fs_node *p9fs_root;
416 	int error;
417 	char *from;
418 	int len;
419 
420 	/* Verify the validity of mount options */
421 	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
422 		return (EINVAL);
423 
424 	/* Extract NULL terminated mount tag from mount options */
425 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
426 	if (error != 0 || from[len - 1] != '\0')
427 		return (EINVAL);
428 
429 	/* Allocate and initialize the private mount structure. */
430 	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
431 	mp->mnt_data = vmp;
432 	vmp->p9fs_mountp = mp;
433 	vmp->mount_tag = from;
434 	vmp->mount_tag_len = len;
435 	vses = &vmp->p9fs_session;
436 	vses->p9fs_mount = mp;
437 	p9fs_root = &vses->rnp;
438 	/* Hardware iosize from the Qemu */
439 	mp->mnt_iosize_max = PAGE_SIZE;
440 	/*
441 	 * Init the session for the p9fs root. This creates a new root fid and
442 	 * attaches the client and server.
443 	 */
444 	fid = p9fs_init_session(mp, &error);
445 	if (fid == NULL) {
446 		goto out;
447 	}
448 
449 	P9FS_VFID_LOCK_INIT(p9fs_root);
450 	STAILQ_INIT(&p9fs_root->vfid_list);
451 	p9fs_fid_add(p9fs_root, fid, VFID);
452 	P9FS_VOFID_LOCK_INIT(p9fs_root);
453 	STAILQ_INIT(&p9fs_root->vofid_list);
454 	p9fs_root->parent = p9fs_root;
455 	P9FS_NODE_SETF(p9fs_root, P9FS_NODE_ROOT);
456 	p9fs_root->p9fs_ses = vses;
457 	vfs_getnewfsid(mp);
458 	strlcpy(mp->mnt_stat.f_mntfromname, from,
459 	    sizeof(mp->mnt_stat.f_mntfromname));
460 	MNT_ILOCK(mp);
461 	mp->mnt_flag |= MNT_LOCAL;
462 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
463 	MNT_IUNLOCK(mp);
464 	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
465 	/* Mount structures created. */
466 
467 	return (0);
468 out:
469 	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
470 	if (vmp != NULL) {
471 		free(vmp, M_P9MNT);
472 		mp->mnt_data = NULL;
473 	}
474 	return (error);
475 }
476 
477 /* Mount entry point */
478 static int
p9fs_mount(struct mount * mp)479 p9fs_mount(struct mount *mp)
480 {
481 	int error;
482 
483 	/*
484 	 * Minimal support for MNT_UPDATE - allow changing from
485 	 * readonly.
486 	 */
487 	if (mp->mnt_flag & MNT_UPDATE) {
488 		if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
489 			mp->mnt_flag &= ~MNT_RDONLY;
490 		}
491 		return (0);
492 	}
493 
494 	error = p9_mount(mp);
495 	if (error != 0)
496 		(void) p9fs_unmount(mp, MNT_FORCE);
497 
498 	return (error);
499 }
500 
501 /*
502  * Retrieve the root vnode of this mount. After filesystem is mounted, the root
503  * vnode is created for the first time. Subsequent calls to p9fs root will
504  * return the same vnode created during mount.
505  */
506 static int
p9fs_root(struct mount * mp,int lkflags,struct vnode ** vpp)507 p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
508 {
509 	struct p9fs_mount *vmp;
510 	struct p9fs_node *np;
511 	struct p9_client *clnt;
512 	struct p9_fid *vfid;
513 	int error;
514 
515 	vmp = VFSTOP9(mp);
516 	np = &vmp->p9fs_session.rnp;
517 	clnt = vmp->p9fs_session.clnt;
518 	error = 0;
519 
520 	P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
521 
522 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
523 
524 	if (error != 0) {
525 		/* for root use the nobody user's fid as vfid.
526 		 * This is used while unmounting as root when non-root
527 		 * user has mounted p9fs
528 		 */
529 		if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
530 			vfid = vmp->p9fs_session.mnt_fid;
531 		else {
532 			*vpp = NULL;
533 			return (error);
534 		}
535 	}
536 
537 	error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
538 	if (error != 0) {
539 		*vpp = NULL;
540 		return (error);
541 	}
542 	np->v_node = *vpp;
543 	return (error);
544 }
545 
546 /* Retrieve the file system statistics */
547 static int
p9fs_statfs(struct mount * mp __unused,struct statfs * buf)548 p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
549 {
550 	struct p9fs_mount *vmp;
551 	struct p9fs_node *np;
552 	struct p9_client *clnt;
553 	struct p9_fid *vfid;
554 	struct p9_statfs statfs;
555 	int res, error;
556 
557 	vmp = VFSTOP9(mp);
558 	np = &vmp->p9fs_session.rnp;
559 	clnt = vmp->p9fs_session.clnt;
560 	error = 0;
561 
562 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
563 	if (error != 0) {
564 		return (error);
565 	}
566 
567 	res = p9_client_statfs(vfid, &statfs);
568 
569 	if (res == 0) {
570 		buf->f_type = statfs.type;
571 		/*
572 		 * We have a limit of 4k irrespective of what the
573 		 * Qemu server can do.
574 		 */
575 		if (statfs.bsize > PAGE_SIZE)
576 			buf->f_bsize = PAGE_SIZE;
577 		else
578 			buf->f_bsize = statfs.bsize;
579 
580 		buf->f_iosize = buf->f_bsize;
581 		buf->f_blocks = statfs.blocks;
582 		buf->f_bfree = statfs.bfree;
583 		buf->f_bavail = statfs.bavail;
584 		buf->f_files = statfs.files;
585 		buf->f_ffree = statfs.ffree;
586 	}
587 	else {
588 		/* Atleast set these if stat fail */
589 		buf->f_bsize = PAGE_SIZE;
590 		buf->f_iosize = buf->f_bsize;   /* XXX */
591 	}
592 
593 	return (0);
594 }
595 
596 static int
p9fs_fhtovp(struct mount * mp,struct fid * fhp,int flags,struct vnode ** vpp)597 p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
598 {
599 
600 	return (EINVAL);
601 }
602 
603 struct vfsops p9fs_vfsops = {
604 	.vfs_init  =	p9fs_init,
605 	.vfs_uninit =	p9fs_uninit,
606 	.vfs_mount =	p9fs_mount,
607 	.vfs_unmount =	p9fs_unmount,
608 	.vfs_root =	p9fs_root,
609 	.vfs_statfs =	p9fs_statfs,
610 	.vfs_fhtovp =	p9fs_fhtovp,
611 };
612 
613 VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
614 MODULE_VERSION(p9fs, 1);
615