xref: /freebsd/sys/fs/p9fs/p9fs_vfsops.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 /*-
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
/*
 * This file implements the VFS operations for p9fs: mount, unmount, root
 * vnode lookup, statfs, and module initialization and teardown.
 */
31 
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/fnv_hash.h>
35 #include <sys/mount.h>
36 #include <sys/sysctl.h>
37 #include <sys/vnode.h>
38 #include <sys/buf.h>
39 #include <vm/uma.h>
40 
41 #include <fs/p9fs/p9fs_proto.h>
42 #include <fs/p9fs/p9_client.h>
43 #include <fs/p9fs/p9_debug.h>
44 #include <fs/p9fs/p9fs.h>
45 
/* Root of the vfs.p9fs sysctl tree. */
SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Plan 9 filesystem");

/*
 * Maximum number of vflush() attempts p9fs_unmount() makes for a forced
 * unmount before giving up.  The count is fixed for now; it could be made
 * tunable later.
 */
#define P9FS_FLUSH_RETRIES 10

/* Malloc type used for the per-mount struct p9fs_mount. */
static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
/* UMA zones; created in p9fs_init() and destroyed in p9fs_uninit(). */
static uma_zone_t p9fs_node_zone;
uma_zone_t p9fs_io_buffer_zone;
uma_zone_t p9fs_getattr_zone;
uma_zone_t p9fs_setattr_zone;
uma_zone_t p9fs_pbuf_zone;
/* Vnode operations vector, defined outside this file. */
extern struct vop_vector p9fs_vnops;

/*
 * Mount options accepted by p9_mount(); the list is NULL-terminated and is
 * handed to vfs_filteropt() to validate the options supplied at mount time.
 */
static const char *p9fs_opts[] = {
        "from", "trans", "access", NULL
};
64 
65 /* Dispose p9fs node, freeing it to the UMA zone */
66 void
67 p9fs_dispose_node(struct p9fs_node **npp)
68 {
69 	struct p9fs_node *node;
70 	struct vnode *vp;
71 
72 	node = *npp;
73 
74 	if (node == NULL)
75 		return;
76 
77 	if (node->parent && node->parent != node) {
78 		vrele(P9FS_NTOV(node->parent));
79 	}
80 
81 	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp);
82 
83 	vp = P9FS_NTOV(node);
84 	vp->v_data = NULL;
85 
86 	/* Free our associated memory */
87 	if (!(vp->v_vflag & VV_ROOT)) {
88 		free(node->inode.i_name, M_TEMP);
89 		uma_zfree(p9fs_node_zone, node);
90 	}
91 
92 	*npp = NULL;
93 }
94 
95 /* Initialize memory allocation */
96 static int
97 p9fs_init(struct vfsconf *vfsp)
98 {
99 
100 	p9fs_node_zone = uma_zcreate("p9fs node zone",
101 	    sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
102 
103 	/* Create the getattr_dotl zone */
104 	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
105 	    sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
106 
107 	/* Create the setattr_dotl zone */
108 	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
109 	    sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
110 
111 	/* Create the putpages zone */
112 	p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
113 
114 	/*
115 	 * Create the io_buffer zone pool to keep things simpler in case of
116 	 * multiple threads. Each thread works with its own so there is no
117 	 * contention.
118 	 */
119 	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
120 	    P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
121 
122 	return (0);
123 }
124 
125 /* Destroy all the allocated memory */
126 static int
127 p9fs_uninit(struct vfsconf *vfsp)
128 {
129 
130 	uma_zdestroy(p9fs_node_zone);
131 	uma_zdestroy(p9fs_io_buffer_zone);
132 	uma_zdestroy(p9fs_getattr_zone);
133 	uma_zdestroy(p9fs_setattr_zone);
134 	uma_zdestroy(p9fs_pbuf_zone);
135 
136 	return (0);
137 }
138 
139 /* Function to umount p9fs */
140 static int
141 p9fs_unmount(struct mount *mp, int mntflags)
142 {
143 	struct p9fs_mount *vmp;
144 	struct p9fs_session *vses;
145 	int error, flags, i;
146 
147 	error = 0;
148 	flags = 0;
149 	vmp = VFSTOP9(mp);
150 	if (vmp == NULL)
151 		return (0);
152 
153 	vses = &vmp->p9fs_session;
154 	if (mntflags & MNT_FORCE)
155 		flags |= FORCECLOSE;
156 
157 	p9fs_prepare_to_close(mp);
158 	for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {
159 
160 		/* Flush everything on this mount point.*/
161 		error = vflush(mp, 1, flags, curthread);
162 
163 		if (error == 0 || (mntflags & MNT_FORCE) == 0)
164 			break;
165 		/* Sleep until interrupted or 1 tick expires. */
166 		error = tsleep(&error, PSOCK, "p9unmnt", 1);
167 		if (error == EINTR)
168 			break;
169 		error = EBUSY;
170 	}
171 
172 	if (error != 0)
173 		goto out;
174 	p9fs_close_session(mp);
175 	/* Cleanup the mount structure. */
176 	free(vmp, M_P9MNT);
177 	mp->mnt_data = NULL;
178 	return (error);
179 out:
180 	/* Restore the flag in case of error */
181 	vses->clnt->trans_status = P9FS_CONNECT;
182 	return (error);
183 }
184 
185 /*
186  * Compare qid stored in p9fs node
187  * Return 1 if does not match otherwise return 0
188  */
189 int
190 p9fs_node_cmp(struct vnode *vp, void *arg)
191 {
192 	struct p9fs_node *np;
193 	struct p9_qid *qid;
194 
195 	np = vp->v_data;
196 	qid = (struct p9_qid *)arg;
197 
198 	if (np == NULL)
199 		return (1);
200 
201 	if (np->vqid.qid_path == qid->path) {
202 		if (vp->v_vflag & VV_ROOT)
203 			return (0);
204 		else if (np->vqid.qid_mode == qid->type &&
205 			    np->vqid.qid_version == qid->version)
206 			return (0);
207 	}
208 
209 	return (1);
210 }
211 
212 /*
213  * Cleanup p9fs node
214  *  - Destroy the FID LIST locks
215  *  - Dispose all node knowledge
216  */
217 void
218 p9fs_destroy_node(struct p9fs_node **npp)
219 {
220 	struct p9fs_node *np;
221 
222 	np = *npp;
223 
224 	if (np == NULL)
225 		return;
226 
227 	/* Destroy the FID LIST locks */
228 	P9FS_VFID_LOCK_DESTROY(np);
229 	P9FS_VOFID_LOCK_DESTROY(np);
230 
231 	/* Dispose all node knowledge.*/
232 	p9fs_dispose_node(&np);
233 }
234 
235 /*
236  * Common code used across p9fs to return vnode for the file represented
237  * by the fid.
238  * Lookup for the vnode in hash_list. This lookup is based on the qid path
239  * which is unique to a file. p9fs_node_cmp is called in this lookup process.
240  * I. If the vnode we are looking for is found in the hash list
241  *    1. Check if the vnode is a valid vnode by reloading its stats
242  *       a. if the reloading of the vnode stats returns error then remove the
243  *          vnode from hash list and return
244  *       b. If reloading of vnode stats returns without any error then, clunk the
245  *          new fid which was created for the vnode as we know that the vnode
246  *          already has a fid associated with it and return the vnode.
247  *          This is to avoid fid leaks
248  * II. If vnode is not found in the hash list then, create new vnode, p9fs
249  *     node and return the vnode
250  */
251 int
252 p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
253     struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
254     char *name)
255 {
256 	struct p9fs_mount *vmp;
257 	struct p9fs_session *vses;
258 	struct vnode *vp;
259 	struct p9fs_node *node;
260 	struct thread *td;
261 	uint32_t hash;
262 	int error, error_reload = 0;
263 	struct p9fs_inode *inode;
264 
265 	td = curthread;
266 	vmp = VFSTOP9(mp);
267 	vses = &vmp->p9fs_session;
268 
269 	/* Look for vp in the hash_list */
270 	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
271 	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
272 	    &fid->qid);
273 	if (error != 0)
274 		return (error);
275 	else if (vp != NULL) {
276 		if (vp->v_vflag & VV_ROOT) {
277 			if (np == NULL)
278 				p9_client_clunk(fid);
279 			*vpp = vp;
280 			return (0);
281 		}
282 		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
283 		if (error != 0) {
284 			node = vp->v_data;
285 			/* Remove stale vnode from hash list */
286 			vfs_hash_remove(vp);
287 			node->flags |= P9FS_NODE_DELETED;
288 
289 			vput(vp);
290 			*vpp = NULLVP;
291 			vp = NULL;
292 		} else {
293 			*vpp = vp;
294 			/* Clunk the new fid if not root */
295 			p9_client_clunk(fid);
296 			return (0);
297 		}
298 	}
299 
300 	/*
301 	 * We must promote to an exclusive lock for vnode creation.  This
302 	 * can happen if lookup is passed LOCKSHARED.
303 	 */
304 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
305 		flags &= ~LK_TYPE_MASK;
306 		flags |= LK_EXCLUSIVE;
307 	}
308 
309 	/* Allocate a new vnode. */
310 	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
311 		*vpp = NULLVP;
312 		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
313 		return (error);
314 	}
315 
316 	/* If we dont have it, create one. */
317 	if (np == NULL) {
318 		np =  uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
319 		/* Initialize the VFID list */
320 		P9FS_VFID_LOCK_INIT(np);
321 		STAILQ_INIT(&np->vfid_list);
322 		p9fs_fid_add(np, fid, VFID);
323 
324 		/* Initialize the VOFID list */
325 		P9FS_VOFID_LOCK_INIT(np);
326 		STAILQ_INIT(&np->vofid_list);
327 
328 		vref(P9FS_NTOV(parent));
329 		np->parent = parent;
330 		np->p9fs_ses = vses; /* Map the current session */
331 		inode = &np->inode;
332 		/*Fill the name of the file in inode */
333 		inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO);
334 		strlcpy(inode->i_name, name, strlen(name)+1);
335 	} else {
336 		vp->v_type = VDIR; /* root vp is a directory */
337 		vp->v_vflag |= VV_ROOT;
338 		vref(vp); /* Increment a reference on root vnode during mount */
339 	}
340 
341 	vp->v_data = np;
342 	np->v_node = vp;
343 	inode = &np->inode;
344 	inode->i_qid_path = fid->qid.path;
345 	P9FS_SET_LINKS(inode);
346 
347 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
348 	if (vp->v_type != VFIFO)
349 		VN_LOCK_ASHARE(vp);
350 	error = insmntque(vp, mp);
351 	if (error != 0) {
352 		/*
353 		 * vput(vp) is already called from insmntque_stddtr().
354 		 * Just goto 'out' to dispose the node.
355 		 */
356 		goto out;
357 	}
358 
359 	/* Init the vnode with the disk info*/
360 	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
361 	if (error != 0) {
362 		error_reload = 1;
363 		goto out;
364 	}
365 
366 	error = vfs_hash_insert(vp, hash, flags, td, vpp,
367 	    p9fs_node_cmp, &fid->qid);
368 	if (error != 0) {
369 		goto out;
370 	}
371 
372 	if (*vpp == NULL) {
373 		P9FS_LOCK(vses);
374 		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
375 		np->flags |= P9FS_NODE_IN_SESSION;
376 		P9FS_UNLOCK(vses);
377 		vn_set_state(vp, VSTATE_CONSTRUCTED);
378 		*vpp = vp;
379 	} else {
380 		/*
381 		 * Returning matching vp found in hashlist.
382 		 * So cleanup the np allocated above in this context.
383 		 */
384 		if (!IS_ROOT(np)) {
385 			p9fs_destroy_node(&np);
386 		}
387 	}
388 
389 	return (0);
390 out:
391 	/* Something went wrong, dispose the node */
392 	if (!IS_ROOT(np)) {
393 		p9fs_destroy_node(&np);
394 	}
395 
396 	if (error_reload) {
397 		vput(vp);
398 	}
399 
400 	*vpp = NULLVP;
401 	return (error);
402 }
403 
404 /* Main mount function for 9pfs */
405 static int
406 p9_mount(struct mount *mp)
407 {
408 	struct p9_fid *fid;
409 	struct p9fs_mount *vmp;
410 	struct p9fs_session *vses;
411 	struct p9fs_node *p9fs_root;
412 	int error;
413 	char *from;
414 	int len;
415 
416 	/* Verify the validity of mount options */
417 	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
418 		return (EINVAL);
419 
420 	/* Extract NULL terminated mount tag from mount options */
421 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
422 	if (error != 0 || from[len - 1] != '\0')
423 		return (EINVAL);
424 
425 	/* Allocate and initialize the private mount structure. */
426 	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
427 	mp->mnt_data = vmp;
428 	vmp->p9fs_mountp = mp;
429 	vmp->mount_tag = from;
430 	vmp->mount_tag_len = len;
431 	vses = &vmp->p9fs_session;
432 	vses->p9fs_mount = mp;
433 	p9fs_root = &vses->rnp;
434 	/* Hardware iosize from the Qemu */
435 	mp->mnt_iosize_max = PAGE_SIZE;
436 	/*
437 	 * Init the session for the p9fs root. This creates a new root fid and
438 	 * attaches the client and server.
439 	 */
440 	fid = p9fs_init_session(mp, &error);
441 	if (fid == NULL) {
442 		goto out;
443 	}
444 
445 	P9FS_VFID_LOCK_INIT(p9fs_root);
446 	STAILQ_INIT(&p9fs_root->vfid_list);
447 	p9fs_fid_add(p9fs_root, fid, VFID);
448 	P9FS_VOFID_LOCK_INIT(p9fs_root);
449 	STAILQ_INIT(&p9fs_root->vofid_list);
450 	p9fs_root->parent = p9fs_root;
451 	p9fs_root->flags |= P9FS_ROOT;
452 	p9fs_root->p9fs_ses = vses;
453 	vfs_getnewfsid(mp);
454 	strlcpy(mp->mnt_stat.f_mntfromname, from,
455 	    sizeof(mp->mnt_stat.f_mntfromname));
456 	MNT_ILOCK(mp);
457 	mp->mnt_flag |= MNT_LOCAL;
458 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
459 	MNT_IUNLOCK(mp);
460 	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
461 	/* Mount structures created. */
462 
463 	return (0);
464 out:
465 	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
466 	if (vmp != NULL) {
467 		free(vmp, M_P9MNT);
468 		mp->mnt_data = NULL;
469 	}
470 	return (error);
471 }
472 
473 /* Mount entry point */
474 static int
475 p9fs_mount(struct mount *mp)
476 {
477 	int error;
478 
479 	/*
480 	 * Minimal support for MNT_UPDATE - allow changing from
481 	 * readonly.
482 	 */
483 	if (mp->mnt_flag & MNT_UPDATE) {
484 		if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
485 			mp->mnt_flag &= ~MNT_RDONLY;
486 		}
487 		return (0);
488 	}
489 
490 	error = p9_mount(mp);
491 	if (error != 0)
492 		(void) p9fs_unmount(mp, MNT_FORCE);
493 
494 	return (error);
495 }
496 
497 /*
498  * Retrieve the root vnode of this mount. After filesystem is mounted, the root
499  * vnode is created for the first time. Subsequent calls to p9fs root will
500  * return the same vnode created during mount.
501  */
502 static int
503 p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
504 {
505 	struct p9fs_mount *vmp;
506 	struct p9fs_node *np;
507 	struct p9_client *clnt;
508 	struct p9_fid *vfid;
509 	int error;
510 
511 	vmp = VFSTOP9(mp);
512 	np = &vmp->p9fs_session.rnp;
513 	clnt = vmp->p9fs_session.clnt;
514 	error = 0;
515 
516 	P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
517 
518 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
519 
520 	if (error != 0) {
521 		/* for root use the nobody user's fid as vfid.
522 		 * This is used while unmounting as root when non-root
523 		 * user has mounted p9fs
524 		 */
525 		if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
526 			vfid = vmp->p9fs_session.mnt_fid;
527 		else {
528 			*vpp = NULLVP;
529 			return (error);
530 		}
531 	}
532 
533 	error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
534 	if (error != 0) {
535 		*vpp = NULLVP;
536 		return (error);
537 	}
538 	np->v_node = *vpp;
539 	return (error);
540 }
541 
542 /* Retrieve the file system statistics */
543 static int
544 p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
545 {
546 	struct p9fs_mount *vmp;
547 	struct p9fs_node *np;
548 	struct p9_client *clnt;
549 	struct p9_fid *vfid;
550 	struct p9_statfs statfs;
551 	int res, error;
552 
553 	vmp = VFSTOP9(mp);
554 	np = &vmp->p9fs_session.rnp;
555 	clnt = vmp->p9fs_session.clnt;
556 	error = 0;
557 
558 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
559 	if (error != 0) {
560 		return (error);
561 	}
562 
563 	res = p9_client_statfs(vfid, &statfs);
564 
565 	if (res == 0) {
566 		buf->f_type = statfs.type;
567 		/*
568 		 * We have a limit of 4k irrespective of what the
569 		 * Qemu server can do.
570 		 */
571 		if (statfs.bsize > PAGE_SIZE)
572 			buf->f_bsize = PAGE_SIZE;
573 		else
574 			buf->f_bsize = statfs.bsize;
575 
576 		buf->f_iosize = buf->f_bsize;
577 		buf->f_blocks = statfs.blocks;
578 		buf->f_bfree = statfs.bfree;
579 		buf->f_bavail = statfs.bavail;
580 		buf->f_files = statfs.files;
581 		buf->f_ffree = statfs.ffree;
582 	}
583 	else {
584 		/* Atleast set these if stat fail */
585 		buf->f_bsize = PAGE_SIZE;
586 		buf->f_iosize = buf->f_bsize;   /* XXX */
587 	}
588 
589 	return (0);
590 }
591 
592 static int
593 p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
594 {
595 
596 	return (EINVAL);
597 }
598 
/*
 * VFS operations vector for p9fs, wiring the functions defined in this file
 * to the corresponding VFS entry points.
 */
struct vfsops p9fs_vfsops = {
	.vfs_init  =	p9fs_init,
	.vfs_uninit =	p9fs_uninit,
	.vfs_mount =	p9fs_mount,
	.vfs_unmount =	p9fs_unmount,
	.vfs_root =	p9fs_root,
	.vfs_statfs =	p9fs_statfs,
	.vfs_fhtovp =	p9fs_fhtovp,
};

/*
 * Register the operations vector under the file system name p9fs, with the
 * VFCF_JAIL flag set, and declare version 1 of the p9fs module.
 */
VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
MODULE_VERSION(p9fs, 1);
611