xref: /freebsd/sys/fs/p9fs/p9fs_vfsops.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
/*
 * This file implements the VFS operations for p9fs, such as mount,
 * unmount and initialization.
 */
31 
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/fnv_hash.h>
35 #include <sys/mount.h>
36 #include <sys/sysctl.h>
37 #include <sys/vnode.h>
38 #include <sys/buf.h>
39 #include <vm/uma.h>
40 
41 #include <fs/p9fs/p9fs_proto.h>
42 #include <fs/p9fs/p9_client.h>
43 #include <fs/p9fs/p9_debug.h>
44 #include <fs/p9fs/p9fs.h>
45 
46 SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47     "Plan 9 filesystem");
48 
49 /* This count is static now. Can be made tunable later */
50 #define P9FS_FLUSH_RETRIES 10
51 
52 static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
53 static uma_zone_t p9fs_node_zone;
54 uma_zone_t p9fs_io_buffer_zone;
55 uma_zone_t p9fs_getattr_zone;
56 uma_zone_t p9fs_setattr_zone;
57 uma_zone_t p9fs_pbuf_zone;
58 extern struct vop_vector p9fs_vnops;
59 
60 /* option parsing */
61 static const char *p9fs_opts[] = {
62         "from", "trans", "access", NULL
63 };
64 
65 /* Dispose p9fs node, freeing it to the UMA zone */
66 void
67 p9fs_dispose_node(struct p9fs_node **npp)
68 {
69 	struct p9fs_node *node;
70 	struct vnode *vp;
71 
72 	node = *npp;
73 
74 	if (node == NULL)
75 		return;
76 
77 	if (node->parent && node->parent != node) {
78 		vrele(P9FS_NTOV(node->parent));
79 	}
80 
81 	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp);
82 
83 	vp = P9FS_NTOV(node);
84 	vp->v_data = NULL;
85 
86 	/* Free our associated memory */
87 	if (!(vp->v_vflag & VV_ROOT)) {
88 		free(node->inode.i_name, M_TEMP);
89 		uma_zfree(p9fs_node_zone, node);
90 	}
91 
92 	*npp = NULL;
93 }
94 
95 /* Initialize memory allocation */
96 static int
97 p9fs_init(struct vfsconf *vfsp)
98 {
99 
100 	p9fs_node_zone = uma_zcreate("p9fs node zone",
101 	    sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
102 
103 	/* Create the getattr_dotl zone */
104 	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
105 	    sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
106 
107 	/* Create the setattr_dotl zone */
108 	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
109 	    sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
110 
111 	/* Create the putpages zone */
112 	p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
113 
114 	/*
115 	 * Create the io_buffer zone pool to keep things simpler in case of
116 	 * multiple threads. Each thread works with its own so there is no
117 	 * contention.
118 	 */
119 	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
120 	    P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
121 
122 	return (0);
123 }
124 
125 /* Destroy all the allocated memory */
126 static int
127 p9fs_uninit(struct vfsconf *vfsp)
128 {
129 
130 	uma_zdestroy(p9fs_node_zone);
131 	uma_zdestroy(p9fs_io_buffer_zone);
132 	uma_zdestroy(p9fs_getattr_zone);
133 	uma_zdestroy(p9fs_setattr_zone);
134 	uma_zdestroy(p9fs_pbuf_zone);
135 
136 	return (0);
137 }
138 
139 /* Function to umount p9fs */
140 static int
141 p9fs_unmount(struct mount *mp, int mntflags)
142 {
143 	struct p9fs_mount *vmp;
144 	struct p9fs_session *vses;
145 	int error, flags, i;
146 
147 	error = 0;
148 	flags = 0;
149 	vmp = VFSTOP9(mp);
150 	if (vmp == NULL)
151 		return (0);
152 
153 	vses = &vmp->p9fs_session;
154 	if (mntflags & MNT_FORCE)
155 		flags |= FORCECLOSE;
156 
157 	p9fs_prepare_to_close(mp);
158 	for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {
159 
160 		/* Flush everything on this mount point.*/
161 		error = vflush(mp, 1, flags, curthread);
162 
163 		if (error == 0 || (mntflags & MNT_FORCE) == 0)
164 			break;
165 		/* Sleep until interrupted or 1 tick expires. */
166 		error = tsleep(&error, PSOCK, "p9unmnt", 1);
167 		if (error == EINTR)
168 			break;
169 		error = EBUSY;
170 	}
171 
172 	if (error != 0)
173 		goto out;
174 	p9fs_close_session(mp);
175 	/* Cleanup the mount structure. */
176 	free(vmp, M_P9MNT);
177 	mp->mnt_data = NULL;
178 	return (error);
179 out:
180 	/* Restore the flag in case of error */
181 	vses->clnt->trans_status = P9FS_CONNECT;
182 	return (error);
183 }
184 
185 /*
186  * Compare qid stored in p9fs node
187  * Return 1 if does not match otherwise return 0
188  */
189 int
190 p9fs_node_cmp(struct vnode *vp, void *arg)
191 {
192 	struct p9fs_node *np;
193 	struct p9_qid *qid;
194 
195 	np = vp->v_data;
196 	qid = (struct p9_qid *)arg;
197 
198 	if (np == NULL)
199 		return (1);
200 
201 	if (np->vqid.qid_path == qid->path) {
202 		if (vp->v_vflag & VV_ROOT)
203 			return (0);
204 		else if (np->vqid.qid_mode == qid->type &&
205 			    np->vqid.qid_version == qid->version)
206 			return (0);
207 	}
208 
209 	return (1);
210 }
211 
/*
 * Cleanup p9fs node
 *  - Destroy the FID LIST locks
 *  - Dispose all node knowledge
 *
 * npp points at the caller's node pointer.  A NULL node is ignored.  On
 * return the caller's pointer is NULL, so it cannot be used after the node
 * has been released.
 */
void
p9fs_destroy_node(struct p9fs_node **npp)
{
	struct p9fs_node *np;

	np = *npp;

	if (np == NULL)
		return;

	/* Destroy the FID LIST locks */
	P9FS_VFID_LOCK_DESTROY(np);
	P9FS_VOFID_LOCK_DESTROY(np);

	/*
	 * Dispose all node knowledge.  Hand over the caller's pointer rather
	 * than the address of the local copy: p9fs_dispose_node() clears the
	 * pointer it is given, and clearing only the local copy would leave
	 * the caller holding a dangling pointer.
	 */
	p9fs_dispose_node(npp);
}
234 
235 /*
236  * Common code used across p9fs to return vnode for the file represented
237  * by the fid.
238  * Lookup for the vnode in hash_list. This lookup is based on the qid path
239  * which is unique to a file. p9fs_node_cmp is called in this lookup process.
240  * I. If the vnode we are looking for is found in the hash list
241  *    1. Check if the vnode is a valid vnode by reloading its stats
242  *       a. if the reloading of the vnode stats returns error then remove the
243  *          vnode from hash list and return
244  *       b. If reloading of vnode stats returns without any error then, clunk the
245  *          new fid which was created for the vnode as we know that the vnode
246  *          already has a fid associated with it and return the vnode.
247  *          This is to avoid fid leaks
248  * II. If vnode is not found in the hash list then, create new vnode, p9fs
249  *     node and return the vnode
250  */
251 int
252 p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
253     struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
254     char *name)
255 {
256 	struct p9fs_mount *vmp;
257 	struct p9fs_session *vses;
258 	struct vnode *vp;
259 	struct p9fs_node *node;
260 	struct thread *td;
261 	uint32_t hash;
262 	int error, error_reload = 0;
263 	struct p9fs_inode *inode;
264 
265 	td = curthread;
266 	vmp = VFSTOP9(mp);
267 	vses = &vmp->p9fs_session;
268 
269 	/* Look for vp in the hash_list */
270 	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
271 	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
272 	    &fid->qid);
273 	if (error != 0)
274 		return (error);
275 	else if (vp != NULL) {
276 		if (vp->v_vflag & VV_ROOT) {
277 			if (np == NULL)
278 				p9_client_clunk(fid);
279 			*vpp = vp;
280 			return (0);
281 		}
282 		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
283 		if (error != 0) {
284 			node = vp->v_data;
285 			/* Remove stale vnode from hash list */
286 			vfs_hash_remove(vp);
287 			node->flags |= P9FS_NODE_DELETED;
288 
289 			vput(vp);
290 			*vpp = NULLVP;
291 			vp = NULL;
292 		} else {
293 			*vpp = vp;
294 			/* Clunk the new fid if not root */
295 			p9_client_clunk(fid);
296 			return (0);
297 		}
298 	}
299 
300 	/*
301 	 * We must promote to an exclusive lock for vnode creation.  This
302 	 * can happen if lookup is passed LOCKSHARED.
303 	 */
304 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
305 		flags &= ~LK_TYPE_MASK;
306 		flags |= LK_EXCLUSIVE;
307 	}
308 
309 	/* Allocate a new vnode. */
310 	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
311 		*vpp = NULLVP;
312 		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
313 		return (error);
314 	}
315 
316 	/* If we dont have it, create one. */
317 	if (np == NULL) {
318 		np =  uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
319 		/* Initialize the VFID list */
320 		P9FS_VFID_LOCK_INIT(np);
321 		STAILQ_INIT(&np->vfid_list);
322 		p9fs_fid_add(np, fid, VFID);
323 
324 		/* Initialize the VOFID list */
325 		P9FS_VOFID_LOCK_INIT(np);
326 		STAILQ_INIT(&np->vofid_list);
327 
328 		vref(P9FS_NTOV(parent));
329 		np->parent = parent;
330 		np->p9fs_ses = vses; /* Map the current session */
331 		inode = &np->inode;
332 		/*Fill the name of the file in inode */
333 		inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO);
334 		strlcpy(inode->i_name, name, strlen(name)+1);
335 	} else {
336 		vp->v_type = VDIR; /* root vp is a directory */
337 		vp->v_vflag |= VV_ROOT;
338 		vref(vp); /* Increment a reference on root vnode during mount */
339 	}
340 
341 	vp->v_data = np;
342 	np->v_node = vp;
343 	inode = &np->inode;
344 	inode->i_qid_path = fid->qid.path;
345 	P9FS_SET_LINKS(inode);
346 
347 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
348 	error = insmntque(vp, mp);
349 	if (error != 0) {
350 		/*
351 		 * vput(vp) is already called from insmntque_stddtr().
352 		 * Just goto 'out' to dispose the node.
353 		 */
354 		goto out;
355 	}
356 
357 	/* Init the vnode with the disk info*/
358 	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
359 	if (error != 0) {
360 		error_reload = 1;
361 		goto out;
362 	}
363 
364 	error = vfs_hash_insert(vp, hash, flags, td, vpp,
365 	    p9fs_node_cmp, &fid->qid);
366 	if (error != 0) {
367 		goto out;
368 	}
369 
370 	if (*vpp == NULL) {
371 		P9FS_LOCK(vses);
372 		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
373 		np->flags |= P9FS_NODE_IN_SESSION;
374 		P9FS_UNLOCK(vses);
375 
376 		*vpp = vp;
377 	} else {
378 		/*
379 		 * Returning matching vp found in hashlist.
380 		 * So cleanup the np allocated above in this context.
381 		 */
382 		if (!IS_ROOT(np)) {
383 			p9fs_destroy_node(&np);
384 		}
385 	}
386 
387 	return (0);
388 out:
389 	/* Something went wrong, dispose the node */
390 	if (!IS_ROOT(np)) {
391 		p9fs_destroy_node(&np);
392 	}
393 
394 	if (error_reload) {
395 		vput(vp);
396 	}
397 
398 	*vpp = NULLVP;
399 	return (error);
400 }
401 
402 /* Main mount function for 9pfs */
403 static int
404 p9_mount(struct mount *mp)
405 {
406 	struct p9_fid *fid;
407 	struct p9fs_mount *vmp;
408 	struct p9fs_session *vses;
409 	struct p9fs_node *p9fs_root;
410 	int error;
411 	char *from;
412 	int len;
413 
414 	/* Verify the validity of mount options */
415 	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
416 		return (EINVAL);
417 
418 	/* Extract NULL terminated mount tag from mount options */
419 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
420 	if (error != 0 || from[len - 1] != '\0')
421 		return (EINVAL);
422 
423 	/* Allocate and initialize the private mount structure. */
424 	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
425 	mp->mnt_data = vmp;
426 	vmp->p9fs_mountp = mp;
427 	vmp->mount_tag = from;
428 	vmp->mount_tag_len = len;
429 	vses = &vmp->p9fs_session;
430 	vses->p9fs_mount = mp;
431 	p9fs_root = &vses->rnp;
432 	/* Hardware iosize from the Qemu */
433 	mp->mnt_iosize_max = PAGE_SIZE;
434 	/*
435 	 * Init the session for the p9fs root. This creates a new root fid and
436 	 * attaches the client and server.
437 	 */
438 	fid = p9fs_init_session(mp, &error);
439 	if (fid == NULL) {
440 		goto out;
441 	}
442 
443 	P9FS_VFID_LOCK_INIT(p9fs_root);
444 	STAILQ_INIT(&p9fs_root->vfid_list);
445 	p9fs_fid_add(p9fs_root, fid, VFID);
446 	P9FS_VOFID_LOCK_INIT(p9fs_root);
447 	STAILQ_INIT(&p9fs_root->vofid_list);
448 	p9fs_root->parent = p9fs_root;
449 	p9fs_root->flags |= P9FS_ROOT;
450 	p9fs_root->p9fs_ses = vses;
451 	vfs_getnewfsid(mp);
452 	strlcpy(mp->mnt_stat.f_mntfromname, from,
453 	    sizeof(mp->mnt_stat.f_mntfromname));
454 	MNT_ILOCK(mp);
455 	mp->mnt_flag |= MNT_LOCAL;
456 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
457 	MNT_IUNLOCK(mp);
458 	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
459 	/* Mount structures created. */
460 
461 	return (0);
462 out:
463 	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
464 	if (vmp != NULL) {
465 		free(vmp, M_P9MNT);
466 		mp->mnt_data = NULL;
467 	}
468 	return (error);
469 }
470 
471 /* Mount entry point */
472 static int
473 p9fs_mount(struct mount *mp)
474 {
475 	int error;
476 
477 	/*
478 	 * Minimal support for MNT_UPDATE - allow changing from
479 	 * readonly.
480 	 */
481 	if (mp->mnt_flag & MNT_UPDATE) {
482 		if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
483 			mp->mnt_flag &= ~MNT_RDONLY;
484 		}
485 		return (0);
486 	}
487 
488 	error = p9_mount(mp);
489 	if (error != 0)
490 		(void) p9fs_unmount(mp, MNT_FORCE);
491 
492 	return (error);
493 }
494 
495 /*
496  * Retrieve the root vnode of this mount. After filesystem is mounted, the root
497  * vnode is created for the first time. Subsequent calls to p9fs root will
498  * return the same vnode created during mount.
499  */
500 static int
501 p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
502 {
503 	struct p9fs_mount *vmp;
504 	struct p9fs_node *np;
505 	struct p9_client *clnt;
506 	struct p9_fid *vfid;
507 	int error;
508 
509 	vmp = VFSTOP9(mp);
510 	np = &vmp->p9fs_session.rnp;
511 	clnt = vmp->p9fs_session.clnt;
512 	error = 0;
513 
514 	P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
515 
516 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
517 
518 	if (error != 0) {
519 		/* for root use the nobody user's fid as vfid.
520 		 * This is used while unmounting as root when non-root
521 		 * user has mounted p9fs
522 		 */
523 		if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
524 			vfid = vmp->p9fs_session.mnt_fid;
525 		else {
526 			*vpp = NULLVP;
527 			return (error);
528 		}
529 	}
530 
531 	error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
532 	if (error != 0) {
533 		*vpp = NULLVP;
534 		return (error);
535 	}
536 	np->v_node = *vpp;
537 	return (error);
538 }
539 
540 /* Retrieve the file system statistics */
541 static int
542 p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
543 {
544 	struct p9fs_mount *vmp;
545 	struct p9fs_node *np;
546 	struct p9_client *clnt;
547 	struct p9_fid *vfid;
548 	struct p9_statfs statfs;
549 	int res, error;
550 
551 	vmp = VFSTOP9(mp);
552 	np = &vmp->p9fs_session.rnp;
553 	clnt = vmp->p9fs_session.clnt;
554 	error = 0;
555 
556 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
557 	if (error != 0) {
558 		return (error);
559 	}
560 
561 	res = p9_client_statfs(vfid, &statfs);
562 
563 	if (res == 0) {
564 		buf->f_type = statfs.type;
565 		/*
566 		 * We have a limit of 4k irrespective of what the
567 		 * Qemu server can do.
568 		 */
569 		if (statfs.bsize > PAGE_SIZE)
570 			buf->f_bsize = PAGE_SIZE;
571 		else
572 			buf->f_bsize = statfs.bsize;
573 
574 		buf->f_iosize = buf->f_bsize;
575 		buf->f_blocks = statfs.blocks;
576 		buf->f_bfree = statfs.bfree;
577 		buf->f_bavail = statfs.bavail;
578 		buf->f_files = statfs.files;
579 		buf->f_ffree = statfs.ffree;
580 	}
581 	else {
582 		/* Atleast set these if stat fail */
583 		buf->f_bsize = PAGE_SIZE;
584 		buf->f_iosize = buf->f_bsize;   /* XXX */
585 	}
586 
587 	return (0);
588 }
589 
/*
 * File handle to vnode translation.
 * Every request is rejected with EINVAL; none of the arguments is examined.
 */
static int
p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
{

	return (EINVAL);
}
596 
597 struct vfsops p9fs_vfsops = {
598 	.vfs_init  =	p9fs_init,
599 	.vfs_uninit =	p9fs_uninit,
600 	.vfs_mount =	p9fs_mount,
601 	.vfs_unmount =	p9fs_unmount,
602 	.vfs_root =	p9fs_root,
603 	.vfs_statfs =	p9fs_statfs,
604 	.vfs_fhtovp =	p9fs_fhtovp,
605 };
606 
607 VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
608 MODULE_VERSION(p9fs, 1);
609