xref: /freebsd/sys/fs/p9fs/p9fs_vfsops.c (revision d511e97c54d3b143368ff3896b4ab51040ca8994)
1 /*-
2  * Copyright (c) 2017-2020 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  */
26 
/*
 * This file implements the VFS operations for p9fs, including
 * mount, unmount, initialization, etc.
 */
31 
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/fnv_hash.h>
35 #include <sys/mount.h>
36 #include <sys/sysctl.h>
37 #include <sys/vnode.h>
38 #include <vm/uma.h>
39 
40 #include <fs/p9fs/p9fs_proto.h>
41 #include <fs/p9fs/p9_client.h>
42 #include <fs/p9fs/p9_debug.h>
43 #include <fs/p9fs/p9fs.h>
44 
/* sysctl tree root: vfs.p9fs */
SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Plan 9 filesystem");

/* This count is static now. Can be made tunable later */
#define P9FS_FLUSH_RETRIES 10

static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
/* UMA zones; the non-static ones are shared with the vnode-ops code. */
static uma_zone_t p9fs_node_zone;
uma_zone_t p9fs_io_buffer_zone;
uma_zone_t p9fs_getattr_zone;
uma_zone_t p9fs_setattr_zone;
extern struct vop_vector p9fs_vnops;

/* option parsing */
static const char *p9fs_opts[] = {
        "from", "trans", "access", NULL
};
62 
63 /* Dispose p9fs node, freeing it to the UMA zone */
64 void
65 p9fs_dispose_node(struct p9fs_node **npp)
66 {
67 	struct p9fs_node *node;
68 	struct vnode *vp;
69 
70 	node = *npp;
71 
72 	if (node == NULL)
73 		return;
74 
75 	if (node->parent && node->parent != node) {
76 		vrele(P9FS_NTOV(node->parent));
77 	}
78 
79 	P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp);
80 
81 	vp = P9FS_NTOV(node);
82 	vp->v_data = NULL;
83 
84 	/* Free our associated memory */
85 	if (!(vp->v_vflag & VV_ROOT)) {
86 		free(node->inode.i_name, M_TEMP);
87 		uma_zfree(p9fs_node_zone, node);
88 	}
89 
90 	*npp = NULL;
91 }
92 
93 /* Initialize memory allocation */
94 static int
95 p9fs_init(struct vfsconf *vfsp)
96 {
97 
98 	p9fs_node_zone = uma_zcreate("p9fs node zone",
99 	    sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
100 
101 	/* Create the getattr_dotl zone */
102 	p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
103 	    sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
104 
105 	/* Create the setattr_dotl zone */
106 	p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
107 	    sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
108 
109 	/*
110 	 * Create the io_buffer zone pool to keep things simpler in case of
111 	 * multiple threads. Each thread works with its own so there is no
112 	 * contention.
113 	 */
114 	p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
115 	    P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
116 
117 	return (0);
118 }
119 
120 /* Destroy all the allocated memory */
121 static int
122 p9fs_uninit(struct vfsconf *vfsp)
123 {
124 
125 	uma_zdestroy(p9fs_node_zone);
126 	uma_zdestroy(p9fs_io_buffer_zone);
127 	uma_zdestroy(p9fs_getattr_zone);
128 	uma_zdestroy(p9fs_setattr_zone);
129 
130 	return (0);
131 }
132 
/*
 * VFS_UNMOUNT: unmount a p9fs filesystem.
 *
 * Flushes all vnodes on the mount (retrying up to P9FS_FLUSH_RETRIES
 * times for forced unmounts), closes the 9p session and frees the
 * private mount structure.  If the flush ultimately fails, the client
 * transport status is reset to P9FS_CONNECT so the mount stays usable.
 */
static int
p9fs_unmount(struct mount *mp, int mntflags)
{
	struct p9fs_mount *vmp;
	struct p9fs_session *vses;
	int error, flags, i;

	error = 0;
	flags = 0;
	vmp = VFSTOP9(mp);
	if (vmp == NULL)
		return (0);

	vses = &vmp->p9fs_session;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	/* Tell the session code we are about to close (see p9fs.h). */
	p9fs_prepare_to_close(mp);
	for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {

		/* Flush everything on this mount point.*/
		error = vflush(mp, 1, flags, curthread);

		/* Only forced unmounts retry a failed flush. */
		if (error == 0 || (mntflags & MNT_FORCE) == 0)
			break;
		/* Sleep until interrupted or 1 tick expires. */
		error = tsleep(&error, PSOCK, "p9unmnt", 1);
		if (error == EINTR)
			break;
		/* Assume busy; overwritten on the next vflush() attempt. */
		error = EBUSY;
	}

	if (error != 0)
		goto out;
	p9fs_close_session(mp);
	/* Cleanup the mount structure. */
	free(vmp, M_P9MNT);
	mp->mnt_data = NULL;
	return (error);
out:
	/* Restore the flag in case of error */
	vses->clnt->trans_status = P9FS_CONNECT;
	return (error);
}
178 
179 /*
180  * Compare qid stored in p9fs node
181  * Return 1 if does not match otherwise return 0
182  */
183 int
184 p9fs_node_cmp(struct vnode *vp, void *arg)
185 {
186 	struct p9fs_node *np;
187 	struct p9_qid *qid;
188 
189 	np = vp->v_data;
190 	qid = (struct p9_qid *)arg;
191 
192 	if (np == NULL)
193 		return (1);
194 
195 	if (np->vqid.qid_path == qid->path) {
196 		if (vp->v_vflag & VV_ROOT)
197 			return (0);
198 		else if (np->vqid.qid_mode == qid->type &&
199 			    np->vqid.qid_version == qid->version)
200 			return (0);
201 	}
202 
203 	return (1);
204 }
205 
206 /*
207  * Cleanup p9fs node
208  *  - Destroy the FID LIST locks
209  *  - Dispose all node knowledge
210  */
211 void
212 p9fs_destroy_node(struct p9fs_node **npp)
213 {
214 	struct p9fs_node *np;
215 
216 	np = *npp;
217 
218 	if (np == NULL)
219 		return;
220 
221 	/* Destroy the FID LIST locks */
222 	P9FS_VFID_LOCK_DESTROY(np);
223 	P9FS_VOFID_LOCK_DESTROY(np);
224 
225 	/* Dispose all node knowledge.*/
226 	p9fs_dispose_node(&np);
227 }
228 
229 /*
230  * Common code used across p9fs to return vnode for the file represented
231  * by the fid.
232  * Lookup for the vnode in hash_list. This lookup is based on the qid path
233  * which is unique to a file. p9fs_node_cmp is called in this lookup process.
234  * I. If the vnode we are looking for is found in the hash list
235  *    1. Check if the vnode is a valid vnode by reloading its stats
236  *       a. if the reloading of the vnode stats returns error then remove the
237  *          vnode from hash list and return
238  *       b. If reloading of vnode stats returns without any error then, clunk the
239  *          new fid which was created for the vnode as we know that the vnode
240  *          already has a fid associated with it and return the vnode.
241  *          This is to avoid fid leaks
242  * II. If vnode is not found in the hash list then, create new vnode, p9fs
243  *     node and return the vnode
244  */
245 int
246 p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
247     struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
248     char *name)
249 {
250 	struct p9fs_mount *vmp;
251 	struct p9fs_session *vses;
252 	struct vnode *vp;
253 	struct p9fs_node *node;
254 	struct thread *td;
255 	uint32_t hash;
256 	int error, error_reload = 0;
257 	struct p9fs_inode *inode;
258 
259 	td = curthread;
260 	vmp = VFSTOP9(mp);
261 	vses = &vmp->p9fs_session;
262 
263 	/* Look for vp in the hash_list */
264 	hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
265 	error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
266 	    &fid->qid);
267 	if (error != 0)
268 		return (error);
269 	else if (vp != NULL) {
270 		if (vp->v_vflag & VV_ROOT) {
271 			if (np == NULL)
272 				p9_client_clunk(fid);
273 			*vpp = vp;
274 			return (0);
275 		}
276 		error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
277 		if (error != 0) {
278 			node = vp->v_data;
279 			/* Remove stale vnode from hash list */
280 			vfs_hash_remove(vp);
281 			node->flags |= P9FS_NODE_DELETED;
282 
283 			vput(vp);
284 			*vpp = NULLVP;
285 			vp = NULL;
286 		} else {
287 			*vpp = vp;
288 			/* Clunk the new fid if not root */
289 			p9_client_clunk(fid);
290 			return (0);
291 		}
292 	}
293 
294 	/*
295 	 * We must promote to an exclusive lock for vnode creation.  This
296 	 * can happen if lookup is passed LOCKSHARED.
297 	 */
298 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
299 		flags &= ~LK_TYPE_MASK;
300 		flags |= LK_EXCLUSIVE;
301 	}
302 
303 	/* Allocate a new vnode. */
304 	if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
305 		*vpp = NULLVP;
306 		P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
307 		return (error);
308 	}
309 
310 	/* If we dont have it, create one. */
311 	if (np == NULL) {
312 		np =  uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
313 		/* Initialize the VFID list */
314 		P9FS_VFID_LOCK_INIT(np);
315 		STAILQ_INIT(&np->vfid_list);
316 		p9fs_fid_add(np, fid, VFID);
317 
318 		/* Initialize the VOFID list */
319 		P9FS_VOFID_LOCK_INIT(np);
320 		STAILQ_INIT(&np->vofid_list);
321 
322 		vref(P9FS_NTOV(parent));
323 		np->parent = parent;
324 		np->p9fs_ses = vses; /* Map the current session */
325 		inode = &np->inode;
326 		/*Fill the name of the file in inode */
327 		inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO);
328 		strlcpy(inode->i_name, name, strlen(name)+1);
329 	} else {
330 		vp->v_type = VDIR; /* root vp is a directory */
331 		vp->v_vflag |= VV_ROOT;
332 		vref(vp); /* Increment a reference on root vnode during mount */
333 	}
334 
335 	vp->v_data = np;
336 	np->v_node = vp;
337 	inode = &np->inode;
338 	inode->i_qid_path = fid->qid.path;
339 	P9FS_SET_LINKS(inode);
340 
341 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
342 	error = insmntque(vp, mp);
343 	if (error != 0) {
344 		/*
345 		 * vput(vp) is already called from insmntque_stddtr().
346 		 * Just goto 'out' to dispose the node.
347 		 */
348 		goto out;
349 	}
350 
351 	/* Init the vnode with the disk info*/
352 	error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
353 	if (error != 0) {
354 		error_reload = 1;
355 		goto out;
356 	}
357 
358 	error = vfs_hash_insert(vp, hash, flags, td, vpp,
359 	    p9fs_node_cmp, &fid->qid);
360 	if (error != 0) {
361 		goto out;
362 	}
363 
364 	if (*vpp == NULL) {
365 		P9FS_LOCK(vses);
366 		STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
367 		np->flags |= P9FS_NODE_IN_SESSION;
368 		P9FS_UNLOCK(vses);
369 
370 		*vpp = vp;
371 	} else {
372 		/*
373 		 * Returning matching vp found in hashlist.
374 		 * So cleanup the np allocated above in this context.
375 		 */
376 		if (!IS_ROOT(np)) {
377 			p9fs_destroy_node(&np);
378 		}
379 	}
380 
381 	return (0);
382 out:
383 	/* Something went wrong, dispose the node */
384 	if (!IS_ROOT(np)) {
385 		p9fs_destroy_node(&np);
386 	}
387 
388 	if (error_reload) {
389 		vput(vp);
390 	}
391 
392 	*vpp = NULLVP;
393 	return (error);
394 }
395 
/*
 * Main mount function for 9pfs.
 *
 * Validates the mount options, allocates the private p9fs_mount,
 * establishes the 9p session (root fid + client/server attach) and
 * initializes the in-core root node.  On failure the private mount
 * structure is freed and mnt_data cleared, so the p9fs_unmount() call
 * made by p9fs_mount() on error is a harmless no-op.
 */
static int
p9_mount(struct mount *mp)
{
	struct p9_fid *fid;
	struct p9fs_mount *vmp;
	struct p9fs_session *vses;
	struct p9fs_node *p9fs_root;
	int error;
	char *from;
	int len;

	/* Verify the validity of mount options */
	if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
		return (EINVAL);

	/* Extract NULL terminated mount tag from mount options */
	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
	if (error != 0 || from[len - 1] != '\0')
		return (EINVAL);

	/* Allocate and initialize the private mount structure. */
	vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
	mp->mnt_data = vmp;
	vmp->p9fs_mountp = mp;
	/*
	 * NOTE(review): "from" points into mnt_optnew storage; confirm the
	 * mount tag is not referenced after the options are freed.
	 */
	vmp->mount_tag = from;
	vmp->mount_tag_len = len;
	vses = &vmp->p9fs_session;
	vses->p9fs_mount = mp;
	p9fs_root = &vses->rnp;
	/* Hardware iosize from the Qemu */
	mp->mnt_iosize_max = PAGE_SIZE;
	/*
	 * Init the session for the p9fs root. This creates a new root fid and
	 * attaches the client and server.
	 */
	fid = p9fs_init_session(mp, &error);
	if (fid == NULL) {
		goto out;
	}

	/* Set up the in-core root node: fid lists, self-parent, session. */
	P9FS_VFID_LOCK_INIT(p9fs_root);
	STAILQ_INIT(&p9fs_root->vfid_list);
	p9fs_fid_add(p9fs_root, fid, VFID);
	P9FS_VOFID_LOCK_INIT(p9fs_root);
	STAILQ_INIT(&p9fs_root->vofid_list);
	p9fs_root->parent = p9fs_root;
	p9fs_root->flags |= P9FS_ROOT;
	p9fs_root->p9fs_ses = vses;
	vfs_getnewfsid(mp);
	strlcpy(mp->mnt_stat.f_mntfromname, from,
	    sizeof(mp->mnt_stat.f_mntfromname));
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);
	P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
	/* Mount structures created. */

	return (0);
out:
	P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
	if (vmp != NULL) {
		free(vmp, M_P9MNT);
		mp->mnt_data = NULL;
	}
	return (error);
}
464 
465 /* Mount entry point */
466 static int
467 p9fs_mount(struct mount *mp)
468 {
469 	int error;
470 
471 	/*
472 	 * Minimal support for MNT_UPDATE - allow changing from
473 	 * readonly.
474 	 */
475 	if (mp->mnt_flag & MNT_UPDATE) {
476 		if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
477 			mp->mnt_flag &= ~MNT_RDONLY;
478 		}
479 		return (0);
480 	}
481 
482 	error = p9_mount(mp);
483 	if (error != 0)
484 		(void) p9fs_unmount(mp, MNT_FORCE);
485 
486 	return (error);
487 }
488 
489 /*
490  * Retrieve the root vnode of this mount. After filesystem is mounted, the root
491  * vnode is created for the first time. Subsequent calls to p9fs root will
492  * return the same vnode created during mount.
493  */
494 static int
495 p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
496 {
497 	struct p9fs_mount *vmp;
498 	struct p9fs_node *np;
499 	struct p9_client *clnt;
500 	struct p9_fid *vfid;
501 	int error;
502 
503 	vmp = VFSTOP9(mp);
504 	np = &vmp->p9fs_session.rnp;
505 	clnt = vmp->p9fs_session.clnt;
506 	error = 0;
507 
508 	P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
509 
510 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
511 
512 	if (error != 0) {
513 		/* for root use the nobody user's fid as vfid.
514 		 * This is used while unmounting as root when non-root
515 		 * user has mounted p9fs
516 		 */
517 		if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
518 			vfid = vmp->p9fs_session.mnt_fid;
519 		else {
520 			*vpp = NULLVP;
521 			return (error);
522 		}
523 	}
524 
525 	error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
526 	if (error != 0) {
527 		*vpp = NULLVP;
528 		return (error);
529 	}
530 	np->v_node = *vpp;
531 	return (error);
532 }
533 
534 /* Retrieve the file system statistics */
535 static int
536 p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
537 {
538 	struct p9fs_mount *vmp;
539 	struct p9fs_node *np;
540 	struct p9_client *clnt;
541 	struct p9_fid *vfid;
542 	struct p9_statfs statfs;
543 	int res, error;
544 
545 	vmp = VFSTOP9(mp);
546 	np = &vmp->p9fs_session.rnp;
547 	clnt = vmp->p9fs_session.clnt;
548 	error = 0;
549 
550 	vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
551 	if (error != 0) {
552 		return (error);
553 	}
554 
555 	res = p9_client_statfs(vfid, &statfs);
556 
557 	if (res == 0) {
558 		buf->f_type = statfs.type;
559 		/*
560 		 * We have a limit of 4k irrespective of what the
561 		 * Qemu server can do.
562 		 */
563 		if (statfs.bsize > PAGE_SIZE)
564 			buf->f_bsize = PAGE_SIZE;
565 		else
566 			buf->f_bsize = statfs.bsize;
567 
568 		buf->f_iosize = buf->f_bsize;
569 		buf->f_blocks = statfs.blocks;
570 		buf->f_bfree = statfs.bfree;
571 		buf->f_bavail = statfs.bavail;
572 		buf->f_files = statfs.files;
573 		buf->f_ffree = statfs.ffree;
574 	}
575 	else {
576 		/* Atleast set these if stat fail */
577 		buf->f_bsize = PAGE_SIZE;
578 		buf->f_iosize = buf->f_bsize;   /* XXX */
579 	}
580 
581 	return (0);
582 }
583 
/*
 * VFS_FHTOVP: file-handle-to-vnode translation is not implemented, so
 * this filesystem cannot be NFS-exported; always fail with EINVAL.
 */
static int
p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
{

	return (EINVAL);
}
590 
/* Operations vector registered with VFS via VFS_SET() below. */
struct vfsops p9fs_vfsops = {
	.vfs_init  =	p9fs_init,
	.vfs_uninit =	p9fs_uninit,
	.vfs_mount =	p9fs_mount,
	.vfs_unmount =	p9fs_unmount,
	.vfs_root =	p9fs_root,
	.vfs_statfs =	p9fs_statfs,
	.vfs_fhtovp =	p9fs_fhtovp,
};

/* Register the filesystem; VFCF_JAIL permits mounting from within jails. */
VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
MODULE_VERSION(p9fs, 1);
603