1 /*-
2 * Copyright (c) 2017-2020 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 */
26
/*
 * This file contains the VFS operations for p9fs: mount, unmount,
 * initialize, etc.
 */
31
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/fnv_hash.h>
35 #include <sys/mount.h>
36 #include <sys/sysctl.h>
37 #include <sys/vnode.h>
38 #include <sys/buf.h>
39 #include <vm/uma.h>
40
41 #include <fs/p9fs/p9fs_proto.h>
42 #include <fs/p9fs/p9_client.h>
43 #include <fs/p9fs/p9_debug.h>
44 #include <fs/p9fs/p9fs.h>
45
46 SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47 "Plan 9 filesystem");
48
49 /* This count is static now. Can be made tunable later */
50 #define P9FS_FLUSH_RETRIES 10
51
52 static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs");
53 static uma_zone_t p9fs_node_zone;
54 uma_zone_t p9fs_io_buffer_zone;
55 uma_zone_t p9fs_getattr_zone;
56 uma_zone_t p9fs_setattr_zone;
57 uma_zone_t p9fs_pbuf_zone;
58 extern struct vop_vector p9fs_vnops;
59
60 /* option parsing */
61 static const char *p9fs_opts[] = {
62 "from", "trans", "access", NULL
63 };
64
65 /* Dispose p9fs node, freeing it to the UMA zone */
66 void
p9fs_dispose_node(struct p9fs_node ** npp)67 p9fs_dispose_node(struct p9fs_node **npp)
68 {
69 struct p9fs_node *node;
70 struct vnode *vp;
71
72 node = *npp;
73
74 if (node == NULL)
75 return;
76
77 if (node->parent && node->parent != node) {
78 vrele(P9FS_NTOV(node->parent));
79 }
80
81 P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp);
82
83 vp = P9FS_NTOV(node);
84 vp->v_data = NULL;
85
86 /* Free our associated memory */
87 if (!(vp->v_vflag & VV_ROOT)) {
88 free(node->inode.i_name, M_TEMP);
89 uma_zfree(p9fs_node_zone, node);
90 }
91
92 *npp = NULL;
93 }
94
95 /* Initialize memory allocation */
96 static int
p9fs_init(struct vfsconf * vfsp)97 p9fs_init(struct vfsconf *vfsp)
98 {
99
100 p9fs_node_zone = uma_zcreate("p9fs node zone",
101 sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
102
103 /* Create the getattr_dotl zone */
104 p9fs_getattr_zone = uma_zcreate("p9fs getattr zone",
105 sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
106
107 /* Create the setattr_dotl zone */
108 p9fs_setattr_zone = uma_zcreate("p9fs setattr zone",
109 sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
110
111 /* Create the putpages zone */
112 p9fs_pbuf_zone = pbuf_zsecond_create("p9fs pbuf zone", nswbuf / 2);
113
114 /*
115 * Create the io_buffer zone pool to keep things simpler in case of
116 * multiple threads. Each thread works with its own so there is no
117 * contention.
118 */
119 p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone",
120 P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
121
122 p9_init_zones();
123
124 return (0);
125 }
126
127 /* Destroy all the allocated memory */
128 static int
p9fs_uninit(struct vfsconf * vfsp)129 p9fs_uninit(struct vfsconf *vfsp)
130 {
131
132 p9_destroy_zones();
133
134 uma_zdestroy(p9fs_node_zone);
135 uma_zdestroy(p9fs_io_buffer_zone);
136 uma_zdestroy(p9fs_getattr_zone);
137 uma_zdestroy(p9fs_setattr_zone);
138 uma_zdestroy(p9fs_pbuf_zone);
139
140 return (0);
141 }
142
143 /* Function to umount p9fs */
144 static int
p9fs_unmount(struct mount * mp,int mntflags)145 p9fs_unmount(struct mount *mp, int mntflags)
146 {
147 struct p9fs_mount *vmp;
148 struct p9fs_session *vses;
149 int error, flags, i;
150
151 error = 0;
152 flags = 0;
153 vmp = VFSTOP9(mp);
154 if (vmp == NULL)
155 return (0);
156
157 vses = &vmp->p9fs_session;
158 if (mntflags & MNT_FORCE)
159 flags |= FORCECLOSE;
160
161 p9fs_prepare_to_close(mp);
162 for (i = 0; i < P9FS_FLUSH_RETRIES; i++) {
163
164 /* Flush everything on this mount point.*/
165 error = vflush(mp, 1, flags, curthread);
166
167 if (error == 0 || (mntflags & MNT_FORCE) == 0)
168 break;
169 /* Sleep until interrupted or 1 tick expires. */
170 error = tsleep(&error, PSOCK, "p9unmnt", 1);
171 if (error == EINTR)
172 break;
173 error = EBUSY;
174 }
175
176 if (error != 0)
177 goto out;
178 p9fs_close_session(mp);
179 /* Cleanup the mount structure. */
180 free(vmp, M_P9MNT);
181 mp->mnt_data = NULL;
182 return (error);
183 out:
184 /* Restore the flag in case of error */
185 vses->clnt->trans_status = P9FS_CONNECT;
186 return (error);
187 }
188
189 /*
190 * Compare qid stored in p9fs node
191 * Return 1 if does not match otherwise return 0
192 */
193 int
p9fs_node_cmp(struct vnode * vp,void * arg)194 p9fs_node_cmp(struct vnode *vp, void *arg)
195 {
196 struct p9fs_node *np;
197 struct p9_qid *qid;
198
199 np = vp->v_data;
200 qid = (struct p9_qid *)arg;
201
202 if (np == NULL)
203 return (1);
204
205 if (np->vqid.qid_path == qid->path) {
206 if (vp->v_vflag & VV_ROOT)
207 return (0);
208 else if (np->vqid.qid_mode == qid->type &&
209 np->vqid.qid_version == qid->version)
210 return (0);
211 }
212
213 return (1);
214 }
215
/*
 * Clean up a p9fs node:
 *   - destroy the VFID and VOFID list locks;
 *   - dispose of the node itself via p9fs_dispose_node().
 *
 * npp: address of the caller's node pointer.  A NULL *npp is a no-op.
 *
 * The caller's pointer is handed straight to p9fs_dispose_node() so that it
 * is cleared once the node is gone.  Passing the address of a local copy
 * would leave the caller holding a dangling pointer.
 */
void
p9fs_destroy_node(struct p9fs_node **npp)
{
	struct p9fs_node *np;

	np = *npp;
	if (np == NULL)
		return;

	/* Destroy the FID list locks while the node is still valid. */
	P9FS_VFID_LOCK_DESTROY(np);
	P9FS_VOFID_LOCK_DESTROY(np);

	/* Dispose of the node and clear *npp. */
	p9fs_dispose_node(npp);
}
238
239 /*
240 * Common code used across p9fs to return vnode for the file represented
241 * by the fid.
242 * Lookup for the vnode in hash_list. This lookup is based on the qid path
243 * which is unique to a file. p9fs_node_cmp is called in this lookup process.
244 * I. If the vnode we are looking for is found in the hash list
245 * 1. Check if the vnode is a valid vnode by reloading its stats
246 * a. if the reloading of the vnode stats returns error then remove the
247 * vnode from hash list and return
248 * b. If reloading of vnode stats returns without any error then, clunk the
249 * new fid which was created for the vnode as we know that the vnode
250 * already has a fid associated with it and return the vnode.
251 * This is to avoid fid leaks
252 * II. If vnode is not found in the hash list then, create new vnode, p9fs
253 * node and return the vnode
254 */
255 int
p9fs_vget_common(struct mount * mp,struct p9fs_node * np,int flags,struct p9fs_node * parent,struct p9_fid * fid,struct vnode ** vpp,char * name)256 p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags,
257 struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp,
258 char *name)
259 {
260 struct p9fs_mount *vmp;
261 struct p9fs_session *vses;
262 struct vnode *vp;
263 struct p9fs_node *node;
264 struct thread *td;
265 uint32_t hash;
266 int error, error_reload = 0;
267 struct p9fs_inode *inode;
268
269 td = curthread;
270 vmp = VFSTOP9(mp);
271 vses = &vmp->p9fs_session;
272
273 /* Look for vp in the hash_list */
274 hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT);
275 error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp,
276 &fid->qid);
277 if (error != 0)
278 return (error);
279 else if (vp != NULL) {
280 if (vp->v_vflag & VV_ROOT) {
281 if (np == NULL)
282 p9_client_clunk(fid);
283 *vpp = vp;
284 return (0);
285 }
286 error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
287 if (error != 0) {
288 node = vp->v_data;
289 /* Remove stale vnode from hash list */
290 vfs_hash_remove(vp);
291 P9FS_NODE_SETF(node, P9FS_NODE_DELETED);
292
293 vput(vp);
294 *vpp = NULL;
295 vp = NULL;
296 } else {
297 *vpp = vp;
298 /* Clunk the new fid if not root */
299 p9_client_clunk(fid);
300 return (0);
301 }
302 }
303
304 /*
305 * We must promote to an exclusive lock for vnode creation. This
306 * can happen if lookup is passed LOCKSHARED.
307 */
308 if ((flags & LK_TYPE_MASK) == LK_SHARED) {
309 flags &= ~LK_TYPE_MASK;
310 flags |= LK_EXCLUSIVE;
311 }
312
313 /* Allocate a new vnode. */
314 if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) {
315 *vpp = NULL;
316 P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error);
317 return (error);
318 }
319
320 /* If we dont have it, create one. */
321 if (np == NULL) {
322 np = uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO);
323 /* Initialize the VFID list */
324 P9FS_VFID_LOCK_INIT(np);
325 STAILQ_INIT(&np->vfid_list);
326 p9fs_fid_add(np, fid, VFID);
327
328 /* Initialize the VOFID list */
329 P9FS_VOFID_LOCK_INIT(np);
330 STAILQ_INIT(&np->vofid_list);
331
332 vref(P9FS_NTOV(parent));
333 np->parent = parent;
334 np->p9fs_ses = vses; /* Map the current session */
335 inode = &np->inode;
336 /*Fill the name of the file in inode */
337 inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO);
338 strlcpy(inode->i_name, name, strlen(name)+1);
339 } else {
340 vp->v_type = VDIR; /* root vp is a directory */
341 vp->v_vflag |= VV_ROOT;
342 vref(vp); /* Increment a reference on root vnode during mount */
343 }
344
345 vp->v_data = np;
346 np->v_node = vp;
347 inode = &np->inode;
348 inode->i_qid_path = fid->qid.path;
349 P9FS_SET_LINKS(inode);
350
351 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
352 if (vp->v_type != VFIFO)
353 VN_LOCK_ASHARE(vp);
354 error = insmntque(vp, mp);
355 if (error != 0) {
356 /*
357 * vput(vp) is already called from insmntque_stddtr().
358 * Just goto 'out' to dispose the node.
359 */
360 goto out;
361 }
362
363 /* Init the vnode with the disk info*/
364 error = p9fs_reload_stats_dotl(vp, curthread->td_ucred);
365 if (error != 0) {
366 error_reload = 1;
367 goto out;
368 }
369
370 error = vfs_hash_insert(vp, hash, flags, td, vpp,
371 p9fs_node_cmp, &fid->qid);
372 if (error != 0) {
373 goto out;
374 }
375
376 if (*vpp == NULL) {
377 P9FS_LOCK(vses);
378 STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next);
379 P9FS_NODE_SETF(np, P9FS_NODE_IN_SESSION);
380 P9FS_UNLOCK(vses);
381 vn_set_state(vp, VSTATE_CONSTRUCTED);
382 *vpp = vp;
383 } else {
384 /*
385 * Returning matching vp found in hashlist.
386 * So cleanup the np allocated above in this context.
387 */
388 if (!IS_ROOT(np)) {
389 p9fs_destroy_node(&np);
390 }
391 }
392
393 return (0);
394 out:
395 /* Something went wrong, dispose the node */
396 if (!IS_ROOT(np)) {
397 p9fs_destroy_node(&np);
398 }
399
400 if (error_reload) {
401 vput(vp);
402 }
403
404 *vpp = NULL;
405 return (error);
406 }
407
408 /* Main mount function for 9pfs */
409 static int
p9_mount(struct mount * mp)410 p9_mount(struct mount *mp)
411 {
412 struct p9_fid *fid;
413 struct p9fs_mount *vmp;
414 struct p9fs_session *vses;
415 struct p9fs_node *p9fs_root;
416 int error;
417 char *from;
418 int len;
419
420 /* Verify the validity of mount options */
421 if (vfs_filteropt(mp->mnt_optnew, p9fs_opts))
422 return (EINVAL);
423
424 /* Extract NULL terminated mount tag from mount options */
425 error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
426 if (error != 0 || from[len - 1] != '\0')
427 return (EINVAL);
428
429 /* Allocate and initialize the private mount structure. */
430 vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO);
431 mp->mnt_data = vmp;
432 vmp->p9fs_mountp = mp;
433 vmp->mount_tag = from;
434 vmp->mount_tag_len = len;
435 vses = &vmp->p9fs_session;
436 vses->p9fs_mount = mp;
437 p9fs_root = &vses->rnp;
438 /* Hardware iosize from the Qemu */
439 mp->mnt_iosize_max = PAGE_SIZE;
440 /*
441 * Init the session for the p9fs root. This creates a new root fid and
442 * attaches the client and server.
443 */
444 fid = p9fs_init_session(mp, &error);
445 if (fid == NULL) {
446 goto out;
447 }
448
449 P9FS_VFID_LOCK_INIT(p9fs_root);
450 STAILQ_INIT(&p9fs_root->vfid_list);
451 p9fs_fid_add(p9fs_root, fid, VFID);
452 P9FS_VOFID_LOCK_INIT(p9fs_root);
453 STAILQ_INIT(&p9fs_root->vofid_list);
454 p9fs_root->parent = p9fs_root;
455 P9FS_NODE_SETF(p9fs_root, P9FS_NODE_ROOT);
456 p9fs_root->p9fs_ses = vses;
457 vfs_getnewfsid(mp);
458 strlcpy(mp->mnt_stat.f_mntfromname, from,
459 sizeof(mp->mnt_stat.f_mntfromname));
460 MNT_ILOCK(mp);
461 mp->mnt_flag |= MNT_LOCAL;
462 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
463 MNT_IUNLOCK(mp);
464 P9_DEBUG(VOPS, "%s: Mount successful\n", __func__);
465 /* Mount structures created. */
466
467 return (0);
468 out:
469 P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__);
470 if (vmp != NULL) {
471 free(vmp, M_P9MNT);
472 mp->mnt_data = NULL;
473 }
474 return (error);
475 }
476
477 /* Mount entry point */
478 static int
p9fs_mount(struct mount * mp)479 p9fs_mount(struct mount *mp)
480 {
481 int error;
482
483 /*
484 * Minimal support for MNT_UPDATE - allow changing from
485 * readonly.
486 */
487 if (mp->mnt_flag & MNT_UPDATE) {
488 if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
489 mp->mnt_flag &= ~MNT_RDONLY;
490 }
491 return (0);
492 }
493
494 error = p9_mount(mp);
495 if (error != 0)
496 (void) p9fs_unmount(mp, MNT_FORCE);
497
498 return (error);
499 }
500
501 /*
502 * Retrieve the root vnode of this mount. After filesystem is mounted, the root
503 * vnode is created for the first time. Subsequent calls to p9fs root will
504 * return the same vnode created during mount.
505 */
506 static int
p9fs_root(struct mount * mp,int lkflags,struct vnode ** vpp)507 p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp)
508 {
509 struct p9fs_mount *vmp;
510 struct p9fs_node *np;
511 struct p9_client *clnt;
512 struct p9_fid *vfid;
513 int error;
514
515 vmp = VFSTOP9(mp);
516 np = &vmp->p9fs_session.rnp;
517 clnt = vmp->p9fs_session.clnt;
518 error = 0;
519
520 P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name);
521
522 vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
523
524 if (error != 0) {
525 /* for root use the nobody user's fid as vfid.
526 * This is used while unmounting as root when non-root
527 * user has mounted p9fs
528 */
529 if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT)
530 vfid = vmp->p9fs_session.mnt_fid;
531 else {
532 *vpp = NULL;
533 return (error);
534 }
535 }
536
537 error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL);
538 if (error != 0) {
539 *vpp = NULL;
540 return (error);
541 }
542 np->v_node = *vpp;
543 return (error);
544 }
545
546 /* Retrieve the file system statistics */
547 static int
p9fs_statfs(struct mount * mp __unused,struct statfs * buf)548 p9fs_statfs(struct mount *mp __unused, struct statfs *buf)
549 {
550 struct p9fs_mount *vmp;
551 struct p9fs_node *np;
552 struct p9_client *clnt;
553 struct p9_fid *vfid;
554 struct p9_statfs statfs;
555 int res, error;
556
557 vmp = VFSTOP9(mp);
558 np = &vmp->p9fs_session.rnp;
559 clnt = vmp->p9fs_session.clnt;
560 error = 0;
561
562 vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error);
563 if (error != 0) {
564 return (error);
565 }
566
567 res = p9_client_statfs(vfid, &statfs);
568
569 if (res == 0) {
570 buf->f_type = statfs.type;
571 /*
572 * We have a limit of 4k irrespective of what the
573 * Qemu server can do.
574 */
575 if (statfs.bsize > PAGE_SIZE)
576 buf->f_bsize = PAGE_SIZE;
577 else
578 buf->f_bsize = statfs.bsize;
579
580 buf->f_iosize = buf->f_bsize;
581 buf->f_blocks = statfs.blocks;
582 buf->f_bfree = statfs.bfree;
583 buf->f_bavail = statfs.bavail;
584 buf->f_files = statfs.files;
585 buf->f_ffree = statfs.ffree;
586 }
587 else {
588 /* Atleast set these if stat fail */
589 buf->f_bsize = PAGE_SIZE;
590 buf->f_iosize = buf->f_bsize; /* XXX */
591 }
592
593 return (0);
594 }
595
/*
 * File-handle-to-vnode hook.  Every request is rejected with EINVAL; none of
 * the arguments are examined.
 */
static int
p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
{
	const int rejected = EINVAL;

	return (rejected);
}
602
603 struct vfsops p9fs_vfsops = {
604 .vfs_init = p9fs_init,
605 .vfs_uninit = p9fs_uninit,
606 .vfs_mount = p9fs_mount,
607 .vfs_unmount = p9fs_unmount,
608 .vfs_root = p9fs_root,
609 .vfs_statfs = p9fs_statfs,
610 .vfs_fhtovp = p9fs_fhtovp,
611 };
612
613 VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL);
614 MODULE_VERSION(p9fs, 1);
615