1 /*
2 * Copyright (c) 2017-2020 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 */
26
/*
 * This file contains the VFS file operations for the 9P protocol.
 * It forms the upper layer of the p9fs driver: these functions interact
 * with the VFS layer and with 9Pnet, the lower layer of the p9fs driver.
 * All user file operations are handled here.
 */
32 #include <sys/cdefs.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/buf.h>
36 #include <sys/dirent.h>
37 #include <sys/fcntl.h>
38 #include <sys/namei.h>
39 #include <sys/priv.h>
40 #include <sys/rwlock.h>
41 #include <sys/stat.h>
42 #include <sys/syslimits.h>
43 #include <sys/unistd.h>
44 #include <sys/vmmeter.h>
45 #include <sys/vnode.h>
46
47 #include <vm/vm.h>
48 #include <vm/vm_extern.h>
49 #include <vm/vm_object.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_pager.h>
52 #include <vm/vnode_pager.h>
53
54 #include <fs/p9fs/p9_client.h>
55 #include <fs/p9fs/p9_debug.h>
56 #include <fs/p9fs/p9fs.h>
57 #include <fs/p9fs/p9fs_proto.h>
58
59 /* File permissions. */
60 #define IEXEC 0000100 /* Executable. */
61 #define IWRITE 0000200 /* Writeable. */
62 #define IREAD 0000400 /* Readable. */
63 #define ISVTX 0001000 /* Sticky bit. */
64 #define ISGID 0002000 /* Set-gid. */
65 #define ISUID 0004000 /* Set-uid. */
66
67 static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
68 extern uma_zone_t p9fs_io_buffer_zone;
69 extern uma_zone_t p9fs_getattr_zone;
70 extern uma_zone_t p9fs_setattr_zone;
71 extern uma_zone_t p9fs_pbuf_zone;
72 /* For the root vnode's vnops. */
73 struct vop_vector p9fs_vnops;
74
75 static uint32_t p9fs_unix2p9_mode(uint32_t mode);
76
77 static void
p9fs_itimes(struct vnode * vp)78 p9fs_itimes(struct vnode *vp)
79 {
80 struct p9fs_node *node;
81 struct timespec ts;
82 struct p9fs_inode *inode;
83
84 node = P9FS_VTON(vp);
85 inode = &node->inode;
86
87 vfs_timestamp(&ts);
88 inode->i_mtime = ts.tv_sec;
89 }
90
91 /*
92 * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
93 * The cleanup includes invalidating all cache entries for the vnode,
94 * destroying the vobject, removing vnode from hashlist, removing p9fs node
95 * from the list of session p9fs nodes, and disposing of the p9fs node.
96 * Basically it is doing a reverse of what a create/vget does.
97 */
98 void
p9fs_cleanup(struct p9fs_node * np)99 p9fs_cleanup(struct p9fs_node *np)
100 {
101 struct vnode *vp;
102 struct p9fs_session *vses;
103
104 if (np == NULL)
105 return;
106
107 vp = P9FS_NTOV(np);
108 vses = np->p9fs_ses;
109
110 /* Remove the vnode from hash list if vnode is not already deleted */
111 if ((np->flags & P9FS_NODE_DELETED) == 0)
112 vfs_hash_remove(vp);
113
114 P9FS_LOCK(vses);
115 if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
116 P9FS_NODE_CLRF(np, P9FS_NODE_IN_SESSION);
117 STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
118 } else {
119 P9FS_UNLOCK(vses);
120 return;
121 }
122 P9FS_UNLOCK(vses);
123
124 /* Invalidate all entries to a particular vnode. */
125 cache_purge(vp);
126
127 /* Destroy the vm object and flush associated pages. */
128 vnode_destroy_vobject(vp);
129
130 /* Remove all the FID */
131 p9fs_fid_remove_all(np, FALSE);
132
133 /* Dispose all node knowledge.*/
134 p9fs_destroy_node(&np);
135 }
136
137 /*
138 * Reclaim VOP is defined to be called for every vnode. This starts off
139 * the cleanup by clunking(remove the fid on the server) and calls
140 * p9fs_cleanup to free all the resources allocated for p9fs node.
141 */
142 static int
p9fs_reclaim(struct vop_reclaim_args * ap)143 p9fs_reclaim(struct vop_reclaim_args *ap)
144 {
145 struct vnode *vp;
146 struct p9fs_node *np;
147
148 vp = ap->a_vp;
149 np = P9FS_VTON(vp);
150
151 P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
152 p9fs_cleanup(np);
153
154 return (0);
155 }
156
157 /*
158 * recycle vnodes which are no longer referenced i.e, their usecount is zero
159 */
160 static int
p9fs_inactive(struct vop_inactive_args * ap)161 p9fs_inactive(struct vop_inactive_args *ap)
162 {
163 struct vnode *vp;
164 struct p9fs_node *np;
165
166 vp = ap->a_vp;
167 np = P9FS_VTON(vp);
168
169 P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
170 if (np->flags & P9FS_NODE_DELETED)
171 vrecycle(vp);
172
173 return (0);
174 }
175
/*
 * Argument bundle handed through vn_vget_ino_gen() to p9fs_lookup_alloc()
 * when p9fs_lookup() resolves "..".
 */
struct p9fs_lookup_alloc_arg {
	struct componentname *cnp;	/* component being looked up */
	struct p9fs_node *dnp;		/* node of the directory searched */
	struct p9_fid *newfid;		/* fid returned by the server walk */
};
181
182 /* Callback for vn_get_ino */
183 static int
p9fs_lookup_alloc(struct mount * mp,void * arg,int lkflags,struct vnode ** vpp)184 p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
185 {
186 struct p9fs_lookup_alloc_arg *p9aa = arg;
187
188 return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
189 p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
190 }
191
192 /*
193 * p9fs_lookup is called for every component name that is being searched for.
194 *
195 * I. If component is found on the server, we look for the in-memory
196 * repesentation(vnode) of this component in namecache.
197 * A. If the node is found in the namecache, we check is the vnode is still
198 * valid.
199 * 1. If it is still valid, return vnode.
200 * 2. If it is not valid, we remove this vnode from the name cache and
201 * create a new vnode for the component and return that vnode.
202 * B. If the vnode is not found in the namecache, we look for it in the
203 * hash list.
204 * 1. If the vnode is in the hash list, we check if the vnode is still
205 * valid.
206 * a. If it is still valid, we add that vnode to the namecache for
207 * future lookups and return the vnode.
208 * b. If it is not valid, create a new vnode and p9fs node,
209 * initialize them and return the vnode.
210 * 2. If the vnode is not found in the hash list, we create a new vnode
211 * and p9fs node, initialize them and return the vnode.
212 * II. If the component is not found on the server, an error code is returned.
213 * A. For the creation case, we return EJUSTRETURN so VFS can handle it.
214 * B. For all other cases, ENOENT is returned.
215 */
216 static int
p9fs_lookup(struct vop_lookup_args * ap)217 p9fs_lookup(struct vop_lookup_args *ap)
218 {
219 struct vnode *dvp;
220 struct vnode **vpp, *vp;
221 struct componentname *cnp;
222 struct p9fs_node *dnp; /*dir p9_node */
223 struct p9fs_node *np;
224 struct p9fs_session *vses;
225 struct mount *mp; /* Get the mount point */
226 struct p9_fid *dvfid, *newfid;
227 uint64_t flags;
228 int error;
229 struct vattr vattr;
230 char tmpchr;
231
232 dvp = ap->a_dvp;
233 vpp = ap->a_vpp;
234 cnp = ap->a_cnp;
235 dnp = P9FS_VTON(dvp);
236 error = 0;
237 flags = cnp->cn_flags;
238 *vpp = NULL;
239
240 if (dnp == NULL)
241 return (ENOENT);
242
243 if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
244 vref(dvp);
245 *vpp = dvp;
246 return (0);
247 }
248
249 vses = dnp->p9fs_ses;
250 mp = vses->p9fs_mount;
251
252 /* Do the cache part ourselves */
253 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
254 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
255 return (EROFS);
256
257 if (dvp->v_type != VDIR)
258 return (ENOTDIR);
259
260 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
261 if (error)
262 return (error);
263
264 /* Do the directory walk on host to check if file exist */
265 dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
266 if (error)
267 return (error);
268
269 /*
270 * Save the character present at namelen in nameptr string and
271 * null terminate the character to get the search name for p9_dir_walk
272 * This is done to handle when lookup is for "a" and component
273 * name contains a/b/c
274 */
275 tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
276 cnp->cn_nameptr[cnp->cn_namelen] = '\0';
277
278 /*
279 * If the client_walk fails, it means the file looking for doesnt exist.
280 * Create the file is the flags are set or just return the error
281 */
282 newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
283
284 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
285
286 if (error != 0 || newfid == NULL) {
287 /* Clunk the newfid if it is not NULL */
288 if (newfid != NULL)
289 p9_client_clunk(newfid);
290
291 if (error != ENOENT)
292 return (error);
293
294 /* The requested file was not found. */
295 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
296 (flags & ISLASTCN)) {
297
298 if (mp->mnt_flag & MNT_RDONLY)
299 return (EROFS);
300
301 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
302 curthread);
303 if (!error) {
304 return (EJUSTRETURN);
305 }
306 }
307 return (error);
308 }
309
310 /* Look for the entry in the component cache*/
311 error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
312 if (error > 0 && error != ENOENT) {
313 P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
314 goto out;
315 }
316
317 if (error == -1) {
318 vp = *vpp;
319 /* Check if the entry in cache is stale or not */
320 if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
321 ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
322 goto out;
323 }
324 /*
325 * This case, we have an error coming from getattr,
326 * act accordingly.
327 */
328 cache_purge(vp);
329 if (dvp != vp)
330 vput(vp);
331 else
332 vrele(vp);
333
334 *vpp = NULL;
335 } else if (error == ENOENT) {
336 if (VN_IS_DOOMED(dvp))
337 goto out;
338 if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
339 error = ENOENT;
340 goto out;
341 }
342 cache_purge_negative(dvp);
343 }
344 /* Reset values */
345 error = 0;
346 vp = NULL;
347
348 tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
349 cnp->cn_nameptr[cnp->cn_namelen] = '\0';
350
351 /*
352 * Looks like we have found an entry. Now take care of all other cases.
353 */
354 if (flags & ISDOTDOT) {
355 struct p9fs_lookup_alloc_arg p9aa;
356 p9aa.cnp = cnp;
357 p9aa.dnp = dnp;
358 p9aa.newfid = newfid;
359 error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
360 if (error)
361 goto out;
362 *vpp = vp;
363 } else {
364 /*
365 * client_walk is equivalent to searching a component name in a
366 * directory(fid) here. If new fid is returned, we have found an
367 * entry for this component name so, go and create the rest of
368 * the vnode infra(vget_common) for the returned newfid.
369 */
370 if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
371 && (flags & ISLASTCN)) {
372 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
373 curthread);
374 if (error)
375 goto out;
376
377 error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
378 dnp, newfid, &vp, cnp->cn_nameptr);
379 if (error)
380 goto out;
381
382 *vpp = vp;
383 np = P9FS_VTON(vp);
384 if ((dnp->inode.i_mode & ISVTX) &&
385 cnp->cn_cred->cr_uid != 0 &&
386 cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
387 cnp->cn_cred->cr_uid != np->inode.n_uid) {
388 vput(*vpp);
389 *vpp = NULL;
390 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
391 return (EPERM);
392 }
393 } else {
394 error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
395 dnp, newfid, &vp, cnp->cn_nameptr);
396 if (error)
397 goto out;
398 *vpp = vp;
399 }
400 }
401
402 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
403
404 /* Store the result the cache if MAKEENTRY is specified in flags */
405 if ((cnp->cn_flags & MAKEENTRY) != 0)
406 cache_enter(dvp, *vpp, cnp);
407 return (error);
408 out:
409 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
410 p9_client_clunk(newfid);
411 return (error);
412 }
413
414 /*
415 * Common creation function for file/directory with respective flags. We first
416 * open the parent directory in order to create the file under it. For this,
417 * as 9P protocol suggests, we need to call client_walk to create the open fid.
418 * Once we have the open fid, the file_create function creates the direntry with
419 * the name and perm specified under the parent dir. If this succeeds (an entry
420 * is created for the new file on the server), we create our metadata for this
421 * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
422 * fid of the parent directory if it was not retained.
423 */
424 static int
create_common(struct p9fs_node * dnp,struct componentname * cnp,char * extension,uint32_t perm,uint8_t mode,struct vnode ** vpp)425 create_common(struct p9fs_node *dnp, struct componentname *cnp,
426 char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
427 {
428 char tmpchr;
429 struct p9_fid *dvfid, *ofid, *newfid;
430 struct p9fs_session *vses;
431 struct mount *mp;
432 int error;
433
434 P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
435
436 vses = dnp->p9fs_ses;
437 mp = vses->p9fs_mount;
438 newfid = NULL;
439 error = 0;
440
441 dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
442 if (error != 0)
443 return (error);
444
445 /* Clone the directory fid to create the new file */
446 ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
447 if (error != 0)
448 return (error);
449
450 /*
451 * Save the character present at namelen in nameptr string and
452 * null terminate the character to get the search name for p9_dir_walk
453 */
454 tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
455 cnp->cn_nameptr[cnp->cn_namelen] = '\0';
456
457 error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
458 extension);
459 if (error != 0) {
460 P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
461 goto out;
462 }
463
464 /* If its not hardlink only then do the walk, else we are done. */
465 if (!(perm & P9PROTO_DMLINK)) {
466 /*
467 * Do the lookup part and add the vnode, p9fs node. Note that vpp
468 * is filled in here.
469 */
470 newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
471 if (newfid != NULL) {
472 error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
473 dnp, newfid, vpp, cnp->cn_nameptr);
474 if (error != 0)
475 goto out;
476
477 if (ofid != NULL) {
478 struct p9fs_node *np = P9FS_VTON(*vpp);
479 ofid->v_opens = 0;
480 /*
481 * The 9P file creation request natively opens
482 * the file as part of the create operation and
483 * gives us a writable file handle (ofid).
484 * We retain this open descriptor by adding it
485 * to the VOFID list of the new vnode. This
486 * guarantees that a subsequent VOP_OPEN call
487 * does not need to send a redundant TOPEN
488 * request. This is particularly important
489 * because if a file was requested to be created
490 * with 000 permissions, the host will reject
491 * subsequent TOPEN requests due to insufficient
492 * permissions, which would cause an overall
493 * open() failure.
494 */
495 p9fs_fid_add(np, ofid, VOFID);
496 ofid = NULL; /* prevent closing handle below */
497 }
498 } else {
499 /* Not found return NOENTRY.*/
500 goto out;
501 }
502
503 if ((cnp->cn_flags & MAKEENTRY) != 0)
504 cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
505 }
506 P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
507 __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
508 /* Clunk the open ofid. */
509 if (ofid != NULL)
510 (void)p9_client_clunk(ofid);
511
512 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
513 return (0);
514 out:
515 if (ofid != NULL)
516 (void)p9_client_clunk(ofid);
517
518 if (newfid != NULL)
519 (void)p9_client_clunk(newfid);
520
521 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
522 return (error);
523 }
524
525 /*
526 * This is the main file creation VOP. Make the permissions of the new
527 * file and call the create_common common code to complete the create.
528 */
529 static int
p9fs_create(struct vop_create_args * ap)530 p9fs_create(struct vop_create_args *ap)
531 {
532 struct vnode *dvp;
533 struct vnode **vpp;
534 struct componentname *cnp;
535 uint32_t mode;
536 struct p9fs_node *dnp;
537 struct p9fs_inode *dinode;
538 uint32_t perm;
539 int ret;
540
541 dvp = ap->a_dvp;
542 vpp = ap->a_vpp;
543 cnp = ap->a_cnp;
544 dnp = P9FS_VTON(dvp);
545 dinode = &dnp->inode;
546 mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
547 perm = p9fs_unix2p9_mode(mode);
548
549 P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
550
551 ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
552 if (ret == 0) {
553 P9FS_INCR_LINKS(dinode);
554 }
555
556 return (ret);
557 }
558
559 /*
560 * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
561 * and call the create_common common code to complete the create.
562 */
563 static int
p9fs_mkdir(struct vop_mkdir_args * ap)564 p9fs_mkdir(struct vop_mkdir_args *ap)
565 {
566 struct vnode *dvp;
567 struct vnode **vpp;
568 struct componentname *cnp;
569 uint32_t mode;
570 struct p9fs_node *dnp;
571 struct p9fs_inode *dinode;
572 uint32_t perm;
573 int ret;
574
575 dvp = ap->a_dvp;
576 vpp = ap->a_vpp;
577 cnp = ap->a_cnp;
578 dnp = P9FS_VTON(dvp);
579 dinode = &dnp->inode;
580 mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
581 perm = p9fs_unix2p9_mode(mode | S_IFDIR);
582
583 P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
584
585 ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
586 if (ret == 0)
587 P9FS_INCR_LINKS(dinode);
588
589 return (ret);
590 }
591
592 /*
593 * p9fs_mknod is the main node creation vop. Make the permissions of the new node
594 * and call the create_common common code to complete the create.
595 */
596 static int
p9fs_mknod(struct vop_mknod_args * ap)597 p9fs_mknod(struct vop_mknod_args *ap)
598 {
599 struct vnode *dvp;
600 struct vnode **vpp;
601 struct componentname *cnp;
602 uint32_t mode;
603 struct p9fs_node *dnp;
604 struct p9fs_inode *dinode;
605 uint32_t perm;
606 int ret;
607
608 dvp = ap->a_dvp;
609 vpp = ap->a_vpp;
610 cnp = ap->a_cnp;
611 dnp = P9FS_VTON(dvp);
612 dinode = &dnp->inode;
613 mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
614 perm = p9fs_unix2p9_mode(mode);
615
616 P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
617
618 ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp);
619 if (ret == 0) {
620 P9FS_INCR_LINKS(dinode);
621 }
622
623 return (ret);
624 }
625
626 /* Convert open mode permissions to P9 */
627 static int
p9fs_uflags_mode(int uflags,int extended)628 p9fs_uflags_mode(int uflags, int extended)
629 {
630 uint32_t ret;
631
632 /* Convert first to O flags.*/
633 uflags = OFLAGS(uflags);
634
635 switch (uflags & 3) {
636
637 case O_RDONLY:
638 ret = P9PROTO_OREAD;
639 break;
640
641 case O_WRONLY:
642 ret = P9PROTO_OWRITE;
643 break;
644
645 case O_RDWR:
646 ret = P9PROTO_ORDWR;
647 break;
648 }
649
650 if (extended) {
651 if (uflags & O_EXCL)
652 ret |= P9PROTO_OEXCL;
653
654 if (uflags & O_APPEND)
655 ret |= P9PROTO_OAPPEND;
656 }
657
658 return (ret);
659 }
660
661 /*
662 * This is the main open VOP for every file open. If the file is already
663 * open, then increment and return. If there is no open fid for this file,
664 * there needs to be a client_walk which creates a new open fid for this file.
665 * Once we have a open fid, call the open on this file with the mode creating
666 * the vobject.
667 */
668 static int
p9fs_open(struct vop_open_args * ap)669 p9fs_open(struct vop_open_args *ap)
670 {
671 int error;
672 struct vnode *vp;
673 struct p9fs_node *np;
674 struct p9fs_session *vses;
675 struct p9_fid *vofid, *vfid;
676 size_t filesize;
677 uint32_t mode;
678
679 error = 0;
680 vp = ap->a_vp;
681 np = P9FS_VTON(vp);
682 vses = np->p9fs_ses;
683
684 P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
685
686 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
687 return (EOPNOTSUPP);
688
689 error = p9fs_reload_stats_dotl(vp, ap->a_cred);
690 if (error != 0)
691 return (error);
692
693 ASSERT_VOP_LOCKED(vp, __func__);
694 /*
695 * Invalidate the pages of the vm_object cache if the file is modified
696 * based on the flag set in reload stats
697 */
698 if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
699 error = vinvalbuf(vp, 0, 0, 0);
700 if (error != 0)
701 return (error);
702 P9FS_NODE_CLRF(np, P9FS_NODE_MODIFIED);
703 }
704
705 vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
706 if (error != 0)
707 return (error);
708
709 /*
710 * Translate kernel fflags to 9p mode
711 */
712 mode = p9fs_uflags_mode(ap->a_mode, 1);
713
714 /*
715 * Search the fid in vofid_list for current user. If found increase the open
716 * count and return. If not found clone a new fid and open the file using
717 * that cloned fid.
718 */
719 vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
720 if (vofid != NULL) {
721 vofid->v_opens++;
722 return (0);
723 } else {
724 /*vofid is the open fid for this file.*/
725 vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
726 if (error != 0)
727 return (error);
728 }
729
730 error = p9_client_open(vofid, mode);
731 if (error != 0)
732 p9_client_clunk(vofid);
733 else {
734 vofid->v_opens = 1;
735 filesize = np->inode.i_size;
736 vnode_create_vobject(vp, filesize, ap->a_td);
737 p9fs_fid_add(np, vofid, VOFID);
738 }
739
740 return (error);
741 }
742
743 /*
744 * Close the open references. Just reduce the open count on vofid and return.
745 * Let clunking of VOFID happen in p9fs_reclaim.
746 */
747 static int
p9fs_close(struct vop_close_args * ap)748 p9fs_close(struct vop_close_args *ap)
749 {
750 struct vnode *vp;
751 struct p9fs_node *np;
752 struct p9fs_session *vses;
753 struct p9_fid *vofid;
754 int error;
755
756 vp = ap->a_vp;
757 np = P9FS_VTON(vp);
758
759 if (np == NULL)
760 return (0);
761
762 vses = np->p9fs_ses;
763 error = 0;
764
765 P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
766
767 /*
768 * Translate kernel fflags to 9p mode
769 */
770 vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID,
771 p9fs_uflags_mode(ap->a_fflag, 1), &error);
772 if (vofid == NULL)
773 return (0);
774
775 vofid->v_opens--;
776
777 return (0);
778 }
779
780 /* Helper routine for checking if fileops are possible on this file */
781 static int
p9fs_check_possible(struct vnode * vp,struct vattr * vap,mode_t mode)782 p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
783 {
784
785 /* Check if we are allowed to write */
786 switch (vap->va_type) {
787 case VDIR:
788 case VLNK:
789 case VREG:
790 /*
791 * Normal nodes: check if we're on a read-only mounted
792 * file system and bail out if we're trying to write.
793 */
794 if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
795 return (EROFS);
796 break;
797 case VBLK:
798 case VCHR:
799 case VSOCK:
800 case VFIFO:
801 /*
802 * Special nodes: even on read-only mounted file systems
803 * these are allowed to be written to if permissions allow.
804 */
805 break;
806 default:
807 /* No idea what this is */
808 return (EINVAL);
809 }
810
811 return (0);
812 }
813
814 /* Check the access permissions of the file. */
815 static int
p9fs_access(struct vop_access_args * ap)816 p9fs_access(struct vop_access_args *ap)
817 {
818 struct vnode *vp;
819 accmode_t accmode;
820 struct ucred *cred;
821 struct vattr vap;
822 int error;
823
824 vp = ap->a_vp;
825 accmode = ap->a_accmode;
826 cred = ap->a_cred;
827
828 P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
829
830 /* make sure getattr is working correctly and is defined.*/
831 error = VOP_GETATTR(vp, &vap, cred);
832 if (error != 0)
833 return (error);
834
835 error = p9fs_check_possible(vp, &vap, accmode);
836 if (error != 0)
837 return (error);
838
839 /* Call the Generic Access check in VOPS*/
840 error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
841 cred);
842
843
844 return (error);
845 }
846
847 /*
848 * Reload the file stats from the server and update the inode structure present
849 * in p9fs node.
850 */
851 int
p9fs_reload_stats_dotl(struct vnode * vp,struct ucred * cred)852 p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
853 {
854 struct p9_stat_dotl *stat;
855 int error;
856 struct p9fs_node *node;
857 struct p9fs_session *vses;
858 struct p9_fid *vfid;
859
860 error = 0;
861 node = P9FS_VTON(vp);
862 vses = node->p9fs_ses;
863
864 vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
865 if (vfid == NULL) {
866 vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
867 if (error)
868 return (error);
869 }
870
871 stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
872
873 error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
874 if (error != 0) {
875 P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
876 goto out;
877 }
878
879 /* Init the vnode with the disk info */
880 p9fs_stat_vnode_dotl(stat, vp);
881 out:
882 if (stat != NULL) {
883 uma_zfree(p9fs_getattr_zone, stat);
884 }
885
886 return (error);
887 }
888
889 /*
890 * Read the current inode values into the vap attr. We reload the stats from
891 * the server.
892 */
893 static int
p9fs_getattr_dotl(struct vop_getattr_args * ap)894 p9fs_getattr_dotl(struct vop_getattr_args *ap)
895 {
896 struct vnode *vp;
897 struct vattr *vap;
898 struct p9fs_node *node;
899 struct p9fs_inode *inode;
900 int error;
901
902 vp = ap->a_vp;
903 vap = ap->a_vap;
904 node = P9FS_VTON(vp);
905
906 if (node == NULL)
907 return (ENOENT);
908
909 inode = &node->inode;
910
911 P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode));
912
913 /* Reload our stats once to get the right values.*/
914 error = p9fs_reload_stats_dotl(vp, ap->a_cred);
915 if (error != 0) {
916 P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error);
917 return (error);
918 }
919
920 /* Basic info */
921 VATTR_NULL(vap);
922
923 VI_LOCK(vp);
924 vap->va_atime.tv_sec = inode->i_atime;
925 vap->va_mtime.tv_sec = inode->i_mtime;
926 vap->va_ctime.tv_sec = inode->i_ctime;
927 vap->va_atime.tv_nsec = inode->i_atime_nsec;
928 vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
929 vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
930 vap->va_type = IFTOVT(inode->i_mode);
931 vap->va_mode = inode->i_mode;
932 vap->va_uid = inode->n_uid;
933 vap->va_gid = inode->n_gid;
934 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
935 vap->va_size = inode->i_size;
936 vap->va_nlink = inode->i_links_count;
937 vap->va_blocksize = inode->blksize;
938 vap->va_fileid = inode->i_qid_path;
939 vap->va_flags = inode->i_flags;
940 vap->va_gen = inode->gen;
941 vap->va_filerev = inode->data_version;
942 vap->va_vaflags = 0;
943 vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK;
944 VI_UNLOCK(vp);
945
946 return (0);
947 }
948
949 /* Convert a standard FreeBSD permission to P9. */
950 static uint32_t
p9fs_unix2p9_mode(uint32_t mode)951 p9fs_unix2p9_mode(uint32_t mode)
952 {
953 uint32_t res;
954
955 res = mode & 0777;
956 if (S_ISDIR(mode))
957 res |= P9PROTO_DMDIR;
958 if (S_ISSOCK(mode))
959 res |= P9PROTO_DMSOCKET;
960 if (S_ISLNK(mode))
961 res |= P9PROTO_DMSYMLINK;
962 if (S_ISFIFO(mode))
963 res |= P9PROTO_DMNAMEDPIPE;
964 if ((mode & S_ISUID) == S_ISUID)
965 res |= P9PROTO_DMSETUID;
966 if ((mode & S_ISGID) == S_ISGID)
967 res |= P9PROTO_DMSETGID;
968 if ((mode & S_ISVTX) == S_ISVTX)
969 res |= P9PROTO_DMSETVTX;
970
971 return (res);
972 }
973
974 /* Update inode with the stats read from server.(9P2000.L version) */
975 int
p9fs_stat_vnode_dotl(struct p9_stat_dotl * stat,struct vnode * vp)976 p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp)
977 {
978 struct p9fs_node *np;
979 struct p9fs_inode *inode;
980 bool excl_locked;
981
982 np = P9FS_VTON(vp);
983 inode = &np->inode;
984
985 /*
986 * This function might be called with the vnode only shared
987 * locked. Then, interlock the vnode to ensure the exclusive
988 * access to the inode fields: the thread either owns
989 * exclusive vnode lock, or shared vnode lock plus interlock.
990 *
991 * If the vnode is locked exclusive, do not take the
992 * interlock. We directly call vnode_pager_setsize(), which
993 * needs the vm_object lock, and that lock is before vnode
994 * interlock in the lock order.
995 */
996 ASSERT_VOP_LOCKED(vp, __func__);
997 excl_locked = VOP_ISLOCKED(vp) == LK_EXCLUSIVE;
998 if (!excl_locked)
999 VI_LOCK(vp);
1000
1001 /* Update the pager size if file size changes on host */
1002 if (inode->i_size != stat->st_size) {
1003 inode->i_size = stat->st_size;
1004 if (vp->v_type == VREG) {
1005 if (excl_locked)
1006 vnode_pager_setsize(vp, inode->i_size);
1007 else
1008 vn_delayed_setsize_locked(vp);
1009 }
1010 }
1011
1012 inode->i_mtime = stat->st_mtime_sec;
1013 inode->i_atime = stat->st_atime_sec;
1014 inode->i_ctime = stat->st_ctime_sec;
1015 inode->i_mtime_nsec = stat->st_mtime_nsec;
1016 inode->i_atime_nsec = stat->st_atime_nsec;
1017 inode->i_ctime_nsec = stat->st_ctime_nsec;
1018 inode->n_uid = stat->st_uid;
1019 inode->n_gid = stat->st_gid;
1020 inode->i_mode = stat->st_mode;
1021 vp->v_type = IFTOVT(inode->i_mode);
1022 inode->i_links_count = stat->st_nlink;
1023 inode->blksize = stat->st_blksize;
1024 inode->blocks = stat->st_blocks;
1025 inode->gen = stat->st_gen;
1026 inode->data_version = stat->st_data_version;
1027
1028 /* Setting a flag if file changes based on qid version */
1029 if (np->vqid.qid_version != stat->qid.version)
1030 P9FS_NODE_SETF(np, P9FS_NODE_MODIFIED);
1031 memcpy(&np->vqid, &stat->qid, sizeof(stat->qid));
1032 if (!excl_locked)
1033 VI_UNLOCK(vp);
1034
1035 return (0);
1036 }
1037
1038 /*
1039 * Write the current in memory inode stats into persistent stats structure
1040 * to write to the server(for linux version).
1041 */
1042 static int
p9fs_inode_to_iattr(struct p9fs_inode * inode,struct p9_iattr_dotl * p9attr)1043 p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
1044 {
1045 p9attr->size = inode->i_size;
1046 p9attr->mode = inode->i_mode;
1047 p9attr->uid = inode->n_uid;
1048 p9attr->gid = inode->n_gid;
1049 p9attr->atime_sec = inode->i_atime;
1050 p9attr->atime_nsec = inode->i_atime_nsec;
1051 p9attr->mtime_sec = inode->i_mtime;
1052 p9attr->mtime_nsec = inode->i_mtime_nsec;
1053
1054 return (0);
1055 }
1056
1057 /*
1058 * Modify the ownership of a file whenever the chown is called on the
1059 * file.
1060 */
1061 static int
p9fs_chown(struct vnode * vp,uid_t uid,gid_t gid,struct ucred * cred,struct thread * td)1062 p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
1063 struct thread *td)
1064 {
1065 struct p9fs_node *np;
1066 struct p9fs_inode *inode;
1067 uid_t ouid;
1068 gid_t ogid;
1069 int error;
1070
1071 np = P9FS_VTON(vp);
1072 inode = &np->inode;
1073
1074 if (uid == (uid_t)VNOVAL)
1075 uid = inode->n_uid;
1076 if (gid == (gid_t)VNOVAL)
1077 gid = inode->n_gid;
1078 /*
1079 * To modify the ownership of a file, must possess VADMIN for that
1080 * file.
1081 */
1082 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
1083 return (error);
1084 /*
1085 * To change the owner of a file, or change the group of a file to a
1086 * group of which we are not a member, the caller must have
1087 * privilege.
1088 */
1089 if (((uid != inode->n_uid && uid != cred->cr_uid) ||
1090 (gid != inode->n_gid && !groupmember(gid, cred))) &&
1091 (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
1092 return (error);
1093
1094 ogid = inode->n_gid;
1095 ouid = inode->n_uid;
1096
1097 inode->n_gid = gid;
1098 inode->n_uid = uid;
1099
1100 if ((inode->i_mode & (ISUID | ISGID)) &&
1101 (ouid != uid || ogid != gid)) {
1102
1103 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
1104 inode->i_mode &= ~(ISUID | ISGID);
1105 }
1106 P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
1107
1108 return (0);
1109 }
1110
1111 /*
1112 * Update the in memory inode with all chmod new permissions/mode. Typically a
1113 * setattr is called to update it to server.
1114 */
1115 static int
p9fs_chmod(struct vnode * vp,uint32_t mode,struct ucred * cred,struct thread * td)1116 p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td)
1117 {
1118 struct p9fs_node *np;
1119 struct p9fs_inode *inode;
1120 uint32_t nmode;
1121 int error;
1122
1123 np = P9FS_VTON(vp);
1124 inode = &np->inode;
1125
1126 P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td);
1127 /*
1128 * To modify the permissions on a file, must possess VADMIN
1129 * for that file.
1130 */
1131 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
1132 return (error);
1133
1134 /*
1135 * Privileged processes may set the sticky bit on non-directories,
1136 * as well as set the setgid bit on a file with a group that the
1137 * process is not a member of. Both of these are allowed in
1138 * jail(8).
1139 */
1140 if (vp->v_type != VDIR && (mode & S_ISTXT)) {
1141 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
1142 return (EFTYPE);
1143 }
1144 if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) {
1145 error = priv_check_cred(cred, PRIV_VFS_SETGID);
1146 if (error != 0)
1147 return (error);
1148 }
1149
1150 /*
1151 * Deny setting setuid if we are not the file owner.
1152 */
1153 if ((mode & ISUID) && inode->n_uid != cred->cr_uid) {
1154 error = priv_check_cred(cred, PRIV_VFS_ADMIN);
1155 if (error != 0)
1156 return (error);
1157 }
1158 nmode = inode->i_mode;
1159 nmode &= ~ALLPERMS;
1160 nmode |= (mode & ALLPERMS);
1161 inode->i_mode = nmode;
1162
1163 P9_DEBUG(VOPS, "%s: to mode %x %d \n ", __func__, nmode, error);
1164
1165 return (error);
1166 }
1167
1168 /*
1169 * Set the attributes of a file referenced by fid. A valid bitmask is sent
1170 * in request selecting which fields to set
1171 */
1172 static int
p9fs_setattr_dotl(struct vop_setattr_args * ap)1173 p9fs_setattr_dotl(struct vop_setattr_args *ap)
1174 {
1175 struct vnode *vp;
1176 struct vattr *vap;
1177 struct p9fs_node *node;
1178 struct p9fs_inode *inode;
1179 struct ucred *cred;
1180 struct thread *td;
1181 struct p9_iattr_dotl *p9attr;
1182 struct p9fs_session *vses;
1183 struct p9_fid *vfid;
1184 uint64_t oldfilesize;
1185 int error;
1186
1187 vp = ap->a_vp;
1188 vap = ap->a_vap;
1189 node = P9FS_VTON(vp);
1190 inode = &node->inode;
1191 cred = ap->a_cred;
1192 td = curthread;
1193 vses = node->p9fs_ses;
1194 error = 0;
1195
1196 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
1197 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
1198 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
1199 (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
1200 P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
1201 return (EINVAL);
1202 }
1203 /* Disallow write attempts on read only filesystem */
1204 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1205 return (EROFS);
1206
1207 /* Setting of flags is not supported */
1208 if (vap->va_flags != VNOVAL)
1209 return (EOPNOTSUPP);
1210
1211 /* Allocate p9attr struct */
1212 p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
1213 if (p9attr == NULL)
1214 return (ENOMEM);
1215
1216 /* Check if we need to change the ownership of the file*/
1217 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1218 P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
1219 vp, td, vap->va_uid, vap->va_gid);
1220
1221 error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
1222 p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
1223 P9PROTO_SETATTR_MODE;
1224 if (error)
1225 goto out;
1226 }
1227
1228 /* Check for mode changes */
1229 if (vap->va_mode != (mode_t)VNOVAL) {
1230 P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
1231 vap->va_mode);
1232
1233 error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
1234 p9attr->valid |= P9PROTO_SETATTR_MODE;
1235 if (error)
1236 goto out;
1237 }
1238
1239 /* Update the size of the file and update mtime */
1240 if (vap->va_size != (uint64_t)VNOVAL) {
1241 P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
1242 vp, td, (uintmax_t)vap->va_size);
1243 switch (vp->v_type) {
1244 case VDIR:
1245 error = EISDIR;
1246 goto out;
1247 case VLNK:
1248 case VREG:
1249 /* Invalidate cached pages of vp */
1250 error = vinvalbuf(vp, 0, 0, 0);
1251 if (error)
1252 goto out;
1253 oldfilesize = inode->i_size;
1254 inode->i_size = vap->va_size;
1255 /* Update the p9fs_inode time */
1256 p9fs_itimes(vp);
1257 p9attr->valid |= P9PROTO_SETATTR_SIZE |
1258 P9PROTO_SETATTR_ATIME |
1259 P9PROTO_SETATTR_MTIME |
1260 P9PROTO_SETATTR_ATIME_SET |
1261 P9PROTO_SETATTR_MTIME_SET ;
1262 break;
1263 default:
1264 goto out;
1265 }
1266 } else if (vap->va_atime.tv_sec != VNOVAL ||
1267 vap->va_mtime.tv_sec != VNOVAL) {
1268 P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
1269 __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
1270 (uintmax_t)vap->va_mtime.tv_sec);
1271 /* Update the p9fs_inode times */
1272 p9fs_itimes(vp);
1273 p9attr->valid |= P9PROTO_SETATTR_ATIME |
1274 P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
1275 P9PROTO_SETATTR_MTIME_SET;
1276 }
1277
1278 vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
1279 if (vfid == NULL) {
1280 vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
1281 if (error)
1282 goto out;
1283 }
1284
1285 /* Write the inode structure values into p9attr */
1286 p9fs_inode_to_iattr(inode, p9attr);
1287 error = p9_client_setattr(vfid, p9attr);
1288 if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
1289 if (error)
1290 inode->i_size = oldfilesize;
1291 else
1292 vnode_pager_setsize(vp, inode->i_size);
1293 }
1294 out:
1295 if (p9attr) {
1296 uma_zfree(p9fs_setattr_zone, p9attr);
1297 }
1298 P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
1299 return (error);
1300 }
1301
/*
 * State filled in by p9fs_get_open_fid() and later consumed by
 * p9fs_release_open_fid().
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid found for the vnode */
	int fflags;		/* flags passed to VOP_OPEN; set when opened */
	int opened;		/* TRUE when p9fs_get_open_fid() did VOP_OPEN */
};
1307
1308 /*
1309 * TODO: change this to take P9PROTO_* mode and avoid routing through
1310 * VOP_OPEN, factoring out implementation of p9fs_open.
1311 */
1312 static int
p9fs_get_open_fid(struct vnode * vp,int fflags,struct ucred * cr,struct open_fid_state * statep)1313 p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep)
1314 {
1315 struct p9fs_node *np;
1316 struct p9fs_session *vses;
1317 struct p9_fid *vofid;
1318 int mode = p9fs_uflags_mode(fflags, TRUE);
1319 int error = 0;
1320
1321 statep->opened = FALSE;
1322
1323 np = P9FS_VTON(vp);
1324 vses = np->p9fs_ses;
1325 vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1326 if (vofid == NULL) {
1327 error = VOP_OPEN(vp, fflags, cr, curthread, NULL);
1328 if (error) {
1329 return (error);
1330 }
1331 vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error);
1332 if (vofid == NULL) {
1333 return (EBADF);
1334 }
1335 statep->fflags = fflags;
1336 statep->opened = TRUE;
1337 }
1338 statep->vofid = vofid;
1339 return (0);
1340 }
1341
1342 static void
p9fs_release_open_fid(struct vnode * vp,struct ucred * cr,struct open_fid_state * statep)1343 p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep)
1344 {
1345 if (statep->opened) {
1346 (void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
1347 }
1348 }
1349
1350 /*
1351 * An I/O buffer is used to to do any transfer. The uio is the vfs structure we
1352 * need to copy data into. As long as resid is greater than zero, we call
1353 * client_read to read data from offset(offset into the file) in the open fid
1354 * for the file into the I/O buffer. The data is read into the user data buffer.
1355 */
1356 static int
p9fs_read(struct vop_read_args * ap)1357 p9fs_read(struct vop_read_args *ap)
1358 {
1359 struct vnode *vp;
1360 struct uio *uio;
1361 struct p9fs_node *np;
1362 uint64_t offset;
1363 int64_t ret;
1364 uint64_t resid;
1365 uint32_t count;
1366 int error;
1367 char *io_buffer = NULL;
1368 uint64_t filesize;
1369 struct open_fid_state ostate;
1370
1371 vp = ap->a_vp;
1372 uio = ap->a_uio;
1373 np = P9FS_VTON(vp);
1374 error = 0;
1375
1376 if (VN_ISDEV(vp))
1377 return (EOPNOTSUPP);
1378 if (vp->v_type != VREG)
1379 return (EISDIR);
1380 if (uio->uio_resid == 0)
1381 return (0);
1382 if (uio->uio_offset < 0)
1383 return (EINVAL);
1384
1385 error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
1386 if (error)
1387 return (error);
1388
1389 /* where in the file are we to start reading */
1390 offset = uio->uio_offset;
1391 filesize = np->inode.i_size;
1392 if (uio->uio_offset >= filesize)
1393 goto out;
1394
1395 P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
1396 __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);
1397
1398 /* Work with a local buffer from the pool for this vop */
1399
1400 io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1401 while ((resid = uio->uio_resid) > 0) {
1402 if (offset >= filesize)
1403 break;
1404 count = MIN(filesize - uio->uio_offset , resid);
1405 if (count == 0)
1406 break;
1407
1408 /* Copy count bytes into the uio */
1409 ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
1410 /*
1411 * This is the only place in the entire p9fs where we check the
1412 * error for < 0 as p9_client_read/write return the number of
1413 * bytes instead of an error code. In this case if ret is < 0,
1414 * it means there is an IO error.
1415 */
1416 if (ret < 0) {
1417 error = -ret;
1418 goto out;
1419 }
1420 error = uiomove(io_buffer, ret, uio);
1421 if (error != 0)
1422 goto out;
1423
1424 offset += ret;
1425 }
1426 uio->uio_offset = offset;
1427 out:
1428 uma_zfree(p9fs_io_buffer_zone, io_buffer);
1429 p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1430
1431 return (error);
1432 }
1433
1434 /*
1435 * The user buffer contains the data to be written. This data is copied first
1436 * from uio into I/O buffer. This I/O buffer is used to do the client_write to
1437 * the fid of the file starting from the offset given upto count bytes. The
1438 * number of bytes written is returned to the caller.
1439 */
1440 static int
p9fs_write(struct vop_write_args * ap)1441 p9fs_write(struct vop_write_args *ap)
1442 {
1443 struct vnode *vp;
1444 struct uio *uio;
1445 struct p9fs_node *np;
1446 uint64_t off, offset;
1447 int64_t ret;
1448 uint64_t resid, bytes_written;
1449 uint32_t count;
1450 int error, ioflag;
1451 uint64_t file_size;
1452 char *io_buffer = NULL;
1453 struct open_fid_state ostate;
1454
1455 vp = ap->a_vp;
1456 uio = ap->a_uio;
1457 np = P9FS_VTON(vp);
1458 error = 0;
1459 ioflag = ap->a_ioflag;
1460
1461 error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
1462 if (error)
1463 return (error);
1464
1465 P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
1466 __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1467
1468 if (uio->uio_offset < 0) {
1469 error = EINVAL;
1470 goto out;
1471 }
1472 if (uio->uio_resid == 0)
1473 goto out;
1474
1475 file_size = np->inode.i_size;
1476
1477 switch (vp->v_type) {
1478 case VREG:
1479 if (ioflag & IO_APPEND)
1480 uio->uio_offset = file_size;
1481 break;
1482 case VDIR:
1483 return (EISDIR);
1484 case VLNK:
1485 break;
1486 default:
1487 panic("%s: bad file type vp: %p", __func__, vp);
1488 }
1489
1490 resid = uio->uio_resid;
1491 offset = uio->uio_offset;
1492 bytes_written = 0;
1493 error = 0;
1494
1495 io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1496 while ((resid = uio->uio_resid) > 0) {
1497 off = 0;
1498 count = MIN(resid, P9FS_IOUNIT);
1499 error = uiomove(io_buffer, count, uio);
1500
1501 if (error != 0) {
1502 P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
1503 goto out;
1504 }
1505
1506 /* While count still exists, keep writing.*/
1507 while (count > 0) {
1508 /* Copy count bytes from the uio */
1509 ret = p9_client_write(ostate.vofid, offset, count,
1510 io_buffer + off);
1511 if (ret < 0) {
1512 if (bytes_written == 0) {
1513 error = -ret;
1514 goto out;
1515 } else {
1516 break;
1517 }
1518 }
1519 P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
1520 __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);
1521
1522 off += ret;
1523 offset += ret;
1524 bytes_written += ret;
1525 count -= ret;
1526 }
1527 }
1528 /* Update the fields in the node to reflect the change*/
1529 if (file_size < uio->uio_offset + uio->uio_resid) {
1530 np->inode.i_size = uio->uio_offset + uio->uio_resid;
1531 vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid);
1532 }
1533 out:
1534 if (io_buffer)
1535 uma_zfree(p9fs_io_buffer_zone, io_buffer);
1536 p9fs_release_open_fid(vp, ap->a_cred, &ostate);
1537
1538 return (error);
1539 }
1540
1541 /*
1542 * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the
1543 * client_remove op to send messages to remove the node's fid on the server.
1544 * After that, does a node metadata cleanup on client side.
1545 */
1546 static int
remove_common(struct p9fs_node * dnp,struct p9fs_node * np,const char * name,struct ucred * cred)1547 remove_common(struct p9fs_node *dnp, struct p9fs_node *np, const char *name,
1548 struct ucred *cred)
1549 {
1550 int error;
1551 struct p9fs_session *vses;
1552 struct vnode *vp;
1553 struct p9_fid *vfid;
1554
1555 error = 0;
1556 vses = np->p9fs_ses;
1557 vp = P9FS_NTOV(np);
1558
1559 vfid = p9fs_get_fid(vses->clnt, dnp, cred, VFID, -1, &error);
1560 if (error != 0)
1561 return (error);
1562
1563 error = p9_client_unlink(vfid, name,
1564 np->v_node->v_type == VDIR ? P9PROTO_UNLINKAT_REMOVEDIR : 0);
1565 if (error != 0)
1566 return (error);
1567
1568 /* Remove all non-open fids associated with the vp */
1569 if (np->inode.i_links_count == 1)
1570 p9fs_fid_remove_all(np, TRUE);
1571
1572 /* Invalidate all entries of vnode from name cache and hash list. */
1573 cache_purge(vp);
1574 vfs_hash_remove(vp);
1575
1576 P9FS_NODE_SETF(np, P9FS_NODE_DELETED);
1577
1578 return (error);
1579 }
1580
1581 /* Remove vop for all files. Call common code for remove and adjust links */
1582 static int
p9fs_remove(struct vop_remove_args * ap)1583 p9fs_remove(struct vop_remove_args *ap)
1584 {
1585 struct vnode *vp;
1586 struct p9fs_node *np;
1587 struct vnode *dvp;
1588 struct p9fs_node *dnp;
1589 struct p9fs_inode *dinode;
1590 struct componentname *cnp;
1591 int error;
1592
1593 cnp = ap->a_cnp;
1594 vp = ap->a_vp;
1595 np = P9FS_VTON(vp);
1596 dvp = ap->a_dvp;
1597 dnp = P9FS_VTON(dvp);
1598 dinode = &dnp->inode;
1599
1600 P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1601
1602 if (vp->v_type == VDIR)
1603 return (EISDIR);
1604
1605 error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
1606 if (error == 0)
1607 P9FS_DECR_LINKS(dinode);
1608
1609 return (error);
1610 }
1611
1612 /* Remove vop for all directories. Call common code for remove and adjust links */
1613 static int
p9fs_rmdir(struct vop_rmdir_args * ap)1614 p9fs_rmdir(struct vop_rmdir_args *ap)
1615 {
1616 struct vnode *vp;
1617 struct p9fs_node *np;
1618 struct vnode *dvp;
1619 struct p9fs_node *dnp;
1620 struct p9fs_inode *dinode;
1621 struct componentname *cnp;
1622 int error;
1623
1624 cnp = ap->a_cnp;
1625 vp = ap->a_vp;
1626 np = P9FS_VTON(vp);
1627 dvp = ap->a_dvp;
1628 dnp = P9FS_VTON(dvp);
1629 dinode = &dnp->inode;
1630
1631 P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np);
1632
1633 error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred);
1634 if (error == 0)
1635 P9FS_DECR_LINKS(dinode);
1636
1637 return (error);
1638 }
1639
1640 /*
1641 * Create symlinks. Make the permissions and call create_common code
1642 * for Soft links.
1643 */
1644 static int
p9fs_symlink(struct vop_symlink_args * ap)1645 p9fs_symlink(struct vop_symlink_args *ap)
1646 {
1647 struct vnode *dvp;
1648 struct vnode **vpp;
1649 struct vattr *vap;
1650 struct componentname *cnp;
1651 char *symtgt;
1652 struct p9fs_node *dnp;
1653 struct p9fs_session *vses;
1654 struct mount *mp;
1655 struct p9_fid *dvfid, *newfid;
1656 int error;
1657 char tmpchr;
1658 gid_t gid;
1659
1660 dvp = ap->a_dvp;
1661 vpp = ap->a_vpp;
1662 vap = ap->a_vap;
1663 cnp = ap->a_cnp;
1664 symtgt = (char*)(uintptr_t) ap->a_target;
1665 dnp = P9FS_VTON(dvp);
1666 vses = dnp->p9fs_ses;
1667 mp = vses->p9fs_mount;
1668 newfid = NULL;
1669 error = 0;
1670 gid = vap->va_gid;
1671
1672 P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
1673
1674 /*
1675 * Save the character present at namelen in nameptr string and
1676 * null terminate the character to get the search name for p9_dir_walk
1677 */
1678 tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
1679 cnp->cn_nameptr[cnp->cn_namelen] = '\0';
1680
1681 dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1682 if (error != 0)
1683 goto out;
1684
1685 error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid);
1686 if (error != 0)
1687 goto out;
1688
1689 /*create vnode for symtgt */
1690 newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
1691 if (newfid != NULL) {
1692 error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
1693 dnp, newfid, vpp, cnp->cn_nameptr);
1694 if (error != 0)
1695 goto out;
1696 } else
1697 goto out;
1698
1699 if ((cnp->cn_flags & MAKEENTRY) != 0) {
1700 cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
1701 }
1702 P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
1703 __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
1704
1705 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1706 return (error);
1707
1708 out:
1709 if (newfid != NULL)
1710 p9_client_clunk(newfid);
1711 cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
1712 return (error);
1713 }
1714
1715 /* Create hard link */
1716 static int
p9fs_link(struct vop_link_args * ap)1717 p9fs_link(struct vop_link_args *ap)
1718 {
1719 struct vnode *vp;
1720 struct vnode *tdvp;
1721 struct componentname *cnp;
1722 struct p9fs_node *dnp;
1723 struct p9fs_node *np;
1724 struct p9fs_inode *inode;
1725 struct p9fs_session *vses;
1726 struct p9_fid *dvfid, *oldvfid;
1727 int error;
1728
1729 vp = ap->a_vp;
1730 tdvp = ap->a_tdvp;
1731 cnp = ap->a_cnp;
1732 dnp = P9FS_VTON(tdvp);
1733 np = P9FS_VTON(vp);
1734 inode = &np->inode;
1735 vses = np->p9fs_ses;
1736 error = 0;
1737
1738 P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp);
1739
1740 dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
1741 if (error != 0)
1742 return (error);
1743 oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error);
1744 if (error != 0)
1745 return (error);
1746
1747 error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr);
1748 if (error != 0)
1749 return (error);
1750 /* Increment ref count on the inode */
1751 P9FS_INCR_LINKS(inode);
1752
1753 return (0);
1754 }
1755
1756 /* Read contents of the symbolic link */
1757 static int
p9fs_readlink(struct vop_readlink_args * ap)1758 p9fs_readlink(struct vop_readlink_args *ap)
1759 {
1760 struct vnode *vp;
1761 struct uio *uio;
1762 struct p9fs_node *dnp;
1763 struct p9fs_session *vses;
1764 struct p9_fid *dvfid;
1765 int error, len;
1766 char *target;
1767
1768 vp = ap->a_vp;
1769 uio = ap->a_uio;
1770 dnp = P9FS_VTON(vp);
1771 vses = dnp->p9fs_ses;
1772 error = 0;
1773
1774 P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
1775
1776 dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error);
1777 if (error != 0)
1778 return (error);
1779
1780 error = p9_readlink(dvfid, &target);
1781 if (error != 0)
1782 return (error);
1783
1784 len = strlen(target);
1785 error = uiomove(target, len, uio);
1786
1787 return (0);
1788 }
1789
1790 /*
1791 * Iterate through a directory. An entire 8k data is read into the I/O buffer.
1792 * This buffer is parsed to make dir entries and fed to the user buffer to
1793 * complete it to the VFS.
1794 */
1795 static int
p9fs_readdir(struct vop_readdir_args * ap)1796 p9fs_readdir(struct vop_readdir_args *ap)
1797 {
1798 struct uio *uio;
1799 struct vnode *vp;
1800 struct dirent cde;
1801 int64_t offset;
1802 uint64_t diroffset;
1803 struct p9fs_node *np;
1804 int error;
1805 int32_t count;
1806 struct p9_client *clnt;
1807 struct p9_dirent dent;
1808 char *io_buffer;
1809 struct p9_fid *vofid;
1810
1811 uio = ap->a_uio;
1812 vp = ap->a_vp;
1813 np = P9FS_VTON(ap->a_vp);
1814 offset = 0;
1815 diroffset = 0;
1816 error = 0;
1817 count = 0;
1818 clnt = np->p9fs_ses->clnt;
1819
1820 P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid);
1821
1822 if (ap->a_uio->uio_iov->iov_len <= 0)
1823 return (EINVAL);
1824
1825 if (vp->v_type != VDIR)
1826 return (ENOTDIR);
1827
1828 vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error);
1829 if (vofid == NULL) {
1830 P9_DEBUG(ERROR, "%s: NULL FID\n", __func__);
1831 return (EBADF);
1832 }
1833
1834 if (ap->a_eofflag != NULL)
1835 *ap->a_eofflag = 0;
1836
1837 io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK);
1838
1839 /* We haven't reached the end yet. read more. */
1840 diroffset = uio->uio_offset;
1841 while (uio->uio_resid >= sizeof(struct dirent)) {
1842 /*
1843 * We need to read more data as what is indicated by filesize because
1844 * filesize is based on data stored in struct dirent structure but
1845 * we read data in struct p9_dirent format which has different size.
1846 * Hence we read max data(P9FS_IOUNIT) everytime from host, convert
1847 * it into struct dirent structure and send it back.
1848 */
1849 count = P9FS_IOUNIT;
1850 bzero(io_buffer, P9FS_MTU);
1851 count = p9_client_readdir(vofid, (char *)io_buffer,
1852 diroffset, count);
1853
1854 if (count == 0) {
1855 if (ap->a_eofflag != NULL)
1856 *ap->a_eofflag = 1;
1857 break;
1858 }
1859
1860 if (count < 0) {
1861 error = EIO;
1862 goto out;
1863 }
1864
1865 offset = 0;
1866 while (offset + QEMU_DIRENTRY_SZ <= count) {
1867
1868 /*
1869 * Read and make sense out of the buffer in one dirent
1870 * This is part of 9p protocol read. This reads one p9_dirent,
1871 * appends it to dirent(FREEBSD specifc) and continues to parse the buffer.
1872 */
1873 bzero(&dent, sizeof(dent));
1874 offset = p9_dirent_read(clnt, io_buffer, offset, count,
1875 &dent);
1876 if (offset < 0 || offset > count) {
1877 error = EIO;
1878 goto out;
1879 }
1880
1881 bzero(&cde, sizeof(cde));
1882 strncpy(cde.d_name, dent.d_name, dent.len);
1883 cde.d_fileno = dent.qid.path;
1884 cde.d_type = dent.d_type;
1885 cde.d_namlen = dent.len;
1886 cde.d_reclen = GENERIC_DIRSIZ(&cde);
1887
1888 /*
1889 * If there isn't enough space in the uio to return a
1890 * whole dirent, break off read
1891 */
1892 if (uio->uio_resid < GENERIC_DIRSIZ(&cde))
1893 break;
1894
1895 /* Transfer */
1896 error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio);
1897 if (error != 0) {
1898 error = EIO;
1899 goto out;
1900 }
1901 diroffset = dent.d_off;
1902 }
1903 }
1904 /* Pass on last transferred offset */
1905 uio->uio_offset = diroffset;
1906
1907 out:
1908 uma_zfree(p9fs_io_buffer_zone, io_buffer);
1909
1910 return (error);
1911 }
1912
1913 static void
p9fs_doio(struct vnode * vp,struct buf * bp,struct p9_fid * vofid,struct ucred * cr)1914 p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr)
1915 {
1916 struct uio *uiov;
1917 struct iovec io;
1918 int error;
1919 uint64_t off, offset;
1920 uint64_t filesize;
1921 uint64_t resid;
1922 uint32_t count;
1923 int64_t ret;
1924 struct p9fs_node *np;
1925 char *io_buffer;
1926
1927 error = 0;
1928 np = P9FS_VTON(vp);
1929
1930 filesize = np->inode.i_size;
1931 uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK);
1932 uiov->uio_iov = &io;
1933 uiov->uio_iovcnt = 1;
1934 uiov->uio_segflg = UIO_SYSSPACE;
1935 io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
1936
1937 if (bp->b_iocmd == BIO_READ) {
1938 io.iov_len = uiov->uio_resid = bp->b_bcount;
1939 io.iov_base = bp->b_data;
1940 uiov->uio_rw = UIO_READ;
1941
1942 switch (vp->v_type) {
1943
1944 case VREG:
1945 {
1946 uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1947
1948 if (uiov->uio_resid) {
1949 int left = uiov->uio_resid;
1950 int nread = bp->b_bcount - left;
1951
1952 if (left > 0)
1953 bzero((char *)bp->b_data + nread, left);
1954 }
1955 /* where in the file are we to start reading */
1956 offset = uiov->uio_offset;
1957 if (uiov->uio_offset >= filesize)
1958 goto out;
1959
1960 while ((resid = uiov->uio_resid) > 0) {
1961 if (offset >= filesize)
1962 break;
1963 count = min(filesize - uiov->uio_offset, resid);
1964 if (count == 0)
1965 break;
1966
1967 P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n",
1968 __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
1969
1970 /* Copy count bytes into the uio */
1971 ret = p9_client_read(vofid, offset, count, io_buffer);
1972 error = uiomove(io_buffer, ret, uiov);
1973
1974 if (error != 0)
1975 goto out;
1976 offset += ret;
1977 }
1978 break;
1979 }
1980 default:
1981 printf("vfs: type %x unexpected\n", vp->v_type);
1982 break;
1983 }
1984 } else {
1985 if (bp->b_dirtyend > bp->b_dirtyoff) {
1986 io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
1987 uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff;
1988 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1989 uiov->uio_rw = UIO_WRITE;
1990
1991 if (uiov->uio_offset < 0) {
1992 error = EINVAL;
1993 goto out;
1994 }
1995
1996 if (uiov->uio_resid == 0)
1997 goto out;
1998
1999 resid = uiov->uio_resid;
2000 offset = uiov->uio_offset;
2001 error = 0;
2002
2003 while ((resid = uiov->uio_resid) > 0) {
2004 off = 0;
2005 count = MIN(resid, P9FS_IOUNIT);
2006 error = uiomove(io_buffer, count, uiov);
2007 if (error != 0) {
2008 goto out;
2009 }
2010
2011 while (count > 0) {
2012 /* Copy count bytes from the uio */
2013 ret = p9_client_write(vofid, offset, count,
2014 io_buffer + off);
2015 if (ret < 0)
2016 goto out;
2017
2018 P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n",
2019 __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset);
2020 off += ret;
2021 offset += ret;
2022 count -= ret;
2023 }
2024 }
2025
2026 /* Update the fields in the node to reflect the change */
2027 if (filesize < uiov->uio_offset + uiov->uio_resid) {
2028 np->inode.i_size = uiov->uio_offset + uiov->uio_resid;
2029 vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid);
2030 /* update the modified timers. */
2031 p9fs_itimes(vp);
2032 }
2033 } else {
2034 bp->b_resid = 0;
2035 goto out1;
2036 }
2037 }
2038 out:
2039 /* Set the error */
2040 if (error != 0) {
2041 bp->b_error = error;
2042 bp->b_ioflags |= BIO_ERROR;
2043 }
2044 bp->b_resid = uiov->uio_resid;
2045 out1:
2046 bufdone(bp);
2047 uma_zfree(p9fs_io_buffer_zone, io_buffer);
2048 free(uiov, M_P9UIOV);
2049 }
2050
2051 /*
2052 * The I/O buffer is mapped to a uio and a client_write/client_read is performed
2053 * the same way as p9fs_read and p9fs_write.
2054 */
2055 static int
p9fs_strategy(struct vop_strategy_args * ap)2056 p9fs_strategy(struct vop_strategy_args *ap)
2057 {
2058 struct vnode *vp;
2059 struct buf *bp;
2060 struct ucred *cr;
2061 int error;
2062 struct open_fid_state ostate;
2063
2064 vp = ap->a_vp;
2065 bp = ap->a_bp;
2066 error = 0;
2067
2068 P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd);
2069
2070 if (bp->b_iocmd == BIO_READ)
2071 cr = bp->b_rcred;
2072 else
2073 cr = bp->b_wcred;
2074
2075 error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate);
2076 if (error) {
2077 P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error);
2078 bp->b_error = error;
2079 bp->b_ioflags |= BIO_ERROR;
2080 bufdone(bp);
2081 return (0);
2082 }
2083
2084 p9fs_doio(vp, bp, ostate.vofid, cr);
2085 p9fs_release_open_fid(vp, cr, &ostate);
2086
2087 return (0);
2088 }
2089
/*
 * Rename a file.
 *
 * Rejects renames that cross mounts (EXDEV) or carry any flags
 * (EOPNOTSUPP), then asks the server to rename fcnp's name in fdvp to
 * tcnp's name in tdvp using the two directory fids.  After the server has
 * accepted the rename, the cached link counts are adjusted and, for
 * directories, the name cache of the affected directories is purged.
 *
 * Every exit path goes through 'out', which drops the four vnodes passed
 * in: tdvp and tvp with vput() (vrele() for tdvp when it is the same vnode
 * as tvp), fdvp and fvp with vrele().
 * NOTE(review): presumably this mirrors the VOP_RENAME convention about
 * which vnodes arrive locked -- confirm against the VOP_RENAME contract.
 *
 * Returns 0 on success or the error from the checks, the fid lookups, the
 * server, or vn_lock().
 */
static int
p9fs_rename(struct vop_rename_args *ap)
{
	struct vnode *tvp;
	struct vnode *tdvp;
	struct vnode *fvp;
	struct vnode *fdvp;
	struct componentname *tcnp;
	struct componentname *fcnp;
	struct p9fs_node *tdnode;
	struct p9fs_node *fdnode;
	struct p9fs_inode *fdinode;
	struct p9fs_node *fnode;
	struct p9fs_inode *finode;
	struct p9fs_session *vses;
	struct p9fs_node *tnode;
	struct p9fs_inode *tinode;
	struct p9_fid *olddirvfid, *newdirvfid ;
	int error;

	tvp = ap->a_tvp;
	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tcnp = ap->a_tcnp;
	fcnp = ap->a_fcnp;
	tdnode = P9FS_VTON(tdvp);
	fdnode = P9FS_VTON(fdvp);
	fdinode = &fdnode->inode;
	fnode = P9FS_VTON(fvp);
	finode = &fnode->inode;
	vses = fnode->p9fs_ses;
	error = 0;

	P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp);

	/* Check for cross mount operation */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	/* No rename flags are supported. */
	if (ap->a_flags != 0) {
		error = EOPNOTSUPP;
		goto out;
	}

	/*
	 * Source and target are the same vnode.  This only re-assigns the
	 * 0 that 'error' already holds; the rename still proceeds below.
	 */
	if (fvp == tvp)
		error = 0;

	/* Fids of the source and target directories for the renameat. */
	olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		goto out;
	newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		goto out;

	error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr);
	if (error != 0)
		goto out;

	/*
	 * When a directory was moved, decrement the cached link count of
	 * its old parent directory and purge the name cache of the old
	 * parent (and of the new parent when an existing directory was
	 * replaced).
	 */
	if (fvp->v_type == VDIR) {
		if (tvp && tvp->v_type == VDIR)
			cache_purge(tdvp);
		P9FS_DECR_LINKS(fdinode);
		cache_purge(fdvp);
	}

	/* Taking exclusive lock on the from node before decrementing the link count */
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto out;
	P9FS_DECR_LINKS(finode);
	VOP_UNLOCK(fvp);

	/* A replaced target lost the name that pointed at it. */
	if (tvp) {
		tnode = P9FS_VTON(tvp);
		tinode = &tnode->inode;
		P9FS_DECR_LINKS(tinode);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	return (error);
}
2188
2189 /*
2190 * Put VM pages, synchronously.
2191 * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement
2192 */
2193 static int
p9fs_putpages(struct vop_putpages_args * ap)2194 p9fs_putpages(struct vop_putpages_args *ap)
2195 {
2196 struct uio uio;
2197 struct iovec iov;
2198 int i, error, npages, count;
2199 off_t offset;
2200 int *rtvals;
2201 struct vnode *vp;
2202 struct thread *td;
2203 struct ucred *cred;
2204 struct p9fs_node *np;
2205 vm_page_t *pages;
2206 void *kva;
2207 struct buf *bp;
2208
2209 vp = ap->a_vp;
2210 np = P9FS_VTON(vp);
2211 td = curthread;
2212 cred = curthread->td_ucred;
2213 pages = ap->a_m;
2214 count = ap->a_count;
2215 rtvals = ap->a_rtvals;
2216 npages = btoc(count);
2217 offset = IDX_TO_OFF(pages[0]->pindex);
2218
2219 /*
2220 * When putting pages, do not extend file past EOF.
2221 */
2222 if (offset + count > np->inode.i_size) {
2223 count = np->inode.i_size - offset;
2224 if (count < 0)
2225 count = 0;
2226 }
2227
2228 for (i = 0; i < npages; i++)
2229 rtvals[i] = VM_PAGER_ERROR;
2230
2231 bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK);
2232 kva = bp->b_data;
2233 pmap_qenter(kva, pages, npages);
2234
2235 VM_CNT_INC(v_vnodeout);
2236 VM_CNT_ADD(v_vnodepgsout, count);
2237
2238 iov.iov_base = kva;
2239 iov.iov_len = count;
2240 uio.uio_iov = &iov;
2241 uio.uio_iovcnt = 1;
2242 uio.uio_offset = offset;
2243 uio.uio_resid = count;
2244 uio.uio_segflg = UIO_SYSSPACE;
2245 uio.uio_rw = UIO_WRITE;
2246 uio.uio_td = td;
2247
2248 P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid);
2249
2250 error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync),
2251 cred);
2252
2253 pmap_qremove(kva, npages);
2254 uma_zfree(p9fs_pbuf_zone, bp);
2255
2256 if (error == 0)
2257 vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
2258 np->inode.i_size - offset, npages * PAGE_SIZE);
2259
2260 return (rtvals[0]);
2261 }
2262
2263 static int
p9fs_delayed_setsize(struct vop_delayed_setsize_args * ap)2264 p9fs_delayed_setsize(struct vop_delayed_setsize_args *ap)
2265 {
2266 struct vnode *vp;
2267 struct p9fs_node *np;
2268
2269 vp = ap->a_vp;
2270 np = P9FS_VTON(vp);
2271 vnode_pager_setsize(vp, np->inode.i_size);
2272 return (0);
2273 }
2274
2275 static unsigned int
p9fs_get_name_max(struct p9fs_node * np)2276 p9fs_get_name_max(struct p9fs_node *np)
2277 {
2278 struct p9fs_session *vses = np->p9fs_ses;
2279 struct p9_statfs statfs;
2280 struct p9_fid *vfid;
2281 unsigned int name_max;
2282 int error = 0;
2283
2284 name_max = atomic_load_int(&vses->name_max);
2285 if (name_max != 0)
2286 return (name_max);
2287
2288 P9_DEBUG(VOPS, "%s: querying _PC_NAME_MAX\n", __func__);
2289 vfid = p9fs_get_fid(vses->clnt, np, NULL, VFID, -1, &error);
2290 if (vfid != NULL) {
2291 error = p9_client_statfs(vfid, &statfs);
2292 if (error == 0) {
2293 /*
2294 * Note that this is not strictly correct if you have
2295 * nested mounts on the host (e.g. when using qemu with
2296 * multidevs=remap), but is a better estimate than just
2297 * returning 255.
2298 */
2299 name_max = statfs.namelen;
2300 }
2301 }
2302 P9_DEBUG(VOPS, "%s: max_name=%u error=%d\n", __func__, name_max, error);
2303 if (error != 0 || name_max == 0) {
2304 printf("p9fs: warning: failed to query name_max (error %d), "
2305 "using fallback %d\n", error, NAME_MAX);
2306 name_max = NAME_MAX; /* fallback and prevent retrying */
2307 }
2308 atomic_store_int(&vses->name_max, name_max);
2309 return (name_max);
2310 }
2311
2312 /*
2313 * Return POSIX pathconf information applicable to p9fs filesystems.
2314 */
2315 static int
p9fs_pathconf(struct vop_pathconf_args * ap)2316 p9fs_pathconf(struct vop_pathconf_args *ap)
2317 {
2318 int error = 0;
2319 struct vnode *vp = ap->a_vp;
2320 struct p9fs_node *np = P9FS_VTON(vp);
2321
2322 switch (ap->a_name) {
2323 case _PC_NAME_MAX:
2324 *ap->a_retval = p9fs_get_name_max(np);
2325 break;
2326 case _PC_SYMLINK_MAX:
2327 case _PC_PATH_MAX:
2328 /*
2329 * These are conservative estimates, the real value depends on
2330 * the host file system.
2331 */
2332 *ap->a_retval = MAXPATHLEN;
2333 break;
2334 default:
2335 error = vop_stdpathconf(ap);
2336 break;
2337 }
2338 return (error);
2339 }
2340
2341 struct vop_vector p9fs_vnops = {
2342 .vop_default = &default_vnodeops,
2343 .vop_lookup = p9fs_lookup,
2344 .vop_open = p9fs_open,
2345 .vop_close = p9fs_close,
2346 .vop_access = p9fs_access,
2347 .vop_delayed_setsize = p9fs_delayed_setsize,
2348 .vop_getattr = p9fs_getattr_dotl,
2349 .vop_setattr = p9fs_setattr_dotl,
2350 .vop_pathconf = p9fs_pathconf,
2351 .vop_reclaim = p9fs_reclaim,
2352 .vop_inactive = p9fs_inactive,
2353 .vop_readdir = p9fs_readdir,
2354 .vop_create = p9fs_create,
2355 .vop_mknod = p9fs_mknod,
2356 .vop_read = p9fs_read,
2357 .vop_write = p9fs_write,
2358 .vop_remove = p9fs_remove,
2359 .vop_mkdir = p9fs_mkdir,
2360 .vop_rmdir = p9fs_rmdir,
2361 .vop_strategy = p9fs_strategy,
2362 .vop_symlink = p9fs_symlink,
2363 .vop_rename = p9fs_rename,
2364 .vop_link = p9fs_link,
2365 .vop_readlink = p9fs_readlink,
2366 .vop_putpages = p9fs_putpages,
2367 };
2368 VFS_VOP_VECTOR_REGISTER(p9fs_vnops);
2369