1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All rights reserved.
31 */
32
33 /*
34 * Copyright 2018 Nexenta Systems, Inc.
35 * Copyright (c) 2016 by Delphix. All rights reserved.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/kstat.h>
52 #include <sys/dirent.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/vtrace.h>
56 #include <sys/mode.h>
57 #include <sys/acl.h>
58 #include <sys/nbmlock.h>
59 #include <sys/policy.h>
60 #include <sys/sdt.h>
61
62 #include <rpc/types.h>
63 #include <rpc/auth.h>
64 #include <rpc/svc.h>
65
66 #include <nfs/nfs.h>
67 #include <nfs/export.h>
68 #include <nfs/nfs_cmd.h>
69
70 #include <vm/hat.h>
71 #include <vm/as.h>
72 #include <vm/seg.h>
73 #include <vm/seg_map.h>
74 #include <vm/seg_kmem.h>
75
76 #include <sys/strsubr.h>
77
78 struct rfs_async_write_list;
79
80 /*
81 * Zone globals of NFSv2 server
82 */
83 typedef struct nfs_srv {
84 kmutex_t async_write_lock;
85 struct rfs_async_write_list *async_write_head;
86
87 /*
88 * enables write clustering if == 1
89 */
90 int write_async;
91 } nfs_srv_t;
92
93 /*
94 * These are the interface routines for the server side of the
95 * Network File System. See the NFS version 2 protocol specification
96 * for a description of this interface.
97 */
98
99 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
101 cred_t *);
102
103
104 /*
105 * Some "over the wire" UNIX file types. These are encoded
106 * into the mode. This needs to be fixed in the next rev.
107 */
108 #define IFMT 0170000 /* type of file */
109 #define IFCHR 0020000 /* character special */
110 #define IFBLK 0060000 /* block special */
111 #define IFSOCK 0140000 /* socket */
112
113 u_longlong_t nfs2_srv_caller_id;
114
115 static nfs_srv_t *
nfs_get_srv(void)116 nfs_get_srv(void)
117 {
118 nfs_globals_t *ng = nfs_srv_getzg();
119 nfs_srv_t *srv = ng->nfs_srv;
120 ASSERT(srv != NULL);
121 return (srv);
122 }
123
124 /*
125 * Get file attributes.
126 * Returns the current attributes of the file with the given fhandle.
127 */
128 /* ARGSUSED */
129 void
rfs_getattr(fhandle_t * fhp,struct nfsattrstat * ns,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
131 struct svc_req *req, cred_t *cr, bool_t ro)
132 {
133 int error;
134 vnode_t *vp;
135 struct vattr va;
136
137 vp = nfs_fhtovp(fhp, exi);
138 if (vp == NULL) {
139 ns->ns_status = NFSERR_STALE;
140 return;
141 }
142
143 /*
144 * Do the getattr.
145 */
146 va.va_mask = AT_ALL; /* we want all the attributes */
147
148 error = rfs4_delegated_getattr(vp, &va, 0, cr);
149
150 /* check for overflows */
151 if (!error) {
152 /* Lie about the object type for a referral */
153 if (vn_is_nfs_reparse(vp, cr))
154 va.va_type = VLNK;
155
156 acl_perm(vp, exi, &va, cr);
157 error = vattr_to_nattr(&va, &ns->ns_attr);
158 }
159
160 VN_RELE(vp);
161
162 ns->ns_status = puterrno(error);
163 }
164 void *
rfs_getattr_getfh(fhandle_t * fhp)165 rfs_getattr_getfh(fhandle_t *fhp)
166 {
167 return (fhp);
168 }
169
170 /*
171 * Set file attributes.
172 * Sets the attributes of the file with the given fhandle. Returns
173 * the new attributes.
174 */
175 /* ARGSUSED */
176 void
rfs_setattr(struct nfssaargs * args,struct nfsattrstat * ns,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)177 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
178 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
179 {
180 int error;
181 int flag;
182 int in_crit = 0;
183 vnode_t *vp;
184 struct vattr va;
185 struct vattr bva;
186 struct flock64 bf;
187 caller_context_t ct;
188
189
190 vp = nfs_fhtovp(&args->saa_fh, exi);
191 if (vp == NULL) {
192 ns->ns_status = NFSERR_STALE;
193 return;
194 }
195
196 if (rdonly(ro, vp)) {
197 VN_RELE(vp);
198 ns->ns_status = NFSERR_ROFS;
199 return;
200 }
201
202 error = sattr_to_vattr(&args->saa_sa, &va);
203 if (error) {
204 VN_RELE(vp);
205 ns->ns_status = puterrno(error);
206 return;
207 }
208
209 /*
210 * If the client is requesting a change to the mtime,
211 * but the nanosecond field is set to 1 billion, then
212 * this is a flag to the server that it should set the
213 * atime and mtime fields to the server's current time.
214 * The 1 billion number actually came from the client
215 * as 1 million, but the units in the over the wire
216 * request are microseconds instead of nanoseconds.
217 *
218 * This is an overload of the protocol and should be
219 * documented in the NFS Version 2 protocol specification.
220 */
221 if (va.va_mask & AT_MTIME) {
222 if (va.va_mtime.tv_nsec == 1000000000) {
223 gethrestime(&va.va_mtime);
224 va.va_atime = va.va_mtime;
225 va.va_mask |= AT_ATIME;
226 flag = 0;
227 } else
228 flag = ATTR_UTIME;
229 } else
230 flag = 0;
231
232 /*
233 * If the filesystem is exported with nosuid, then mask off
234 * the setuid and setgid bits.
235 */
236 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
237 (exi->exi_export.ex_flags & EX_NOSUID))
238 va.va_mode &= ~(VSUID | VSGID);
239
240 ct.cc_sysid = 0;
241 ct.cc_pid = 0;
242 ct.cc_caller_id = nfs2_srv_caller_id;
243 ct.cc_flags = CC_DONTBLOCK;
244
245 /*
246 * We need to specially handle size changes because it is
247 * possible for the client to create a file with modes
248 * which indicate read-only, but with the file opened for
249 * writing. If the client then tries to set the size of
250 * the file, then the normal access checking done in
251 * VOP_SETATTR would prevent the client from doing so,
252 * although it should be legal for it to do so. To get
253 * around this, we do the access checking for ourselves
254 * and then use VOP_SPACE which doesn't do the access
255 * checking which VOP_SETATTR does. VOP_SPACE can only
256 * operate on VREG files, let VOP_SETATTR handle the other
257 * extremely rare cases.
258 * Also the client should not be allowed to change the
259 * size of the file if there is a conflicting non-blocking
260 * mandatory lock in the region of change.
261 */
262 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
263 if (nbl_need_check(vp)) {
264 nbl_start_crit(vp, RW_READER);
265 in_crit = 1;
266 }
267
268 bva.va_mask = AT_UID | AT_SIZE;
269
270 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
271
272 if (error) {
273 if (in_crit)
274 nbl_end_crit(vp);
275 VN_RELE(vp);
276 ns->ns_status = puterrno(error);
277 return;
278 }
279
280 if (in_crit) {
281 u_offset_t offset;
282 ssize_t length;
283
284 if (va.va_size < bva.va_size) {
285 offset = va.va_size;
286 length = bva.va_size - va.va_size;
287 } else {
288 offset = bva.va_size;
289 length = va.va_size - bva.va_size;
290 }
291 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
292 NULL)) {
293 error = EACCES;
294 }
295 }
296
297 if (crgetuid(cr) == bva.va_uid && !error &&
298 va.va_size != bva.va_size) {
299 va.va_mask &= ~AT_SIZE;
300 bf.l_type = F_WRLCK;
301 bf.l_whence = 0;
302 bf.l_start = (off64_t)va.va_size;
303 bf.l_len = 0;
304 bf.l_sysid = 0;
305 bf.l_pid = 0;
306
307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
308 (offset_t)va.va_size, cr, &ct);
309 }
310 if (in_crit)
311 nbl_end_crit(vp);
312 } else
313 error = 0;
314
315 /*
316 * Do the setattr.
317 */
318 if (!error && va.va_mask) {
319 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
320 }
321
322 /*
323 * check if the monitor on either vop_space or vop_setattr detected
324 * a delegation conflict and if so, mark the thread flag as
325 * wouldblock so that the response is dropped and the client will
326 * try again.
327 */
328 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
329 VN_RELE(vp);
330 curthread->t_flag |= T_WOULDBLOCK;
331 return;
332 }
333
334 if (!error) {
335 va.va_mask = AT_ALL; /* get everything */
336
337 error = rfs4_delegated_getattr(vp, &va, 0, cr);
338
339 /* check for overflows */
340 if (!error) {
341 acl_perm(vp, exi, &va, cr);
342 error = vattr_to_nattr(&va, &ns->ns_attr);
343 }
344 }
345
346 ct.cc_flags = 0;
347
348 /*
349 * Force modified metadata out to stable storage.
350 */
351 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
352
353 VN_RELE(vp);
354
355 ns->ns_status = puterrno(error);
356 }
357 void *
rfs_setattr_getfh(struct nfssaargs * args)358 rfs_setattr_getfh(struct nfssaargs *args)
359 {
360 return (&args->saa_fh);
361 }
362
/* Change and release @exip and @vpp only in success */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/*
	 * Take our own hold on the mount point; traverse() swaps it for
	 * a hold on the root of the mounted filesystem.  The caller's
	 * hold on *vpp is untouched until we know we will succeed.
	 */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	/* Build a file id for the submount root to look up its export. */
	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}
408
409 /*
410 * Given mounted "dvp" and "exi", go upper mountpoint
411 * with dvp/exi correction
412 * Return 0 in success
413 */
414 int
rfs_climb_crossmnt(vnode_t ** dvpp,struct exportinfo ** exip,cred_t * cr)415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
416 {
417 struct exportinfo *exi;
418 vnode_t *dvp = *dvpp;
419 vnode_t *zone_rootvp;
420
421 zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
422 ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
423
424 VN_HOLD(dvp);
425 dvp = untraverse(dvp, zone_rootvp);
426 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
427 if (exi == NULL) {
428 VN_RELE(dvp);
429 return (-1);
430 }
431
432 ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
433 exi_rele(*exip);
434 *exip = exi;
435 VN_RELE(*dvpp);
436 *dvpp = dvp;
437
438 return (0);
439 }
440 /*
441 * Directory lookup.
442 * Returns an fhandle and file attributes for file name in a directory.
443 */
444 /* ARGSUSED */
445 void
rfs_lookup(struct nfsdiropargs * da,struct nfsdiropres * dr,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)446 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
447 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
448 {
449 int error;
450 vnode_t *dvp;
451 vnode_t *vp;
452 struct vattr va;
453 fhandle_t *fhp = da->da_fhandle;
454 struct sec_ol sec = {0, 0};
455 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
456 char *name;
457 struct sockaddr *ca;
458
459 /*
460 * Trusted Extension doesn't support NFSv2. MOUNT
461 * will reject v2 clients. Need to prevent v2 client
462 * access via WebNFS here.
463 */
464 if (is_system_labeled() && req->rq_vers == 2) {
465 dr->dr_status = NFSERR_ACCES;
466 return;
467 }
468
469 /*
470 * Disallow NULL paths
471 */
472 if (da->da_name == NULL || *da->da_name == '\0') {
473 dr->dr_status = NFSERR_ACCES;
474 return;
475 }
476
477 /*
478 * Allow lookups from the root - the default
479 * location of the public filehandle.
480 */
481 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
482 dvp = ZONE_ROOTVP();
483 VN_HOLD(dvp);
484 } else {
485 dvp = nfs_fhtovp(fhp, exi);
486 if (dvp == NULL) {
487 dr->dr_status = NFSERR_STALE;
488 return;
489 }
490 }
491
492 exi_hold(exi);
493 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
494
495 /*
496 * Not allow lookup beyond root.
497 * If the filehandle matches a filehandle of the exi,
498 * then the ".." refers beyond the root of an exported filesystem.
499 */
500 if (strcmp(da->da_name, "..") == 0 &&
501 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
502 if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
503 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
504 /*
505 * special case for ".." and 'nohide'exported root
506 */
507 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
508 error = NFSERR_ACCES;
509 goto out;
510 }
511 } else {
512 error = NFSERR_NOENT;
513 goto out;
514 }
515 }
516
517 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
518 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
519 MAXPATHLEN);
520
521 if (name == NULL) {
522 error = NFSERR_ACCES;
523 goto out;
524 }
525
526 /*
527 * If the public filehandle is used then allow
528 * a multi-component lookup, i.e. evaluate
529 * a pathname and follow symbolic links if
530 * necessary.
531 *
532 * This may result in a vnode in another filesystem
533 * which is OK as long as the filesystem is exported.
534 */
535 if (PUBLIC_FH2(fhp)) {
536 publicfh_flag = TRUE;
537
538 exi_rele(exi);
539 exi = NULL;
540
541 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
542 &sec);
543 } else {
544 /*
545 * Do a normal single component lookup.
546 */
547 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
548 NULL, NULL, NULL);
549 }
550
551 if (name != da->da_name)
552 kmem_free(name, MAXPATHLEN);
553
554 if (error == 0 && vn_ismntpt(vp)) {
555 error = rfs_cross_mnt(&vp, &exi);
556 if (error)
557 VN_RELE(vp);
558 }
559
560 if (!error) {
561 va.va_mask = AT_ALL; /* we want everything */
562
563 error = rfs4_delegated_getattr(vp, &va, 0, cr);
564
565 /* check for overflows */
566 if (!error) {
567 acl_perm(vp, exi, &va, cr);
568 error = vattr_to_nattr(&va, &dr->dr_attr);
569 if (!error) {
570 if (sec.sec_flags & SEC_QUERY)
571 error = makefh_ol(&dr->dr_fhandle, exi,
572 sec.sec_index);
573 else {
574 error = makefh(&dr->dr_fhandle, vp,
575 exi);
576 if (!error && publicfh_flag &&
577 !chk_clnt_sec(exi, req))
578 auth_weak = TRUE;
579 }
580 }
581 }
582 VN_RELE(vp);
583 }
584
585 out:
586 VN_RELE(dvp);
587
588 if (exi != NULL)
589 exi_rele(exi);
590
591 /*
592 * If it's public fh, no 0x81, and client's flavor is
593 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
594 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
595 */
596 if (auth_weak)
597 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
598 else
599 dr->dr_status = puterrno(error);
600 }
601 void *
rfs_lookup_getfh(struct nfsdiropargs * da)602 rfs_lookup_getfh(struct nfsdiropargs *da)
603 {
604 return (da->da_fhandle);
605 }
606
607 /*
608 * Read symbolic link.
609 * Returns the string in the symbolic link at the given fhandle.
610 */
611 /* ARGSUSED */
612 void
rfs_readlink(fhandle_t * fhp,struct nfsrdlnres * rl,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)613 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
614 struct svc_req *req, cred_t *cr, bool_t ro)
615 {
616 int error;
617 struct iovec iov;
618 struct uio uio;
619 vnode_t *vp;
620 struct vattr va;
621 struct sockaddr *ca;
622 char *name = NULL;
623 int is_referral = 0;
624
625 vp = nfs_fhtovp(fhp, exi);
626 if (vp == NULL) {
627 rl->rl_data = NULL;
628 rl->rl_status = NFSERR_STALE;
629 return;
630 }
631
632 va.va_mask = AT_MODE;
633
634 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
635
636 if (error) {
637 VN_RELE(vp);
638 rl->rl_data = NULL;
639 rl->rl_status = puterrno(error);
640 return;
641 }
642
643 if (MANDLOCK(vp, va.va_mode)) {
644 VN_RELE(vp);
645 rl->rl_data = NULL;
646 rl->rl_status = NFSERR_ACCES;
647 return;
648 }
649
650 /* We lied about the object type for a referral */
651 if (vn_is_nfs_reparse(vp, cr))
652 is_referral = 1;
653
654 /*
655 * XNFS and RFC1094 require us to return ENXIO if argument
656 * is not a link. BUGID 1138002.
657 */
658 if (vp->v_type != VLNK && !is_referral) {
659 VN_RELE(vp);
660 rl->rl_data = NULL;
661 rl->rl_status = NFSERR_NXIO;
662 return;
663 }
664
665 /*
666 * Allocate data for pathname. This will be freed by rfs_rlfree.
667 */
668 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
669
670 if (is_referral) {
671 char *s;
672 size_t strsz;
673 kstat_named_t *stat =
674 exi->exi_ne->ne_globals->svstat[NFS_VERSION];
675
676 /* Get an artificial symlink based on a referral */
677 s = build_symlink(vp, cr, &strsz);
678 stat[NFS_REFERLINKS].value.ui64++;
679 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
680 vnode_t *, vp, char *, s);
681 if (s == NULL)
682 error = EINVAL;
683 else {
684 error = 0;
685 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
686 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
687 kmem_free(s, strsz);
688 }
689
690 } else {
691
692 /*
693 * Set up io vector to read sym link data
694 */
695 iov.iov_base = rl->rl_data;
696 iov.iov_len = NFS_MAXPATHLEN;
697 uio.uio_iov = &iov;
698 uio.uio_iovcnt = 1;
699 uio.uio_segflg = UIO_SYSSPACE;
700 uio.uio_extflg = UIO_COPY_CACHED;
701 uio.uio_loffset = (offset_t)0;
702 uio.uio_resid = NFS_MAXPATHLEN;
703
704 /*
705 * Do the readlink.
706 */
707 error = VOP_READLINK(vp, &uio, cr, NULL);
708
709 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
710
711 if (!error)
712 rl->rl_data[rl->rl_count] = '\0';
713
714 }
715
716
717 VN_RELE(vp);
718
719 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
720 name = nfscmd_convname(ca, exi, rl->rl_data,
721 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
722
723 if (name != NULL && name != rl->rl_data) {
724 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
725 rl->rl_data = name;
726 }
727
728 /*
729 * XNFS and RFC1094 require us to return ENXIO if argument
730 * is not a link. UFS returns EINVAL if this is the case,
731 * so we do the mapping here. BUGID 1138002.
732 */
733 if (error == EINVAL)
734 rl->rl_status = NFSERR_NXIO;
735 else
736 rl->rl_status = puterrno(error);
737
738 }
739 void *
rfs_readlink_getfh(fhandle_t * fhp)740 rfs_readlink_getfh(fhandle_t *fhp)
741 {
742 return (fhp);
743 }
744 /*
745 * Free data allocated by rfs_readlink
746 */
747 void
rfs_rlfree(struct nfsrdlnres * rl)748 rfs_rlfree(struct nfsrdlnres *rl)
749 {
750 if (rl->rl_data != NULL)
751 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
752 }
753
754 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
755
756 /*
757 * Read data.
758 * Returns some data read from the file at the given fhandle.
759 */
760 /* ARGSUSED */
761 void
rfs_read(struct nfsreadargs * ra,struct nfsrdresult * rr,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)762 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
763 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
764 {
765 vnode_t *vp;
766 int error;
767 struct vattr va;
768 struct iovec iov;
769 struct uio uio;
770 mblk_t *mp;
771 int alloc_err = 0;
772 int in_crit = 0;
773 caller_context_t ct;
774
775 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
776 if (vp == NULL) {
777 rr->rr_data = NULL;
778 rr->rr_status = NFSERR_STALE;
779 return;
780 }
781
782 if (vp->v_type != VREG) {
783 VN_RELE(vp);
784 rr->rr_data = NULL;
785 rr->rr_status = NFSERR_ISDIR;
786 return;
787 }
788
789 ct.cc_sysid = 0;
790 ct.cc_pid = 0;
791 ct.cc_caller_id = nfs2_srv_caller_id;
792 ct.cc_flags = CC_DONTBLOCK;
793
794 /*
795 * Enter the critical region before calling VOP_RWLOCK
796 * to avoid a deadlock with write requests.
797 */
798 if (nbl_need_check(vp)) {
799 nbl_start_crit(vp, RW_READER);
800 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
801 0, NULL)) {
802 nbl_end_crit(vp);
803 VN_RELE(vp);
804 rr->rr_data = NULL;
805 rr->rr_status = NFSERR_ACCES;
806 return;
807 }
808 in_crit = 1;
809 }
810
811 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
812
813 /* check if a monitor detected a delegation conflict */
814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
815 if (in_crit)
816 nbl_end_crit(vp);
817 VN_RELE(vp);
818 /* mark as wouldblock so response is dropped */
819 curthread->t_flag |= T_WOULDBLOCK;
820
821 rr->rr_data = NULL;
822 return;
823 }
824
825 va.va_mask = AT_ALL;
826
827 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
828
829 if (error) {
830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
831 if (in_crit)
832 nbl_end_crit(vp);
833
834 VN_RELE(vp);
835 rr->rr_data = NULL;
836 rr->rr_status = puterrno(error);
837
838 return;
839 }
840
841 /*
842 * This is a kludge to allow reading of files created
843 * with no read permission. The owner of the file
844 * is always allowed to read it.
845 */
846 if (crgetuid(cr) != va.va_uid) {
847 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
848
849 if (error) {
850 /*
851 * Exec is the same as read over the net because
852 * of demand loading.
853 */
854 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
855 }
856 if (error) {
857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
858 if (in_crit)
859 nbl_end_crit(vp);
860 VN_RELE(vp);
861 rr->rr_data = NULL;
862 rr->rr_status = puterrno(error);
863
864 return;
865 }
866 }
867
868 if (MANDLOCK(vp, va.va_mode)) {
869 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
870 if (in_crit)
871 nbl_end_crit(vp);
872
873 VN_RELE(vp);
874 rr->rr_data = NULL;
875 rr->rr_status = NFSERR_ACCES;
876
877 return;
878 }
879
880 rr->rr_ok.rrok_wlist_len = 0;
881 rr->rr_ok.rrok_wlist = NULL;
882
883 if ((u_offset_t)ra->ra_offset >= va.va_size) {
884 rr->rr_count = 0;
885 rr->rr_data = NULL;
886 /*
887 * In this case, status is NFS_OK, but there is no data
888 * to encode. So set rr_mp to NULL.
889 */
890 rr->rr_mp = NULL;
891 rr->rr_ok.rrok_wlist = ra->ra_wlist;
892 if (rr->rr_ok.rrok_wlist)
893 clist_zero_len(rr->rr_ok.rrok_wlist);
894 goto done;
895 }
896
897 if (ra->ra_wlist) {
898 mp = NULL;
899 rr->rr_mp = NULL;
900 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
901 if (ra->ra_count > iov.iov_len) {
902 rr->rr_data = NULL;
903 rr->rr_status = NFSERR_INVAL;
904 goto done;
905 }
906 } else {
907 /*
908 * mp will contain the data to be sent out in the read reply.
909 * This will be freed after the reply has been sent out (by the
910 * driver).
911 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
912 * that the call to xdrmblk_putmblk() never fails.
913 */
914 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
915 &alloc_err);
916 ASSERT(mp != NULL);
917 ASSERT(alloc_err == 0);
918
919 rr->rr_mp = mp;
920
921 /*
922 * Set up io vector
923 */
924 iov.iov_base = (caddr_t)mp->b_datap->db_base;
925 iov.iov_len = ra->ra_count;
926 }
927
928 uio.uio_iov = &iov;
929 uio.uio_iovcnt = 1;
930 uio.uio_segflg = UIO_SYSSPACE;
931 uio.uio_extflg = UIO_COPY_CACHED;
932 uio.uio_loffset = (offset_t)ra->ra_offset;
933 uio.uio_resid = ra->ra_count;
934
935 error = VOP_READ(vp, &uio, 0, cr, &ct);
936
937 if (error) {
938 if (mp)
939 freeb(mp);
940
941 /*
942 * check if a monitor detected a delegation conflict and
943 * mark as wouldblock so response is dropped
944 */
945 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
946 curthread->t_flag |= T_WOULDBLOCK;
947 else
948 rr->rr_status = puterrno(error);
949
950 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
951 if (in_crit)
952 nbl_end_crit(vp);
953
954 VN_RELE(vp);
955 rr->rr_data = NULL;
956
957 return;
958 }
959
960 /*
961 * Get attributes again so we can send the latest access
962 * time to the client side for its cache.
963 */
964 va.va_mask = AT_ALL;
965
966 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
967
968 if (error) {
969 if (mp)
970 freeb(mp);
971
972 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
973 if (in_crit)
974 nbl_end_crit(vp);
975
976 VN_RELE(vp);
977 rr->rr_data = NULL;
978 rr->rr_status = puterrno(error);
979
980 return;
981 }
982
983 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
984
985 if (mp) {
986 rr->rr_data = (char *)mp->b_datap->db_base;
987 } else {
988 if (ra->ra_wlist) {
989 rr->rr_data = (caddr_t)iov.iov_base;
990 if (!rdma_setup_read_data2(ra, rr)) {
991 rr->rr_data = NULL;
992 rr->rr_status = puterrno(NFSERR_INVAL);
993 }
994 }
995 }
996 done:
997 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
998 if (in_crit)
999 nbl_end_crit(vp);
1000
1001 acl_perm(vp, exi, &va, cr);
1002
1003 /* check for overflows */
1004 error = vattr_to_nattr(&va, &rr->rr_attr);
1005
1006 VN_RELE(vp);
1007
1008 rr->rr_status = puterrno(error);
1009 }
1010
1011 /*
1012 * Free data allocated by rfs_read
1013 */
1014 void
rfs_rdfree(struct nfsrdresult * rr)1015 rfs_rdfree(struct nfsrdresult *rr)
1016 {
1017 mblk_t *mp;
1018
1019 if (rr->rr_status == NFS_OK) {
1020 mp = rr->rr_mp;
1021 if (mp != NULL)
1022 freeb(mp);
1023 }
1024 }
1025
1026 void *
rfs_read_getfh(struct nfsreadargs * ra)1027 rfs_read_getfh(struct nfsreadargs *ra)
1028 {
1029 return (&ra->ra_fhandle);
1030 }
1031
1032 #define MAX_IOVECS 12
1033
1034 #ifdef DEBUG
1035 static int rfs_write_sync_hits = 0;
1036 static int rfs_write_sync_misses = 0;
1037 #endif
1038
1039 /*
1040 * Write data to file.
1041 * Returns attributes of a file after writing some data to it.
1042 *
1043 * Any changes made here, especially in error handling might have
1044 * to also be done in rfs_write (which clusters write requests).
1045 */
1046 /* ARGSUSED */
1047 void
rfs_write_sync(struct nfswriteargs * wa,struct nfsattrstat * ns,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1048 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
1049 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1050 {
1051 int error;
1052 vnode_t *vp;
1053 rlim64_t rlimit;
1054 struct vattr va;
1055 struct uio uio;
1056 struct iovec iov[MAX_IOVECS];
1057 mblk_t *m;
1058 struct iovec *iovp;
1059 int iovcnt;
1060 cred_t *savecred;
1061 int in_crit = 0;
1062 caller_context_t ct;
1063
1064 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1065 if (vp == NULL) {
1066 ns->ns_status = NFSERR_STALE;
1067 return;
1068 }
1069
1070 if (rdonly(ro, vp)) {
1071 VN_RELE(vp);
1072 ns->ns_status = NFSERR_ROFS;
1073 return;
1074 }
1075
1076 if (vp->v_type != VREG) {
1077 VN_RELE(vp);
1078 ns->ns_status = NFSERR_ISDIR;
1079 return;
1080 }
1081
1082 ct.cc_sysid = 0;
1083 ct.cc_pid = 0;
1084 ct.cc_caller_id = nfs2_srv_caller_id;
1085 ct.cc_flags = CC_DONTBLOCK;
1086
1087 va.va_mask = AT_UID|AT_MODE;
1088
1089 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1090
1091 if (error) {
1092 VN_RELE(vp);
1093 ns->ns_status = puterrno(error);
1094
1095 return;
1096 }
1097
1098 if (crgetuid(cr) != va.va_uid) {
1099 /*
1100 * This is a kludge to allow writes of files created
1101 * with read only permission. The owner of the file
1102 * is always allowed to write it.
1103 */
1104 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
1105
1106 if (error) {
1107 VN_RELE(vp);
1108 ns->ns_status = puterrno(error);
1109 return;
1110 }
1111 }
1112
1113 /*
1114 * Can't access a mandatory lock file. This might cause
1115 * the NFS service thread to block forever waiting for a
1116 * lock to be released that will never be released.
1117 */
1118 if (MANDLOCK(vp, va.va_mode)) {
1119 VN_RELE(vp);
1120 ns->ns_status = NFSERR_ACCES;
1121 return;
1122 }
1123
1124 /*
1125 * We have to enter the critical region before calling VOP_RWLOCK
1126 * to avoid a deadlock with ufs.
1127 */
1128 if (nbl_need_check(vp)) {
1129 nbl_start_crit(vp, RW_READER);
1130 in_crit = 1;
1131 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1132 wa->wa_count, 0, NULL)) {
1133 error = EACCES;
1134 goto out;
1135 }
1136 }
1137
1138 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1139
1140 /* check if a monitor detected a delegation conflict */
1141 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1142 goto out;
1143 }
1144
1145 if (wa->wa_data || wa->wa_rlist) {
1146 /* Do the RDMA thing if necessary */
1147 if (wa->wa_rlist) {
1148 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1149 iov[0].iov_len = wa->wa_count;
1150 } else {
1151 iov[0].iov_base = wa->wa_data;
1152 iov[0].iov_len = wa->wa_count;
1153 }
1154 uio.uio_iov = iov;
1155 uio.uio_iovcnt = 1;
1156 uio.uio_segflg = UIO_SYSSPACE;
1157 uio.uio_extflg = UIO_COPY_DEFAULT;
1158 uio.uio_loffset = (offset_t)wa->wa_offset;
1159 uio.uio_resid = wa->wa_count;
1160 /*
1161 * The limit is checked on the client. We
1162 * should allow any size writes here.
1163 */
1164 uio.uio_llimit = curproc->p_fsz_ctl;
1165 rlimit = uio.uio_llimit - wa->wa_offset;
1166 if (rlimit < (rlim64_t)uio.uio_resid)
1167 uio.uio_resid = (uint_t)rlimit;
1168
1169 /*
1170 * for now we assume no append mode
1171 */
1172 /*
1173 * We're changing creds because VM may fault and we need
1174 * the cred of the current thread to be used if quota
1175 * checking is enabled.
1176 */
1177 savecred = curthread->t_cred;
1178 curthread->t_cred = cr;
1179 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1180 curthread->t_cred = savecred;
1181 } else {
1182
1183 iovcnt = 0;
1184 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1185 iovcnt++;
1186 if (iovcnt <= MAX_IOVECS) {
1187 #ifdef DEBUG
1188 rfs_write_sync_hits++;
1189 #endif
1190 iovp = iov;
1191 } else {
1192 #ifdef DEBUG
1193 rfs_write_sync_misses++;
1194 #endif
1195 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1196 }
1197 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1198 uio.uio_iov = iovp;
1199 uio.uio_iovcnt = iovcnt;
1200 uio.uio_segflg = UIO_SYSSPACE;
1201 uio.uio_extflg = UIO_COPY_DEFAULT;
1202 uio.uio_loffset = (offset_t)wa->wa_offset;
1203 uio.uio_resid = wa->wa_count;
1204 /*
1205 * The limit is checked on the client. We
1206 * should allow any size writes here.
1207 */
1208 uio.uio_llimit = curproc->p_fsz_ctl;
1209 rlimit = uio.uio_llimit - wa->wa_offset;
1210 if (rlimit < (rlim64_t)uio.uio_resid)
1211 uio.uio_resid = (uint_t)rlimit;
1212
1213 /*
1214 * For now we assume no append mode.
1215 */
1216 /*
1217 * We're changing creds because VM may fault and we need
1218 * the cred of the current thread to be used if quota
1219 * checking is enabled.
1220 */
1221 savecred = curthread->t_cred;
1222 curthread->t_cred = cr;
1223 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1224 curthread->t_cred = savecred;
1225
1226 if (iovp != iov)
1227 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1228 }
1229
1230 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1231
1232 if (!error) {
1233 /*
1234 * Get attributes again so we send the latest mod
1235 * time to the client side for its cache.
1236 */
1237 va.va_mask = AT_ALL; /* now we want everything */
1238
1239 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1240
1241 /* check for overflows */
1242 if (!error) {
1243 acl_perm(vp, exi, &va, cr);
1244 error = vattr_to_nattr(&va, &ns->ns_attr);
1245 }
1246 }
1247
1248 out:
1249 if (in_crit)
1250 nbl_end_crit(vp);
1251 VN_RELE(vp);
1252
1253 /* check if a monitor detected a delegation conflict */
1254 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1255 /* mark as wouldblock so response is dropped */
1256 curthread->t_flag |= T_WOULDBLOCK;
1257 else
1258 ns->ns_status = puterrno(error);
1259
1260 }
1261
/*
 * One pending NFSv2 WRITE request, queued on a per-file "cluster" so
 * that adjacent writes arriving concurrently can be coalesced into a
 * single gathered VOP_WRITE by rfs_write().
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* client's write arguments */
	struct nfsattrstat *ns;		/* response to be filled in */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials for this request */
	bool_t ro;			/* read-only flag, checked via rdonly() */
	kthread_t *thread;		/* service thread parked on this entry */
	struct rfs_async_write *list;	/* next request in the cluster */
};
1271
/*
 * Head of a cluster of pending writes to one file, keyed by file handle.
 * Clusters live on a global singly-linked list (nfs_srv_t); waiting
 * service threads sleep on 'cv' until the cluster owner fills in their
 * response status.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;				/* file handle shared by the cluster */
	kcondvar_t cv;				/* broadcast when cluster is serviced */
	struct rfs_async_write *list;		/* requests, kept in offset order */
	struct rfs_async_write_list *next;	/* next cluster on the global list */
};
1278
/*
 * Maximum number of iovecs rfs_write() services with the on-stack array
 * before falling back to kmem_alloc().
 */
#define	MAXCLIOVECS	42
/* Sentinel meaning "response not yet filled in" (0 would read as NFS_OK). */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* writes served by the on-stack iovecs */
static int rfs_write_misses = 0;	/* writes that needed kmem_alloc() */
#endif
1286
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Clustered (write-gathering) variant: concurrent WRITEs to the same
 * file are collected into a per-file "cluster" while one thread holds
 * the file write-locked; contiguous requests in the cluster are then
 * issued as a single gathered VOP_WRITE and the parked service threads
 * are woken with their individual statuses.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];	/* on-stack iovecs for the common case */
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;	/* our cluster entry, on our stack */
	struct rfs_async_write_list nlpsp;	/* our cluster head, on our stack */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;
	nfs_srv_t *nsrv;

	ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
	nsrv = nfs_get_srv();
	/* Fall back to the simple synchronous path if clustering is off. */
	if (!nsrv->write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/*
	 * Our stack holds the cluster entry other threads will reference,
	 * so this thread must not be swapped out while parked below.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&nsrv->async_write_lock);
	for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insert in offset order so contiguous runs can coalesce. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Park until the cluster owner fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &nsrv->async_write_lock);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (nsrv->async_write_head == NULL) {
		nsrv->async_write_head = nlp;
	} else {
		lp = nsrv->async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Stale handle: unlink our cluster and fail every request. */
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&nsrv->async_write_lock);
		if (nsrv->async_write_head == nlp)
			nsrv->async_write_head = nlp->next;
		else {
			lp = nsrv->async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&nsrv->async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&nsrv->async_write_lock);
	if (nsrv->async_write_head == nlp)
		nsrv->async_write_head = nlp->next;
	else {
		lp = nsrv->async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&nsrv->async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first request
			 * that is errored or not byte-contiguous with
			 * its predecessor.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Walk the mblk chain, clamping the final
				 * fragment so we never exceed wa_count.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for its cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Wake everyone still parked on this cluster with the final status. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&nsrv->async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&nsrv->async_write_lock);

}
1788
1789 void *
rfs_write_getfh(struct nfswriteargs * wa)1790 rfs_write_getfh(struct nfswriteargs *wa)
1791 {
1792 return (&wa->wa_fhandle);
1793 }
1794
1795 /*
1796 * Create a file.
1797 * Creates a file with given attributes and returns those attributes
1798 * and an fhandle for the new file.
1799 */
1800 void
rfs_create(struct nfscreatargs * args,struct nfsdiropres * dr,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1801 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1802 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1803 {
1804 int error;
1805 int lookuperr;
1806 int in_crit = 0;
1807 struct vattr va;
1808 vnode_t *vp;
1809 vnode_t *realvp;
1810 vnode_t *dvp;
1811 char *name = args->ca_da.da_name;
1812 vnode_t *tvp = NULL;
1813 int mode;
1814 int lookup_ok;
1815 bool_t trunc;
1816 struct sockaddr *ca;
1817
1818 /*
1819 * Disallow NULL paths
1820 */
1821 if (name == NULL || *name == '\0') {
1822 dr->dr_status = NFSERR_ACCES;
1823 return;
1824 }
1825
1826 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1827 if (dvp == NULL) {
1828 dr->dr_status = NFSERR_STALE;
1829 return;
1830 }
1831
1832 error = sattr_to_vattr(args->ca_sa, &va);
1833 if (error) {
1834 dr->dr_status = puterrno(error);
1835 return;
1836 }
1837
1838 /*
1839 * Must specify the mode.
1840 */
1841 if (!(va.va_mask & AT_MODE)) {
1842 VN_RELE(dvp);
1843 dr->dr_status = NFSERR_INVAL;
1844 return;
1845 }
1846
1847 /*
1848 * This is a completely gross hack to make mknod
1849 * work over the wire until we can wack the protocol
1850 */
1851 if ((va.va_mode & IFMT) == IFCHR) {
1852 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1853 va.va_type = VFIFO; /* xtra kludge for named pipe */
1854 else {
1855 va.va_type = VCHR;
1856 /*
1857 * uncompress the received dev_t
1858 * if the top half is zero indicating a request
1859 * from an `older style' OS.
1860 */
1861 if ((va.va_size & 0xffff0000) == 0)
1862 va.va_rdev = nfsv2_expdev(va.va_size);
1863 else
1864 va.va_rdev = (dev_t)va.va_size;
1865 }
1866 va.va_mask &= ~AT_SIZE;
1867 } else if ((va.va_mode & IFMT) == IFBLK) {
1868 va.va_type = VBLK;
1869 /*
1870 * uncompress the received dev_t
1871 * if the top half is zero indicating a request
1872 * from an `older style' OS.
1873 */
1874 if ((va.va_size & 0xffff0000) == 0)
1875 va.va_rdev = nfsv2_expdev(va.va_size);
1876 else
1877 va.va_rdev = (dev_t)va.va_size;
1878 va.va_mask &= ~AT_SIZE;
1879 } else if ((va.va_mode & IFMT) == IFSOCK) {
1880 va.va_type = VSOCK;
1881 } else {
1882 va.va_type = VREG;
1883 }
1884 va.va_mode &= ~IFMT;
1885 va.va_mask |= AT_TYPE;
1886
1887 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1888 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1889 MAXPATHLEN);
1890 if (name == NULL) {
1891 dr->dr_status = puterrno(EINVAL);
1892 return;
1893 }
1894
1895 /*
1896 * Why was the choice made to use VWRITE as the mode to the
1897 * call to VOP_CREATE ? This results in a bug. When a client
1898 * opens a file that already exists and is RDONLY, the second
1899 * open fails with an EACESS because of the mode.
1900 * bug ID 1054648.
1901 */
1902 lookup_ok = 0;
1903 mode = VWRITE;
1904 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1905 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1906 NULL, NULL, NULL);
1907 if (!error) {
1908 struct vattr at;
1909
1910 lookup_ok = 1;
1911 at.va_mask = AT_MODE;
1912 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1913 if (!error)
1914 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1915 VN_RELE(tvp);
1916 tvp = NULL;
1917 }
1918 }
1919
1920 if (!lookup_ok) {
1921 if (rdonly(ro, dvp)) {
1922 error = EROFS;
1923 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1924 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1925 error = EPERM;
1926 } else {
1927 error = 0;
1928 }
1929 }
1930
1931 /*
1932 * If file size is being modified on an already existing file
1933 * make sure that there are no conflicting non-blocking mandatory
1934 * locks in the region being manipulated. Return EACCES if there
1935 * are conflicting locks.
1936 */
1937 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1938 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1939 NULL, NULL, NULL);
1940
1941 if (!lookuperr &&
1942 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1943 VN_RELE(tvp);
1944 curthread->t_flag |= T_WOULDBLOCK;
1945 goto out;
1946 }
1947
1948 if (!lookuperr && nbl_need_check(tvp)) {
1949 /*
1950 * The file exists. Now check if it has any
1951 * conflicting non-blocking mandatory locks
1952 * in the region being changed.
1953 */
1954 struct vattr bva;
1955 u_offset_t offset;
1956 ssize_t length;
1957
1958 nbl_start_crit(tvp, RW_READER);
1959 in_crit = 1;
1960
1961 bva.va_mask = AT_SIZE;
1962 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1963 if (!error) {
1964 if (va.va_size < bva.va_size) {
1965 offset = va.va_size;
1966 length = bva.va_size - va.va_size;
1967 } else {
1968 offset = bva.va_size;
1969 length = va.va_size - bva.va_size;
1970 }
1971 if (length) {
1972 if (nbl_conflict(tvp, NBL_WRITE,
1973 offset, length, 0, NULL)) {
1974 error = EACCES;
1975 }
1976 }
1977 }
1978 if (error) {
1979 nbl_end_crit(tvp);
1980 VN_RELE(tvp);
1981 in_crit = 0;
1982 }
1983 } else if (tvp != NULL) {
1984 VN_RELE(tvp);
1985 }
1986 }
1987
1988 if (!error) {
1989 /*
1990 * If filesystem is shared with nosuid the remove any
1991 * setuid/setgid bits on create.
1992 */
1993 if (va.va_type == VREG &&
1994 exi->exi_export.ex_flags & EX_NOSUID)
1995 va.va_mode &= ~(VSUID | VSGID);
1996
1997 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1998 NULL, NULL);
1999
2000 if (!error) {
2001
2002 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
2003 trunc = TRUE;
2004 else
2005 trunc = FALSE;
2006
2007 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
2008 VN_RELE(vp);
2009 curthread->t_flag |= T_WOULDBLOCK;
2010 goto out;
2011 }
2012 va.va_mask = AT_ALL;
2013
2014 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
2015
2016 /* check for overflows */
2017 if (!error) {
2018 acl_perm(vp, exi, &va, cr);
2019 error = vattr_to_nattr(&va, &dr->dr_attr);
2020 if (!error) {
2021 error = makefh(&dr->dr_fhandle, vp,
2022 exi);
2023 }
2024 }
2025 /*
2026 * Force modified metadata out to stable storage.
2027 *
2028 * if a underlying vp exists, pass it to VOP_FSYNC
2029 */
2030 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2031 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2032 else
2033 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2034 VN_RELE(vp);
2035 }
2036
2037 if (in_crit) {
2038 nbl_end_crit(tvp);
2039 VN_RELE(tvp);
2040 }
2041 }
2042
2043 /*
2044 * Force modified data and metadata out to stable storage.
2045 */
2046 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2047
2048 out:
2049
2050 VN_RELE(dvp);
2051
2052 dr->dr_status = puterrno(error);
2053
2054 if (name != args->ca_da.da_name)
2055 kmem_free(name, MAXPATHLEN);
2056 }
2057 void *
rfs_create_getfh(struct nfscreatargs * args)2058 rfs_create_getfh(struct nfscreatargs *args)
2059 {
2060 return (args->ca_da.da_fhandle);
2061 }
2062
/*
 * Remove a file.
 * Remove named file from parent directory.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* parent directory */
	vnode_t *targvp;	/* file being removed */
	int in_crit = 0;	/* non-zero while inside nbmand crit region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share
	 * reservation.  We need the target vnode itself for that.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2148
2149 void *
rfs_remove_getfh(struct nfsdiropargs * da)2150 rfs_remove_getfh(struct nfsdiropargs *da)
2151 {
2152 return (da->da_fhandle);
2153 }
2154
/*
 * rename a file
 * Give a file (from) a new name (to).
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;	/* non-zero while inside nbmand crit region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Both directories must live in the same export; cross-export
	 * renames are rejected with NFSERR_XDEV.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/*
	 * NOTE: to_exi is only compared by pointer after the release
	 * above; it is never dereferenced again.
	 */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* keep /proc's cached path for the vnode up to date */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2298 void *
rfs_rename_getfh(struct nfsrnmargs * args)2299 rfs_rename_getfh(struct nfsrnmargs *args)
2300 {
2301 return (args->rna_from.da_fhandle);
2302 }
2303
/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file */
	vnode_t *tovp;		/* directory to receive the new link */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The link target directory must live in the same export;
	 * cross-export links are rejected with NFSERR_XDEV.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* pointer comparison only; to_exi is not dereferenced after rele */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2384 void *
rfs_link_getfh(struct nfslinkargs * args)2385 rfs_link_getfh(struct nfslinkargs *args)
2386 {
2387 return (args->la_from);
2388 }
2389
2390 /*
2391 * Symbolicly link to a file.
2392 * Create a file (to) with the given attributes which is a symbolic link
2393 * to the given path name (to).
2394 */
2395 void
rfs_symlink(struct nfsslargs * args,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2396 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2397 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2398 {
2399 int error;
2400 struct vattr va;
2401 vnode_t *vp;
2402 vnode_t *svp;
2403 int lerror;
2404 struct sockaddr *ca;
2405 char *name = NULL;
2406
2407 /*
2408 * Disallow NULL paths
2409 */
2410 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2411 *status = NFSERR_ACCES;
2412 return;
2413 }
2414
2415 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2416 if (vp == NULL) {
2417 *status = NFSERR_STALE;
2418 return;
2419 }
2420
2421 if (rdonly(ro, vp)) {
2422 VN_RELE(vp);
2423 *status = NFSERR_ROFS;
2424 return;
2425 }
2426
2427 error = sattr_to_vattr(args->sla_sa, &va);
2428 if (error) {
2429 VN_RELE(vp);
2430 *status = puterrno(error);
2431 return;
2432 }
2433
2434 if (!(va.va_mask & AT_MODE)) {
2435 VN_RELE(vp);
2436 *status = NFSERR_INVAL;
2437 return;
2438 }
2439
2440 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2441 name = nfscmd_convname(ca, exi, args->sla_tnm,
2442 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2443
2444 if (name == NULL) {
2445 *status = NFSERR_ACCES;
2446 return;
2447 }
2448
2449 va.va_type = VLNK;
2450 va.va_mask |= AT_TYPE;
2451
2452 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2453
2454 /*
2455 * Force new data and metadata out to stable storage.
2456 */
2457 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2458 NULL, cr, NULL, NULL, NULL);
2459
2460 if (!lerror) {
2461 (void) VOP_FSYNC(svp, 0, cr, NULL);
2462 VN_RELE(svp);
2463 }
2464
2465 /*
2466 * Force modified data and metadata out to stable storage.
2467 */
2468 (void) VOP_FSYNC(vp, 0, cr, NULL);
2469
2470 VN_RELE(vp);
2471
2472 *status = puterrno(error);
2473 if (name != args->sla_tnm)
2474 kmem_free(name, MAXPATHLEN);
2475
2476 }
2477 void *
rfs_symlink_getfh(struct nfsslargs * args)2478 rfs_symlink_getfh(struct nfsslargs *args)
2479 {
2480 return (args->sla_from.da_fhandle);
2481 }
2482
2483 /*
2484 * Make a directory.
2485 * Create a directory with the given name, parent directory, and attributes.
2486 * Returns a file handle and attributes for the new directory.
2487 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* vnode of the newly created directory */
	vnode_t *vp;		/* parent directory, from the file handle */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	/* Reject modification of read-only exports/filesystems. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply an initial mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is handed the parent
			 * vnode (vp) although the attributes in va were
			 * fetched from the new directory (dvp) — confirm
			 * this is intentional.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
/*
 * Return the file handle embedded in the mkdir request arguments
 * (the handle of the parent directory).
 */
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
2575
2576 /*
2577 * Remove a directory.
2578 * Remove the given directory name from the given parent directory.
2579 */
2580 /* ARGSUSED */
2581 void
rfs_rmdir(struct nfsdiropargs * da,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2582 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2583 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2584 {
2585 int error;
2586 vnode_t *vp;
2587
2588 /*
2589 * Disallow NULL paths
2590 */
2591 if (da->da_name == NULL || *da->da_name == '\0') {
2592 *status = NFSERR_ACCES;
2593 return;
2594 }
2595
2596 vp = nfs_fhtovp(da->da_fhandle, exi);
2597 if (vp == NULL) {
2598 *status = NFSERR_STALE;
2599 return;
2600 }
2601
2602 if (rdonly(ro, vp)) {
2603 VN_RELE(vp);
2604 *status = NFSERR_ROFS;
2605 return;
2606 }
2607
2608 /*
2609 * VOP_RMDIR takes a third argument (the current
2610 * directory of the process). That's because someone
2611 * wants to return EINVAL if one tries to remove ".".
2612 * Of course, NFS servers have no idea what their
2613 * clients' current directories are. We fake it by
2614 * supplying a vnode known to exist and illegal to
2615 * remove.
2616 */
2617 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
2618
2619 /*
2620 * Force modified data and metadata out to stable storage.
2621 */
2622 (void) VOP_FSYNC(vp, 0, cr, NULL);
2623
2624 VN_RELE(vp);
2625
2626 /*
2627 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2628 * if the directory is not empty. A System V NFS server
2629 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2630 * over the wire.
2631 */
2632 if (error == EEXIST)
2633 *status = NFSERR_NOTEMPTY;
2634 else
2635 *status = puterrno(error);
2636
2637 }
/*
 * Return the file handle embedded in the rmdir request arguments
 * (the handle of the parent directory).
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2643
/*
 * Read directory entries.
 * Returns up to rda_count bytes of dirent64 data starting at offset
 * rda_offset, applying character-set conversion of entry names for
 * the client where the export requires it.
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* possibly-converted entry buffer */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Hold the directory rwlock (as reader) across the read. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns no entries and does not mean EOF. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the request to the NFSv2 maximum transfer size. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries. This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* Nothing was read: report EOF. */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * NOTE(review): if VOP_READDIR failed, rd_size has not been set
	 * before it is used below for the character conversion — confirm
	 * the conversion is harmless in that case.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion. We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Install the converted buffer, freeing the original if replaced. */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
/*
 * Return the file handle embedded in the readdir request arguments.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}
2782 void
rfs_rddirfree(struct nfsrddirres * rd)2783 rfs_rddirfree(struct nfsrddirres *rd)
2784 {
2785 if (rd->rd_entries != NULL)
2786 kmem_free(rd->rd_entries, rd->rd_bufsize);
2787 }
2788
2789 /* ARGSUSED */
2790 void
rfs_statfs(fhandle_t * fh,struct nfsstatfs * fs,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2791 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2792 struct svc_req *req, cred_t *cr, bool_t ro)
2793 {
2794 int error;
2795 struct statvfs64 sb;
2796 vnode_t *vp;
2797
2798 vp = nfs_fhtovp(fh, exi);
2799 if (vp == NULL) {
2800 fs->fs_status = NFSERR_STALE;
2801 return;
2802 }
2803
2804 error = VFS_STATVFS(vp->v_vfsp, &sb);
2805
2806 if (!error) {
2807 fs->fs_tsize = nfstsize();
2808 fs->fs_bsize = sb.f_frsize;
2809 fs->fs_blocks = sb.f_blocks;
2810 fs->fs_bfree = sb.f_bfree;
2811 fs->fs_bavail = sb.f_bavail;
2812 }
2813
2814 VN_RELE(vp);
2815
2816 fs->fs_status = puterrno(error);
2817
2818 }
/*
 * The statfs request argument is the file handle itself.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2824
/*
 * Convert the NFSv2 settable attributes (nfssattr) into a vattr,
 * setting a bit in va_mask only for each field the client actually
 * supplied.  On the wire, an all-ones value (-1) means "do not
 * change this field".  Returns 0, or EOVERFLOW on 32-bit kernels if
 * a supplied time does not fit in time_t.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short. When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* A time is "set" only when both seconds and microseconds are. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2891
/*
 * Map vtype_t values (indexed VNON through VBAD) to the NFSv2
 * over-the-wire file type.  Types NFSv2 cannot express map to 0
 * (NFNON); VFIFO also maps to 0 here and is special-cased via
 * NA_SETFIFO in vattr_to_nattr().
 */
static const enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2895
2896 /*
2897 * check the following fields for overflow: nodeid, size, and time.
2898 * There could be a problem when converting 64-bit LP64 fields
2899 * into 32-bit ones. Return an error if there is an overflow.
2900 */
2901 int
vattr_to_nattr(struct vattr * vap,struct nfsfattr * na)2902 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2903 {
2904 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2905 na->na_type = vt_to_nf[vap->va_type];
2906
2907 if (vap->va_mode == (unsigned short) -1)
2908 na->na_mode = (uint32_t)-1;
2909 else
2910 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2911
2912 if (vap->va_uid == (unsigned short)(-1))
2913 na->na_uid = (uint32_t)(-1);
2914 else if (vap->va_uid == UID_NOBODY)
2915 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2916 else
2917 na->na_uid = vap->va_uid;
2918
2919 if (vap->va_gid == (unsigned short)(-1))
2920 na->na_gid = (uint32_t)-1;
2921 else if (vap->va_gid == GID_NOBODY)
2922 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2923 else
2924 na->na_gid = vap->va_gid;
2925
2926 /*
2927 * Do we need to check fsid for overflow? It is 64-bit in the
2928 * vattr, but are bigger than 32 bit values supported?
2929 */
2930 na->na_fsid = vap->va_fsid;
2931
2932 na->na_nodeid = vap->va_nodeid;
2933
2934 /*
2935 * Check to make sure that the nodeid is representable over the
2936 * wire without losing bits.
2937 */
2938 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2939 return (EFBIG);
2940 na->na_nlink = vap->va_nlink;
2941
2942 /*
2943 * Check for big files here, instead of at the caller. See
2944 * comments in cstat for large special file explanation.
2945 */
2946 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2947 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2948 return (EFBIG);
2949 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2950 /* UNKNOWN_SIZE | OVERFLOW */
2951 na->na_size = MAXOFF32_T;
2952 } else
2953 na->na_size = vap->va_size;
2954 } else
2955 na->na_size = vap->va_size;
2956
2957 /*
2958 * If the vnode times overflow the 32-bit times that NFS2
2959 * uses on the wire then return an error.
2960 */
2961 if (!NFS_VAP_TIME_OK(vap)) {
2962 return (EOVERFLOW);
2963 }
2964 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2965 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2966
2967 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2968 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2969
2970 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2971 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2972
2973 /*
2974 * If the dev_t will fit into 16 bits then compress
2975 * it, otherwise leave it alone. See comments in
2976 * nfs_client.c.
2977 */
2978 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2979 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2980 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2981 else
2982 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2983
2984 na->na_blocks = vap->va_nblocks;
2985 na->na_blocksize = vap->va_blksize;
2986
2987 /*
2988 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2989 * over-the-wire protocols for named-pipe vnodes. It remaps the
2990 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2991 *
2992 * BUYER BEWARE:
2993 * If you are porting the NFS to a non-Sun server, you probably
2994 * don't want to include the following block of code. The
2995 * over-the-wire special file types will be changing with the
2996 * NFS Protocol Revision.
2997 */
2998 if (vap->va_type == VFIFO)
2999 NA_SETFIFO(na);
3000 return (0);
3001 }
3002
3003 /*
3004 * acl v2 support: returns approximate permission.
3005 * default: returns minimal permission (more restrictive)
3006 * aclok: returns maximal permission (less restrictive)
3007 * This routine changes the permissions that are alaredy in *va.
3008 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
3009 * CLASS_OBJ is always the same as GROUP_OBJ entry.
3010 */
3011 static void
acl_perm(struct vnode * vp,struct exportinfo * exi,struct vattr * va,cred_t * cr)3012 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
3013 {
3014 vsecattr_t vsa;
3015 int aclcnt;
3016 aclent_t *aclentp;
3017 mode_t mask_perm;
3018 mode_t grp_perm;
3019 mode_t other_perm;
3020 mode_t other_orig;
3021 int error;
3022
3023 /* dont care default acl */
3024 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
3025 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
3026
3027 if (!error) {
3028 aclcnt = vsa.vsa_aclcnt;
3029 if (aclcnt > MIN_ACL_ENTRIES) {
3030 /* non-trivial ACL */
3031 aclentp = vsa.vsa_aclentp;
3032 if (exi->exi_export.ex_flags & EX_ACLOK) {
3033 /* maximal permissions */
3034 grp_perm = 0;
3035 other_perm = 0;
3036 for (; aclcnt > 0; aclcnt--, aclentp++) {
3037 switch (aclentp->a_type) {
3038 case USER_OBJ:
3039 break;
3040 case USER:
3041 grp_perm |=
3042 aclentp->a_perm << 3;
3043 other_perm |= aclentp->a_perm;
3044 break;
3045 case GROUP_OBJ:
3046 grp_perm |=
3047 aclentp->a_perm << 3;
3048 break;
3049 case GROUP:
3050 other_perm |= aclentp->a_perm;
3051 break;
3052 case OTHER_OBJ:
3053 other_orig = aclentp->a_perm;
3054 break;
3055 case CLASS_OBJ:
3056 mask_perm = aclentp->a_perm;
3057 break;
3058 default:
3059 break;
3060 }
3061 }
3062 grp_perm &= mask_perm << 3;
3063 other_perm &= mask_perm;
3064 other_perm |= other_orig;
3065
3066 } else {
3067 /* minimal permissions */
3068 grp_perm = 070;
3069 other_perm = 07;
3070 for (; aclcnt > 0; aclcnt--, aclentp++) {
3071 switch (aclentp->a_type) {
3072 case USER_OBJ:
3073 break;
3074 case USER:
3075 case CLASS_OBJ:
3076 grp_perm &=
3077 aclentp->a_perm << 3;
3078 other_perm &=
3079 aclentp->a_perm;
3080 break;
3081 case GROUP_OBJ:
3082 grp_perm &=
3083 aclentp->a_perm << 3;
3084 break;
3085 case GROUP:
3086 other_perm &=
3087 aclentp->a_perm;
3088 break;
3089 case OTHER_OBJ:
3090 other_perm &=
3091 aclentp->a_perm;
3092 break;
3093 default:
3094 break;
3095 }
3096 }
3097 }
3098 /* copy to va */
3099 va->va_mode &= ~077;
3100 va->va_mode |= grp_perm | other_perm;
3101 }
3102 if (vsa.vsa_aclcnt)
3103 kmem_free(vsa.vsa_aclentp,
3104 vsa.vsa_aclcnt * sizeof (aclent_t));
3105 }
3106 }
3107
/*
 * Module-wide NFSv2 server initialization: obtain the caller id used
 * by this server.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}
3113
/*
 * Module-wide NFSv2 server teardown; nothing to undo at this time.
 */
void
rfs_srvrfini(void)
{
}
3118
3119 /* ARGSUSED */
3120 void
rfs_srv_zone_init(nfs_globals_t * ng)3121 rfs_srv_zone_init(nfs_globals_t *ng)
3122 {
3123 nfs_srv_t *ns;
3124
3125 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
3126
3127 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
3128 ns->write_async = 1;
3129
3130 ng->nfs_srv = ns;
3131 }
3132
/*
 * Tear down the per-zone NFSv2 server state created by
 * rfs_srv_zone_init().
 */
/* ARGSUSED */
void
rfs_srv_zone_fini(nfs_globals_t *ng)
{
	nfs_srv_t *ns = ng->nfs_srv;

	/* Unhook the state from the globals before destroying it. */
	ng->nfs_srv = NULL;

	mutex_destroy(&ns->async_write_lock);
	kmem_free(ns, sizeof (*ns));
}
3144
3145 static int
rdma_setup_read_data2(struct nfsreadargs * ra,struct nfsrdresult * rr)3146 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
3147 {
3148 struct clist *wcl;
3149 int wlist_len;
3150 uint32_t count = rr->rr_count;
3151
3152 wcl = ra->ra_wlist;
3153
3154 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3155 return (FALSE);
3156 }
3157
3158 wcl = ra->ra_wlist;
3159 rr->rr_ok.rrok_wlist_len = wlist_len;
3160 rr->rr_ok.rrok_wlist = wcl;
3161
3162 return (TRUE);
3163 }
3164