1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 #include <rpc/rpc_rdma.h>
56
57 #include <nfs/nfs.h>
58 #include <nfs/export.h>
59 #include <nfs/nfs_cmd.h>
60
61 #include <sys/strsubr.h>
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
64
65 #include <sys/zone.h>
66
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69
70 /*
71 * These are the interface routines for the server side of the
72 * Network File System. See the NFS version 3 protocol specification
73 * for a description of this interface.
74 */
75
76 static writeverf3 write3verf;
77
78 static int sattr3_to_vattr(sattr3 *, struct vattr *);
79 static int vattr_to_fattr3(struct vattr *, fattr3 *);
80 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
81 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
82 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
83 static int rdma_setup_read_data3(READ3args *, READ3resok *);
84
85 extern int nfs_loaned_buffers;
86
87 u_longlong_t nfs3_srv_caller_id;
88
89 /* ARGSUSED */
90 void
rfs3_getattr(GETATTR3args * args,GETATTR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 struct svc_req *req, cred_t *cr, bool_t ro)
93 {
94 int error;
95 vnode_t *vp;
96 struct vattr va;
97
98 vp = nfs3_fhtovp(&args->object, exi);
99
100 DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102
103 if (vp == NULL) {
104 error = ESTALE;
105 goto out;
106 }
107
108 va.va_mask = AT_ALL;
109 error = rfs4_delegated_getattr(vp, &va, 0, cr);
110
111 if (!error) {
112 /* Lie about the object type for a referral */
113 if (vn_is_nfs_reparse(vp, cr))
114 va.va_type = VLNK;
115
116 /* overflow error if time or size is out of range */
117 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 if (error)
119 goto out;
120 resp->status = NFS3_OK;
121
122 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
124
125 VN_RELE(vp);
126
127 return;
128 }
129
130 out:
131 if (curthread->t_flag & T_WOULDBLOCK) {
132 curthread->t_flag &= ~T_WOULDBLOCK;
133 resp->status = NFS3ERR_JUKEBOX;
134 } else
135 resp->status = puterrno3(error);
136
137 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
139
140 if (vp != NULL)
141 VN_RELE(vp);
142 }
143
144 void *
rfs3_getattr_getfh(GETATTR3args * args)145 rfs3_getattr_getfh(GETATTR3args *args)
146 {
147
148 return (&args->object);
149 }
150
151 void
rfs3_setattr(SETATTR3args * args,SETATTR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153 struct svc_req *req, cred_t *cr, bool_t ro)
154 {
155 int error;
156 vnode_t *vp;
157 struct vattr *bvap;
158 struct vattr bva;
159 struct vattr *avap;
160 struct vattr ava;
161 int flag;
162 int in_crit = 0;
163 struct flock64 bf;
164 caller_context_t ct;
165
166 bvap = NULL;
167 avap = NULL;
168
169 vp = nfs3_fhtovp(&args->object, exi);
170
171 DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
173
174 if (vp == NULL) {
175 error = ESTALE;
176 goto out;
177 }
178
179 error = sattr3_to_vattr(&args->new_attributes, &ava);
180 if (error)
181 goto out;
182
183 if (is_system_labeled()) {
184 bslabel_t *clabel = req->rq_label;
185
186 ASSERT(clabel != NULL);
187 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 "got client label from request(1)", struct svc_req *, req);
189
190 if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 exi)) {
193 resp->status = NFS3ERR_ACCES;
194 goto out1;
195 }
196 }
197 }
198
199 /*
200 * We need to specially handle size changes because of
201 * possible conflicting NBMAND locks. Get into critical
202 * region before VOP_GETATTR, so the size attribute is
203 * valid when checking conflicts.
204 *
205 * Also, check to see if the v4 side of the server has
206 * delegated this file. If so, then we return JUKEBOX to
207 * allow the client to retrasmit its request.
208 */
209 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 if (nbl_need_check(vp)) {
211 nbl_start_crit(vp, RW_READER);
212 in_crit = 1;
213 }
214 }
215
216 bva.va_mask = AT_ALL;
217 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
218
219 /*
220 * If we can't get the attributes, then we can't do the
221 * right access checking. So, we'll fail the request.
222 */
223 if (error)
224 goto out;
225
226 bvap = &bva;
227
228 if (rdonly(ro, vp)) {
229 resp->status = NFS3ERR_ROFS;
230 goto out1;
231 }
232
233 if (args->guard.check &&
234 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 resp->status = NFS3ERR_NOT_SYNC;
237 goto out1;
238 }
239
240 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 flag = ATTR_UTIME;
242 else
243 flag = 0;
244
245 /*
246 * If the filesystem is exported with nosuid, then mask off
247 * the setuid and setgid bits.
248 */
249 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 (exi->exi_export.ex_flags & EX_NOSUID))
251 ava.va_mode &= ~(VSUID | VSGID);
252
253 ct.cc_sysid = 0;
254 ct.cc_pid = 0;
255 ct.cc_caller_id = nfs3_srv_caller_id;
256 ct.cc_flags = CC_DONTBLOCK;
257
258 /*
259 * We need to specially handle size changes because it is
260 * possible for the client to create a file with modes
261 * which indicate read-only, but with the file opened for
262 * writing. If the client then tries to set the size of
263 * the file, then the normal access checking done in
264 * VOP_SETATTR would prevent the client from doing so,
265 * although it should be legal for it to do so. To get
266 * around this, we do the access checking for ourselves
267 * and then use VOP_SPACE which doesn't do the access
268 * checking which VOP_SETATTR does. VOP_SPACE can only
269 * operate on VREG files, let VOP_SETATTR handle the other
270 * extremely rare cases.
271 * Also the client should not be allowed to change the
272 * size of the file if there is a conflicting non-blocking
273 * mandatory lock in the region the change.
274 */
275 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 if (in_crit) {
277 u_offset_t offset;
278 ssize_t length;
279
280 if (ava.va_size < bva.va_size) {
281 offset = ava.va_size;
282 length = bva.va_size - ava.va_size;
283 } else {
284 offset = bva.va_size;
285 length = ava.va_size - bva.va_size;
286 }
287 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 NULL)) {
289 error = EACCES;
290 goto out;
291 }
292 }
293
294 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 ava.va_mask &= ~AT_SIZE;
296 bf.l_type = F_WRLCK;
297 bf.l_whence = 0;
298 bf.l_start = (off64_t)ava.va_size;
299 bf.l_len = 0;
300 bf.l_sysid = 0;
301 bf.l_pid = 0;
302 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 (offset_t)ava.va_size, cr, &ct);
304 }
305 }
306
307 if (!error && ava.va_mask)
308 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309
310 /* check if a monitor detected a delegation conflict */
311 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 resp->status = NFS3ERR_JUKEBOX;
313 goto out1;
314 }
315
316 ava.va_mask = AT_ALL;
317 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
318
319 /*
320 * Force modified metadata out to stable storage.
321 */
322 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
323
324 if (error)
325 goto out;
326
327 if (in_crit)
328 nbl_end_crit(vp);
329
330 resp->status = NFS3_OK;
331 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
332
333 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
335
336 VN_RELE(vp);
337
338 return;
339
340 out:
341 if (curthread->t_flag & T_WOULDBLOCK) {
342 curthread->t_flag &= ~T_WOULDBLOCK;
343 resp->status = NFS3ERR_JUKEBOX;
344 } else
345 resp->status = puterrno3(error);
346 out1:
347 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
349
350 if (vp != NULL) {
351 if (in_crit)
352 nbl_end_crit(vp);
353 VN_RELE(vp);
354 }
355 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
356 }
357
358 void *
rfs3_setattr_getfh(SETATTR3args * args)359 rfs3_setattr_getfh(SETATTR3args *args)
360 {
361
362 return (&args->object);
363 }
364
365 /* ARGSUSED */
366 void
rfs3_lookup(LOOKUP3args * args,LOOKUP3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368 struct svc_req *req, cred_t *cr, bool_t ro)
369 {
370 int error;
371 vnode_t *vp;
372 vnode_t *dvp;
373 struct vattr *vap;
374 struct vattr va;
375 struct vattr *dvap;
376 struct vattr dva;
377 nfs_fh3 *fhp;
378 struct sec_ol sec = {0, 0};
379 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 struct sockaddr *ca;
381 char *name = NULL;
382
383 dvap = NULL;
384
385 /*
386 * Allow lookups from the root - the default
387 * location of the public filehandle.
388 */
389 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
390 dvp = rootdir;
391 VN_HOLD(dvp);
392
393 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
394 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
395 } else {
396 dvp = nfs3_fhtovp(&args->what.dir, exi);
397
398 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
399 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
400
401 if (dvp == NULL) {
402 error = ESTALE;
403 goto out;
404 }
405 }
406
407 dva.va_mask = AT_ALL;
408 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
409
410 if (args->what.name == nfs3nametoolong) {
411 resp->status = NFS3ERR_NAMETOOLONG;
412 goto out1;
413 }
414
415 if (args->what.name == NULL || *(args->what.name) == '\0') {
416 resp->status = NFS3ERR_ACCES;
417 goto out1;
418 }
419
420 fhp = &args->what.dir;
421 if (strcmp(args->what.name, "..") == 0 &&
422 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
423 resp->status = NFS3ERR_NOENT;
424 goto out1;
425 }
426
427 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
428 name = nfscmd_convname(ca, exi, args->what.name,
429 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
430
431 if (name == NULL) {
432 resp->status = NFS3ERR_ACCES;
433 goto out1;
434 }
435
436 /*
437 * If the public filehandle is used then allow
438 * a multi-component lookup
439 */
440 if (PUBLIC_FH3(&args->what.dir)) {
441 publicfh_flag = TRUE;
442 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
443 &exi, &sec);
444 if (error && exi != NULL)
445 exi_rele(exi); /* See comment below Re: publicfh_flag */
446 /*
447 * Since WebNFS may bypass MOUNT, we need to ensure this
448 * request didn't come from an unlabeled admin_low client.
449 */
450 if (is_system_labeled() && error == 0) {
451 int addr_type;
452 void *ipaddr;
453 tsol_tpc_t *tp;
454
455 if (ca->sa_family == AF_INET) {
456 addr_type = IPV4_VERSION;
457 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
458 } else if (ca->sa_family == AF_INET6) {
459 addr_type = IPV6_VERSION;
460 ipaddr = &((struct sockaddr_in6 *)
461 ca)->sin6_addr;
462 }
463 tp = find_tpc(ipaddr, addr_type, B_FALSE);
464 if (tp == NULL || tp->tpc_tp.tp_doi !=
465 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
466 SUN_CIPSO) {
467 if (exi != NULL)
468 exi_rele(exi);
469 VN_RELE(vp);
470 error = EACCES;
471 }
472 if (tp != NULL)
473 TPC_RELE(tp);
474 }
475 } else {
476 error = VOP_LOOKUP(dvp, name, &vp,
477 NULL, 0, NULL, cr, NULL, NULL, NULL);
478 }
479
480 if (name != args->what.name)
481 kmem_free(name, MAXPATHLEN + 1);
482
483 if (is_system_labeled() && error == 0) {
484 bslabel_t *clabel = req->rq_label;
485
486 ASSERT(clabel != NULL);
487 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
488 "got client label from request(1)", struct svc_req *, req);
489
490 if (!blequal(&l_admin_low->tsl_label, clabel)) {
491 if (!do_rfs_label_check(clabel, dvp,
492 DOMINANCE_CHECK, exi)) {
493 if (publicfh_flag && exi != NULL)
494 exi_rele(exi);
495 VN_RELE(vp);
496 error = EACCES;
497 }
498 }
499 }
500
501 dva.va_mask = AT_ALL;
502 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
503
504 if (error)
505 goto out;
506
507 if (sec.sec_flags & SEC_QUERY) {
508 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
509 } else {
510 error = makefh3(&resp->resok.object, vp, exi);
511 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
512 auth_weak = TRUE;
513 }
514
515 /*
516 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
517 * and have obtained a new exportinfo in exi which needs to be
518 * released. Note that the original exportinfo pointed to by exi
519 * will be released by the caller, common_dispatch.
520 */
521 if (publicfh_flag)
522 exi_rele(exi);
523
524 if (error) {
525 VN_RELE(vp);
526 goto out;
527 }
528
529 va.va_mask = AT_ALL;
530 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
531
532 VN_RELE(vp);
533
534 resp->status = NFS3_OK;
535 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
536 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
537
538 /*
539 * If it's public fh, no 0x81, and client's flavor is
540 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
541 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
542 */
543 if (auth_weak)
544 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
545
546 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
547 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
548 VN_RELE(dvp);
549
550 return;
551
552 out:
553 if (curthread->t_flag & T_WOULDBLOCK) {
554 curthread->t_flag &= ~T_WOULDBLOCK;
555 resp->status = NFS3ERR_JUKEBOX;
556 } else
557 resp->status = puterrno3(error);
558 out1:
559 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
560 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
561
562 if (dvp != NULL)
563 VN_RELE(dvp);
564 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
565
566 }
567
568 void *
rfs3_lookup_getfh(LOOKUP3args * args)569 rfs3_lookup_getfh(LOOKUP3args *args)
570 {
571
572 return (&args->what.dir);
573 }
574
575 /* ARGSUSED */
576 void
rfs3_access(ACCESS3args * args,ACCESS3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)577 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
578 struct svc_req *req, cred_t *cr, bool_t ro)
579 {
580 int error;
581 vnode_t *vp;
582 struct vattr *vap;
583 struct vattr va;
584 int checkwriteperm;
585 boolean_t dominant_label = B_FALSE;
586 boolean_t equal_label = B_FALSE;
587 boolean_t admin_low_client;
588
589 vap = NULL;
590
591 vp = nfs3_fhtovp(&args->object, exi);
592
593 DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
594 cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
595
596 if (vp == NULL) {
597 error = ESTALE;
598 goto out;
599 }
600
601 /*
602 * If the file system is exported read only, it is not appropriate
603 * to check write permissions for regular files and directories.
604 * Special files are interpreted by the client, so the underlying
605 * permissions are sent back to the client for interpretation.
606 */
607 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
608 checkwriteperm = 0;
609 else
610 checkwriteperm = 1;
611
612 /*
613 * We need the mode so that we can correctly determine access
614 * permissions relative to a mandatory lock file. Access to
615 * mandatory lock files is denied on the server, so it might
616 * as well be reflected to the server during the open.
617 */
618 va.va_mask = AT_MODE;
619 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
620 if (error)
621 goto out;
622
623 vap = &va;
624
625 resp->resok.access = 0;
626
627 if (is_system_labeled()) {
628 bslabel_t *clabel = req->rq_label;
629
630 ASSERT(clabel != NULL);
631 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
632 "got client label from request(1)", struct svc_req *, req);
633
634 if (!blequal(&l_admin_low->tsl_label, clabel)) {
635 if ((equal_label = do_rfs_label_check(clabel, vp,
636 EQUALITY_CHECK, exi)) == B_FALSE) {
637 dominant_label = do_rfs_label_check(clabel,
638 vp, DOMINANCE_CHECK, exi);
639 } else
640 dominant_label = B_TRUE;
641 admin_low_client = B_FALSE;
642 } else
643 admin_low_client = B_TRUE;
644 }
645
646 if (args->access & ACCESS3_READ) {
647 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
648 if (error) {
649 if (curthread->t_flag & T_WOULDBLOCK)
650 goto out;
651 } else if (!MANDLOCK(vp, va.va_mode) &&
652 (!is_system_labeled() || admin_low_client ||
653 dominant_label))
654 resp->resok.access |= ACCESS3_READ;
655 }
656 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
657 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
658 if (error) {
659 if (curthread->t_flag & T_WOULDBLOCK)
660 goto out;
661 } else if (!is_system_labeled() || admin_low_client ||
662 dominant_label)
663 resp->resok.access |= ACCESS3_LOOKUP;
664 }
665 if (checkwriteperm &&
666 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
667 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
668 if (error) {
669 if (curthread->t_flag & T_WOULDBLOCK)
670 goto out;
671 } else if (!MANDLOCK(vp, va.va_mode) &&
672 (!is_system_labeled() || admin_low_client || equal_label)) {
673 resp->resok.access |=
674 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
675 }
676 }
677 if (checkwriteperm &&
678 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
679 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
680 if (error) {
681 if (curthread->t_flag & T_WOULDBLOCK)
682 goto out;
683 } else if (!is_system_labeled() || admin_low_client ||
684 equal_label)
685 resp->resok.access |= ACCESS3_DELETE;
686 }
687 if (args->access & ACCESS3_EXECUTE) {
688 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
689 if (error) {
690 if (curthread->t_flag & T_WOULDBLOCK)
691 goto out;
692 } else if (!MANDLOCK(vp, va.va_mode) &&
693 (!is_system_labeled() || admin_low_client ||
694 dominant_label))
695 resp->resok.access |= ACCESS3_EXECUTE;
696 }
697
698 va.va_mask = AT_ALL;
699 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
700
701 resp->status = NFS3_OK;
702 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
703
704 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
705 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
706
707 VN_RELE(vp);
708
709 return;
710
711 out:
712 if (curthread->t_flag & T_WOULDBLOCK) {
713 curthread->t_flag &= ~T_WOULDBLOCK;
714 resp->status = NFS3ERR_JUKEBOX;
715 } else
716 resp->status = puterrno3(error);
717 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
718 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
719 if (vp != NULL)
720 VN_RELE(vp);
721 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
722 }
723
724 void *
rfs3_access_getfh(ACCESS3args * args)725 rfs3_access_getfh(ACCESS3args *args)
726 {
727
728 return (&args->object);
729 }
730
731 /* ARGSUSED */
732 void
rfs3_readlink(READLINK3args * args,READLINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)733 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
734 struct svc_req *req, cred_t *cr, bool_t ro)
735 {
736 int error;
737 vnode_t *vp;
738 struct vattr *vap;
739 struct vattr va;
740 struct iovec iov;
741 struct uio uio;
742 char *data;
743 struct sockaddr *ca;
744 char *name = NULL;
745 int is_referral = 0;
746
747 vap = NULL;
748
749 vp = nfs3_fhtovp(&args->symlink, exi);
750
751 DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
752 cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
753
754 if (vp == NULL) {
755 error = ESTALE;
756 goto out;
757 }
758
759 va.va_mask = AT_ALL;
760 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
761 if (error)
762 goto out;
763
764 vap = &va;
765
766 /* We lied about the object type for a referral */
767 if (vn_is_nfs_reparse(vp, cr))
768 is_referral = 1;
769
770 if (vp->v_type != VLNK && !is_referral) {
771 resp->status = NFS3ERR_INVAL;
772 goto out1;
773 }
774
775 if (MANDLOCK(vp, va.va_mode)) {
776 resp->status = NFS3ERR_ACCES;
777 goto out1;
778 }
779
780 if (is_system_labeled()) {
781 bslabel_t *clabel = req->rq_label;
782
783 ASSERT(clabel != NULL);
784 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
785 "got client label from request(1)", struct svc_req *, req);
786
787 if (!blequal(&l_admin_low->tsl_label, clabel)) {
788 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
789 exi)) {
790 resp->status = NFS3ERR_ACCES;
791 goto out1;
792 }
793 }
794 }
795
796 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
797
798 if (is_referral) {
799 char *s;
800 size_t strsz;
801
802 /* Get an artificial symlink based on a referral */
803 s = build_symlink(vp, cr, &strsz);
804 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
805 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
806 vnode_t *, vp, char *, s);
807 if (s == NULL)
808 error = EINVAL;
809 else {
810 error = 0;
811 (void) strlcpy(data, s, MAXPATHLEN + 1);
812 kmem_free(s, strsz);
813 }
814
815 } else {
816
817 iov.iov_base = data;
818 iov.iov_len = MAXPATHLEN;
819 uio.uio_iov = &iov;
820 uio.uio_iovcnt = 1;
821 uio.uio_segflg = UIO_SYSSPACE;
822 uio.uio_extflg = UIO_COPY_CACHED;
823 uio.uio_loffset = 0;
824 uio.uio_resid = MAXPATHLEN;
825
826 error = VOP_READLINK(vp, &uio, cr, NULL);
827
828 if (!error)
829 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
830 }
831
832 va.va_mask = AT_ALL;
833 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
834
835 /* Lie about object type again just to be consistent */
836 if (is_referral && vap != NULL)
837 vap->va_type = VLNK;
838
839 #if 0 /* notyet */
840 /*
841 * Don't do this. It causes local disk writes when just
842 * reading the file and the overhead is deemed larger
843 * than the benefit.
844 */
845 /*
846 * Force modified metadata out to stable storage.
847 */
848 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
849 #endif
850
851 if (error) {
852 kmem_free(data, MAXPATHLEN + 1);
853 goto out;
854 }
855
856 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
857 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
858 MAXPATHLEN + 1);
859
860 if (name == NULL) {
861 /*
862 * Even though the conversion failed, we return
863 * something. We just don't translate it.
864 */
865 name = data;
866 }
867
868 resp->status = NFS3_OK;
869 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
870 resp->resok.data = name;
871
872 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
873 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
874 VN_RELE(vp);
875
876 if (name != data)
877 kmem_free(data, MAXPATHLEN + 1);
878
879 return;
880
881 out:
882 if (curthread->t_flag & T_WOULDBLOCK) {
883 curthread->t_flag &= ~T_WOULDBLOCK;
884 resp->status = NFS3ERR_JUKEBOX;
885 } else
886 resp->status = puterrno3(error);
887 out1:
888 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
889 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
890 if (vp != NULL)
891 VN_RELE(vp);
892 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
893 }
894
895 void *
rfs3_readlink_getfh(READLINK3args * args)896 rfs3_readlink_getfh(READLINK3args *args)
897 {
898
899 return (&args->symlink);
900 }
901
902 void
rfs3_readlink_free(READLINK3res * resp)903 rfs3_readlink_free(READLINK3res *resp)
904 {
905
906 if (resp->status == NFS3_OK)
907 kmem_free(resp->resok.data, MAXPATHLEN + 1);
908 }
909
910 /*
911 * Server routine to handle read
912 * May handle RDMA data as well as mblks
913 */
914 /* ARGSUSED */
915 void
rfs3_read(READ3args * args,READ3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)916 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
917 struct svc_req *req, cred_t *cr, bool_t ro)
918 {
919 int error;
920 vnode_t *vp;
921 struct vattr *vap;
922 struct vattr va;
923 struct iovec iov, *iovp = NULL;
924 int iovcnt;
925 struct uio uio;
926 u_offset_t offset;
927 mblk_t *mp = NULL;
928 int in_crit = 0;
929 int need_rwunlock = 0;
930 caller_context_t ct;
931 int rdma_used = 0;
932 int loaned_buffers;
933 struct uio *uiop;
934
935 vap = NULL;
936
937 vp = nfs3_fhtovp(&args->file, exi);
938
939 DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
940 cred_t *, cr, vnode_t *, vp, READ3args *, args);
941
942 if (vp == NULL) {
943 error = ESTALE;
944 goto out;
945 }
946
947 if (args->wlist) {
948 if (args->count > clist_len(args->wlist)) {
949 error = EINVAL;
950 goto out;
951 }
952 rdma_used = 1;
953 }
954
955 /* use loaned buffers for TCP */
956 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
957
958 if (is_system_labeled()) {
959 bslabel_t *clabel = req->rq_label;
960
961 ASSERT(clabel != NULL);
962 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
963 "got client label from request(1)", struct svc_req *, req);
964
965 if (!blequal(&l_admin_low->tsl_label, clabel)) {
966 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
967 exi)) {
968 resp->status = NFS3ERR_ACCES;
969 goto out1;
970 }
971 }
972 }
973
974 ct.cc_sysid = 0;
975 ct.cc_pid = 0;
976 ct.cc_caller_id = nfs3_srv_caller_id;
977 ct.cc_flags = CC_DONTBLOCK;
978
979 /*
980 * Enter the critical region before calling VOP_RWLOCK
981 * to avoid a deadlock with write requests.
982 */
983 if (nbl_need_check(vp)) {
984 nbl_start_crit(vp, RW_READER);
985 in_crit = 1;
986 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
987 NULL)) {
988 error = EACCES;
989 goto out;
990 }
991 }
992
993 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
994
995 /* check if a monitor detected a delegation conflict */
996 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
997 resp->status = NFS3ERR_JUKEBOX;
998 goto out1;
999 }
1000
1001 need_rwunlock = 1;
1002
1003 va.va_mask = AT_ALL;
1004 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1005
1006 /*
1007 * If we can't get the attributes, then we can't do the
1008 * right access checking. So, we'll fail the request.
1009 */
1010 if (error)
1011 goto out;
1012
1013 vap = &va;
1014
1015 if (vp->v_type != VREG) {
1016 resp->status = NFS3ERR_INVAL;
1017 goto out1;
1018 }
1019
1020 if (crgetuid(cr) != va.va_uid) {
1021 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1022 if (error) {
1023 if (curthread->t_flag & T_WOULDBLOCK)
1024 goto out;
1025 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1026 if (error)
1027 goto out;
1028 }
1029 }
1030
1031 if (MANDLOCK(vp, va.va_mode)) {
1032 resp->status = NFS3ERR_ACCES;
1033 goto out1;
1034 }
1035
1036 offset = args->offset;
1037 if (offset >= va.va_size) {
1038 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1039 if (in_crit)
1040 nbl_end_crit(vp);
1041 resp->status = NFS3_OK;
1042 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1043 resp->resok.count = 0;
1044 resp->resok.eof = TRUE;
1045 resp->resok.data.data_len = 0;
1046 resp->resok.data.data_val = NULL;
1047 resp->resok.data.mp = NULL;
1048 /* RDMA */
1049 resp->resok.wlist = args->wlist;
1050 resp->resok.wlist_len = resp->resok.count;
1051 if (resp->resok.wlist)
1052 clist_zero_len(resp->resok.wlist);
1053 goto done;
1054 }
1055
1056 if (args->count == 0) {
1057 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1058 if (in_crit)
1059 nbl_end_crit(vp);
1060 resp->status = NFS3_OK;
1061 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1062 resp->resok.count = 0;
1063 resp->resok.eof = FALSE;
1064 resp->resok.data.data_len = 0;
1065 resp->resok.data.data_val = NULL;
1066 resp->resok.data.mp = NULL;
1067 /* RDMA */
1068 resp->resok.wlist = args->wlist;
1069 resp->resok.wlist_len = resp->resok.count;
1070 if (resp->resok.wlist)
1071 clist_zero_len(resp->resok.wlist);
1072 goto done;
1073 }
1074
1075 /*
1076 * do not allocate memory more the max. allowed
1077 * transfer size
1078 */
1079 if (args->count > rfs3_tsize(req))
1080 args->count = rfs3_tsize(req);
1081
1082 if (loaned_buffers) {
1083 uiop = (uio_t *)rfs_setup_xuio(vp);
1084 ASSERT(uiop != NULL);
1085 uiop->uio_segflg = UIO_SYSSPACE;
1086 uiop->uio_loffset = args->offset;
1087 uiop->uio_resid = args->count;
1088
1089 /* Jump to do the read if successful */
1090 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1091 /*
1092 * Need to hold the vnode until after VOP_RETZCBUF()
1093 * is called.
1094 */
1095 VN_HOLD(vp);
1096 goto doio_read;
1097 }
1098
1099 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1100 uiop->uio_loffset, int, uiop->uio_resid);
1101
1102 uiop->uio_extflg = 0;
1103 /* failure to setup for zero copy */
1104 rfs_free_xuio((void *)uiop);
1105 loaned_buffers = 0;
1106 }
1107
1108 /*
1109 * If returning data via RDMA Write, then grab the chunk list.
1110 * If we aren't returning READ data w/RDMA_WRITE, then grab
1111 * a mblk.
1112 */
1113 if (rdma_used) {
1114 (void) rdma_get_wchunk(req, &iov, args->wlist);
1115 uio.uio_iov = &iov;
1116 uio.uio_iovcnt = 1;
1117 } else {
1118 /*
1119 * mp will contain the data to be sent out in the read reply.
1120 * For UDP, this will be freed after the reply has been sent
1121 * out by the driver. For TCP, it will be freed after the last
1122 * segment associated with the reply has been ACKed by the
1123 * client.
1124 */
1125 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1126 uio.uio_iov = iovp;
1127 uio.uio_iovcnt = iovcnt;
1128 }
1129
1130 uio.uio_segflg = UIO_SYSSPACE;
1131 uio.uio_extflg = UIO_COPY_CACHED;
1132 uio.uio_loffset = args->offset;
1133 uio.uio_resid = args->count;
1134 uiop = &uio;
1135
1136 doio_read:
1137 error = VOP_READ(vp, uiop, 0, cr, &ct);
1138
1139 if (error) {
1140 if (mp)
1141 freemsg(mp);
1142 /* check if a monitor detected a delegation conflict */
1143 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1144 resp->status = NFS3ERR_JUKEBOX;
1145 goto out1;
1146 }
1147 goto out;
1148 }
1149
1150 /* make mblk using zc buffers */
1151 if (loaned_buffers) {
1152 mp = uio_to_mblk(uiop);
1153 ASSERT(mp != NULL);
1154 }
1155
1156 va.va_mask = AT_ALL;
1157 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1158
1159 if (error)
1160 vap = NULL;
1161 else
1162 vap = &va;
1163
1164 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1165
1166 if (in_crit)
1167 nbl_end_crit(vp);
1168
1169 resp->status = NFS3_OK;
1170 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1171 resp->resok.count = args->count - uiop->uio_resid;
1172 if (!error && offset + resp->resok.count == va.va_size)
1173 resp->resok.eof = TRUE;
1174 else
1175 resp->resok.eof = FALSE;
1176 resp->resok.data.data_len = resp->resok.count;
1177
1178 if (mp)
1179 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1180
1181 resp->resok.data.mp = mp;
1182 resp->resok.size = (uint_t)args->count;
1183
1184 if (rdma_used) {
1185 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1186 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1187 resp->status = NFS3ERR_INVAL;
1188 }
1189 } else {
1190 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1191 (resp->resok).wlist = NULL;
1192 }
1193
1194 done:
1195 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1196 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1197
1198 VN_RELE(vp);
1199
1200 if (iovp != NULL)
1201 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1202
1203 return;
1204
1205 out:
1206 if (curthread->t_flag & T_WOULDBLOCK) {
1207 curthread->t_flag &= ~T_WOULDBLOCK;
1208 resp->status = NFS3ERR_JUKEBOX;
1209 } else
1210 resp->status = puterrno3(error);
1211 out1:
1212 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1213 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1214
1215 if (vp != NULL) {
1216 if (need_rwunlock)
1217 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1218 if (in_crit)
1219 nbl_end_crit(vp);
1220 VN_RELE(vp);
1221 }
1222 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1223
1224 if (iovp != NULL)
1225 kmem_free(iovp, iovcnt * sizeof (struct iovec));
1226 }
1227
1228 void
rfs3_read_free(READ3res * resp)1229 rfs3_read_free(READ3res *resp)
1230 {
1231 mblk_t *mp;
1232
1233 if (resp->status == NFS3_OK) {
1234 mp = resp->resok.data.mp;
1235 if (mp != NULL)
1236 freemsg(mp);
1237 }
1238 }
1239
1240 void *
rfs3_read_getfh(READ3args * args)1241 rfs3_read_getfh(READ3args *args)
1242 {
1243
1244 return (&args->file);
1245 }
1246
/*
 * Number of entries in the on-stack iovec array used by rfs3_write().
 * An incoming mblk chain with more links than this gets a kmem_alloc'd
 * iovec array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" are mblk-based
 * writes that fit in the on-stack iovec array, "misses" are the ones that
 * had to allocate.  Static storage starts out zeroed.
 */
static int rfs3_write_hits;
static int rfs3_write_misses;
#endif
1253
1254 void
rfs3_write(WRITE3args * args,WRITE3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1255 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1256 struct svc_req *req, cred_t *cr, bool_t ro)
1257 {
1258 int error;
1259 vnode_t *vp;
1260 struct vattr *bvap = NULL;
1261 struct vattr bva;
1262 struct vattr *avap = NULL;
1263 struct vattr ava;
1264 u_offset_t rlimit;
1265 struct uio uio;
1266 struct iovec iov[MAX_IOVECS];
1267 mblk_t *m;
1268 struct iovec *iovp;
1269 int iovcnt;
1270 int ioflag;
1271 cred_t *savecred;
1272 int in_crit = 0;
1273 int rwlock_ret = -1;
1274 caller_context_t ct;
1275
1276 vp = nfs3_fhtovp(&args->file, exi);
1277
1278 DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1279 cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1280
1281 if (vp == NULL) {
1282 error = ESTALE;
1283 goto err;
1284 }
1285
1286 if (is_system_labeled()) {
1287 bslabel_t *clabel = req->rq_label;
1288
1289 ASSERT(clabel != NULL);
1290 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1291 "got client label from request(1)", struct svc_req *, req);
1292
1293 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1294 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1295 exi)) {
1296 resp->status = NFS3ERR_ACCES;
1297 goto err1;
1298 }
1299 }
1300 }
1301
1302 ct.cc_sysid = 0;
1303 ct.cc_pid = 0;
1304 ct.cc_caller_id = nfs3_srv_caller_id;
1305 ct.cc_flags = CC_DONTBLOCK;
1306
1307 /*
1308 * We have to enter the critical region before calling VOP_RWLOCK
1309 * to avoid a deadlock with ufs.
1310 */
1311 if (nbl_need_check(vp)) {
1312 nbl_start_crit(vp, RW_READER);
1313 in_crit = 1;
1314 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1315 NULL)) {
1316 error = EACCES;
1317 goto err;
1318 }
1319 }
1320
1321 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1322
1323 /* check if a monitor detected a delegation conflict */
1324 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1325 resp->status = NFS3ERR_JUKEBOX;
1326 rwlock_ret = -1;
1327 goto err1;
1328 }
1329
1330
1331 bva.va_mask = AT_ALL;
1332 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1333
1334 /*
1335 * If we can't get the attributes, then we can't do the
1336 * right access checking. So, we'll fail the request.
1337 */
1338 if (error)
1339 goto err;
1340
1341 bvap = &bva;
1342 avap = bvap;
1343
1344 if (args->count != args->data.data_len) {
1345 resp->status = NFS3ERR_INVAL;
1346 goto err1;
1347 }
1348
1349 if (rdonly(ro, vp)) {
1350 resp->status = NFS3ERR_ROFS;
1351 goto err1;
1352 }
1353
1354 if (vp->v_type != VREG) {
1355 resp->status = NFS3ERR_INVAL;
1356 goto err1;
1357 }
1358
1359 if (crgetuid(cr) != bva.va_uid &&
1360 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1361 goto err;
1362
1363 if (MANDLOCK(vp, bva.va_mode)) {
1364 resp->status = NFS3ERR_ACCES;
1365 goto err1;
1366 }
1367
1368 if (args->count == 0) {
1369 resp->status = NFS3_OK;
1370 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1371 resp->resok.count = 0;
1372 resp->resok.committed = args->stable;
1373 resp->resok.verf = write3verf;
1374 goto out;
1375 }
1376
1377 if (args->mblk != NULL) {
1378 iovcnt = 0;
1379 for (m = args->mblk; m != NULL; m = m->b_cont)
1380 iovcnt++;
1381 if (iovcnt <= MAX_IOVECS) {
1382 #ifdef DEBUG
1383 rfs3_write_hits++;
1384 #endif
1385 iovp = iov;
1386 } else {
1387 #ifdef DEBUG
1388 rfs3_write_misses++;
1389 #endif
1390 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1391 }
1392 mblk_to_iov(args->mblk, iovcnt, iovp);
1393
1394 } else if (args->rlist != NULL) {
1395 iovcnt = 1;
1396 iovp = iov;
1397 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1398 iovp->iov_len = args->count;
1399 } else {
1400 iovcnt = 1;
1401 iovp = iov;
1402 iovp->iov_base = args->data.data_val;
1403 iovp->iov_len = args->count;
1404 }
1405
1406 uio.uio_iov = iovp;
1407 uio.uio_iovcnt = iovcnt;
1408
1409 uio.uio_segflg = UIO_SYSSPACE;
1410 uio.uio_extflg = UIO_COPY_DEFAULT;
1411 uio.uio_loffset = args->offset;
1412 uio.uio_resid = args->count;
1413 uio.uio_llimit = curproc->p_fsz_ctl;
1414 rlimit = uio.uio_llimit - args->offset;
1415 if (rlimit < (u_offset_t)uio.uio_resid)
1416 uio.uio_resid = (int)rlimit;
1417
1418 if (args->stable == UNSTABLE)
1419 ioflag = 0;
1420 else if (args->stable == FILE_SYNC)
1421 ioflag = FSYNC;
1422 else if (args->stable == DATA_SYNC)
1423 ioflag = FDSYNC;
1424 else {
1425 if (iovp != iov)
1426 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1427 resp->status = NFS3ERR_INVAL;
1428 goto err1;
1429 }
1430
1431 /*
1432 * We're changing creds because VM may fault and we need
1433 * the cred of the current thread to be used if quota
1434 * checking is enabled.
1435 */
1436 savecred = curthread->t_cred;
1437 curthread->t_cred = cr;
1438 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1439 curthread->t_cred = savecred;
1440
1441 if (iovp != iov)
1442 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1443
1444 /* check if a monitor detected a delegation conflict */
1445 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1446 resp->status = NFS3ERR_JUKEBOX;
1447 goto err1;
1448 }
1449
1450 ava.va_mask = AT_ALL;
1451 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1452
1453 if (error)
1454 goto err;
1455
1456 /*
1457 * If we were unable to get the V_WRITELOCK_TRUE, then we
1458 * may not have accurate after attrs, so check if
1459 * we have both attributes, they have a non-zero va_seq, and
1460 * va_seq has changed by exactly one,
1461 * if not, turn off the before attr.
1462 */
1463 if (rwlock_ret != V_WRITELOCK_TRUE) {
1464 if (bvap == NULL || avap == NULL ||
1465 bvap->va_seq == 0 || avap->va_seq == 0 ||
1466 avap->va_seq != (bvap->va_seq + 1)) {
1467 bvap = NULL;
1468 }
1469 }
1470
1471 resp->status = NFS3_OK;
1472 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1473 resp->resok.count = args->count - uio.uio_resid;
1474 resp->resok.committed = args->stable;
1475 resp->resok.verf = write3verf;
1476 goto out;
1477
1478 err:
1479 if (curthread->t_flag & T_WOULDBLOCK) {
1480 curthread->t_flag &= ~T_WOULDBLOCK;
1481 resp->status = NFS3ERR_JUKEBOX;
1482 } else
1483 resp->status = puterrno3(error);
1484 err1:
1485 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1486 out:
1487 DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1488 cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1489
1490 if (vp != NULL) {
1491 if (rwlock_ret != -1)
1492 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1493 if (in_crit)
1494 nbl_end_crit(vp);
1495 VN_RELE(vp);
1496 }
1497 }
1498
1499 void *
rfs3_write_getfh(WRITE3args * args)1500 rfs3_write_getfh(WRITE3args *args)
1501 {
1502
1503 return (&args->file);
1504 }
1505
1506 void
rfs3_create(CREATE3args * args,CREATE3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1507 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1508 struct svc_req *req, cred_t *cr, bool_t ro)
1509 {
1510 int error;
1511 int in_crit = 0;
1512 vnode_t *vp;
1513 vnode_t *tvp = NULL;
1514 vnode_t *dvp;
1515 struct vattr *vap;
1516 struct vattr va;
1517 struct vattr *dbvap;
1518 struct vattr dbva;
1519 struct vattr *davap;
1520 struct vattr dava;
1521 enum vcexcl excl;
1522 nfstime3 *mtime;
1523 len_t reqsize;
1524 bool_t trunc;
1525 struct sockaddr *ca;
1526 char *name = NULL;
1527
1528 dbvap = NULL;
1529 davap = NULL;
1530
1531 dvp = nfs3_fhtovp(&args->where.dir, exi);
1532
1533 DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1534 cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1535
1536 if (dvp == NULL) {
1537 error = ESTALE;
1538 goto out;
1539 }
1540
1541 dbva.va_mask = AT_ALL;
1542 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1543 davap = dbvap;
1544
1545 if (args->where.name == nfs3nametoolong) {
1546 resp->status = NFS3ERR_NAMETOOLONG;
1547 goto out1;
1548 }
1549
1550 if (args->where.name == NULL || *(args->where.name) == '\0') {
1551 resp->status = NFS3ERR_ACCES;
1552 goto out1;
1553 }
1554
1555 if (rdonly(ro, dvp)) {
1556 resp->status = NFS3ERR_ROFS;
1557 goto out1;
1558 }
1559
1560 if (is_system_labeled()) {
1561 bslabel_t *clabel = req->rq_label;
1562
1563 ASSERT(clabel != NULL);
1564 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1565 "got client label from request(1)", struct svc_req *, req);
1566
1567 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1568 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1569 exi)) {
1570 resp->status = NFS3ERR_ACCES;
1571 goto out1;
1572 }
1573 }
1574 }
1575
1576 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1577 name = nfscmd_convname(ca, exi, args->where.name,
1578 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1579
1580 if (name == NULL) {
1581 /* This is really a Solaris EILSEQ */
1582 resp->status = NFS3ERR_INVAL;
1583 goto out1;
1584 }
1585
1586 if (args->how.mode == EXCLUSIVE) {
1587 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1588 va.va_type = VREG;
1589 va.va_mode = (mode_t)0;
1590 /*
1591 * Ensure no time overflows and that types match
1592 */
1593 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1594 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1595 va.va_mtime.tv_nsec = mtime->nseconds;
1596 excl = EXCL;
1597 } else {
1598 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1599 &va);
1600 if (error)
1601 goto out;
1602 va.va_mask |= AT_TYPE;
1603 va.va_type = VREG;
1604 if (args->how.mode == GUARDED)
1605 excl = EXCL;
1606 else {
1607 excl = NONEXCL;
1608
1609 /*
1610 * During creation of file in non-exclusive mode
1611 * if size of file is being set then make sure
1612 * that if the file already exists that no conflicting
1613 * non-blocking mandatory locks exists in the region
1614 * being modified. If there are conflicting locks fail
1615 * the operation with EACCES.
1616 */
1617 if (va.va_mask & AT_SIZE) {
1618 struct vattr tva;
1619
1620 /*
1621 * Does file already exist?
1622 */
1623 error = VOP_LOOKUP(dvp, name, &tvp,
1624 NULL, 0, NULL, cr, NULL, NULL, NULL);
1625
1626 /*
1627 * Check to see if the file has been delegated
1628 * to a v4 client. If so, then begin recall of
1629 * the delegation and return JUKEBOX to allow
1630 * the client to retrasmit its request.
1631 */
1632
1633 trunc = va.va_size == 0;
1634 if (!error &&
1635 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1636 resp->status = NFS3ERR_JUKEBOX;
1637 goto out1;
1638 }
1639
1640 /*
1641 * Check for NBMAND lock conflicts
1642 */
1643 if (!error && nbl_need_check(tvp)) {
1644 u_offset_t offset;
1645 ssize_t len;
1646
1647 nbl_start_crit(tvp, RW_READER);
1648 in_crit = 1;
1649
1650 tva.va_mask = AT_SIZE;
1651 error = VOP_GETATTR(tvp, &tva, 0, cr,
1652 NULL);
1653 /*
1654 * Can't check for conflicts, so return
1655 * error.
1656 */
1657 if (error)
1658 goto out;
1659
1660 offset = tva.va_size < va.va_size ?
1661 tva.va_size : va.va_size;
1662 len = tva.va_size < va.va_size ?
1663 va.va_size - tva.va_size :
1664 tva.va_size - va.va_size;
1665 if (nbl_conflict(tvp, NBL_WRITE,
1666 offset, len, 0, NULL)) {
1667 error = EACCES;
1668 goto out;
1669 }
1670 } else if (tvp) {
1671 VN_RELE(tvp);
1672 tvp = NULL;
1673 }
1674 }
1675 }
1676 if (va.va_mask & AT_SIZE)
1677 reqsize = va.va_size;
1678 }
1679
1680 /*
1681 * Must specify the mode.
1682 */
1683 if (!(va.va_mask & AT_MODE)) {
1684 resp->status = NFS3ERR_INVAL;
1685 goto out1;
1686 }
1687
1688 /*
1689 * If the filesystem is exported with nosuid, then mask off
1690 * the setuid and setgid bits.
1691 */
1692 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1693 va.va_mode &= ~(VSUID | VSGID);
1694
1695 tryagain:
1696 /*
1697 * The file open mode used is VWRITE. If the client needs
1698 * some other semantic, then it should do the access checking
1699 * itself. It would have been nice to have the file open mode
1700 * passed as part of the arguments.
1701 */
1702 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1703 &vp, cr, 0, NULL, NULL);
1704
1705 dava.va_mask = AT_ALL;
1706 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1707
1708 if (error) {
1709 /*
1710 * If we got something other than file already exists
1711 * then just return this error. Otherwise, we got
1712 * EEXIST. If we were doing a GUARDED create, then
1713 * just return this error. Otherwise, we need to
1714 * make sure that this wasn't a duplicate of an
1715 * exclusive create request.
1716 *
1717 * The assumption is made that a non-exclusive create
1718 * request will never return EEXIST.
1719 */
1720 if (error != EEXIST || args->how.mode == GUARDED)
1721 goto out;
1722 /*
1723 * Lookup the file so that we can get a vnode for it.
1724 */
1725 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1726 NULL, cr, NULL, NULL, NULL);
1727 if (error) {
1728 /*
1729 * We couldn't find the file that we thought that
1730 * we just created. So, we'll just try creating
1731 * it again.
1732 */
1733 if (error == ENOENT)
1734 goto tryagain;
1735 goto out;
1736 }
1737
1738 /*
1739 * If the file is delegated to a v4 client, go ahead
1740 * and initiate recall, this create is a hint that a
1741 * conflicting v3 open has occurred.
1742 */
1743
1744 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1745 VN_RELE(vp);
1746 resp->status = NFS3ERR_JUKEBOX;
1747 goto out1;
1748 }
1749
1750 va.va_mask = AT_ALL;
1751 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1752
1753 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1754 /* % with INT32_MAX to prevent overflows */
1755 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1756 vap->va_mtime.tv_sec !=
1757 (mtime->seconds % INT32_MAX) ||
1758 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1759 VN_RELE(vp);
1760 error = EEXIST;
1761 goto out;
1762 }
1763 } else {
1764
1765 if ((args->how.mode == UNCHECKED ||
1766 args->how.mode == GUARDED) &&
1767 args->how.createhow3_u.obj_attributes.size.set_it &&
1768 va.va_size == 0)
1769 trunc = TRUE;
1770 else
1771 trunc = FALSE;
1772
1773 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1774 VN_RELE(vp);
1775 resp->status = NFS3ERR_JUKEBOX;
1776 goto out1;
1777 }
1778
1779 va.va_mask = AT_ALL;
1780 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1781
1782 /*
1783 * We need to check to make sure that the file got
1784 * created to the indicated size. If not, we do a
1785 * setattr to try to change the size, but we don't
1786 * try too hard. This shouldn't a problem as most
1787 * clients will only specifiy a size of zero which
1788 * local file systems handle. However, even if
1789 * the client does specify a non-zero size, it can
1790 * still recover by checking the size of the file
1791 * after it has created it and then issue a setattr
1792 * request of its own to set the size of the file.
1793 */
1794 if (vap != NULL &&
1795 (args->how.mode == UNCHECKED ||
1796 args->how.mode == GUARDED) &&
1797 args->how.createhow3_u.obj_attributes.size.set_it &&
1798 vap->va_size != reqsize) {
1799 va.va_mask = AT_SIZE;
1800 va.va_size = reqsize;
1801 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1802 va.va_mask = AT_ALL;
1803 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1804 }
1805 }
1806
1807 if (name != args->where.name)
1808 kmem_free(name, MAXPATHLEN + 1);
1809
1810 error = makefh3(&resp->resok.obj.handle, vp, exi);
1811 if (error)
1812 resp->resok.obj.handle_follows = FALSE;
1813 else
1814 resp->resok.obj.handle_follows = TRUE;
1815
1816 /*
1817 * Force modified data and metadata out to stable storage.
1818 */
1819 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1820 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1821
1822 VN_RELE(vp);
1823 if (tvp != NULL) {
1824 if (in_crit)
1825 nbl_end_crit(tvp);
1826 VN_RELE(tvp);
1827 }
1828
1829 resp->status = NFS3_OK;
1830 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1831 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1832
1833 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1834 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1835
1836 VN_RELE(dvp);
1837 return;
1838
1839 out:
1840 if (curthread->t_flag & T_WOULDBLOCK) {
1841 curthread->t_flag &= ~T_WOULDBLOCK;
1842 resp->status = NFS3ERR_JUKEBOX;
1843 } else
1844 resp->status = puterrno3(error);
1845 out1:
1846 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1847 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1848
1849 if (name != NULL && name != args->where.name)
1850 kmem_free(name, MAXPATHLEN + 1);
1851
1852 if (tvp != NULL) {
1853 if (in_crit)
1854 nbl_end_crit(tvp);
1855 VN_RELE(tvp);
1856 }
1857 if (dvp != NULL)
1858 VN_RELE(dvp);
1859 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1860 }
1861
1862 void *
rfs3_create_getfh(CREATE3args * args)1863 rfs3_create_getfh(CREATE3args *args)
1864 {
1865
1866 return (&args->where.dir);
1867 }
1868
1869 void
rfs3_mkdir(MKDIR3args * args,MKDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1870 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1871 struct svc_req *req, cred_t *cr, bool_t ro)
1872 {
1873 int error;
1874 vnode_t *vp = NULL;
1875 vnode_t *dvp;
1876 struct vattr *vap;
1877 struct vattr va;
1878 struct vattr *dbvap;
1879 struct vattr dbva;
1880 struct vattr *davap;
1881 struct vattr dava;
1882 struct sockaddr *ca;
1883 char *name = NULL;
1884
1885 dbvap = NULL;
1886 davap = NULL;
1887
1888 dvp = nfs3_fhtovp(&args->where.dir, exi);
1889
1890 DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1891 cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1892
1893 if (dvp == NULL) {
1894 error = ESTALE;
1895 goto out;
1896 }
1897
1898 dbva.va_mask = AT_ALL;
1899 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1900 davap = dbvap;
1901
1902 if (args->where.name == nfs3nametoolong) {
1903 resp->status = NFS3ERR_NAMETOOLONG;
1904 goto out1;
1905 }
1906
1907 if (args->where.name == NULL || *(args->where.name) == '\0') {
1908 resp->status = NFS3ERR_ACCES;
1909 goto out1;
1910 }
1911
1912 if (rdonly(ro, dvp)) {
1913 resp->status = NFS3ERR_ROFS;
1914 goto out1;
1915 }
1916
1917 if (is_system_labeled()) {
1918 bslabel_t *clabel = req->rq_label;
1919
1920 ASSERT(clabel != NULL);
1921 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1922 "got client label from request(1)", struct svc_req *, req);
1923
1924 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1925 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1926 exi)) {
1927 resp->status = NFS3ERR_ACCES;
1928 goto out1;
1929 }
1930 }
1931 }
1932
1933 error = sattr3_to_vattr(&args->attributes, &va);
1934 if (error)
1935 goto out;
1936
1937 if (!(va.va_mask & AT_MODE)) {
1938 resp->status = NFS3ERR_INVAL;
1939 goto out1;
1940 }
1941
1942 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1943 name = nfscmd_convname(ca, exi, args->where.name,
1944 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1945
1946 if (name == NULL) {
1947 resp->status = NFS3ERR_INVAL;
1948 goto out1;
1949 }
1950
1951 va.va_mask |= AT_TYPE;
1952 va.va_type = VDIR;
1953
1954 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1955
1956 if (name != args->where.name)
1957 kmem_free(name, MAXPATHLEN + 1);
1958
1959 dava.va_mask = AT_ALL;
1960 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1961
1962 /*
1963 * Force modified data and metadata out to stable storage.
1964 */
1965 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1966
1967 if (error)
1968 goto out;
1969
1970 error = makefh3(&resp->resok.obj.handle, vp, exi);
1971 if (error)
1972 resp->resok.obj.handle_follows = FALSE;
1973 else
1974 resp->resok.obj.handle_follows = TRUE;
1975
1976 va.va_mask = AT_ALL;
1977 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1978
1979 /*
1980 * Force modified data and metadata out to stable storage.
1981 */
1982 (void) VOP_FSYNC(vp, 0, cr, NULL);
1983
1984 VN_RELE(vp);
1985
1986 resp->status = NFS3_OK;
1987 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1988 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1989
1990 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1991 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1992 VN_RELE(dvp);
1993
1994 return;
1995
1996 out:
1997 if (curthread->t_flag & T_WOULDBLOCK) {
1998 curthread->t_flag &= ~T_WOULDBLOCK;
1999 resp->status = NFS3ERR_JUKEBOX;
2000 } else
2001 resp->status = puterrno3(error);
2002 out1:
2003 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2004 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2005 if (dvp != NULL)
2006 VN_RELE(dvp);
2007 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2008 }
2009
2010 void *
rfs3_mkdir_getfh(MKDIR3args * args)2011 rfs3_mkdir_getfh(MKDIR3args *args)
2012 {
2013
2014 return (&args->where.dir);
2015 }
2016
2017 void
rfs3_symlink(SYMLINK3args * args,SYMLINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2018 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2019 struct svc_req *req, cred_t *cr, bool_t ro)
2020 {
2021 int error;
2022 vnode_t *vp;
2023 vnode_t *dvp;
2024 struct vattr *vap;
2025 struct vattr va;
2026 struct vattr *dbvap;
2027 struct vattr dbva;
2028 struct vattr *davap;
2029 struct vattr dava;
2030 struct sockaddr *ca;
2031 char *name = NULL;
2032 char *symdata = NULL;
2033
2034 dbvap = NULL;
2035 davap = NULL;
2036
2037 dvp = nfs3_fhtovp(&args->where.dir, exi);
2038
2039 DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2040 cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2041
2042 if (dvp == NULL) {
2043 error = ESTALE;
2044 goto err;
2045 }
2046
2047 dbva.va_mask = AT_ALL;
2048 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2049 davap = dbvap;
2050
2051 if (args->where.name == nfs3nametoolong) {
2052 resp->status = NFS3ERR_NAMETOOLONG;
2053 goto err1;
2054 }
2055
2056 if (args->where.name == NULL || *(args->where.name) == '\0') {
2057 resp->status = NFS3ERR_ACCES;
2058 goto err1;
2059 }
2060
2061 if (rdonly(ro, dvp)) {
2062 resp->status = NFS3ERR_ROFS;
2063 goto err1;
2064 }
2065
2066 if (is_system_labeled()) {
2067 bslabel_t *clabel = req->rq_label;
2068
2069 ASSERT(clabel != NULL);
2070 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2071 "got client label from request(1)", struct svc_req *, req);
2072
2073 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2074 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2075 exi)) {
2076 resp->status = NFS3ERR_ACCES;
2077 goto err1;
2078 }
2079 }
2080 }
2081
2082 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2083 if (error)
2084 goto err;
2085
2086 if (!(va.va_mask & AT_MODE)) {
2087 resp->status = NFS3ERR_INVAL;
2088 goto err1;
2089 }
2090
2091 if (args->symlink.symlink_data == nfs3nametoolong) {
2092 resp->status = NFS3ERR_NAMETOOLONG;
2093 goto err1;
2094 }
2095
2096 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2097 name = nfscmd_convname(ca, exi, args->where.name,
2098 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2099
2100 if (name == NULL) {
2101 /* This is really a Solaris EILSEQ */
2102 resp->status = NFS3ERR_INVAL;
2103 goto err1;
2104 }
2105
2106 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2107 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2108 if (symdata == NULL) {
2109 /* This is really a Solaris EILSEQ */
2110 resp->status = NFS3ERR_INVAL;
2111 goto err1;
2112 }
2113
2114
2115 va.va_mask |= AT_TYPE;
2116 va.va_type = VLNK;
2117
2118 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2119
2120 dava.va_mask = AT_ALL;
2121 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2122
2123 if (error)
2124 goto err;
2125
2126 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2127 NULL, NULL, NULL);
2128
2129 /*
2130 * Force modified data and metadata out to stable storage.
2131 */
2132 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2133
2134
2135 resp->status = NFS3_OK;
2136 if (error) {
2137 resp->resok.obj.handle_follows = FALSE;
2138 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2139 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2140 goto out;
2141 }
2142
2143 error = makefh3(&resp->resok.obj.handle, vp, exi);
2144 if (error)
2145 resp->resok.obj.handle_follows = FALSE;
2146 else
2147 resp->resok.obj.handle_follows = TRUE;
2148
2149 va.va_mask = AT_ALL;
2150 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2151
2152 /*
2153 * Force modified data and metadata out to stable storage.
2154 */
2155 (void) VOP_FSYNC(vp, 0, cr, NULL);
2156
2157 VN_RELE(vp);
2158
2159 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2160 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2161 goto out;
2162
2163 err:
2164 if (curthread->t_flag & T_WOULDBLOCK) {
2165 curthread->t_flag &= ~T_WOULDBLOCK;
2166 resp->status = NFS3ERR_JUKEBOX;
2167 } else
2168 resp->status = puterrno3(error);
2169 err1:
2170 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2171 out:
2172 if (name != NULL && name != args->where.name)
2173 kmem_free(name, MAXPATHLEN + 1);
2174 if (symdata != NULL && symdata != args->symlink.symlink_data)
2175 kmem_free(symdata, MAXPATHLEN + 1);
2176
2177 DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2178 cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2179
2180 if (dvp != NULL)
2181 VN_RELE(dvp);
2182 }
2183
2184 void *
rfs3_symlink_getfh(SYMLINK3args * args)2185 rfs3_symlink_getfh(SYMLINK3args *args)
2186 {
2187
2188 return (&args->where.dir);
2189 }
2190
2191 void
rfs3_mknod(MKNOD3args * args,MKNOD3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2192 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2193 struct svc_req *req, cred_t *cr, bool_t ro)
2194 {
2195 int error;
2196 vnode_t *vp;
2197 vnode_t *realvp;
2198 vnode_t *dvp;
2199 struct vattr *vap;
2200 struct vattr va;
2201 struct vattr *dbvap;
2202 struct vattr dbva;
2203 struct vattr *davap;
2204 struct vattr dava;
2205 int mode;
2206 enum vcexcl excl;
2207 struct sockaddr *ca;
2208 char *name = NULL;
2209
2210 dbvap = NULL;
2211 davap = NULL;
2212
2213 dvp = nfs3_fhtovp(&args->where.dir, exi);
2214
2215 DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2216 cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2217
2218 if (dvp == NULL) {
2219 error = ESTALE;
2220 goto out;
2221 }
2222
2223 dbva.va_mask = AT_ALL;
2224 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2225 davap = dbvap;
2226
2227 if (args->where.name == nfs3nametoolong) {
2228 resp->status = NFS3ERR_NAMETOOLONG;
2229 goto out1;
2230 }
2231
2232 if (args->where.name == NULL || *(args->where.name) == '\0') {
2233 resp->status = NFS3ERR_ACCES;
2234 goto out1;
2235 }
2236
2237 if (rdonly(ro, dvp)) {
2238 resp->status = NFS3ERR_ROFS;
2239 goto out1;
2240 }
2241
2242 if (is_system_labeled()) {
2243 bslabel_t *clabel = req->rq_label;
2244
2245 ASSERT(clabel != NULL);
2246 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2247 "got client label from request(1)", struct svc_req *, req);
2248
2249 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2250 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2251 exi)) {
2252 resp->status = NFS3ERR_ACCES;
2253 goto out1;
2254 }
2255 }
2256 }
2257
2258 switch (args->what.type) {
2259 case NF3CHR:
2260 case NF3BLK:
2261 error = sattr3_to_vattr(
2262 &args->what.mknoddata3_u.device.dev_attributes, &va);
2263 if (error)
2264 goto out;
2265 if (secpolicy_sys_devices(cr) != 0) {
2266 resp->status = NFS3ERR_PERM;
2267 goto out1;
2268 }
2269 if (args->what.type == NF3CHR)
2270 va.va_type = VCHR;
2271 else
2272 va.va_type = VBLK;
2273 va.va_rdev = makedevice(
2274 args->what.mknoddata3_u.device.spec.specdata1,
2275 args->what.mknoddata3_u.device.spec.specdata2);
2276 va.va_mask |= AT_TYPE | AT_RDEV;
2277 break;
2278 case NF3SOCK:
2279 error = sattr3_to_vattr(
2280 &args->what.mknoddata3_u.pipe_attributes, &va);
2281 if (error)
2282 goto out;
2283 va.va_type = VSOCK;
2284 va.va_mask |= AT_TYPE;
2285 break;
2286 case NF3FIFO:
2287 error = sattr3_to_vattr(
2288 &args->what.mknoddata3_u.pipe_attributes, &va);
2289 if (error)
2290 goto out;
2291 va.va_type = VFIFO;
2292 va.va_mask |= AT_TYPE;
2293 break;
2294 default:
2295 resp->status = NFS3ERR_BADTYPE;
2296 goto out1;
2297 }
2298
2299 /*
2300 * Must specify the mode.
2301 */
2302 if (!(va.va_mask & AT_MODE)) {
2303 resp->status = NFS3ERR_INVAL;
2304 goto out1;
2305 }
2306
2307 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2308 name = nfscmd_convname(ca, exi, args->where.name,
2309 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2310
2311 if (name == NULL) {
2312 resp->status = NFS3ERR_INVAL;
2313 goto out1;
2314 }
2315
2316 excl = EXCL;
2317
2318 mode = 0;
2319
2320 error = VOP_CREATE(dvp, name, &va, excl, mode,
2321 &vp, cr, 0, NULL, NULL);
2322
2323 if (name != args->where.name)
2324 kmem_free(name, MAXPATHLEN + 1);
2325
2326 dava.va_mask = AT_ALL;
2327 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2328
2329 /*
2330 * Force modified data and metadata out to stable storage.
2331 */
2332 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2333
2334 if (error)
2335 goto out;
2336
2337 resp->status = NFS3_OK;
2338
2339 error = makefh3(&resp->resok.obj.handle, vp, exi);
2340 if (error)
2341 resp->resok.obj.handle_follows = FALSE;
2342 else
2343 resp->resok.obj.handle_follows = TRUE;
2344
2345 va.va_mask = AT_ALL;
2346 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2347
2348 /*
2349 * Force modified metadata out to stable storage.
2350 *
2351 * if a underlying vp exists, pass it to VOP_FSYNC
2352 */
2353 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2354 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2355 else
2356 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2357
2358 VN_RELE(vp);
2359
2360 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2361 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2362 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2363 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2364 VN_RELE(dvp);
2365 return;
2366
2367 out:
2368 if (curthread->t_flag & T_WOULDBLOCK) {
2369 curthread->t_flag &= ~T_WOULDBLOCK;
2370 resp->status = NFS3ERR_JUKEBOX;
2371 } else
2372 resp->status = puterrno3(error);
2373 out1:
2374 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2375 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2376 if (dvp != NULL)
2377 VN_RELE(dvp);
2378 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2379 }
2380
2381 void *
rfs3_mknod_getfh(MKNOD3args * args)2382 rfs3_mknod_getfh(MKNOD3args *args)
2383 {
2384
2385 return (&args->where.dir);
2386 }
2387
2388 void
rfs3_remove(REMOVE3args * args,REMOVE3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2389 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2390 struct svc_req *req, cred_t *cr, bool_t ro)
2391 {
2392 int error = 0;
2393 vnode_t *vp;
2394 struct vattr *bvap;
2395 struct vattr bva;
2396 struct vattr *avap;
2397 struct vattr ava;
2398 vnode_t *targvp = NULL;
2399 struct sockaddr *ca;
2400 char *name = NULL;
2401
2402 bvap = NULL;
2403 avap = NULL;
2404
2405 vp = nfs3_fhtovp(&args->object.dir, exi);
2406
2407 DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2408 cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2409
2410 if (vp == NULL) {
2411 error = ESTALE;
2412 goto err;
2413 }
2414
2415 bva.va_mask = AT_ALL;
2416 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2417 avap = bvap;
2418
2419 if (vp->v_type != VDIR) {
2420 resp->status = NFS3ERR_NOTDIR;
2421 goto err1;
2422 }
2423
2424 if (args->object.name == nfs3nametoolong) {
2425 resp->status = NFS3ERR_NAMETOOLONG;
2426 goto err1;
2427 }
2428
2429 if (args->object.name == NULL || *(args->object.name) == '\0') {
2430 resp->status = NFS3ERR_ACCES;
2431 goto err1;
2432 }
2433
2434 if (rdonly(ro, vp)) {
2435 resp->status = NFS3ERR_ROFS;
2436 goto err1;
2437 }
2438
2439 if (is_system_labeled()) {
2440 bslabel_t *clabel = req->rq_label;
2441
2442 ASSERT(clabel != NULL);
2443 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2444 "got client label from request(1)", struct svc_req *, req);
2445
2446 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2447 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2448 exi)) {
2449 resp->status = NFS3ERR_ACCES;
2450 goto err1;
2451 }
2452 }
2453 }
2454
2455 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2456 name = nfscmd_convname(ca, exi, args->object.name,
2457 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2458
2459 if (name == NULL) {
2460 resp->status = NFS3ERR_INVAL;
2461 goto err1;
2462 }
2463
2464 /*
2465 * Check for a conflict with a non-blocking mandatory share
2466 * reservation and V4 delegations
2467 */
2468 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2469 NULL, cr, NULL, NULL, NULL);
2470 if (error != 0)
2471 goto err;
2472
2473 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2474 resp->status = NFS3ERR_JUKEBOX;
2475 goto err1;
2476 }
2477
2478 if (!nbl_need_check(targvp)) {
2479 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2480 } else {
2481 nbl_start_crit(targvp, RW_READER);
2482 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2483 error = EACCES;
2484 } else {
2485 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2486 }
2487 nbl_end_crit(targvp);
2488 }
2489 VN_RELE(targvp);
2490 targvp = NULL;
2491
2492 ava.va_mask = AT_ALL;
2493 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2494
2495 /*
2496 * Force modified data and metadata out to stable storage.
2497 */
2498 (void) VOP_FSYNC(vp, 0, cr, NULL);
2499
2500 if (error)
2501 goto err;
2502
2503 resp->status = NFS3_OK;
2504 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2505 goto out;
2506
2507 err:
2508 if (curthread->t_flag & T_WOULDBLOCK) {
2509 curthread->t_flag &= ~T_WOULDBLOCK;
2510 resp->status = NFS3ERR_JUKEBOX;
2511 } else
2512 resp->status = puterrno3(error);
2513 err1:
2514 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2515 out:
2516 DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2517 cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2518
2519 if (name != NULL && name != args->object.name)
2520 kmem_free(name, MAXPATHLEN + 1);
2521
2522 if (vp != NULL)
2523 VN_RELE(vp);
2524 }
2525
2526 void *
rfs3_remove_getfh(REMOVE3args * args)2527 rfs3_remove_getfh(REMOVE3args *args)
2528 {
2529
2530 return (&args->object.dir);
2531 }
2532
2533 void
rfs3_rmdir(RMDIR3args * args,RMDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2534 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2535 struct svc_req *req, cred_t *cr, bool_t ro)
2536 {
2537 int error;
2538 vnode_t *vp;
2539 struct vattr *bvap;
2540 struct vattr bva;
2541 struct vattr *avap;
2542 struct vattr ava;
2543 struct sockaddr *ca;
2544 char *name = NULL;
2545
2546 bvap = NULL;
2547 avap = NULL;
2548
2549 vp = nfs3_fhtovp(&args->object.dir, exi);
2550
2551 DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2552 cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2553
2554 if (vp == NULL) {
2555 error = ESTALE;
2556 goto err;
2557 }
2558
2559 bva.va_mask = AT_ALL;
2560 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2561 avap = bvap;
2562
2563 if (vp->v_type != VDIR) {
2564 resp->status = NFS3ERR_NOTDIR;
2565 goto err1;
2566 }
2567
2568 if (args->object.name == nfs3nametoolong) {
2569 resp->status = NFS3ERR_NAMETOOLONG;
2570 goto err1;
2571 }
2572
2573 if (args->object.name == NULL || *(args->object.name) == '\0') {
2574 resp->status = NFS3ERR_ACCES;
2575 goto err1;
2576 }
2577
2578 if (rdonly(ro, vp)) {
2579 resp->status = NFS3ERR_ROFS;
2580 goto err1;
2581 }
2582
2583 if (is_system_labeled()) {
2584 bslabel_t *clabel = req->rq_label;
2585
2586 ASSERT(clabel != NULL);
2587 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2588 "got client label from request(1)", struct svc_req *, req);
2589
2590 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2591 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2592 exi)) {
2593 resp->status = NFS3ERR_ACCES;
2594 goto err1;
2595 }
2596 }
2597 }
2598
2599 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2600 name = nfscmd_convname(ca, exi, args->object.name,
2601 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2602
2603 if (name == NULL) {
2604 resp->status = NFS3ERR_INVAL;
2605 goto err1;
2606 }
2607
2608 error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2609
2610 if (name != args->object.name)
2611 kmem_free(name, MAXPATHLEN + 1);
2612
2613 ava.va_mask = AT_ALL;
2614 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2615
2616 /*
2617 * Force modified data and metadata out to stable storage.
2618 */
2619 (void) VOP_FSYNC(vp, 0, cr, NULL);
2620
2621 if (error) {
2622 /*
2623 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2624 * if the directory is not empty. A System V NFS server
2625 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2626 * over the wire.
2627 */
2628 if (error == EEXIST)
2629 error = ENOTEMPTY;
2630 goto err;
2631 }
2632
2633 resp->status = NFS3_OK;
2634 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2635 goto out;
2636
2637 err:
2638 if (curthread->t_flag & T_WOULDBLOCK) {
2639 curthread->t_flag &= ~T_WOULDBLOCK;
2640 resp->status = NFS3ERR_JUKEBOX;
2641 } else
2642 resp->status = puterrno3(error);
2643 err1:
2644 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2645 out:
2646 DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2647 cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2648 if (vp != NULL)
2649 VN_RELE(vp);
2650
2651 }
2652
2653 void *
rfs3_rmdir_getfh(RMDIR3args * args)2654 rfs3_rmdir_getfh(RMDIR3args *args)
2655 {
2656
2657 return (&args->object.dir);
2658 }
2659
2660 void
rfs3_rename(RENAME3args * args,RENAME3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2661 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2662 struct svc_req *req, cred_t *cr, bool_t ro)
2663 {
2664 int error = 0;
2665 vnode_t *fvp;
2666 vnode_t *tvp;
2667 vnode_t *targvp;
2668 struct vattr *fbvap;
2669 struct vattr fbva;
2670 struct vattr *favap;
2671 struct vattr fava;
2672 struct vattr *tbvap;
2673 struct vattr tbva;
2674 struct vattr *tavap;
2675 struct vattr tava;
2676 nfs_fh3 *fh3;
2677 struct exportinfo *to_exi;
2678 vnode_t *srcvp = NULL;
2679 bslabel_t *clabel;
2680 struct sockaddr *ca;
2681 char *name = NULL;
2682 char *toname = NULL;
2683
2684 fbvap = NULL;
2685 favap = NULL;
2686 tbvap = NULL;
2687 tavap = NULL;
2688 tvp = NULL;
2689
2690 fvp = nfs3_fhtovp(&args->from.dir, exi);
2691
2692 DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2693 cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2694
2695 if (fvp == NULL) {
2696 error = ESTALE;
2697 goto err;
2698 }
2699
2700 if (is_system_labeled()) {
2701 clabel = req->rq_label;
2702 ASSERT(clabel != NULL);
2703 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2704 "got client label from request(1)", struct svc_req *, req);
2705
2706 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2707 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2708 exi)) {
2709 resp->status = NFS3ERR_ACCES;
2710 goto err1;
2711 }
2712 }
2713 }
2714
2715 fbva.va_mask = AT_ALL;
2716 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2717 favap = fbvap;
2718
2719 fh3 = &args->to.dir;
2720 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3), NULL);
2721 if (to_exi == NULL) {
2722 resp->status = NFS3ERR_ACCES;
2723 goto err1;
2724 }
2725 exi_rele(to_exi);
2726
2727 if (to_exi != exi) {
2728 resp->status = NFS3ERR_XDEV;
2729 goto err1;
2730 }
2731
2732 tvp = nfs3_fhtovp(&args->to.dir, exi);
2733 if (tvp == NULL) {
2734 error = ESTALE;
2735 goto err;
2736 }
2737
2738 tbva.va_mask = AT_ALL;
2739 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2740 tavap = tbvap;
2741
2742 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2743 resp->status = NFS3ERR_NOTDIR;
2744 goto err1;
2745 }
2746
2747 if (args->from.name == nfs3nametoolong ||
2748 args->to.name == nfs3nametoolong) {
2749 resp->status = NFS3ERR_NAMETOOLONG;
2750 goto err1;
2751 }
2752 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2753 args->to.name == NULL || *(args->to.name) == '\0') {
2754 resp->status = NFS3ERR_ACCES;
2755 goto err1;
2756 }
2757
2758 if (rdonly(ro, tvp)) {
2759 resp->status = NFS3ERR_ROFS;
2760 goto err1;
2761 }
2762
2763 if (is_system_labeled()) {
2764 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2765 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2766 exi)) {
2767 resp->status = NFS3ERR_ACCES;
2768 goto err1;
2769 }
2770 }
2771 }
2772
2773 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2774 name = nfscmd_convname(ca, exi, args->from.name,
2775 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2776
2777 if (name == NULL) {
2778 resp->status = NFS3ERR_INVAL;
2779 goto err1;
2780 }
2781
2782 toname = nfscmd_convname(ca, exi, args->to.name,
2783 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2784
2785 if (toname == NULL) {
2786 resp->status = NFS3ERR_INVAL;
2787 goto err1;
2788 }
2789
2790 /*
2791 * Check for a conflict with a non-blocking mandatory share
2792 * reservation or V4 delegations.
2793 */
2794 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2795 NULL, cr, NULL, NULL, NULL);
2796 if (error != 0)
2797 goto err;
2798
2799 /*
2800 * If we rename a delegated file we should recall the
2801 * delegation, since future opens should fail or would
2802 * refer to a new file.
2803 */
2804 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2805 resp->status = NFS3ERR_JUKEBOX;
2806 goto err1;
2807 }
2808
2809 /*
2810 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2811 * first to avoid VOP_LOOKUP if possible.
2812 */
2813 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2814 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2815 NULL, NULL, NULL) == 0) {
2816
2817 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2818 VN_RELE(targvp);
2819 resp->status = NFS3ERR_JUKEBOX;
2820 goto err1;
2821 }
2822 VN_RELE(targvp);
2823 }
2824
2825 if (!nbl_need_check(srcvp)) {
2826 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2827 } else {
2828 nbl_start_crit(srcvp, RW_READER);
2829 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2830 error = EACCES;
2831 else
2832 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2833 nbl_end_crit(srcvp);
2834 }
2835 if (error == 0)
2836 vn_renamepath(tvp, srcvp, args->to.name,
2837 strlen(args->to.name));
2838 VN_RELE(srcvp);
2839 srcvp = NULL;
2840
2841 fava.va_mask = AT_ALL;
2842 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2843 tava.va_mask = AT_ALL;
2844 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2845
2846 /*
2847 * Force modified data and metadata out to stable storage.
2848 */
2849 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2850 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2851
2852 if (error)
2853 goto err;
2854
2855 resp->status = NFS3_OK;
2856 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2857 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2858 goto out;
2859
2860 err:
2861 if (curthread->t_flag & T_WOULDBLOCK) {
2862 curthread->t_flag &= ~T_WOULDBLOCK;
2863 resp->status = NFS3ERR_JUKEBOX;
2864 } else {
2865 resp->status = puterrno3(error);
2866 }
2867 err1:
2868 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2869 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2870
2871 out:
2872 if (name != NULL && name != args->from.name)
2873 kmem_free(name, MAXPATHLEN + 1);
2874 if (toname != NULL && toname != args->to.name)
2875 kmem_free(toname, MAXPATHLEN + 1);
2876
2877 DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2878 cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2879 if (fvp != NULL)
2880 VN_RELE(fvp);
2881 if (tvp != NULL)
2882 VN_RELE(tvp);
2883 }
2884
2885 void *
rfs3_rename_getfh(RENAME3args * args)2886 rfs3_rename_getfh(RENAME3args *args)
2887 {
2888
2889 return (&args->from.dir);
2890 }
2891
2892 void
rfs3_link(LINK3args * args,LINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2893 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2894 struct svc_req *req, cred_t *cr, bool_t ro)
2895 {
2896 int error;
2897 vnode_t *vp;
2898 vnode_t *dvp;
2899 struct vattr *vap;
2900 struct vattr va;
2901 struct vattr *bvap;
2902 struct vattr bva;
2903 struct vattr *avap;
2904 struct vattr ava;
2905 nfs_fh3 *fh3;
2906 struct exportinfo *to_exi;
2907 bslabel_t *clabel;
2908 struct sockaddr *ca;
2909 char *name = NULL;
2910
2911 vap = NULL;
2912 bvap = NULL;
2913 avap = NULL;
2914 dvp = NULL;
2915
2916 vp = nfs3_fhtovp(&args->file, exi);
2917
2918 DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2919 cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2920
2921 if (vp == NULL) {
2922 error = ESTALE;
2923 goto out;
2924 }
2925
2926 va.va_mask = AT_ALL;
2927 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2928
2929 fh3 = &args->link.dir;
2930 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3), NULL);
2931 if (to_exi == NULL) {
2932 resp->status = NFS3ERR_ACCES;
2933 goto out1;
2934 }
2935 exi_rele(to_exi);
2936
2937 if (to_exi != exi) {
2938 resp->status = NFS3ERR_XDEV;
2939 goto out1;
2940 }
2941
2942 if (is_system_labeled()) {
2943 clabel = req->rq_label;
2944
2945 ASSERT(clabel != NULL);
2946 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2947 "got client label from request(1)", struct svc_req *, req);
2948
2949 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2950 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2951 exi)) {
2952 resp->status = NFS3ERR_ACCES;
2953 goto out1;
2954 }
2955 }
2956 }
2957
2958 dvp = nfs3_fhtovp(&args->link.dir, exi);
2959 if (dvp == NULL) {
2960 error = ESTALE;
2961 goto out;
2962 }
2963
2964 bva.va_mask = AT_ALL;
2965 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2966
2967 if (dvp->v_type != VDIR) {
2968 resp->status = NFS3ERR_NOTDIR;
2969 goto out1;
2970 }
2971
2972 if (args->link.name == nfs3nametoolong) {
2973 resp->status = NFS3ERR_NAMETOOLONG;
2974 goto out1;
2975 }
2976
2977 if (args->link.name == NULL || *(args->link.name) == '\0') {
2978 resp->status = NFS3ERR_ACCES;
2979 goto out1;
2980 }
2981
2982 if (rdonly(ro, dvp)) {
2983 resp->status = NFS3ERR_ROFS;
2984 goto out1;
2985 }
2986
2987 if (is_system_labeled()) {
2988 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2989 "got client label from request(1)", struct svc_req *, req);
2990
2991 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2992 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2993 exi)) {
2994 resp->status = NFS3ERR_ACCES;
2995 goto out1;
2996 }
2997 }
2998 }
2999
3000 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3001 name = nfscmd_convname(ca, exi, args->link.name,
3002 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3003
3004 if (name == NULL) {
3005 resp->status = NFS3ERR_SERVERFAULT;
3006 goto out1;
3007 }
3008
3009 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3010
3011 va.va_mask = AT_ALL;
3012 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3013 ava.va_mask = AT_ALL;
3014 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3015
3016 /*
3017 * Force modified data and metadata out to stable storage.
3018 */
3019 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3020 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3021
3022 if (error)
3023 goto out;
3024
3025 VN_RELE(dvp);
3026
3027 resp->status = NFS3_OK;
3028 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3029 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3030
3031 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3032 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3033
3034 VN_RELE(vp);
3035
3036 return;
3037
3038 out:
3039 if (curthread->t_flag & T_WOULDBLOCK) {
3040 curthread->t_flag &= ~T_WOULDBLOCK;
3041 resp->status = NFS3ERR_JUKEBOX;
3042 } else
3043 resp->status = puterrno3(error);
3044 out1:
3045 if (name != NULL && name != args->link.name)
3046 kmem_free(name, MAXPATHLEN + 1);
3047
3048 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3049 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3050
3051 if (vp != NULL)
3052 VN_RELE(vp);
3053 if (dvp != NULL)
3054 VN_RELE(dvp);
3055 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3056 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3057 }
3058
3059 void *
rfs3_link_getfh(LINK3args * args)3060 rfs3_link_getfh(LINK3args *args)
3061 {
3062
3063 return (&args->file);
3064 }
3065
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries the directory attributes plus exactly one
 * directory entry whose name is `length' bytes long.  A request whose
 * count is at least this large is guaranteed to have room for at least
 * one directory entry if one exists, so NFS3ERR_TOOSMALL does not need
 * to be considered for it.  For a smaller count the caller has to check
 * whether NFS3ERR_TOOSMALL must be returned.
 *
 * The size is made up of the following:
 *
 *	status			- 1 * BYTES_PER_XDR_UNIT
 *	attr. flag		- 1 * BYTES_PER_XDR_UNIT
 *	cookie verifier		- 2 * BYTES_PER_XDR_UNIT
 *	attributes		- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	boolean			- 1 * BYTES_PER_XDR_UNIT
 *	file id			- 2 * BYTES_PER_XDR_UNIT
 *	directory name length	- 1 * BYTES_PER_XDR_UNIT
 *	cookie			- 2 * BYTES_PER_XDR_UNIT
 *	end of list		- 1 * BYTES_PER_XDR_UNIT
 *	end of file		- 1 * BYTES_PER_XDR_UNIT
 *	the name itself, rounded up to a multiple of BYTES_PER_XDR_UNIT
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	BYTES_PER_XDR_UNIT)
3094
3095 /* ARGSUSED */
3096 void
rfs3_readdir(READDIR3args * args,READDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3097 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3098 struct svc_req *req, cred_t *cr, bool_t ro)
3099 {
3100 int error;
3101 vnode_t *vp;
3102 struct vattr *vap;
3103 struct vattr va;
3104 struct iovec iov;
3105 struct uio uio;
3106 char *data;
3107 int iseof;
3108 int bufsize;
3109 int namlen;
3110 uint_t count;
3111 struct sockaddr *ca;
3112
3113 vap = NULL;
3114
3115 vp = nfs3_fhtovp(&args->dir, exi);
3116
3117 DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3118 cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3119
3120 if (vp == NULL) {
3121 error = ESTALE;
3122 goto out;
3123 }
3124
3125 if (is_system_labeled()) {
3126 bslabel_t *clabel = req->rq_label;
3127
3128 ASSERT(clabel != NULL);
3129 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3130 "got client label from request(1)", struct svc_req *, req);
3131
3132 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3133 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3134 exi)) {
3135 resp->status = NFS3ERR_ACCES;
3136 goto out1;
3137 }
3138 }
3139 }
3140
3141 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3142
3143 va.va_mask = AT_ALL;
3144 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3145
3146 if (vp->v_type != VDIR) {
3147 resp->status = NFS3ERR_NOTDIR;
3148 goto out1;
3149 }
3150
3151 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3152 if (error)
3153 goto out;
3154
3155 /*
3156 * Now don't allow arbitrary count to alloc;
3157 * allow the maximum not to exceed rfs3_tsize()
3158 */
3159 if (args->count > rfs3_tsize(req))
3160 args->count = rfs3_tsize(req);
3161
3162 /*
3163 * Make sure that there is room to read at least one entry
3164 * if any are available.
3165 */
3166 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3167 count = DIRENT64_RECLEN(MAXNAMELEN);
3168 else
3169 count = args->count;
3170
3171 data = kmem_alloc(count, KM_SLEEP);
3172
3173 iov.iov_base = data;
3174 iov.iov_len = count;
3175 uio.uio_iov = &iov;
3176 uio.uio_iovcnt = 1;
3177 uio.uio_segflg = UIO_SYSSPACE;
3178 uio.uio_extflg = UIO_COPY_CACHED;
3179 uio.uio_loffset = (offset_t)args->cookie;
3180 uio.uio_resid = count;
3181
3182 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3183
3184 va.va_mask = AT_ALL;
3185 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3186
3187 if (error) {
3188 kmem_free(data, count);
3189 goto out;
3190 }
3191
3192 /*
3193 * If the count was not large enough to be able to guarantee
3194 * to be able to return at least one entry, then need to
3195 * check to see if NFS3ERR_TOOSMALL should be returned.
3196 */
3197 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3198 /*
3199 * bufsize is used to keep track of the size of the response.
3200 * It is primed with:
3201 * 1 for the status +
3202 * 1 for the dir_attributes.attributes boolean +
3203 * 2 for the cookie verifier
3204 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3205 * to bytes. If there are directory attributes to be
3206 * returned, then:
3207 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3208 * time BYTES_PER_XDR_UNIT is added to account for them.
3209 */
3210 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3211 if (vap != NULL)
3212 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3213 /*
3214 * An entry is composed of:
3215 * 1 for the true/false list indicator +
3216 * 2 for the fileid +
3217 * 1 for the length of the name +
3218 * 2 for the cookie +
3219 * all times BYTES_PER_XDR_UNIT to convert from
3220 * XDR units to bytes, plus the length of the name
3221 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3222 */
3223 if (count != uio.uio_resid) {
3224 namlen = strlen(((struct dirent64 *)data)->d_name);
3225 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3226 roundup(namlen, BYTES_PER_XDR_UNIT);
3227 }
3228 /*
3229 * We need to check to see if the number of bytes left
3230 * to go into the buffer will actually fit into the
3231 * buffer. This is calculated as the size of this
3232 * entry plus:
3233 * 1 for the true/false list indicator +
3234 * 1 for the eof indicator
3235 * times BYTES_PER_XDR_UNIT to convert from from
3236 * XDR units to bytes.
3237 */
3238 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3239 if (bufsize > args->count) {
3240 kmem_free(data, count);
3241 resp->status = NFS3ERR_TOOSMALL;
3242 goto out1;
3243 }
3244 }
3245
3246 /*
3247 * Have a valid readir buffer for the native character
3248 * set. Need to check if a conversion is necessary and
3249 * potentially rewrite the whole buffer. Note that if the
3250 * conversion expands names enough, the structure may not
3251 * fit. In this case, we need to drop entries until if fits
3252 * and patch the counts in order that the next readdir will
3253 * get the correct entries.
3254 */
3255 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3256 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3257
3258
3259 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3260
3261 #if 0 /* notyet */
3262 /*
3263 * Don't do this. It causes local disk writes when just
3264 * reading the file and the overhead is deemed larger
3265 * than the benefit.
3266 */
3267 /*
3268 * Force modified metadata out to stable storage.
3269 */
3270 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3271 #endif
3272
3273 resp->status = NFS3_OK;
3274 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3275 resp->resok.cookieverf = 0;
3276 resp->resok.reply.entries = (entry3 *)data;
3277 resp->resok.reply.eof = iseof;
3278 resp->resok.size = count - uio.uio_resid;
3279 resp->resok.count = args->count;
3280 resp->resok.freecount = count;
3281
3282 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3283 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3284
3285 VN_RELE(vp);
3286
3287 return;
3288
3289 out:
3290 if (curthread->t_flag & T_WOULDBLOCK) {
3291 curthread->t_flag &= ~T_WOULDBLOCK;
3292 resp->status = NFS3ERR_JUKEBOX;
3293 } else
3294 resp->status = puterrno3(error);
3295 out1:
3296 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3297 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3298
3299 if (vp != NULL) {
3300 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3301 VN_RELE(vp);
3302 }
3303 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3304 }
3305
3306 void *
rfs3_readdir_getfh(READDIR3args * args)3307 rfs3_readdir_getfh(READDIR3args *args)
3308 {
3309
3310 return (&args->dir);
3311 }
3312
3313 void
rfs3_readdir_free(READDIR3res * resp)3314 rfs3_readdir_free(READDIR3res *resp)
3315 {
3316
3317 if (resp->status == NFS3_OK)
3318 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3319 }
3320
/*
 * nextdp(dp) yields the dirent64 record that starts d_reclen bytes after
 * the start of the record `dp'.  Any earlier definition of nextdp is
 * discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
3325
/*
 * This macro computes the size, in bytes, of the part of a READDIRPLUS
 * response taken up by one directory entry, including its attributes
 * and its file handle.  If the space remaining in the response is larger
 * than this, then we are guaranteed to be able to return at least one
 * more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 *	boolean				- 1 * BYTES_PER_XDR_UNIT
 *	file id				- 2 * BYTES_PER_XDR_UNIT
 *	directory name length		- 1 * BYTES_PER_XDR_UNIT
 *	cookie				- 2 * BYTES_PER_XDR_UNIT
 *	attribute flag			- 1 * BYTES_PER_XDR_UNIT
 *	attributes			- NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 *	status byte for file handle	- 1 * BYTES_PER_XDR_UNIT
 *	length of a file handle		- 1 * BYTES_PER_XDR_UNIT
 *	Maximum length of a file handle (NFS3_MAXFHSIZE)
 *	name length of the entry, rounded up to a multiple of
 *	BYTES_PER_XDR_UNIT
 *
 * The macro argument is parenthesized so that an expression can be
 * passed safely, matching NFS3_READDIR_MIN_COUNT().
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
	BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3349
/*
 * Starting size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus().  The request size is further limited there to the
 * space left in the directory buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3351
3352 /* ARGSUSED */
3353 void
rfs3_readdirplus(READDIRPLUS3args * args,READDIRPLUS3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3354 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3355 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3356 {
3357 int error;
3358 vnode_t *vp;
3359 struct vattr *vap;
3360 struct vattr va;
3361 struct iovec iov;
3362 struct uio uio;
3363 char *data;
3364 int iseof;
3365 struct dirent64 *dp;
3366 vnode_t *nvp;
3367 struct vattr *nvap;
3368 struct vattr nva;
3369 entryplus3_info *infop = NULL;
3370 int size = 0;
3371 int nents = 0;
3372 int bufsize = 0;
3373 int entrysize = 0;
3374 int tofit = 0;
3375 int rd_unit = rfs3_readdir_unit;
3376 int prev_len;
3377 int space_left;
3378 int i;
3379 uint_t *namlen = NULL;
3380 char *ndata = NULL;
3381 struct sockaddr *ca;
3382 size_t ret;
3383
3384 vap = NULL;
3385
3386 vp = nfs3_fhtovp(&args->dir, exi);
3387
3388 DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3389 cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3390
3391 if (vp == NULL) {
3392 error = ESTALE;
3393 goto out;
3394 }
3395
3396 if (is_system_labeled()) {
3397 bslabel_t *clabel = req->rq_label;
3398
3399 ASSERT(clabel != NULL);
3400 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3401 char *, "got client label from request(1)",
3402 struct svc_req *, req);
3403
3404 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3405 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3406 exi)) {
3407 resp->status = NFS3ERR_ACCES;
3408 goto out1;
3409 }
3410 }
3411 }
3412
3413 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3414
3415 va.va_mask = AT_ALL;
3416 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3417
3418 if (vp->v_type != VDIR) {
3419 error = ENOTDIR;
3420 goto out;
3421 }
3422
3423 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3424 if (error)
3425 goto out;
3426
3427 /*
3428 * Don't allow arbitrary counts for allocation
3429 */
3430 if (args->maxcount > rfs3_tsize(req))
3431 args->maxcount = rfs3_tsize(req);
3432
3433 /*
3434 * Make sure that there is room to read at least one entry
3435 * if any are available
3436 */
3437 args->dircount = MIN(args->dircount, args->maxcount);
3438
3439 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3440 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3441
3442 /*
3443 * This allocation relies on a minimum directory entry
3444 * being roughly 24 bytes. Therefore, the namlen array
3445 * will have enough space based on the maximum number of
3446 * entries to read.
3447 */
3448 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3449
3450 space_left = args->dircount;
3451 data = kmem_alloc(args->dircount, KM_SLEEP);
3452 dp = (struct dirent64 *)data;
3453 uio.uio_iov = &iov;
3454 uio.uio_iovcnt = 1;
3455 uio.uio_segflg = UIO_SYSSPACE;
3456 uio.uio_extflg = UIO_COPY_CACHED;
3457 uio.uio_loffset = (offset_t)args->cookie;
3458
3459 /*
3460 * bufsize is used to keep track of the size of the response as we
3461 * get post op attributes and filehandles for each entry. This is
3462 * an optimization as the server may have read more entries than will
3463 * fit in the buffer specified by maxcount. We stop calculating
3464 * post op attributes and filehandles once we have exceeded maxcount.
3465 * This will minimize the effect of truncation.
3466 *
3467 * It is primed with:
3468 * 1 for the status +
3469 * 1 for the dir_attributes.attributes boolean +
3470 * 2 for the cookie verifier
3471 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3472 * to bytes. If there are directory attributes to be
3473 * returned, then:
3474 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3475 * time BYTES_PER_XDR_UNIT is added to account for them.
3476 */
3477 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3478 if (vap != NULL)
3479 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3480
3481 getmoredents:
3482 /*
3483 * Here we make a check so that our read unit is not larger than
3484 * the space left in the buffer.
3485 */
3486 rd_unit = MIN(rd_unit, space_left);
3487 iov.iov_base = (char *)dp;
3488 iov.iov_len = rd_unit;
3489 uio.uio_resid = rd_unit;
3490 prev_len = rd_unit;
3491
3492 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3493
3494 if (error) {
3495 kmem_free(data, args->dircount);
3496 goto out;
3497 }
3498
3499 if (uio.uio_resid == prev_len && !iseof) {
3500 if (nents == 0) {
3501 kmem_free(data, args->dircount);
3502 resp->status = NFS3ERR_TOOSMALL;
3503 goto out1;
3504 }
3505
3506 /*
3507 * We could not get any more entries, so get the attributes
3508 * and filehandle for the entries already obtained.
3509 */
3510 goto good;
3511 }
3512
3513 /*
3514 * We estimate the size of the response by assuming the
3515 * entry exists and attributes and filehandle are also valid
3516 */
3517 for (size = prev_len - uio.uio_resid;
3518 size > 0;
3519 size -= dp->d_reclen, dp = nextdp(dp)) {
3520
3521 if (dp->d_ino == 0) {
3522 nents++;
3523 continue;
3524 }
3525
3526 namlen[nents] = strlen(dp->d_name);
3527 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3528
3529 /*
3530 * We need to check to see if the number of bytes left
3531 * to go into the buffer will actually fit into the
3532 * buffer. This is calculated as the size of this
3533 * entry plus:
3534 * 1 for the true/false list indicator +
3535 * 1 for the eof indicator
3536 * times BYTES_PER_XDR_UNIT to convert from XDR units
3537 * to bytes.
3538 *
3539 * Also check the dircount limit against the first entry read
3540 *
3541 */
3542 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3543 if (bufsize + tofit > args->maxcount) {
3544 /*
3545 * We make a check here to see if this was the
3546 * first entry being measured. If so, then maxcount
3547 * was too small to begin with and so we need to
3548 * return with NFS3ERR_TOOSMALL.
3549 */
3550 if (nents == 0) {
3551 kmem_free(data, args->dircount);
3552 resp->status = NFS3ERR_TOOSMALL;
3553 goto out1;
3554 }
3555 iseof = FALSE;
3556 goto good;
3557 }
3558 bufsize += entrysize;
3559 nents++;
3560 }
3561
3562 /*
3563 * If there is enough room to fit at least 1 more entry including
3564 * post op attributes and filehandle in the buffer AND that we haven't
3565 * exceeded dircount then go back and get some more.
3566 */
3567 if (!iseof &&
3568 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3569 space_left -= (prev_len - uio.uio_resid);
3570 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3571 goto getmoredents;
3572
3573 /* else, fall through */
3574 }
3575 good:
3576 va.va_mask = AT_ALL;
3577 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3578
3579 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3580
3581 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3582 resp->resok.infop = infop;
3583
3584 dp = (struct dirent64 *)data;
3585 for (i = 0; i < nents; i++) {
3586
3587 if (dp->d_ino == 0) {
3588 infop[i].attr.attributes = FALSE;
3589 infop[i].fh.handle_follows = FALSE;
3590 dp = nextdp(dp);
3591 continue;
3592 }
3593
3594 infop[i].namelen = namlen[i];
3595
3596 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3597 NULL, NULL, NULL);
3598 if (error) {
3599 infop[i].attr.attributes = FALSE;
3600 infop[i].fh.handle_follows = FALSE;
3601 dp = nextdp(dp);
3602 continue;
3603 }
3604
3605 nva.va_mask = AT_ALL;
3606 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3607
3608 /* Lie about the object type for a referral */
3609 if (vn_is_nfs_reparse(nvp, cr))
3610 nvap->va_type = VLNK;
3611
3612 vattr_to_post_op_attr(nvap, &infop[i].attr);
3613
3614 error = makefh3(&infop[i].fh.handle, nvp, exi);
3615 if (!error)
3616 infop[i].fh.handle_follows = TRUE;
3617 else
3618 infop[i].fh.handle_follows = FALSE;
3619
3620 VN_RELE(nvp);
3621 dp = nextdp(dp);
3622 }
3623
3624 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3625 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3626 if (ndata == NULL)
3627 ndata = data;
3628
3629 if (ret > 0) {
3630 /*
3631 * We had to drop one or more entries in order to fit
3632 * during the character conversion. We need to patch
3633 * up the size and eof info.
3634 */
3635 if (iseof)
3636 iseof = FALSE;
3637
3638 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3639 nents, ret);
3640 }
3641
3642
3643 #if 0 /* notyet */
3644 /*
3645 * Don't do this. It causes local disk writes when just
3646 * reading the file and the overhead is deemed larger
3647 * than the benefit.
3648 */
3649 /*
3650 * Force modified metadata out to stable storage.
3651 */
3652 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3653 #endif
3654
3655 kmem_free(namlen, args->dircount);
3656
3657 resp->status = NFS3_OK;
3658 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3659 resp->resok.cookieverf = 0;
3660 resp->resok.reply.entries = (entryplus3 *)ndata;
3661 resp->resok.reply.eof = iseof;
3662 resp->resok.size = nents;
3663 resp->resok.count = args->dircount - ret;
3664 resp->resok.maxcount = args->maxcount;
3665
3666 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3667 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3668 if (ndata != data)
3669 kmem_free(data, args->dircount);
3670
3671
3672 VN_RELE(vp);
3673
3674 return;
3675
3676 out:
3677 if (curthread->t_flag & T_WOULDBLOCK) {
3678 curthread->t_flag &= ~T_WOULDBLOCK;
3679 resp->status = NFS3ERR_JUKEBOX;
3680 } else {
3681 resp->status = puterrno3(error);
3682 }
3683 out1:
3684 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686
3687 if (vp != NULL) {
3688 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3689 VN_RELE(vp);
3690 }
3691
3692 if (namlen != NULL)
3693 kmem_free(namlen, args->dircount);
3694
3695 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3696 }
3697
3698 void *
rfs3_readdirplus_getfh(READDIRPLUS3args * args)3699 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3700 {
3701
3702 return (&args->dir);
3703 }
3704
3705 void
rfs3_readdirplus_free(READDIRPLUS3res * resp)3706 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3707 {
3708
3709 if (resp->status == NFS3_OK) {
3710 kmem_free(resp->resok.reply.entries, resp->resok.count);
3711 kmem_free(resp->resok.infop,
3712 resp->resok.size * sizeof (struct entryplus3_info));
3713 }
3714 }
3715
3716 /* ARGSUSED */
3717 void
rfs3_fsstat(FSSTAT3args * args,FSSTAT3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3718 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3719 struct svc_req *req, cred_t *cr, bool_t ro)
3720 {
3721 int error;
3722 vnode_t *vp;
3723 struct vattr *vap;
3724 struct vattr va;
3725 struct statvfs64 sb;
3726
3727 vap = NULL;
3728
3729 vp = nfs3_fhtovp(&args->fsroot, exi);
3730
3731 DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3732 cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3733
3734 if (vp == NULL) {
3735 error = ESTALE;
3736 goto out;
3737 }
3738
3739 if (is_system_labeled()) {
3740 bslabel_t *clabel = req->rq_label;
3741
3742 ASSERT(clabel != NULL);
3743 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3744 "got client label from request(1)", struct svc_req *, req);
3745
3746 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3747 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3748 exi)) {
3749 resp->status = NFS3ERR_ACCES;
3750 goto out1;
3751 }
3752 }
3753 }
3754
3755 error = VFS_STATVFS(vp->v_vfsp, &sb);
3756
3757 va.va_mask = AT_ALL;
3758 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3759
3760 if (error)
3761 goto out;
3762
3763 resp->status = NFS3_OK;
3764 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3765 if (sb.f_blocks != (fsblkcnt64_t)-1)
3766 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3767 else
3768 resp->resok.tbytes = (size3)sb.f_blocks;
3769 if (sb.f_bfree != (fsblkcnt64_t)-1)
3770 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3771 else
3772 resp->resok.fbytes = (size3)sb.f_bfree;
3773 if (sb.f_bavail != (fsblkcnt64_t)-1)
3774 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3775 else
3776 resp->resok.abytes = (size3)sb.f_bavail;
3777 resp->resok.tfiles = (size3)sb.f_files;
3778 resp->resok.ffiles = (size3)sb.f_ffree;
3779 resp->resok.afiles = (size3)sb.f_favail;
3780 resp->resok.invarsec = 0;
3781
3782 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3783 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3784 VN_RELE(vp);
3785
3786 return;
3787
3788 out:
3789 if (curthread->t_flag & T_WOULDBLOCK) {
3790 curthread->t_flag &= ~T_WOULDBLOCK;
3791 resp->status = NFS3ERR_JUKEBOX;
3792 } else
3793 resp->status = puterrno3(error);
3794 out1:
3795 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3796 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3797
3798 if (vp != NULL)
3799 VN_RELE(vp);
3800 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3801 }
3802
3803 void *
rfs3_fsstat_getfh(FSSTAT3args * args)3804 rfs3_fsstat_getfh(FSSTAT3args *args)
3805 {
3806
3807 return (&args->fsroot);
3808 }
3809
3810 /* ARGSUSED */
3811 void
rfs3_fsinfo(FSINFO3args * args,FSINFO3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3812 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3813 struct svc_req *req, cred_t *cr, bool_t ro)
3814 {
3815 vnode_t *vp;
3816 struct vattr *vap;
3817 struct vattr va;
3818 uint32_t xfer_size;
3819 ulong_t l = 0;
3820 int error;
3821
3822 vp = nfs3_fhtovp(&args->fsroot, exi);
3823
3824 DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3825 cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3826
3827 if (vp == NULL) {
3828 if (curthread->t_flag & T_WOULDBLOCK) {
3829 curthread->t_flag &= ~T_WOULDBLOCK;
3830 resp->status = NFS3ERR_JUKEBOX;
3831 } else
3832 resp->status = NFS3ERR_STALE;
3833 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3834 goto out;
3835 }
3836
3837 if (is_system_labeled()) {
3838 bslabel_t *clabel = req->rq_label;
3839
3840 ASSERT(clabel != NULL);
3841 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3842 "got client label from request(1)", struct svc_req *, req);
3843
3844 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3845 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3846 exi)) {
3847 resp->status = NFS3ERR_STALE;
3848 vattr_to_post_op_attr(NULL,
3849 &resp->resfail.obj_attributes);
3850 goto out;
3851 }
3852 }
3853 }
3854
3855 va.va_mask = AT_ALL;
3856 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3857
3858 resp->status = NFS3_OK;
3859 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3860 xfer_size = rfs3_tsize(req);
3861 resp->resok.rtmax = xfer_size;
3862 resp->resok.rtpref = xfer_size;
3863 resp->resok.rtmult = DEV_BSIZE;
3864 resp->resok.wtmax = xfer_size;
3865 resp->resok.wtpref = xfer_size;
3866 resp->resok.wtmult = DEV_BSIZE;
3867 resp->resok.dtpref = MAXBSIZE;
3868
3869 /*
3870 * Large file spec: want maxfilesize based on limit of
3871 * underlying filesystem. We can guess 2^31-1 if need be.
3872 */
3873 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3874 if (error) {
3875 resp->status = puterrno3(error);
3876 goto out;
3877 }
3878
3879 /*
3880 * If the underlying file system does not support _PC_FILESIZEBITS,
3881 * return a reasonable default. Note that error code on VOP_PATHCONF
3882 * will be 0, even if the underlying file system does not support
3883 * _PC_FILESIZEBITS.
3884 */
3885 if (l == (ulong_t)-1) {
3886 resp->resok.maxfilesize = MAXOFF32_T;
3887 } else {
3888 if (l >= (sizeof (uint64_t) * 8))
3889 resp->resok.maxfilesize = INT64_MAX;
3890 else
3891 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3892 }
3893
3894 resp->resok.time_delta.seconds = 0;
3895 resp->resok.time_delta.nseconds = 1000;
3896 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3897 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3898
3899 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3900 cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3901
3902 VN_RELE(vp);
3903
3904 return;
3905
3906 out:
3907 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908 cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3909 if (vp != NULL)
3910 VN_RELE(vp);
3911 }
3912
3913 void *
rfs3_fsinfo_getfh(FSINFO3args * args)3914 rfs3_fsinfo_getfh(FSINFO3args *args)
3915 {
3916 return (&args->fsroot);
3917 }
3918
3919 /* ARGSUSED */
3920 void
rfs3_pathconf(PATHCONF3args * args,PATHCONF3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3921 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3922 struct svc_req *req, cred_t *cr, bool_t ro)
3923 {
3924 int error;
3925 vnode_t *vp;
3926 struct vattr *vap;
3927 struct vattr va;
3928 ulong_t val;
3929
3930 vap = NULL;
3931
3932 vp = nfs3_fhtovp(&args->object, exi);
3933
3934 DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3935 cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3936
3937 if (vp == NULL) {
3938 error = ESTALE;
3939 goto out;
3940 }
3941
3942 if (is_system_labeled()) {
3943 bslabel_t *clabel = req->rq_label;
3944
3945 ASSERT(clabel != NULL);
3946 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3947 "got client label from request(1)", struct svc_req *, req);
3948
3949 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3950 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3951 exi)) {
3952 resp->status = NFS3ERR_ACCES;
3953 goto out1;
3954 }
3955 }
3956 }
3957
3958 va.va_mask = AT_ALL;
3959 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3960
3961 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3962 if (error)
3963 goto out;
3964 resp->resok.info.link_max = (uint32)val;
3965
3966 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3967 if (error)
3968 goto out;
3969 resp->resok.info.name_max = (uint32)val;
3970
3971 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3972 if (error)
3973 goto out;
3974 if (val == 1)
3975 resp->resok.info.no_trunc = TRUE;
3976 else
3977 resp->resok.info.no_trunc = FALSE;
3978
3979 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3980 if (error)
3981 goto out;
3982 if (val == 1)
3983 resp->resok.info.chown_restricted = TRUE;
3984 else
3985 resp->resok.info.chown_restricted = FALSE;
3986
3987 resp->status = NFS3_OK;
3988 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3989 resp->resok.info.case_insensitive = FALSE;
3990 resp->resok.info.case_preserving = TRUE;
3991 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3992 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3993 VN_RELE(vp);
3994 return;
3995
3996 out:
3997 if (curthread->t_flag & T_WOULDBLOCK) {
3998 curthread->t_flag &= ~T_WOULDBLOCK;
3999 resp->status = NFS3ERR_JUKEBOX;
4000 } else
4001 resp->status = puterrno3(error);
4002 out1:
4003 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005 if (vp != NULL)
4006 VN_RELE(vp);
4007 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4008 }
4009
4010 void *
rfs3_pathconf_getfh(PATHCONF3args * args)4011 rfs3_pathconf_getfh(PATHCONF3args *args)
4012 {
4013
4014 return (&args->object);
4015 }
4016
4017 void
rfs3_commit(COMMIT3args * args,COMMIT3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)4018 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4019 struct svc_req *req, cred_t *cr, bool_t ro)
4020 {
4021 int error;
4022 vnode_t *vp;
4023 struct vattr *bvap;
4024 struct vattr bva;
4025 struct vattr *avap;
4026 struct vattr ava;
4027
4028 bvap = NULL;
4029 avap = NULL;
4030
4031 vp = nfs3_fhtovp(&args->file, exi);
4032
4033 DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4034 cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4035
4036 if (vp == NULL) {
4037 error = ESTALE;
4038 goto out;
4039 }
4040
4041 bva.va_mask = AT_ALL;
4042 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4043
4044 /*
4045 * If we can't get the attributes, then we can't do the
4046 * right access checking. So, we'll fail the request.
4047 */
4048 if (error)
4049 goto out;
4050
4051 bvap = &bva;
4052
4053 if (rdonly(ro, vp)) {
4054 resp->status = NFS3ERR_ROFS;
4055 goto out1;
4056 }
4057
4058 if (vp->v_type != VREG) {
4059 resp->status = NFS3ERR_INVAL;
4060 goto out1;
4061 }
4062
4063 if (is_system_labeled()) {
4064 bslabel_t *clabel = req->rq_label;
4065
4066 ASSERT(clabel != NULL);
4067 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4068 "got client label from request(1)", struct svc_req *, req);
4069
4070 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4071 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4072 exi)) {
4073 resp->status = NFS3ERR_ACCES;
4074 goto out1;
4075 }
4076 }
4077 }
4078
4079 if (crgetuid(cr) != bva.va_uid &&
4080 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4081 goto out;
4082
4083 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4084
4085 ava.va_mask = AT_ALL;
4086 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4087
4088 if (error)
4089 goto out;
4090
4091 resp->status = NFS3_OK;
4092 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4093 resp->resok.verf = write3verf;
4094
4095 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4096 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4097
4098 VN_RELE(vp);
4099
4100 return;
4101
4102 out:
4103 if (curthread->t_flag & T_WOULDBLOCK) {
4104 curthread->t_flag &= ~T_WOULDBLOCK;
4105 resp->status = NFS3ERR_JUKEBOX;
4106 } else
4107 resp->status = puterrno3(error);
4108 out1:
4109 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4110 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4111
4112 if (vp != NULL)
4113 VN_RELE(vp);
4114 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4115 }
4116
4117 void *
rfs3_commit_getfh(COMMIT3args * args)4118 rfs3_commit_getfh(COMMIT3args *args)
4119 {
4120
4121 return (&args->file);
4122 }
4123
4124 static int
sattr3_to_vattr(sattr3 * sap,struct vattr * vap)4125 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4126 {
4127
4128 vap->va_mask = 0;
4129
4130 if (sap->mode.set_it) {
4131 vap->va_mode = (mode_t)sap->mode.mode;
4132 vap->va_mask |= AT_MODE;
4133 }
4134 if (sap->uid.set_it) {
4135 vap->va_uid = (uid_t)sap->uid.uid;
4136 vap->va_mask |= AT_UID;
4137 }
4138 if (sap->gid.set_it) {
4139 vap->va_gid = (gid_t)sap->gid.gid;
4140 vap->va_mask |= AT_GID;
4141 }
4142 if (sap->size.set_it) {
4143 if (sap->size.size > (size3)((u_longlong_t)-1))
4144 return (EINVAL);
4145 vap->va_size = sap->size.size;
4146 vap->va_mask |= AT_SIZE;
4147 }
4148 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4149 #ifndef _LP64
4150 /* check time validity */
4151 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4152 return (EOVERFLOW);
4153 #endif
4154 /*
4155 * nfs protocol defines times as unsigned so don't extend sign,
4156 * unless sysadmin set nfs_allow_preepoch_time.
4157 */
4158 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4159 sap->atime.atime.seconds);
4160 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4161 vap->va_mask |= AT_ATIME;
4162 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4163 gethrestime(&vap->va_atime);
4164 vap->va_mask |= AT_ATIME;
4165 }
4166 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168 /* check time validity */
4169 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4170 return (EOVERFLOW);
4171 #endif
4172 /*
4173 * nfs protocol defines times as unsigned so don't extend sign,
4174 * unless sysadmin set nfs_allow_preepoch_time.
4175 */
4176 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4177 sap->mtime.mtime.seconds);
4178 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4179 vap->va_mask |= AT_MTIME;
4180 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4181 gethrestime(&vap->va_mtime);
4182 vap->va_mask |= AT_MTIME;
4183 }
4184
4185 return (0);
4186 }
4187
4188 static ftype3 vt_to_nf3[] = {
4189 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4190 };
4191
4192 static int
vattr_to_fattr3(struct vattr * vap,fattr3 * fap)4193 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4194 {
4195
4196 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4197 /* Return error if time or size overflow */
4198 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4199 return (EOVERFLOW);
4200 }
4201 fap->type = vt_to_nf3[vap->va_type];
4202 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4203 fap->nlink = (uint32)vap->va_nlink;
4204 if (vap->va_uid == UID_NOBODY)
4205 fap->uid = (uid3)NFS_UID_NOBODY;
4206 else
4207 fap->uid = (uid3)vap->va_uid;
4208 if (vap->va_gid == GID_NOBODY)
4209 fap->gid = (gid3)NFS_GID_NOBODY;
4210 else
4211 fap->gid = (gid3)vap->va_gid;
4212 fap->size = (size3)vap->va_size;
4213 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4214 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4215 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4216 fap->fsid = (uint64)vap->va_fsid;
4217 fap->fileid = (fileid3)vap->va_nodeid;
4218 fap->atime.seconds = vap->va_atime.tv_sec;
4219 fap->atime.nseconds = vap->va_atime.tv_nsec;
4220 fap->mtime.seconds = vap->va_mtime.tv_sec;
4221 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4222 fap->ctime.seconds = vap->va_ctime.tv_sec;
4223 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4224 return (0);
4225 }
4226
4227 static int
vattr_to_wcc_attr(struct vattr * vap,wcc_attr * wccap)4228 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4229 {
4230
4231 /* Return error if time or size overflow */
4232 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4233 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4234 NFS3_SIZE_OK(vap->va_size))) {
4235 return (EOVERFLOW);
4236 }
4237 wccap->size = (size3)vap->va_size;
4238 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4239 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242 return (0);
4243 }
4244
4245 static void
vattr_to_pre_op_attr(struct vattr * vap,pre_op_attr * poap)4246 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4247 {
4248
4249 /* don't return attrs if time overflow */
4250 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4251 poap->attributes = TRUE;
4252 } else
4253 poap->attributes = FALSE;
4254 }
4255
4256 void
vattr_to_post_op_attr(struct vattr * vap,post_op_attr * poap)4257 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4258 {
4259
4260 /* don't return attrs if time overflow */
4261 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4262 poap->attributes = TRUE;
4263 } else
4264 poap->attributes = FALSE;
4265 }
4266
4267 static void
vattr_to_wcc_data(struct vattr * bvap,struct vattr * avap,wcc_data * wccp)4268 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4269 {
4270
4271 vattr_to_pre_op_attr(bvap, &wccp->before);
4272 vattr_to_post_op_attr(avap, &wccp->after);
4273 }
4274
4275 void
rfs3_srvrinit(void)4276 rfs3_srvrinit(void)
4277 {
4278 struct rfs3_verf_overlay {
4279 uint_t id; /* a "unique" identifier */
4280 int ts; /* a unique timestamp */
4281 } *verfp;
4282 timestruc_t now;
4283
4284 /*
4285 * The following algorithm attempts to find a unique verifier
4286 * to be used as the write verifier returned from the server
4287 * to the client. It is important that this verifier change
4288 * whenever the server reboots. Of secondary importance, it
4289 * is important for the verifier to be unique between two
4290 * different servers.
4291 *
4292 * Thus, an attempt is made to use the system hostid and the
4293 * current time in seconds when the nfssrv kernel module is
4294 * loaded. It is assumed that an NFS server will not be able
4295 * to boot and then to reboot in less than a second. If the
4296 * hostid has not been set, then the current high resolution
4297 * time is used. This will ensure different verifiers each
4298 * time the server reboots and minimize the chances that two
4299 * different servers will have the same verifier.
4300 */
4301
4302 #ifndef lint
4303 /*
4304 * We ASSERT that this constant logic expression is
4305 * always true because in the past, it wasn't.
4306 */
4307 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4308 #endif
4309
4310 gethrestime(&now);
4311 verfp = (struct rfs3_verf_overlay *)&write3verf;
4312 verfp->ts = (int)now.tv_sec;
4313 verfp->id = zone_get_hostid(NULL);
4314
4315 if (verfp->id == 0)
4316 verfp->id = (uint_t)now.tv_nsec;
4317
4318 nfs3_srv_caller_id = fs_new_caller_id();
4319
4320 }
4321
4322 static int
rdma_setup_read_data3(READ3args * args,READ3resok * rok)4323 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4324 {
4325 struct clist *wcl;
4326 int wlist_len;
4327 count3 count = rok->count;
4328
4329 wcl = args->wlist;
4330 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4331 return (FALSE);
4332 }
4333
4334 wcl = args->wlist;
4335 rok->wlist_len = wlist_len;
4336 rok->wlist = wcl;
4337 return (TRUE);
4338 }
4339
/*
 * Counterpart of rfs3_srvrinit().  This function currently does
 * nothing.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
4345