1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2018 Nexenta Systems, Inc. 24 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (c) 2013 by Delphix. All rights reserved. 
26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/buf.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/uio.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/dirent.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/systeminfo.h> 48 #include <sys/flock.h> 49 #include <sys/nbmlock.h> 50 #include <sys/policy.h> 51 #include <sys/sdt.h> 52 53 #include <rpc/types.h> 54 #include <rpc/auth.h> 55 #include <rpc/svc.h> 56 #include <rpc/rpc_rdma.h> 57 58 #include <nfs/nfs.h> 59 #include <nfs/export.h> 60 #include <nfs/nfs_cmd.h> 61 62 #include <sys/strsubr.h> 63 #include <sys/tsol/label.h> 64 #include <sys/tsol/tndb.h> 65 66 #include <sys/zone.h> 67 68 #include <inet/ip.h> 69 #include <inet/ip6.h> 70 71 /* 72 * Zone global variables of NFSv3 server 73 */ 74 typedef struct nfs3_srv { 75 writeverf3 write3verf; 76 } nfs3_srv_t; 77 78 /* 79 * These are the interface routines for the server side of the 80 * Network File System. See the NFS version 3 protocol specification 81 * for a description of this interface. 
82 */ 83 84 static int sattr3_to_vattr(sattr3 *, struct vattr *); 85 static int vattr_to_fattr3(struct vattr *, fattr3 *); 86 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *); 87 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *); 88 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *); 89 static int rdma_setup_read_data3(READ3args *, READ3resok *); 90 91 extern int nfs_loaned_buffers; 92 93 u_longlong_t nfs3_srv_caller_id; 94 95 static nfs3_srv_t * 96 nfs3_get_srv(void) 97 { 98 nfs_globals_t *ng = nfs_srv_getzg(); 99 nfs3_srv_t *srv = ng->nfs3_srv; 100 ASSERT(srv != NULL); 101 return (srv); 102 } 103 104 /* ARGSUSED */ 105 void 106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, 107 struct svc_req *req, cred_t *cr, bool_t ro) 108 { 109 int error; 110 vnode_t *vp; 111 struct vattr va; 112 113 vp = nfs3_fhtovp(&args->object, exi); 114 115 DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req, 116 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 117 GETATTR3args *, args); 118 119 if (vp == NULL) { 120 error = ESTALE; 121 goto out; 122 } 123 124 va.va_mask = AT_ALL; 125 error = rfs4_delegated_getattr(vp, &va, 0, cr); 126 127 if (!error) { 128 /* Lie about the object type for a referral */ 129 if (vn_is_nfs_reparse(vp, cr)) 130 va.va_type = VLNK; 131 132 /* overflow error if time or size is out of range */ 133 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes); 134 if (error) 135 goto out; 136 resp->status = NFS3_OK; 137 138 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req, 139 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 140 GETATTR3res *, resp); 141 142 VN_RELE(vp); 143 144 return; 145 } 146 147 out: 148 if (curthread->t_flag & T_WOULDBLOCK) { 149 curthread->t_flag &= ~T_WOULDBLOCK; 150 resp->status = NFS3ERR_JUKEBOX; 151 } else 152 resp->status = puterrno3(error); 153 154 DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req, 155 cred_t *, cr, vnode_t *, vp, 
struct exportinfo *, exi, 156 GETATTR3res *, resp); 157 158 if (vp != NULL) 159 VN_RELE(vp); 160 } 161 162 void * 163 rfs3_getattr_getfh(GETATTR3args *args) 164 { 165 166 return (&args->object); 167 } 168 169 void 170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, 171 struct svc_req *req, cred_t *cr, bool_t ro) 172 { 173 int error; 174 vnode_t *vp; 175 struct vattr *bvap; 176 struct vattr bva; 177 struct vattr *avap; 178 struct vattr ava; 179 int flag; 180 int in_crit = 0; 181 struct flock64 bf; 182 caller_context_t ct; 183 184 bvap = NULL; 185 avap = NULL; 186 187 vp = nfs3_fhtovp(&args->object, exi); 188 189 DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req, 190 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 191 SETATTR3args *, args); 192 193 if (vp == NULL) { 194 error = ESTALE; 195 goto out; 196 } 197 198 error = sattr3_to_vattr(&args->new_attributes, &ava); 199 if (error) 200 goto out; 201 202 if (is_system_labeled()) { 203 bslabel_t *clabel = req->rq_label; 204 205 ASSERT(clabel != NULL); 206 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *, 207 "got client label from request(1)", struct svc_req *, req); 208 209 if (!blequal(&l_admin_low->tsl_label, clabel)) { 210 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 211 exi)) { 212 resp->status = NFS3ERR_ACCES; 213 goto out1; 214 } 215 } 216 } 217 218 /* 219 * We need to specially handle size changes because of 220 * possible conflicting NBMAND locks. Get into critical 221 * region before VOP_GETATTR, so the size attribute is 222 * valid when checking conflicts. 223 * 224 * Also, check to see if the v4 side of the server has 225 * delegated this file. If so, then we return JUKEBOX to 226 * allow the client to retrasmit its request. 
227 */ 228 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) { 229 if (nbl_need_check(vp)) { 230 nbl_start_crit(vp, RW_READER); 231 in_crit = 1; 232 } 233 } 234 235 bva.va_mask = AT_ALL; 236 error = rfs4_delegated_getattr(vp, &bva, 0, cr); 237 238 /* 239 * If we can't get the attributes, then we can't do the 240 * right access checking. So, we'll fail the request. 241 */ 242 if (error) 243 goto out; 244 245 bvap = &bva; 246 247 if (rdonly(ro, vp)) { 248 resp->status = NFS3ERR_ROFS; 249 goto out1; 250 } 251 252 if (args->guard.check && 253 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec || 254 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) { 255 resp->status = NFS3ERR_NOT_SYNC; 256 goto out1; 257 } 258 259 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME) 260 flag = ATTR_UTIME; 261 else 262 flag = 0; 263 264 /* 265 * If the filesystem is exported with nosuid, then mask off 266 * the setuid and setgid bits. 267 */ 268 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG && 269 (exi->exi_export.ex_flags & EX_NOSUID)) 270 ava.va_mode &= ~(VSUID | VSGID); 271 272 ct.cc_sysid = 0; 273 ct.cc_pid = 0; 274 ct.cc_caller_id = nfs3_srv_caller_id; 275 ct.cc_flags = CC_DONTBLOCK; 276 277 /* 278 * We need to specially handle size changes because it is 279 * possible for the client to create a file with modes 280 * which indicate read-only, but with the file opened for 281 * writing. If the client then tries to set the size of 282 * the file, then the normal access checking done in 283 * VOP_SETATTR would prevent the client from doing so, 284 * although it should be legal for it to do so. To get 285 * around this, we do the access checking for ourselves 286 * and then use VOP_SPACE which doesn't do the access 287 * checking which VOP_SETATTR does. VOP_SPACE can only 288 * operate on VREG files, let VOP_SETATTR handle the other 289 * extremely rare cases. 
290 * Also the client should not be allowed to change the 291 * size of the file if there is a conflicting non-blocking 292 * mandatory lock in the region the change. 293 */ 294 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) { 295 if (in_crit) { 296 u_offset_t offset; 297 ssize_t length; 298 299 if (ava.va_size < bva.va_size) { 300 offset = ava.va_size; 301 length = bva.va_size - ava.va_size; 302 } else { 303 offset = bva.va_size; 304 length = ava.va_size - bva.va_size; 305 } 306 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 307 NULL)) { 308 error = EACCES; 309 goto out; 310 } 311 } 312 313 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) { 314 ava.va_mask &= ~AT_SIZE; 315 bf.l_type = F_WRLCK; 316 bf.l_whence = 0; 317 bf.l_start = (off64_t)ava.va_size; 318 bf.l_len = 0; 319 bf.l_sysid = 0; 320 bf.l_pid = 0; 321 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 322 (offset_t)ava.va_size, cr, &ct); 323 } 324 } 325 326 if (!error && ava.va_mask) 327 error = VOP_SETATTR(vp, &ava, flag, cr, &ct); 328 329 /* check if a monitor detected a delegation conflict */ 330 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 331 resp->status = NFS3ERR_JUKEBOX; 332 goto out1; 333 } 334 335 ava.va_mask = AT_ALL; 336 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava; 337 338 /* 339 * Force modified metadata out to stable storage. 
340 */ 341 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 342 343 if (error) 344 goto out; 345 346 if (in_crit) 347 nbl_end_crit(vp); 348 349 resp->status = NFS3_OK; 350 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc); 351 352 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req, 353 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 354 SETATTR3res *, resp); 355 356 VN_RELE(vp); 357 358 return; 359 360 out: 361 if (curthread->t_flag & T_WOULDBLOCK) { 362 curthread->t_flag &= ~T_WOULDBLOCK; 363 resp->status = NFS3ERR_JUKEBOX; 364 } else 365 resp->status = puterrno3(error); 366 out1: 367 DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req, 368 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 369 SETATTR3res *, resp); 370 371 if (vp != NULL) { 372 if (in_crit) 373 nbl_end_crit(vp); 374 VN_RELE(vp); 375 } 376 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc); 377 } 378 379 void * 380 rfs3_setattr_getfh(SETATTR3args *args) 381 { 382 383 return (&args->object); 384 } 385 386 /* ARGSUSED */ 387 void 388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, 389 struct svc_req *req, cred_t *cr, bool_t ro) 390 { 391 int error; 392 vnode_t *vp; 393 vnode_t *dvp; 394 struct vattr *vap; 395 struct vattr va; 396 struct vattr *dvap; 397 struct vattr dva; 398 nfs_fh3 *fhp; 399 struct sec_ol sec = {0, 0}; 400 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 401 struct sockaddr *ca; 402 char *name = NULL; 403 404 dvap = NULL; 405 406 if (exi != NULL) 407 exi_hold(exi); 408 409 /* 410 * Allow lookups from the root - the default 411 * location of the public filehandle. 
412 */ 413 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 414 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); 415 dvp = ZONE_ROOTVP(); 416 VN_HOLD(dvp); 417 418 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req, 419 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 420 LOOKUP3args *, args); 421 } else { 422 dvp = nfs3_fhtovp(&args->what.dir, exi); 423 424 DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req, 425 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 426 LOOKUP3args *, args); 427 428 if (dvp == NULL) { 429 error = ESTALE; 430 goto out; 431 } 432 } 433 434 dva.va_mask = AT_ALL; 435 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva; 436 437 if (args->what.name == nfs3nametoolong) { 438 resp->status = NFS3ERR_NAMETOOLONG; 439 goto out1; 440 } 441 442 if (args->what.name == NULL || *(args->what.name) == '\0') { 443 resp->status = NFS3ERR_ACCES; 444 goto out1; 445 } 446 447 fhp = &args->what.dir; 448 ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */ 449 if (strcmp(args->what.name, "..") == 0 && 450 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) { 451 if ((exi->exi_export.ex_flags & EX_NOHIDE) && 452 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) { 453 /* 454 * special case for ".." 
and 'nohide'exported root 455 */ 456 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) { 457 resp->status = NFS3ERR_ACCES; 458 goto out1; 459 } 460 } else { 461 resp->status = NFS3ERR_NOENT; 462 goto out1; 463 } 464 } 465 466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 467 name = nfscmd_convname(ca, exi, args->what.name, 468 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 469 470 if (name == NULL) { 471 resp->status = NFS3ERR_ACCES; 472 goto out1; 473 } 474 475 /* 476 * If the public filehandle is used then allow 477 * a multi-component lookup 478 */ 479 if (PUBLIC_FH3(&args->what.dir)) { 480 publicfh_flag = TRUE; 481 482 exi_rele(exi); 483 exi = NULL; 484 485 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, 486 &exi, &sec); 487 488 /* 489 * Since WebNFS may bypass MOUNT, we need to ensure this 490 * request didn't come from an unlabeled admin_low client. 491 */ 492 if (is_system_labeled() && error == 0) { 493 int addr_type; 494 void *ipaddr; 495 tsol_tpc_t *tp; 496 497 if (ca->sa_family == AF_INET) { 498 addr_type = IPV4_VERSION; 499 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 500 } else if (ca->sa_family == AF_INET6) { 501 addr_type = IPV6_VERSION; 502 ipaddr = &((struct sockaddr_in6 *) 503 ca)->sin6_addr; 504 } 505 tp = find_tpc(ipaddr, addr_type, B_FALSE); 506 if (tp == NULL || tp->tpc_tp.tp_doi != 507 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 508 SUN_CIPSO) { 509 VN_RELE(vp); 510 error = EACCES; 511 } 512 if (tp != NULL) 513 TPC_RELE(tp); 514 } 515 } else { 516 error = VOP_LOOKUP(dvp, name, &vp, 517 NULL, 0, NULL, cr, NULL, NULL, NULL); 518 } 519 520 if (name != args->what.name) 521 kmem_free(name, MAXPATHLEN + 1); 522 523 if (error == 0 && vn_ismntpt(vp)) { 524 error = rfs_cross_mnt(&vp, &exi); 525 if (error) 526 VN_RELE(vp); 527 } 528 529 if (is_system_labeled() && error == 0) { 530 bslabel_t *clabel = req->rq_label; 531 532 ASSERT(clabel != NULL); 533 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *, 534 "got client label from 
request(1)", struct svc_req *, req); 535 536 if (!blequal(&l_admin_low->tsl_label, clabel)) { 537 if (!do_rfs_label_check(clabel, dvp, 538 DOMINANCE_CHECK, exi)) { 539 VN_RELE(vp); 540 error = EACCES; 541 } 542 } 543 } 544 545 dva.va_mask = AT_ALL; 546 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva; 547 548 if (error) 549 goto out; 550 551 if (sec.sec_flags & SEC_QUERY) { 552 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index); 553 } else { 554 error = makefh3(&resp->resok.object, vp, exi); 555 if (!error && publicfh_flag && !chk_clnt_sec(exi, req)) 556 auth_weak = TRUE; 557 } 558 559 if (error) { 560 VN_RELE(vp); 561 goto out; 562 } 563 564 va.va_mask = AT_ALL; 565 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va; 566 567 VN_RELE(vp); 568 569 resp->status = NFS3_OK; 570 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 571 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes); 572 573 /* 574 * If it's public fh, no 0x81, and client's flavor is 575 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 576 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 
577 */ 578 if (auth_weak) 579 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR; 580 581 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req, 582 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 583 LOOKUP3res *, resp); 584 VN_RELE(dvp); 585 exi_rele(exi); 586 587 return; 588 589 out: 590 if (curthread->t_flag & T_WOULDBLOCK) { 591 curthread->t_flag &= ~T_WOULDBLOCK; 592 resp->status = NFS3ERR_JUKEBOX; 593 } else 594 resp->status = puterrno3(error); 595 out1: 596 DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req, 597 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 598 LOOKUP3res *, resp); 599 600 if (exi != NULL) 601 exi_rele(exi); 602 603 if (dvp != NULL) 604 VN_RELE(dvp); 605 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes); 606 607 } 608 609 void * 610 rfs3_lookup_getfh(LOOKUP3args *args) 611 { 612 613 return (&args->what.dir); 614 } 615 616 /* ARGSUSED */ 617 void 618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi, 619 struct svc_req *req, cred_t *cr, bool_t ro) 620 { 621 int error; 622 vnode_t *vp; 623 struct vattr *vap; 624 struct vattr va; 625 int checkwriteperm; 626 boolean_t dominant_label = B_FALSE; 627 boolean_t equal_label = B_FALSE; 628 boolean_t admin_low_client; 629 630 vap = NULL; 631 632 vp = nfs3_fhtovp(&args->object, exi); 633 634 DTRACE_NFSV3_5(op__access__start, struct svc_req *, req, 635 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 636 ACCESS3args *, args); 637 638 if (vp == NULL) { 639 error = ESTALE; 640 goto out; 641 } 642 643 /* 644 * If the file system is exported read only, it is not appropriate 645 * to check write permissions for regular files and directories. 646 * Special files are interpreted by the client, so the underlying 647 * permissions are sent back to the client for interpretation. 
648 */ 649 if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR)) 650 checkwriteperm = 0; 651 else 652 checkwriteperm = 1; 653 654 /* 655 * We need the mode so that we can correctly determine access 656 * permissions relative to a mandatory lock file. Access to 657 * mandatory lock files is denied on the server, so it might 658 * as well be reflected to the server during the open. 659 */ 660 va.va_mask = AT_MODE; 661 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 662 if (error) 663 goto out; 664 665 vap = &va; 666 667 resp->resok.access = 0; 668 669 if (is_system_labeled()) { 670 bslabel_t *clabel = req->rq_label; 671 672 ASSERT(clabel != NULL); 673 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *, 674 "got client label from request(1)", struct svc_req *, req); 675 676 if (!blequal(&l_admin_low->tsl_label, clabel)) { 677 if ((equal_label = do_rfs_label_check(clabel, vp, 678 EQUALITY_CHECK, exi)) == B_FALSE) { 679 dominant_label = do_rfs_label_check(clabel, 680 vp, DOMINANCE_CHECK, exi); 681 } else 682 dominant_label = B_TRUE; 683 admin_low_client = B_FALSE; 684 } else 685 admin_low_client = B_TRUE; 686 } 687 688 if (args->access & ACCESS3_READ) { 689 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 690 if (error) { 691 if (curthread->t_flag & T_WOULDBLOCK) 692 goto out; 693 } else if (!MANDLOCK(vp, va.va_mode) && 694 (!is_system_labeled() || admin_low_client || 695 dominant_label)) 696 resp->resok.access |= ACCESS3_READ; 697 } 698 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) { 699 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 700 if (error) { 701 if (curthread->t_flag & T_WOULDBLOCK) 702 goto out; 703 } else if (!is_system_labeled() || admin_low_client || 704 dominant_label) 705 resp->resok.access |= ACCESS3_LOOKUP; 706 } 707 if (checkwriteperm && 708 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) { 709 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 710 if (error) { 711 if (curthread->t_flag & T_WOULDBLOCK) 712 goto out; 713 } else if 
(!MANDLOCK(vp, va.va_mode) && 714 (!is_system_labeled() || admin_low_client || equal_label)) { 715 resp->resok.access |= 716 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND)); 717 } 718 } 719 if (checkwriteperm && 720 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) { 721 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 722 if (error) { 723 if (curthread->t_flag & T_WOULDBLOCK) 724 goto out; 725 } else if (!is_system_labeled() || admin_low_client || 726 equal_label) 727 resp->resok.access |= ACCESS3_DELETE; 728 } 729 if (args->access & ACCESS3_EXECUTE) { 730 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 731 if (error) { 732 if (curthread->t_flag & T_WOULDBLOCK) 733 goto out; 734 } else if (!MANDLOCK(vp, va.va_mode) && 735 (!is_system_labeled() || admin_low_client || 736 dominant_label)) 737 resp->resok.access |= ACCESS3_EXECUTE; 738 } 739 740 va.va_mask = AT_ALL; 741 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va; 742 743 resp->status = NFS3_OK; 744 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 745 746 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req, 747 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 748 ACCESS3res *, resp); 749 750 VN_RELE(vp); 751 752 return; 753 754 out: 755 if (curthread->t_flag & T_WOULDBLOCK) { 756 curthread->t_flag &= ~T_WOULDBLOCK; 757 resp->status = NFS3ERR_JUKEBOX; 758 } else 759 resp->status = puterrno3(error); 760 DTRACE_NFSV3_5(op__access__done, struct svc_req *, req, 761 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 762 ACCESS3res *, resp); 763 if (vp != NULL) 764 VN_RELE(vp); 765 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes); 766 } 767 768 void * 769 rfs3_access_getfh(ACCESS3args *args) 770 { 771 772 return (&args->object); 773 } 774 775 /* ARGSUSED */ 776 void 777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, 778 struct svc_req *req, cred_t *cr, bool_t ro) 779 { 780 int error; 781 vnode_t *vp; 782 struct vattr *vap; 783 struct vattr va; 784 
struct iovec iov; 785 struct uio uio; 786 char *data; 787 struct sockaddr *ca; 788 char *name = NULL; 789 int is_referral = 0; 790 791 vap = NULL; 792 793 vp = nfs3_fhtovp(&args->symlink, exi); 794 795 DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req, 796 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 797 READLINK3args *, args); 798 799 if (vp == NULL) { 800 error = ESTALE; 801 goto out; 802 } 803 804 va.va_mask = AT_ALL; 805 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 806 if (error) 807 goto out; 808 809 vap = &va; 810 811 /* We lied about the object type for a referral */ 812 if (vn_is_nfs_reparse(vp, cr)) 813 is_referral = 1; 814 815 if (vp->v_type != VLNK && !is_referral) { 816 resp->status = NFS3ERR_INVAL; 817 goto out1; 818 } 819 820 if (MANDLOCK(vp, va.va_mode)) { 821 resp->status = NFS3ERR_ACCES; 822 goto out1; 823 } 824 825 if (is_system_labeled()) { 826 bslabel_t *clabel = req->rq_label; 827 828 ASSERT(clabel != NULL); 829 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *, 830 "got client label from request(1)", struct svc_req *, req); 831 832 if (!blequal(&l_admin_low->tsl_label, clabel)) { 833 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 834 exi)) { 835 resp->status = NFS3ERR_ACCES; 836 goto out1; 837 } 838 } 839 } 840 841 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 842 843 if (is_referral) { 844 char *s; 845 size_t strsz; 846 kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3]; 847 848 /* Get an artificial symlink based on a referral */ 849 s = build_symlink(vp, cr, &strsz); 850 stat[NFS_REFERLINKS].value.ui64++; 851 DTRACE_PROBE2(nfs3serv__func__referral__reflink, 852 vnode_t *, vp, char *, s); 853 if (s == NULL) 854 error = EINVAL; 855 else { 856 error = 0; 857 (void) strlcpy(data, s, MAXPATHLEN + 1); 858 kmem_free(s, strsz); 859 } 860 861 } else { 862 863 iov.iov_base = data; 864 iov.iov_len = MAXPATHLEN; 865 uio.uio_iov = &iov; 866 uio.uio_iovcnt = 1; 867 uio.uio_segflg = UIO_SYSSPACE; 868 
uio.uio_extflg = UIO_COPY_CACHED; 869 uio.uio_loffset = 0; 870 uio.uio_resid = MAXPATHLEN; 871 872 error = VOP_READLINK(vp, &uio, cr, NULL); 873 874 if (!error) 875 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 876 } 877 878 va.va_mask = AT_ALL; 879 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 880 881 /* Lie about object type again just to be consistent */ 882 if (is_referral && vap != NULL) 883 vap->va_type = VLNK; 884 885 #if 0 /* notyet */ 886 /* 887 * Don't do this. It causes local disk writes when just 888 * reading the file and the overhead is deemed larger 889 * than the benefit. 890 */ 891 /* 892 * Force modified metadata out to stable storage. 893 */ 894 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 895 #endif 896 897 if (error) { 898 kmem_free(data, MAXPATHLEN + 1); 899 goto out; 900 } 901 902 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 903 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND, 904 MAXPATHLEN + 1); 905 906 if (name == NULL) { 907 /* 908 * Even though the conversion failed, we return 909 * something. We just don't translate it. 
910 */ 911 name = data; 912 } 913 914 resp->status = NFS3_OK; 915 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes); 916 resp->resok.data = name; 917 918 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req, 919 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 920 READLINK3res *, resp); 921 VN_RELE(vp); 922 923 if (name != data) 924 kmem_free(data, MAXPATHLEN + 1); 925 926 return; 927 928 out: 929 if (curthread->t_flag & T_WOULDBLOCK) { 930 curthread->t_flag &= ~T_WOULDBLOCK; 931 resp->status = NFS3ERR_JUKEBOX; 932 } else 933 resp->status = puterrno3(error); 934 out1: 935 DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req, 936 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 937 READLINK3res *, resp); 938 if (vp != NULL) 939 VN_RELE(vp); 940 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes); 941 } 942 943 void * 944 rfs3_readlink_getfh(READLINK3args *args) 945 { 946 947 return (&args->symlink); 948 } 949 950 void 951 rfs3_readlink_free(READLINK3res *resp) 952 { 953 954 if (resp->status == NFS3_OK) 955 kmem_free(resp->resok.data, MAXPATHLEN + 1); 956 } 957 958 /* 959 * Server routine to handle read 960 * May handle RDMA data as well as mblks 961 */ 962 /* ARGSUSED */ 963 void 964 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, 965 struct svc_req *req, cred_t *cr, bool_t ro) 966 { 967 int error; 968 vnode_t *vp; 969 struct vattr *vap; 970 struct vattr va; 971 struct iovec iov, *iovp = NULL; 972 int iovcnt; 973 struct uio uio; 974 u_offset_t offset; 975 mblk_t *mp = NULL; 976 int in_crit = 0; 977 int need_rwunlock = 0; 978 caller_context_t ct; 979 int rdma_used = 0; 980 int loaned_buffers; 981 struct uio *uiop; 982 983 vap = NULL; 984 985 vp = nfs3_fhtovp(&args->file, exi); 986 987 DTRACE_NFSV3_5(op__read__start, struct svc_req *, req, 988 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 989 READ3args *, args); 990 991 992 if (vp == NULL) { 993 error = ESTALE; 994 goto out; 995 } 996 997 if 
(args->wlist) { 998 if (args->count > clist_len(args->wlist)) { 999 error = EINVAL; 1000 goto out; 1001 } 1002 rdma_used = 1; 1003 } 1004 1005 /* use loaned buffers for TCP */ 1006 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0; 1007 1008 if (is_system_labeled()) { 1009 bslabel_t *clabel = req->rq_label; 1010 1011 ASSERT(clabel != NULL); 1012 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *, 1013 "got client label from request(1)", struct svc_req *, req); 1014 1015 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1016 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 1017 exi)) { 1018 resp->status = NFS3ERR_ACCES; 1019 goto out1; 1020 } 1021 } 1022 } 1023 1024 ct.cc_sysid = 0; 1025 ct.cc_pid = 0; 1026 ct.cc_caller_id = nfs3_srv_caller_id; 1027 ct.cc_flags = CC_DONTBLOCK; 1028 1029 /* 1030 * Enter the critical region before calling VOP_RWLOCK 1031 * to avoid a deadlock with write requests. 1032 */ 1033 if (nbl_need_check(vp)) { 1034 nbl_start_crit(vp, RW_READER); 1035 in_crit = 1; 1036 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0, 1037 NULL)) { 1038 error = EACCES; 1039 goto out; 1040 } 1041 } 1042 1043 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 1044 1045 /* check if a monitor detected a delegation conflict */ 1046 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1047 resp->status = NFS3ERR_JUKEBOX; 1048 goto out1; 1049 } 1050 1051 need_rwunlock = 1; 1052 1053 va.va_mask = AT_ALL; 1054 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1055 1056 /* 1057 * If we can't get the attributes, then we can't do the 1058 * right access checking. So, we'll fail the request. 
1059 */ 1060 if (error) 1061 goto out; 1062 1063 vap = &va; 1064 1065 if (vp->v_type != VREG) { 1066 resp->status = NFS3ERR_INVAL; 1067 goto out1; 1068 } 1069 1070 if (crgetuid(cr) != va.va_uid) { 1071 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 1072 if (error) { 1073 if (curthread->t_flag & T_WOULDBLOCK) 1074 goto out; 1075 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 1076 if (error) 1077 goto out; 1078 } 1079 } 1080 1081 if (MANDLOCK(vp, va.va_mode)) { 1082 resp->status = NFS3ERR_ACCES; 1083 goto out1; 1084 } 1085 1086 offset = args->offset; 1087 if (offset >= va.va_size) { 1088 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 1089 if (in_crit) 1090 nbl_end_crit(vp); 1091 resp->status = NFS3_OK; 1092 vattr_to_post_op_attr(vap, &resp->resok.file_attributes); 1093 resp->resok.count = 0; 1094 resp->resok.eof = TRUE; 1095 resp->resok.data.data_len = 0; 1096 resp->resok.data.data_val = NULL; 1097 resp->resok.data.mp = NULL; 1098 /* RDMA */ 1099 resp->resok.wlist = args->wlist; 1100 resp->resok.wlist_len = resp->resok.count; 1101 if (resp->resok.wlist) 1102 clist_zero_len(resp->resok.wlist); 1103 goto done; 1104 } 1105 1106 if (args->count == 0) { 1107 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 1108 if (in_crit) 1109 nbl_end_crit(vp); 1110 resp->status = NFS3_OK; 1111 vattr_to_post_op_attr(vap, &resp->resok.file_attributes); 1112 resp->resok.count = 0; 1113 resp->resok.eof = FALSE; 1114 resp->resok.data.data_len = 0; 1115 resp->resok.data.data_val = NULL; 1116 resp->resok.data.mp = NULL; 1117 /* RDMA */ 1118 resp->resok.wlist = args->wlist; 1119 resp->resok.wlist_len = resp->resok.count; 1120 if (resp->resok.wlist) 1121 clist_zero_len(resp->resok.wlist); 1122 goto done; 1123 } 1124 1125 /* 1126 * do not allocate memory more the max. 
allowed 1127 * transfer size 1128 */ 1129 if (args->count > rfs3_tsize(req)) 1130 args->count = rfs3_tsize(req); 1131 1132 if (loaned_buffers) { 1133 uiop = (uio_t *)rfs_setup_xuio(vp); 1134 ASSERT(uiop != NULL); 1135 uiop->uio_segflg = UIO_SYSSPACE; 1136 uiop->uio_loffset = args->offset; 1137 uiop->uio_resid = args->count; 1138 1139 /* Jump to do the read if successful */ 1140 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) { 1141 /* 1142 * Need to hold the vnode until after VOP_RETZCBUF() 1143 * is called. 1144 */ 1145 VN_HOLD(vp); 1146 goto doio_read; 1147 } 1148 1149 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int, 1150 uiop->uio_loffset, int, uiop->uio_resid); 1151 1152 uiop->uio_extflg = 0; 1153 /* failure to setup for zero copy */ 1154 rfs_free_xuio((void *)uiop); 1155 loaned_buffers = 0; 1156 } 1157 1158 /* 1159 * If returning data via RDMA Write, then grab the chunk list. 1160 * If we aren't returning READ data w/RDMA_WRITE, then grab 1161 * a mblk. 1162 */ 1163 if (rdma_used) { 1164 (void) rdma_get_wchunk(req, &iov, args->wlist); 1165 uio.uio_iov = &iov; 1166 uio.uio_iovcnt = 1; 1167 } else { 1168 /* 1169 * mp will contain the data to be sent out in the read reply. 1170 * For UDP, this will be freed after the reply has been sent 1171 * out by the driver. For TCP, it will be freed after the last 1172 * segment associated with the reply has been ACKed by the 1173 * client. 
1174 */ 1175 mp = rfs_read_alloc(args->count, &iovp, &iovcnt); 1176 uio.uio_iov = iovp; 1177 uio.uio_iovcnt = iovcnt; 1178 } 1179 1180 uio.uio_segflg = UIO_SYSSPACE; 1181 uio.uio_extflg = UIO_COPY_CACHED; 1182 uio.uio_loffset = args->offset; 1183 uio.uio_resid = args->count; 1184 uiop = &uio; 1185 1186 doio_read: 1187 error = VOP_READ(vp, uiop, 0, cr, &ct); 1188 1189 if (error) { 1190 if (mp) 1191 freemsg(mp); 1192 /* check if a monitor detected a delegation conflict */ 1193 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1194 resp->status = NFS3ERR_JUKEBOX; 1195 goto out1; 1196 } 1197 goto out; 1198 } 1199 1200 /* make mblk using zc buffers */ 1201 if (loaned_buffers) { 1202 mp = uio_to_mblk(uiop); 1203 ASSERT(mp != NULL); 1204 } 1205 1206 va.va_mask = AT_ALL; 1207 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1208 1209 if (error) 1210 vap = NULL; 1211 else 1212 vap = &va; 1213 1214 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 1215 1216 if (in_crit) 1217 nbl_end_crit(vp); 1218 1219 resp->status = NFS3_OK; 1220 vattr_to_post_op_attr(vap, &resp->resok.file_attributes); 1221 resp->resok.count = args->count - uiop->uio_resid; 1222 if (!error && offset + resp->resok.count == va.va_size) 1223 resp->resok.eof = TRUE; 1224 else 1225 resp->resok.eof = FALSE; 1226 resp->resok.data.data_len = resp->resok.count; 1227 1228 if (mp) 1229 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers); 1230 1231 resp->resok.data.mp = mp; 1232 resp->resok.size = (uint_t)args->count; 1233 1234 if (rdma_used) { 1235 resp->resok.data.data_val = (caddr_t)iov.iov_base; 1236 if (!rdma_setup_read_data3(args, &(resp->resok))) { 1237 resp->status = NFS3ERR_INVAL; 1238 } 1239 } else { 1240 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base; 1241 (resp->resok).wlist = NULL; 1242 } 1243 1244 done: 1245 DTRACE_NFSV3_5(op__read__done, struct svc_req *, req, 1246 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 1247 READ3res *, resp); 1248 1249 VN_RELE(vp); 1250 1251 if (iovp != NULL) 1252 
/*
 * NFSv3 WRITE.
 *
 * Writes args->count bytes at args->offset into the regular file named by
 * args->file and fills in *resp.
 *
 *	args	decoded request; the data arrives either as an mblk chain
 *		(args->mblk), an RDMA chunk list (args->rlist) or a flat
 *		buffer (args->data.data_val)
 *	resp	reply; resok is filled in on success, resfail on failure
 *	exi	export the file handle belongs to
 *	req	RPC request (used for the client label and DTrace probes)
 *	cr	credentials the write is performed with
 *	ro	read-only indication handed to rdonly()
 *
 * Exit labels:
 *	err	translate "error" (or a pending T_WOULDBLOCK) into resp->status
 *	err1	resp->status is already set; only fill in the failure wcc data
 *	out	common cleanup: fire the done probe, drop the rwlock, leave the
 *		NBMAND critical region and release the vnode
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	nfs3_srv_t *ns;
	int error;
	vnode_t *vp;
	struct vattr *bvap = NULL;
	struct vattr bva;
	struct vattr *avap = NULL;
	struct vattr ava;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred;
	int in_crit = 0;
	/* -1 means "no rwlock to drop"; the exit path tests for exactly that */
	int rwlock_ret = -1;
	caller_context_t ct;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
	ns = nfs3_get_srv();

	/*
	 * On a labeled system a client whose label is not admin_low must
	 * pass an equality label check against the file.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	/*
	 * Caller context passed to the VOPs below.  CC_DONTBLOCK is set on
	 * the way in; CC_WOULDBLOCK is checked on the way out of VOP_RWLOCK
	 * and VOP_WRITE to detect a delegation conflict.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto err;
		}
	}

	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		/* reset so the exit path does not call VOP_RWUNLOCK */
		rwlock_ret = -1;
		goto err1;
	}


	bva.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto err;

	/*
	 * Until the write has happened the "after" attributes are the same
	 * as the "before" attributes; failures from here on report both.
	 */
	bvap = &bva;
	avap = bvap;

	/* The advertised count must match the length of the data sent. */
	if (args->count != args->data.data_len) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/* The owner of the file is not subjected to the VOP_ACCESS check. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
		goto err;

	if (MANDLOCK(vp, bva.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	/* A zero-length write succeeds without touching the file. */
	if (args->count == 0) {
		resp->status = NFS3_OK;
		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
		resp->resok.count = 0;
		resp->resok.committed = args->stable;
		resp->resok.verf = ns->write3verf;
		goto out;
	}

	/*
	 * Build the iovec array describing the data.  An mblk chain needs
	 * one iovec per mblk: the on-stack array is used when the chain has
	 * at most MAX_IOVECS entries, otherwise an array is allocated and
	 * must be freed again (iovp != iov) on every path below.
	 */
	if (args->mblk != NULL) {
		iovcnt = 0;
		for (m = args->mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs3_write_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs3_write_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);

	} else if (args->rlist != NULL) {
		/* RDMA chunk list: a single iovec over the chunk address */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->count;
	} else {
		/* flat buffer: a single iovec over data_val */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data.data_val;
		iovp->iov_len = args->count;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* Shrink the transfer if it would run past uio_llimit. */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Map the requested stability level onto the VOP_WRITE ioflag. */
	if (args->stable == UNSTABLE)
		ioflag = 0;
	else if (args->stable == FILE_SYNC)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC)
		ioflag = FDSYNC;
	else {
		/* unknown stability level: undo the iovec allocation */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto err1;
	}

	/*
	 * Fetch the after attributes before acting on the write error so
	 * that a failed write still reports them in its wcc data.
	 */
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

	if (error)
		goto err;

	/*
	 * If we were unable to get the V_WRITELOCK_TRUE, then we
	 * may not have accurate after attrs, so check if
	 * we have both attributes, they have a non-zero va_seq, and
	 * va_seq has changed by exactly one,
	 * if not, turn off the before attr.
	 */
	if (rwlock_ret != V_WRITELOCK_TRUE) {
		if (bvap == NULL || avap == NULL ||
		    bvap->va_seq == 0 || avap->va_seq == 0 ||
		    avap->va_seq != (bvap->va_seq + 1)) {
			bvap = NULL;
		}
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
	/* bytes written = bytes requested minus what the uio still holds */
	resp->resok.count = args->count - uio.uio_resid;
	resp->resok.committed = args->stable;
	resp->resok.verf = ns->write3verf;
	goto out;

err:
	/* A pending T_WOULDBLOCK on the thread overrides the errno. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
	DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3res *, resp);

	if (vp != NULL) {
		if (rwlock_ret != -1)
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
}
/*
 * NFSv3 CREATE.
 *
 * Creates the regular file args->where.name in the directory identified by
 * args->where.dir according to args->how.mode (UNCHECKED, GUARDED or
 * EXCLUSIVE) and fills in *resp.
 *
 * For an EXCLUSIVE create the client's verifier is stored in the new
 * file's mtime; if the create then fails with EEXIST the existing file's
 * mtime is compared against the verifier to recognise a retransmission of
 * the same request.
 *
 * Exit labels:
 *	out	translate "error" (or a pending T_WOULDBLOCK) into resp->status
 *	out1	resp->status is already set; fire the done probe, free the
 *		converted name, leave the NBMAND critical region, release the
 *		held vnodes and fill in the failure wcc data
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int in_crit = 0;
	vnode_t *vp;
	vnode_t *tvp = NULL;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	/*
	 * Only assigned when AT_SIZE is set in va.va_mask; the later read
	 * is guarded by obj_attributes.size.set_it.
	 * NOTE(review): this relies on sattr3_to_vattr() setting AT_SIZE
	 * whenever size.set_it is true - confirm.
	 */
	len_t reqsize;
	bool_t trunc;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * Directory attributes before the operation.  Until the create has
	 * been attempted they also serve as the "after" attributes.
	 */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * On a labeled system a client whose label is not admin_low must
	 * pass an equality label check against the directory.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * Convert the client's name.  The result is either args->where.name
	 * itself or a separate MAXPATHLEN + 1 byte buffer; only the latter
	 * is freed (see the "name != args->where.name" tests below).
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (args->how.mode == EXCLUSIVE) {
		/* Stash the create verifier in the new file's mtime. */
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					/*
					 * The critical region (and the hold
					 * on tvp) stays in place across the
					 * VOP_CREATE below; both are dropped
					 * on the exit paths.
					 */
					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The region being modified runs from
					 * the smaller of the current and the
					 * requested size for the distance
					 * between the two.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* Directory attributes after the attempt, whatever its outcome. */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * An existing file only satisfies an EXCLUSIVE create when
		 * its mtime still holds this request's verifier.
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		/* A requested size of zero counts as a truncation. */
		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/* A failure to build the handle is reported, not treated as fatal. */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	VN_RELE(dvp);
	return;

out:
	/* A pending T_WOULDBLOCK on the thread overrides the errno. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
NULL : &va; 1863 } 1864 } 1865 1866 if (name != args->where.name) 1867 kmem_free(name, MAXPATHLEN + 1); 1868 1869 error = makefh3(&resp->resok.obj.handle, vp, exi); 1870 if (error) 1871 resp->resok.obj.handle_follows = FALSE; 1872 else 1873 resp->resok.obj.handle_follows = TRUE; 1874 1875 /* 1876 * Force modified data and metadata out to stable storage. 1877 */ 1878 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1879 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1880 1881 VN_RELE(vp); 1882 if (tvp != NULL) { 1883 if (in_crit) 1884 nbl_end_crit(tvp); 1885 VN_RELE(tvp); 1886 } 1887 1888 resp->status = NFS3_OK; 1889 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 1890 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); 1891 1892 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req, 1893 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 1894 CREATE3res *, resp); 1895 1896 VN_RELE(dvp); 1897 return; 1898 1899 out: 1900 if (curthread->t_flag & T_WOULDBLOCK) { 1901 curthread->t_flag &= ~T_WOULDBLOCK; 1902 resp->status = NFS3ERR_JUKEBOX; 1903 } else 1904 resp->status = puterrno3(error); 1905 out1: 1906 DTRACE_NFSV3_5(op__create__done, struct svc_req *, req, 1907 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 1908 CREATE3res *, resp); 1909 1910 if (name != NULL && name != args->where.name) 1911 kmem_free(name, MAXPATHLEN + 1); 1912 1913 if (tvp != NULL) { 1914 if (in_crit) 1915 nbl_end_crit(tvp); 1916 VN_RELE(tvp); 1917 } 1918 if (dvp != NULL) 1919 VN_RELE(dvp); 1920 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); 1921 } 1922 1923 void * 1924 rfs3_create_getfh(CREATE3args *args) 1925 { 1926 1927 return (&args->where.dir); 1928 } 1929 1930 void 1931 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi, 1932 struct svc_req *req, cred_t *cr, bool_t ro) 1933 { 1934 int error; 1935 vnode_t *vp = NULL; 1936 vnode_t *dvp; 1937 struct vattr *vap; 1938 struct vattr va; 1939 struct vattr *dbvap; 1940 struct vattr dbva; 1941 struct vattr *davap; 
1942 struct vattr dava; 1943 struct sockaddr *ca; 1944 char *name = NULL; 1945 1946 dbvap = NULL; 1947 davap = NULL; 1948 1949 dvp = nfs3_fhtovp(&args->where.dir, exi); 1950 1951 DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req, 1952 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 1953 MKDIR3args *, args); 1954 1955 if (dvp == NULL) { 1956 error = ESTALE; 1957 goto out; 1958 } 1959 1960 dbva.va_mask = AT_ALL; 1961 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva; 1962 davap = dbvap; 1963 1964 if (args->where.name == nfs3nametoolong) { 1965 resp->status = NFS3ERR_NAMETOOLONG; 1966 goto out1; 1967 } 1968 1969 if (args->where.name == NULL || *(args->where.name) == '\0') { 1970 resp->status = NFS3ERR_ACCES; 1971 goto out1; 1972 } 1973 1974 if (rdonly(ro, dvp)) { 1975 resp->status = NFS3ERR_ROFS; 1976 goto out1; 1977 } 1978 1979 if (is_system_labeled()) { 1980 bslabel_t *clabel = req->rq_label; 1981 1982 ASSERT(clabel != NULL); 1983 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *, 1984 "got client label from request(1)", struct svc_req *, req); 1985 1986 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1987 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 1988 exi)) { 1989 resp->status = NFS3ERR_ACCES; 1990 goto out1; 1991 } 1992 } 1993 } 1994 1995 error = sattr3_to_vattr(&args->attributes, &va); 1996 if (error) 1997 goto out; 1998 1999 if (!(va.va_mask & AT_MODE)) { 2000 resp->status = NFS3ERR_INVAL; 2001 goto out1; 2002 } 2003 2004 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2005 name = nfscmd_convname(ca, exi, args->where.name, 2006 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2007 2008 if (name == NULL) { 2009 resp->status = NFS3ERR_INVAL; 2010 goto out1; 2011 } 2012 2013 va.va_mask |= AT_TYPE; 2014 va.va_type = VDIR; 2015 2016 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL); 2017 2018 if (name != args->where.name) 2019 kmem_free(name, MAXPATHLEN + 1); 2020 2021 dava.va_mask = AT_ALL; 2022 davap = 
VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava; 2023 2024 /* 2025 * Force modified data and metadata out to stable storage. 2026 */ 2027 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2028 2029 if (error) 2030 goto out; 2031 2032 error = makefh3(&resp->resok.obj.handle, vp, exi); 2033 if (error) 2034 resp->resok.obj.handle_follows = FALSE; 2035 else 2036 resp->resok.obj.handle_follows = TRUE; 2037 2038 va.va_mask = AT_ALL; 2039 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 2040 2041 /* 2042 * Force modified data and metadata out to stable storage. 2043 */ 2044 (void) VOP_FSYNC(vp, 0, cr, NULL); 2045 2046 VN_RELE(vp); 2047 2048 resp->status = NFS3_OK; 2049 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 2050 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); 2051 2052 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req, 2053 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2054 MKDIR3res *, resp); 2055 VN_RELE(dvp); 2056 2057 return; 2058 2059 out: 2060 if (curthread->t_flag & T_WOULDBLOCK) { 2061 curthread->t_flag &= ~T_WOULDBLOCK; 2062 resp->status = NFS3ERR_JUKEBOX; 2063 } else 2064 resp->status = puterrno3(error); 2065 out1: 2066 DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req, 2067 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2068 MKDIR3res *, resp); 2069 if (dvp != NULL) 2070 VN_RELE(dvp); 2071 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); 2072 } 2073 2074 void * 2075 rfs3_mkdir_getfh(MKDIR3args *args) 2076 { 2077 2078 return (&args->where.dir); 2079 } 2080 2081 void 2082 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi, 2083 struct svc_req *req, cred_t *cr, bool_t ro) 2084 { 2085 int error; 2086 vnode_t *vp; 2087 vnode_t *dvp; 2088 struct vattr *vap; 2089 struct vattr va; 2090 struct vattr *dbvap; 2091 struct vattr dbva; 2092 struct vattr *davap; 2093 struct vattr dava; 2094 struct sockaddr *ca; 2095 char *name = NULL; 2096 char *symdata = NULL; 2097 2098 dbvap = NULL; 2099 davap = 
NULL; 2100 2101 dvp = nfs3_fhtovp(&args->where.dir, exi); 2102 2103 DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req, 2104 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2105 SYMLINK3args *, args); 2106 2107 if (dvp == NULL) { 2108 error = ESTALE; 2109 goto err; 2110 } 2111 2112 dbva.va_mask = AT_ALL; 2113 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva; 2114 davap = dbvap; 2115 2116 if (args->where.name == nfs3nametoolong) { 2117 resp->status = NFS3ERR_NAMETOOLONG; 2118 goto err1; 2119 } 2120 2121 if (args->where.name == NULL || *(args->where.name) == '\0') { 2122 resp->status = NFS3ERR_ACCES; 2123 goto err1; 2124 } 2125 2126 if (rdonly(ro, dvp)) { 2127 resp->status = NFS3ERR_ROFS; 2128 goto err1; 2129 } 2130 2131 if (is_system_labeled()) { 2132 bslabel_t *clabel = req->rq_label; 2133 2134 ASSERT(clabel != NULL); 2135 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *, 2136 "got client label from request(1)", struct svc_req *, req); 2137 2138 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2139 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 2140 exi)) { 2141 resp->status = NFS3ERR_ACCES; 2142 goto err1; 2143 } 2144 } 2145 } 2146 2147 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va); 2148 if (error) 2149 goto err; 2150 2151 if (!(va.va_mask & AT_MODE)) { 2152 resp->status = NFS3ERR_INVAL; 2153 goto err1; 2154 } 2155 2156 if (args->symlink.symlink_data == nfs3nametoolong) { 2157 resp->status = NFS3ERR_NAMETOOLONG; 2158 goto err1; 2159 } 2160 2161 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2162 name = nfscmd_convname(ca, exi, args->where.name, 2163 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2164 2165 if (name == NULL) { 2166 /* This is really a Solaris EILSEQ */ 2167 resp->status = NFS3ERR_INVAL; 2168 goto err1; 2169 } 2170 2171 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data, 2172 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2173 if (symdata == NULL) { 2174 /* This is really a Solaris 
EILSEQ */ 2175 resp->status = NFS3ERR_INVAL; 2176 goto err1; 2177 } 2178 2179 2180 va.va_mask |= AT_TYPE; 2181 va.va_type = VLNK; 2182 2183 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0); 2184 2185 dava.va_mask = AT_ALL; 2186 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava; 2187 2188 if (error) 2189 goto err; 2190 2191 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 2192 NULL, NULL, NULL); 2193 2194 /* 2195 * Force modified data and metadata out to stable storage. 2196 */ 2197 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2198 2199 2200 resp->status = NFS3_OK; 2201 if (error) { 2202 resp->resok.obj.handle_follows = FALSE; 2203 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes); 2204 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); 2205 goto out; 2206 } 2207 2208 error = makefh3(&resp->resok.obj.handle, vp, exi); 2209 if (error) 2210 resp->resok.obj.handle_follows = FALSE; 2211 else 2212 resp->resok.obj.handle_follows = TRUE; 2213 2214 va.va_mask = AT_ALL; 2215 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 2216 2217 /* 2218 * Force modified data and metadata out to stable storage. 
2219 */ 2220 (void) VOP_FSYNC(vp, 0, cr, NULL); 2221 2222 VN_RELE(vp); 2223 2224 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 2225 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); 2226 goto out; 2227 2228 err: 2229 if (curthread->t_flag & T_WOULDBLOCK) { 2230 curthread->t_flag &= ~T_WOULDBLOCK; 2231 resp->status = NFS3ERR_JUKEBOX; 2232 } else 2233 resp->status = puterrno3(error); 2234 err1: 2235 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); 2236 out: 2237 if (name != NULL && name != args->where.name) 2238 kmem_free(name, MAXPATHLEN + 1); 2239 if (symdata != NULL && symdata != args->symlink.symlink_data) 2240 kmem_free(symdata, MAXPATHLEN + 1); 2241 2242 DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req, 2243 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2244 SYMLINK3res *, resp); 2245 2246 if (dvp != NULL) 2247 VN_RELE(dvp); 2248 } 2249 2250 void * 2251 rfs3_symlink_getfh(SYMLINK3args *args) 2252 { 2253 2254 return (&args->where.dir); 2255 } 2256 2257 void 2258 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi, 2259 struct svc_req *req, cred_t *cr, bool_t ro) 2260 { 2261 int error; 2262 vnode_t *vp; 2263 vnode_t *realvp; 2264 vnode_t *dvp; 2265 struct vattr *vap; 2266 struct vattr va; 2267 struct vattr *dbvap; 2268 struct vattr dbva; 2269 struct vattr *davap; 2270 struct vattr dava; 2271 int mode; 2272 enum vcexcl excl; 2273 struct sockaddr *ca; 2274 char *name = NULL; 2275 2276 dbvap = NULL; 2277 davap = NULL; 2278 2279 dvp = nfs3_fhtovp(&args->where.dir, exi); 2280 2281 DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req, 2282 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2283 MKNOD3args *, args); 2284 2285 if (dvp == NULL) { 2286 error = ESTALE; 2287 goto out; 2288 } 2289 2290 dbva.va_mask = AT_ALL; 2291 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? 
NULL : &dbva; 2292 davap = dbvap; 2293 2294 if (args->where.name == nfs3nametoolong) { 2295 resp->status = NFS3ERR_NAMETOOLONG; 2296 goto out1; 2297 } 2298 2299 if (args->where.name == NULL || *(args->where.name) == '\0') { 2300 resp->status = NFS3ERR_ACCES; 2301 goto out1; 2302 } 2303 2304 if (rdonly(ro, dvp)) { 2305 resp->status = NFS3ERR_ROFS; 2306 goto out1; 2307 } 2308 2309 if (is_system_labeled()) { 2310 bslabel_t *clabel = req->rq_label; 2311 2312 ASSERT(clabel != NULL); 2313 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *, 2314 "got client label from request(1)", struct svc_req *, req); 2315 2316 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2317 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 2318 exi)) { 2319 resp->status = NFS3ERR_ACCES; 2320 goto out1; 2321 } 2322 } 2323 } 2324 2325 switch (args->what.type) { 2326 case NF3CHR: 2327 case NF3BLK: 2328 error = sattr3_to_vattr( 2329 &args->what.mknoddata3_u.device.dev_attributes, &va); 2330 if (error) 2331 goto out; 2332 if (secpolicy_sys_devices(cr) != 0) { 2333 resp->status = NFS3ERR_PERM; 2334 goto out1; 2335 } 2336 if (args->what.type == NF3CHR) 2337 va.va_type = VCHR; 2338 else 2339 va.va_type = VBLK; 2340 va.va_rdev = makedevice( 2341 args->what.mknoddata3_u.device.spec.specdata1, 2342 args->what.mknoddata3_u.device.spec.specdata2); 2343 va.va_mask |= AT_TYPE | AT_RDEV; 2344 break; 2345 case NF3SOCK: 2346 error = sattr3_to_vattr( 2347 &args->what.mknoddata3_u.pipe_attributes, &va); 2348 if (error) 2349 goto out; 2350 va.va_type = VSOCK; 2351 va.va_mask |= AT_TYPE; 2352 break; 2353 case NF3FIFO: 2354 error = sattr3_to_vattr( 2355 &args->what.mknoddata3_u.pipe_attributes, &va); 2356 if (error) 2357 goto out; 2358 va.va_type = VFIFO; 2359 va.va_mask |= AT_TYPE; 2360 break; 2361 default: 2362 resp->status = NFS3ERR_BADTYPE; 2363 goto out1; 2364 } 2365 2366 /* 2367 * Must specify the mode. 
2368 */ 2369 if (!(va.va_mask & AT_MODE)) { 2370 resp->status = NFS3ERR_INVAL; 2371 goto out1; 2372 } 2373 2374 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2375 name = nfscmd_convname(ca, exi, args->where.name, 2376 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2377 2378 if (name == NULL) { 2379 resp->status = NFS3ERR_INVAL; 2380 goto out1; 2381 } 2382 2383 excl = EXCL; 2384 2385 mode = 0; 2386 2387 error = VOP_CREATE(dvp, name, &va, excl, mode, 2388 &vp, cr, 0, NULL, NULL); 2389 2390 if (name != args->where.name) 2391 kmem_free(name, MAXPATHLEN + 1); 2392 2393 dava.va_mask = AT_ALL; 2394 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava; 2395 2396 /* 2397 * Force modified data and metadata out to stable storage. 2398 */ 2399 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2400 2401 if (error) 2402 goto out; 2403 2404 resp->status = NFS3_OK; 2405 2406 error = makefh3(&resp->resok.obj.handle, vp, exi); 2407 if (error) 2408 resp->resok.obj.handle_follows = FALSE; 2409 else 2410 resp->resok.obj.handle_follows = TRUE; 2411 2412 va.va_mask = AT_ALL; 2413 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 2414 2415 /* 2416 * Force modified metadata out to stable storage. 
2417 * 2418 * if a underlying vp exists, pass it to VOP_FSYNC 2419 */ 2420 if (VOP_REALVP(vp, &realvp, NULL) == 0) 2421 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 2422 else 2423 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2424 2425 VN_RELE(vp); 2426 2427 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 2428 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); 2429 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req, 2430 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2431 MKNOD3res *, resp); 2432 VN_RELE(dvp); 2433 return; 2434 2435 out: 2436 if (curthread->t_flag & T_WOULDBLOCK) { 2437 curthread->t_flag &= ~T_WOULDBLOCK; 2438 resp->status = NFS3ERR_JUKEBOX; 2439 } else 2440 resp->status = puterrno3(error); 2441 out1: 2442 DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req, 2443 cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, 2444 MKNOD3res *, resp); 2445 if (dvp != NULL) 2446 VN_RELE(dvp); 2447 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); 2448 } 2449 2450 void * 2451 rfs3_mknod_getfh(MKNOD3args *args) 2452 { 2453 2454 return (&args->where.dir); 2455 } 2456 2457 void 2458 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi, 2459 struct svc_req *req, cred_t *cr, bool_t ro) 2460 { 2461 int error = 0; 2462 vnode_t *vp; 2463 struct vattr *bvap; 2464 struct vattr bva; 2465 struct vattr *avap; 2466 struct vattr ava; 2467 vnode_t *targvp = NULL; 2468 struct sockaddr *ca; 2469 char *name = NULL; 2470 2471 bvap = NULL; 2472 avap = NULL; 2473 2474 vp = nfs3_fhtovp(&args->object.dir, exi); 2475 2476 DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req, 2477 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 2478 REMOVE3args *, args); 2479 2480 if (vp == NULL) { 2481 error = ESTALE; 2482 goto err; 2483 } 2484 2485 bva.va_mask = AT_ALL; 2486 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? 
NULL : &bva; 2487 avap = bvap; 2488 2489 if (vp->v_type != VDIR) { 2490 resp->status = NFS3ERR_NOTDIR; 2491 goto err1; 2492 } 2493 2494 if (args->object.name == nfs3nametoolong) { 2495 resp->status = NFS3ERR_NAMETOOLONG; 2496 goto err1; 2497 } 2498 2499 if (args->object.name == NULL || *(args->object.name) == '\0') { 2500 resp->status = NFS3ERR_ACCES; 2501 goto err1; 2502 } 2503 2504 if (rdonly(ro, vp)) { 2505 resp->status = NFS3ERR_ROFS; 2506 goto err1; 2507 } 2508 2509 if (is_system_labeled()) { 2510 bslabel_t *clabel = req->rq_label; 2511 2512 ASSERT(clabel != NULL); 2513 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *, 2514 "got client label from request(1)", struct svc_req *, req); 2515 2516 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2517 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 2518 exi)) { 2519 resp->status = NFS3ERR_ACCES; 2520 goto err1; 2521 } 2522 } 2523 } 2524 2525 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2526 name = nfscmd_convname(ca, exi, args->object.name, 2527 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2528 2529 if (name == NULL) { 2530 resp->status = NFS3ERR_INVAL; 2531 goto err1; 2532 } 2533 2534 /* 2535 * Check for a conflict with a non-blocking mandatory share 2536 * reservation and V4 delegations 2537 */ 2538 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0, 2539 NULL, cr, NULL, NULL, NULL); 2540 if (error != 0) 2541 goto err; 2542 2543 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2544 resp->status = NFS3ERR_JUKEBOX; 2545 goto err1; 2546 } 2547 2548 if (!nbl_need_check(targvp)) { 2549 error = VOP_REMOVE(vp, name, cr, NULL, 0); 2550 } else { 2551 nbl_start_crit(targvp, RW_READER); 2552 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 2553 error = EACCES; 2554 } else { 2555 error = VOP_REMOVE(vp, name, cr, NULL, 0); 2556 } 2557 nbl_end_crit(targvp); 2558 } 2559 VN_RELE(targvp); 2560 targvp = NULL; 2561 2562 ava.va_mask = AT_ALL; 2563 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? 
NULL : &ava; 2564 2565 /* 2566 * Force modified data and metadata out to stable storage. 2567 */ 2568 (void) VOP_FSYNC(vp, 0, cr, NULL); 2569 2570 if (error) 2571 goto err; 2572 2573 resp->status = NFS3_OK; 2574 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc); 2575 goto out; 2576 2577 err: 2578 if (curthread->t_flag & T_WOULDBLOCK) { 2579 curthread->t_flag &= ~T_WOULDBLOCK; 2580 resp->status = NFS3ERR_JUKEBOX; 2581 } else 2582 resp->status = puterrno3(error); 2583 err1: 2584 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc); 2585 out: 2586 DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req, 2587 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 2588 REMOVE3res *, resp); 2589 2590 if (name != NULL && name != args->object.name) 2591 kmem_free(name, MAXPATHLEN + 1); 2592 2593 if (vp != NULL) 2594 VN_RELE(vp); 2595 } 2596 2597 void * 2598 rfs3_remove_getfh(REMOVE3args *args) 2599 { 2600 2601 return (&args->object.dir); 2602 } 2603 2604 void 2605 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi, 2606 struct svc_req *req, cred_t *cr, bool_t ro) 2607 { 2608 int error; 2609 vnode_t *vp; 2610 struct vattr *bvap; 2611 struct vattr bva; 2612 struct vattr *avap; 2613 struct vattr ava; 2614 struct sockaddr *ca; 2615 char *name = NULL; 2616 2617 bvap = NULL; 2618 avap = NULL; 2619 2620 vp = nfs3_fhtovp(&args->object.dir, exi); 2621 2622 DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req, 2623 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 2624 RMDIR3args *, args); 2625 2626 if (vp == NULL) { 2627 error = ESTALE; 2628 goto err; 2629 } 2630 2631 bva.va_mask = AT_ALL; 2632 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? 
NULL : &bva; 2633 avap = bvap; 2634 2635 if (vp->v_type != VDIR) { 2636 resp->status = NFS3ERR_NOTDIR; 2637 goto err1; 2638 } 2639 2640 if (args->object.name == nfs3nametoolong) { 2641 resp->status = NFS3ERR_NAMETOOLONG; 2642 goto err1; 2643 } 2644 2645 if (args->object.name == NULL || *(args->object.name) == '\0') { 2646 resp->status = NFS3ERR_ACCES; 2647 goto err1; 2648 } 2649 2650 if (rdonly(ro, vp)) { 2651 resp->status = NFS3ERR_ROFS; 2652 goto err1; 2653 } 2654 2655 if (is_system_labeled()) { 2656 bslabel_t *clabel = req->rq_label; 2657 2658 ASSERT(clabel != NULL); 2659 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *, 2660 "got client label from request(1)", struct svc_req *, req); 2661 2662 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2663 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 2664 exi)) { 2665 resp->status = NFS3ERR_ACCES; 2666 goto err1; 2667 } 2668 } 2669 } 2670 2671 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2672 name = nfscmd_convname(ca, exi, args->object.name, 2673 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2674 2675 if (name == NULL) { 2676 resp->status = NFS3ERR_INVAL; 2677 goto err1; 2678 } 2679 2680 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); 2681 error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0); 2682 2683 if (name != args->object.name) 2684 kmem_free(name, MAXPATHLEN + 1); 2685 2686 ava.va_mask = AT_ALL; 2687 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava; 2688 2689 /* 2690 * Force modified data and metadata out to stable storage. 2691 */ 2692 (void) VOP_FSYNC(vp, 0, cr, NULL); 2693 2694 if (error) { 2695 /* 2696 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2697 * if the directory is not empty. A System V NFS server 2698 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit 2699 * over the wire. 
2700 */ 2701 if (error == EEXIST) 2702 error = ENOTEMPTY; 2703 goto err; 2704 } 2705 2706 resp->status = NFS3_OK; 2707 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc); 2708 goto out; 2709 2710 err: 2711 if (curthread->t_flag & T_WOULDBLOCK) { 2712 curthread->t_flag &= ~T_WOULDBLOCK; 2713 resp->status = NFS3ERR_JUKEBOX; 2714 } else 2715 resp->status = puterrno3(error); 2716 err1: 2717 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc); 2718 out: 2719 DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req, 2720 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 2721 RMDIR3res *, resp); 2722 if (vp != NULL) 2723 VN_RELE(vp); 2724 2725 } 2726 2727 void * 2728 rfs3_rmdir_getfh(RMDIR3args *args) 2729 { 2730 2731 return (&args->object.dir); 2732 } 2733 2734 void 2735 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, 2736 struct svc_req *req, cred_t *cr, bool_t ro) 2737 { 2738 int error = 0; 2739 vnode_t *fvp; 2740 vnode_t *tvp; 2741 vnode_t *targvp; 2742 struct vattr *fbvap; 2743 struct vattr fbva; 2744 struct vattr *favap; 2745 struct vattr fava; 2746 struct vattr *tbvap; 2747 struct vattr tbva; 2748 struct vattr *tavap; 2749 struct vattr tava; 2750 nfs_fh3 *fh3; 2751 struct exportinfo *to_exi; 2752 vnode_t *srcvp = NULL; 2753 bslabel_t *clabel; 2754 struct sockaddr *ca; 2755 char *name = NULL; 2756 char *toname = NULL; 2757 2758 fbvap = NULL; 2759 favap = NULL; 2760 tbvap = NULL; 2761 tavap = NULL; 2762 tvp = NULL; 2763 2764 fvp = nfs3_fhtovp(&args->from.dir, exi); 2765 2766 DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req, 2767 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi, 2768 RENAME3args *, args); 2769 2770 if (fvp == NULL) { 2771 error = ESTALE; 2772 goto err; 2773 } 2774 2775 if (is_system_labeled()) { 2776 clabel = req->rq_label; 2777 ASSERT(clabel != NULL); 2778 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *, 2779 "got client label from request(1)", struct svc_req *, req); 2780 2781 if 
(!blequal(&l_admin_low->tsl_label, clabel)) { 2782 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK, 2783 exi)) { 2784 resp->status = NFS3ERR_ACCES; 2785 goto err1; 2786 } 2787 } 2788 } 2789 2790 fbva.va_mask = AT_ALL; 2791 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva; 2792 favap = fbvap; 2793 2794 fh3 = &args->to.dir; 2795 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3)); 2796 if (to_exi == NULL) { 2797 resp->status = NFS3ERR_ACCES; 2798 goto err1; 2799 } 2800 exi_rele(to_exi); 2801 2802 if (to_exi != exi) { 2803 resp->status = NFS3ERR_XDEV; 2804 goto err1; 2805 } 2806 2807 tvp = nfs3_fhtovp(&args->to.dir, exi); 2808 if (tvp == NULL) { 2809 error = ESTALE; 2810 goto err; 2811 } 2812 2813 tbva.va_mask = AT_ALL; 2814 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva; 2815 tavap = tbvap; 2816 2817 if (fvp->v_type != VDIR || tvp->v_type != VDIR) { 2818 resp->status = NFS3ERR_NOTDIR; 2819 goto err1; 2820 } 2821 2822 if (args->from.name == nfs3nametoolong || 2823 args->to.name == nfs3nametoolong) { 2824 resp->status = NFS3ERR_NAMETOOLONG; 2825 goto err1; 2826 } 2827 if (args->from.name == NULL || *(args->from.name) == '\0' || 2828 args->to.name == NULL || *(args->to.name) == '\0') { 2829 resp->status = NFS3ERR_ACCES; 2830 goto err1; 2831 } 2832 2833 if (rdonly(ro, tvp)) { 2834 resp->status = NFS3ERR_ROFS; 2835 goto err1; 2836 } 2837 2838 if (is_system_labeled()) { 2839 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2840 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK, 2841 exi)) { 2842 resp->status = NFS3ERR_ACCES; 2843 goto err1; 2844 } 2845 } 2846 } 2847 2848 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2849 name = nfscmd_convname(ca, exi, args->from.name, 2850 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2851 2852 if (name == NULL) { 2853 resp->status = NFS3ERR_INVAL; 2854 goto err1; 2855 } 2856 2857 toname = nfscmd_convname(ca, exi, args->to.name, 2858 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 2859 2860 if (toname == NULL) 
{ 2861 resp->status = NFS3ERR_INVAL; 2862 goto err1; 2863 } 2864 2865 /* 2866 * Check for a conflict with a non-blocking mandatory share 2867 * reservation or V4 delegations. 2868 */ 2869 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0, 2870 NULL, cr, NULL, NULL, NULL); 2871 if (error != 0) 2872 goto err; 2873 2874 /* 2875 * If we rename a delegated file we should recall the 2876 * delegation, since future opens should fail or would 2877 * refer to a new file. 2878 */ 2879 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) { 2880 resp->status = NFS3ERR_JUKEBOX; 2881 goto err1; 2882 } 2883 2884 /* 2885 * Check for renaming over a delegated file. Check nfs4_deleg_policy 2886 * first to avoid VOP_LOOKUP if possible. 2887 */ 2888 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && 2889 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr, 2890 NULL, NULL, NULL) == 0) { 2891 2892 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2893 VN_RELE(targvp); 2894 resp->status = NFS3ERR_JUKEBOX; 2895 goto err1; 2896 } 2897 VN_RELE(targvp); 2898 } 2899 2900 if (!nbl_need_check(srcvp)) { 2901 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0); 2902 } else { 2903 nbl_start_crit(srcvp, RW_READER); 2904 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) 2905 error = EACCES; 2906 else 2907 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0); 2908 nbl_end_crit(srcvp); 2909 } 2910 if (error == 0) 2911 vn_renamepath(tvp, srcvp, args->to.name, 2912 strlen(args->to.name)); 2913 VN_RELE(srcvp); 2914 srcvp = NULL; 2915 2916 fava.va_mask = AT_ALL; 2917 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava; 2918 tava.va_mask = AT_ALL; 2919 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava; 2920 2921 /* 2922 * Force modified data and metadata out to stable storage. 
2923 */ 2924 (void) VOP_FSYNC(fvp, 0, cr, NULL); 2925 (void) VOP_FSYNC(tvp, 0, cr, NULL); 2926 2927 if (error) 2928 goto err; 2929 2930 resp->status = NFS3_OK; 2931 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc); 2932 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc); 2933 goto out; 2934 2935 err: 2936 if (curthread->t_flag & T_WOULDBLOCK) { 2937 curthread->t_flag &= ~T_WOULDBLOCK; 2938 resp->status = NFS3ERR_JUKEBOX; 2939 } else { 2940 resp->status = puterrno3(error); 2941 } 2942 err1: 2943 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc); 2944 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc); 2945 2946 out: 2947 if (name != NULL && name != args->from.name) 2948 kmem_free(name, MAXPATHLEN + 1); 2949 if (toname != NULL && toname != args->to.name) 2950 kmem_free(toname, MAXPATHLEN + 1); 2951 2952 DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req, 2953 cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi, 2954 RENAME3res *, resp); 2955 if (fvp != NULL) 2956 VN_RELE(fvp); 2957 if (tvp != NULL) 2958 VN_RELE(tvp); 2959 } 2960 2961 void * 2962 rfs3_rename_getfh(RENAME3args *args) 2963 { 2964 2965 return (&args->from.dir); 2966 } 2967 2968 void 2969 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi, 2970 struct svc_req *req, cred_t *cr, bool_t ro) 2971 { 2972 int error; 2973 vnode_t *vp; 2974 vnode_t *dvp; 2975 struct vattr *vap; 2976 struct vattr va; 2977 struct vattr *bvap; 2978 struct vattr bva; 2979 struct vattr *avap; 2980 struct vattr ava; 2981 nfs_fh3 *fh3; 2982 struct exportinfo *to_exi; 2983 bslabel_t *clabel; 2984 struct sockaddr *ca; 2985 char *name = NULL; 2986 2987 vap = NULL; 2988 bvap = NULL; 2989 avap = NULL; 2990 dvp = NULL; 2991 2992 vp = nfs3_fhtovp(&args->file, exi); 2993 2994 DTRACE_NFSV3_5(op__link__start, struct svc_req *, req, 2995 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 2996 LINK3args *, args); 2997 2998 if (vp == NULL) { 2999 error = ESTALE; 3000 goto out; 3001 } 3002 3003 
va.va_mask = AT_ALL; 3004 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3005 3006 fh3 = &args->link.dir; 3007 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3)); 3008 if (to_exi == NULL) { 3009 resp->status = NFS3ERR_ACCES; 3010 goto out1; 3011 } 3012 exi_rele(to_exi); 3013 3014 if (to_exi != exi) { 3015 resp->status = NFS3ERR_XDEV; 3016 goto out1; 3017 } 3018 3019 if (is_system_labeled()) { 3020 clabel = req->rq_label; 3021 3022 ASSERT(clabel != NULL); 3023 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *, 3024 "got client label from request(1)", struct svc_req *, req); 3025 3026 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3027 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3028 exi)) { 3029 resp->status = NFS3ERR_ACCES; 3030 goto out1; 3031 } 3032 } 3033 } 3034 3035 dvp = nfs3_fhtovp(&args->link.dir, exi); 3036 if (dvp == NULL) { 3037 error = ESTALE; 3038 goto out; 3039 } 3040 3041 bva.va_mask = AT_ALL; 3042 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva; 3043 3044 if (dvp->v_type != VDIR) { 3045 resp->status = NFS3ERR_NOTDIR; 3046 goto out1; 3047 } 3048 3049 if (args->link.name == nfs3nametoolong) { 3050 resp->status = NFS3ERR_NAMETOOLONG; 3051 goto out1; 3052 } 3053 3054 if (args->link.name == NULL || *(args->link.name) == '\0') { 3055 resp->status = NFS3ERR_ACCES; 3056 goto out1; 3057 } 3058 3059 if (rdonly(ro, dvp)) { 3060 resp->status = NFS3ERR_ROFS; 3061 goto out1; 3062 } 3063 3064 if (is_system_labeled()) { 3065 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *, 3066 "got client label from request(1)", struct svc_req *, req); 3067 3068 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3069 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK, 3070 exi)) { 3071 resp->status = NFS3ERR_ACCES; 3072 goto out1; 3073 } 3074 } 3075 } 3076 3077 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3078 name = nfscmd_convname(ca, exi, args->link.name, 3079 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1); 3080 3081 if 
(name == NULL) { 3082 resp->status = NFS3ERR_SERVERFAULT; 3083 goto out1; 3084 } 3085 3086 error = VOP_LINK(dvp, vp, name, cr, NULL, 0); 3087 3088 va.va_mask = AT_ALL; 3089 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3090 ava.va_mask = AT_ALL; 3091 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava; 3092 3093 /* 3094 * Force modified data and metadata out to stable storage. 3095 */ 3096 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 3097 (void) VOP_FSYNC(dvp, 0, cr, NULL); 3098 3099 if (error) 3100 goto out; 3101 3102 VN_RELE(dvp); 3103 3104 resp->status = NFS3_OK; 3105 vattr_to_post_op_attr(vap, &resp->resok.file_attributes); 3106 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc); 3107 3108 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req, 3109 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3110 LINK3res *, resp); 3111 3112 VN_RELE(vp); 3113 3114 return; 3115 3116 out: 3117 if (curthread->t_flag & T_WOULDBLOCK) { 3118 curthread->t_flag &= ~T_WOULDBLOCK; 3119 resp->status = NFS3ERR_JUKEBOX; 3120 } else 3121 resp->status = puterrno3(error); 3122 out1: 3123 if (name != NULL && name != args->link.name) 3124 kmem_free(name, MAXPATHLEN + 1); 3125 3126 DTRACE_NFSV3_5(op__link__done, struct svc_req *, req, 3127 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3128 LINK3res *, resp); 3129 3130 if (vp != NULL) 3131 VN_RELE(vp); 3132 if (dvp != NULL) 3133 VN_RELE(dvp); 3134 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes); 3135 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc); 3136 } 3137 3138 void * 3139 rfs3_link_getfh(LINK3args *args) 3140 { 3141 3142 return (&args->file); 3143 } 3144 3145 /* 3146 * This macro defines the size of a response which contains attribute 3147 * information and one directory entry (whose length is specified by 3148 * the macro parameter). If the incoming request is larger than this, 3149 * then we are guaranteed to be able to return at one directory entry 3150 * if one exists. 
Therefore, we do not need to check for 3151 * NFS3ERR_TOOSMALL if the requested size is larger then this. If it 3152 * is not, then we need to check to make sure that this error does not 3153 * need to be returned. 3154 * 3155 * NFS3_READDIR_MIN_COUNT is comprised of following : 3156 * 3157 * status - 1 * BYTES_PER_XDR_UNIT 3158 * attr. flag - 1 * BYTES_PER_XDR_UNIT 3159 * cookie verifier - 2 * BYTES_PER_XDR_UNIT 3160 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT 3161 * boolean - 1 * BYTES_PER_XDR_UNIT 3162 * file id - 2 * BYTES_PER_XDR_UNIT 3163 * directory name length - 1 * BYTES_PER_XDR_UNIT 3164 * cookie - 2 * BYTES_PER_XDR_UNIT 3165 * end of list - 1 * BYTES_PER_XDR_UNIT 3166 * end of file - 1 * BYTES_PER_XDR_UNIT 3167 * Name length of directory to the nearest byte 3168 */ 3169 3170 #define NFS3_READDIR_MIN_COUNT(length) \ 3171 ((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \ 3172 BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT)) 3173 3174 /* ARGSUSED */ 3175 void 3176 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi, 3177 struct svc_req *req, cred_t *cr, bool_t ro) 3178 { 3179 int error; 3180 vnode_t *vp; 3181 struct vattr *vap; 3182 struct vattr va; 3183 struct iovec iov; 3184 struct uio uio; 3185 char *data; 3186 int iseof; 3187 int bufsize; 3188 int namlen; 3189 uint_t count; 3190 struct sockaddr *ca; 3191 3192 vap = NULL; 3193 3194 vp = nfs3_fhtovp(&args->dir, exi); 3195 3196 DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req, 3197 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3198 READDIR3args *, args); 3199 3200 if (vp == NULL) { 3201 error = ESTALE; 3202 goto out; 3203 } 3204 3205 if (is_system_labeled()) { 3206 bslabel_t *clabel = req->rq_label; 3207 3208 ASSERT(clabel != NULL); 3209 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *, 3210 "got client label from request(1)", struct svc_req *, req); 3211 3212 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3213 if 
(!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3214 exi)) { 3215 resp->status = NFS3ERR_ACCES; 3216 goto out1; 3217 } 3218 } 3219 } 3220 3221 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 3222 3223 va.va_mask = AT_ALL; 3224 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3225 3226 if (vp->v_type != VDIR) { 3227 resp->status = NFS3ERR_NOTDIR; 3228 goto out1; 3229 } 3230 3231 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 3232 if (error) 3233 goto out; 3234 3235 /* 3236 * Now don't allow arbitrary count to alloc; 3237 * allow the maximum not to exceed rfs3_tsize() 3238 */ 3239 if (args->count > rfs3_tsize(req)) 3240 args->count = rfs3_tsize(req); 3241 3242 /* 3243 * Make sure that there is room to read at least one entry 3244 * if any are available. 3245 */ 3246 if (args->count < DIRENT64_RECLEN(MAXNAMELEN)) 3247 count = DIRENT64_RECLEN(MAXNAMELEN); 3248 else 3249 count = args->count; 3250 3251 data = kmem_alloc(count, KM_SLEEP); 3252 3253 iov.iov_base = data; 3254 iov.iov_len = count; 3255 uio.uio_iov = &iov; 3256 uio.uio_iovcnt = 1; 3257 uio.uio_segflg = UIO_SYSSPACE; 3258 uio.uio_extflg = UIO_COPY_CACHED; 3259 uio.uio_loffset = (offset_t)args->cookie; 3260 uio.uio_resid = count; 3261 3262 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 3263 3264 va.va_mask = AT_ALL; 3265 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3266 3267 if (error) { 3268 kmem_free(data, count); 3269 goto out; 3270 } 3271 3272 /* 3273 * If the count was not large enough to be able to guarantee 3274 * to be able to return at least one entry, then need to 3275 * check to see if NFS3ERR_TOOSMALL should be returned. 3276 */ 3277 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) { 3278 /* 3279 * bufsize is used to keep track of the size of the response. 3280 * It is primed with: 3281 * 1 for the status + 3282 * 1 for the dir_attributes.attributes boolean + 3283 * 2 for the cookie verifier 3284 * all times BYTES_PER_XDR_UNIT to convert from XDR units 3285 * to bytes. 
If there are directory attributes to be 3286 * returned, then: 3287 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3 3288 * time BYTES_PER_XDR_UNIT is added to account for them. 3289 */ 3290 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT; 3291 if (vap != NULL) 3292 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT; 3293 /* 3294 * An entry is composed of: 3295 * 1 for the true/false list indicator + 3296 * 2 for the fileid + 3297 * 1 for the length of the name + 3298 * 2 for the cookie + 3299 * all times BYTES_PER_XDR_UNIT to convert from 3300 * XDR units to bytes, plus the length of the name 3301 * rounded up to the nearest BYTES_PER_XDR_UNIT. 3302 */ 3303 if (count != uio.uio_resid) { 3304 namlen = strlen(((struct dirent64 *)data)->d_name); 3305 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT + 3306 roundup(namlen, BYTES_PER_XDR_UNIT); 3307 } 3308 /* 3309 * We need to check to see if the number of bytes left 3310 * to go into the buffer will actually fit into the 3311 * buffer. This is calculated as the size of this 3312 * entry plus: 3313 * 1 for the true/false list indicator + 3314 * 1 for the eof indicator 3315 * times BYTES_PER_XDR_UNIT to convert from from 3316 * XDR units to bytes. 3317 */ 3318 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT; 3319 if (bufsize > args->count) { 3320 kmem_free(data, count); 3321 resp->status = NFS3ERR_TOOSMALL; 3322 goto out1; 3323 } 3324 } 3325 3326 /* 3327 * Have a valid readir buffer for the native character 3328 * set. Need to check if a conversion is necessary and 3329 * potentially rewrite the whole buffer. Note that if the 3330 * conversion expands names enough, the structure may not 3331 * fit. In this case, we need to drop entries until if fits 3332 * and patch the counts in order that the next readdir will 3333 * get the correct entries. 
3334 */ 3335 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3336 data = nfscmd_convdirent(ca, exi, data, count, &resp->status); 3337 3338 3339 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 3340 3341 #if 0 /* notyet */ 3342 /* 3343 * Don't do this. It causes local disk writes when just 3344 * reading the file and the overhead is deemed larger 3345 * than the benefit. 3346 */ 3347 /* 3348 * Force modified metadata out to stable storage. 3349 */ 3350 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 3351 #endif 3352 3353 resp->status = NFS3_OK; 3354 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes); 3355 resp->resok.cookieverf = 0; 3356 resp->resok.reply.entries = (entry3 *)data; 3357 resp->resok.reply.eof = iseof; 3358 resp->resok.size = count - uio.uio_resid; 3359 resp->resok.count = args->count; 3360 resp->resok.freecount = count; 3361 3362 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req, 3363 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3364 READDIR3res *, resp); 3365 3366 VN_RELE(vp); 3367 3368 return; 3369 3370 out: 3371 if (curthread->t_flag & T_WOULDBLOCK) { 3372 curthread->t_flag &= ~T_WOULDBLOCK; 3373 resp->status = NFS3ERR_JUKEBOX; 3374 } else 3375 resp->status = puterrno3(error); 3376 out1: 3377 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes); 3378 3379 DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req, 3380 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3381 READDIR3res *, resp); 3382 3383 if (vp != NULL) { 3384 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 3385 VN_RELE(vp); 3386 } 3387 } 3388 3389 void * 3390 rfs3_readdir_getfh(READDIR3args *args) 3391 { 3392 3393 return (&args->dir); 3394 } 3395 3396 void 3397 rfs3_readdir_free(READDIR3res *resp) 3398 { 3399 3400 if (resp->status == NFS3_OK) 3401 kmem_free(resp->resok.reply.entries, resp->resok.freecount); 3402 } 3403 3404 #ifdef nextdp 3405 #undef nextdp 3406 #endif 3407 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 3408 3409 /* 
3410 * This macro computes the size of a response which contains 3411 * one directory entry including the attributes as well as file handle. 3412 * If the incoming request is larger than this, then we are guaranteed to be 3413 * able to return at least one more directory entry if one exists. 3414 * 3415 * NFS3_READDIRPLUS_ENTRY is made up of the following: 3416 * 3417 * boolean - 1 * BYTES_PER_XDR_UNIT 3418 * file id - 2 * BYTES_PER_XDR_UNIT 3419 * directory name length - 1 * BYTES_PER_XDR_UNIT 3420 * cookie - 2 * BYTES_PER_XDR_UNIT 3421 * attribute flag - 1 * BYTES_PER_XDR_UNIT 3422 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT 3423 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT 3424 * length of a file handle - 1 * BYTES_PER_XDR_UNIT 3425 * Maximum length of a file handle (NFS3_MAXFHSIZE) 3426 * name length of the entry to the nearest bytes 3427 */ 3428 #define NFS3_READDIRPLUS_ENTRY(namelen) \ 3429 ((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \ 3430 BYTES_PER_XDR_UNIT + \ 3431 NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT)) 3432 3433 static int rfs3_readdir_unit = MAXBSIZE; 3434 3435 /* ARGSUSED */ 3436 void 3437 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp, 3438 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 3439 { 3440 int error; 3441 vnode_t *vp; 3442 struct vattr *vap; 3443 struct vattr va; 3444 struct iovec iov; 3445 struct uio uio; 3446 char *data; 3447 int iseof; 3448 struct dirent64 *dp; 3449 vnode_t *nvp; 3450 struct vattr *nvap; 3451 struct vattr nva; 3452 entryplus3_info *infop = NULL; 3453 int size = 0; 3454 int nents = 0; 3455 int bufsize = 0; 3456 int entrysize = 0; 3457 int tofit = 0; 3458 int rd_unit = rfs3_readdir_unit; 3459 int prev_len; 3460 int space_left; 3461 int i; 3462 uint_t *namlen = NULL; 3463 char *ndata = NULL; 3464 struct sockaddr *ca; 3465 size_t ret; 3466 3467 vap = NULL; 3468 3469 vp = nfs3_fhtovp(&args->dir, exi); 3470 3471 
DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req, 3472 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3473 READDIRPLUS3args *, args); 3474 3475 if (vp == NULL) { 3476 error = ESTALE; 3477 goto out; 3478 } 3479 3480 if (is_system_labeled()) { 3481 bslabel_t *clabel = req->rq_label; 3482 3483 ASSERT(clabel != NULL); 3484 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel, 3485 char *, "got client label from request(1)", 3486 struct svc_req *, req); 3487 3488 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3489 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3490 exi)) { 3491 resp->status = NFS3ERR_ACCES; 3492 goto out1; 3493 } 3494 } 3495 } 3496 3497 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 3498 3499 va.va_mask = AT_ALL; 3500 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3501 3502 if (vp->v_type != VDIR) { 3503 error = ENOTDIR; 3504 goto out; 3505 } 3506 3507 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 3508 if (error) 3509 goto out; 3510 3511 /* 3512 * Don't allow arbitrary counts for allocation 3513 */ 3514 if (args->maxcount > rfs3_tsize(req)) 3515 args->maxcount = rfs3_tsize(req); 3516 3517 /* 3518 * Make sure that there is room to read at least one entry 3519 * if any are available 3520 */ 3521 args->dircount = MIN(args->dircount, args->maxcount); 3522 3523 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN)) 3524 args->dircount = DIRENT64_RECLEN(MAXNAMELEN); 3525 3526 /* 3527 * This allocation relies on a minimum directory entry 3528 * being roughly 24 bytes. Therefore, the namlen array 3529 * will have enough space based on the maximum number of 3530 * entries to read. 
3531 */ 3532 namlen = kmem_alloc(args->dircount, KM_SLEEP); 3533 3534 space_left = args->dircount; 3535 data = kmem_alloc(args->dircount, KM_SLEEP); 3536 dp = (struct dirent64 *)data; 3537 uio.uio_iov = &iov; 3538 uio.uio_iovcnt = 1; 3539 uio.uio_segflg = UIO_SYSSPACE; 3540 uio.uio_extflg = UIO_COPY_CACHED; 3541 uio.uio_loffset = (offset_t)args->cookie; 3542 3543 /* 3544 * bufsize is used to keep track of the size of the response as we 3545 * get post op attributes and filehandles for each entry. This is 3546 * an optimization as the server may have read more entries than will 3547 * fit in the buffer specified by maxcount. We stop calculating 3548 * post op attributes and filehandles once we have exceeded maxcount. 3549 * This will minimize the effect of truncation. 3550 * 3551 * It is primed with: 3552 * 1 for the status + 3553 * 1 for the dir_attributes.attributes boolean + 3554 * 2 for the cookie verifier 3555 * all times BYTES_PER_XDR_UNIT to convert from XDR units 3556 * to bytes. If there are directory attributes to be 3557 * returned, then: 3558 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3 3559 * time BYTES_PER_XDR_UNIT is added to account for them. 3560 */ 3561 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT; 3562 if (vap != NULL) 3563 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT; 3564 3565 getmoredents: 3566 /* 3567 * Here we make a check so that our read unit is not larger than 3568 * the space left in the buffer. 
3569 */ 3570 rd_unit = MIN(rd_unit, space_left); 3571 iov.iov_base = (char *)dp; 3572 iov.iov_len = rd_unit; 3573 uio.uio_resid = rd_unit; 3574 prev_len = rd_unit; 3575 3576 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 3577 3578 if (error) { 3579 kmem_free(data, args->dircount); 3580 goto out; 3581 } 3582 3583 if (uio.uio_resid == prev_len && !iseof) { 3584 if (nents == 0) { 3585 kmem_free(data, args->dircount); 3586 resp->status = NFS3ERR_TOOSMALL; 3587 goto out1; 3588 } 3589 3590 /* 3591 * We could not get any more entries, so get the attributes 3592 * and filehandle for the entries already obtained. 3593 */ 3594 goto good; 3595 } 3596 3597 /* 3598 * We estimate the size of the response by assuming the 3599 * entry exists and attributes and filehandle are also valid 3600 */ 3601 for (size = prev_len - uio.uio_resid; 3602 size > 0; 3603 size -= dp->d_reclen, dp = nextdp(dp)) { 3604 3605 if (dp->d_ino == 0) { 3606 nents++; 3607 continue; 3608 } 3609 3610 namlen[nents] = strlen(dp->d_name); 3611 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]); 3612 3613 /* 3614 * We need to check to see if the number of bytes left 3615 * to go into the buffer will actually fit into the 3616 * buffer. This is calculated as the size of this 3617 * entry plus: 3618 * 1 for the true/false list indicator + 3619 * 1 for the eof indicator 3620 * times BYTES_PER_XDR_UNIT to convert from XDR units 3621 * to bytes. 3622 * 3623 * Also check the dircount limit against the first entry read 3624 * 3625 */ 3626 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT; 3627 if (bufsize + tofit > args->maxcount) { 3628 /* 3629 * We make a check here to see if this was the 3630 * first entry being measured. If so, then maxcount 3631 * was too small to begin with and so we need to 3632 * return with NFS3ERR_TOOSMALL. 
3633 */ 3634 if (nents == 0) { 3635 kmem_free(data, args->dircount); 3636 resp->status = NFS3ERR_TOOSMALL; 3637 goto out1; 3638 } 3639 iseof = FALSE; 3640 goto good; 3641 } 3642 bufsize += entrysize; 3643 nents++; 3644 } 3645 3646 /* 3647 * If there is enough room to fit at least 1 more entry including 3648 * post op attributes and filehandle in the buffer AND that we haven't 3649 * exceeded dircount then go back and get some more. 3650 */ 3651 if (!iseof && 3652 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) { 3653 space_left -= (prev_len - uio.uio_resid); 3654 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN)) 3655 goto getmoredents; 3656 3657 /* else, fall through */ 3658 } 3659 good: 3660 va.va_mask = AT_ALL; 3661 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3662 3663 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 3664 3665 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP); 3666 resp->resok.infop = infop; 3667 3668 dp = (struct dirent64 *)data; 3669 for (i = 0; i < nents; i++) { 3670 3671 if (dp->d_ino == 0) { 3672 infop[i].attr.attributes = FALSE; 3673 infop[i].fh.handle_follows = FALSE; 3674 dp = nextdp(dp); 3675 continue; 3676 } 3677 3678 infop[i].namelen = namlen[i]; 3679 3680 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr, 3681 NULL, NULL, NULL); 3682 if (error) { 3683 infop[i].attr.attributes = FALSE; 3684 infop[i].fh.handle_follows = FALSE; 3685 dp = nextdp(dp); 3686 continue; 3687 } 3688 3689 nva.va_mask = AT_ALL; 3690 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? 
NULL : &nva; 3691 3692 /* Lie about the object type for a referral */ 3693 if (vn_is_nfs_reparse(nvp, cr)) 3694 nvap->va_type = VLNK; 3695 3696 if (vn_ismntpt(nvp)) { 3697 infop[i].attr.attributes = FALSE; 3698 infop[i].fh.handle_follows = FALSE; 3699 } else { 3700 vattr_to_post_op_attr(nvap, &infop[i].attr); 3701 3702 error = makefh3(&infop[i].fh.handle, nvp, exi); 3703 if (!error) 3704 infop[i].fh.handle_follows = TRUE; 3705 else 3706 infop[i].fh.handle_follows = FALSE; 3707 } 3708 3709 VN_RELE(nvp); 3710 dp = nextdp(dp); 3711 } 3712 3713 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 3714 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata); 3715 if (ndata == NULL) 3716 ndata = data; 3717 3718 if (ret > 0) { 3719 /* 3720 * We had to drop one or more entries in order to fit 3721 * during the character conversion. We need to patch 3722 * up the size and eof info. 3723 */ 3724 if (iseof) 3725 iseof = FALSE; 3726 3727 ret = nfscmd_dropped_entrysize((struct dirent64 *)data, 3728 nents, ret); 3729 } 3730 3731 3732 #if 0 /* notyet */ 3733 /* 3734 * Don't do this. It causes local disk writes when just 3735 * reading the file and the overhead is deemed larger 3736 * than the benefit. 3737 */ 3738 /* 3739 * Force modified metadata out to stable storage. 
3740 */ 3741 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 3742 #endif 3743 3744 kmem_free(namlen, args->dircount); 3745 if (ndata != data) 3746 kmem_free(data, args->dircount); 3747 3748 resp->status = NFS3_OK; 3749 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes); 3750 resp->resok.cookieverf = 0; 3751 resp->resok.reply.entries = (entryplus3 *)ndata; 3752 resp->resok.reply.eof = iseof; 3753 resp->resok.size = nents; 3754 resp->resok.count = args->dircount - ret; 3755 resp->resok.maxcount = args->maxcount; 3756 3757 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req, 3758 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3759 READDIRPLUS3res *, resp); 3760 3761 VN_RELE(vp); 3762 3763 return; 3764 3765 out: 3766 if (curthread->t_flag & T_WOULDBLOCK) { 3767 curthread->t_flag &= ~T_WOULDBLOCK; 3768 resp->status = NFS3ERR_JUKEBOX; 3769 } else { 3770 resp->status = puterrno3(error); 3771 } 3772 out1: 3773 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes); 3774 3775 DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req, 3776 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3777 READDIRPLUS3res *, resp); 3778 3779 if (vp != NULL) { 3780 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 3781 VN_RELE(vp); 3782 } 3783 3784 if (namlen != NULL) 3785 kmem_free(namlen, args->dircount); 3786 } 3787 3788 void * 3789 rfs3_readdirplus_getfh(READDIRPLUS3args *args) 3790 { 3791 3792 return (&args->dir); 3793 } 3794 3795 void 3796 rfs3_readdirplus_free(READDIRPLUS3res *resp) 3797 { 3798 3799 if (resp->status == NFS3_OK) { 3800 kmem_free(resp->resok.reply.entries, resp->resok.count); 3801 kmem_free(resp->resok.infop, 3802 resp->resok.size * sizeof (struct entryplus3_info)); 3803 } 3804 } 3805 3806 /* ARGSUSED */ 3807 void 3808 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi, 3809 struct svc_req *req, cred_t *cr, bool_t ro) 3810 { 3811 int error; 3812 vnode_t *vp; 3813 struct vattr *vap; 3814 struct vattr va; 3815 struct statvfs64 sb; 
3816 3817 vap = NULL; 3818 3819 vp = nfs3_fhtovp(&args->fsroot, exi); 3820 3821 DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req, 3822 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3823 FSSTAT3args *, args); 3824 3825 if (vp == NULL) { 3826 error = ESTALE; 3827 goto out; 3828 } 3829 3830 if (is_system_labeled()) { 3831 bslabel_t *clabel = req->rq_label; 3832 3833 ASSERT(clabel != NULL); 3834 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *, 3835 "got client label from request(1)", struct svc_req *, req); 3836 3837 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3838 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3839 exi)) { 3840 resp->status = NFS3ERR_ACCES; 3841 goto out1; 3842 } 3843 } 3844 } 3845 3846 error = VFS_STATVFS(vp->v_vfsp, &sb); 3847 3848 va.va_mask = AT_ALL; 3849 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 3850 3851 if (error) 3852 goto out; 3853 3854 resp->status = NFS3_OK; 3855 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 3856 if (sb.f_blocks != (fsblkcnt64_t)-1) 3857 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks; 3858 else 3859 resp->resok.tbytes = (size3)sb.f_blocks; 3860 if (sb.f_bfree != (fsblkcnt64_t)-1) 3861 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree; 3862 else 3863 resp->resok.fbytes = (size3)sb.f_bfree; 3864 if (sb.f_bavail != (fsblkcnt64_t)-1) 3865 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail; 3866 else 3867 resp->resok.abytes = (size3)sb.f_bavail; 3868 resp->resok.tfiles = (size3)sb.f_files; 3869 resp->resok.ffiles = (size3)sb.f_ffree; 3870 resp->resok.afiles = (size3)sb.f_favail; 3871 resp->resok.invarsec = 0; 3872 3873 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req, 3874 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3875 FSSTAT3res *, resp); 3876 VN_RELE(vp); 3877 3878 return; 3879 3880 out: 3881 if (curthread->t_flag & T_WOULDBLOCK) { 3882 curthread->t_flag &= ~T_WOULDBLOCK; 3883 resp->status = NFS3ERR_JUKEBOX; 3884 
} else 3885 resp->status = puterrno3(error); 3886 out1: 3887 DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req, 3888 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3889 FSSTAT3res *, resp); 3890 3891 if (vp != NULL) 3892 VN_RELE(vp); 3893 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes); 3894 } 3895 3896 void * 3897 rfs3_fsstat_getfh(FSSTAT3args *args) 3898 { 3899 3900 return (&args->fsroot); 3901 } 3902 3903 /* ARGSUSED */ 3904 void 3905 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi, 3906 struct svc_req *req, cred_t *cr, bool_t ro) 3907 { 3908 vnode_t *vp; 3909 struct vattr *vap; 3910 struct vattr va; 3911 uint32_t xfer_size; 3912 ulong_t l = 0; 3913 int error; 3914 3915 vp = nfs3_fhtovp(&args->fsroot, exi); 3916 3917 DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req, 3918 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3919 FSINFO3args *, args); 3920 3921 if (vp == NULL) { 3922 if (curthread->t_flag & T_WOULDBLOCK) { 3923 curthread->t_flag &= ~T_WOULDBLOCK; 3924 resp->status = NFS3ERR_JUKEBOX; 3925 } else 3926 resp->status = NFS3ERR_STALE; 3927 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes); 3928 goto out; 3929 } 3930 3931 if (is_system_labeled()) { 3932 bslabel_t *clabel = req->rq_label; 3933 3934 ASSERT(clabel != NULL); 3935 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *, 3936 "got client label from request(1)", struct svc_req *, req); 3937 3938 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3939 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 3940 exi)) { 3941 resp->status = NFS3ERR_STALE; 3942 vattr_to_post_op_attr(NULL, 3943 &resp->resfail.obj_attributes); 3944 goto out; 3945 } 3946 } 3947 } 3948 3949 va.va_mask = AT_ALL; 3950 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? 
NULL : &va; 3951 3952 resp->status = NFS3_OK; 3953 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 3954 xfer_size = rfs3_tsize(req); 3955 resp->resok.rtmax = xfer_size; 3956 resp->resok.rtpref = xfer_size; 3957 resp->resok.rtmult = DEV_BSIZE; 3958 resp->resok.wtmax = xfer_size; 3959 resp->resok.wtpref = xfer_size; 3960 resp->resok.wtmult = DEV_BSIZE; 3961 resp->resok.dtpref = MAXBSIZE; 3962 3963 /* 3964 * Large file spec: want maxfilesize based on limit of 3965 * underlying filesystem. We can guess 2^31-1 if need be. 3966 */ 3967 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL); 3968 if (error) { 3969 resp->status = puterrno3(error); 3970 goto out; 3971 } 3972 3973 /* 3974 * If the underlying file system does not support _PC_FILESIZEBITS, 3975 * return a reasonable default. Note that error code on VOP_PATHCONF 3976 * will be 0, even if the underlying file system does not support 3977 * _PC_FILESIZEBITS. 3978 */ 3979 if (l == (ulong_t)-1) { 3980 resp->resok.maxfilesize = MAXOFF32_T; 3981 } else { 3982 if (l >= (sizeof (uint64_t) * 8)) 3983 resp->resok.maxfilesize = INT64_MAX; 3984 else 3985 resp->resok.maxfilesize = (1LL << (l-1)) - 1; 3986 } 3987 3988 resp->resok.time_delta.seconds = 0; 3989 resp->resok.time_delta.nseconds = 1000; 3990 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK | 3991 FSF3_HOMOGENEOUS | FSF3_CANSETTIME; 3992 3993 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req, 3994 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 3995 FSINFO3res *, resp); 3996 3997 VN_RELE(vp); 3998 3999 return; 4000 4001 out: 4002 DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req, 4003 cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi, 4004 FSINFO3res *, resp); 4005 if (vp != NULL) 4006 VN_RELE(vp); 4007 } 4008 4009 void * 4010 rfs3_fsinfo_getfh(FSINFO3args *args) 4011 { 4012 return (&args->fsroot); 4013 } 4014 4015 /* ARGSUSED */ 4016 void 4017 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi, 4018 
struct svc_req *req, cred_t *cr, bool_t ro) 4019 { 4020 int error; 4021 vnode_t *vp; 4022 struct vattr *vap; 4023 struct vattr va; 4024 ulong_t val; 4025 4026 vap = NULL; 4027 4028 vp = nfs3_fhtovp(&args->object, exi); 4029 4030 DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req, 4031 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 4032 PATHCONF3args *, args); 4033 4034 if (vp == NULL) { 4035 error = ESTALE; 4036 goto out; 4037 } 4038 4039 if (is_system_labeled()) { 4040 bslabel_t *clabel = req->rq_label; 4041 4042 ASSERT(clabel != NULL); 4043 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *, 4044 "got client label from request(1)", struct svc_req *, req); 4045 4046 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4047 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK, 4048 exi)) { 4049 resp->status = NFS3ERR_ACCES; 4050 goto out1; 4051 } 4052 } 4053 } 4054 4055 va.va_mask = AT_ALL; 4056 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va; 4057 4058 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL); 4059 if (error) 4060 goto out; 4061 resp->resok.info.link_max = (uint32)val; 4062 4063 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL); 4064 if (error) 4065 goto out; 4066 resp->resok.info.name_max = (uint32)val; 4067 4068 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL); 4069 if (error) 4070 goto out; 4071 if (val == 1) 4072 resp->resok.info.no_trunc = TRUE; 4073 else 4074 resp->resok.info.no_trunc = FALSE; 4075 4076 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL); 4077 if (error) 4078 goto out; 4079 if (val == 1) 4080 resp->resok.info.chown_restricted = TRUE; 4081 else 4082 resp->resok.info.chown_restricted = FALSE; 4083 4084 resp->status = NFS3_OK; 4085 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); 4086 resp->resok.info.case_insensitive = FALSE; 4087 resp->resok.info.case_preserving = TRUE; 4088 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req, 4089 cred_t *, cr, vnode_t *, vp, struct 
exportinfo *, exi, 4090 PATHCONF3res *, resp); 4091 VN_RELE(vp); 4092 return; 4093 4094 out: 4095 if (curthread->t_flag & T_WOULDBLOCK) { 4096 curthread->t_flag &= ~T_WOULDBLOCK; 4097 resp->status = NFS3ERR_JUKEBOX; 4098 } else 4099 resp->status = puterrno3(error); 4100 out1: 4101 DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req, 4102 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 4103 PATHCONF3res *, resp); 4104 if (vp != NULL) 4105 VN_RELE(vp); 4106 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes); 4107 } 4108 4109 void * 4110 rfs3_pathconf_getfh(PATHCONF3args *args) 4111 { 4112 4113 return (&args->object); 4114 } 4115 4116 void 4117 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, 4118 struct svc_req *req, cred_t *cr, bool_t ro) 4119 { 4120 nfs3_srv_t *ns; 4121 int error; 4122 vnode_t *vp; 4123 struct vattr *bvap; 4124 struct vattr bva; 4125 struct vattr *avap; 4126 struct vattr ava; 4127 4128 bvap = NULL; 4129 avap = NULL; 4130 4131 vp = nfs3_fhtovp(&args->file, exi); 4132 4133 DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req, 4134 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 4135 COMMIT3args *, args); 4136 4137 if (vp == NULL) { 4138 error = ESTALE; 4139 goto out; 4140 } 4141 4142 ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */ 4143 ns = nfs3_get_srv(); 4144 bva.va_mask = AT_ALL; 4145 error = VOP_GETATTR(vp, &bva, 0, cr, NULL); 4146 4147 /* 4148 * If we can't get the attributes, then we can't do the 4149 * right access checking. So, we'll fail the request. 
4150 */ 4151 if (error) 4152 goto out; 4153 4154 bvap = &bva; 4155 4156 if (rdonly(ro, vp)) { 4157 resp->status = NFS3ERR_ROFS; 4158 goto out1; 4159 } 4160 4161 if (vp->v_type != VREG) { 4162 resp->status = NFS3ERR_INVAL; 4163 goto out1; 4164 } 4165 4166 if (is_system_labeled()) { 4167 bslabel_t *clabel = req->rq_label; 4168 4169 ASSERT(clabel != NULL); 4170 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *, 4171 "got client label from request(1)", struct svc_req *, req); 4172 4173 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4174 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK, 4175 exi)) { 4176 resp->status = NFS3ERR_ACCES; 4177 goto out1; 4178 } 4179 } 4180 } 4181 4182 if (crgetuid(cr) != bva.va_uid && 4183 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL))) 4184 goto out; 4185 4186 error = VOP_FSYNC(vp, FSYNC, cr, NULL); 4187 4188 ava.va_mask = AT_ALL; 4189 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava; 4190 4191 if (error) 4192 goto out; 4193 4194 resp->status = NFS3_OK; 4195 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); 4196 resp->resok.verf = ns->write3verf; 4197 4198 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req, 4199 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 4200 COMMIT3res *, resp); 4201 4202 VN_RELE(vp); 4203 4204 return; 4205 4206 out: 4207 if (curthread->t_flag & T_WOULDBLOCK) { 4208 curthread->t_flag &= ~T_WOULDBLOCK; 4209 resp->status = NFS3ERR_JUKEBOX; 4210 } else 4211 resp->status = puterrno3(error); 4212 out1: 4213 DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req, 4214 cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, 4215 COMMIT3res *, resp); 4216 4217 if (vp != NULL) 4218 VN_RELE(vp); 4219 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc); 4220 } 4221 4222 void * 4223 rfs3_commit_getfh(COMMIT3args *args) 4224 { 4225 4226 return (&args->file); 4227 } 4228 4229 static int 4230 sattr3_to_vattr(sattr3 *sap, struct vattr *vap) 4231 { 4232 4233 vap->va_mask = 0; 4234 4235 if 
(sap->mode.set_it) { 4236 vap->va_mode = (mode_t)sap->mode.mode; 4237 vap->va_mask |= AT_MODE; 4238 } 4239 if (sap->uid.set_it) { 4240 vap->va_uid = (uid_t)sap->uid.uid; 4241 vap->va_mask |= AT_UID; 4242 } 4243 if (sap->gid.set_it) { 4244 vap->va_gid = (gid_t)sap->gid.gid; 4245 vap->va_mask |= AT_GID; 4246 } 4247 if (sap->size.set_it) { 4248 if (sap->size.size > (size3)((u_longlong_t)-1)) 4249 return (EINVAL); 4250 vap->va_size = sap->size.size; 4251 vap->va_mask |= AT_SIZE; 4252 } 4253 if (sap->atime.set_it == SET_TO_CLIENT_TIME) { 4254 #ifndef _LP64 4255 /* check time validity */ 4256 if (!NFS3_TIME_OK(sap->atime.atime.seconds)) 4257 return (EOVERFLOW); 4258 #endif 4259 /* 4260 * nfs protocol defines times as unsigned so don't extend sign, 4261 * unless sysadmin set nfs_allow_preepoch_time. 4262 */ 4263 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, 4264 sap->atime.atime.seconds); 4265 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds; 4266 vap->va_mask |= AT_ATIME; 4267 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) { 4268 gethrestime(&vap->va_atime); 4269 vap->va_mask |= AT_ATIME; 4270 } 4271 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) { 4272 #ifndef _LP64 4273 /* check time validity */ 4274 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds)) 4275 return (EOVERFLOW); 4276 #endif 4277 /* 4278 * nfs protocol defines times as unsigned so don't extend sign, 4279 * unless sysadmin set nfs_allow_preepoch_time. 
4280 */ 4281 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, 4282 sap->mtime.mtime.seconds); 4283 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds; 4284 vap->va_mask |= AT_MTIME; 4285 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) { 4286 gethrestime(&vap->va_mtime); 4287 vap->va_mask |= AT_MTIME; 4288 } 4289 4290 return (0); 4291 } 4292 4293 static const ftype3 vt_to_nf3[] = { 4294 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0 4295 }; 4296 4297 static int 4298 vattr_to_fattr3(struct vattr *vap, fattr3 *fap) 4299 { 4300 4301 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 4302 /* Return error if time or size overflow */ 4303 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) { 4304 return (EOVERFLOW); 4305 } 4306 fap->type = vt_to_nf3[vap->va_type]; 4307 fap->mode = (mode3)(vap->va_mode & MODEMASK); 4308 fap->nlink = (uint32)vap->va_nlink; 4309 if (vap->va_uid == UID_NOBODY) 4310 fap->uid = (uid3)NFS_UID_NOBODY; 4311 else 4312 fap->uid = (uid3)vap->va_uid; 4313 if (vap->va_gid == GID_NOBODY) 4314 fap->gid = (gid3)NFS_GID_NOBODY; 4315 else 4316 fap->gid = (gid3)vap->va_gid; 4317 fap->size = (size3)vap->va_size; 4318 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks; 4319 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev); 4320 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev); 4321 fap->fsid = (uint64)vap->va_fsid; 4322 fap->fileid = (fileid3)vap->va_nodeid; 4323 fap->atime.seconds = vap->va_atime.tv_sec; 4324 fap->atime.nseconds = vap->va_atime.tv_nsec; 4325 fap->mtime.seconds = vap->va_mtime.tv_sec; 4326 fap->mtime.nseconds = vap->va_mtime.tv_nsec; 4327 fap->ctime.seconds = vap->va_ctime.tv_sec; 4328 fap->ctime.nseconds = vap->va_ctime.tv_nsec; 4329 return (0); 4330 } 4331 4332 static int 4333 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap) 4334 { 4335 4336 /* Return error if time or size overflow */ 4337 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) && 4338 NFS_TIME_T_OK(vap->va_ctime.tv_sec) && 4339 
NFS3_SIZE_OK(vap->va_size))) { 4340 return (EOVERFLOW); 4341 } 4342 wccap->size = (size3)vap->va_size; 4343 wccap->mtime.seconds = vap->va_mtime.tv_sec; 4344 wccap->mtime.nseconds = vap->va_mtime.tv_nsec; 4345 wccap->ctime.seconds = vap->va_ctime.tv_sec; 4346 wccap->ctime.nseconds = vap->va_ctime.tv_nsec; 4347 return (0); 4348 } 4349 4350 static void 4351 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap) 4352 { 4353 4354 /* don't return attrs if time overflow */ 4355 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) { 4356 poap->attributes = TRUE; 4357 } else 4358 poap->attributes = FALSE; 4359 } 4360 4361 void 4362 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap) 4363 { 4364 4365 /* don't return attrs if time overflow */ 4366 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) { 4367 poap->attributes = TRUE; 4368 } else 4369 poap->attributes = FALSE; 4370 } 4371 4372 static void 4373 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp) 4374 { 4375 vattr_to_pre_op_attr(bvap, &wccp->before); 4376 vattr_to_post_op_attr(avap, &wccp->after); 4377 } 4378 4379 static int 4380 rdma_setup_read_data3(READ3args *args, READ3resok *rok) 4381 { 4382 struct clist *wcl; 4383 int wlist_len; 4384 count3 count = rok->count; 4385 4386 wcl = args->wlist; 4387 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) 4388 return (FALSE); 4389 4390 wcl = args->wlist; 4391 rok->wlist_len = wlist_len; 4392 rok->wlist = wcl; 4393 return (TRUE); 4394 } 4395 4396 void 4397 rfs3_srv_zone_init(nfs_globals_t *ng) 4398 { 4399 nfs3_srv_t *ns; 4400 struct rfs3_verf_overlay { 4401 uint_t id; /* a "unique" identifier */ 4402 int ts; /* a unique timestamp */ 4403 } *verfp; 4404 timestruc_t now; 4405 4406 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); 4407 4408 /* 4409 * The following algorithm attempts to find a unique verifier 4410 * to be used as the write verifier returned from the server 4411 * to the client. 
It is important that this verifier change 4412 * whenever the server reboots. Of secondary importance, it 4413 * is important for the verifier to be unique between two 4414 * different servers. 4415 * 4416 * Thus, an attempt is made to use the system hostid and the 4417 * current time in seconds when the nfssrv kernel module is 4418 * loaded. It is assumed that an NFS server will not be able 4419 * to boot and then to reboot in less than a second. If the 4420 * hostid has not been set, then the current high resolution 4421 * time is used. This will ensure different verifiers each 4422 * time the server reboots and minimize the chances that two 4423 * different servers will have the same verifier. 4424 */ 4425 4426 #ifndef lint 4427 /* 4428 * We ASSERT that this constant logic expression is 4429 * always true because in the past, it wasn't. 4430 */ 4431 ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf)); 4432 #endif 4433 4434 gethrestime(&now); 4435 verfp = (struct rfs3_verf_overlay *)&ns->write3verf; 4436 verfp->ts = (int)now.tv_sec; 4437 verfp->id = zone_get_hostid(NULL); 4438 4439 if (verfp->id == 0) 4440 verfp->id = (uint_t)now.tv_nsec; 4441 4442 ng->nfs3_srv = ns; 4443 } 4444 4445 void 4446 rfs3_srv_zone_fini(nfs_globals_t *ng) 4447 { 4448 nfs3_srv_t *ns = ng->nfs3_srv; 4449 4450 ng->nfs3_srv = NULL; 4451 4452 kmem_free(ns, sizeof (*ns)); 4453 } 4454 4455 void 4456 rfs3_srvrinit(void) 4457 { 4458 nfs3_srv_caller_id = fs_new_caller_id(); 4459 } 4460 4461 void 4462 rfs3_srvrfini(void) 4463 { 4464 /* Nothing to do */ 4465 } 4466