1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 
95 */ 96 /* ARGSUSED */ 97 void 98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 99 struct svc_req *req, cred_t *cr) 100 { 101 int error; 102 vnode_t *vp; 103 struct vattr va; 104 105 vp = nfs_fhtovp(fhp, exi); 106 if (vp == NULL) { 107 ns->ns_status = NFSERR_STALE; 108 return; 109 } 110 111 /* 112 * Do the getattr. 113 */ 114 va.va_mask = AT_ALL; /* we want all the attributes */ 115 116 error = rfs4_delegated_getattr(vp, &va, 0, cr); 117 118 /* check for overflows */ 119 if (!error) { 120 acl_perm(vp, exi, &va, cr); 121 error = vattr_to_nattr(&va, &ns->ns_attr); 122 } 123 124 VN_RELE(vp); 125 126 ns->ns_status = puterrno(error); 127 } 128 void * 129 rfs_getattr_getfh(fhandle_t *fhp) 130 { 131 return (fhp); 132 } 133 134 /* 135 * Set file attributes. 136 * Sets the attributes of the file with the given fhandle. Returns 137 * the new attributes. 138 */ 139 void 140 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 141 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 142 { 143 int error; 144 int flag; 145 int in_crit = 0; 146 vnode_t *vp; 147 struct vattr va; 148 struct vattr bva; 149 struct flock64 bf; 150 caller_context_t ct; 151 152 153 vp = nfs_fhtovp(&args->saa_fh, exi); 154 if (vp == NULL) { 155 ns->ns_status = NFSERR_STALE; 156 return; 157 } 158 159 if (rdonly(exi, req) || vn_is_readonly(vp)) { 160 VN_RELE(vp); 161 ns->ns_status = NFSERR_ROFS; 162 return; 163 } 164 165 error = sattr_to_vattr(&args->saa_sa, &va); 166 if (error) { 167 VN_RELE(vp); 168 ns->ns_status = puterrno(error); 169 return; 170 } 171 172 /* 173 * If the client is requesting a change to the mtime, 174 * but the nanosecond field is set to 1 billion, then 175 * this is a flag to the server that it should set the 176 * atime and mtime fields to the server's current time. 177 * The 1 billion number actually came from the client 178 * as 1 million, but the units in the over the wire 179 * request are microseconds instead of nanoseconds. 
180 * 181 * This is an overload of the protocol and should be 182 * documented in the NFS Version 2 protocol specification. 183 */ 184 if (va.va_mask & AT_MTIME) { 185 if (va.va_mtime.tv_nsec == 1000000000) { 186 gethrestime(&va.va_mtime); 187 va.va_atime = va.va_mtime; 188 va.va_mask |= AT_ATIME; 189 flag = 0; 190 } else 191 flag = ATTR_UTIME; 192 } else 193 flag = 0; 194 195 /* 196 * If the filesystem is exported with nosuid, then mask off 197 * the setuid and setgid bits. 198 */ 199 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 200 (exi->exi_export.ex_flags & EX_NOSUID)) 201 va.va_mode &= ~(VSUID | VSGID); 202 203 ct.cc_sysid = 0; 204 ct.cc_pid = 0; 205 ct.cc_caller_id = nfs2_srv_caller_id; 206 ct.cc_flags = CC_DONTBLOCK; 207 208 /* 209 * We need to specially handle size changes because it is 210 * possible for the client to create a file with modes 211 * which indicate read-only, but with the file opened for 212 * writing. If the client then tries to set the size of 213 * the file, then the normal access checking done in 214 * VOP_SETATTR would prevent the client from doing so, 215 * although it should be legal for it to do so. To get 216 * around this, we do the access checking for ourselves 217 * and then use VOP_SPACE which doesn't do the access 218 * checking which VOP_SETATTR does. VOP_SPACE can only 219 * operate on VREG files, let VOP_SETATTR handle the other 220 * extremely rare cases. 221 * Also the client should not be allowed to change the 222 * size of the file if there is a conflicting non-blocking 223 * mandatory lock in the region of change. 
224 */ 225 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 226 if (nbl_need_check(vp)) { 227 nbl_start_crit(vp, RW_READER); 228 in_crit = 1; 229 } 230 231 bva.va_mask = AT_UID | AT_SIZE; 232 233 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 234 235 if (error) { 236 if (in_crit) 237 nbl_end_crit(vp); 238 VN_RELE(vp); 239 ns->ns_status = puterrno(error); 240 return; 241 } 242 243 if (in_crit) { 244 u_offset_t offset; 245 ssize_t length; 246 247 if (va.va_size < bva.va_size) { 248 offset = va.va_size; 249 length = bva.va_size - va.va_size; 250 } else { 251 offset = bva.va_size; 252 length = va.va_size - bva.va_size; 253 } 254 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 255 NULL)) { 256 error = EACCES; 257 } 258 } 259 260 if (crgetuid(cr) == bva.va_uid && !error && 261 va.va_size != bva.va_size) { 262 va.va_mask &= ~AT_SIZE; 263 bf.l_type = F_WRLCK; 264 bf.l_whence = 0; 265 bf.l_start = (off64_t)va.va_size; 266 bf.l_len = 0; 267 bf.l_sysid = 0; 268 bf.l_pid = 0; 269 270 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 271 (offset_t)va.va_size, cr, &ct); 272 } 273 if (in_crit) 274 nbl_end_crit(vp); 275 } else 276 error = 0; 277 278 /* 279 * Do the setattr. 280 */ 281 if (!error && va.va_mask) { 282 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 283 } 284 285 /* 286 * check if the monitor on either vop_space or vop_setattr detected 287 * a delegation conflict and if so, mark the thread flag as 288 * wouldblock so that the response is dropped and the client will 289 * try again. 290 */ 291 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 292 VN_RELE(vp); 293 curthread->t_flag |= T_WOULDBLOCK; 294 return; 295 } 296 297 if (!error) { 298 va.va_mask = AT_ALL; /* get everything */ 299 300 error = rfs4_delegated_getattr(vp, &va, 0, cr); 301 302 /* check for overflows */ 303 if (!error) { 304 acl_perm(vp, exi, &va, cr); 305 error = vattr_to_nattr(&va, &ns->ns_attr); 306 } 307 } 308 309 ct.cc_flags = 0; 310 311 /* 312 * Force modified metadata out to stable storage. 
313 */ 314 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 315 316 VN_RELE(vp); 317 318 ns->ns_status = puterrno(error); 319 } 320 void * 321 rfs_setattr_getfh(struct nfssaargs *args) 322 { 323 return (&args->saa_fh); 324 } 325 326 /* 327 * Directory lookup. 328 * Returns an fhandle and file attributes for file name in a directory. 329 */ 330 /* ARGSUSED */ 331 void 332 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 333 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 334 { 335 int error; 336 vnode_t *dvp; 337 vnode_t *vp; 338 struct vattr va; 339 fhandle_t *fhp = da->da_fhandle; 340 struct sec_ol sec = {0, 0}; 341 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 342 char *name; 343 struct sockaddr *ca; 344 345 /* 346 * Trusted Extension doesn't support NFSv2. MOUNT 347 * will reject v2 clients. Need to prevent v2 client 348 * access via WebNFS here. 349 */ 350 if (is_system_labeled() && req->rq_vers == 2) { 351 dr->dr_status = NFSERR_ACCES; 352 return; 353 } 354 355 /* 356 * Disallow NULL paths 357 */ 358 if (da->da_name == NULL || *da->da_name == '\0') { 359 dr->dr_status = NFSERR_ACCES; 360 return; 361 } 362 363 /* 364 * Allow lookups from the root - the default 365 * location of the public filehandle. 366 */ 367 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 368 dvp = rootdir; 369 VN_HOLD(dvp); 370 } else { 371 dvp = nfs_fhtovp(fhp, exi); 372 if (dvp == NULL) { 373 dr->dr_status = NFSERR_STALE; 374 return; 375 } 376 } 377 378 /* 379 * Not allow lookup beyond root. 380 * If the filehandle matches a filehandle of the exi, 381 * then the ".." refers beyond the root of an exported filesystem. 
382 */ 383 if (strcmp(da->da_name, "..") == 0 && 384 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 385 VN_RELE(dvp); 386 dr->dr_status = NFSERR_NOENT; 387 return; 388 } 389 390 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 391 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 392 MAXPATHLEN); 393 394 if (name == NULL) { 395 dr->dr_status = NFSERR_ACCES; 396 return; 397 } 398 399 /* 400 * If the public filehandle is used then allow 401 * a multi-component lookup, i.e. evaluate 402 * a pathname and follow symbolic links if 403 * necessary. 404 * 405 * This may result in a vnode in another filesystem 406 * which is OK as long as the filesystem is exported. 407 */ 408 if (PUBLIC_FH2(fhp)) { 409 publicfh_flag = TRUE; 410 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 411 &sec); 412 } else { 413 /* 414 * Do a normal single component lookup. 415 */ 416 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 417 NULL, NULL, NULL); 418 } 419 420 if (name != da->da_name) 421 kmem_free(name, MAXPATHLEN); 422 423 424 if (!error) { 425 va.va_mask = AT_ALL; /* we want everything */ 426 427 error = rfs4_delegated_getattr(vp, &va, 0, cr); 428 429 /* check for overflows */ 430 if (!error) { 431 acl_perm(vp, exi, &va, cr); 432 error = vattr_to_nattr(&va, &dr->dr_attr); 433 if (!error) { 434 if (sec.sec_flags & SEC_QUERY) 435 error = makefh_ol(&dr->dr_fhandle, exi, 436 sec.sec_index); 437 else { 438 error = makefh(&dr->dr_fhandle, vp, 439 exi); 440 if (!error && publicfh_flag && 441 !chk_clnt_sec(exi, req)) 442 auth_weak = TRUE; 443 } 444 } 445 } 446 VN_RELE(vp); 447 } 448 449 VN_RELE(dvp); 450 451 /* 452 * If publicfh_flag is true then we have called rfs_publicfh_mclookup 453 * and have obtained a new exportinfo in exi which needs to be 454 * released. Note the the original exportinfo pointed to by exi 455 * will be released by the caller, comon_dispatch. 
456 */ 457 if (publicfh_flag && exi != NULL) 458 exi_rele(exi); 459 460 /* 461 * If it's public fh, no 0x81, and client's flavor is 462 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 463 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 464 */ 465 if (auth_weak) 466 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 467 else 468 dr->dr_status = puterrno(error); 469 } 470 void * 471 rfs_lookup_getfh(struct nfsdiropargs *da) 472 { 473 return (da->da_fhandle); 474 } 475 476 /* 477 * Read symbolic link. 478 * Returns the string in the symbolic link at the given fhandle. 479 */ 480 /* ARGSUSED */ 481 void 482 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 483 struct svc_req *req, cred_t *cr) 484 { 485 int error; 486 struct iovec iov; 487 struct uio uio; 488 vnode_t *vp; 489 struct vattr va; 490 struct sockaddr *ca; 491 char *name = NULL; 492 493 vp = nfs_fhtovp(fhp, exi); 494 if (vp == NULL) { 495 rl->rl_data = NULL; 496 rl->rl_status = NFSERR_STALE; 497 return; 498 } 499 500 va.va_mask = AT_MODE; 501 502 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 503 504 if (error) { 505 VN_RELE(vp); 506 rl->rl_data = NULL; 507 rl->rl_status = puterrno(error); 508 return; 509 } 510 511 if (MANDLOCK(vp, va.va_mode)) { 512 VN_RELE(vp); 513 rl->rl_data = NULL; 514 rl->rl_status = NFSERR_ACCES; 515 return; 516 } 517 518 /* 519 * XNFS and RFC1094 require us to return ENXIO if argument 520 * is not a link. BUGID 1138002. 521 */ 522 if (vp->v_type != VLNK) { 523 VN_RELE(vp); 524 rl->rl_data = NULL; 525 rl->rl_status = NFSERR_NXIO; 526 return; 527 } 528 529 /* 530 * Allocate data for pathname. This will be freed by rfs_rlfree. 
531 */ 532 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 533 534 /* 535 * Set up io vector to read sym link data 536 */ 537 iov.iov_base = rl->rl_data; 538 iov.iov_len = NFS_MAXPATHLEN; 539 uio.uio_iov = &iov; 540 uio.uio_iovcnt = 1; 541 uio.uio_segflg = UIO_SYSSPACE; 542 uio.uio_extflg = UIO_COPY_CACHED; 543 uio.uio_loffset = (offset_t)0; 544 uio.uio_resid = NFS_MAXPATHLEN; 545 546 /* 547 * Do the readlink. 548 */ 549 error = VOP_READLINK(vp, &uio, cr, NULL); 550 551 VN_RELE(vp); 552 553 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 554 rl->rl_data[rl->rl_count] = '\0'; 555 556 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 557 name = nfscmd_convname(ca, exi, rl->rl_data, 558 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 559 560 if (name != NULL && name != rl->rl_data) { 561 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 562 rl->rl_data = name; 563 } 564 565 /* 566 * XNFS and RFC1094 require us to return ENXIO if argument 567 * is not a link. UFS returns EINVAL if this is the case, 568 * so we do the mapping here. BUGID 1138002. 569 */ 570 if (error == EINVAL) 571 rl->rl_status = NFSERR_NXIO; 572 else 573 rl->rl_status = puterrno(error); 574 575 } 576 void * 577 rfs_readlink_getfh(fhandle_t *fhp) 578 { 579 return (fhp); 580 } 581 /* 582 * Free data allocated by rfs_readlink 583 */ 584 void 585 rfs_rlfree(struct nfsrdlnres *rl) 586 { 587 if (rl->rl_data != NULL) 588 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 589 } 590 591 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 592 593 /* 594 * Read data. 595 * Returns some data read from the file at the given fhandle. 
596 */ 597 /* ARGSUSED */ 598 void 599 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 600 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 601 { 602 vnode_t *vp; 603 int error; 604 struct vattr va; 605 struct iovec iov; 606 struct uio uio; 607 mblk_t *mp; 608 int alloc_err = 0; 609 int in_crit = 0; 610 caller_context_t ct; 611 612 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 613 if (vp == NULL) { 614 rr->rr_data = NULL; 615 rr->rr_status = NFSERR_STALE; 616 return; 617 } 618 619 if (vp->v_type != VREG) { 620 VN_RELE(vp); 621 rr->rr_data = NULL; 622 rr->rr_status = NFSERR_ISDIR; 623 return; 624 } 625 626 ct.cc_sysid = 0; 627 ct.cc_pid = 0; 628 ct.cc_caller_id = nfs2_srv_caller_id; 629 ct.cc_flags = CC_DONTBLOCK; 630 631 /* 632 * Enter the critical region before calling VOP_RWLOCK 633 * to avoid a deadlock with write requests. 634 */ 635 if (nbl_need_check(vp)) { 636 nbl_start_crit(vp, RW_READER); 637 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 638 0, NULL)) { 639 nbl_end_crit(vp); 640 VN_RELE(vp); 641 rr->rr_data = NULL; 642 rr->rr_status = NFSERR_ACCES; 643 return; 644 } 645 in_crit = 1; 646 } 647 648 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 649 650 /* check if a monitor detected a delegation conflict */ 651 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 652 VN_RELE(vp); 653 /* mark as wouldblock so response is dropped */ 654 curthread->t_flag |= T_WOULDBLOCK; 655 656 rr->rr_data = NULL; 657 return; 658 } 659 660 va.va_mask = AT_ALL; 661 662 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 663 664 if (error) { 665 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 666 if (in_crit) 667 nbl_end_crit(vp); 668 669 VN_RELE(vp); 670 rr->rr_data = NULL; 671 rr->rr_status = puterrno(error); 672 673 return; 674 } 675 676 /* 677 * This is a kludge to allow reading of files created 678 * with no read permission. The owner of the file 679 * is always allowed to read it. 
680 */ 681 if (crgetuid(cr) != va.va_uid) { 682 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 683 684 if (error) { 685 /* 686 * Exec is the same as read over the net because 687 * of demand loading. 688 */ 689 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 690 } 691 if (error) { 692 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 693 if (in_crit) 694 nbl_end_crit(vp); 695 VN_RELE(vp); 696 rr->rr_data = NULL; 697 rr->rr_status = puterrno(error); 698 699 return; 700 } 701 } 702 703 if (MANDLOCK(vp, va.va_mode)) { 704 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 705 if (in_crit) 706 nbl_end_crit(vp); 707 708 VN_RELE(vp); 709 rr->rr_data = NULL; 710 rr->rr_status = NFSERR_ACCES; 711 712 return; 713 } 714 715 rr->rr_ok.rrok_wlist_len = 0; 716 rr->rr_ok.rrok_wlist = NULL; 717 718 if ((u_offset_t)ra->ra_offset >= va.va_size) { 719 rr->rr_count = 0; 720 rr->rr_data = NULL; 721 /* 722 * In this case, status is NFS_OK, but there is no data 723 * to encode. So set rr_mp to NULL. 724 */ 725 rr->rr_mp = NULL; 726 goto done; 727 } 728 729 if (ra->ra_wlist) { 730 mp = NULL; 731 rr->rr_mp = NULL; 732 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 733 } else { 734 /* 735 * mp will contain the data to be sent out in the read reply. 736 * This will be freed after the reply has been sent out (by the 737 * driver). 738 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 739 * that the call to xdrmblk_putmblk() never fails. 
740 */ 741 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 742 &alloc_err); 743 ASSERT(mp != NULL); 744 ASSERT(alloc_err == 0); 745 746 rr->rr_mp = mp; 747 748 /* 749 * Set up io vector 750 */ 751 iov.iov_base = (caddr_t)mp->b_datap->db_base; 752 iov.iov_len = ra->ra_count; 753 } 754 755 uio.uio_iov = &iov; 756 uio.uio_iovcnt = 1; 757 uio.uio_segflg = UIO_SYSSPACE; 758 uio.uio_extflg = UIO_COPY_CACHED; 759 uio.uio_loffset = (offset_t)ra->ra_offset; 760 uio.uio_resid = ra->ra_count; 761 762 error = VOP_READ(vp, &uio, 0, cr, &ct); 763 764 if (error) { 765 if (mp) 766 freeb(mp); 767 768 /* 769 * check if a monitor detected a delegation conflict and 770 * mark as wouldblock so response is dropped 771 */ 772 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 773 curthread->t_flag |= T_WOULDBLOCK; 774 else 775 rr->rr_status = puterrno(error); 776 777 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 778 if (in_crit) 779 nbl_end_crit(vp); 780 781 VN_RELE(vp); 782 rr->rr_data = NULL; 783 784 return; 785 } 786 787 /* 788 * Get attributes again so we can send the latest access 789 * time to the client side for his cache. 
790 */ 791 va.va_mask = AT_ALL; 792 793 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 794 795 if (error) { 796 if (mp) 797 freeb(mp); 798 799 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 800 if (in_crit) 801 nbl_end_crit(vp); 802 803 VN_RELE(vp); 804 rr->rr_data = NULL; 805 rr->rr_status = puterrno(error); 806 807 return; 808 } 809 810 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 811 812 if (mp) { 813 rr->rr_data = (char *)mp->b_datap->db_base; 814 } else { 815 if (ra->ra_wlist) { 816 rr->rr_data = (caddr_t)iov.iov_base; 817 if (!rdma_setup_read_data2(ra, rr)) { 818 rr->rr_data = NULL; 819 rr->rr_status = puterrno(NFSERR_INVAL); 820 } 821 } 822 } 823 done: 824 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 825 if (in_crit) 826 nbl_end_crit(vp); 827 828 acl_perm(vp, exi, &va, cr); 829 830 /* check for overflows */ 831 error = vattr_to_nattr(&va, &rr->rr_attr); 832 833 VN_RELE(vp); 834 835 rr->rr_status = puterrno(error); 836 } 837 838 /* 839 * Free data allocated by rfs_read 840 */ 841 void 842 rfs_rdfree(struct nfsrdresult *rr) 843 { 844 mblk_t *mp; 845 846 if (rr->rr_status == NFS_OK) { 847 mp = rr->rr_mp; 848 if (mp != NULL) 849 freeb(mp); 850 } 851 } 852 853 void * 854 rfs_read_getfh(struct nfsreadargs *ra) 855 { 856 return (&ra->ra_fhandle); 857 } 858 859 #define MAX_IOVECS 12 860 861 #ifdef DEBUG 862 static int rfs_write_sync_hits = 0; 863 static int rfs_write_sync_misses = 0; 864 #endif 865 866 /* 867 * Write data to file. 868 * Returns attributes of a file after writing some data to it. 869 * 870 * Any changes made here, especially in error handling might have 871 * to also be done in rfs_write (which clusters write requests). 
872 */ 873 void 874 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 875 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 876 { 877 int error; 878 vnode_t *vp; 879 rlim64_t rlimit; 880 struct vattr va; 881 struct uio uio; 882 struct iovec iov[MAX_IOVECS]; 883 mblk_t *m; 884 struct iovec *iovp; 885 int iovcnt; 886 cred_t *savecred; 887 int in_crit = 0; 888 caller_context_t ct; 889 890 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 891 if (vp == NULL) { 892 ns->ns_status = NFSERR_STALE; 893 return; 894 } 895 896 if (rdonly(exi, req)) { 897 VN_RELE(vp); 898 ns->ns_status = NFSERR_ROFS; 899 return; 900 } 901 902 if (vp->v_type != VREG) { 903 VN_RELE(vp); 904 ns->ns_status = NFSERR_ISDIR; 905 return; 906 } 907 908 ct.cc_sysid = 0; 909 ct.cc_pid = 0; 910 ct.cc_caller_id = nfs2_srv_caller_id; 911 ct.cc_flags = CC_DONTBLOCK; 912 913 va.va_mask = AT_UID|AT_MODE; 914 915 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 916 917 if (error) { 918 VN_RELE(vp); 919 ns->ns_status = puterrno(error); 920 921 return; 922 } 923 924 if (crgetuid(cr) != va.va_uid) { 925 /* 926 * This is a kludge to allow writes of files created 927 * with read only permission. The owner of the file 928 * is always allowed to write it. 929 */ 930 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 931 932 if (error) { 933 VN_RELE(vp); 934 ns->ns_status = puterrno(error); 935 return; 936 } 937 } 938 939 /* 940 * Can't access a mandatory lock file. This might cause 941 * the NFS service thread to block forever waiting for a 942 * lock to be released that will never be released. 943 */ 944 if (MANDLOCK(vp, va.va_mode)) { 945 VN_RELE(vp); 946 ns->ns_status = NFSERR_ACCES; 947 return; 948 } 949 950 /* 951 * We have to enter the critical region before calling VOP_RWLOCK 952 * to avoid a deadlock with ufs. 
953 */ 954 if (nbl_need_check(vp)) { 955 nbl_start_crit(vp, RW_READER); 956 in_crit = 1; 957 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 958 wa->wa_count, 0, NULL)) { 959 error = EACCES; 960 goto out; 961 } 962 } 963 964 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 965 966 /* check if a monitor detected a delegation conflict */ 967 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 968 VN_RELE(vp); 969 /* mark as wouldblock so response is dropped */ 970 curthread->t_flag |= T_WOULDBLOCK; 971 return; 972 } 973 974 if (wa->wa_data || wa->wa_rlist) { 975 /* Do the RDMA thing if necessary */ 976 if (wa->wa_rlist) { 977 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 978 iov[0].iov_len = wa->wa_count; 979 } else { 980 iov[0].iov_base = wa->wa_data; 981 iov[0].iov_len = wa->wa_count; 982 } 983 uio.uio_iov = iov; 984 uio.uio_iovcnt = 1; 985 uio.uio_segflg = UIO_SYSSPACE; 986 uio.uio_extflg = UIO_COPY_DEFAULT; 987 uio.uio_loffset = (offset_t)wa->wa_offset; 988 uio.uio_resid = wa->wa_count; 989 /* 990 * The limit is checked on the client. We 991 * should allow any size writes here. 992 */ 993 uio.uio_llimit = curproc->p_fsz_ctl; 994 rlimit = uio.uio_llimit - wa->wa_offset; 995 if (rlimit < (rlim64_t)uio.uio_resid) 996 uio.uio_resid = (uint_t)rlimit; 997 998 /* 999 * for now we assume no append mode 1000 */ 1001 /* 1002 * We're changing creds because VM may fault and we need 1003 * the cred of the current thread to be used if quota 1004 * checking is enabled. 
1005 */ 1006 savecred = curthread->t_cred; 1007 curthread->t_cred = cr; 1008 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1009 curthread->t_cred = savecred; 1010 } else { 1011 iovcnt = 0; 1012 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1013 iovcnt++; 1014 if (iovcnt <= MAX_IOVECS) { 1015 #ifdef DEBUG 1016 rfs_write_sync_hits++; 1017 #endif 1018 iovp = iov; 1019 } else { 1020 #ifdef DEBUG 1021 rfs_write_sync_misses++; 1022 #endif 1023 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1024 } 1025 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1026 uio.uio_iov = iovp; 1027 uio.uio_iovcnt = iovcnt; 1028 uio.uio_segflg = UIO_SYSSPACE; 1029 uio.uio_extflg = UIO_COPY_DEFAULT; 1030 uio.uio_loffset = (offset_t)wa->wa_offset; 1031 uio.uio_resid = wa->wa_count; 1032 /* 1033 * The limit is checked on the client. We 1034 * should allow any size writes here. 1035 */ 1036 uio.uio_llimit = curproc->p_fsz_ctl; 1037 rlimit = uio.uio_llimit - wa->wa_offset; 1038 if (rlimit < (rlim64_t)uio.uio_resid) 1039 uio.uio_resid = (uint_t)rlimit; 1040 1041 /* 1042 * For now we assume no append mode. 1043 */ 1044 /* 1045 * We're changing creds because VM may fault and we need 1046 * the cred of the current thread to be used if quota 1047 * checking is enabled. 1048 */ 1049 savecred = curthread->t_cred; 1050 curthread->t_cred = cr; 1051 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1052 curthread->t_cred = savecred; 1053 1054 if (iovp != iov) 1055 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1056 } 1057 1058 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1059 1060 if (!error) { 1061 /* 1062 * Get attributes again so we send the latest mod 1063 * time to the client side for his cache. 
1064 */ 1065 va.va_mask = AT_ALL; /* now we want everything */ 1066 1067 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1068 1069 /* check for overflows */ 1070 if (!error) { 1071 acl_perm(vp, exi, &va, cr); 1072 error = vattr_to_nattr(&va, &ns->ns_attr); 1073 } 1074 } 1075 1076 out: 1077 if (in_crit) 1078 nbl_end_crit(vp); 1079 VN_RELE(vp); 1080 1081 /* check if a monitor detected a delegation conflict */ 1082 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1083 /* mark as wouldblock so response is dropped */ 1084 curthread->t_flag |= T_WOULDBLOCK; 1085 else 1086 ns->ns_status = puterrno(error); 1087 1088 } 1089 1090 struct rfs_async_write { 1091 struct nfswriteargs *wa; 1092 struct nfsattrstat *ns; 1093 struct svc_req *req; 1094 cred_t *cr; 1095 kthread_t *thread; 1096 struct rfs_async_write *list; 1097 }; 1098 1099 struct rfs_async_write_list { 1100 fhandle_t *fhp; 1101 kcondvar_t cv; 1102 struct rfs_async_write *list; 1103 struct rfs_async_write_list *next; 1104 }; 1105 1106 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1107 static kmutex_t rfs_async_write_lock; 1108 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1109 1110 #define MAXCLIOVECS 42 1111 #define RFSWRITE_INITVAL (enum nfsstat) -1 1112 1113 #ifdef DEBUG 1114 static int rfs_write_hits = 0; 1115 static int rfs_write_misses = 0; 1116 #endif 1117 1118 /* 1119 * Write data to file. 1120 * Returns attributes of a file after writing some data to it. 
1121 */ 1122 void 1123 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1124 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1125 { 1126 int error; 1127 vnode_t *vp; 1128 rlim64_t rlimit; 1129 struct vattr va; 1130 struct uio uio; 1131 struct rfs_async_write_list *lp; 1132 struct rfs_async_write_list *nlp; 1133 struct rfs_async_write *rp; 1134 struct rfs_async_write *nrp; 1135 struct rfs_async_write *trp; 1136 struct rfs_async_write *lrp; 1137 int data_written; 1138 int iovcnt; 1139 mblk_t *m; 1140 struct iovec *iovp; 1141 struct iovec *niovp; 1142 struct iovec iov[MAXCLIOVECS]; 1143 int count; 1144 int rcount; 1145 uint_t off; 1146 uint_t len; 1147 struct rfs_async_write nrpsp; 1148 struct rfs_async_write_list nlpsp; 1149 ushort_t t_flag; 1150 cred_t *savecred; 1151 int in_crit = 0; 1152 caller_context_t ct; 1153 1154 if (!rfs_write_async) { 1155 rfs_write_sync(wa, ns, exi, req, cr); 1156 return; 1157 } 1158 1159 /* 1160 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1161 * is considered an OK. 1162 */ 1163 ns->ns_status = RFSWRITE_INITVAL; 1164 1165 nrp = &nrpsp; 1166 nrp->wa = wa; 1167 nrp->ns = ns; 1168 nrp->req = req; 1169 nrp->cr = cr; 1170 nrp->thread = curthread; 1171 1172 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1173 1174 /* 1175 * Look to see if there is already a cluster started 1176 * for this file. 1177 */ 1178 mutex_enter(&rfs_async_write_lock); 1179 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { 1180 if (bcmp(&wa->wa_fhandle, lp->fhp, 1181 sizeof (fhandle_t)) == 0) 1182 break; 1183 } 1184 1185 /* 1186 * If lp is non-NULL, then there is already a cluster 1187 * started. We need to place ourselves in the cluster 1188 * list in the right place as determined by starting 1189 * offset. Conflicts with non-blocking mandatory locked 1190 * regions will be checked when the cluster is processed. 
1191 */ 1192 if (lp != NULL) { 1193 rp = lp->list; 1194 trp = NULL; 1195 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1196 trp = rp; 1197 rp = rp->list; 1198 } 1199 nrp->list = rp; 1200 if (trp == NULL) 1201 lp->list = nrp; 1202 else 1203 trp->list = nrp; 1204 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1205 cv_wait(&lp->cv, &rfs_async_write_lock); 1206 mutex_exit(&rfs_async_write_lock); 1207 1208 return; 1209 } 1210 1211 /* 1212 * No cluster started yet, start one and add ourselves 1213 * to the list of clusters. 1214 */ 1215 nrp->list = NULL; 1216 1217 nlp = &nlpsp; 1218 nlp->fhp = &wa->wa_fhandle; 1219 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1220 nlp->list = nrp; 1221 nlp->next = NULL; 1222 1223 if (rfs_async_write_head == NULL) { 1224 rfs_async_write_head = nlp; 1225 } else { 1226 lp = rfs_async_write_head; 1227 while (lp->next != NULL) 1228 lp = lp->next; 1229 lp->next = nlp; 1230 } 1231 mutex_exit(&rfs_async_write_lock); 1232 1233 /* 1234 * Convert the file handle common to all of the requests 1235 * in this cluster to a vnode. 1236 */ 1237 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1238 if (vp == NULL) { 1239 mutex_enter(&rfs_async_write_lock); 1240 if (rfs_async_write_head == nlp) 1241 rfs_async_write_head = nlp->next; 1242 else { 1243 lp = rfs_async_write_head; 1244 while (lp->next != nlp) 1245 lp = lp->next; 1246 lp->next = nlp->next; 1247 } 1248 t_flag = curthread->t_flag & T_WOULDBLOCK; 1249 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1250 rp->ns->ns_status = NFSERR_STALE; 1251 rp->thread->t_flag |= t_flag; 1252 } 1253 cv_broadcast(&nlp->cv); 1254 mutex_exit(&rfs_async_write_lock); 1255 1256 return; 1257 } 1258 1259 /* 1260 * Can only write regular files. Attempts to write any 1261 * other file types fail with EISDIR. 
1262 */ 1263 if (vp->v_type != VREG) { 1264 VN_RELE(vp); 1265 mutex_enter(&rfs_async_write_lock); 1266 if (rfs_async_write_head == nlp) 1267 rfs_async_write_head = nlp->next; 1268 else { 1269 lp = rfs_async_write_head; 1270 while (lp->next != nlp) 1271 lp = lp->next; 1272 lp->next = nlp->next; 1273 } 1274 t_flag = curthread->t_flag & T_WOULDBLOCK; 1275 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1276 rp->ns->ns_status = NFSERR_ISDIR; 1277 rp->thread->t_flag |= t_flag; 1278 } 1279 cv_broadcast(&nlp->cv); 1280 mutex_exit(&rfs_async_write_lock); 1281 1282 return; 1283 } 1284 1285 /* 1286 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1287 * deadlock with ufs. 1288 */ 1289 if (nbl_need_check(vp)) { 1290 nbl_start_crit(vp, RW_READER); 1291 in_crit = 1; 1292 } 1293 1294 ct.cc_sysid = 0; 1295 ct.cc_pid = 0; 1296 ct.cc_caller_id = nfs2_srv_caller_id; 1297 ct.cc_flags = CC_DONTBLOCK; 1298 1299 /* 1300 * Lock the file for writing. This operation provides 1301 * the delay which allows clusters to grow. 1302 */ 1303 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1304 1305 /* check if a monitor detected a delegation conflict */ 1306 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1307 if (in_crit) 1308 nbl_end_crit(vp); 1309 VN_RELE(vp); 1310 /* mark as wouldblock so response is dropped */ 1311 curthread->t_flag |= T_WOULDBLOCK; 1312 mutex_enter(&rfs_async_write_lock); 1313 if (rfs_async_write_head == nlp) 1314 rfs_async_write_head = nlp->next; 1315 else { 1316 lp = rfs_async_write_head; 1317 while (lp->next != nlp) 1318 lp = lp->next; 1319 lp->next = nlp->next; 1320 } 1321 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1322 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1323 rp->ns->ns_status = puterrno(error); 1324 rp->thread->t_flag |= T_WOULDBLOCK; 1325 } 1326 } 1327 cv_broadcast(&nlp->cv); 1328 mutex_exit(&rfs_async_write_lock); 1329 1330 return; 1331 } 1332 1333 /* 1334 * Disconnect this cluster from the list of clusters. 
1335 * The cluster that is being dealt with must be fixed 1336 * in size after this point, so there is no reason 1337 * to leave it on the list so that new requests can 1338 * find it. 1339 * 1340 * The algorithm is that the first write request will 1341 * create a cluster, convert the file handle to a 1342 * vnode pointer, and then lock the file for writing. 1343 * This request is not likely to be clustered with 1344 * any others. However, the next request will create 1345 * a new cluster and be blocked in VOP_RWLOCK while 1346 * the first request is being processed. This delay 1347 * will allow more requests to be clustered in this 1348 * second cluster. 1349 */ 1350 mutex_enter(&rfs_async_write_lock); 1351 if (rfs_async_write_head == nlp) 1352 rfs_async_write_head = nlp->next; 1353 else { 1354 lp = rfs_async_write_head; 1355 while (lp->next != nlp) 1356 lp = lp->next; 1357 lp->next = nlp->next; 1358 } 1359 mutex_exit(&rfs_async_write_lock); 1360 1361 /* 1362 * Step through the list of requests in this cluster. 1363 * We need to check permissions to make sure that all 1364 * of the requests have sufficient permission to write 1365 * the file. A cluster can be composed of requests 1366 * from different clients and different users on each 1367 * client. 1368 * 1369 * As a side effect, we also calculate the size of the 1370 * byte range that this cluster encompasses. 1371 */ 1372 rp = nlp->list; 1373 off = rp->wa->wa_offset; 1374 len = (uint_t)0; 1375 do { 1376 if (rdonly(exi, rp->req)) { 1377 rp->ns->ns_status = NFSERR_ROFS; 1378 t_flag = curthread->t_flag & T_WOULDBLOCK; 1379 rp->thread->t_flag |= t_flag; 1380 continue; 1381 } 1382 1383 va.va_mask = AT_UID|AT_MODE; 1384 1385 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1386 1387 if (!error) { 1388 if (crgetuid(rp->cr) != va.va_uid) { 1389 /* 1390 * This is a kludge to allow writes of files 1391 * created with read only permission. The 1392 * owner of the file is always allowed to 1393 * write it. 
1394 */ 1395 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1396 } 1397 if (!error && MANDLOCK(vp, va.va_mode)) 1398 error = EACCES; 1399 } 1400 1401 /* 1402 * Check for a conflict with a nbmand-locked region. 1403 */ 1404 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1405 rp->wa->wa_count, 0, NULL)) { 1406 error = EACCES; 1407 } 1408 1409 if (error) { 1410 rp->ns->ns_status = puterrno(error); 1411 t_flag = curthread->t_flag & T_WOULDBLOCK; 1412 rp->thread->t_flag |= t_flag; 1413 continue; 1414 } 1415 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1416 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1417 } while ((rp = rp->list) != NULL); 1418 1419 /* 1420 * Step through the cluster attempting to gather as many 1421 * requests which are contiguous as possible. These 1422 * contiguous requests are handled via one call to VOP_WRITE 1423 * instead of different calls to VOP_WRITE. We also keep 1424 * track of the fact that any data was written. 1425 */ 1426 rp = nlp->list; 1427 data_written = 0; 1428 do { 1429 /* 1430 * Skip any requests which are already marked as having an 1431 * error. 1432 */ 1433 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1434 rp = rp->list; 1435 continue; 1436 } 1437 1438 /* 1439 * Count the number of iovec's which are required 1440 * to handle this set of requests. One iovec is 1441 * needed for each data buffer, whether addressed 1442 * by wa_data or by the b_rptr pointers in the 1443 * mblk chains. 
1444 */ 1445 iovcnt = 0; 1446 lrp = rp; 1447 for (;;) { 1448 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1449 iovcnt++; 1450 else { 1451 m = lrp->wa->wa_mblk; 1452 while (m != NULL) { 1453 iovcnt++; 1454 m = m->b_cont; 1455 } 1456 } 1457 if (lrp->list == NULL || 1458 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1459 lrp->wa->wa_offset + lrp->wa->wa_count != 1460 lrp->list->wa->wa_offset) { 1461 lrp = lrp->list; 1462 break; 1463 } 1464 lrp = lrp->list; 1465 } 1466 1467 if (iovcnt <= MAXCLIOVECS) { 1468 #ifdef DEBUG 1469 rfs_write_hits++; 1470 #endif 1471 niovp = iov; 1472 } else { 1473 #ifdef DEBUG 1474 rfs_write_misses++; 1475 #endif 1476 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1477 } 1478 /* 1479 * Put together the scatter/gather iovecs. 1480 */ 1481 iovp = niovp; 1482 trp = rp; 1483 count = 0; 1484 do { 1485 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1486 if (trp->wa->wa_rlist) { 1487 iovp->iov_base = 1488 (char *)((trp->wa->wa_rlist)-> 1489 u.c_daddr3); 1490 iovp->iov_len = trp->wa->wa_count; 1491 } else { 1492 iovp->iov_base = trp->wa->wa_data; 1493 iovp->iov_len = trp->wa->wa_count; 1494 } 1495 iovp++; 1496 } else { 1497 m = trp->wa->wa_mblk; 1498 rcount = trp->wa->wa_count; 1499 while (m != NULL) { 1500 iovp->iov_base = (caddr_t)m->b_rptr; 1501 iovp->iov_len = (m->b_wptr - m->b_rptr); 1502 rcount -= iovp->iov_len; 1503 if (rcount < 0) 1504 iovp->iov_len += rcount; 1505 iovp++; 1506 if (rcount <= 0) 1507 break; 1508 m = m->b_cont; 1509 } 1510 } 1511 count += trp->wa->wa_count; 1512 trp = trp->list; 1513 } while (trp != lrp); 1514 1515 uio.uio_iov = niovp; 1516 uio.uio_iovcnt = iovcnt; 1517 uio.uio_segflg = UIO_SYSSPACE; 1518 uio.uio_extflg = UIO_COPY_DEFAULT; 1519 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1520 uio.uio_resid = count; 1521 /* 1522 * The limit is checked on the client. We 1523 * should allow any size writes here. 
1524 */ 1525 uio.uio_llimit = curproc->p_fsz_ctl; 1526 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1527 if (rlimit < (rlim64_t)uio.uio_resid) 1528 uio.uio_resid = (uint_t)rlimit; 1529 1530 /* 1531 * For now we assume no append mode. 1532 */ 1533 1534 /* 1535 * We're changing creds because VM may fault 1536 * and we need the cred of the current 1537 * thread to be used if quota * checking is 1538 * enabled. 1539 */ 1540 savecred = curthread->t_cred; 1541 curthread->t_cred = cr; 1542 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1543 curthread->t_cred = savecred; 1544 1545 /* check if a monitor detected a delegation conflict */ 1546 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1547 /* mark as wouldblock so response is dropped */ 1548 curthread->t_flag |= T_WOULDBLOCK; 1549 1550 if (niovp != iov) 1551 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1552 1553 if (!error) { 1554 data_written = 1; 1555 /* 1556 * Get attributes again so we send the latest mod 1557 * time to the client side for his cache. 1558 */ 1559 va.va_mask = AT_ALL; /* now we want everything */ 1560 1561 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1562 1563 if (!error) 1564 acl_perm(vp, exi, &va, rp->cr); 1565 } 1566 1567 /* 1568 * Fill in the status responses for each request 1569 * which was just handled. Also, copy the latest 1570 * attributes in to the attribute responses if 1571 * appropriate. 1572 */ 1573 t_flag = curthread->t_flag & T_WOULDBLOCK; 1574 do { 1575 rp->thread->t_flag |= t_flag; 1576 /* check for overflows */ 1577 if (!error) { 1578 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1579 } 1580 rp->ns->ns_status = puterrno(error); 1581 rp = rp->list; 1582 } while (rp != lrp); 1583 } while (rp != NULL); 1584 1585 /* 1586 * If any data was written at all, then we need to flush 1587 * the data and metadata to stable storage. 
1588 */ 1589 if (data_written) { 1590 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1591 1592 if (!error) { 1593 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1594 } 1595 } 1596 1597 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1598 1599 if (in_crit) 1600 nbl_end_crit(vp); 1601 VN_RELE(vp); 1602 1603 t_flag = curthread->t_flag & T_WOULDBLOCK; 1604 mutex_enter(&rfs_async_write_lock); 1605 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1606 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1607 rp->ns->ns_status = puterrno(error); 1608 rp->thread->t_flag |= t_flag; 1609 } 1610 } 1611 cv_broadcast(&nlp->cv); 1612 mutex_exit(&rfs_async_write_lock); 1613 1614 } 1615 1616 void * 1617 rfs_write_getfh(struct nfswriteargs *wa) 1618 { 1619 return (&wa->wa_fhandle); 1620 } 1621 1622 /* 1623 * Create a file. 1624 * Creates a file with given attributes and returns those attributes 1625 * and an fhandle for the new file. 1626 */ 1627 void 1628 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1629 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1630 { 1631 int error; 1632 int lookuperr; 1633 int in_crit = 0; 1634 struct vattr va; 1635 vnode_t *vp; 1636 vnode_t *realvp; 1637 vnode_t *dvp; 1638 char *name = args->ca_da.da_name; 1639 vnode_t *tvp = NULL; 1640 int mode; 1641 int lookup_ok; 1642 bool_t trunc; 1643 struct sockaddr *ca; 1644 1645 /* 1646 * Disallow NULL paths 1647 */ 1648 if (name == NULL || *name == '\0') { 1649 dr->dr_status = NFSERR_ACCES; 1650 return; 1651 } 1652 1653 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1654 if (dvp == NULL) { 1655 dr->dr_status = NFSERR_STALE; 1656 return; 1657 } 1658 1659 error = sattr_to_vattr(args->ca_sa, &va); 1660 if (error) { 1661 dr->dr_status = puterrno(error); 1662 return; 1663 } 1664 1665 /* 1666 * Must specify the mode. 
1667 */ 1668 if (!(va.va_mask & AT_MODE)) { 1669 VN_RELE(dvp); 1670 dr->dr_status = NFSERR_INVAL; 1671 return; 1672 } 1673 1674 /* 1675 * This is a completely gross hack to make mknod 1676 * work over the wire until we can wack the protocol 1677 */ 1678 if ((va.va_mode & IFMT) == IFCHR) { 1679 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1680 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1681 else { 1682 va.va_type = VCHR; 1683 /* 1684 * uncompress the received dev_t 1685 * if the top half is zero indicating a request 1686 * from an `older style' OS. 1687 */ 1688 if ((va.va_size & 0xffff0000) == 0) 1689 va.va_rdev = nfsv2_expdev(va.va_size); 1690 else 1691 va.va_rdev = (dev_t)va.va_size; 1692 } 1693 va.va_mask &= ~AT_SIZE; 1694 } else if ((va.va_mode & IFMT) == IFBLK) { 1695 va.va_type = VBLK; 1696 /* 1697 * uncompress the received dev_t 1698 * if the top half is zero indicating a request 1699 * from an `older style' OS. 1700 */ 1701 if ((va.va_size & 0xffff0000) == 0) 1702 va.va_rdev = nfsv2_expdev(va.va_size); 1703 else 1704 va.va_rdev = (dev_t)va.va_size; 1705 va.va_mask &= ~AT_SIZE; 1706 } else if ((va.va_mode & IFMT) == IFSOCK) { 1707 va.va_type = VSOCK; 1708 } else { 1709 va.va_type = VREG; 1710 } 1711 va.va_mode &= ~IFMT; 1712 va.va_mask |= AT_TYPE; 1713 1714 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1715 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1716 MAXPATHLEN); 1717 if (name == NULL) { 1718 dr->dr_status = puterrno(EINVAL); 1719 return; 1720 } 1721 1722 /* 1723 * Why was the choice made to use VWRITE as the mode to the 1724 * call to VOP_CREATE ? This results in a bug. When a client 1725 * opens a file that already exists and is RDONLY, the second 1726 * open fails with an EACESS because of the mode. 1727 * bug ID 1054648. 
1728 */ 1729 lookup_ok = 0; 1730 mode = VWRITE; 1731 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1732 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1733 NULL, NULL, NULL); 1734 if (!error) { 1735 struct vattr at; 1736 1737 lookup_ok = 1; 1738 at.va_mask = AT_MODE; 1739 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1740 if (!error) 1741 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1742 VN_RELE(tvp); 1743 tvp = NULL; 1744 } 1745 } 1746 1747 if (!lookup_ok) { 1748 if (rdonly(exi, req)) { 1749 error = EROFS; 1750 } else if (va.va_type != VREG && va.va_type != VFIFO && 1751 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1752 error = EPERM; 1753 } else { 1754 error = 0; 1755 } 1756 } 1757 1758 /* 1759 * If file size is being modified on an already existing file 1760 * make sure that there are no conflicting non-blocking mandatory 1761 * locks in the region being manipulated. Return EACCES if there 1762 * are conflicting locks. 1763 */ 1764 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1765 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1766 NULL, NULL, NULL); 1767 1768 if (!lookuperr && 1769 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1770 VN_RELE(tvp); 1771 curthread->t_flag |= T_WOULDBLOCK; 1772 goto out; 1773 } 1774 1775 if (!lookuperr && nbl_need_check(tvp)) { 1776 /* 1777 * The file exists. Now check if it has any 1778 * conflicting non-blocking mandatory locks 1779 * in the region being changed. 
1780 */ 1781 struct vattr bva; 1782 u_offset_t offset; 1783 ssize_t length; 1784 1785 nbl_start_crit(tvp, RW_READER); 1786 in_crit = 1; 1787 1788 bva.va_mask = AT_SIZE; 1789 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1790 if (!error) { 1791 if (va.va_size < bva.va_size) { 1792 offset = va.va_size; 1793 length = bva.va_size - va.va_size; 1794 } else { 1795 offset = bva.va_size; 1796 length = va.va_size - bva.va_size; 1797 } 1798 if (length) { 1799 if (nbl_conflict(tvp, NBL_WRITE, 1800 offset, length, 0, NULL)) { 1801 error = EACCES; 1802 } 1803 } 1804 } 1805 if (error) { 1806 nbl_end_crit(tvp); 1807 VN_RELE(tvp); 1808 in_crit = 0; 1809 } 1810 } else if (tvp != NULL) { 1811 VN_RELE(tvp); 1812 } 1813 } 1814 1815 if (!error) { 1816 /* 1817 * If filesystem is shared with nosuid the remove any 1818 * setuid/setgid bits on create. 1819 */ 1820 if (va.va_type == VREG && 1821 exi->exi_export.ex_flags & EX_NOSUID) 1822 va.va_mode &= ~(VSUID | VSGID); 1823 1824 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1825 NULL, NULL); 1826 1827 if (!error) { 1828 1829 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1830 trunc = TRUE; 1831 else 1832 trunc = FALSE; 1833 1834 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1835 VN_RELE(vp); 1836 curthread->t_flag |= T_WOULDBLOCK; 1837 goto out; 1838 } 1839 va.va_mask = AT_ALL; 1840 1841 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1842 1843 /* check for overflows */ 1844 if (!error) { 1845 acl_perm(vp, exi, &va, cr); 1846 error = vattr_to_nattr(&va, &dr->dr_attr); 1847 if (!error) { 1848 error = makefh(&dr->dr_fhandle, vp, 1849 exi); 1850 } 1851 } 1852 /* 1853 * Force modified metadata out to stable storage. 
1854 * 1855 * if a underlying vp exists, pass it to VOP_FSYNC 1856 */ 1857 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1858 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1859 else 1860 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1861 VN_RELE(vp); 1862 } 1863 1864 if (in_crit) { 1865 nbl_end_crit(tvp); 1866 VN_RELE(tvp); 1867 } 1868 } 1869 1870 /* 1871 * Force modified data and metadata out to stable storage. 1872 */ 1873 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1874 1875 out: 1876 1877 VN_RELE(dvp); 1878 1879 dr->dr_status = puterrno(error); 1880 1881 if (name != args->ca_da.da_name) 1882 kmem_free(name, MAXPATHLEN); 1883 } 1884 void * 1885 rfs_create_getfh(struct nfscreatargs *args) 1886 { 1887 return (args->ca_da.da_fhandle); 1888 } 1889 1890 /* 1891 * Remove a file. 1892 * Remove named file from parent directory. 1893 */ 1894 void 1895 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1896 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1897 { 1898 int error = 0; 1899 vnode_t *vp; 1900 vnode_t *targvp; 1901 int in_crit = 0; 1902 1903 /* 1904 * Disallow NULL paths 1905 */ 1906 if (da->da_name == NULL || *da->da_name == '\0') { 1907 *status = NFSERR_ACCES; 1908 return; 1909 } 1910 1911 vp = nfs_fhtovp(da->da_fhandle, exi); 1912 if (vp == NULL) { 1913 *status = NFSERR_STALE; 1914 return; 1915 } 1916 1917 if (rdonly(exi, req)) { 1918 VN_RELE(vp); 1919 *status = NFSERR_ROFS; 1920 return; 1921 } 1922 1923 /* 1924 * Check for a conflict with a non-blocking mandatory share reservation. 1925 */ 1926 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1927 NULL, cr, NULL, NULL, NULL); 1928 if (error != 0) { 1929 VN_RELE(vp); 1930 *status = puterrno(error); 1931 return; 1932 } 1933 1934 /* 1935 * If the file is delegated to an v4 client, then initiate 1936 * recall and drop this request (by setting T_WOULDBLOCK). 1937 * The client will eventually re-transmit the request and 1938 * (hopefully), by then, the v4 client will have returned 1939 * the delegation. 
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Don't remove a file with an active nbmand lock on it. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	/* Common exit: leave the critical region and drop both holds. */
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must live in the same export as the
	 * source; NFSv2 cannot rename across exported filesystems.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/*
	 * NOTE(review): to_exi is released before the identity check
	 * below; only the pointer value is compared, never dereferenced.
	 */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles name the parent directories of the rename. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; client retries after delegation recall. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/* Don't rename a file with an active nbmand lock on it. */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached vnode-to-path mapping in sync with the new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}

void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* The link target directory must be in the same export. */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
2198 */ 2199 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2200 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2201 2202 VN_RELE(tovp); 2203 VN_RELE(fromvp); 2204 2205 *status = puterrno(error); 2206 2207 } 2208 void * 2209 rfs_link_getfh(struct nfslinkargs *args) 2210 { 2211 return (args->la_from); 2212 } 2213 2214 /* 2215 * Symbolicly link to a file. 2216 * Create a file (to) with the given attributes which is a symbolic link 2217 * to the given path name (to). 2218 */ 2219 void 2220 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2221 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2222 { 2223 int error; 2224 struct vattr va; 2225 vnode_t *vp; 2226 vnode_t *svp; 2227 int lerror; 2228 struct sockaddr *ca; 2229 char *name = NULL; 2230 2231 /* 2232 * Disallow NULL paths 2233 */ 2234 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2235 *status = NFSERR_ACCES; 2236 return; 2237 } 2238 2239 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2240 if (vp == NULL) { 2241 *status = NFSERR_STALE; 2242 return; 2243 } 2244 2245 if (rdonly(exi, req)) { 2246 VN_RELE(vp); 2247 *status = NFSERR_ROFS; 2248 return; 2249 } 2250 2251 error = sattr_to_vattr(args->sla_sa, &va); 2252 if (error) { 2253 VN_RELE(vp); 2254 *status = puterrno(error); 2255 return; 2256 } 2257 2258 if (!(va.va_mask & AT_MODE)) { 2259 VN_RELE(vp); 2260 *status = NFSERR_INVAL; 2261 return; 2262 } 2263 2264 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2265 name = nfscmd_convname(ca, exi, args->sla_tnm, 2266 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2267 2268 if (name == NULL) { 2269 *status = NFSERR_ACCES; 2270 return; 2271 } 2272 2273 va.va_type = VLNK; 2274 va.va_mask |= AT_TYPE; 2275 2276 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2277 2278 /* 2279 * Force new data and metadata out to stable storage. 
2280 */ 2281 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2282 NULL, cr, NULL, NULL, NULL); 2283 2284 if (!lerror) { 2285 (void) VOP_FSYNC(svp, 0, cr, NULL); 2286 VN_RELE(svp); 2287 } 2288 2289 /* 2290 * Force modified data and metadata out to stable storage. 2291 */ 2292 (void) VOP_FSYNC(vp, 0, cr, NULL); 2293 2294 VN_RELE(vp); 2295 2296 *status = puterrno(error); 2297 if (name != args->sla_tnm) 2298 kmem_free(name, MAXPATHLEN); 2299 2300 } 2301 void * 2302 rfs_symlink_getfh(struct nfsslargs *args) 2303 { 2304 return (args->sla_from.da_fhandle); 2305 } 2306 2307 /* 2308 * Make a directory. 2309 * Create a directory with the given name, parent directory, and attributes. 2310 * Returns a file handle and attributes for the new directory. 2311 */ 2312 void 2313 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2314 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2315 { 2316 int error; 2317 struct vattr va; 2318 vnode_t *dvp = NULL; 2319 vnode_t *vp; 2320 char *name = args->ca_da.da_name; 2321 2322 /* 2323 * Disallow NULL paths 2324 */ 2325 if (name == NULL || *name == '\0') { 2326 dr->dr_status = NFSERR_ACCES; 2327 return; 2328 } 2329 2330 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2331 if (vp == NULL) { 2332 dr->dr_status = NFSERR_STALE; 2333 return; 2334 } 2335 2336 if (rdonly(exi, req)) { 2337 VN_RELE(vp); 2338 dr->dr_status = NFSERR_ROFS; 2339 return; 2340 } 2341 2342 error = sattr_to_vattr(args->ca_sa, &va); 2343 if (error) { 2344 VN_RELE(vp); 2345 dr->dr_status = puterrno(error); 2346 return; 2347 } 2348 2349 if (!(va.va_mask & AT_MODE)) { 2350 VN_RELE(vp); 2351 dr->dr_status = NFSERR_INVAL; 2352 return; 2353 } 2354 2355 va.va_type = VDIR; 2356 va.va_mask |= AT_TYPE; 2357 2358 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2359 2360 if (!error) { 2361 /* 2362 * Attribtutes of the newly created directory should 2363 * be returned to the client. 
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): the attributes come from dvp (the
			 * new directory) but acl_perm() is handed vp (the
			 * parent) — verify this is intentional.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}

void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;


	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR now takes a new third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}

void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read directory entries.
 * Returns directory entries, their size, and an EOF flag for the
 * given directory file handle.
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns no entries and no EOF indication. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the request to the protocol's maximum transfer size. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
2516 */ 2517 rd->rd_bufsize = (uint_t)rda->rda_count; 2518 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2519 2520 /* 2521 * Set up io vector to read directory data 2522 */ 2523 iov.iov_base = (caddr_t)rd->rd_entries; 2524 iov.iov_len = rda->rda_count; 2525 uio.uio_iov = &iov; 2526 uio.uio_iovcnt = 1; 2527 uio.uio_segflg = UIO_SYSSPACE; 2528 uio.uio_extflg = UIO_COPY_CACHED; 2529 uio.uio_loffset = (offset_t)rda->rda_offset; 2530 uio.uio_resid = rda->rda_count; 2531 2532 /* 2533 * read directory 2534 */ 2535 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2536 2537 /* 2538 * Clean up 2539 */ 2540 if (!error) { 2541 /* 2542 * set size and eof 2543 */ 2544 if (uio.uio_resid == rda->rda_count) { 2545 rd->rd_size = 0; 2546 rd->rd_eof = TRUE; 2547 } else { 2548 rd->rd_size = (uint32_t)(rda->rda_count - 2549 uio.uio_resid); 2550 rd->rd_eof = iseof ? TRUE : FALSE; 2551 } 2552 } 2553 2554 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2555 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2556 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2557 rda->rda_count, &ndata); 2558 2559 if (ret != 0) { 2560 size_t dropbytes; 2561 /* 2562 * We had to drop one or more entries in order to fit 2563 * during the character conversion. We need to patch 2564 * up the size and eof info. 2565 */ 2566 if (rd->rd_eof) 2567 rd->rd_eof = FALSE; 2568 dropbytes = nfscmd_dropped_entrysize( 2569 (struct dirent64 *)rd->rd_entries, nents, ret); 2570 rd->rd_size -= dropbytes; 2571 } 2572 if (ndata == NULL) { 2573 ndata = (char *)rd->rd_entries; 2574 } else if (ndata != (char *)rd->rd_entries) { 2575 kmem_free(rd->rd_entries, rd->rd_bufsize); 2576 rd->rd_entries = (void *)ndata; 2577 rd->rd_bufsize = rda->rda_count; 2578 } 2579 2580 bad: 2581 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2582 2583 #if 0 /* notyet */ 2584 /* 2585 * Don't do this. 
It causes local disk writes when just 2586 * reading the file and the overhead is deemed larger 2587 * than the benefit. 2588 */ 2589 /* 2590 * Force modified metadata out to stable storage. 2591 */ 2592 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2593 #endif 2594 2595 VN_RELE(vp); 2596 2597 rd->rd_status = puterrno(error); 2598 2599 } 2600 void * 2601 rfs_readdir_getfh(struct nfsrddirargs *rda) 2602 { 2603 return (&rda->rda_fh); 2604 } 2605 void 2606 rfs_rddirfree(struct nfsrddirres *rd) 2607 { 2608 if (rd->rd_entries != NULL) 2609 kmem_free(rd->rd_entries, rd->rd_bufsize); 2610 } 2611 2612 /* ARGSUSED */ 2613 void 2614 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2615 struct svc_req *req, cred_t *cr) 2616 { 2617 int error; 2618 struct statvfs64 sb; 2619 vnode_t *vp; 2620 2621 vp = nfs_fhtovp(fh, exi); 2622 if (vp == NULL) { 2623 fs->fs_status = NFSERR_STALE; 2624 return; 2625 } 2626 2627 error = VFS_STATVFS(vp->v_vfsp, &sb); 2628 2629 if (!error) { 2630 fs->fs_tsize = nfstsize(); 2631 fs->fs_bsize = sb.f_frsize; 2632 fs->fs_blocks = sb.f_blocks; 2633 fs->fs_bfree = sb.f_bfree; 2634 fs->fs_bavail = sb.f_bavail; 2635 } 2636 2637 VN_RELE(vp); 2638 2639 fs->fs_status = puterrno(error); 2640 2641 } 2642 void * 2643 rfs_statfs_getfh(fhandle_t *fh) 2644 { 2645 return (fh); 2646 } 2647 2648 static int 2649 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2650 { 2651 vap->va_mask = 0; 2652 2653 /* 2654 * There was a sign extension bug in some VFS based systems 2655 * which stored the mode as a short. When it would get 2656 * assigned to a u_long, no sign extension would occur. 2657 * It needed to, but this wasn't noticed because sa_mode 2658 * would then get assigned back to the short, thus ignoring 2659 * the upper 16 bits of sa_mode. 2660 * 2661 * To make this implementation work for both broken 2662 * clients and good clients, we check for both versions 2663 * of the mode. 
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}

/*
 * Map vnode types (vtype_t values used as the index) to NFSv2
 * over-the-wire file types.  Types with no NFSv2 equivalent map to 0
 * (NFNON); VFIFO is special-cased in vattr_to_nattr below.
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};

/*
 * Convert a vattr into an over-the-wire NFSv2 nfsfattr.
 *
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow:
 * EFBIG for nodeid/size, EOVERFLOW for times.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* don't care about the default ACL; only VSA_ACL is requested */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/*
				 * maximal permissions: OR together every
				 * grant, then apply the CLASS_OBJ mask.
				 *
				 * NOTE(review): mask_perm and other_orig are
				 * only assigned inside the CLASS_OBJ/OTHER_OBJ
				 * cases.  A well-formed non-trivial ACL always
				 * contains both entry types, but if one were
				 * missing these would be read uninitialized
				 * below — confirm callers only pass complete
				 * ACLs.
				 */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/*
				 * minimal permissions: start from full
				 * group/other bits and AND away anything any
				 * relevant entry would deny.
				 */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va: replace only the group/other bits */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		/* free the ACL buffer VOP_GETSECATTR allocated for us */
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}

/*
 * One-time initialization of the NFSv2 server: the async-write lock and
 * the caller id used to identify this server to the filesystem layer.
 */
void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}

/*
 * Tear down what rfs_srvrinit set up.
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}

/*
 * Trim the client's RDMA write chunk list to exactly rr_count bytes of
 * READ data.  Walks the list, shortening the chunk where the data ends
 * (subsequent chunks become 0-len on later passes) and stops at the
 * first chunk with an invalid (zero) rkey.  Returns FALSE if the list
 * cannot hold all rr_count bytes, TRUE otherwise, with rr_count and the
 * reply's wlist fields updated to the actual data length.
 */
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
	struct clist *wcl;
	int data_len, avail_len, num;
	uint32_t count = rr->rr_count;

	data_len = num = avail_len = 0;

	wcl = ra->ra_wlist;
	while (wcl != NULL) {
		/* stop at the first chunk without a registered memory rkey */
		if (wcl->c_dmemhandle.mrc_rmr == 0)
			break;

		avail_len += wcl->c_len;
		if (wcl->c_len < count) {
			data_len += wcl->c_len;
		} else {
			/* Can make the rest chunks all 0-len */
			data_len += count;
			wcl->c_len = count;
		}
		/* count reaches 0 once the data has been fully placed */
		count -= wcl->c_len;
		num ++;
		wcl = wcl->c_next;
	}

	/*
	 * MUST fail if there are still more data
	 */
	if (count > 0) {
		DTRACE_PROBE2(nfss__e__read__wlist__fail,
		    int, data_len, int, count);
		return (FALSE);
	}

	wcl = ra->ra_wlist;
	rr->rr_count = data_len;
	rr->rr_ok.rrok_wlist_len = data_len;
	rr->rr_ok.rrok_wlist = wcl;

	return (TRUE);
}