1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 
95 */ 96 /* ARGSUSED */ 97 void 98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 99 struct svc_req *req, cred_t *cr) 100 { 101 int error; 102 vnode_t *vp; 103 struct vattr va; 104 105 vp = nfs_fhtovp(fhp, exi); 106 if (vp == NULL) { 107 ns->ns_status = NFSERR_STALE; 108 return; 109 } 110 111 /* 112 * Do the getattr. 113 */ 114 va.va_mask = AT_ALL; /* we want all the attributes */ 115 116 error = rfs4_delegated_getattr(vp, &va, 0, cr); 117 118 /* check for overflows */ 119 if (!error) { 120 /* Lie about the object type for a referral */ 121 if (vn_is_nfs_reparse(vp, cr)) 122 va.va_type = VLNK; 123 124 acl_perm(vp, exi, &va, cr); 125 error = vattr_to_nattr(&va, &ns->ns_attr); 126 } 127 128 VN_RELE(vp); 129 130 ns->ns_status = puterrno(error); 131 } 132 void * 133 rfs_getattr_getfh(fhandle_t *fhp) 134 { 135 return (fhp); 136 } 137 138 /* 139 * Set file attributes. 140 * Sets the attributes of the file with the given fhandle. Returns 141 * the new attributes. 142 */ 143 void 144 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 145 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 146 { 147 int error; 148 int flag; 149 int in_crit = 0; 150 vnode_t *vp; 151 struct vattr va; 152 struct vattr bva; 153 struct flock64 bf; 154 caller_context_t ct; 155 156 157 vp = nfs_fhtovp(&args->saa_fh, exi); 158 if (vp == NULL) { 159 ns->ns_status = NFSERR_STALE; 160 return; 161 } 162 163 if (rdonly(exi, req) || vn_is_readonly(vp)) { 164 VN_RELE(vp); 165 ns->ns_status = NFSERR_ROFS; 166 return; 167 } 168 169 error = sattr_to_vattr(&args->saa_sa, &va); 170 if (error) { 171 VN_RELE(vp); 172 ns->ns_status = puterrno(error); 173 return; 174 } 175 176 /* 177 * If the client is requesting a change to the mtime, 178 * but the nanosecond field is set to 1 billion, then 179 * this is a flag to the server that it should set the 180 * atime and mtime fields to the server's current time. 
181 * The 1 billion number actually came from the client 182 * as 1 million, but the units in the over the wire 183 * request are microseconds instead of nanoseconds. 184 * 185 * This is an overload of the protocol and should be 186 * documented in the NFS Version 2 protocol specification. 187 */ 188 if (va.va_mask & AT_MTIME) { 189 if (va.va_mtime.tv_nsec == 1000000000) { 190 gethrestime(&va.va_mtime); 191 va.va_atime = va.va_mtime; 192 va.va_mask |= AT_ATIME; 193 flag = 0; 194 } else 195 flag = ATTR_UTIME; 196 } else 197 flag = 0; 198 199 /* 200 * If the filesystem is exported with nosuid, then mask off 201 * the setuid and setgid bits. 202 */ 203 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 204 (exi->exi_export.ex_flags & EX_NOSUID)) 205 va.va_mode &= ~(VSUID | VSGID); 206 207 ct.cc_sysid = 0; 208 ct.cc_pid = 0; 209 ct.cc_caller_id = nfs2_srv_caller_id; 210 ct.cc_flags = CC_DONTBLOCK; 211 212 /* 213 * We need to specially handle size changes because it is 214 * possible for the client to create a file with modes 215 * which indicate read-only, but with the file opened for 216 * writing. If the client then tries to set the size of 217 * the file, then the normal access checking done in 218 * VOP_SETATTR would prevent the client from doing so, 219 * although it should be legal for it to do so. To get 220 * around this, we do the access checking for ourselves 221 * and then use VOP_SPACE which doesn't do the access 222 * checking which VOP_SETATTR does. VOP_SPACE can only 223 * operate on VREG files, let VOP_SETATTR handle the other 224 * extremely rare cases. 225 * Also the client should not be allowed to change the 226 * size of the file if there is a conflicting non-blocking 227 * mandatory lock in the region of change. 
228 */ 229 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 230 if (nbl_need_check(vp)) { 231 nbl_start_crit(vp, RW_READER); 232 in_crit = 1; 233 } 234 235 bva.va_mask = AT_UID | AT_SIZE; 236 237 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 238 239 if (error) { 240 if (in_crit) 241 nbl_end_crit(vp); 242 VN_RELE(vp); 243 ns->ns_status = puterrno(error); 244 return; 245 } 246 247 if (in_crit) { 248 u_offset_t offset; 249 ssize_t length; 250 251 if (va.va_size < bva.va_size) { 252 offset = va.va_size; 253 length = bva.va_size - va.va_size; 254 } else { 255 offset = bva.va_size; 256 length = va.va_size - bva.va_size; 257 } 258 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 259 NULL)) { 260 error = EACCES; 261 } 262 } 263 264 if (crgetuid(cr) == bva.va_uid && !error && 265 va.va_size != bva.va_size) { 266 va.va_mask &= ~AT_SIZE; 267 bf.l_type = F_WRLCK; 268 bf.l_whence = 0; 269 bf.l_start = (off64_t)va.va_size; 270 bf.l_len = 0; 271 bf.l_sysid = 0; 272 bf.l_pid = 0; 273 274 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 275 (offset_t)va.va_size, cr, &ct); 276 } 277 if (in_crit) 278 nbl_end_crit(vp); 279 } else 280 error = 0; 281 282 /* 283 * Do the setattr. 284 */ 285 if (!error && va.va_mask) { 286 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 287 } 288 289 /* 290 * check if the monitor on either vop_space or vop_setattr detected 291 * a delegation conflict and if so, mark the thread flag as 292 * wouldblock so that the response is dropped and the client will 293 * try again. 294 */ 295 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 296 VN_RELE(vp); 297 curthread->t_flag |= T_WOULDBLOCK; 298 return; 299 } 300 301 if (!error) { 302 va.va_mask = AT_ALL; /* get everything */ 303 304 error = rfs4_delegated_getattr(vp, &va, 0, cr); 305 306 /* check for overflows */ 307 if (!error) { 308 acl_perm(vp, exi, &va, cr); 309 error = vattr_to_nattr(&va, &ns->ns_attr); 310 } 311 } 312 313 ct.cc_flags = 0; 314 315 /* 316 * Force modified metadata out to stable storage. 
317 */ 318 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 319 320 VN_RELE(vp); 321 322 ns->ns_status = puterrno(error); 323 } 324 void * 325 rfs_setattr_getfh(struct nfssaargs *args) 326 { 327 return (&args->saa_fh); 328 } 329 330 /* 331 * Directory lookup. 332 * Returns an fhandle and file attributes for file name in a directory. 333 */ 334 /* ARGSUSED */ 335 void 336 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 337 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 338 { 339 int error; 340 vnode_t *dvp; 341 vnode_t *vp; 342 struct vattr va; 343 fhandle_t *fhp = da->da_fhandle; 344 struct sec_ol sec = {0, 0}; 345 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 346 char *name; 347 struct sockaddr *ca; 348 349 /* 350 * Trusted Extension doesn't support NFSv2. MOUNT 351 * will reject v2 clients. Need to prevent v2 client 352 * access via WebNFS here. 353 */ 354 if (is_system_labeled() && req->rq_vers == 2) { 355 dr->dr_status = NFSERR_ACCES; 356 return; 357 } 358 359 /* 360 * Disallow NULL paths 361 */ 362 if (da->da_name == NULL || *da->da_name == '\0') { 363 dr->dr_status = NFSERR_ACCES; 364 return; 365 } 366 367 /* 368 * Allow lookups from the root - the default 369 * location of the public filehandle. 370 */ 371 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 372 dvp = rootdir; 373 VN_HOLD(dvp); 374 } else { 375 dvp = nfs_fhtovp(fhp, exi); 376 if (dvp == NULL) { 377 dr->dr_status = NFSERR_STALE; 378 return; 379 } 380 } 381 382 /* 383 * Not allow lookup beyond root. 384 * If the filehandle matches a filehandle of the exi, 385 * then the ".." refers beyond the root of an exported filesystem. 
386 */ 387 if (strcmp(da->da_name, "..") == 0 && 388 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 389 VN_RELE(dvp); 390 dr->dr_status = NFSERR_NOENT; 391 return; 392 } 393 394 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 395 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 396 MAXPATHLEN); 397 398 if (name == NULL) { 399 dr->dr_status = NFSERR_ACCES; 400 return; 401 } 402 403 /* 404 * If the public filehandle is used then allow 405 * a multi-component lookup, i.e. evaluate 406 * a pathname and follow symbolic links if 407 * necessary. 408 * 409 * This may result in a vnode in another filesystem 410 * which is OK as long as the filesystem is exported. 411 */ 412 if (PUBLIC_FH2(fhp)) { 413 publicfh_flag = TRUE; 414 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 415 &sec); 416 } else { 417 /* 418 * Do a normal single component lookup. 419 */ 420 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 421 NULL, NULL, NULL); 422 } 423 424 if (name != da->da_name) 425 kmem_free(name, MAXPATHLEN); 426 427 428 if (!error) { 429 va.va_mask = AT_ALL; /* we want everything */ 430 431 error = rfs4_delegated_getattr(vp, &va, 0, cr); 432 433 /* check for overflows */ 434 if (!error) { 435 acl_perm(vp, exi, &va, cr); 436 error = vattr_to_nattr(&va, &dr->dr_attr); 437 if (!error) { 438 if (sec.sec_flags & SEC_QUERY) 439 error = makefh_ol(&dr->dr_fhandle, exi, 440 sec.sec_index); 441 else { 442 error = makefh(&dr->dr_fhandle, vp, 443 exi); 444 if (!error && publicfh_flag && 445 !chk_clnt_sec(exi, req)) 446 auth_weak = TRUE; 447 } 448 } 449 } 450 VN_RELE(vp); 451 } 452 453 VN_RELE(dvp); 454 455 /* 456 * If publicfh_flag is true then we have called rfs_publicfh_mclookup 457 * and have obtained a new exportinfo in exi which needs to be 458 * released. Note the the original exportinfo pointed to by exi 459 * will be released by the caller, comon_dispatch. 
460 */ 461 if (publicfh_flag && exi != NULL) 462 exi_rele(exi); 463 464 /* 465 * If it's public fh, no 0x81, and client's flavor is 466 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 467 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 468 */ 469 if (auth_weak) 470 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 471 else 472 dr->dr_status = puterrno(error); 473 } 474 void * 475 rfs_lookup_getfh(struct nfsdiropargs *da) 476 { 477 return (da->da_fhandle); 478 } 479 480 /* 481 * Read symbolic link. 482 * Returns the string in the symbolic link at the given fhandle. 483 */ 484 /* ARGSUSED */ 485 void 486 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 487 struct svc_req *req, cred_t *cr) 488 { 489 int error; 490 struct iovec iov; 491 struct uio uio; 492 vnode_t *vp; 493 struct vattr va; 494 struct sockaddr *ca; 495 char *name = NULL; 496 int is_referral = 0; 497 498 vp = nfs_fhtovp(fhp, exi); 499 if (vp == NULL) { 500 rl->rl_data = NULL; 501 rl->rl_status = NFSERR_STALE; 502 return; 503 } 504 505 va.va_mask = AT_MODE; 506 507 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 508 509 if (error) { 510 VN_RELE(vp); 511 rl->rl_data = NULL; 512 rl->rl_status = puterrno(error); 513 return; 514 } 515 516 if (MANDLOCK(vp, va.va_mode)) { 517 VN_RELE(vp); 518 rl->rl_data = NULL; 519 rl->rl_status = NFSERR_ACCES; 520 return; 521 } 522 523 /* We lied about the object type for a referral */ 524 if (vn_is_nfs_reparse(vp, cr)) 525 is_referral = 1; 526 527 /* 528 * XNFS and RFC1094 require us to return ENXIO if argument 529 * is not a link. BUGID 1138002. 530 */ 531 if (vp->v_type != VLNK && !is_referral) { 532 VN_RELE(vp); 533 rl->rl_data = NULL; 534 rl->rl_status = NFSERR_NXIO; 535 return; 536 } 537 538 /* 539 * Allocate data for pathname. This will be freed by rfs_rlfree. 
540 */ 541 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 542 543 if (is_referral) { 544 char *s; 545 size_t strsz; 546 547 /* Get an artificial symlink based on a referral */ 548 s = build_symlink(vp, cr, &strsz); 549 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 550 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 551 vnode_t *, vp, char *, s); 552 if (s == NULL) 553 error = EINVAL; 554 else { 555 error = 0; 556 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 557 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 558 kmem_free(s, strsz); 559 } 560 561 } else { 562 563 /* 564 * Set up io vector to read sym link data 565 */ 566 iov.iov_base = rl->rl_data; 567 iov.iov_len = NFS_MAXPATHLEN; 568 uio.uio_iov = &iov; 569 uio.uio_iovcnt = 1; 570 uio.uio_segflg = UIO_SYSSPACE; 571 uio.uio_extflg = UIO_COPY_CACHED; 572 uio.uio_loffset = (offset_t)0; 573 uio.uio_resid = NFS_MAXPATHLEN; 574 575 /* 576 * Do the readlink. 577 */ 578 error = VOP_READLINK(vp, &uio, cr, NULL); 579 580 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 581 582 if (!error) 583 rl->rl_data[rl->rl_count] = '\0'; 584 585 } 586 587 588 VN_RELE(vp); 589 590 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 591 name = nfscmd_convname(ca, exi, rl->rl_data, 592 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 593 594 if (name != NULL && name != rl->rl_data) { 595 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 596 rl->rl_data = name; 597 } 598 599 /* 600 * XNFS and RFC1094 require us to return ENXIO if argument 601 * is not a link. UFS returns EINVAL if this is the case, 602 * so we do the mapping here. BUGID 1138002. 
603 */ 604 if (error == EINVAL) 605 rl->rl_status = NFSERR_NXIO; 606 else 607 rl->rl_status = puterrno(error); 608 609 } 610 void * 611 rfs_readlink_getfh(fhandle_t *fhp) 612 { 613 return (fhp); 614 } 615 /* 616 * Free data allocated by rfs_readlink 617 */ 618 void 619 rfs_rlfree(struct nfsrdlnres *rl) 620 { 621 if (rl->rl_data != NULL) 622 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 623 } 624 625 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 626 627 /* 628 * Read data. 629 * Returns some data read from the file at the given fhandle. 630 */ 631 /* ARGSUSED */ 632 void 633 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 634 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 635 { 636 vnode_t *vp; 637 int error; 638 struct vattr va; 639 struct iovec iov; 640 struct uio uio; 641 mblk_t *mp; 642 int alloc_err = 0; 643 int in_crit = 0; 644 caller_context_t ct; 645 646 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 647 if (vp == NULL) { 648 rr->rr_data = NULL; 649 rr->rr_status = NFSERR_STALE; 650 return; 651 } 652 653 if (vp->v_type != VREG) { 654 VN_RELE(vp); 655 rr->rr_data = NULL; 656 rr->rr_status = NFSERR_ISDIR; 657 return; 658 } 659 660 ct.cc_sysid = 0; 661 ct.cc_pid = 0; 662 ct.cc_caller_id = nfs2_srv_caller_id; 663 ct.cc_flags = CC_DONTBLOCK; 664 665 /* 666 * Enter the critical region before calling VOP_RWLOCK 667 * to avoid a deadlock with write requests. 
668 */ 669 if (nbl_need_check(vp)) { 670 nbl_start_crit(vp, RW_READER); 671 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 672 0, NULL)) { 673 nbl_end_crit(vp); 674 VN_RELE(vp); 675 rr->rr_data = NULL; 676 rr->rr_status = NFSERR_ACCES; 677 return; 678 } 679 in_crit = 1; 680 } 681 682 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 683 684 /* check if a monitor detected a delegation conflict */ 685 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 686 VN_RELE(vp); 687 /* mark as wouldblock so response is dropped */ 688 curthread->t_flag |= T_WOULDBLOCK; 689 690 rr->rr_data = NULL; 691 return; 692 } 693 694 va.va_mask = AT_ALL; 695 696 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 697 698 if (error) { 699 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 700 if (in_crit) 701 nbl_end_crit(vp); 702 703 VN_RELE(vp); 704 rr->rr_data = NULL; 705 rr->rr_status = puterrno(error); 706 707 return; 708 } 709 710 /* 711 * This is a kludge to allow reading of files created 712 * with no read permission. The owner of the file 713 * is always allowed to read it. 714 */ 715 if (crgetuid(cr) != va.va_uid) { 716 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 717 718 if (error) { 719 /* 720 * Exec is the same as read over the net because 721 * of demand loading. 
722 */ 723 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 724 } 725 if (error) { 726 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 727 if (in_crit) 728 nbl_end_crit(vp); 729 VN_RELE(vp); 730 rr->rr_data = NULL; 731 rr->rr_status = puterrno(error); 732 733 return; 734 } 735 } 736 737 if (MANDLOCK(vp, va.va_mode)) { 738 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 739 if (in_crit) 740 nbl_end_crit(vp); 741 742 VN_RELE(vp); 743 rr->rr_data = NULL; 744 rr->rr_status = NFSERR_ACCES; 745 746 return; 747 } 748 749 rr->rr_ok.rrok_wlist_len = 0; 750 rr->rr_ok.rrok_wlist = NULL; 751 752 if ((u_offset_t)ra->ra_offset >= va.va_size) { 753 rr->rr_count = 0; 754 rr->rr_data = NULL; 755 /* 756 * In this case, status is NFS_OK, but there is no data 757 * to encode. So set rr_mp to NULL. 758 */ 759 rr->rr_mp = NULL; 760 rr->rr_ok.rrok_wlist = ra->ra_wlist; 761 if (rr->rr_ok.rrok_wlist) 762 clist_zero_len(rr->rr_ok.rrok_wlist); 763 goto done; 764 } 765 766 if (ra->ra_wlist) { 767 mp = NULL; 768 rr->rr_mp = NULL; 769 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 770 } else { 771 /* 772 * mp will contain the data to be sent out in the read reply. 773 * This will be freed after the reply has been sent out (by the 774 * driver). 775 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 776 * that the call to xdrmblk_putmblk() never fails. 
777 */ 778 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 779 &alloc_err); 780 ASSERT(mp != NULL); 781 ASSERT(alloc_err == 0); 782 783 rr->rr_mp = mp; 784 785 /* 786 * Set up io vector 787 */ 788 iov.iov_base = (caddr_t)mp->b_datap->db_base; 789 iov.iov_len = ra->ra_count; 790 } 791 792 uio.uio_iov = &iov; 793 uio.uio_iovcnt = 1; 794 uio.uio_segflg = UIO_SYSSPACE; 795 uio.uio_extflg = UIO_COPY_CACHED; 796 uio.uio_loffset = (offset_t)ra->ra_offset; 797 uio.uio_resid = ra->ra_count; 798 799 error = VOP_READ(vp, &uio, 0, cr, &ct); 800 801 if (error) { 802 if (mp) 803 freeb(mp); 804 805 /* 806 * check if a monitor detected a delegation conflict and 807 * mark as wouldblock so response is dropped 808 */ 809 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 810 curthread->t_flag |= T_WOULDBLOCK; 811 else 812 rr->rr_status = puterrno(error); 813 814 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 815 if (in_crit) 816 nbl_end_crit(vp); 817 818 VN_RELE(vp); 819 rr->rr_data = NULL; 820 821 return; 822 } 823 824 /* 825 * Get attributes again so we can send the latest access 826 * time to the client side for his cache. 
827 */ 828 va.va_mask = AT_ALL; 829 830 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 831 832 if (error) { 833 if (mp) 834 freeb(mp); 835 836 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 837 if (in_crit) 838 nbl_end_crit(vp); 839 840 VN_RELE(vp); 841 rr->rr_data = NULL; 842 rr->rr_status = puterrno(error); 843 844 return; 845 } 846 847 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 848 849 if (mp) { 850 rr->rr_data = (char *)mp->b_datap->db_base; 851 } else { 852 if (ra->ra_wlist) { 853 rr->rr_data = (caddr_t)iov.iov_base; 854 if (!rdma_setup_read_data2(ra, rr)) { 855 rr->rr_data = NULL; 856 rr->rr_status = puterrno(NFSERR_INVAL); 857 } 858 } 859 } 860 done: 861 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 862 if (in_crit) 863 nbl_end_crit(vp); 864 865 acl_perm(vp, exi, &va, cr); 866 867 /* check for overflows */ 868 error = vattr_to_nattr(&va, &rr->rr_attr); 869 870 VN_RELE(vp); 871 872 rr->rr_status = puterrno(error); 873 } 874 875 /* 876 * Free data allocated by rfs_read 877 */ 878 void 879 rfs_rdfree(struct nfsrdresult *rr) 880 { 881 mblk_t *mp; 882 883 if (rr->rr_status == NFS_OK) { 884 mp = rr->rr_mp; 885 if (mp != NULL) 886 freeb(mp); 887 } 888 } 889 890 void * 891 rfs_read_getfh(struct nfsreadargs *ra) 892 { 893 return (&ra->ra_fhandle); 894 } 895 896 #define MAX_IOVECS 12 897 898 #ifdef DEBUG 899 static int rfs_write_sync_hits = 0; 900 static int rfs_write_sync_misses = 0; 901 #endif 902 903 /* 904 * Write data to file. 905 * Returns attributes of a file after writing some data to it. 906 * 907 * Any changes made here, especially in error handling might have 908 * to also be done in rfs_write (which clusters write requests). 
909 */ 910 void 911 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 912 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 913 { 914 int error; 915 vnode_t *vp; 916 rlim64_t rlimit; 917 struct vattr va; 918 struct uio uio; 919 struct iovec iov[MAX_IOVECS]; 920 mblk_t *m; 921 struct iovec *iovp; 922 int iovcnt; 923 cred_t *savecred; 924 int in_crit = 0; 925 caller_context_t ct; 926 927 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 928 if (vp == NULL) { 929 ns->ns_status = NFSERR_STALE; 930 return; 931 } 932 933 if (rdonly(exi, req)) { 934 VN_RELE(vp); 935 ns->ns_status = NFSERR_ROFS; 936 return; 937 } 938 939 if (vp->v_type != VREG) { 940 VN_RELE(vp); 941 ns->ns_status = NFSERR_ISDIR; 942 return; 943 } 944 945 ct.cc_sysid = 0; 946 ct.cc_pid = 0; 947 ct.cc_caller_id = nfs2_srv_caller_id; 948 ct.cc_flags = CC_DONTBLOCK; 949 950 va.va_mask = AT_UID|AT_MODE; 951 952 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 953 954 if (error) { 955 VN_RELE(vp); 956 ns->ns_status = puterrno(error); 957 958 return; 959 } 960 961 if (crgetuid(cr) != va.va_uid) { 962 /* 963 * This is a kludge to allow writes of files created 964 * with read only permission. The owner of the file 965 * is always allowed to write it. 966 */ 967 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 968 969 if (error) { 970 VN_RELE(vp); 971 ns->ns_status = puterrno(error); 972 return; 973 } 974 } 975 976 /* 977 * Can't access a mandatory lock file. This might cause 978 * the NFS service thread to block forever waiting for a 979 * lock to be released that will never be released. 980 */ 981 if (MANDLOCK(vp, va.va_mode)) { 982 VN_RELE(vp); 983 ns->ns_status = NFSERR_ACCES; 984 return; 985 } 986 987 /* 988 * We have to enter the critical region before calling VOP_RWLOCK 989 * to avoid a deadlock with ufs. 
990 */ 991 if (nbl_need_check(vp)) { 992 nbl_start_crit(vp, RW_READER); 993 in_crit = 1; 994 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 995 wa->wa_count, 0, NULL)) { 996 error = EACCES; 997 goto out; 998 } 999 } 1000 1001 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1002 1003 /* check if a monitor detected a delegation conflict */ 1004 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1005 VN_RELE(vp); 1006 /* mark as wouldblock so response is dropped */ 1007 curthread->t_flag |= T_WOULDBLOCK; 1008 return; 1009 } 1010 1011 if (wa->wa_data || wa->wa_rlist) { 1012 /* Do the RDMA thing if necessary */ 1013 if (wa->wa_rlist) { 1014 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1015 iov[0].iov_len = wa->wa_count; 1016 } else { 1017 iov[0].iov_base = wa->wa_data; 1018 iov[0].iov_len = wa->wa_count; 1019 } 1020 uio.uio_iov = iov; 1021 uio.uio_iovcnt = 1; 1022 uio.uio_segflg = UIO_SYSSPACE; 1023 uio.uio_extflg = UIO_COPY_DEFAULT; 1024 uio.uio_loffset = (offset_t)wa->wa_offset; 1025 uio.uio_resid = wa->wa_count; 1026 /* 1027 * The limit is checked on the client. We 1028 * should allow any size writes here. 1029 */ 1030 uio.uio_llimit = curproc->p_fsz_ctl; 1031 rlimit = uio.uio_llimit - wa->wa_offset; 1032 if (rlimit < (rlim64_t)uio.uio_resid) 1033 uio.uio_resid = (uint_t)rlimit; 1034 1035 /* 1036 * for now we assume no append mode 1037 */ 1038 /* 1039 * We're changing creds because VM may fault and we need 1040 * the cred of the current thread to be used if quota 1041 * checking is enabled. 
1042 */ 1043 savecred = curthread->t_cred; 1044 curthread->t_cred = cr; 1045 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1046 curthread->t_cred = savecred; 1047 } else { 1048 iovcnt = 0; 1049 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1050 iovcnt++; 1051 if (iovcnt <= MAX_IOVECS) { 1052 #ifdef DEBUG 1053 rfs_write_sync_hits++; 1054 #endif 1055 iovp = iov; 1056 } else { 1057 #ifdef DEBUG 1058 rfs_write_sync_misses++; 1059 #endif 1060 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1061 } 1062 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1063 uio.uio_iov = iovp; 1064 uio.uio_iovcnt = iovcnt; 1065 uio.uio_segflg = UIO_SYSSPACE; 1066 uio.uio_extflg = UIO_COPY_DEFAULT; 1067 uio.uio_loffset = (offset_t)wa->wa_offset; 1068 uio.uio_resid = wa->wa_count; 1069 /* 1070 * The limit is checked on the client. We 1071 * should allow any size writes here. 1072 */ 1073 uio.uio_llimit = curproc->p_fsz_ctl; 1074 rlimit = uio.uio_llimit - wa->wa_offset; 1075 if (rlimit < (rlim64_t)uio.uio_resid) 1076 uio.uio_resid = (uint_t)rlimit; 1077 1078 /* 1079 * For now we assume no append mode. 1080 */ 1081 /* 1082 * We're changing creds because VM may fault and we need 1083 * the cred of the current thread to be used if quota 1084 * checking is enabled. 1085 */ 1086 savecred = curthread->t_cred; 1087 curthread->t_cred = cr; 1088 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1089 curthread->t_cred = savecred; 1090 1091 if (iovp != iov) 1092 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1093 } 1094 1095 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1096 1097 if (!error) { 1098 /* 1099 * Get attributes again so we send the latest mod 1100 * time to the client side for his cache. 
1101 */ 1102 va.va_mask = AT_ALL; /* now we want everything */ 1103 1104 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1105 1106 /* check for overflows */ 1107 if (!error) { 1108 acl_perm(vp, exi, &va, cr); 1109 error = vattr_to_nattr(&va, &ns->ns_attr); 1110 } 1111 } 1112 1113 out: 1114 if (in_crit) 1115 nbl_end_crit(vp); 1116 VN_RELE(vp); 1117 1118 /* check if a monitor detected a delegation conflict */ 1119 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1120 /* mark as wouldblock so response is dropped */ 1121 curthread->t_flag |= T_WOULDBLOCK; 1122 else 1123 ns->ns_status = puterrno(error); 1124 1125 } 1126 1127 struct rfs_async_write { 1128 struct nfswriteargs *wa; 1129 struct nfsattrstat *ns; 1130 struct svc_req *req; 1131 cred_t *cr; 1132 kthread_t *thread; 1133 struct rfs_async_write *list; 1134 }; 1135 1136 struct rfs_async_write_list { 1137 fhandle_t *fhp; 1138 kcondvar_t cv; 1139 struct rfs_async_write *list; 1140 struct rfs_async_write_list *next; 1141 }; 1142 1143 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1144 static kmutex_t rfs_async_write_lock; 1145 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1146 1147 #define MAXCLIOVECS 42 1148 #define RFSWRITE_INITVAL (enum nfsstat) -1 1149 1150 #ifdef DEBUG 1151 static int rfs_write_hits = 0; 1152 static int rfs_write_misses = 0; 1153 #endif 1154 1155 /* 1156 * Write data to file. 1157 * Returns attributes of a file after writing some data to it. 
/*
 * Write data to file (clustered/async path).
 * Returns the attributes of the file after writing some data to it.
 *
 * Concurrent write requests to the same file are gathered into a
 * "cluster" (an rfs_async_write_list keyed by file handle).  The first
 * request for a file creates the cluster, converts the file handle and
 * takes the write rwlock; later requests for the same file queue
 * themselves on the cluster (sorted by offset) and sleep until the
 * cluster owner processes them and fills in their status.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	/* Clustering disabled: fall back to the simple synchronous path. */
	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/* This request's cluster entry lives on our own stack. */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	/*
	 * The stack-allocated entry is read by other threads, so this
	 * thread must not be swapped out while queued on a cluster.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insertion sort by starting offset. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the cluster owner fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Stale handle: unlink the cluster and fail every member. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		/* Per-request credentials: each member may be a different user. */
		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run at the first gap, or at
			 * a request already marked with an error status.
			 * lrp is left pointing one past the last request
			 * in the contiguous run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					/* Clamp the last mblk to wa_count. */
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* Wake up every queued requester with its final status. */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}
1625 */ 1626 if (data_written) { 1627 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1628 1629 if (!error) { 1630 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1631 } 1632 } 1633 1634 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1635 1636 if (in_crit) 1637 nbl_end_crit(vp); 1638 VN_RELE(vp); 1639 1640 t_flag = curthread->t_flag & T_WOULDBLOCK; 1641 mutex_enter(&rfs_async_write_lock); 1642 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1643 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1644 rp->ns->ns_status = puterrno(error); 1645 rp->thread->t_flag |= t_flag; 1646 } 1647 } 1648 cv_broadcast(&nlp->cv); 1649 mutex_exit(&rfs_async_write_lock); 1650 1651 } 1652 1653 void * 1654 rfs_write_getfh(struct nfswriteargs *wa) 1655 { 1656 return (&wa->wa_fhandle); 1657 } 1658 1659 /* 1660 * Create a file. 1661 * Creates a file with given attributes and returns those attributes 1662 * and an fhandle for the new file. 1663 */ 1664 void 1665 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1666 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1667 { 1668 int error; 1669 int lookuperr; 1670 int in_crit = 0; 1671 struct vattr va; 1672 vnode_t *vp; 1673 vnode_t *realvp; 1674 vnode_t *dvp; 1675 char *name = args->ca_da.da_name; 1676 vnode_t *tvp = NULL; 1677 int mode; 1678 int lookup_ok; 1679 bool_t trunc; 1680 struct sockaddr *ca; 1681 1682 /* 1683 * Disallow NULL paths 1684 */ 1685 if (name == NULL || *name == '\0') { 1686 dr->dr_status = NFSERR_ACCES; 1687 return; 1688 } 1689 1690 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1691 if (dvp == NULL) { 1692 dr->dr_status = NFSERR_STALE; 1693 return; 1694 } 1695 1696 error = sattr_to_vattr(args->ca_sa, &va); 1697 if (error) { 1698 dr->dr_status = puterrno(error); 1699 return; 1700 } 1701 1702 /* 1703 * Must specify the mode. 
1704 */ 1705 if (!(va.va_mask & AT_MODE)) { 1706 VN_RELE(dvp); 1707 dr->dr_status = NFSERR_INVAL; 1708 return; 1709 } 1710 1711 /* 1712 * This is a completely gross hack to make mknod 1713 * work over the wire until we can wack the protocol 1714 */ 1715 if ((va.va_mode & IFMT) == IFCHR) { 1716 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1717 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1718 else { 1719 va.va_type = VCHR; 1720 /* 1721 * uncompress the received dev_t 1722 * if the top half is zero indicating a request 1723 * from an `older style' OS. 1724 */ 1725 if ((va.va_size & 0xffff0000) == 0) 1726 va.va_rdev = nfsv2_expdev(va.va_size); 1727 else 1728 va.va_rdev = (dev_t)va.va_size; 1729 } 1730 va.va_mask &= ~AT_SIZE; 1731 } else if ((va.va_mode & IFMT) == IFBLK) { 1732 va.va_type = VBLK; 1733 /* 1734 * uncompress the received dev_t 1735 * if the top half is zero indicating a request 1736 * from an `older style' OS. 1737 */ 1738 if ((va.va_size & 0xffff0000) == 0) 1739 va.va_rdev = nfsv2_expdev(va.va_size); 1740 else 1741 va.va_rdev = (dev_t)va.va_size; 1742 va.va_mask &= ~AT_SIZE; 1743 } else if ((va.va_mode & IFMT) == IFSOCK) { 1744 va.va_type = VSOCK; 1745 } else { 1746 va.va_type = VREG; 1747 } 1748 va.va_mode &= ~IFMT; 1749 va.va_mask |= AT_TYPE; 1750 1751 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1752 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1753 MAXPATHLEN); 1754 if (name == NULL) { 1755 dr->dr_status = puterrno(EINVAL); 1756 return; 1757 } 1758 1759 /* 1760 * Why was the choice made to use VWRITE as the mode to the 1761 * call to VOP_CREATE ? This results in a bug. When a client 1762 * opens a file that already exists and is RDONLY, the second 1763 * open fails with an EACESS because of the mode. 1764 * bug ID 1054648. 
1765 */ 1766 lookup_ok = 0; 1767 mode = VWRITE; 1768 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1769 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1770 NULL, NULL, NULL); 1771 if (!error) { 1772 struct vattr at; 1773 1774 lookup_ok = 1; 1775 at.va_mask = AT_MODE; 1776 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1777 if (!error) 1778 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1779 VN_RELE(tvp); 1780 tvp = NULL; 1781 } 1782 } 1783 1784 if (!lookup_ok) { 1785 if (rdonly(exi, req)) { 1786 error = EROFS; 1787 } else if (va.va_type != VREG && va.va_type != VFIFO && 1788 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1789 error = EPERM; 1790 } else { 1791 error = 0; 1792 } 1793 } 1794 1795 /* 1796 * If file size is being modified on an already existing file 1797 * make sure that there are no conflicting non-blocking mandatory 1798 * locks in the region being manipulated. Return EACCES if there 1799 * are conflicting locks. 1800 */ 1801 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1802 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1803 NULL, NULL, NULL); 1804 1805 if (!lookuperr && 1806 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1807 VN_RELE(tvp); 1808 curthread->t_flag |= T_WOULDBLOCK; 1809 goto out; 1810 } 1811 1812 if (!lookuperr && nbl_need_check(tvp)) { 1813 /* 1814 * The file exists. Now check if it has any 1815 * conflicting non-blocking mandatory locks 1816 * in the region being changed. 
1817 */ 1818 struct vattr bva; 1819 u_offset_t offset; 1820 ssize_t length; 1821 1822 nbl_start_crit(tvp, RW_READER); 1823 in_crit = 1; 1824 1825 bva.va_mask = AT_SIZE; 1826 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1827 if (!error) { 1828 if (va.va_size < bva.va_size) { 1829 offset = va.va_size; 1830 length = bva.va_size - va.va_size; 1831 } else { 1832 offset = bva.va_size; 1833 length = va.va_size - bva.va_size; 1834 } 1835 if (length) { 1836 if (nbl_conflict(tvp, NBL_WRITE, 1837 offset, length, 0, NULL)) { 1838 error = EACCES; 1839 } 1840 } 1841 } 1842 if (error) { 1843 nbl_end_crit(tvp); 1844 VN_RELE(tvp); 1845 in_crit = 0; 1846 } 1847 } else if (tvp != NULL) { 1848 VN_RELE(tvp); 1849 } 1850 } 1851 1852 if (!error) { 1853 /* 1854 * If filesystem is shared with nosuid the remove any 1855 * setuid/setgid bits on create. 1856 */ 1857 if (va.va_type == VREG && 1858 exi->exi_export.ex_flags & EX_NOSUID) 1859 va.va_mode &= ~(VSUID | VSGID); 1860 1861 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1862 NULL, NULL); 1863 1864 if (!error) { 1865 1866 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1867 trunc = TRUE; 1868 else 1869 trunc = FALSE; 1870 1871 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1872 VN_RELE(vp); 1873 curthread->t_flag |= T_WOULDBLOCK; 1874 goto out; 1875 } 1876 va.va_mask = AT_ALL; 1877 1878 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1879 1880 /* check for overflows */ 1881 if (!error) { 1882 acl_perm(vp, exi, &va, cr); 1883 error = vattr_to_nattr(&va, &dr->dr_attr); 1884 if (!error) { 1885 error = makefh(&dr->dr_fhandle, vp, 1886 exi); 1887 } 1888 } 1889 /* 1890 * Force modified metadata out to stable storage. 
1891 * 1892 * if a underlying vp exists, pass it to VOP_FSYNC 1893 */ 1894 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1895 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1896 else 1897 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1898 VN_RELE(vp); 1899 } 1900 1901 if (in_crit) { 1902 nbl_end_crit(tvp); 1903 VN_RELE(tvp); 1904 } 1905 } 1906 1907 /* 1908 * Force modified data and metadata out to stable storage. 1909 */ 1910 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1911 1912 out: 1913 1914 VN_RELE(dvp); 1915 1916 dr->dr_status = puterrno(error); 1917 1918 if (name != args->ca_da.da_name) 1919 kmem_free(name, MAXPATHLEN); 1920 } 1921 void * 1922 rfs_create_getfh(struct nfscreatargs *args) 1923 { 1924 return (args->ca_da.da_fhandle); 1925 } 1926 1927 /* 1928 * Remove a file. 1929 * Remove named file from parent directory. 1930 */ 1931 void 1932 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1933 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1934 { 1935 int error = 0; 1936 vnode_t *vp; 1937 vnode_t *targvp; 1938 int in_crit = 0; 1939 1940 /* 1941 * Disallow NULL paths 1942 */ 1943 if (da->da_name == NULL || *da->da_name == '\0') { 1944 *status = NFSERR_ACCES; 1945 return; 1946 } 1947 1948 vp = nfs_fhtovp(da->da_fhandle, exi); 1949 if (vp == NULL) { 1950 *status = NFSERR_STALE; 1951 return; 1952 } 1953 1954 if (rdonly(exi, req)) { 1955 VN_RELE(vp); 1956 *status = NFSERR_ROFS; 1957 return; 1958 } 1959 1960 /* 1961 * Check for a conflict with a non-blocking mandatory share reservation. 1962 */ 1963 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1964 NULL, cr, NULL, NULL, NULL); 1965 if (error != 0) { 1966 VN_RELE(vp); 1967 *status = puterrno(error); 1968 return; 1969 } 1970 1971 /* 1972 * If the file is delegated to an v4 client, then initiate 1973 * recall and drop this request (by setting T_WOULDBLOCK). 1974 * The client will eventually re-transmit the request and 1975 * (hopefully), by then, the v4 client will have returned 1976 * the delegation. 
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	/* Return the directory file handle from the remove args. */
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Both directories must live in the same export; a cross-export
	 * rename is rejected with NFSERR_XDEV below.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Record the new name for /proc-style path lookups. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2146 */ 2147 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2148 (void) VOP_FSYNC(fromvp, 0, cr, NULL); 2149 2150 out: 2151 if (in_crit) 2152 nbl_end_crit(srcvp); 2153 VN_RELE(srcvp); 2154 VN_RELE(tovp); 2155 VN_RELE(fromvp); 2156 2157 *status = puterrno(error); 2158 2159 } 2160 void * 2161 rfs_rename_getfh(struct nfsrnmargs *args) 2162 { 2163 return (args->rna_from.da_fhandle); 2164 } 2165 2166 /* 2167 * Link to a file. 2168 * Create a file (to) which is a hard link to the given file (from). 2169 */ 2170 void 2171 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2172 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2173 { 2174 int error; 2175 vnode_t *fromvp; 2176 vnode_t *tovp; 2177 struct exportinfo *to_exi; 2178 fhandle_t *fh; 2179 2180 fromvp = nfs_fhtovp(args->la_from, exi); 2181 if (fromvp == NULL) { 2182 *status = NFSERR_STALE; 2183 return; 2184 } 2185 2186 fh = args->la_to.da_fhandle; 2187 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2188 if (to_exi == NULL) { 2189 VN_RELE(fromvp); 2190 *status = NFSERR_ACCES; 2191 return; 2192 } 2193 exi_rele(to_exi); 2194 2195 if (to_exi != exi) { 2196 VN_RELE(fromvp); 2197 *status = NFSERR_XDEV; 2198 return; 2199 } 2200 2201 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2202 if (tovp == NULL) { 2203 VN_RELE(fromvp); 2204 *status = NFSERR_STALE; 2205 return; 2206 } 2207 2208 if (tovp->v_type != VDIR) { 2209 VN_RELE(tovp); 2210 VN_RELE(fromvp); 2211 *status = NFSERR_NOTDIR; 2212 return; 2213 } 2214 /* 2215 * Disallow NULL paths 2216 */ 2217 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2218 VN_RELE(tovp); 2219 VN_RELE(fromvp); 2220 *status = NFSERR_ACCES; 2221 return; 2222 } 2223 2224 if (rdonly(exi, req)) { 2225 VN_RELE(tovp); 2226 VN_RELE(fromvp); 2227 *status = NFSERR_ROFS; 2228 return; 2229 } 2230 2231 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2232 2233 /* 2234 * Force modified data and metadata out to stable storage. 
2235 */ 2236 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2237 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2238 2239 VN_RELE(tovp); 2240 VN_RELE(fromvp); 2241 2242 *status = puterrno(error); 2243 2244 } 2245 void * 2246 rfs_link_getfh(struct nfslinkargs *args) 2247 { 2248 return (args->la_from); 2249 } 2250 2251 /* 2252 * Symbolicly link to a file. 2253 * Create a file (to) with the given attributes which is a symbolic link 2254 * to the given path name (to). 2255 */ 2256 void 2257 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2258 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2259 { 2260 int error; 2261 struct vattr va; 2262 vnode_t *vp; 2263 vnode_t *svp; 2264 int lerror; 2265 struct sockaddr *ca; 2266 char *name = NULL; 2267 2268 /* 2269 * Disallow NULL paths 2270 */ 2271 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2272 *status = NFSERR_ACCES; 2273 return; 2274 } 2275 2276 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2277 if (vp == NULL) { 2278 *status = NFSERR_STALE; 2279 return; 2280 } 2281 2282 if (rdonly(exi, req)) { 2283 VN_RELE(vp); 2284 *status = NFSERR_ROFS; 2285 return; 2286 } 2287 2288 error = sattr_to_vattr(args->sla_sa, &va); 2289 if (error) { 2290 VN_RELE(vp); 2291 *status = puterrno(error); 2292 return; 2293 } 2294 2295 if (!(va.va_mask & AT_MODE)) { 2296 VN_RELE(vp); 2297 *status = NFSERR_INVAL; 2298 return; 2299 } 2300 2301 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2302 name = nfscmd_convname(ca, exi, args->sla_tnm, 2303 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2304 2305 if (name == NULL) { 2306 *status = NFSERR_ACCES; 2307 return; 2308 } 2309 2310 va.va_type = VLNK; 2311 va.va_mask |= AT_TYPE; 2312 2313 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2314 2315 /* 2316 * Force new data and metadata out to stable storage. 
2317 */ 2318 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2319 NULL, cr, NULL, NULL, NULL); 2320 2321 if (!lerror) { 2322 (void) VOP_FSYNC(svp, 0, cr, NULL); 2323 VN_RELE(svp); 2324 } 2325 2326 /* 2327 * Force modified data and metadata out to stable storage. 2328 */ 2329 (void) VOP_FSYNC(vp, 0, cr, NULL); 2330 2331 VN_RELE(vp); 2332 2333 *status = puterrno(error); 2334 if (name != args->sla_tnm) 2335 kmem_free(name, MAXPATHLEN); 2336 2337 } 2338 void * 2339 rfs_symlink_getfh(struct nfsslargs *args) 2340 { 2341 return (args->sla_from.da_fhandle); 2342 } 2343 2344 /* 2345 * Make a directory. 2346 * Create a directory with the given name, parent directory, and attributes. 2347 * Returns a file handle and attributes for the new directory. 2348 */ 2349 void 2350 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2351 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2352 { 2353 int error; 2354 struct vattr va; 2355 vnode_t *dvp = NULL; 2356 vnode_t *vp; 2357 char *name = args->ca_da.da_name; 2358 2359 /* 2360 * Disallow NULL paths 2361 */ 2362 if (name == NULL || *name == '\0') { 2363 dr->dr_status = NFSERR_ACCES; 2364 return; 2365 } 2366 2367 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2368 if (vp == NULL) { 2369 dr->dr_status = NFSERR_STALE; 2370 return; 2371 } 2372 2373 if (rdonly(exi, req)) { 2374 VN_RELE(vp); 2375 dr->dr_status = NFSERR_ROFS; 2376 return; 2377 } 2378 2379 error = sattr_to_vattr(args->ca_sa, &va); 2380 if (error) { 2381 VN_RELE(vp); 2382 dr->dr_status = puterrno(error); 2383 return; 2384 } 2385 2386 if (!(va.va_mask & AT_MODE)) { 2387 VN_RELE(vp); 2388 dr->dr_status = NFSERR_INVAL; 2389 return; 2390 } 2391 2392 va.va_type = VDIR; 2393 va.va_mask |= AT_TYPE; 2394 2395 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2396 2397 if (!error) { 2398 /* 2399 * Attribtutes of the newly created directory should 2400 * be returned to the client. 
2401 */ 2402 va.va_mask = AT_ALL; /* We want everything */ 2403 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2404 2405 /* check for overflows */ 2406 if (!error) { 2407 acl_perm(vp, exi, &va, cr); 2408 error = vattr_to_nattr(&va, &dr->dr_attr); 2409 if (!error) { 2410 error = makefh(&dr->dr_fhandle, dvp, exi); 2411 } 2412 } 2413 /* 2414 * Force new data and metadata out to stable storage. 2415 */ 2416 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2417 VN_RELE(dvp); 2418 } 2419 2420 /* 2421 * Force modified data and metadata out to stable storage. 2422 */ 2423 (void) VOP_FSYNC(vp, 0, cr, NULL); 2424 2425 VN_RELE(vp); 2426 2427 dr->dr_status = puterrno(error); 2428 2429 } 2430 void * 2431 rfs_mkdir_getfh(struct nfscreatargs *args) 2432 { 2433 return (args->ca_da.da_fhandle); 2434 } 2435 2436 /* 2437 * Remove a directory. 2438 * Remove the given directory name from the given parent directory. 2439 */ 2440 void 2441 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2442 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2443 { 2444 int error; 2445 vnode_t *vp; 2446 2447 2448 /* 2449 * Disallow NULL paths 2450 */ 2451 if (da->da_name == NULL || *da->da_name == '\0') { 2452 *status = NFSERR_ACCES; 2453 return; 2454 } 2455 2456 vp = nfs_fhtovp(da->da_fhandle, exi); 2457 if (vp == NULL) { 2458 *status = NFSERR_STALE; 2459 return; 2460 } 2461 2462 if (rdonly(exi, req)) { 2463 VN_RELE(vp); 2464 *status = NFSERR_ROFS; 2465 return; 2466 } 2467 2468 /* 2469 * VOP_RMDIR now takes a new third argument (the current 2470 * directory of the process). That's because someone 2471 * wants to return EINVAL if one tries to remove ".". 2472 * Of course, NFS servers have no idea what their 2473 * clients' current directories are. We fake it by 2474 * supplying a vnode known to exist and illegal to 2475 * remove. 2476 */ 2477 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2478 2479 /* 2480 * Force modified data and metadata out to stable storage. 
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	/* Return the parent directory file handle from the rmdir args. */
	return (da->da_fhandle);
}

/*
 * Read directory entries.
 * Returns up to rda_count bytes of dirent64 records starting at
 * rda_offset, converted to the client's character set if required.
 * The entry buffer is allocated here and freed by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte read succeeds with no entries (error is still 0). */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/* Convert entry names to the client's character set if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Swap in the converted buffer (if conversion produced a new one). */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
It causes local disk writes when just 2623 * reading the file and the overhead is deemed larger 2624 * than the benefit. 2625 */ 2626 /* 2627 * Force modified metadata out to stable storage. 2628 */ 2629 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2630 #endif 2631 2632 VN_RELE(vp); 2633 2634 rd->rd_status = puterrno(error); 2635 2636 } 2637 void * 2638 rfs_readdir_getfh(struct nfsrddirargs *rda) 2639 { 2640 return (&rda->rda_fh); 2641 } 2642 void 2643 rfs_rddirfree(struct nfsrddirres *rd) 2644 { 2645 if (rd->rd_entries != NULL) 2646 kmem_free(rd->rd_entries, rd->rd_bufsize); 2647 } 2648 2649 /* ARGSUSED */ 2650 void 2651 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2652 struct svc_req *req, cred_t *cr) 2653 { 2654 int error; 2655 struct statvfs64 sb; 2656 vnode_t *vp; 2657 2658 vp = nfs_fhtovp(fh, exi); 2659 if (vp == NULL) { 2660 fs->fs_status = NFSERR_STALE; 2661 return; 2662 } 2663 2664 error = VFS_STATVFS(vp->v_vfsp, &sb); 2665 2666 if (!error) { 2667 fs->fs_tsize = nfstsize(); 2668 fs->fs_bsize = sb.f_frsize; 2669 fs->fs_blocks = sb.f_blocks; 2670 fs->fs_bfree = sb.f_bfree; 2671 fs->fs_bavail = sb.f_bavail; 2672 } 2673 2674 VN_RELE(vp); 2675 2676 fs->fs_status = puterrno(error); 2677 2678 } 2679 void * 2680 rfs_statfs_getfh(fhandle_t *fh) 2681 { 2682 return (fh); 2683 } 2684 2685 static int 2686 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2687 { 2688 vap->va_mask = 0; 2689 2690 /* 2691 * There was a sign extension bug in some VFS based systems 2692 * which stored the mode as a short. When it would get 2693 * assigned to a u_long, no sign extension would occur. 2694 * It needed to, but this wasn't noticed because sa_mode 2695 * would then get assigned back to the short, thus ignoring 2696 * the upper 16 bits of sa_mode. 2697 * 2698 * To make this implementation work for both broken 2699 * clients and good clients, we check for both versions 2700 * of the mode. 
2701 */ 2702 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2703 sa->sa_mode != (uint32_t)-1) { 2704 vap->va_mask |= AT_MODE; 2705 vap->va_mode = sa->sa_mode; 2706 } 2707 if (sa->sa_uid != (uint32_t)-1) { 2708 vap->va_mask |= AT_UID; 2709 vap->va_uid = sa->sa_uid; 2710 } 2711 if (sa->sa_gid != (uint32_t)-1) { 2712 vap->va_mask |= AT_GID; 2713 vap->va_gid = sa->sa_gid; 2714 } 2715 if (sa->sa_size != (uint32_t)-1) { 2716 vap->va_mask |= AT_SIZE; 2717 vap->va_size = sa->sa_size; 2718 } 2719 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2720 sa->sa_atime.tv_usec != (int32_t)-1) { 2721 #ifndef _LP64 2722 /* return error if time overflow */ 2723 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2724 return (EOVERFLOW); 2725 #endif 2726 vap->va_mask |= AT_ATIME; 2727 /* 2728 * nfs protocol defines times as unsigned so don't extend sign, 2729 * unless sysadmin set nfs_allow_preepoch_time. 2730 */ 2731 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2732 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2733 } 2734 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2735 sa->sa_mtime.tv_usec != (int32_t)-1) { 2736 #ifndef _LP64 2737 /* return error if time overflow */ 2738 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2739 return (EOVERFLOW); 2740 #endif 2741 vap->va_mask |= AT_MTIME; 2742 /* 2743 * nfs protocol defines times as unsigned so don't extend sign, 2744 * unless sysadmin set nfs_allow_preepoch_time. 2745 */ 2746 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2747 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2748 } 2749 return (0); 2750 } 2751 2752 static enum nfsftype vt_to_nf[] = { 2753 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2754 }; 2755 2756 /* 2757 * check the following fields for overflow: nodeid, size, and time. 2758 * There could be a problem when converting 64-bit LP64 fields 2759 * into 32-bit ones. Return an error if there is an overflow. 
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/*
	 * A mode of all-ones means "unknown"; otherwise merge the
	 * over-the-wire file type bits into the mode.
	 */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2870 */ 2871 static void 2872 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2873 { 2874 vsecattr_t vsa; 2875 int aclcnt; 2876 aclent_t *aclentp; 2877 mode_t mask_perm; 2878 mode_t grp_perm; 2879 mode_t other_perm; 2880 mode_t other_orig; 2881 int error; 2882 2883 /* dont care default acl */ 2884 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2885 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2886 2887 if (!error) { 2888 aclcnt = vsa.vsa_aclcnt; 2889 if (aclcnt > MIN_ACL_ENTRIES) { 2890 /* non-trivial ACL */ 2891 aclentp = vsa.vsa_aclentp; 2892 if (exi->exi_export.ex_flags & EX_ACLOK) { 2893 /* maximal permissions */ 2894 grp_perm = 0; 2895 other_perm = 0; 2896 for (; aclcnt > 0; aclcnt--, aclentp++) { 2897 switch (aclentp->a_type) { 2898 case USER_OBJ: 2899 break; 2900 case USER: 2901 grp_perm |= 2902 aclentp->a_perm << 3; 2903 other_perm |= aclentp->a_perm; 2904 break; 2905 case GROUP_OBJ: 2906 grp_perm |= 2907 aclentp->a_perm << 3; 2908 break; 2909 case GROUP: 2910 other_perm |= aclentp->a_perm; 2911 break; 2912 case OTHER_OBJ: 2913 other_orig = aclentp->a_perm; 2914 break; 2915 case CLASS_OBJ: 2916 mask_perm = aclentp->a_perm; 2917 break; 2918 default: 2919 break; 2920 } 2921 } 2922 grp_perm &= mask_perm << 3; 2923 other_perm &= mask_perm; 2924 other_perm |= other_orig; 2925 2926 } else { 2927 /* minimal permissions */ 2928 grp_perm = 070; 2929 other_perm = 07; 2930 for (; aclcnt > 0; aclcnt--, aclentp++) { 2931 switch (aclentp->a_type) { 2932 case USER_OBJ: 2933 break; 2934 case USER: 2935 case CLASS_OBJ: 2936 grp_perm &= 2937 aclentp->a_perm << 3; 2938 other_perm &= 2939 aclentp->a_perm; 2940 break; 2941 case GROUP_OBJ: 2942 grp_perm &= 2943 aclentp->a_perm << 3; 2944 break; 2945 case GROUP: 2946 other_perm &= 2947 aclentp->a_perm; 2948 break; 2949 case OTHER_OBJ: 2950 other_perm &= 2951 aclentp->a_perm; 2952 break; 2953 default: 2954 break; 2955 } 2956 } 2957 } 2958 /* copy to va */ 2959 va->va_mode &= ~077; 2960 va->va_mode |= 
grp_perm | other_perm; 2961 } 2962 if (vsa.vsa_aclcnt) 2963 kmem_free(vsa.vsa_aclentp, 2964 vsa.vsa_aclcnt * sizeof (aclent_t)); 2965 } 2966 } 2967 2968 void 2969 rfs_srvrinit(void) 2970 { 2971 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2972 nfs2_srv_caller_id = fs_new_caller_id(); 2973 } 2974 2975 void 2976 rfs_srvrfini(void) 2977 { 2978 mutex_destroy(&rfs_async_write_lock); 2979 } 2980 2981 static int 2982 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2983 { 2984 struct clist *wcl; 2985 int wlist_len; 2986 uint32_t count = rr->rr_count; 2987 2988 wcl = ra->ra_wlist; 2989 2990 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 2991 return (FALSE); 2992 } 2993 2994 wcl = ra->ra_wlist; 2995 rr->rr_ok.rrok_wlist_len = wlist_len; 2996 rr->rr_ok.rrok_wlist = wcl; 2997 2998 return (TRUE); 2999 } 3000