/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/statvfs.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/dirent.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/policy.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/svc.h>

#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_cmd.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>

#include <sys/strsubr.h>

/*
 * These are the interface routines for the server side of the
 * Network File System.  See the NFS version 2 protocol specification
 * for a description of this interface.
 */

static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
	cred_t *);

/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define	IFMT		0170000	/* type of file */
#define	IFCHR		0020000	/* character special */
#define	IFBLK		0060000	/* block special */
#define	IFSOCK		0140000	/* socket */

u_longlong_t nfs2_srv_caller_id;

/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
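	 * A referral (NFS reparse point) is reported to the client as a
	 * symlink here, matching the artificial symlink built for it in
	 * rfs_readlink() below.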
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}

/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int flag;
	int in_crit = 0;
	vnode_t *vp;
	struct vattr va;
	struct vattr bva;
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, vp, req)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
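	 * (VOP_SPACE is called with F_FREESP and l_len == 0 below, which
	 * frees everything from l_start to end of file and so implements
	 * the truncation to va_size.)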
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}

/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extensions doesn't support NFSv2.  MOUNT
	 * will reject v2 clients.  Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = rootdir;
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Do not allow lookup beyond the export root.
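	 * (NFSERR_NOENT is returned in that case; NFSv2 has no better way
	 * to tell the client that it cannot climb past the root.)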
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		VN_RELE(dvp);
		dr->dr_status = NFSERR_NOENT;
		return;
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		VN_RELE(dvp);
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;
		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);


	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

	VN_RELE(dvp);

	/*
	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
	 * and have obtained a new exportinfo in exi which needs to be
	 * released.  Note that the original exportinfo pointed to by exi
	 * will be released by the caller, common_dispatch.
	 */
	if (publicfh_flag && exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
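 * For a referral object an artificial symlink is constructed with
 * build_symlink(), since rfs_getattr() reported the object as VLNK.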
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link.  BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link.  UFS returns EINVAL if this is the case,
	 * so we do the mapping here.  BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}

static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
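 * A read that starts at or beyond end of file succeeds with a
 * zero-length result rather than returning an error.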
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission.  The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode.  So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
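		 * (rfs_rdfree() performs the freeb() once the reply has been
		 * transmitted; the error paths below free it directly.)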
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for his cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}

/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}

void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}

#define	MAX_IOVECS	12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
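 * rfs_write() falls back to this synchronous path when write
 * clustering is disabled (rfs_write_async == 0).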
 */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, vp, req)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission.  The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file.  This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
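		 * (The clustered path in rfs_write() below performs the
		 * same cred swap around its VOP_WRITE call.)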
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for his cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}

struct rfs_async_write {
	struct nfswriteargs *wa;
	struct nfsattrstat *ns;
	struct svc_req *req;
	cred_t *cr;
	kthread_t *thread;
	struct rfs_async_write *list;
};

struct rfs_async_write_list {
	fhandle_t *fhp;
	kcondvar_t cv;
	struct rfs_async_write *list;
	struct rfs_async_write_list *next;
};

static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
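 * Requests for the same file handle are gathered into a cluster so
 * that contiguous data can be written with as few calls to VOP_WRITE
 * as possible.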
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
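	 * If the conversion fails, every request queued on this cluster
	 * is failed with NFSERR_STALE and the waiting threads are woken.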
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
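	 * (Requests that arrive after the disconnect simply start a new
	 * cluster of their own.)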
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, vp, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
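		 * Requests are treated as contiguous when one request's
		 * offset plus count equals the offset of the next request
		 * in the list.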
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
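		 * Every thread in the cluster also inherits any
		 * T_WOULDBLOCK indication so that dropped responses are
		 * dropped for all of the clustered requests.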
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}

void *
rfs_write_getfh(struct nfswriteargs *wa)
{
	return (&wa->wa_fhandle);
}

/*
 * Create a file.
 * Creates a file with given attributes and returns those attributes
 * and an fhandle for the new file.
 */
void
rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int lookuperr;
	int in_crit = 0;
	struct vattr va;
	vnode_t *vp;
	vnode_t *realvp;
	vnode_t *dvp;
	char *name = args->ca_da.da_name;
	vnode_t *tvp = NULL;
	int mode;
	int lookup_ok;
	bool_t trunc;
	struct sockaddr *ca;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (dvp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(dvp);
		dr->dr_status = puterrno(error);
		return;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(dvp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	/*
	 * This is a completely gross hack to make mknod
	 * work over the wire until we can wack the protocol
	 */
	if ((va.va_mode & IFMT) == IFCHR) {
		if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
			va.va_type = VFIFO;	/* xtra kludge for named pipe */
		else {
			va.va_type = VCHR;
			/*
			 * uncompress the received dev_t
			 * if the top half is zero indicating a request
			 * from an `older style' OS.
			 */
			if ((va.va_size & 0xffff0000) == 0)
				va.va_rdev = nfsv2_expdev(va.va_size);
			else
				va.va_rdev = (dev_t)va.va_size;
		}
		va.va_mask &= ~AT_SIZE;
	} else if ((va.va_mode & IFMT) == IFBLK) {
		va.va_type = VBLK;
		/*
		 * uncompress the received dev_t
		 * if the top half is zero indicating a request
		 * from an `older style' OS.
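		 * (nfsv2_expdev() widens the packed 16-bit major/minor
		 * encoding into a native dev_t.)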
		 */
		if ((va.va_size & 0xffff0000) == 0)
			va.va_rdev = nfsv2_expdev(va.va_size);
		else
			va.va_rdev = (dev_t)va.va_size;
		va.va_mask &= ~AT_SIZE;
	} else if ((va.va_mode & IFMT) == IFSOCK) {
		va.va_type = VSOCK;
	} else {
		va.va_type = VREG;
	}
	va.va_mode &= ~IFMT;
	va.va_mask |= AT_TYPE;

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);
	if (name == NULL) {
		VN_RELE(dvp);
		dr->dr_status = puterrno(EINVAL);
		return;
	}

	/*
	 * Why was the choice made to use VWRITE as the mode to the
	 * call to VOP_CREATE ?  This results in a bug.  When a client
	 * opens a file that already exists and is RDONLY, the second
	 * open fails with an EACCES because of the mode.
	 * bug ID 1054648.
	 */
	lookup_ok = 0;
	mode = VWRITE;
	if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
		error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (!error) {
			struct vattr at;

			lookup_ok = 1;
			at.va_mask = AT_MODE;
			error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
			if (!error)
				mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
			VN_RELE(tvp);
			tvp = NULL;
		}
	}

	if (!lookup_ok) {
		if (rdonly(exi, dvp, req)) {
			error = EROFS;
		} else if (va.va_type != VREG && va.va_type != VFIFO &&
		    va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
			error = EPERM;
		} else {
			error = 0;
		}
	}

	/*
	 * If file size is being modified on an already existing file
	 * make sure that there are no conflicting non-blocking mandatory
	 * locks in the region being manipulated.  Return EACCES if there
	 * are conflicting locks.
	 */
	if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
		lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);

		if (!lookuperr &&
		    rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
			VN_RELE(tvp);
			curthread->t_flag |= T_WOULDBLOCK;
			goto out;
		}

		if (!lookuperr && nbl_need_check(tvp)) {
			/*
			 * The file exists.  Now check if it has any
			 * conflicting non-blocking mandatory locks
			 * in the region being changed.
			 */
			struct vattr bva;
			u_offset_t offset;
			ssize_t length;

			nbl_start_crit(tvp, RW_READER);
			in_crit = 1;

			bva.va_mask = AT_SIZE;
			error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
			if (!error) {
				if (va.va_size < bva.va_size) {
					offset = va.va_size;
					length = bva.va_size - va.va_size;
				} else {
					offset = bva.va_size;
					length = va.va_size - bva.va_size;
				}
				if (length) {
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, length, 0, NULL)) {
						error = EACCES;
					}
				}
			}
			if (error) {
				nbl_end_crit(tvp);
				VN_RELE(tvp);
				in_crit = 0;
			}
		} else if (tvp != NULL) {
			VN_RELE(tvp);
		}
	}

	if (!error) {
		/*
		 * If the filesystem is shared with nosuid then remove any
		 * setuid/setgid bits on create.
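		 * (This mirrors the masking done for mode changes in
		 * rfs_setattr() above.)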
		 */
		if (va.va_type == VREG &&
		    exi->exi_export.ex_flags & EX_NOSUID)
			va.va_mode &= ~(VSUID | VSGID);

		error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
		    NULL, NULL);

		if (!error) {

			if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
				trunc = TRUE;
			else
				trunc = FALSE;

			if (rfs4_check_delegated(FWRITE, vp, trunc)) {
				VN_RELE(vp);
				curthread->t_flag |= T_WOULDBLOCK;
				goto out;
			}
			va.va_mask = AT_ALL;

			error = VOP_GETATTR(vp, &va, 0, cr, NULL);

			/* check for overflows */
			if (!error) {
				acl_perm(vp, exi, &va, cr);
				error = vattr_to_nattr(&va, &dr->dr_attr);
				if (!error) {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
				}
			}
			/*
			 * Force modified metadata out to stable storage.
			 *
			 * if an underlying vp exists, pass it to VOP_FSYNC
			 */
			if (VOP_REALVP(vp, &realvp, NULL) == 0)
				(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
			else
				(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
			VN_RELE(vp);
		}

		if (in_crit) {
			nbl_end_crit(tvp);
			VN_RELE(tvp);
		}
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

out:

	VN_RELE(dvp);

	dr->dr_status = puterrno(error);

	if (name != args->ca_da.da_name)
		kmem_free(name, MAXPATHLEN);
}
void *
rfs_create_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a file.
 * Remove named file from parent directory.
 */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *vp;
	vnode_t *targvp;
	int in_crit = 0;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, vp, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to a v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Rename a file.
 * Give a file (from) a new name (to).
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, tovp, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, tovp, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}

/*
 * Symbolically link to a file.
 * Create a file (from) with the given attributes which is a symbolic link
 * to the given path name (tnm).
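 * The link target is run through nfscmd_convname() for character-set
 * conversion before it is stored.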

	va.va_type = VLNK;
	va.va_mask |= AT_TYPE;

	error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
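
	/*
	 * Explanatory note: this VOP_SYMLINK() interface does not hand
	 * back the newly created vnode, so the lookup below exists only
	 * so that the new link itself can be pushed to stable storage.
	 * A lookup failure (lerror) is deliberately not propagated; the
	 * status of the symlink creation is what the client cares about.
	 */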

	/*
	 * Force new data and metadata out to stable storage.
	 */
	lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);

	if (!lerror) {
		(void) VOP_FSYNC(svp, 0, cr, NULL);
		VN_RELE(svp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	*status = puterrno(error);
	if (name != args->sla_tnm)
		kmem_free(name, MAXPATHLEN);
}

void *
rfs_symlink_getfh(struct nfsslargs *args)
{
	return (args->sla_from.da_fhandle);
}

/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;
	vnode_t *vp;
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, vp, req)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL;	/* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);
}

void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, vp, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);
}

void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
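
	/*
	 * Explanatory note: NFS_MAXDATA is the largest transfer size the
	 * version 2 protocol allows (8192 bytes), so the clamp above
	 * bounds both the reply and the reply buffer allocated below.
	 */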

	/*
	 * Allocate data for entries.  This will be freed by
	 * rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);
}

void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}

void
rfs_rddirfree(struct nfsrddirres *rd)
{
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}

/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr)
{
	int error;
	struct statvfs64 sb;
	vnode_t *vp;

	vp = nfs_fhtovp(fh, exi);
	if (vp == NULL) {
		fs->fs_status = NFSERR_STALE;
		return;
	}

	error = VFS_STATVFS(vp->v_vfsp, &sb);

	if (!error) {
		fs->fs_tsize = nfstsize();
		fs->fs_bsize = sb.f_frsize;
		fs->fs_blocks = sb.f_blocks;
		fs->fs_bfree = sb.f_bfree;
		fs->fs_bavail = sb.f_bavail;
	}

	VN_RELE(vp);

	fs->fs_status = puterrno(error);
}

void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}

static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When the mode was
	 * assigned to a u_long, no sign extension took place.  It
	 * should have, but this went unnoticed because sa_mode would
	 * then be assigned back to the short, thus ignoring the upper
	 * 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
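	/*
	 * Illustration (explanatory only): an unset mode from a correct
	 * client arrives as 0xffffffff, while a short-based client sends
	 * 0x0000ffff.  Both sentinels must mean "don't set the mode":
	 *
	 *	(uint32_t)((ushort_t)-1)	== 0x0000ffff
	 *	(uint32_t)-1			== 0xffffffff
	 */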
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * The NFS protocol defines times as unsigned, so don't
		 * sign-extend unless the sysadmin has set
		 * nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * The NFS protocol defines times as unsigned, so don't
		 * sign-extend unless the sysadmin has set
		 * nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}

static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
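
/*
 * Explanatory note: vt_to_nf[] is indexed by vtype_t (VNON, VREG, VDIR,
 * VBLK, VCHR, VLNK, VFIFO, VDOOR, VPROC, VSOCK, VBAD), so any type
 * without an NFS version 2 wire representation maps to 0.  VFIFO is
 * special-cased: vattr_to_nattr() below remaps it with NA_SETFIFO.
 */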

/*
 * Check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	if (vap->va_mode == (unsigned short)-1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are values wider than 32 bits ever supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has a minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as the GROUP_OBJ entry.
 */
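/*
 * Worked example (illustrative only): given the non-trivial ACL
 *	user::rw-, user:alice:rwx, group::r--, mask:r-x, other:---
 * the aclok (maximal) path ORs the USER and GROUP_OBJ bits together
 * and then applies the mask, yielding group r-x and other r-x, while
 * the default (minimal) path ANDs them, yielding group r-- and
 * other ---.
 */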
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* don't care about the default ACL */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}

void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}

void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}

/*
 * Prepare the RDMA write list for a read reply: set up the
 * client-provided write chunks to carry rr_count bytes of read data
 * and record the resulting list in the result structure.
 */
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
	struct clist *wcl;
	int wlist_len;
	uint32_t count = rr->rr_count;

	wcl = ra->ra_wlist;

	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
		return (FALSE);
	}

	wcl = ra->ra_wlist;
	rr->rr_ok.rrok_wlist_len = wlist_len;
	rr->rr_ok.rrok_wlist = wcl;

	return (TRUE);
}