1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 
95 */ 96 /* ARGSUSED */ 97 void 98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 99 struct svc_req *req, cred_t *cr) 100 { 101 int error; 102 vnode_t *vp; 103 struct vattr va; 104 105 vp = nfs_fhtovp(fhp, exi); 106 if (vp == NULL) { 107 ns->ns_status = NFSERR_STALE; 108 return; 109 } 110 111 /* 112 * Do the getattr. 113 */ 114 va.va_mask = AT_ALL; /* we want all the attributes */ 115 116 error = rfs4_delegated_getattr(vp, &va, 0, cr); 117 118 /* check for overflows */ 119 if (!error) { 120 /* Lie about the object type for a referral */ 121 if (vn_is_nfs_reparse(vp, cr)) 122 va.va_type = VLNK; 123 124 acl_perm(vp, exi, &va, cr); 125 error = vattr_to_nattr(&va, &ns->ns_attr); 126 } 127 128 VN_RELE(vp); 129 130 ns->ns_status = puterrno(error); 131 } 132 void * 133 rfs_getattr_getfh(fhandle_t *fhp) 134 { 135 return (fhp); 136 } 137 138 /* 139 * Set file attributes. 140 * Sets the attributes of the file with the given fhandle. Returns 141 * the new attributes. 142 */ 143 void 144 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 145 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 146 { 147 int error; 148 int flag; 149 int in_crit = 0; 150 vnode_t *vp; 151 struct vattr va; 152 struct vattr bva; 153 struct flock64 bf; 154 caller_context_t ct; 155 156 157 vp = nfs_fhtovp(&args->saa_fh, exi); 158 if (vp == NULL) { 159 ns->ns_status = NFSERR_STALE; 160 return; 161 } 162 163 if (rdonly(exi, req) || vn_is_readonly(vp)) { 164 VN_RELE(vp); 165 ns->ns_status = NFSERR_ROFS; 166 return; 167 } 168 169 error = sattr_to_vattr(&args->saa_sa, &va); 170 if (error) { 171 VN_RELE(vp); 172 ns->ns_status = puterrno(error); 173 return; 174 } 175 176 /* 177 * If the client is requesting a change to the mtime, 178 * but the nanosecond field is set to 1 billion, then 179 * this is a flag to the server that it should set the 180 * atime and mtime fields to the server's current time. 
181 * The 1 billion number actually came from the client 182 * as 1 million, but the units in the over the wire 183 * request are microseconds instead of nanoseconds. 184 * 185 * This is an overload of the protocol and should be 186 * documented in the NFS Version 2 protocol specification. 187 */ 188 if (va.va_mask & AT_MTIME) { 189 if (va.va_mtime.tv_nsec == 1000000000) { 190 gethrestime(&va.va_mtime); 191 va.va_atime = va.va_mtime; 192 va.va_mask |= AT_ATIME; 193 flag = 0; 194 } else 195 flag = ATTR_UTIME; 196 } else 197 flag = 0; 198 199 /* 200 * If the filesystem is exported with nosuid, then mask off 201 * the setuid and setgid bits. 202 */ 203 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 204 (exi->exi_export.ex_flags & EX_NOSUID)) 205 va.va_mode &= ~(VSUID | VSGID); 206 207 ct.cc_sysid = 0; 208 ct.cc_pid = 0; 209 ct.cc_caller_id = nfs2_srv_caller_id; 210 ct.cc_flags = CC_DONTBLOCK; 211 212 /* 213 * We need to specially handle size changes because it is 214 * possible for the client to create a file with modes 215 * which indicate read-only, but with the file opened for 216 * writing. If the client then tries to set the size of 217 * the file, then the normal access checking done in 218 * VOP_SETATTR would prevent the client from doing so, 219 * although it should be legal for it to do so. To get 220 * around this, we do the access checking for ourselves 221 * and then use VOP_SPACE which doesn't do the access 222 * checking which VOP_SETATTR does. VOP_SPACE can only 223 * operate on VREG files, let VOP_SETATTR handle the other 224 * extremely rare cases. 225 * Also the client should not be allowed to change the 226 * size of the file if there is a conflicting non-blocking 227 * mandatory lock in the region of change. 
228 */ 229 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 230 if (nbl_need_check(vp)) { 231 nbl_start_crit(vp, RW_READER); 232 in_crit = 1; 233 } 234 235 bva.va_mask = AT_UID | AT_SIZE; 236 237 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 238 239 if (error) { 240 if (in_crit) 241 nbl_end_crit(vp); 242 VN_RELE(vp); 243 ns->ns_status = puterrno(error); 244 return; 245 } 246 247 if (in_crit) { 248 u_offset_t offset; 249 ssize_t length; 250 251 if (va.va_size < bva.va_size) { 252 offset = va.va_size; 253 length = bva.va_size - va.va_size; 254 } else { 255 offset = bva.va_size; 256 length = va.va_size - bva.va_size; 257 } 258 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 259 NULL)) { 260 error = EACCES; 261 } 262 } 263 264 if (crgetuid(cr) == bva.va_uid && !error && 265 va.va_size != bva.va_size) { 266 va.va_mask &= ~AT_SIZE; 267 bf.l_type = F_WRLCK; 268 bf.l_whence = 0; 269 bf.l_start = (off64_t)va.va_size; 270 bf.l_len = 0; 271 bf.l_sysid = 0; 272 bf.l_pid = 0; 273 274 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 275 (offset_t)va.va_size, cr, &ct); 276 } 277 if (in_crit) 278 nbl_end_crit(vp); 279 } else 280 error = 0; 281 282 /* 283 * Do the setattr. 284 */ 285 if (!error && va.va_mask) { 286 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 287 } 288 289 /* 290 * check if the monitor on either vop_space or vop_setattr detected 291 * a delegation conflict and if so, mark the thread flag as 292 * wouldblock so that the response is dropped and the client will 293 * try again. 294 */ 295 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 296 VN_RELE(vp); 297 curthread->t_flag |= T_WOULDBLOCK; 298 return; 299 } 300 301 if (!error) { 302 va.va_mask = AT_ALL; /* get everything */ 303 304 error = rfs4_delegated_getattr(vp, &va, 0, cr); 305 306 /* check for overflows */ 307 if (!error) { 308 acl_perm(vp, exi, &va, cr); 309 error = vattr_to_nattr(&va, &ns->ns_attr); 310 } 311 } 312 313 ct.cc_flags = 0; 314 315 /* 316 * Force modified metadata out to stable storage. 
317 */ 318 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 319 320 VN_RELE(vp); 321 322 ns->ns_status = puterrno(error); 323 } 324 void * 325 rfs_setattr_getfh(struct nfssaargs *args) 326 { 327 return (&args->saa_fh); 328 } 329 330 /* 331 * Directory lookup. 332 * Returns an fhandle and file attributes for file name in a directory. 333 */ 334 /* ARGSUSED */ 335 void 336 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 337 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 338 { 339 int error; 340 vnode_t *dvp; 341 vnode_t *vp; 342 struct vattr va; 343 fhandle_t *fhp = da->da_fhandle; 344 struct sec_ol sec = {0, 0}; 345 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 346 char *name; 347 struct sockaddr *ca; 348 349 /* 350 * Trusted Extension doesn't support NFSv2. MOUNT 351 * will reject v2 clients. Need to prevent v2 client 352 * access via WebNFS here. 353 */ 354 if (is_system_labeled() && req->rq_vers == 2) { 355 dr->dr_status = NFSERR_ACCES; 356 return; 357 } 358 359 /* 360 * Disallow NULL paths 361 */ 362 if (da->da_name == NULL || *da->da_name == '\0') { 363 dr->dr_status = NFSERR_ACCES; 364 return; 365 } 366 367 /* 368 * Allow lookups from the root - the default 369 * location of the public filehandle. 370 */ 371 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 372 dvp = rootdir; 373 VN_HOLD(dvp); 374 } else { 375 dvp = nfs_fhtovp(fhp, exi); 376 if (dvp == NULL) { 377 dr->dr_status = NFSERR_STALE; 378 return; 379 } 380 } 381 382 /* 383 * Not allow lookup beyond root. 384 * If the filehandle matches a filehandle of the exi, 385 * then the ".." refers beyond the root of an exported filesystem. 
386 */ 387 if (strcmp(da->da_name, "..") == 0 && 388 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 389 VN_RELE(dvp); 390 dr->dr_status = NFSERR_NOENT; 391 return; 392 } 393 394 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 395 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 396 MAXPATHLEN); 397 398 if (name == NULL) { 399 dr->dr_status = NFSERR_ACCES; 400 return; 401 } 402 403 exi_hold(exi); 404 405 /* 406 * If the public filehandle is used then allow 407 * a multi-component lookup, i.e. evaluate 408 * a pathname and follow symbolic links if 409 * necessary. 410 * 411 * This may result in a vnode in another filesystem 412 * which is OK as long as the filesystem is exported. 413 */ 414 if (PUBLIC_FH2(fhp)) { 415 struct exportinfo *new; 416 417 publicfh_flag = TRUE; 418 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new, 419 &sec); 420 421 if (error == 0) { 422 exi_rele(exi); 423 exi = new; 424 } 425 } else { 426 /* 427 * Do a normal single component lookup. 
428 */ 429 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 430 NULL, NULL, NULL); 431 } 432 433 if (name != da->da_name) 434 kmem_free(name, MAXPATHLEN); 435 436 437 if (!error) { 438 va.va_mask = AT_ALL; /* we want everything */ 439 440 error = rfs4_delegated_getattr(vp, &va, 0, cr); 441 442 /* check for overflows */ 443 if (!error) { 444 acl_perm(vp, exi, &va, cr); 445 error = vattr_to_nattr(&va, &dr->dr_attr); 446 if (!error) { 447 if (sec.sec_flags & SEC_QUERY) 448 error = makefh_ol(&dr->dr_fhandle, exi, 449 sec.sec_index); 450 else { 451 error = makefh(&dr->dr_fhandle, vp, 452 exi); 453 if (!error && publicfh_flag && 454 !chk_clnt_sec(exi, req)) 455 auth_weak = TRUE; 456 } 457 } 458 } 459 VN_RELE(vp); 460 } 461 462 VN_RELE(dvp); 463 464 /* 465 * The passed argument exportinfo is released by the 466 * caller, comon_dispatch 467 */ 468 exi_rele(exi); 469 470 /* 471 * If it's public fh, no 0x81, and client's flavor is 472 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 473 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 474 */ 475 if (auth_weak) 476 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 477 else 478 dr->dr_status = puterrno(error); 479 } 480 void * 481 rfs_lookup_getfh(struct nfsdiropargs *da) 482 { 483 return (da->da_fhandle); 484 } 485 486 /* 487 * Read symbolic link. 488 * Returns the string in the symbolic link at the given fhandle. 
489 */ 490 /* ARGSUSED */ 491 void 492 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 493 struct svc_req *req, cred_t *cr) 494 { 495 int error; 496 struct iovec iov; 497 struct uio uio; 498 vnode_t *vp; 499 struct vattr va; 500 struct sockaddr *ca; 501 char *name = NULL; 502 int is_referral = 0; 503 504 vp = nfs_fhtovp(fhp, exi); 505 if (vp == NULL) { 506 rl->rl_data = NULL; 507 rl->rl_status = NFSERR_STALE; 508 return; 509 } 510 511 va.va_mask = AT_MODE; 512 513 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 514 515 if (error) { 516 VN_RELE(vp); 517 rl->rl_data = NULL; 518 rl->rl_status = puterrno(error); 519 return; 520 } 521 522 if (MANDLOCK(vp, va.va_mode)) { 523 VN_RELE(vp); 524 rl->rl_data = NULL; 525 rl->rl_status = NFSERR_ACCES; 526 return; 527 } 528 529 /* We lied about the object type for a referral */ 530 if (vn_is_nfs_reparse(vp, cr)) 531 is_referral = 1; 532 533 /* 534 * XNFS and RFC1094 require us to return ENXIO if argument 535 * is not a link. BUGID 1138002. 536 */ 537 if (vp->v_type != VLNK && !is_referral) { 538 VN_RELE(vp); 539 rl->rl_data = NULL; 540 rl->rl_status = NFSERR_NXIO; 541 return; 542 } 543 544 /* 545 * Allocate data for pathname. This will be freed by rfs_rlfree. 
546 */ 547 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 548 549 if (is_referral) { 550 char *s; 551 size_t strsz; 552 553 /* Get an artificial symlink based on a referral */ 554 s = build_symlink(vp, cr, &strsz); 555 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 556 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 557 vnode_t *, vp, char *, s); 558 if (s == NULL) 559 error = EINVAL; 560 else { 561 error = 0; 562 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 563 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 564 kmem_free(s, strsz); 565 } 566 567 } else { 568 569 /* 570 * Set up io vector to read sym link data 571 */ 572 iov.iov_base = rl->rl_data; 573 iov.iov_len = NFS_MAXPATHLEN; 574 uio.uio_iov = &iov; 575 uio.uio_iovcnt = 1; 576 uio.uio_segflg = UIO_SYSSPACE; 577 uio.uio_extflg = UIO_COPY_CACHED; 578 uio.uio_loffset = (offset_t)0; 579 uio.uio_resid = NFS_MAXPATHLEN; 580 581 /* 582 * Do the readlink. 583 */ 584 error = VOP_READLINK(vp, &uio, cr, NULL); 585 586 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 587 588 if (!error) 589 rl->rl_data[rl->rl_count] = '\0'; 590 591 } 592 593 594 VN_RELE(vp); 595 596 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 597 name = nfscmd_convname(ca, exi, rl->rl_data, 598 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 599 600 if (name != NULL && name != rl->rl_data) { 601 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 602 rl->rl_data = name; 603 } 604 605 /* 606 * XNFS and RFC1094 require us to return ENXIO if argument 607 * is not a link. UFS returns EINVAL if this is the case, 608 * so we do the mapping here. BUGID 1138002. 
609 */ 610 if (error == EINVAL) 611 rl->rl_status = NFSERR_NXIO; 612 else 613 rl->rl_status = puterrno(error); 614 615 } 616 void * 617 rfs_readlink_getfh(fhandle_t *fhp) 618 { 619 return (fhp); 620 } 621 /* 622 * Free data allocated by rfs_readlink 623 */ 624 void 625 rfs_rlfree(struct nfsrdlnres *rl) 626 { 627 if (rl->rl_data != NULL) 628 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 629 } 630 631 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 632 633 /* 634 * Read data. 635 * Returns some data read from the file at the given fhandle. 636 */ 637 /* ARGSUSED */ 638 void 639 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 640 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 641 { 642 vnode_t *vp; 643 int error; 644 struct vattr va; 645 struct iovec iov; 646 struct uio uio; 647 mblk_t *mp; 648 int alloc_err = 0; 649 int in_crit = 0; 650 caller_context_t ct; 651 652 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 653 if (vp == NULL) { 654 rr->rr_data = NULL; 655 rr->rr_status = NFSERR_STALE; 656 return; 657 } 658 659 if (vp->v_type != VREG) { 660 VN_RELE(vp); 661 rr->rr_data = NULL; 662 rr->rr_status = NFSERR_ISDIR; 663 return; 664 } 665 666 ct.cc_sysid = 0; 667 ct.cc_pid = 0; 668 ct.cc_caller_id = nfs2_srv_caller_id; 669 ct.cc_flags = CC_DONTBLOCK; 670 671 /* 672 * Enter the critical region before calling VOP_RWLOCK 673 * to avoid a deadlock with write requests. 
674 */ 675 if (nbl_need_check(vp)) { 676 nbl_start_crit(vp, RW_READER); 677 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 678 0, NULL)) { 679 nbl_end_crit(vp); 680 VN_RELE(vp); 681 rr->rr_data = NULL; 682 rr->rr_status = NFSERR_ACCES; 683 return; 684 } 685 in_crit = 1; 686 } 687 688 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 689 690 /* check if a monitor detected a delegation conflict */ 691 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 692 VN_RELE(vp); 693 /* mark as wouldblock so response is dropped */ 694 curthread->t_flag |= T_WOULDBLOCK; 695 696 rr->rr_data = NULL; 697 return; 698 } 699 700 va.va_mask = AT_ALL; 701 702 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 703 704 if (error) { 705 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 706 if (in_crit) 707 nbl_end_crit(vp); 708 709 VN_RELE(vp); 710 rr->rr_data = NULL; 711 rr->rr_status = puterrno(error); 712 713 return; 714 } 715 716 /* 717 * This is a kludge to allow reading of files created 718 * with no read permission. The owner of the file 719 * is always allowed to read it. 720 */ 721 if (crgetuid(cr) != va.va_uid) { 722 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 723 724 if (error) { 725 /* 726 * Exec is the same as read over the net because 727 * of demand loading. 
728 */ 729 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 730 } 731 if (error) { 732 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 733 if (in_crit) 734 nbl_end_crit(vp); 735 VN_RELE(vp); 736 rr->rr_data = NULL; 737 rr->rr_status = puterrno(error); 738 739 return; 740 } 741 } 742 743 if (MANDLOCK(vp, va.va_mode)) { 744 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 745 if (in_crit) 746 nbl_end_crit(vp); 747 748 VN_RELE(vp); 749 rr->rr_data = NULL; 750 rr->rr_status = NFSERR_ACCES; 751 752 return; 753 } 754 755 rr->rr_ok.rrok_wlist_len = 0; 756 rr->rr_ok.rrok_wlist = NULL; 757 758 if ((u_offset_t)ra->ra_offset >= va.va_size) { 759 rr->rr_count = 0; 760 rr->rr_data = NULL; 761 /* 762 * In this case, status is NFS_OK, but there is no data 763 * to encode. So set rr_mp to NULL. 764 */ 765 rr->rr_mp = NULL; 766 rr->rr_ok.rrok_wlist = ra->ra_wlist; 767 if (rr->rr_ok.rrok_wlist) 768 clist_zero_len(rr->rr_ok.rrok_wlist); 769 goto done; 770 } 771 772 if (ra->ra_wlist) { 773 mp = NULL; 774 rr->rr_mp = NULL; 775 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 776 if (ra->ra_count > iov.iov_len) { 777 rr->rr_data = NULL; 778 rr->rr_status = NFSERR_INVAL; 779 goto done; 780 } 781 } else { 782 /* 783 * mp will contain the data to be sent out in the read reply. 784 * This will be freed after the reply has been sent out (by the 785 * driver). 786 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 787 * that the call to xdrmblk_putmblk() never fails. 
788 */ 789 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 790 &alloc_err); 791 ASSERT(mp != NULL); 792 ASSERT(alloc_err == 0); 793 794 rr->rr_mp = mp; 795 796 /* 797 * Set up io vector 798 */ 799 iov.iov_base = (caddr_t)mp->b_datap->db_base; 800 iov.iov_len = ra->ra_count; 801 } 802 803 uio.uio_iov = &iov; 804 uio.uio_iovcnt = 1; 805 uio.uio_segflg = UIO_SYSSPACE; 806 uio.uio_extflg = UIO_COPY_CACHED; 807 uio.uio_loffset = (offset_t)ra->ra_offset; 808 uio.uio_resid = ra->ra_count; 809 810 error = VOP_READ(vp, &uio, 0, cr, &ct); 811 812 if (error) { 813 if (mp) 814 freeb(mp); 815 816 /* 817 * check if a monitor detected a delegation conflict and 818 * mark as wouldblock so response is dropped 819 */ 820 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 821 curthread->t_flag |= T_WOULDBLOCK; 822 else 823 rr->rr_status = puterrno(error); 824 825 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 826 if (in_crit) 827 nbl_end_crit(vp); 828 829 VN_RELE(vp); 830 rr->rr_data = NULL; 831 832 return; 833 } 834 835 /* 836 * Get attributes again so we can send the latest access 837 * time to the client side for his cache. 
838 */ 839 va.va_mask = AT_ALL; 840 841 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 842 843 if (error) { 844 if (mp) 845 freeb(mp); 846 847 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 848 if (in_crit) 849 nbl_end_crit(vp); 850 851 VN_RELE(vp); 852 rr->rr_data = NULL; 853 rr->rr_status = puterrno(error); 854 855 return; 856 } 857 858 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 859 860 if (mp) { 861 rr->rr_data = (char *)mp->b_datap->db_base; 862 } else { 863 if (ra->ra_wlist) { 864 rr->rr_data = (caddr_t)iov.iov_base; 865 if (!rdma_setup_read_data2(ra, rr)) { 866 rr->rr_data = NULL; 867 rr->rr_status = puterrno(NFSERR_INVAL); 868 } 869 } 870 } 871 done: 872 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 873 if (in_crit) 874 nbl_end_crit(vp); 875 876 acl_perm(vp, exi, &va, cr); 877 878 /* check for overflows */ 879 error = vattr_to_nattr(&va, &rr->rr_attr); 880 881 VN_RELE(vp); 882 883 rr->rr_status = puterrno(error); 884 } 885 886 /* 887 * Free data allocated by rfs_read 888 */ 889 void 890 rfs_rdfree(struct nfsrdresult *rr) 891 { 892 mblk_t *mp; 893 894 if (rr->rr_status == NFS_OK) { 895 mp = rr->rr_mp; 896 if (mp != NULL) 897 freeb(mp); 898 } 899 } 900 901 void * 902 rfs_read_getfh(struct nfsreadargs *ra) 903 { 904 return (&ra->ra_fhandle); 905 } 906 907 #define MAX_IOVECS 12 908 909 #ifdef DEBUG 910 static int rfs_write_sync_hits = 0; 911 static int rfs_write_sync_misses = 0; 912 #endif 913 914 /* 915 * Write data to file. 916 * Returns attributes of a file after writing some data to it. 917 * 918 * Any changes made here, especially in error handling might have 919 * to also be done in rfs_write (which clusters write requests). 
920 */ 921 void 922 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 923 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 924 { 925 int error; 926 vnode_t *vp; 927 rlim64_t rlimit; 928 struct vattr va; 929 struct uio uio; 930 struct iovec iov[MAX_IOVECS]; 931 mblk_t *m; 932 struct iovec *iovp; 933 int iovcnt; 934 cred_t *savecred; 935 int in_crit = 0; 936 caller_context_t ct; 937 938 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 939 if (vp == NULL) { 940 ns->ns_status = NFSERR_STALE; 941 return; 942 } 943 944 if (rdonly(exi, req)) { 945 VN_RELE(vp); 946 ns->ns_status = NFSERR_ROFS; 947 return; 948 } 949 950 if (vp->v_type != VREG) { 951 VN_RELE(vp); 952 ns->ns_status = NFSERR_ISDIR; 953 return; 954 } 955 956 ct.cc_sysid = 0; 957 ct.cc_pid = 0; 958 ct.cc_caller_id = nfs2_srv_caller_id; 959 ct.cc_flags = CC_DONTBLOCK; 960 961 va.va_mask = AT_UID|AT_MODE; 962 963 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 964 965 if (error) { 966 VN_RELE(vp); 967 ns->ns_status = puterrno(error); 968 969 return; 970 } 971 972 if (crgetuid(cr) != va.va_uid) { 973 /* 974 * This is a kludge to allow writes of files created 975 * with read only permission. The owner of the file 976 * is always allowed to write it. 977 */ 978 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 979 980 if (error) { 981 VN_RELE(vp); 982 ns->ns_status = puterrno(error); 983 return; 984 } 985 } 986 987 /* 988 * Can't access a mandatory lock file. This might cause 989 * the NFS service thread to block forever waiting for a 990 * lock to be released that will never be released. 991 */ 992 if (MANDLOCK(vp, va.va_mode)) { 993 VN_RELE(vp); 994 ns->ns_status = NFSERR_ACCES; 995 return; 996 } 997 998 /* 999 * We have to enter the critical region before calling VOP_RWLOCK 1000 * to avoid a deadlock with ufs. 
1001 */ 1002 if (nbl_need_check(vp)) { 1003 nbl_start_crit(vp, RW_READER); 1004 in_crit = 1; 1005 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 1006 wa->wa_count, 0, NULL)) { 1007 error = EACCES; 1008 goto out; 1009 } 1010 } 1011 1012 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1013 1014 /* check if a monitor detected a delegation conflict */ 1015 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1016 VN_RELE(vp); 1017 /* mark as wouldblock so response is dropped */ 1018 curthread->t_flag |= T_WOULDBLOCK; 1019 return; 1020 } 1021 1022 if (wa->wa_data || wa->wa_rlist) { 1023 /* Do the RDMA thing if necessary */ 1024 if (wa->wa_rlist) { 1025 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1026 iov[0].iov_len = wa->wa_count; 1027 } else { 1028 iov[0].iov_base = wa->wa_data; 1029 iov[0].iov_len = wa->wa_count; 1030 } 1031 uio.uio_iov = iov; 1032 uio.uio_iovcnt = 1; 1033 uio.uio_segflg = UIO_SYSSPACE; 1034 uio.uio_extflg = UIO_COPY_DEFAULT; 1035 uio.uio_loffset = (offset_t)wa->wa_offset; 1036 uio.uio_resid = wa->wa_count; 1037 /* 1038 * The limit is checked on the client. We 1039 * should allow any size writes here. 1040 */ 1041 uio.uio_llimit = curproc->p_fsz_ctl; 1042 rlimit = uio.uio_llimit - wa->wa_offset; 1043 if (rlimit < (rlim64_t)uio.uio_resid) 1044 uio.uio_resid = (uint_t)rlimit; 1045 1046 /* 1047 * for now we assume no append mode 1048 */ 1049 /* 1050 * We're changing creds because VM may fault and we need 1051 * the cred of the current thread to be used if quota 1052 * checking is enabled. 
1053 */ 1054 savecred = curthread->t_cred; 1055 curthread->t_cred = cr; 1056 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1057 curthread->t_cred = savecred; 1058 } else { 1059 iovcnt = 0; 1060 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1061 iovcnt++; 1062 if (iovcnt <= MAX_IOVECS) { 1063 #ifdef DEBUG 1064 rfs_write_sync_hits++; 1065 #endif 1066 iovp = iov; 1067 } else { 1068 #ifdef DEBUG 1069 rfs_write_sync_misses++; 1070 #endif 1071 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1072 } 1073 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1074 uio.uio_iov = iovp; 1075 uio.uio_iovcnt = iovcnt; 1076 uio.uio_segflg = UIO_SYSSPACE; 1077 uio.uio_extflg = UIO_COPY_DEFAULT; 1078 uio.uio_loffset = (offset_t)wa->wa_offset; 1079 uio.uio_resid = wa->wa_count; 1080 /* 1081 * The limit is checked on the client. We 1082 * should allow any size writes here. 1083 */ 1084 uio.uio_llimit = curproc->p_fsz_ctl; 1085 rlimit = uio.uio_llimit - wa->wa_offset; 1086 if (rlimit < (rlim64_t)uio.uio_resid) 1087 uio.uio_resid = (uint_t)rlimit; 1088 1089 /* 1090 * For now we assume no append mode. 1091 */ 1092 /* 1093 * We're changing creds because VM may fault and we need 1094 * the cred of the current thread to be used if quota 1095 * checking is enabled. 1096 */ 1097 savecred = curthread->t_cred; 1098 curthread->t_cred = cr; 1099 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1100 curthread->t_cred = savecred; 1101 1102 if (iovp != iov) 1103 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1104 } 1105 1106 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1107 1108 if (!error) { 1109 /* 1110 * Get attributes again so we send the latest mod 1111 * time to the client side for his cache. 
1112 */ 1113 va.va_mask = AT_ALL; /* now we want everything */ 1114 1115 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1116 1117 /* check for overflows */ 1118 if (!error) { 1119 acl_perm(vp, exi, &va, cr); 1120 error = vattr_to_nattr(&va, &ns->ns_attr); 1121 } 1122 } 1123 1124 out: 1125 if (in_crit) 1126 nbl_end_crit(vp); 1127 VN_RELE(vp); 1128 1129 /* check if a monitor detected a delegation conflict */ 1130 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1131 /* mark as wouldblock so response is dropped */ 1132 curthread->t_flag |= T_WOULDBLOCK; 1133 else 1134 ns->ns_status = puterrno(error); 1135 1136 } 1137 1138 struct rfs_async_write { 1139 struct nfswriteargs *wa; 1140 struct nfsattrstat *ns; 1141 struct svc_req *req; 1142 cred_t *cr; 1143 kthread_t *thread; 1144 struct rfs_async_write *list; 1145 }; 1146 1147 struct rfs_async_write_list { 1148 fhandle_t *fhp; 1149 kcondvar_t cv; 1150 struct rfs_async_write *list; 1151 struct rfs_async_write_list *next; 1152 }; 1153 1154 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1155 static kmutex_t rfs_async_write_lock; 1156 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1157 1158 #define MAXCLIOVECS 42 1159 #define RFSWRITE_INITVAL (enum nfsstat) -1 1160 1161 #ifdef DEBUG 1162 static int rfs_write_hits = 0; 1163 static int rfs_write_misses = 0; 1164 #endif 1165 1166 /* 1167 * Write data to file. 1168 * Returns attributes of a file after writing some data to it. 
1169 */ 1170 void 1171 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1172 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1173 { 1174 int error; 1175 vnode_t *vp; 1176 rlim64_t rlimit; 1177 struct vattr va; 1178 struct uio uio; 1179 struct rfs_async_write_list *lp; 1180 struct rfs_async_write_list *nlp; 1181 struct rfs_async_write *rp; 1182 struct rfs_async_write *nrp; 1183 struct rfs_async_write *trp; 1184 struct rfs_async_write *lrp; 1185 int data_written; 1186 int iovcnt; 1187 mblk_t *m; 1188 struct iovec *iovp; 1189 struct iovec *niovp; 1190 struct iovec iov[MAXCLIOVECS]; 1191 int count; 1192 int rcount; 1193 uint_t off; 1194 uint_t len; 1195 struct rfs_async_write nrpsp; 1196 struct rfs_async_write_list nlpsp; 1197 ushort_t t_flag; 1198 cred_t *savecred; 1199 int in_crit = 0; 1200 caller_context_t ct; 1201 1202 if (!rfs_write_async) { 1203 rfs_write_sync(wa, ns, exi, req, cr); 1204 return; 1205 } 1206 1207 /* 1208 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1209 * is considered an OK. 1210 */ 1211 ns->ns_status = RFSWRITE_INITVAL; 1212 1213 nrp = &nrpsp; 1214 nrp->wa = wa; 1215 nrp->ns = ns; 1216 nrp->req = req; 1217 nrp->cr = cr; 1218 nrp->thread = curthread; 1219 1220 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1221 1222 /* 1223 * Look to see if there is already a cluster started 1224 * for this file. 1225 */ 1226 mutex_enter(&rfs_async_write_lock); 1227 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { 1228 if (bcmp(&wa->wa_fhandle, lp->fhp, 1229 sizeof (fhandle_t)) == 0) 1230 break; 1231 } 1232 1233 /* 1234 * If lp is non-NULL, then there is already a cluster 1235 * started. We need to place ourselves in the cluster 1236 * list in the right place as determined by starting 1237 * offset. Conflicts with non-blocking mandatory locked 1238 * regions will be checked when the cluster is processed. 
1239 */ 1240 if (lp != NULL) { 1241 rp = lp->list; 1242 trp = NULL; 1243 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1244 trp = rp; 1245 rp = rp->list; 1246 } 1247 nrp->list = rp; 1248 if (trp == NULL) 1249 lp->list = nrp; 1250 else 1251 trp->list = nrp; 1252 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1253 cv_wait(&lp->cv, &rfs_async_write_lock); 1254 mutex_exit(&rfs_async_write_lock); 1255 1256 return; 1257 } 1258 1259 /* 1260 * No cluster started yet, start one and add ourselves 1261 * to the list of clusters. 1262 */ 1263 nrp->list = NULL; 1264 1265 nlp = &nlpsp; 1266 nlp->fhp = &wa->wa_fhandle; 1267 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1268 nlp->list = nrp; 1269 nlp->next = NULL; 1270 1271 if (rfs_async_write_head == NULL) { 1272 rfs_async_write_head = nlp; 1273 } else { 1274 lp = rfs_async_write_head; 1275 while (lp->next != NULL) 1276 lp = lp->next; 1277 lp->next = nlp; 1278 } 1279 mutex_exit(&rfs_async_write_lock); 1280 1281 /* 1282 * Convert the file handle common to all of the requests 1283 * in this cluster to a vnode. 1284 */ 1285 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1286 if (vp == NULL) { 1287 mutex_enter(&rfs_async_write_lock); 1288 if (rfs_async_write_head == nlp) 1289 rfs_async_write_head = nlp->next; 1290 else { 1291 lp = rfs_async_write_head; 1292 while (lp->next != nlp) 1293 lp = lp->next; 1294 lp->next = nlp->next; 1295 } 1296 t_flag = curthread->t_flag & T_WOULDBLOCK; 1297 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1298 rp->ns->ns_status = NFSERR_STALE; 1299 rp->thread->t_flag |= t_flag; 1300 } 1301 cv_broadcast(&nlp->cv); 1302 mutex_exit(&rfs_async_write_lock); 1303 1304 return; 1305 } 1306 1307 /* 1308 * Can only write regular files. Attempts to write any 1309 * other file types fail with EISDIR. 
1310 */ 1311 if (vp->v_type != VREG) { 1312 VN_RELE(vp); 1313 mutex_enter(&rfs_async_write_lock); 1314 if (rfs_async_write_head == nlp) 1315 rfs_async_write_head = nlp->next; 1316 else { 1317 lp = rfs_async_write_head; 1318 while (lp->next != nlp) 1319 lp = lp->next; 1320 lp->next = nlp->next; 1321 } 1322 t_flag = curthread->t_flag & T_WOULDBLOCK; 1323 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1324 rp->ns->ns_status = NFSERR_ISDIR; 1325 rp->thread->t_flag |= t_flag; 1326 } 1327 cv_broadcast(&nlp->cv); 1328 mutex_exit(&rfs_async_write_lock); 1329 1330 return; 1331 } 1332 1333 /* 1334 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1335 * deadlock with ufs. 1336 */ 1337 if (nbl_need_check(vp)) { 1338 nbl_start_crit(vp, RW_READER); 1339 in_crit = 1; 1340 } 1341 1342 ct.cc_sysid = 0; 1343 ct.cc_pid = 0; 1344 ct.cc_caller_id = nfs2_srv_caller_id; 1345 ct.cc_flags = CC_DONTBLOCK; 1346 1347 /* 1348 * Lock the file for writing. This operation provides 1349 * the delay which allows clusters to grow. 1350 */ 1351 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1352 1353 /* check if a monitor detected a delegation conflict */ 1354 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1355 if (in_crit) 1356 nbl_end_crit(vp); 1357 VN_RELE(vp); 1358 /* mark as wouldblock so response is dropped */ 1359 curthread->t_flag |= T_WOULDBLOCK; 1360 mutex_enter(&rfs_async_write_lock); 1361 if (rfs_async_write_head == nlp) 1362 rfs_async_write_head = nlp->next; 1363 else { 1364 lp = rfs_async_write_head; 1365 while (lp->next != nlp) 1366 lp = lp->next; 1367 lp->next = nlp->next; 1368 } 1369 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1370 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1371 rp->ns->ns_status = puterrno(error); 1372 rp->thread->t_flag |= T_WOULDBLOCK; 1373 } 1374 } 1375 cv_broadcast(&nlp->cv); 1376 mutex_exit(&rfs_async_write_lock); 1377 1378 return; 1379 } 1380 1381 /* 1382 * Disconnect this cluster from the list of clusters. 
1383 * The cluster that is being dealt with must be fixed 1384 * in size after this point, so there is no reason 1385 * to leave it on the list so that new requests can 1386 * find it. 1387 * 1388 * The algorithm is that the first write request will 1389 * create a cluster, convert the file handle to a 1390 * vnode pointer, and then lock the file for writing. 1391 * This request is not likely to be clustered with 1392 * any others. However, the next request will create 1393 * a new cluster and be blocked in VOP_RWLOCK while 1394 * the first request is being processed. This delay 1395 * will allow more requests to be clustered in this 1396 * second cluster. 1397 */ 1398 mutex_enter(&rfs_async_write_lock); 1399 if (rfs_async_write_head == nlp) 1400 rfs_async_write_head = nlp->next; 1401 else { 1402 lp = rfs_async_write_head; 1403 while (lp->next != nlp) 1404 lp = lp->next; 1405 lp->next = nlp->next; 1406 } 1407 mutex_exit(&rfs_async_write_lock); 1408 1409 /* 1410 * Step through the list of requests in this cluster. 1411 * We need to check permissions to make sure that all 1412 * of the requests have sufficient permission to write 1413 * the file. A cluster can be composed of requests 1414 * from different clients and different users on each 1415 * client. 1416 * 1417 * As a side effect, we also calculate the size of the 1418 * byte range that this cluster encompasses. 1419 */ 1420 rp = nlp->list; 1421 off = rp->wa->wa_offset; 1422 len = (uint_t)0; 1423 do { 1424 if (rdonly(exi, rp->req)) { 1425 rp->ns->ns_status = NFSERR_ROFS; 1426 t_flag = curthread->t_flag & T_WOULDBLOCK; 1427 rp->thread->t_flag |= t_flag; 1428 continue; 1429 } 1430 1431 va.va_mask = AT_UID|AT_MODE; 1432 1433 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1434 1435 if (!error) { 1436 if (crgetuid(rp->cr) != va.va_uid) { 1437 /* 1438 * This is a kludge to allow writes of files 1439 * created with read only permission. The 1440 * owner of the file is always allowed to 1441 * write it. 
1442 */ 1443 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1444 } 1445 if (!error && MANDLOCK(vp, va.va_mode)) 1446 error = EACCES; 1447 } 1448 1449 /* 1450 * Check for a conflict with a nbmand-locked region. 1451 */ 1452 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1453 rp->wa->wa_count, 0, NULL)) { 1454 error = EACCES; 1455 } 1456 1457 if (error) { 1458 rp->ns->ns_status = puterrno(error); 1459 t_flag = curthread->t_flag & T_WOULDBLOCK; 1460 rp->thread->t_flag |= t_flag; 1461 continue; 1462 } 1463 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1464 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1465 } while ((rp = rp->list) != NULL); 1466 1467 /* 1468 * Step through the cluster attempting to gather as many 1469 * requests which are contiguous as possible. These 1470 * contiguous requests are handled via one call to VOP_WRITE 1471 * instead of different calls to VOP_WRITE. We also keep 1472 * track of the fact that any data was written. 1473 */ 1474 rp = nlp->list; 1475 data_written = 0; 1476 do { 1477 /* 1478 * Skip any requests which are already marked as having an 1479 * error. 1480 */ 1481 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1482 rp = rp->list; 1483 continue; 1484 } 1485 1486 /* 1487 * Count the number of iovec's which are required 1488 * to handle this set of requests. One iovec is 1489 * needed for each data buffer, whether addressed 1490 * by wa_data or by the b_rptr pointers in the 1491 * mblk chains. 
1492 */ 1493 iovcnt = 0; 1494 lrp = rp; 1495 for (;;) { 1496 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1497 iovcnt++; 1498 else { 1499 m = lrp->wa->wa_mblk; 1500 while (m != NULL) { 1501 iovcnt++; 1502 m = m->b_cont; 1503 } 1504 } 1505 if (lrp->list == NULL || 1506 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1507 lrp->wa->wa_offset + lrp->wa->wa_count != 1508 lrp->list->wa->wa_offset) { 1509 lrp = lrp->list; 1510 break; 1511 } 1512 lrp = lrp->list; 1513 } 1514 1515 if (iovcnt <= MAXCLIOVECS) { 1516 #ifdef DEBUG 1517 rfs_write_hits++; 1518 #endif 1519 niovp = iov; 1520 } else { 1521 #ifdef DEBUG 1522 rfs_write_misses++; 1523 #endif 1524 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1525 } 1526 /* 1527 * Put together the scatter/gather iovecs. 1528 */ 1529 iovp = niovp; 1530 trp = rp; 1531 count = 0; 1532 do { 1533 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1534 if (trp->wa->wa_rlist) { 1535 iovp->iov_base = 1536 (char *)((trp->wa->wa_rlist)-> 1537 u.c_daddr3); 1538 iovp->iov_len = trp->wa->wa_count; 1539 } else { 1540 iovp->iov_base = trp->wa->wa_data; 1541 iovp->iov_len = trp->wa->wa_count; 1542 } 1543 iovp++; 1544 } else { 1545 m = trp->wa->wa_mblk; 1546 rcount = trp->wa->wa_count; 1547 while (m != NULL) { 1548 iovp->iov_base = (caddr_t)m->b_rptr; 1549 iovp->iov_len = (m->b_wptr - m->b_rptr); 1550 rcount -= iovp->iov_len; 1551 if (rcount < 0) 1552 iovp->iov_len += rcount; 1553 iovp++; 1554 if (rcount <= 0) 1555 break; 1556 m = m->b_cont; 1557 } 1558 } 1559 count += trp->wa->wa_count; 1560 trp = trp->list; 1561 } while (trp != lrp); 1562 1563 uio.uio_iov = niovp; 1564 uio.uio_iovcnt = iovcnt; 1565 uio.uio_segflg = UIO_SYSSPACE; 1566 uio.uio_extflg = UIO_COPY_DEFAULT; 1567 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1568 uio.uio_resid = count; 1569 /* 1570 * The limit is checked on the client. We 1571 * should allow any size writes here. 
1572 */ 1573 uio.uio_llimit = curproc->p_fsz_ctl; 1574 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1575 if (rlimit < (rlim64_t)uio.uio_resid) 1576 uio.uio_resid = (uint_t)rlimit; 1577 1578 /* 1579 * For now we assume no append mode. 1580 */ 1581 1582 /* 1583 * We're changing creds because VM may fault 1584 * and we need the cred of the current 1585 * thread to be used if quota * checking is 1586 * enabled. 1587 */ 1588 savecred = curthread->t_cred; 1589 curthread->t_cred = cr; 1590 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1591 curthread->t_cred = savecred; 1592 1593 /* check if a monitor detected a delegation conflict */ 1594 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1595 /* mark as wouldblock so response is dropped */ 1596 curthread->t_flag |= T_WOULDBLOCK; 1597 1598 if (niovp != iov) 1599 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1600 1601 if (!error) { 1602 data_written = 1; 1603 /* 1604 * Get attributes again so we send the latest mod 1605 * time to the client side for his cache. 1606 */ 1607 va.va_mask = AT_ALL; /* now we want everything */ 1608 1609 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1610 1611 if (!error) 1612 acl_perm(vp, exi, &va, rp->cr); 1613 } 1614 1615 /* 1616 * Fill in the status responses for each request 1617 * which was just handled. Also, copy the latest 1618 * attributes in to the attribute responses if 1619 * appropriate. 1620 */ 1621 t_flag = curthread->t_flag & T_WOULDBLOCK; 1622 do { 1623 rp->thread->t_flag |= t_flag; 1624 /* check for overflows */ 1625 if (!error) { 1626 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1627 } 1628 rp->ns->ns_status = puterrno(error); 1629 rp = rp->list; 1630 } while (rp != lrp); 1631 } while (rp != NULL); 1632 1633 /* 1634 * If any data was written at all, then we need to flush 1635 * the data and metadata to stable storage. 
1636 */ 1637 if (data_written) { 1638 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1639 1640 if (!error) { 1641 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1642 } 1643 } 1644 1645 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1646 1647 if (in_crit) 1648 nbl_end_crit(vp); 1649 VN_RELE(vp); 1650 1651 t_flag = curthread->t_flag & T_WOULDBLOCK; 1652 mutex_enter(&rfs_async_write_lock); 1653 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1654 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1655 rp->ns->ns_status = puterrno(error); 1656 rp->thread->t_flag |= t_flag; 1657 } 1658 } 1659 cv_broadcast(&nlp->cv); 1660 mutex_exit(&rfs_async_write_lock); 1661 1662 } 1663 1664 void * 1665 rfs_write_getfh(struct nfswriteargs *wa) 1666 { 1667 return (&wa->wa_fhandle); 1668 } 1669 1670 /* 1671 * Create a file. 1672 * Creates a file with given attributes and returns those attributes 1673 * and an fhandle for the new file. 1674 */ 1675 void 1676 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1677 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1678 { 1679 int error; 1680 int lookuperr; 1681 int in_crit = 0; 1682 struct vattr va; 1683 vnode_t *vp; 1684 vnode_t *realvp; 1685 vnode_t *dvp; 1686 char *name = args->ca_da.da_name; 1687 vnode_t *tvp = NULL; 1688 int mode; 1689 int lookup_ok; 1690 bool_t trunc; 1691 struct sockaddr *ca; 1692 1693 /* 1694 * Disallow NULL paths 1695 */ 1696 if (name == NULL || *name == '\0') { 1697 dr->dr_status = NFSERR_ACCES; 1698 return; 1699 } 1700 1701 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1702 if (dvp == NULL) { 1703 dr->dr_status = NFSERR_STALE; 1704 return; 1705 } 1706 1707 error = sattr_to_vattr(args->ca_sa, &va); 1708 if (error) { 1709 dr->dr_status = puterrno(error); 1710 return; 1711 } 1712 1713 /* 1714 * Must specify the mode. 
1715 */ 1716 if (!(va.va_mask & AT_MODE)) { 1717 VN_RELE(dvp); 1718 dr->dr_status = NFSERR_INVAL; 1719 return; 1720 } 1721 1722 /* 1723 * This is a completely gross hack to make mknod 1724 * work over the wire until we can wack the protocol 1725 */ 1726 if ((va.va_mode & IFMT) == IFCHR) { 1727 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1728 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1729 else { 1730 va.va_type = VCHR; 1731 /* 1732 * uncompress the received dev_t 1733 * if the top half is zero indicating a request 1734 * from an `older style' OS. 1735 */ 1736 if ((va.va_size & 0xffff0000) == 0) 1737 va.va_rdev = nfsv2_expdev(va.va_size); 1738 else 1739 va.va_rdev = (dev_t)va.va_size; 1740 } 1741 va.va_mask &= ~AT_SIZE; 1742 } else if ((va.va_mode & IFMT) == IFBLK) { 1743 va.va_type = VBLK; 1744 /* 1745 * uncompress the received dev_t 1746 * if the top half is zero indicating a request 1747 * from an `older style' OS. 1748 */ 1749 if ((va.va_size & 0xffff0000) == 0) 1750 va.va_rdev = nfsv2_expdev(va.va_size); 1751 else 1752 va.va_rdev = (dev_t)va.va_size; 1753 va.va_mask &= ~AT_SIZE; 1754 } else if ((va.va_mode & IFMT) == IFSOCK) { 1755 va.va_type = VSOCK; 1756 } else { 1757 va.va_type = VREG; 1758 } 1759 va.va_mode &= ~IFMT; 1760 va.va_mask |= AT_TYPE; 1761 1762 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1763 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1764 MAXPATHLEN); 1765 if (name == NULL) { 1766 dr->dr_status = puterrno(EINVAL); 1767 return; 1768 } 1769 1770 /* 1771 * Why was the choice made to use VWRITE as the mode to the 1772 * call to VOP_CREATE ? This results in a bug. When a client 1773 * opens a file that already exists and is RDONLY, the second 1774 * open fails with an EACESS because of the mode. 1775 * bug ID 1054648. 
1776 */ 1777 lookup_ok = 0; 1778 mode = VWRITE; 1779 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1780 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1781 NULL, NULL, NULL); 1782 if (!error) { 1783 struct vattr at; 1784 1785 lookup_ok = 1; 1786 at.va_mask = AT_MODE; 1787 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1788 if (!error) 1789 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1790 VN_RELE(tvp); 1791 tvp = NULL; 1792 } 1793 } 1794 1795 if (!lookup_ok) { 1796 if (rdonly(exi, req)) { 1797 error = EROFS; 1798 } else if (va.va_type != VREG && va.va_type != VFIFO && 1799 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1800 error = EPERM; 1801 } else { 1802 error = 0; 1803 } 1804 } 1805 1806 /* 1807 * If file size is being modified on an already existing file 1808 * make sure that there are no conflicting non-blocking mandatory 1809 * locks in the region being manipulated. Return EACCES if there 1810 * are conflicting locks. 1811 */ 1812 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1813 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1814 NULL, NULL, NULL); 1815 1816 if (!lookuperr && 1817 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1818 VN_RELE(tvp); 1819 curthread->t_flag |= T_WOULDBLOCK; 1820 goto out; 1821 } 1822 1823 if (!lookuperr && nbl_need_check(tvp)) { 1824 /* 1825 * The file exists. Now check if it has any 1826 * conflicting non-blocking mandatory locks 1827 * in the region being changed. 
1828 */ 1829 struct vattr bva; 1830 u_offset_t offset; 1831 ssize_t length; 1832 1833 nbl_start_crit(tvp, RW_READER); 1834 in_crit = 1; 1835 1836 bva.va_mask = AT_SIZE; 1837 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1838 if (!error) { 1839 if (va.va_size < bva.va_size) { 1840 offset = va.va_size; 1841 length = bva.va_size - va.va_size; 1842 } else { 1843 offset = bva.va_size; 1844 length = va.va_size - bva.va_size; 1845 } 1846 if (length) { 1847 if (nbl_conflict(tvp, NBL_WRITE, 1848 offset, length, 0, NULL)) { 1849 error = EACCES; 1850 } 1851 } 1852 } 1853 if (error) { 1854 nbl_end_crit(tvp); 1855 VN_RELE(tvp); 1856 in_crit = 0; 1857 } 1858 } else if (tvp != NULL) { 1859 VN_RELE(tvp); 1860 } 1861 } 1862 1863 if (!error) { 1864 /* 1865 * If filesystem is shared with nosuid the remove any 1866 * setuid/setgid bits on create. 1867 */ 1868 if (va.va_type == VREG && 1869 exi->exi_export.ex_flags & EX_NOSUID) 1870 va.va_mode &= ~(VSUID | VSGID); 1871 1872 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1873 NULL, NULL); 1874 1875 if (!error) { 1876 1877 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1878 trunc = TRUE; 1879 else 1880 trunc = FALSE; 1881 1882 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1883 VN_RELE(vp); 1884 curthread->t_flag |= T_WOULDBLOCK; 1885 goto out; 1886 } 1887 va.va_mask = AT_ALL; 1888 1889 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1890 1891 /* check for overflows */ 1892 if (!error) { 1893 acl_perm(vp, exi, &va, cr); 1894 error = vattr_to_nattr(&va, &dr->dr_attr); 1895 if (!error) { 1896 error = makefh(&dr->dr_fhandle, vp, 1897 exi); 1898 } 1899 } 1900 /* 1901 * Force modified metadata out to stable storage. 
1902 * 1903 * if a underlying vp exists, pass it to VOP_FSYNC 1904 */ 1905 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1906 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1907 else 1908 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1909 VN_RELE(vp); 1910 } 1911 1912 if (in_crit) { 1913 nbl_end_crit(tvp); 1914 VN_RELE(tvp); 1915 } 1916 } 1917 1918 /* 1919 * Force modified data and metadata out to stable storage. 1920 */ 1921 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1922 1923 out: 1924 1925 VN_RELE(dvp); 1926 1927 dr->dr_status = puterrno(error); 1928 1929 if (name != args->ca_da.da_name) 1930 kmem_free(name, MAXPATHLEN); 1931 } 1932 void * 1933 rfs_create_getfh(struct nfscreatargs *args) 1934 { 1935 return (args->ca_da.da_fhandle); 1936 } 1937 1938 /* 1939 * Remove a file. 1940 * Remove named file from parent directory. 1941 */ 1942 void 1943 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1944 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1945 { 1946 int error = 0; 1947 vnode_t *vp; 1948 vnode_t *targvp; 1949 int in_crit = 0; 1950 1951 /* 1952 * Disallow NULL paths 1953 */ 1954 if (da->da_name == NULL || *da->da_name == '\0') { 1955 *status = NFSERR_ACCES; 1956 return; 1957 } 1958 1959 vp = nfs_fhtovp(da->da_fhandle, exi); 1960 if (vp == NULL) { 1961 *status = NFSERR_STALE; 1962 return; 1963 } 1964 1965 if (rdonly(exi, req)) { 1966 VN_RELE(vp); 1967 *status = NFSERR_ROFS; 1968 return; 1969 } 1970 1971 /* 1972 * Check for a conflict with a non-blocking mandatory share reservation. 1973 */ 1974 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1975 NULL, cr, NULL, NULL, NULL); 1976 if (error != 0) { 1977 VN_RELE(vp); 1978 *status = puterrno(error); 1979 return; 1980 } 1981 1982 /* 1983 * If the file is delegated to an v4 client, then initiate 1984 * recall and drop this request (by setting T_WOULDBLOCK). 1985 * The client will eventually re-transmit the request and 1986 * (hopefully), by then, the v4 client will have returned 1987 * the delegation. 
1988 */ 1989 1990 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 1991 VN_RELE(vp); 1992 VN_RELE(targvp); 1993 curthread->t_flag |= T_WOULDBLOCK; 1994 return; 1995 } 1996 1997 if (nbl_need_check(targvp)) { 1998 nbl_start_crit(targvp, RW_READER); 1999 in_crit = 1; 2000 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 2001 error = EACCES; 2002 goto out; 2003 } 2004 } 2005 2006 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0); 2007 2008 /* 2009 * Force modified data and metadata out to stable storage. 2010 */ 2011 (void) VOP_FSYNC(vp, 0, cr, NULL); 2012 2013 out: 2014 if (in_crit) 2015 nbl_end_crit(targvp); 2016 VN_RELE(targvp); 2017 VN_RELE(vp); 2018 2019 *status = puterrno(error); 2020 2021 } 2022 2023 void * 2024 rfs_remove_getfh(struct nfsdiropargs *da) 2025 { 2026 return (da->da_fhandle); 2027 } 2028 2029 /* 2030 * rename a file 2031 * Give a file (from) a new name (to). 2032 */ 2033 void 2034 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, 2035 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2036 { 2037 int error = 0; 2038 vnode_t *fromvp; 2039 vnode_t *tovp; 2040 struct exportinfo *to_exi; 2041 fhandle_t *fh; 2042 vnode_t *srcvp; 2043 vnode_t *targvp; 2044 int in_crit = 0; 2045 2046 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi); 2047 if (fromvp == NULL) { 2048 *status = NFSERR_STALE; 2049 return; 2050 } 2051 2052 fh = args->rna_to.da_fhandle; 2053 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2054 if (to_exi == NULL) { 2055 VN_RELE(fromvp); 2056 *status = NFSERR_ACCES; 2057 return; 2058 } 2059 exi_rele(to_exi); 2060 2061 if (to_exi != exi) { 2062 VN_RELE(fromvp); 2063 *status = NFSERR_XDEV; 2064 return; 2065 } 2066 2067 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi); 2068 if (tovp == NULL) { 2069 VN_RELE(fromvp); 2070 *status = NFSERR_STALE; 2071 return; 2072 } 2073 2074 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) { 2075 VN_RELE(tovp); 2076 VN_RELE(fromvp); 2077 *status = NFSERR_NOTDIR; 2078 return; 2079 } 
2080 2081 /* 2082 * Disallow NULL paths 2083 */ 2084 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' || 2085 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') { 2086 VN_RELE(tovp); 2087 VN_RELE(fromvp); 2088 *status = NFSERR_ACCES; 2089 return; 2090 } 2091 2092 if (rdonly(exi, req)) { 2093 VN_RELE(tovp); 2094 VN_RELE(fromvp); 2095 *status = NFSERR_ROFS; 2096 return; 2097 } 2098 2099 /* 2100 * Check for a conflict with a non-blocking mandatory share reservation. 2101 */ 2102 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0, 2103 NULL, cr, NULL, NULL, NULL); 2104 if (error != 0) { 2105 VN_RELE(tovp); 2106 VN_RELE(fromvp); 2107 *status = puterrno(error); 2108 return; 2109 } 2110 2111 /* Check for delegations on the source file */ 2112 2113 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) { 2114 VN_RELE(tovp); 2115 VN_RELE(fromvp); 2116 VN_RELE(srcvp); 2117 curthread->t_flag |= T_WOULDBLOCK; 2118 return; 2119 } 2120 2121 /* Check for delegation on the file being renamed over, if it exists */ 2122 2123 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && 2124 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, 2125 NULL, NULL, NULL) == 0) { 2126 2127 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2128 VN_RELE(tovp); 2129 VN_RELE(fromvp); 2130 VN_RELE(srcvp); 2131 VN_RELE(targvp); 2132 curthread->t_flag |= T_WOULDBLOCK; 2133 return; 2134 } 2135 VN_RELE(targvp); 2136 } 2137 2138 2139 if (nbl_need_check(srcvp)) { 2140 nbl_start_crit(srcvp, RW_READER); 2141 in_crit = 1; 2142 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 2143 error = EACCES; 2144 goto out; 2145 } 2146 } 2147 2148 error = VOP_RENAME(fromvp, args->rna_from.da_name, 2149 tovp, args->rna_to.da_name, cr, NULL, 0); 2150 2151 if (error == 0) 2152 vn_renamepath(tovp, srcvp, args->rna_to.da_name, 2153 strlen(args->rna_to.da_name)); 2154 2155 /* 2156 * Force modified data and metadata out to stable storage. 
2157 */ 2158 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2159 (void) VOP_FSYNC(fromvp, 0, cr, NULL); 2160 2161 out: 2162 if (in_crit) 2163 nbl_end_crit(srcvp); 2164 VN_RELE(srcvp); 2165 VN_RELE(tovp); 2166 VN_RELE(fromvp); 2167 2168 *status = puterrno(error); 2169 2170 } 2171 void * 2172 rfs_rename_getfh(struct nfsrnmargs *args) 2173 { 2174 return (args->rna_from.da_fhandle); 2175 } 2176 2177 /* 2178 * Link to a file. 2179 * Create a file (to) which is a hard link to the given file (from). 2180 */ 2181 void 2182 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2183 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2184 { 2185 int error; 2186 vnode_t *fromvp; 2187 vnode_t *tovp; 2188 struct exportinfo *to_exi; 2189 fhandle_t *fh; 2190 2191 fromvp = nfs_fhtovp(args->la_from, exi); 2192 if (fromvp == NULL) { 2193 *status = NFSERR_STALE; 2194 return; 2195 } 2196 2197 fh = args->la_to.da_fhandle; 2198 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2199 if (to_exi == NULL) { 2200 VN_RELE(fromvp); 2201 *status = NFSERR_ACCES; 2202 return; 2203 } 2204 exi_rele(to_exi); 2205 2206 if (to_exi != exi) { 2207 VN_RELE(fromvp); 2208 *status = NFSERR_XDEV; 2209 return; 2210 } 2211 2212 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2213 if (tovp == NULL) { 2214 VN_RELE(fromvp); 2215 *status = NFSERR_STALE; 2216 return; 2217 } 2218 2219 if (tovp->v_type != VDIR) { 2220 VN_RELE(tovp); 2221 VN_RELE(fromvp); 2222 *status = NFSERR_NOTDIR; 2223 return; 2224 } 2225 /* 2226 * Disallow NULL paths 2227 */ 2228 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2229 VN_RELE(tovp); 2230 VN_RELE(fromvp); 2231 *status = NFSERR_ACCES; 2232 return; 2233 } 2234 2235 if (rdonly(exi, req)) { 2236 VN_RELE(tovp); 2237 VN_RELE(fromvp); 2238 *status = NFSERR_ROFS; 2239 return; 2240 } 2241 2242 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2243 2244 /* 2245 * Force modified data and metadata out to stable storage. 
2246 */ 2247 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2248 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2249 2250 VN_RELE(tovp); 2251 VN_RELE(fromvp); 2252 2253 *status = puterrno(error); 2254 2255 } 2256 void * 2257 rfs_link_getfh(struct nfslinkargs *args) 2258 { 2259 return (args->la_from); 2260 } 2261 2262 /* 2263 * Symbolicly link to a file. 2264 * Create a file (to) with the given attributes which is a symbolic link 2265 * to the given path name (to). 2266 */ 2267 void 2268 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2269 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2270 { 2271 int error; 2272 struct vattr va; 2273 vnode_t *vp; 2274 vnode_t *svp; 2275 int lerror; 2276 struct sockaddr *ca; 2277 char *name = NULL; 2278 2279 /* 2280 * Disallow NULL paths 2281 */ 2282 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2283 *status = NFSERR_ACCES; 2284 return; 2285 } 2286 2287 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2288 if (vp == NULL) { 2289 *status = NFSERR_STALE; 2290 return; 2291 } 2292 2293 if (rdonly(exi, req)) { 2294 VN_RELE(vp); 2295 *status = NFSERR_ROFS; 2296 return; 2297 } 2298 2299 error = sattr_to_vattr(args->sla_sa, &va); 2300 if (error) { 2301 VN_RELE(vp); 2302 *status = puterrno(error); 2303 return; 2304 } 2305 2306 if (!(va.va_mask & AT_MODE)) { 2307 VN_RELE(vp); 2308 *status = NFSERR_INVAL; 2309 return; 2310 } 2311 2312 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2313 name = nfscmd_convname(ca, exi, args->sla_tnm, 2314 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2315 2316 if (name == NULL) { 2317 *status = NFSERR_ACCES; 2318 return; 2319 } 2320 2321 va.va_type = VLNK; 2322 va.va_mask |= AT_TYPE; 2323 2324 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2325 2326 /* 2327 * Force new data and metadata out to stable storage. 
2328 */ 2329 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2330 NULL, cr, NULL, NULL, NULL); 2331 2332 if (!lerror) { 2333 (void) VOP_FSYNC(svp, 0, cr, NULL); 2334 VN_RELE(svp); 2335 } 2336 2337 /* 2338 * Force modified data and metadata out to stable storage. 2339 */ 2340 (void) VOP_FSYNC(vp, 0, cr, NULL); 2341 2342 VN_RELE(vp); 2343 2344 *status = puterrno(error); 2345 if (name != args->sla_tnm) 2346 kmem_free(name, MAXPATHLEN); 2347 2348 } 2349 void * 2350 rfs_symlink_getfh(struct nfsslargs *args) 2351 { 2352 return (args->sla_from.da_fhandle); 2353 } 2354 2355 /* 2356 * Make a directory. 2357 * Create a directory with the given name, parent directory, and attributes. 2358 * Returns a file handle and attributes for the new directory. 2359 */ 2360 void 2361 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2362 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2363 { 2364 int error; 2365 struct vattr va; 2366 vnode_t *dvp = NULL; 2367 vnode_t *vp; 2368 char *name = args->ca_da.da_name; 2369 2370 /* 2371 * Disallow NULL paths 2372 */ 2373 if (name == NULL || *name == '\0') { 2374 dr->dr_status = NFSERR_ACCES; 2375 return; 2376 } 2377 2378 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2379 if (vp == NULL) { 2380 dr->dr_status = NFSERR_STALE; 2381 return; 2382 } 2383 2384 if (rdonly(exi, req)) { 2385 VN_RELE(vp); 2386 dr->dr_status = NFSERR_ROFS; 2387 return; 2388 } 2389 2390 error = sattr_to_vattr(args->ca_sa, &va); 2391 if (error) { 2392 VN_RELE(vp); 2393 dr->dr_status = puterrno(error); 2394 return; 2395 } 2396 2397 if (!(va.va_mask & AT_MODE)) { 2398 VN_RELE(vp); 2399 dr->dr_status = NFSERR_INVAL; 2400 return; 2401 } 2402 2403 va.va_type = VDIR; 2404 va.va_mask |= AT_TYPE; 2405 2406 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2407 2408 if (!error) { 2409 /* 2410 * Attribtutes of the newly created directory should 2411 * be returned to the client. 
2412 */ 2413 va.va_mask = AT_ALL; /* We want everything */ 2414 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2415 2416 /* check for overflows */ 2417 if (!error) { 2418 acl_perm(vp, exi, &va, cr); 2419 error = vattr_to_nattr(&va, &dr->dr_attr); 2420 if (!error) { 2421 error = makefh(&dr->dr_fhandle, dvp, exi); 2422 } 2423 } 2424 /* 2425 * Force new data and metadata out to stable storage. 2426 */ 2427 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2428 VN_RELE(dvp); 2429 } 2430 2431 /* 2432 * Force modified data and metadata out to stable storage. 2433 */ 2434 (void) VOP_FSYNC(vp, 0, cr, NULL); 2435 2436 VN_RELE(vp); 2437 2438 dr->dr_status = puterrno(error); 2439 2440 } 2441 void * 2442 rfs_mkdir_getfh(struct nfscreatargs *args) 2443 { 2444 return (args->ca_da.da_fhandle); 2445 } 2446 2447 /* 2448 * Remove a directory. 2449 * Remove the given directory name from the given parent directory. 2450 */ 2451 void 2452 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2453 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2454 { 2455 int error; 2456 vnode_t *vp; 2457 2458 2459 /* 2460 * Disallow NULL paths 2461 */ 2462 if (da->da_name == NULL || *da->da_name == '\0') { 2463 *status = NFSERR_ACCES; 2464 return; 2465 } 2466 2467 vp = nfs_fhtovp(da->da_fhandle, exi); 2468 if (vp == NULL) { 2469 *status = NFSERR_STALE; 2470 return; 2471 } 2472 2473 if (rdonly(exi, req)) { 2474 VN_RELE(vp); 2475 *status = NFSERR_ROFS; 2476 return; 2477 } 2478 2479 /* 2480 * VOP_RMDIR now takes a new third argument (the current 2481 * directory of the process). That's because someone 2482 * wants to return EINVAL if one tries to remove ".". 2483 * Of course, NFS servers have no idea what their 2484 * clients' current directories are. We fake it by 2485 * supplying a vnode known to exist and illegal to 2486 * remove. 2487 */ 2488 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2489 2490 /* 2491 * Force modified data and metadata out to stable storage. 
 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}

/*
 * Return a pointer to the file handle embedded in the rmdir arguments,
 * for use by the dispatcher before the service routine runs.
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Directory read (NFSPROC_READDIR).
 * Read up to rda->rda_count bytes of directory entries from the
 * directory identified by rda->rda_fh, starting at the opaque cookie
 * rda->rda_offset, and return them in *rd along with an NFS status
 * and an EOF indication.  The entry buffer allocated here (or swapped
 * in by character-set conversion) is owned by *rd and is released
 * later by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;		/* set by VOP_READDIR when the end is hit */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* conversion output buffer, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	/* Translate the file handle; a stale handle yields NFSERR_STALE. */
	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	/* READDIR is only meaningful on directories. */
	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Take the vnode rwlock as a reader while scanning the directory. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the request to the v2 maximum transfer size. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* Nothing was returned: treat as end of directory. */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Run the entry names through per-client character-set conversion.
	 * NOTE(review): on the VOP_READDIR error path rd->rd_size is read
	 * here without having been set above — presumably the dispatcher
	 * hands in a zeroed result structure; confirm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/*
	 * If conversion produced a new buffer, free the original and hand
	 * ownership of the converted buffer to *rd.
	 */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}

/*
 * Return a pointer to the file handle embedded in the readdir arguments.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}

/*
 * Free the entry buffer attached to a readdir result by rfs_readdir().
 */
void
rfs_rddirfree(struct nfsrddirres *rd)
{
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}

/*
 * File system statistics (NFSPROC_STATFS).
 * Translate the file handle, query the underlying file system with
 * VFS_STATVFS, and copy the block counts (in f_frsize units) into the
 * over-the-wire reply.
 */
/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	struct statvfs64 sb;
	vnode_t *vp;

	vp = nfs_fhtovp(fh, exi);
	if (vp == NULL) {
		fs->fs_status = NFSERR_STALE;
		return;
	}

	error = VFS_STATVFS(vp->v_vfsp, &sb);

	if (!error) {
		fs->fs_tsize = nfstsize();	/* preferred transfer size */
		fs->fs_bsize = sb.f_frsize;
		fs->fs_blocks = sb.f_blocks;
		fs->fs_bfree = sb.f_bfree;
		fs->fs_bavail = sb.f_bavail;
	}

	VN_RELE(vp);

	fs->fs_status = puterrno(error);

}

/*
 * The statfs arguments are the file handle itself.
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}

/*
 * Convert over-the-wire NFSv2 settable attributes (*sa) into a vattr
 * (*vap), setting va_mask bits only for fields the client actually
 * supplied; an all-ones field value means "not set".  Returns 0, or
 * EOVERFLOW on a 32-bit kernel if a supplied time cannot be
 * represented in time_t.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}

/*
 * Map vtype_t (used as the index) to the over-the-wire NFSv2 file type.
 * Types with no v2 representation map to 0; VFIFO is handled separately
 * via NA_SETFIFO in vattr_to_nattr.
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};

/*
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
2771 */ 2772 int 2773 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2774 { 2775 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2776 na->na_type = vt_to_nf[vap->va_type]; 2777 2778 if (vap->va_mode == (unsigned short) -1) 2779 na->na_mode = (uint32_t)-1; 2780 else 2781 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2782 2783 if (vap->va_uid == (unsigned short)(-1)) 2784 na->na_uid = (uint32_t)(-1); 2785 else if (vap->va_uid == UID_NOBODY) 2786 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2787 else 2788 na->na_uid = vap->va_uid; 2789 2790 if (vap->va_gid == (unsigned short)(-1)) 2791 na->na_gid = (uint32_t)-1; 2792 else if (vap->va_gid == GID_NOBODY) 2793 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2794 else 2795 na->na_gid = vap->va_gid; 2796 2797 /* 2798 * Do we need to check fsid for overflow? It is 64-bit in the 2799 * vattr, but are bigger than 32 bit values supported? 2800 */ 2801 na->na_fsid = vap->va_fsid; 2802 2803 na->na_nodeid = vap->va_nodeid; 2804 2805 /* 2806 * Check to make sure that the nodeid is representable over the 2807 * wire without losing bits. 2808 */ 2809 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2810 return (EFBIG); 2811 na->na_nlink = vap->va_nlink; 2812 2813 /* 2814 * Check for big files here, instead of at the caller. See 2815 * comments in cstat for large special file explanation. 2816 */ 2817 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2818 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2819 return (EFBIG); 2820 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2821 /* UNKNOWN_SIZE | OVERFLOW */ 2822 na->na_size = MAXOFF32_T; 2823 } else 2824 na->na_size = vap->va_size; 2825 } else 2826 na->na_size = vap->va_size; 2827 2828 /* 2829 * If the vnode times overflow the 32-bit times that NFS2 2830 * uses on the wire then return an error. 
2831 */ 2832 if (!NFS_VAP_TIME_OK(vap)) { 2833 return (EOVERFLOW); 2834 } 2835 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2836 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2837 2838 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2839 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2840 2841 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2842 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2843 2844 /* 2845 * If the dev_t will fit into 16 bits then compress 2846 * it, otherwise leave it alone. See comments in 2847 * nfs_client.c. 2848 */ 2849 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2850 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2851 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2852 else 2853 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2854 2855 na->na_blocks = vap->va_nblocks; 2856 na->na_blocksize = vap->va_blksize; 2857 2858 /* 2859 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2860 * over-the-wire protocols for named-pipe vnodes. It remaps the 2861 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2862 * 2863 * BUYER BEWARE: 2864 * If you are porting the NFS to a non-Sun server, you probably 2865 * don't want to include the following block of code. The 2866 * over-the-wire special file types will be changing with the 2867 * NFS Protocol Revision. 2868 */ 2869 if (vap->va_type == VFIFO) 2870 NA_SETFIFO(na); 2871 return (0); 2872 } 2873 2874 /* 2875 * acl v2 support: returns approximate permission. 2876 * default: returns minimal permission (more restrictive) 2877 * aclok: returns maximal permission (less restrictive) 2878 * This routine changes the permissions that are alaredy in *va. 2879 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 2880 * CLASS_OBJ is always the same as GROUP_OBJ entry. 
 */
/*
 * Rewrite the group and other permission bits of va->va_mode to an
 * approximation derived from the file's (non-trivial) ACL, as described
 * in the comment above.  Owner bits are left untouched.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	/*
	 * NOTE(review): mask_perm and other_orig are assigned only when a
	 * CLASS_OBJ / OTHER_OBJ entry is seen in the EX_ACLOK loop below;
	 * presumably any ACL with aclcnt > MIN_ACL_ENTRIES always carries
	 * both entries (POSIX-draft ACLs do) — confirm, otherwise they are
	 * read uninitialized after the loop.
	 */
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/*
				 * maximal permissions: OR together every
				 * grant that could apply, then apply the
				 * CLASS_OBJ mask and restore OTHER_OBJ.
				 */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/*
				 * minimal permissions: start from full
				 * group/other bits and AND in every entry
				 * that could restrict them.
				 */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		/* Free the entry array handed back by VOP_GETSECATTR. */
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}

/*
 * One-time NFSv2 server initialization: set up the async-write lock and
 * obtain a caller id for the file system lock manager.
 */
void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}

/*
 * NFSv2 server teardown: release what rfs_srvrinit() created.
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}

/*
 * Prepare the RDMA write chunk list for a READ reply: size the client's
 * chunk list (ra->ra_wlist) for rr_count bytes of data and record the
 * list and its length in the result.  Returns TRUE on success, FALSE if
 * the chunk list cannot accommodate the data.
 */
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
	struct clist *wcl;
	int wlist_len;
	uint32_t count = rr->rr_count;

	wcl = ra->ra_wlist;

	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
		return (FALSE);
	}

	wcl = ra->ra_wlist;
	rr->rr_ok.rrok_wlist_len = wlist_len;
	rr->rr_ok.rrok_wlist = wcl;

	return (TRUE);
}