1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 95 */ 96 /* ARGSUSED */ 97 void 98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 99 struct svc_req *req, cred_t *cr, bool_t ro) 100 { 101 int error; 102 vnode_t *vp; 103 struct vattr va; 104 105 vp = nfs_fhtovp(fhp, exi); 106 if (vp == NULL) { 107 ns->ns_status = NFSERR_STALE; 108 return; 109 } 110 111 /* 112 * Do the getattr. 113 */ 114 va.va_mask = AT_ALL; /* we want all the attributes */ 115 116 error = rfs4_delegated_getattr(vp, &va, 0, cr); 117 118 /* check for overflows */ 119 if (!error) { 120 /* Lie about the object type for a referral */ 121 if (vn_is_nfs_reparse(vp, cr)) 122 va.va_type = VLNK; 123 124 acl_perm(vp, exi, &va, cr); 125 error = vattr_to_nattr(&va, &ns->ns_attr); 126 } 127 128 VN_RELE(vp); 129 130 ns->ns_status = puterrno(error); 131 } 132 void * 133 rfs_getattr_getfh(fhandle_t *fhp) 134 { 135 return (fhp); 136 } 137 138 /* 139 * Set file attributes. 140 * Sets the attributes of the file with the given fhandle. Returns 141 * the new attributes. 142 */ 143 /* ARGSUSED */ 144 void 145 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 146 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 147 { 148 int error; 149 int flag; 150 int in_crit = 0; 151 vnode_t *vp; 152 struct vattr va; 153 struct vattr bva; 154 struct flock64 bf; 155 caller_context_t ct; 156 157 158 vp = nfs_fhtovp(&args->saa_fh, exi); 159 if (vp == NULL) { 160 ns->ns_status = NFSERR_STALE; 161 return; 162 } 163 164 if (rdonly(ro, vp)) { 165 VN_RELE(vp); 166 ns->ns_status = NFSERR_ROFS; 167 return; 168 } 169 170 error = sattr_to_vattr(&args->saa_sa, &va); 171 if (error) { 172 VN_RELE(vp); 173 ns->ns_status = puterrno(error); 174 return; 175 } 176 177 /* 178 * If the client is requesting a change to the mtime, 179 * but the nanosecond field is set to 1 billion, then 180 * this is a flag to the server that it should set the 181 * atime and mtime fields to the server's current time. 182 * The 1 billion number actually came from the client 183 * as 1 million, but the units in the over the wire 184 * request are microseconds instead of nanoseconds. 185 * 186 * This is an overload of the protocol and should be 187 * documented in the NFS Version 2 protocol specification. 188 */ 189 if (va.va_mask & AT_MTIME) { 190 if (va.va_mtime.tv_nsec == 1000000000) { 191 gethrestime(&va.va_mtime); 192 va.va_atime = va.va_mtime; 193 va.va_mask |= AT_ATIME; 194 flag = 0; 195 } else 196 flag = ATTR_UTIME; 197 } else 198 flag = 0; 199 200 /* 201 * If the filesystem is exported with nosuid, then mask off 202 * the setuid and setgid bits. 203 */ 204 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 205 (exi->exi_export.ex_flags & EX_NOSUID)) 206 va.va_mode &= ~(VSUID | VSGID); 207 208 ct.cc_sysid = 0; 209 ct.cc_pid = 0; 210 ct.cc_caller_id = nfs2_srv_caller_id; 211 ct.cc_flags = CC_DONTBLOCK; 212 213 /* 214 * We need to specially handle size changes because it is 215 * possible for the client to create a file with modes 216 * which indicate read-only, but with the file opened for 217 * writing. If the client then tries to set the size of 218 * the file, then the normal access checking done in 219 * VOP_SETATTR would prevent the client from doing so, 220 * although it should be legal for it to do so. To get 221 * around this, we do the access checking for ourselves 222 * and then use VOP_SPACE which doesn't do the access 223 * checking which VOP_SETATTR does. VOP_SPACE can only 224 * operate on VREG files, let VOP_SETATTR handle the other 225 * extremely rare cases. 226 * Also the client should not be allowed to change the 227 * size of the file if there is a conflicting non-blocking 228 * mandatory lock in the region of change. 229 */ 230 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 231 if (nbl_need_check(vp)) { 232 nbl_start_crit(vp, RW_READER); 233 in_crit = 1; 234 } 235 236 bva.va_mask = AT_UID | AT_SIZE; 237 238 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 239 240 if (error) { 241 if (in_crit) 242 nbl_end_crit(vp); 243 VN_RELE(vp); 244 ns->ns_status = puterrno(error); 245 return; 246 } 247 248 if (in_crit) { 249 u_offset_t offset; 250 ssize_t length; 251 252 if (va.va_size < bva.va_size) { 253 offset = va.va_size; 254 length = bva.va_size - va.va_size; 255 } else { 256 offset = bva.va_size; 257 length = va.va_size - bva.va_size; 258 } 259 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 260 NULL)) { 261 error = EACCES; 262 } 263 } 264 265 if (crgetuid(cr) == bva.va_uid && !error && 266 va.va_size != bva.va_size) { 267 va.va_mask &= ~AT_SIZE; 268 bf.l_type = F_WRLCK; 269 bf.l_whence = 0; 270 bf.l_start = (off64_t)va.va_size; 271 bf.l_len = 0; 272 bf.l_sysid = 0; 273 bf.l_pid = 0; 274 275 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 276 (offset_t)va.va_size, cr, &ct); 277 } 278 if (in_crit) 279 nbl_end_crit(vp); 280 } else 281 error = 0; 282 283 /* 284 * Do the setattr. 285 */ 286 if (!error && va.va_mask) { 287 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 288 } 289 290 /* 291 * check if the monitor on either vop_space or vop_setattr detected 292 * a delegation conflict and if so, mark the thread flag as 293 * wouldblock so that the response is dropped and the client will 294 * try again. 295 */ 296 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 297 VN_RELE(vp); 298 curthread->t_flag |= T_WOULDBLOCK; 299 return; 300 } 301 302 if (!error) { 303 va.va_mask = AT_ALL; /* get everything */ 304 305 error = rfs4_delegated_getattr(vp, &va, 0, cr); 306 307 /* check for overflows */ 308 if (!error) { 309 acl_perm(vp, exi, &va, cr); 310 error = vattr_to_nattr(&va, &ns->ns_attr); 311 } 312 } 313 314 ct.cc_flags = 0; 315 316 /* 317 * Force modified metadata out to stable storage. 318 */ 319 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 320 321 VN_RELE(vp); 322 323 ns->ns_status = puterrno(error); 324 } 325 void * 326 rfs_setattr_getfh(struct nfssaargs *args) 327 { 328 return (&args->saa_fh); 329 } 330 331 /* 332 * Directory lookup. 333 * Returns an fhandle and file attributes for file name in a directory. 334 */ 335 /* ARGSUSED */ 336 void 337 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 338 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 339 { 340 int error; 341 vnode_t *dvp; 342 vnode_t *vp; 343 struct vattr va; 344 fhandle_t *fhp = da->da_fhandle; 345 struct sec_ol sec = {0, 0}; 346 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 347 char *name; 348 struct sockaddr *ca; 349 350 /* 351 * Trusted Extension doesn't support NFSv2. MOUNT 352 * will reject v2 clients. Need to prevent v2 client 353 * access via WebNFS here. 354 */ 355 if (is_system_labeled() && req->rq_vers == 2) { 356 dr->dr_status = NFSERR_ACCES; 357 return; 358 } 359 360 /* 361 * Disallow NULL paths 362 */ 363 if (da->da_name == NULL || *da->da_name == '\0') { 364 dr->dr_status = NFSERR_ACCES; 365 return; 366 } 367 368 /* 369 * Allow lookups from the root - the default 370 * location of the public filehandle. 371 */ 372 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 373 dvp = rootdir; 374 VN_HOLD(dvp); 375 } else { 376 dvp = nfs_fhtovp(fhp, exi); 377 if (dvp == NULL) { 378 dr->dr_status = NFSERR_STALE; 379 return; 380 } 381 } 382 383 /* 384 * Not allow lookup beyond root. 385 * If the filehandle matches a filehandle of the exi, 386 * then the ".." refers beyond the root of an exported filesystem. 387 */ 388 if (strcmp(da->da_name, "..") == 0 && 389 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 390 VN_RELE(dvp); 391 dr->dr_status = NFSERR_NOENT; 392 return; 393 } 394 395 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 396 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 397 MAXPATHLEN); 398 399 if (name == NULL) { 400 dr->dr_status = NFSERR_ACCES; 401 return; 402 } 403 404 /* 405 * If the public filehandle is used then allow 406 * a multi-component lookup, i.e. evaluate 407 * a pathname and follow symbolic links if 408 * necessary. 409 * 410 * This may result in a vnode in another filesystem 411 * which is OK as long as the filesystem is exported. 412 */ 413 if (PUBLIC_FH2(fhp)) { 414 publicfh_flag = TRUE; 415 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 416 &sec); 417 } else { 418 /* 419 * Do a normal single component lookup. 420 */ 421 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 422 NULL, NULL, NULL); 423 } 424 425 if (name != da->da_name) 426 kmem_free(name, MAXPATHLEN); 427 428 429 if (!error) { 430 va.va_mask = AT_ALL; /* we want everything */ 431 432 error = rfs4_delegated_getattr(vp, &va, 0, cr); 433 434 /* check for overflows */ 435 if (!error) { 436 acl_perm(vp, exi, &va, cr); 437 error = vattr_to_nattr(&va, &dr->dr_attr); 438 if (!error) { 439 if (sec.sec_flags & SEC_QUERY) 440 error = makefh_ol(&dr->dr_fhandle, exi, 441 sec.sec_index); 442 else { 443 error = makefh(&dr->dr_fhandle, vp, 444 exi); 445 if (!error && publicfh_flag && 446 !chk_clnt_sec(exi, req)) 447 auth_weak = TRUE; 448 } 449 } 450 } 451 VN_RELE(vp); 452 } 453 454 VN_RELE(dvp); 455 456 /* 457 * If publicfh_flag is true then we have called rfs_publicfh_mclookup 458 * and have obtained a new exportinfo in exi which needs to be 459 * released. Note the the original exportinfo pointed to by exi 460 * will be released by the caller, comon_dispatch. 461 */ 462 if (publicfh_flag && exi != NULL) 463 exi_rele(exi); 464 465 /* 466 * If it's public fh, no 0x81, and client's flavor is 467 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 468 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 469 */ 470 if (auth_weak) 471 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 472 else 473 dr->dr_status = puterrno(error); 474 } 475 void * 476 rfs_lookup_getfh(struct nfsdiropargs *da) 477 { 478 return (da->da_fhandle); 479 } 480 481 /* 482 * Read symbolic link. 483 * Returns the string in the symbolic link at the given fhandle. 484 */ 485 /* ARGSUSED */ 486 void 487 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 488 struct svc_req *req, cred_t *cr, bool_t ro) 489 { 490 int error; 491 struct iovec iov; 492 struct uio uio; 493 vnode_t *vp; 494 struct vattr va; 495 struct sockaddr *ca; 496 char *name = NULL; 497 int is_referral = 0; 498 499 vp = nfs_fhtovp(fhp, exi); 500 if (vp == NULL) { 501 rl->rl_data = NULL; 502 rl->rl_status = NFSERR_STALE; 503 return; 504 } 505 506 va.va_mask = AT_MODE; 507 508 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 509 510 if (error) { 511 VN_RELE(vp); 512 rl->rl_data = NULL; 513 rl->rl_status = puterrno(error); 514 return; 515 } 516 517 if (MANDLOCK(vp, va.va_mode)) { 518 VN_RELE(vp); 519 rl->rl_data = NULL; 520 rl->rl_status = NFSERR_ACCES; 521 return; 522 } 523 524 /* We lied about the object type for a referral */ 525 if (vn_is_nfs_reparse(vp, cr)) 526 is_referral = 1; 527 528 /* 529 * XNFS and RFC1094 require us to return ENXIO if argument 530 * is not a link. BUGID 1138002. 531 */ 532 if (vp->v_type != VLNK && !is_referral) { 533 VN_RELE(vp); 534 rl->rl_data = NULL; 535 rl->rl_status = NFSERR_NXIO; 536 return; 537 } 538 539 /* 540 * Allocate data for pathname. This will be freed by rfs_rlfree. 541 */ 542 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 543 544 if (is_referral) { 545 char *s; 546 size_t strsz; 547 548 /* Get an artificial symlink based on a referral */ 549 s = build_symlink(vp, cr, &strsz); 550 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 551 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 552 vnode_t *, vp, char *, s); 553 if (s == NULL) 554 error = EINVAL; 555 else { 556 error = 0; 557 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 558 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 559 kmem_free(s, strsz); 560 } 561 562 } else { 563 564 /* 565 * Set up io vector to read sym link data 566 */ 567 iov.iov_base = rl->rl_data; 568 iov.iov_len = NFS_MAXPATHLEN; 569 uio.uio_iov = &iov; 570 uio.uio_iovcnt = 1; 571 uio.uio_segflg = UIO_SYSSPACE; 572 uio.uio_extflg = UIO_COPY_CACHED; 573 uio.uio_loffset = (offset_t)0; 574 uio.uio_resid = NFS_MAXPATHLEN; 575 576 /* 577 * Do the readlink. 578 */ 579 error = VOP_READLINK(vp, &uio, cr, NULL); 580 581 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 582 583 if (!error) 584 rl->rl_data[rl->rl_count] = '\0'; 585 586 } 587 588 589 VN_RELE(vp); 590 591 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 592 name = nfscmd_convname(ca, exi, rl->rl_data, 593 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 594 595 if (name != NULL && name != rl->rl_data) { 596 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 597 rl->rl_data = name; 598 } 599 600 /* 601 * XNFS and RFC1094 require us to return ENXIO if argument 602 * is not a link. UFS returns EINVAL if this is the case, 603 * so we do the mapping here. BUGID 1138002. 604 */ 605 if (error == EINVAL) 606 rl->rl_status = NFSERR_NXIO; 607 else 608 rl->rl_status = puterrno(error); 609 610 } 611 void * 612 rfs_readlink_getfh(fhandle_t *fhp) 613 { 614 return (fhp); 615 } 616 /* 617 * Free data allocated by rfs_readlink 618 */ 619 void 620 rfs_rlfree(struct nfsrdlnres *rl) 621 { 622 if (rl->rl_data != NULL) 623 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 624 } 625 626 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 627 628 /* 629 * Read data. 630 * Returns some data read from the file at the given fhandle. 631 */ 632 /* ARGSUSED */ 633 void 634 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 635 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 636 { 637 vnode_t *vp; 638 int error; 639 struct vattr va; 640 struct iovec iov; 641 struct uio uio; 642 mblk_t *mp; 643 int alloc_err = 0; 644 int in_crit = 0; 645 caller_context_t ct; 646 647 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 648 if (vp == NULL) { 649 rr->rr_data = NULL; 650 rr->rr_status = NFSERR_STALE; 651 return; 652 } 653 654 if (vp->v_type != VREG) { 655 VN_RELE(vp); 656 rr->rr_data = NULL; 657 rr->rr_status = NFSERR_ISDIR; 658 return; 659 } 660 661 ct.cc_sysid = 0; 662 ct.cc_pid = 0; 663 ct.cc_caller_id = nfs2_srv_caller_id; 664 ct.cc_flags = CC_DONTBLOCK; 665 666 /* 667 * Enter the critical region before calling VOP_RWLOCK 668 * to avoid a deadlock with write requests. 669 */ 670 if (nbl_need_check(vp)) { 671 nbl_start_crit(vp, RW_READER); 672 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 673 0, NULL)) { 674 nbl_end_crit(vp); 675 VN_RELE(vp); 676 rr->rr_data = NULL; 677 rr->rr_status = NFSERR_ACCES; 678 return; 679 } 680 in_crit = 1; 681 } 682 683 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 684 685 /* check if a monitor detected a delegation conflict */ 686 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 687 VN_RELE(vp); 688 /* mark as wouldblock so response is dropped */ 689 curthread->t_flag |= T_WOULDBLOCK; 690 691 rr->rr_data = NULL; 692 return; 693 } 694 695 va.va_mask = AT_ALL; 696 697 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 698 699 if (error) { 700 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 701 if (in_crit) 702 nbl_end_crit(vp); 703 704 VN_RELE(vp); 705 rr->rr_data = NULL; 706 rr->rr_status = puterrno(error); 707 708 return; 709 } 710 711 /* 712 * This is a kludge to allow reading of files created 713 * with no read permission. The owner of the file 714 * is always allowed to read it. 715 */ 716 if (crgetuid(cr) != va.va_uid) { 717 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 718 719 if (error) { 720 /* 721 * Exec is the same as read over the net because 722 * of demand loading. 723 */ 724 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 725 } 726 if (error) { 727 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 728 if (in_crit) 729 nbl_end_crit(vp); 730 VN_RELE(vp); 731 rr->rr_data = NULL; 732 rr->rr_status = puterrno(error); 733 734 return; 735 } 736 } 737 738 if (MANDLOCK(vp, va.va_mode)) { 739 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 740 if (in_crit) 741 nbl_end_crit(vp); 742 743 VN_RELE(vp); 744 rr->rr_data = NULL; 745 rr->rr_status = NFSERR_ACCES; 746 747 return; 748 } 749 750 rr->rr_ok.rrok_wlist_len = 0; 751 rr->rr_ok.rrok_wlist = NULL; 752 753 if ((u_offset_t)ra->ra_offset >= va.va_size) { 754 rr->rr_count = 0; 755 rr->rr_data = NULL; 756 /* 757 * In this case, status is NFS_OK, but there is no data 758 * to encode. So set rr_mp to NULL. 759 */ 760 rr->rr_mp = NULL; 761 rr->rr_ok.rrok_wlist = ra->ra_wlist; 762 if (rr->rr_ok.rrok_wlist) 763 clist_zero_len(rr->rr_ok.rrok_wlist); 764 goto done; 765 } 766 767 if (ra->ra_wlist) { 768 mp = NULL; 769 rr->rr_mp = NULL; 770 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 771 if (ra->ra_count > iov.iov_len) { 772 rr->rr_data = NULL; 773 rr->rr_status = NFSERR_INVAL; 774 goto done; 775 } 776 } else { 777 /* 778 * mp will contain the data to be sent out in the read reply. 779 * This will be freed after the reply has been sent out (by the 780 * driver). 781 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 782 * that the call to xdrmblk_putmblk() never fails. 783 */ 784 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 785 &alloc_err); 786 ASSERT(mp != NULL); 787 ASSERT(alloc_err == 0); 788 789 rr->rr_mp = mp; 790 791 /* 792 * Set up io vector 793 */ 794 iov.iov_base = (caddr_t)mp->b_datap->db_base; 795 iov.iov_len = ra->ra_count; 796 } 797 798 uio.uio_iov = &iov; 799 uio.uio_iovcnt = 1; 800 uio.uio_segflg = UIO_SYSSPACE; 801 uio.uio_extflg = UIO_COPY_CACHED; 802 uio.uio_loffset = (offset_t)ra->ra_offset; 803 uio.uio_resid = ra->ra_count; 804 805 error = VOP_READ(vp, &uio, 0, cr, &ct); 806 807 if (error) { 808 if (mp) 809 freeb(mp); 810 811 /* 812 * check if a monitor detected a delegation conflict and 813 * mark as wouldblock so response is dropped 814 */ 815 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 816 curthread->t_flag |= T_WOULDBLOCK; 817 else 818 rr->rr_status = puterrno(error); 819 820 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 821 if (in_crit) 822 nbl_end_crit(vp); 823 824 VN_RELE(vp); 825 rr->rr_data = NULL; 826 827 return; 828 } 829 830 /* 831 * Get attributes again so we can send the latest access 832 * time to the client side for his cache. 833 */ 834 va.va_mask = AT_ALL; 835 836 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 837 838 if (error) { 839 if (mp) 840 freeb(mp); 841 842 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 843 if (in_crit) 844 nbl_end_crit(vp); 845 846 VN_RELE(vp); 847 rr->rr_data = NULL; 848 rr->rr_status = puterrno(error); 849 850 return; 851 } 852 853 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 854 855 if (mp) { 856 rr->rr_data = (char *)mp->b_datap->db_base; 857 } else { 858 if (ra->ra_wlist) { 859 rr->rr_data = (caddr_t)iov.iov_base; 860 if (!rdma_setup_read_data2(ra, rr)) { 861 rr->rr_data = NULL; 862 rr->rr_status = puterrno(NFSERR_INVAL); 863 } 864 } 865 } 866 done: 867 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 868 if (in_crit) 869 nbl_end_crit(vp); 870 871 acl_perm(vp, exi, &va, cr); 872 873 /* check for overflows */ 874 error = vattr_to_nattr(&va, &rr->rr_attr); 875 876 VN_RELE(vp); 877 878 rr->rr_status = puterrno(error); 879 } 880 881 /* 882 * Free data allocated by rfs_read 883 */ 884 void 885 rfs_rdfree(struct nfsrdresult *rr) 886 { 887 mblk_t *mp; 888 889 if (rr->rr_status == NFS_OK) { 890 mp = rr->rr_mp; 891 if (mp != NULL) 892 freeb(mp); 893 } 894 } 895 896 void * 897 rfs_read_getfh(struct nfsreadargs *ra) 898 { 899 return (&ra->ra_fhandle); 900 } 901 902 #define MAX_IOVECS 12 903 904 #ifdef DEBUG 905 static int rfs_write_sync_hits = 0; 906 static int rfs_write_sync_misses = 0; 907 #endif 908 909 /* 910 * Write data to file. 911 * Returns attributes of a file after writing some data to it. 912 * 913 * Any changes made here, especially in error handling might have 914 * to also be done in rfs_write (which clusters write requests). 915 */ 916 /* ARGSUSED */ 917 void 918 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 919 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 920 { 921 int error; 922 vnode_t *vp; 923 rlim64_t rlimit; 924 struct vattr va; 925 struct uio uio; 926 struct iovec iov[MAX_IOVECS]; 927 mblk_t *m; 928 struct iovec *iovp; 929 int iovcnt; 930 cred_t *savecred; 931 int in_crit = 0; 932 caller_context_t ct; 933 934 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 935 if (vp == NULL) { 936 ns->ns_status = NFSERR_STALE; 937 return; 938 } 939 940 if (rdonly(ro, vp)) { 941 VN_RELE(vp); 942 ns->ns_status = NFSERR_ROFS; 943 return; 944 } 945 946 if (vp->v_type != VREG) { 947 VN_RELE(vp); 948 ns->ns_status = NFSERR_ISDIR; 949 return; 950 } 951 952 ct.cc_sysid = 0; 953 ct.cc_pid = 0; 954 ct.cc_caller_id = nfs2_srv_caller_id; 955 ct.cc_flags = CC_DONTBLOCK; 956 957 va.va_mask = AT_UID|AT_MODE; 958 959 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 960 961 if (error) { 962 VN_RELE(vp); 963 ns->ns_status = puterrno(error); 964 965 return; 966 } 967 968 if (crgetuid(cr) != va.va_uid) { 969 /* 970 * This is a kludge to allow writes of files created 971 * with read only permission. The owner of the file 972 * is always allowed to write it. 973 */ 974 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 975 976 if (error) { 977 VN_RELE(vp); 978 ns->ns_status = puterrno(error); 979 return; 980 } 981 } 982 983 /* 984 * Can't access a mandatory lock file. This might cause 985 * the NFS service thread to block forever waiting for a 986 * lock to be released that will never be released. 987 */ 988 if (MANDLOCK(vp, va.va_mode)) { 989 VN_RELE(vp); 990 ns->ns_status = NFSERR_ACCES; 991 return; 992 } 993 994 /* 995 * We have to enter the critical region before calling VOP_RWLOCK 996 * to avoid a deadlock with ufs. 997 */ 998 if (nbl_need_check(vp)) { 999 nbl_start_crit(vp, RW_READER); 1000 in_crit = 1; 1001 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 1002 wa->wa_count, 0, NULL)) { 1003 error = EACCES; 1004 goto out; 1005 } 1006 } 1007 1008 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1009 1010 /* check if a monitor detected a delegation conflict */ 1011 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1012 VN_RELE(vp); 1013 /* mark as wouldblock so response is dropped */ 1014 curthread->t_flag |= T_WOULDBLOCK; 1015 return; 1016 } 1017 1018 if (wa->wa_data || wa->wa_rlist) { 1019 /* Do the RDMA thing if necessary */ 1020 if (wa->wa_rlist) { 1021 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1022 iov[0].iov_len = wa->wa_count; 1023 } else { 1024 iov[0].iov_base = wa->wa_data; 1025 iov[0].iov_len = wa->wa_count; 1026 } 1027 uio.uio_iov = iov; 1028 uio.uio_iovcnt = 1; 1029 uio.uio_segflg = UIO_SYSSPACE; 1030 uio.uio_extflg = UIO_COPY_DEFAULT; 1031 uio.uio_loffset = (offset_t)wa->wa_offset; 1032 uio.uio_resid = wa->wa_count; 1033 /* 1034 * The limit is checked on the client. We 1035 * should allow any size writes here. 1036 */ 1037 uio.uio_llimit = curproc->p_fsz_ctl; 1038 rlimit = uio.uio_llimit - wa->wa_offset; 1039 if (rlimit < (rlim64_t)uio.uio_resid) 1040 uio.uio_resid = (uint_t)rlimit; 1041 1042 /* 1043 * for now we assume no append mode 1044 */ 1045 /* 1046 * We're changing creds because VM may fault and we need 1047 * the cred of the current thread to be used if quota 1048 * checking is enabled. 1049 */ 1050 savecred = curthread->t_cred; 1051 curthread->t_cred = cr; 1052 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1053 curthread->t_cred = savecred; 1054 } else { 1055 iovcnt = 0; 1056 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1057 iovcnt++; 1058 if (iovcnt <= MAX_IOVECS) { 1059 #ifdef DEBUG 1060 rfs_write_sync_hits++; 1061 #endif 1062 iovp = iov; 1063 } else { 1064 #ifdef DEBUG 1065 rfs_write_sync_misses++; 1066 #endif 1067 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1068 } 1069 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1070 uio.uio_iov = iovp; 1071 uio.uio_iovcnt = iovcnt; 1072 uio.uio_segflg = UIO_SYSSPACE; 1073 uio.uio_extflg = UIO_COPY_DEFAULT; 1074 uio.uio_loffset = (offset_t)wa->wa_offset; 1075 uio.uio_resid = wa->wa_count; 1076 /* 1077 * The limit is checked on the client. We 1078 * should allow any size writes here. 1079 */ 1080 uio.uio_llimit = curproc->p_fsz_ctl; 1081 rlimit = uio.uio_llimit - wa->wa_offset; 1082 if (rlimit < (rlim64_t)uio.uio_resid) 1083 uio.uio_resid = (uint_t)rlimit; 1084 1085 /* 1086 * For now we assume no append mode. 1087 */ 1088 /* 1089 * We're changing creds because VM may fault and we need 1090 * the cred of the current thread to be used if quota 1091 * checking is enabled. 1092 */ 1093 savecred = curthread->t_cred; 1094 curthread->t_cred = cr; 1095 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1096 curthread->t_cred = savecred; 1097 1098 if (iovp != iov) 1099 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1100 } 1101 1102 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1103 1104 if (!error) { 1105 /* 1106 * Get attributes again so we send the latest mod 1107 * time to the client side for his cache. 1108 */ 1109 va.va_mask = AT_ALL; /* now we want everything */ 1110 1111 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1112 1113 /* check for overflows */ 1114 if (!error) { 1115 acl_perm(vp, exi, &va, cr); 1116 error = vattr_to_nattr(&va, &ns->ns_attr); 1117 } 1118 } 1119 1120 out: 1121 if (in_crit) 1122 nbl_end_crit(vp); 1123 VN_RELE(vp); 1124 1125 /* check if a monitor detected a delegation conflict */ 1126 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1127 /* mark as wouldblock so response is dropped */ 1128 curthread->t_flag |= T_WOULDBLOCK; 1129 else 1130 ns->ns_status = puterrno(error); 1131 1132 } 1133 1134 struct rfs_async_write { 1135 struct nfswriteargs *wa; 1136 struct nfsattrstat *ns; 1137 struct svc_req *req; 1138 cred_t *cr; 1139 bool_t ro; 1140 kthread_t *thread; 1141 struct rfs_async_write *list; 1142 }; 1143 1144 struct rfs_async_write_list { 1145 fhandle_t *fhp; 1146 kcondvar_t cv; 1147 struct rfs_async_write *list; 1148 struct rfs_async_write_list *next; 1149 }; 1150 1151 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1152 static kmutex_t rfs_async_write_lock; 1153 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1154 1155 #define MAXCLIOVECS 42 1156 #define RFSWRITE_INITVAL (enum nfsstat) -1 1157 1158 #ifdef DEBUG 1159 static int rfs_write_hits = 0; 1160 static int rfs_write_misses = 0; 1161 #endif 1162 1163 /* 1164 * Write data to file. 1165 * Returns attributes of a file after writing some data to it. 1166 */ 1167 void 1168 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1169 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1170 { 1171 int error; 1172 vnode_t *vp; 1173 rlim64_t rlimit; 1174 struct vattr va; 1175 struct uio uio; 1176 struct rfs_async_write_list *lp; 1177 struct rfs_async_write_list *nlp; 1178 struct rfs_async_write *rp; 1179 struct rfs_async_write *nrp; 1180 struct rfs_async_write *trp; 1181 struct rfs_async_write *lrp; 1182 int data_written; 1183 int iovcnt; 1184 mblk_t *m; 1185 struct iovec *iovp; 1186 struct iovec *niovp; 1187 struct iovec iov[MAXCLIOVECS]; 1188 int count; 1189 int rcount; 1190 uint_t off; 1191 uint_t len; 1192 struct rfs_async_write nrpsp; 1193 struct rfs_async_write_list nlpsp; 1194 ushort_t t_flag; 1195 cred_t *savecred; 1196 int in_crit = 0; 1197 caller_context_t ct; 1198 1199 if (!rfs_write_async) { 1200 rfs_write_sync(wa, ns, exi, req, cr, ro); 1201 return; 1202 } 1203 1204 /* 1205 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1206 * is considered an OK. 1207 */ 1208 ns->ns_status = RFSWRITE_INITVAL; 1209 1210 nrp = &nrpsp; 1211 nrp->wa = wa; 1212 nrp->ns = ns; 1213 nrp->req = req; 1214 nrp->cr = cr; 1215 nrp->ro = ro; 1216 nrp->thread = curthread; 1217 1218 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1219 1220 /* 1221 * Look to see if there is already a cluster started 1222 * for this file. 1223 */ 1224 mutex_enter(&rfs_async_write_lock); 1225 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { 1226 if (bcmp(&wa->wa_fhandle, lp->fhp, 1227 sizeof (fhandle_t)) == 0) 1228 break; 1229 } 1230 1231 /* 1232 * If lp is non-NULL, then there is already a cluster 1233 * started. We need to place ourselves in the cluster 1234 * list in the right place as determined by starting 1235 * offset. Conflicts with non-blocking mandatory locked 1236 * regions will be checked when the cluster is processed. 1237 */ 1238 if (lp != NULL) { 1239 rp = lp->list; 1240 trp = NULL; 1241 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1242 trp = rp; 1243 rp = rp->list; 1244 } 1245 nrp->list = rp; 1246 if (trp == NULL) 1247 lp->list = nrp; 1248 else 1249 trp->list = nrp; 1250 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1251 cv_wait(&lp->cv, &rfs_async_write_lock); 1252 mutex_exit(&rfs_async_write_lock); 1253 1254 return; 1255 } 1256 1257 /* 1258 * No cluster started yet, start one and add ourselves 1259 * to the list of clusters. 1260 */ 1261 nrp->list = NULL; 1262 1263 nlp = &nlpsp; 1264 nlp->fhp = &wa->wa_fhandle; 1265 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1266 nlp->list = nrp; 1267 nlp->next = NULL; 1268 1269 if (rfs_async_write_head == NULL) { 1270 rfs_async_write_head = nlp; 1271 } else { 1272 lp = rfs_async_write_head; 1273 while (lp->next != NULL) 1274 lp = lp->next; 1275 lp->next = nlp; 1276 } 1277 mutex_exit(&rfs_async_write_lock); 1278 1279 /* 1280 * Convert the file handle common to all of the requests 1281 * in this cluster to a vnode. 1282 */ 1283 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1284 if (vp == NULL) { 1285 mutex_enter(&rfs_async_write_lock); 1286 if (rfs_async_write_head == nlp) 1287 rfs_async_write_head = nlp->next; 1288 else { 1289 lp = rfs_async_write_head; 1290 while (lp->next != nlp) 1291 lp = lp->next; 1292 lp->next = nlp->next; 1293 } 1294 t_flag = curthread->t_flag & T_WOULDBLOCK; 1295 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1296 rp->ns->ns_status = NFSERR_STALE; 1297 rp->thread->t_flag |= t_flag; 1298 } 1299 cv_broadcast(&nlp->cv); 1300 mutex_exit(&rfs_async_write_lock); 1301 1302 return; 1303 } 1304 1305 /* 1306 * Can only write regular files. Attempts to write any 1307 * other file types fail with EISDIR. 1308 */ 1309 if (vp->v_type != VREG) { 1310 VN_RELE(vp); 1311 mutex_enter(&rfs_async_write_lock); 1312 if (rfs_async_write_head == nlp) 1313 rfs_async_write_head = nlp->next; 1314 else { 1315 lp = rfs_async_write_head; 1316 while (lp->next != nlp) 1317 lp = lp->next; 1318 lp->next = nlp->next; 1319 } 1320 t_flag = curthread->t_flag & T_WOULDBLOCK; 1321 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1322 rp->ns->ns_status = NFSERR_ISDIR; 1323 rp->thread->t_flag |= t_flag; 1324 } 1325 cv_broadcast(&nlp->cv); 1326 mutex_exit(&rfs_async_write_lock); 1327 1328 return; 1329 } 1330 1331 /* 1332 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1333 * deadlock with ufs. 1334 */ 1335 if (nbl_need_check(vp)) { 1336 nbl_start_crit(vp, RW_READER); 1337 in_crit = 1; 1338 } 1339 1340 ct.cc_sysid = 0; 1341 ct.cc_pid = 0; 1342 ct.cc_caller_id = nfs2_srv_caller_id; 1343 ct.cc_flags = CC_DONTBLOCK; 1344 1345 /* 1346 * Lock the file for writing. This operation provides 1347 * the delay which allows clusters to grow. 1348 */ 1349 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1350 1351 /* check if a monitor detected a delegation conflict */ 1352 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1353 if (in_crit) 1354 nbl_end_crit(vp); 1355 VN_RELE(vp); 1356 /* mark as wouldblock so response is dropped */ 1357 curthread->t_flag |= T_WOULDBLOCK; 1358 mutex_enter(&rfs_async_write_lock); 1359 if (rfs_async_write_head == nlp) 1360 rfs_async_write_head = nlp->next; 1361 else { 1362 lp = rfs_async_write_head; 1363 while (lp->next != nlp) 1364 lp = lp->next; 1365 lp->next = nlp->next; 1366 } 1367 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1368 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1369 rp->ns->ns_status = puterrno(error); 1370 rp->thread->t_flag |= T_WOULDBLOCK; 1371 } 1372 } 1373 cv_broadcast(&nlp->cv); 1374 mutex_exit(&rfs_async_write_lock); 1375 1376 return; 1377 } 1378 1379 /* 1380 * Disconnect this cluster from the list of clusters. 1381 * The cluster that is being dealt with must be fixed 1382 * in size after this point, so there is no reason 1383 * to leave it on the list so that new requests can 1384 * find it. 1385 * 1386 * The algorithm is that the first write request will 1387 * create a cluster, convert the file handle to a 1388 * vnode pointer, and then lock the file for writing. 1389 * This request is not likely to be clustered with 1390 * any others. However, the next request will create 1391 * a new cluster and be blocked in VOP_RWLOCK while 1392 * the first request is being processed. This delay 1393 * will allow more requests to be clustered in this 1394 * second cluster. 1395 */ 1396 mutex_enter(&rfs_async_write_lock); 1397 if (rfs_async_write_head == nlp) 1398 rfs_async_write_head = nlp->next; 1399 else { 1400 lp = rfs_async_write_head; 1401 while (lp->next != nlp) 1402 lp = lp->next; 1403 lp->next = nlp->next; 1404 } 1405 mutex_exit(&rfs_async_write_lock); 1406 1407 /* 1408 * Step through the list of requests in this cluster. 1409 * We need to check permissions to make sure that all 1410 * of the requests have sufficient permission to write 1411 * the file. A cluster can be composed of requests 1412 * from different clients and different users on each 1413 * client. 1414 * 1415 * As a side effect, we also calculate the size of the 1416 * byte range that this cluster encompasses. 1417 */ 1418 rp = nlp->list; 1419 off = rp->wa->wa_offset; 1420 len = (uint_t)0; 1421 do { 1422 if (rdonly(rp->ro, vp)) { 1423 rp->ns->ns_status = NFSERR_ROFS; 1424 t_flag = curthread->t_flag & T_WOULDBLOCK; 1425 rp->thread->t_flag |= t_flag; 1426 continue; 1427 } 1428 1429 va.va_mask = AT_UID|AT_MODE; 1430 1431 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1432 1433 if (!error) { 1434 if (crgetuid(rp->cr) != va.va_uid) { 1435 /* 1436 * This is a kludge to allow writes of files 1437 * created with read only permission. The 1438 * owner of the file is always allowed to 1439 * write it. 1440 */ 1441 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1442 } 1443 if (!error && MANDLOCK(vp, va.va_mode)) 1444 error = EACCES; 1445 } 1446 1447 /* 1448 * Check for a conflict with a nbmand-locked region. 1449 */ 1450 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1451 rp->wa->wa_count, 0, NULL)) { 1452 error = EACCES; 1453 } 1454 1455 if (error) { 1456 rp->ns->ns_status = puterrno(error); 1457 t_flag = curthread->t_flag & T_WOULDBLOCK; 1458 rp->thread->t_flag |= t_flag; 1459 continue; 1460 } 1461 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1462 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1463 } while ((rp = rp->list) != NULL); 1464 1465 /* 1466 * Step through the cluster attempting to gather as many 1467 * requests which are contiguous as possible. These 1468 * contiguous requests are handled via one call to VOP_WRITE 1469 * instead of different calls to VOP_WRITE. We also keep 1470 * track of the fact that any data was written. 1471 */ 1472 rp = nlp->list; 1473 data_written = 0; 1474 do { 1475 /* 1476 * Skip any requests which are already marked as having an 1477 * error. 1478 */ 1479 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1480 rp = rp->list; 1481 continue; 1482 } 1483 1484 /* 1485 * Count the number of iovec's which are required 1486 * to handle this set of requests. One iovec is 1487 * needed for each data buffer, whether addressed 1488 * by wa_data or by the b_rptr pointers in the 1489 * mblk chains. 1490 */ 1491 iovcnt = 0; 1492 lrp = rp; 1493 for (;;) { 1494 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1495 iovcnt++; 1496 else { 1497 m = lrp->wa->wa_mblk; 1498 while (m != NULL) { 1499 iovcnt++; 1500 m = m->b_cont; 1501 } 1502 } 1503 if (lrp->list == NULL || 1504 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1505 lrp->wa->wa_offset + lrp->wa->wa_count != 1506 lrp->list->wa->wa_offset) { 1507 lrp = lrp->list; 1508 break; 1509 } 1510 lrp = lrp->list; 1511 } 1512 1513 if (iovcnt <= MAXCLIOVECS) { 1514 #ifdef DEBUG 1515 rfs_write_hits++; 1516 #endif 1517 niovp = iov; 1518 } else { 1519 #ifdef DEBUG 1520 rfs_write_misses++; 1521 #endif 1522 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1523 } 1524 /* 1525 * Put together the scatter/gather iovecs. 1526 */ 1527 iovp = niovp; 1528 trp = rp; 1529 count = 0; 1530 do { 1531 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1532 if (trp->wa->wa_rlist) { 1533 iovp->iov_base = 1534 (char *)((trp->wa->wa_rlist)-> 1535 u.c_daddr3); 1536 iovp->iov_len = trp->wa->wa_count; 1537 } else { 1538 iovp->iov_base = trp->wa->wa_data; 1539 iovp->iov_len = trp->wa->wa_count; 1540 } 1541 iovp++; 1542 } else { 1543 m = trp->wa->wa_mblk; 1544 rcount = trp->wa->wa_count; 1545 while (m != NULL) { 1546 iovp->iov_base = (caddr_t)m->b_rptr; 1547 iovp->iov_len = (m->b_wptr - m->b_rptr); 1548 rcount -= iovp->iov_len; 1549 if (rcount < 0) 1550 iovp->iov_len += rcount; 1551 iovp++; 1552 if (rcount <= 0) 1553 break; 1554 m = m->b_cont; 1555 } 1556 } 1557 count += trp->wa->wa_count; 1558 trp = trp->list; 1559 } while (trp != lrp); 1560 1561 uio.uio_iov = niovp; 1562 uio.uio_iovcnt = iovcnt; 1563 uio.uio_segflg = UIO_SYSSPACE; 1564 uio.uio_extflg = UIO_COPY_DEFAULT; 1565 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1566 uio.uio_resid = count; 1567 /* 1568 * The limit is checked on the client. We 1569 * should allow any size writes here. 1570 */ 1571 uio.uio_llimit = curproc->p_fsz_ctl; 1572 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1573 if (rlimit < (rlim64_t)uio.uio_resid) 1574 uio.uio_resid = (uint_t)rlimit; 1575 1576 /* 1577 * For now we assume no append mode. 1578 */ 1579 1580 /* 1581 * We're changing creds because VM may fault 1582 * and we need the cred of the current 1583 * thread to be used if quota * checking is 1584 * enabled. 1585 */ 1586 savecred = curthread->t_cred; 1587 curthread->t_cred = cr; 1588 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1589 curthread->t_cred = savecred; 1590 1591 /* check if a monitor detected a delegation conflict */ 1592 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1593 /* mark as wouldblock so response is dropped */ 1594 curthread->t_flag |= T_WOULDBLOCK; 1595 1596 if (niovp != iov) 1597 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1598 1599 if (!error) { 1600 data_written = 1; 1601 /* 1602 * Get attributes again so we send the latest mod 1603 * time to the client side for his cache. 1604 */ 1605 va.va_mask = AT_ALL; /* now we want everything */ 1606 1607 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1608 1609 if (!error) 1610 acl_perm(vp, exi, &va, rp->cr); 1611 } 1612 1613 /* 1614 * Fill in the status responses for each request 1615 * which was just handled. Also, copy the latest 1616 * attributes in to the attribute responses if 1617 * appropriate. 1618 */ 1619 t_flag = curthread->t_flag & T_WOULDBLOCK; 1620 do { 1621 rp->thread->t_flag |= t_flag; 1622 /* check for overflows */ 1623 if (!error) { 1624 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1625 } 1626 rp->ns->ns_status = puterrno(error); 1627 rp = rp->list; 1628 } while (rp != lrp); 1629 } while (rp != NULL); 1630 1631 /* 1632 * If any data was written at all, then we need to flush 1633 * the data and metadata to stable storage. 1634 */ 1635 if (data_written) { 1636 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1637 1638 if (!error) { 1639 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1640 } 1641 } 1642 1643 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1644 1645 if (in_crit) 1646 nbl_end_crit(vp); 1647 VN_RELE(vp); 1648 1649 t_flag = curthread->t_flag & T_WOULDBLOCK; 1650 mutex_enter(&rfs_async_write_lock); 1651 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1652 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1653 rp->ns->ns_status = puterrno(error); 1654 rp->thread->t_flag |= t_flag; 1655 } 1656 } 1657 cv_broadcast(&nlp->cv); 1658 mutex_exit(&rfs_async_write_lock); 1659 1660 } 1661 1662 void * 1663 rfs_write_getfh(struct nfswriteargs *wa) 1664 { 1665 return (&wa->wa_fhandle); 1666 } 1667 1668 /* 1669 * Create a file. 1670 * Creates a file with given attributes and returns those attributes 1671 * and an fhandle for the new file. 1672 */ 1673 void 1674 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1675 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1676 { 1677 int error; 1678 int lookuperr; 1679 int in_crit = 0; 1680 struct vattr va; 1681 vnode_t *vp; 1682 vnode_t *realvp; 1683 vnode_t *dvp; 1684 char *name = args->ca_da.da_name; 1685 vnode_t *tvp = NULL; 1686 int mode; 1687 int lookup_ok; 1688 bool_t trunc; 1689 struct sockaddr *ca; 1690 1691 /* 1692 * Disallow NULL paths 1693 */ 1694 if (name == NULL || *name == '\0') { 1695 dr->dr_status = NFSERR_ACCES; 1696 return; 1697 } 1698 1699 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1700 if (dvp == NULL) { 1701 dr->dr_status = NFSERR_STALE; 1702 return; 1703 } 1704 1705 error = sattr_to_vattr(args->ca_sa, &va); 1706 if (error) { 1707 dr->dr_status = puterrno(error); 1708 return; 1709 } 1710 1711 /* 1712 * Must specify the mode. 1713 */ 1714 if (!(va.va_mask & AT_MODE)) { 1715 VN_RELE(dvp); 1716 dr->dr_status = NFSERR_INVAL; 1717 return; 1718 } 1719 1720 /* 1721 * This is a completely gross hack to make mknod 1722 * work over the wire until we can wack the protocol 1723 */ 1724 if ((va.va_mode & IFMT) == IFCHR) { 1725 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1726 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1727 else { 1728 va.va_type = VCHR; 1729 /* 1730 * uncompress the received dev_t 1731 * if the top half is zero indicating a request 1732 * from an `older style' OS. 1733 */ 1734 if ((va.va_size & 0xffff0000) == 0) 1735 va.va_rdev = nfsv2_expdev(va.va_size); 1736 else 1737 va.va_rdev = (dev_t)va.va_size; 1738 } 1739 va.va_mask &= ~AT_SIZE; 1740 } else if ((va.va_mode & IFMT) == IFBLK) { 1741 va.va_type = VBLK; 1742 /* 1743 * uncompress the received dev_t 1744 * if the top half is zero indicating a request 1745 * from an `older style' OS. 1746 */ 1747 if ((va.va_size & 0xffff0000) == 0) 1748 va.va_rdev = nfsv2_expdev(va.va_size); 1749 else 1750 va.va_rdev = (dev_t)va.va_size; 1751 va.va_mask &= ~AT_SIZE; 1752 } else if ((va.va_mode & IFMT) == IFSOCK) { 1753 va.va_type = VSOCK; 1754 } else { 1755 va.va_type = VREG; 1756 } 1757 va.va_mode &= ~IFMT; 1758 va.va_mask |= AT_TYPE; 1759 1760 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1761 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1762 MAXPATHLEN); 1763 if (name == NULL) { 1764 dr->dr_status = puterrno(EINVAL); 1765 return; 1766 } 1767 1768 /* 1769 * Why was the choice made to use VWRITE as the mode to the 1770 * call to VOP_CREATE ? This results in a bug. When a client 1771 * opens a file that already exists and is RDONLY, the second 1772 * open fails with an EACESS because of the mode. 1773 * bug ID 1054648. 1774 */ 1775 lookup_ok = 0; 1776 mode = VWRITE; 1777 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1778 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1779 NULL, NULL, NULL); 1780 if (!error) { 1781 struct vattr at; 1782 1783 lookup_ok = 1; 1784 at.va_mask = AT_MODE; 1785 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1786 if (!error) 1787 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1788 VN_RELE(tvp); 1789 tvp = NULL; 1790 } 1791 } 1792 1793 if (!lookup_ok) { 1794 if (rdonly(ro, dvp)) { 1795 error = EROFS; 1796 } else if (va.va_type != VREG && va.va_type != VFIFO && 1797 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1798 error = EPERM; 1799 } else { 1800 error = 0; 1801 } 1802 } 1803 1804 /* 1805 * If file size is being modified on an already existing file 1806 * make sure that there are no conflicting non-blocking mandatory 1807 * locks in the region being manipulated. Return EACCES if there 1808 * are conflicting locks. 1809 */ 1810 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1811 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1812 NULL, NULL, NULL); 1813 1814 if (!lookuperr && 1815 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1816 VN_RELE(tvp); 1817 curthread->t_flag |= T_WOULDBLOCK; 1818 goto out; 1819 } 1820 1821 if (!lookuperr && nbl_need_check(tvp)) { 1822 /* 1823 * The file exists. Now check if it has any 1824 * conflicting non-blocking mandatory locks 1825 * in the region being changed. 1826 */ 1827 struct vattr bva; 1828 u_offset_t offset; 1829 ssize_t length; 1830 1831 nbl_start_crit(tvp, RW_READER); 1832 in_crit = 1; 1833 1834 bva.va_mask = AT_SIZE; 1835 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1836 if (!error) { 1837 if (va.va_size < bva.va_size) { 1838 offset = va.va_size; 1839 length = bva.va_size - va.va_size; 1840 } else { 1841 offset = bva.va_size; 1842 length = va.va_size - bva.va_size; 1843 } 1844 if (length) { 1845 if (nbl_conflict(tvp, NBL_WRITE, 1846 offset, length, 0, NULL)) { 1847 error = EACCES; 1848 } 1849 } 1850 } 1851 if (error) { 1852 nbl_end_crit(tvp); 1853 VN_RELE(tvp); 1854 in_crit = 0; 1855 } 1856 } else if (tvp != NULL) { 1857 VN_RELE(tvp); 1858 } 1859 } 1860 1861 if (!error) { 1862 /* 1863 * If filesystem is shared with nosuid the remove any 1864 * setuid/setgid bits on create. 1865 */ 1866 if (va.va_type == VREG && 1867 exi->exi_export.ex_flags & EX_NOSUID) 1868 va.va_mode &= ~(VSUID | VSGID); 1869 1870 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1871 NULL, NULL); 1872 1873 if (!error) { 1874 1875 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1876 trunc = TRUE; 1877 else 1878 trunc = FALSE; 1879 1880 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1881 VN_RELE(vp); 1882 curthread->t_flag |= T_WOULDBLOCK; 1883 goto out; 1884 } 1885 va.va_mask = AT_ALL; 1886 1887 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1888 1889 /* check for overflows */ 1890 if (!error) { 1891 acl_perm(vp, exi, &va, cr); 1892 error = vattr_to_nattr(&va, &dr->dr_attr); 1893 if (!error) { 1894 error = makefh(&dr->dr_fhandle, vp, 1895 exi); 1896 } 1897 } 1898 /* 1899 * Force modified metadata out to stable storage. 1900 * 1901 * if a underlying vp exists, pass it to VOP_FSYNC 1902 */ 1903 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1904 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1905 else 1906 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1907 VN_RELE(vp); 1908 } 1909 1910 if (in_crit) { 1911 nbl_end_crit(tvp); 1912 VN_RELE(tvp); 1913 } 1914 } 1915 1916 /* 1917 * Force modified data and metadata out to stable storage. 1918 */ 1919 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1920 1921 out: 1922 1923 VN_RELE(dvp); 1924 1925 dr->dr_status = puterrno(error); 1926 1927 if (name != args->ca_da.da_name) 1928 kmem_free(name, MAXPATHLEN); 1929 } 1930 void * 1931 rfs_create_getfh(struct nfscreatargs *args) 1932 { 1933 return (args->ca_da.da_fhandle); 1934 } 1935 1936 /* 1937 * Remove a file. 1938 * Remove named file from parent directory. 1939 */ 1940 /* ARGSUSED */ 1941 void 1942 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1943 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1944 { 1945 int error = 0; 1946 vnode_t *vp; 1947 vnode_t *targvp; 1948 int in_crit = 0; 1949 1950 /* 1951 * Disallow NULL paths 1952 */ 1953 if (da->da_name == NULL || *da->da_name == '\0') { 1954 *status = NFSERR_ACCES; 1955 return; 1956 } 1957 1958 vp = nfs_fhtovp(da->da_fhandle, exi); 1959 if (vp == NULL) { 1960 *status = NFSERR_STALE; 1961 return; 1962 } 1963 1964 if (rdonly(ro, vp)) { 1965 VN_RELE(vp); 1966 *status = NFSERR_ROFS; 1967 return; 1968 } 1969 1970 /* 1971 * Check for a conflict with a non-blocking mandatory share reservation. 1972 */ 1973 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1974 NULL, cr, NULL, NULL, NULL); 1975 if (error != 0) { 1976 VN_RELE(vp); 1977 *status = puterrno(error); 1978 return; 1979 } 1980 1981 /* 1982 * If the file is delegated to an v4 client, then initiate 1983 * recall and drop this request (by setting T_WOULDBLOCK). 1984 * The client will eventually re-transmit the request and 1985 * (hopefully), by then, the v4 client will have returned 1986 * the delegation. 1987 */ 1988 1989 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 1990 VN_RELE(vp); 1991 VN_RELE(targvp); 1992 curthread->t_flag |= T_WOULDBLOCK; 1993 return; 1994 } 1995 1996 if (nbl_need_check(targvp)) { 1997 nbl_start_crit(targvp, RW_READER); 1998 in_crit = 1; 1999 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 2000 error = EACCES; 2001 goto out; 2002 } 2003 } 2004 2005 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0); 2006 2007 /* 2008 * Force modified data and metadata out to stable storage. 2009 */ 2010 (void) VOP_FSYNC(vp, 0, cr, NULL); 2011 2012 out: 2013 if (in_crit) 2014 nbl_end_crit(targvp); 2015 VN_RELE(targvp); 2016 VN_RELE(vp); 2017 2018 *status = puterrno(error); 2019 2020 } 2021 2022 void * 2023 rfs_remove_getfh(struct nfsdiropargs *da) 2024 { 2025 return (da->da_fhandle); 2026 } 2027 2028 /* 2029 * rename a file 2030 * Give a file (from) a new name (to). 2031 */ 2032 /* ARGSUSED */ 2033 void 2034 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, 2035 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2036 { 2037 int error = 0; 2038 vnode_t *fromvp; 2039 vnode_t *tovp; 2040 struct exportinfo *to_exi; 2041 fhandle_t *fh; 2042 vnode_t *srcvp; 2043 vnode_t *targvp; 2044 int in_crit = 0; 2045 2046 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi); 2047 if (fromvp == NULL) { 2048 *status = NFSERR_STALE; 2049 return; 2050 } 2051 2052 fh = args->rna_to.da_fhandle; 2053 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen, NULL); 2054 if (to_exi == NULL) { 2055 VN_RELE(fromvp); 2056 *status = NFSERR_ACCES; 2057 return; 2058 } 2059 exi_rele(to_exi); 2060 2061 if (to_exi != exi) { 2062 VN_RELE(fromvp); 2063 *status = NFSERR_XDEV; 2064 return; 2065 } 2066 2067 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi); 2068 if (tovp == NULL) { 2069 VN_RELE(fromvp); 2070 *status = NFSERR_STALE; 2071 return; 2072 } 2073 2074 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) { 2075 VN_RELE(tovp); 2076 VN_RELE(fromvp); 2077 *status = NFSERR_NOTDIR; 2078 return; 2079 } 2080 2081 /* 2082 * Disallow NULL paths 2083 */ 2084 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' || 2085 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') { 2086 VN_RELE(tovp); 2087 VN_RELE(fromvp); 2088 *status = NFSERR_ACCES; 2089 return; 2090 } 2091 2092 if (rdonly(ro, tovp)) { 2093 VN_RELE(tovp); 2094 VN_RELE(fromvp); 2095 *status = NFSERR_ROFS; 2096 return; 2097 } 2098 2099 /* 2100 * Check for a conflict with a non-blocking mandatory share reservation. 2101 */ 2102 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0, 2103 NULL, cr, NULL, NULL, NULL); 2104 if (error != 0) { 2105 VN_RELE(tovp); 2106 VN_RELE(fromvp); 2107 *status = puterrno(error); 2108 return; 2109 } 2110 2111 /* Check for delegations on the source file */ 2112 2113 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) { 2114 VN_RELE(tovp); 2115 VN_RELE(fromvp); 2116 VN_RELE(srcvp); 2117 curthread->t_flag |= T_WOULDBLOCK; 2118 return; 2119 } 2120 2121 /* Check for delegation on the file being renamed over, if it exists */ 2122 2123 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && 2124 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, 2125 NULL, NULL, NULL) == 0) { 2126 2127 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2128 VN_RELE(tovp); 2129 VN_RELE(fromvp); 2130 VN_RELE(srcvp); 2131 VN_RELE(targvp); 2132 curthread->t_flag |= T_WOULDBLOCK; 2133 return; 2134 } 2135 VN_RELE(targvp); 2136 } 2137 2138 2139 if (nbl_need_check(srcvp)) { 2140 nbl_start_crit(srcvp, RW_READER); 2141 in_crit = 1; 2142 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 2143 error = EACCES; 2144 goto out; 2145 } 2146 } 2147 2148 error = VOP_RENAME(fromvp, args->rna_from.da_name, 2149 tovp, args->rna_to.da_name, cr, NULL, 0); 2150 2151 if (error == 0) 2152 vn_renamepath(tovp, srcvp, args->rna_to.da_name, 2153 strlen(args->rna_to.da_name)); 2154 2155 /* 2156 * Force modified data and metadata out to stable storage. 2157 */ 2158 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2159 (void) VOP_FSYNC(fromvp, 0, cr, NULL); 2160 2161 out: 2162 if (in_crit) 2163 nbl_end_crit(srcvp); 2164 VN_RELE(srcvp); 2165 VN_RELE(tovp); 2166 VN_RELE(fromvp); 2167 2168 *status = puterrno(error); 2169 2170 } 2171 void * 2172 rfs_rename_getfh(struct nfsrnmargs *args) 2173 { 2174 return (args->rna_from.da_fhandle); 2175 } 2176 2177 /* 2178 * Link to a file. 2179 * Create a file (to) which is a hard link to the given file (from). 2180 */ 2181 /* ARGSUSED */ 2182 void 2183 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2184 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2185 { 2186 int error; 2187 vnode_t *fromvp; 2188 vnode_t *tovp; 2189 struct exportinfo *to_exi; 2190 fhandle_t *fh; 2191 2192 fromvp = nfs_fhtovp(args->la_from, exi); 2193 if (fromvp == NULL) { 2194 *status = NFSERR_STALE; 2195 return; 2196 } 2197 2198 fh = args->la_to.da_fhandle; 2199 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen, NULL); 2200 if (to_exi == NULL) { 2201 VN_RELE(fromvp); 2202 *status = NFSERR_ACCES; 2203 return; 2204 } 2205 exi_rele(to_exi); 2206 2207 if (to_exi != exi) { 2208 VN_RELE(fromvp); 2209 *status = NFSERR_XDEV; 2210 return; 2211 } 2212 2213 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2214 if (tovp == NULL) { 2215 VN_RELE(fromvp); 2216 *status = NFSERR_STALE; 2217 return; 2218 } 2219 2220 if (tovp->v_type != VDIR) { 2221 VN_RELE(tovp); 2222 VN_RELE(fromvp); 2223 *status = NFSERR_NOTDIR; 2224 return; 2225 } 2226 /* 2227 * Disallow NULL paths 2228 */ 2229 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2230 VN_RELE(tovp); 2231 VN_RELE(fromvp); 2232 *status = NFSERR_ACCES; 2233 return; 2234 } 2235 2236 if (rdonly(ro, tovp)) { 2237 VN_RELE(tovp); 2238 VN_RELE(fromvp); 2239 *status = NFSERR_ROFS; 2240 return; 2241 } 2242 2243 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2244 2245 /* 2246 * Force modified data and metadata out to stable storage. 2247 */ 2248 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2249 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2250 2251 VN_RELE(tovp); 2252 VN_RELE(fromvp); 2253 2254 *status = puterrno(error); 2255 2256 } 2257 void * 2258 rfs_link_getfh(struct nfslinkargs *args) 2259 { 2260 return (args->la_from); 2261 } 2262 2263 /* 2264 * Symbolicly link to a file. 2265 * Create a file (to) with the given attributes which is a symbolic link 2266 * to the given path name (to). 2267 */ 2268 void 2269 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2270 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2271 { 2272 int error; 2273 struct vattr va; 2274 vnode_t *vp; 2275 vnode_t *svp; 2276 int lerror; 2277 struct sockaddr *ca; 2278 char *name = NULL; 2279 2280 /* 2281 * Disallow NULL paths 2282 */ 2283 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2284 *status = NFSERR_ACCES; 2285 return; 2286 } 2287 2288 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2289 if (vp == NULL) { 2290 *status = NFSERR_STALE; 2291 return; 2292 } 2293 2294 if (rdonly(ro, vp)) { 2295 VN_RELE(vp); 2296 *status = NFSERR_ROFS; 2297 return; 2298 } 2299 2300 error = sattr_to_vattr(args->sla_sa, &va); 2301 if (error) { 2302 VN_RELE(vp); 2303 *status = puterrno(error); 2304 return; 2305 } 2306 2307 if (!(va.va_mask & AT_MODE)) { 2308 VN_RELE(vp); 2309 *status = NFSERR_INVAL; 2310 return; 2311 } 2312 2313 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2314 name = nfscmd_convname(ca, exi, args->sla_tnm, 2315 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2316 2317 if (name == NULL) { 2318 *status = NFSERR_ACCES; 2319 return; 2320 } 2321 2322 va.va_type = VLNK; 2323 va.va_mask |= AT_TYPE; 2324 2325 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2326 2327 /* 2328 * Force new data and metadata out to stable storage. 2329 */ 2330 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2331 NULL, cr, NULL, NULL, NULL); 2332 2333 if (!lerror) { 2334 (void) VOP_FSYNC(svp, 0, cr, NULL); 2335 VN_RELE(svp); 2336 } 2337 2338 /* 2339 * Force modified data and metadata out to stable storage. 2340 */ 2341 (void) VOP_FSYNC(vp, 0, cr, NULL); 2342 2343 VN_RELE(vp); 2344 2345 *status = puterrno(error); 2346 if (name != args->sla_tnm) 2347 kmem_free(name, MAXPATHLEN); 2348 2349 } 2350 void * 2351 rfs_symlink_getfh(struct nfsslargs *args) 2352 { 2353 return (args->sla_from.da_fhandle); 2354 } 2355 2356 /* 2357 * Make a directory. 2358 * Create a directory with the given name, parent directory, and attributes. 2359 * Returns a file handle and attributes for the new directory. 2360 */ 2361 /* ARGSUSED */ 2362 void 2363 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2364 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2365 { 2366 int error; 2367 struct vattr va; 2368 vnode_t *dvp = NULL; 2369 vnode_t *vp; 2370 char *name = args->ca_da.da_name; 2371 2372 /* 2373 * Disallow NULL paths 2374 */ 2375 if (name == NULL || *name == '\0') { 2376 dr->dr_status = NFSERR_ACCES; 2377 return; 2378 } 2379 2380 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2381 if (vp == NULL) { 2382 dr->dr_status = NFSERR_STALE; 2383 return; 2384 } 2385 2386 if (rdonly(ro, vp)) { 2387 VN_RELE(vp); 2388 dr->dr_status = NFSERR_ROFS; 2389 return; 2390 } 2391 2392 error = sattr_to_vattr(args->ca_sa, &va); 2393 if (error) { 2394 VN_RELE(vp); 2395 dr->dr_status = puterrno(error); 2396 return; 2397 } 2398 2399 if (!(va.va_mask & AT_MODE)) { 2400 VN_RELE(vp); 2401 dr->dr_status = NFSERR_INVAL; 2402 return; 2403 } 2404 2405 va.va_type = VDIR; 2406 va.va_mask |= AT_TYPE; 2407 2408 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2409 2410 if (!error) { 2411 /* 2412 * Attribtutes of the newly created directory should 2413 * be returned to the client. 2414 */ 2415 va.va_mask = AT_ALL; /* We want everything */ 2416 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2417 2418 /* check for overflows */ 2419 if (!error) { 2420 acl_perm(vp, exi, &va, cr); 2421 error = vattr_to_nattr(&va, &dr->dr_attr); 2422 if (!error) { 2423 error = makefh(&dr->dr_fhandle, dvp, exi); 2424 } 2425 } 2426 /* 2427 * Force new data and metadata out to stable storage. 2428 */ 2429 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2430 VN_RELE(dvp); 2431 } 2432 2433 /* 2434 * Force modified data and metadata out to stable storage. 2435 */ 2436 (void) VOP_FSYNC(vp, 0, cr, NULL); 2437 2438 VN_RELE(vp); 2439 2440 dr->dr_status = puterrno(error); 2441 2442 } 2443 void * 2444 rfs_mkdir_getfh(struct nfscreatargs *args) 2445 { 2446 return (args->ca_da.da_fhandle); 2447 } 2448 2449 /* 2450 * Remove a directory. 2451 * Remove the given directory name from the given parent directory. 2452 */ 2453 /* ARGSUSED */ 2454 void 2455 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2456 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2457 { 2458 int error; 2459 vnode_t *vp; 2460 2461 /* 2462 * Disallow NULL paths 2463 */ 2464 if (da->da_name == NULL || *da->da_name == '\0') { 2465 *status = NFSERR_ACCES; 2466 return; 2467 } 2468 2469 vp = nfs_fhtovp(da->da_fhandle, exi); 2470 if (vp == NULL) { 2471 *status = NFSERR_STALE; 2472 return; 2473 } 2474 2475 if (rdonly(ro, vp)) { 2476 VN_RELE(vp); 2477 *status = NFSERR_ROFS; 2478 return; 2479 } 2480 2481 /* 2482 * VOP_RMDIR takes a third argument (the current 2483 * directory of the process). That's because someone 2484 * wants to return EINVAL if one tries to remove ".". 2485 * Of course, NFS servers have no idea what their 2486 * clients' current directories are. We fake it by 2487 * supplying a vnode known to exist and illegal to 2488 * remove. 2489 */ 2490 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2491 2492 /* 2493 * Force modified data and metadata out to stable storage. 2494 */ 2495 (void) VOP_FSYNC(vp, 0, cr, NULL); 2496 2497 VN_RELE(vp); 2498 2499 /* 2500 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2501 * if the directory is not empty. A System V NFS server 2502 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2503 * over the wire. 2504 */ 2505 if (error == EEXIST) 2506 *status = NFSERR_NOTEMPTY; 2507 else 2508 *status = puterrno(error); 2509 2510 } 2511 void * 2512 rfs_rmdir_getfh(struct nfsdiropargs *da) 2513 { 2514 return (da->da_fhandle); 2515 } 2516 2517 /* ARGSUSED */ 2518 void 2519 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2520 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2521 { 2522 int error; 2523 int iseof; 2524 struct iovec iov; 2525 struct uio uio; 2526 vnode_t *vp; 2527 char *ndata = NULL; 2528 struct sockaddr *ca; 2529 size_t nents; 2530 int ret; 2531 2532 vp = nfs_fhtovp(&rda->rda_fh, exi); 2533 if (vp == NULL) { 2534 rd->rd_entries = NULL; 2535 rd->rd_status = NFSERR_STALE; 2536 return; 2537 } 2538 2539 if (vp->v_type != VDIR) { 2540 VN_RELE(vp); 2541 rd->rd_entries = NULL; 2542 rd->rd_status = NFSERR_NOTDIR; 2543 return; 2544 } 2545 2546 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2547 2548 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2549 2550 if (error) { 2551 rd->rd_entries = NULL; 2552 goto bad; 2553 } 2554 2555 if (rda->rda_count == 0) { 2556 rd->rd_entries = NULL; 2557 rd->rd_size = 0; 2558 rd->rd_eof = FALSE; 2559 goto bad; 2560 } 2561 2562 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2563 2564 /* 2565 * Allocate data for entries. This will be freed by rfs_rddirfree. 2566 */ 2567 rd->rd_bufsize = (uint_t)rda->rda_count; 2568 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2569 2570 /* 2571 * Set up io vector to read directory data 2572 */ 2573 iov.iov_base = (caddr_t)rd->rd_entries; 2574 iov.iov_len = rda->rda_count; 2575 uio.uio_iov = &iov; 2576 uio.uio_iovcnt = 1; 2577 uio.uio_segflg = UIO_SYSSPACE; 2578 uio.uio_extflg = UIO_COPY_CACHED; 2579 uio.uio_loffset = (offset_t)rda->rda_offset; 2580 uio.uio_resid = rda->rda_count; 2581 2582 /* 2583 * read directory 2584 */ 2585 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2586 2587 /* 2588 * Clean up 2589 */ 2590 if (!error) { 2591 /* 2592 * set size and eof 2593 */ 2594 if (uio.uio_resid == rda->rda_count) { 2595 rd->rd_size = 0; 2596 rd->rd_eof = TRUE; 2597 } else { 2598 rd->rd_size = (uint32_t)(rda->rda_count - 2599 uio.uio_resid); 2600 rd->rd_eof = iseof ? TRUE : FALSE; 2601 } 2602 } 2603 2604 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2605 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2606 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2607 rda->rda_count, &ndata); 2608 2609 if (ret != 0) { 2610 size_t dropbytes; 2611 /* 2612 * We had to drop one or more entries in order to fit 2613 * during the character conversion. We need to patch 2614 * up the size and eof info. 2615 */ 2616 if (rd->rd_eof) 2617 rd->rd_eof = FALSE; 2618 dropbytes = nfscmd_dropped_entrysize( 2619 (struct dirent64 *)rd->rd_entries, nents, ret); 2620 rd->rd_size -= dropbytes; 2621 } 2622 if (ndata == NULL) { 2623 ndata = (char *)rd->rd_entries; 2624 } else if (ndata != (char *)rd->rd_entries) { 2625 kmem_free(rd->rd_entries, rd->rd_bufsize); 2626 rd->rd_entries = (void *)ndata; 2627 rd->rd_bufsize = rda->rda_count; 2628 } 2629 2630 bad: 2631 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2632 2633 #if 0 /* notyet */ 2634 /* 2635 * Don't do this. It causes local disk writes when just 2636 * reading the file and the overhead is deemed larger 2637 * than the benefit. 2638 */ 2639 /* 2640 * Force modified metadata out to stable storage. 2641 */ 2642 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2643 #endif 2644 2645 VN_RELE(vp); 2646 2647 rd->rd_status = puterrno(error); 2648 2649 } 2650 void * 2651 rfs_readdir_getfh(struct nfsrddirargs *rda) 2652 { 2653 return (&rda->rda_fh); 2654 } 2655 void 2656 rfs_rddirfree(struct nfsrddirres *rd) 2657 { 2658 if (rd->rd_entries != NULL) 2659 kmem_free(rd->rd_entries, rd->rd_bufsize); 2660 } 2661 2662 /* ARGSUSED */ 2663 void 2664 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2665 struct svc_req *req, cred_t *cr, bool_t ro) 2666 { 2667 int error; 2668 struct statvfs64 sb; 2669 vnode_t *vp; 2670 2671 vp = nfs_fhtovp(fh, exi); 2672 if (vp == NULL) { 2673 fs->fs_status = NFSERR_STALE; 2674 return; 2675 } 2676 2677 error = VFS_STATVFS(vp->v_vfsp, &sb); 2678 2679 if (!error) { 2680 fs->fs_tsize = nfstsize(); 2681 fs->fs_bsize = sb.f_frsize; 2682 fs->fs_blocks = sb.f_blocks; 2683 fs->fs_bfree = sb.f_bfree; 2684 fs->fs_bavail = sb.f_bavail; 2685 } 2686 2687 VN_RELE(vp); 2688 2689 fs->fs_status = puterrno(error); 2690 2691 } 2692 void * 2693 rfs_statfs_getfh(fhandle_t *fh) 2694 { 2695 return (fh); 2696 } 2697 2698 static int 2699 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2700 { 2701 vap->va_mask = 0; 2702 2703 /* 2704 * There was a sign extension bug in some VFS based systems 2705 * which stored the mode as a short. When it would get 2706 * assigned to a u_long, no sign extension would occur. 2707 * It needed to, but this wasn't noticed because sa_mode 2708 * would then get assigned back to the short, thus ignoring 2709 * the upper 16 bits of sa_mode. 2710 * 2711 * To make this implementation work for both broken 2712 * clients and good clients, we check for both versions 2713 * of the mode. 2714 */ 2715 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2716 sa->sa_mode != (uint32_t)-1) { 2717 vap->va_mask |= AT_MODE; 2718 vap->va_mode = sa->sa_mode; 2719 } 2720 if (sa->sa_uid != (uint32_t)-1) { 2721 vap->va_mask |= AT_UID; 2722 vap->va_uid = sa->sa_uid; 2723 } 2724 if (sa->sa_gid != (uint32_t)-1) { 2725 vap->va_mask |= AT_GID; 2726 vap->va_gid = sa->sa_gid; 2727 } 2728 if (sa->sa_size != (uint32_t)-1) { 2729 vap->va_mask |= AT_SIZE; 2730 vap->va_size = sa->sa_size; 2731 } 2732 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2733 sa->sa_atime.tv_usec != (int32_t)-1) { 2734 #ifndef _LP64 2735 /* return error if time overflow */ 2736 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2737 return (EOVERFLOW); 2738 #endif 2739 vap->va_mask |= AT_ATIME; 2740 /* 2741 * nfs protocol defines times as unsigned so don't extend sign, 2742 * unless sysadmin set nfs_allow_preepoch_time. 2743 */ 2744 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2745 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2746 } 2747 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2748 sa->sa_mtime.tv_usec != (int32_t)-1) { 2749 #ifndef _LP64 2750 /* return error if time overflow */ 2751 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2752 return (EOVERFLOW); 2753 #endif 2754 vap->va_mask |= AT_MTIME; 2755 /* 2756 * nfs protocol defines times as unsigned so don't extend sign, 2757 * unless sysadmin set nfs_allow_preepoch_time. 2758 */ 2759 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2760 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2761 } 2762 return (0); 2763 } 2764 2765 static enum nfsftype vt_to_nf[] = { 2766 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2767 }; 2768 2769 /* 2770 * check the following fields for overflow: nodeid, size, and time. 2771 * There could be a problem when converting 64-bit LP64 fields 2772 * into 32-bit ones. Return an error if there is an overflow. 2773 */ 2774 int 2775 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2776 { 2777 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2778 na->na_type = vt_to_nf[vap->va_type]; 2779 2780 if (vap->va_mode == (unsigned short) -1) 2781 na->na_mode = (uint32_t)-1; 2782 else 2783 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2784 2785 if (vap->va_uid == (unsigned short)(-1)) 2786 na->na_uid = (uint32_t)(-1); 2787 else if (vap->va_uid == UID_NOBODY) 2788 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2789 else 2790 na->na_uid = vap->va_uid; 2791 2792 if (vap->va_gid == (unsigned short)(-1)) 2793 na->na_gid = (uint32_t)-1; 2794 else if (vap->va_gid == GID_NOBODY) 2795 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2796 else 2797 na->na_gid = vap->va_gid; 2798 2799 /* 2800 * Do we need to check fsid for overflow? It is 64-bit in the 2801 * vattr, but are bigger than 32 bit values supported? 2802 */ 2803 na->na_fsid = vap->va_fsid; 2804 2805 na->na_nodeid = vap->va_nodeid; 2806 2807 /* 2808 * Check to make sure that the nodeid is representable over the 2809 * wire without losing bits. 2810 */ 2811 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2812 return (EFBIG); 2813 na->na_nlink = vap->va_nlink; 2814 2815 /* 2816 * Check for big files here, instead of at the caller. See 2817 * comments in cstat for large special file explanation. 2818 */ 2819 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2820 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2821 return (EFBIG); 2822 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2823 /* UNKNOWN_SIZE | OVERFLOW */ 2824 na->na_size = MAXOFF32_T; 2825 } else 2826 na->na_size = vap->va_size; 2827 } else 2828 na->na_size = vap->va_size; 2829 2830 /* 2831 * If the vnode times overflow the 32-bit times that NFS2 2832 * uses on the wire then return an error. 2833 */ 2834 if (!NFS_VAP_TIME_OK(vap)) { 2835 return (EOVERFLOW); 2836 } 2837 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2838 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2839 2840 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2841 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2842 2843 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2844 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2845 2846 /* 2847 * If the dev_t will fit into 16 bits then compress 2848 * it, otherwise leave it alone. See comments in 2849 * nfs_client.c. 2850 */ 2851 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2852 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2853 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2854 else 2855 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2856 2857 na->na_blocks = vap->va_nblocks; 2858 na->na_blocksize = vap->va_blksize; 2859 2860 /* 2861 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2862 * over-the-wire protocols for named-pipe vnodes. It remaps the 2863 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2864 * 2865 * BUYER BEWARE: 2866 * If you are porting the NFS to a non-Sun server, you probably 2867 * don't want to include the following block of code. The 2868 * over-the-wire special file types will be changing with the 2869 * NFS Protocol Revision. 2870 */ 2871 if (vap->va_type == VFIFO) 2872 NA_SETFIFO(na); 2873 return (0); 2874 } 2875 2876 /* 2877 * acl v2 support: returns approximate permission. 2878 * default: returns minimal permission (more restrictive) 2879 * aclok: returns maximal permission (less restrictive) 2880 * This routine changes the permissions that are alaredy in *va. 2881 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 2882 * CLASS_OBJ is always the same as GROUP_OBJ entry. 2883 */ 2884 static void 2885 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2886 { 2887 vsecattr_t vsa; 2888 int aclcnt; 2889 aclent_t *aclentp; 2890 mode_t mask_perm; 2891 mode_t grp_perm; 2892 mode_t other_perm; 2893 mode_t other_orig; 2894 int error; 2895 2896 /* dont care default acl */ 2897 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2898 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2899 2900 if (!error) { 2901 aclcnt = vsa.vsa_aclcnt; 2902 if (aclcnt > MIN_ACL_ENTRIES) { 2903 /* non-trivial ACL */ 2904 aclentp = vsa.vsa_aclentp; 2905 if (exi->exi_export.ex_flags & EX_ACLOK) { 2906 /* maximal permissions */ 2907 grp_perm = 0; 2908 other_perm = 0; 2909 for (; aclcnt > 0; aclcnt--, aclentp++) { 2910 switch (aclentp->a_type) { 2911 case USER_OBJ: 2912 break; 2913 case USER: 2914 grp_perm |= 2915 aclentp->a_perm << 3; 2916 other_perm |= aclentp->a_perm; 2917 break; 2918 case GROUP_OBJ: 2919 grp_perm |= 2920 aclentp->a_perm << 3; 2921 break; 2922 case GROUP: 2923 other_perm |= aclentp->a_perm; 2924 break; 2925 case OTHER_OBJ: 2926 other_orig = aclentp->a_perm; 2927 break; 2928 case CLASS_OBJ: 2929 mask_perm = aclentp->a_perm; 2930 break; 2931 default: 2932 break; 2933 } 2934 } 2935 grp_perm &= mask_perm << 3; 2936 other_perm &= mask_perm; 2937 other_perm |= other_orig; 2938 2939 } else { 2940 /* minimal permissions */ 2941 grp_perm = 070; 2942 other_perm = 07; 2943 for (; aclcnt > 0; aclcnt--, aclentp++) { 2944 switch (aclentp->a_type) { 2945 case USER_OBJ: 2946 break; 2947 case USER: 2948 case CLASS_OBJ: 2949 grp_perm &= 2950 aclentp->a_perm << 3; 2951 other_perm &= 2952 aclentp->a_perm; 2953 break; 2954 case GROUP_OBJ: 2955 grp_perm &= 2956 aclentp->a_perm << 3; 2957 break; 2958 case GROUP: 2959 other_perm &= 2960 aclentp->a_perm; 2961 break; 2962 case OTHER_OBJ: 2963 other_perm &= 2964 aclentp->a_perm; 2965 break; 2966 default: 2967 break; 2968 } 2969 } 2970 } 2971 /* copy to va */ 2972 va->va_mode &= ~077; 2973 va->va_mode |= grp_perm | other_perm; 2974 } 2975 if (vsa.vsa_aclcnt) 2976 kmem_free(vsa.vsa_aclentp, 2977 vsa.vsa_aclcnt * sizeof (aclent_t)); 2978 } 2979 } 2980 2981 void 2982 rfs_srvrinit(void) 2983 { 2984 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2985 nfs2_srv_caller_id = fs_new_caller_id(); 2986 } 2987 2988 void 2989 rfs_srvrfini(void) 2990 { 2991 mutex_destroy(&rfs_async_write_lock); 2992 } 2993 2994 static int 2995 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2996 { 2997 struct clist *wcl; 2998 int wlist_len; 2999 uint32_t count = rr->rr_count; 3000 3001 wcl = ra->ra_wlist; 3002 3003 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 3004 return (FALSE); 3005 } 3006 3007 wcl = ra->ra_wlist; 3008 rr->rr_ok.rrok_wlist_len = wlist_len; 3009 rr->rr_ok.rrok_wlist = wcl; 3010 3011 return (TRUE); 3012 } 3013