1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 2016 by Delphix. All rights reserved. 25 */ 26 27 /* 28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 29 * All rights reserved. 
30 */ 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/buf.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/uio.h> 40 #include <sys/stat.h> 41 #include <sys/errno.h> 42 #include <sys/sysmacros.h> 43 #include <sys/statvfs.h> 44 #include <sys/kmem.h> 45 #include <sys/kstat.h> 46 #include <sys/dirent.h> 47 #include <sys/cmn_err.h> 48 #include <sys/debug.h> 49 #include <sys/vtrace.h> 50 #include <sys/mode.h> 51 #include <sys/acl.h> 52 #include <sys/nbmlock.h> 53 #include <sys/policy.h> 54 #include <sys/sdt.h> 55 56 #include <rpc/types.h> 57 #include <rpc/auth.h> 58 #include <rpc/svc.h> 59 60 #include <nfs/nfs.h> 61 #include <nfs/export.h> 62 #include <nfs/nfs_cmd.h> 63 64 #include <vm/hat.h> 65 #include <vm/as.h> 66 #include <vm/seg.h> 67 #include <vm/seg_map.h> 68 #include <vm/seg_kmem.h> 69 70 #include <sys/strsubr.h> 71 72 /* 73 * These are the interface routines for the server side of the 74 * Network File System. See the NFS version 2 protocol specification 75 * for a description of this interface. 76 */ 77 78 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 79 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 80 cred_t *); 81 82 /* 83 * Some "over the wire" UNIX file types. These are encoded 84 * into the mode. This needs to be fixed in the next rev. 85 */ 86 #define IFMT 0170000 /* type of file */ 87 #define IFCHR 0020000 /* character special */ 88 #define IFBLK 0060000 /* block special */ 89 #define IFSOCK 0140000 /* socket */ 90 91 u_longlong_t nfs2_srv_caller_id; 92 93 /* 94 * Get file attributes. 95 * Returns the current attributes of the file with the given fhandle. 
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/*
 * Return a pointer to the file handle embedded in the GETATTR arguments.
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}

/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;
	vnode_t *vp;
	struct vattr va;
	struct vattr bva;
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	/*
	 * CC_DONTBLOCK tells the delegation monitor not to block this
	 * thread on a conflict; EAGAIN + CC_WOULDBLOCK is checked below.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The affected byte range is between the current
			 * size and the requested size, whichever direction
			 * the size change goes.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/*
 * Return a pointer to the file handle embedded in the SETATTR arguments.
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}

/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = rootdir;
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Do not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		VN_RELE(dvp);
		dr->dr_status = NFSERR_NOENT;
		return;
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	/*
	 * NOTE(review): dvp is not released on this error path, unlike
	 * every other early return above that holds dvp — this looks like
	 * a vnode hold leak when name conversion fails; verify and fix
	 * separately.
	 */
	if (name == NULL) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;
		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* nfscmd_convname may return the original name or an allocated copy */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);


	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

	VN_RELE(dvp);

	/*
	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
	 * and have obtained a new exportinfo in exi which needs to be
	 * released. Note that the original exportinfo pointed to by exi
	 * will be released by the caller, common_dispatch.
	 */
	if (publicfh_flag && exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}

/*
 * Return a pointer to the file handle embedded in the LOOKUP arguments.
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* refuse objects with non-blocking mandatory locking enabled */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link consumed the entire buffer
		 * (uio_resid == 0), rl_count == NFS_MAXPATHLEN and this
		 * NUL store would be one byte past the allocation —
		 * confirm whether VOP_READLINK can fill the buffer
		 * completely.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* translate the outbound name for the client's character set */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}

/*
 * Return a pointer to the file handle embedded in the READLINK arguments.
 */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}

static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/*
	 * check if a monitor detected a delegation conflict
	 *
	 * NOTE(review): unlike the later error paths, this return does
	 * not call nbl_end_crit() when in_crit is set — looks like the
	 * critical region is leaked here; verify.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* read starting at or past EOF: NFS_OK with zero-length data */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client's write chunk */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		/*
		 * NOTE(review): the `done:' label below unconditionally
		 * overwrites rr_status with puterrno(error), which would
		 * clobber this NFSERR_INVAL since error is 0 here — verify
		 * intended behavior.
		 */
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* actual byte count transferred */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}

/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}

/*
 * Return a pointer to the file handle embedded in the READ arguments.
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}

/* iovecs preallocated on the stack before falling back to kmem_alloc */
#define	MAX_IOVECS	12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;	/* writes served by stack iovecs */
static int rfs_write_sync_misses = 0;	/* writes needing allocated iovecs */
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 */
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	/*
	 * CC_DONTBLOCK tells the delegation monitor not to block this
	 * thread on a conflict; EAGAIN + CC_WOULDBLOCK is checked below.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission. The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file. This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/*
	 * check if a monitor detected a delegation conflict
	 *
	 * NOTE(review): this return bypasses the `out:' label, so
	 * nbl_end_crit() is not called when in_crit is set — looks like
	 * the critical region is leaked on this path; verify.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {
		/* data arrived as an mblk chain: build an iovec per mblk */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for its cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}

/*
 * One queued WRITE request within a write cluster.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* request arguments */
	struct nfsattrstat *ns;		/* where to place the reply status */
	struct svc_req *req;		/* RPC request */
	cred_t *cr;			/* credentials of the requester */
	bool_t ro;			/* export is read-only for this op */
	kthread_t *thread;		/* thread waiting on this request */
	struct rfs_async_write *list;	/* next request in this cluster */
};

/*
 * A cluster of WRITE requests to the same file, keyed by file handle.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle common to the cluster */
	kcondvar_t cv;			/* signalled when the cluster is done */
	struct rfs_async_write *list;	/* requests, sorted by offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};

/* list of active clusters; protected by rfs_async_write_lock */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
1167 */ 1168 void 1169 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1170 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1171 { 1172 int error; 1173 vnode_t *vp; 1174 rlim64_t rlimit; 1175 struct vattr va; 1176 struct uio uio; 1177 struct rfs_async_write_list *lp; 1178 struct rfs_async_write_list *nlp; 1179 struct rfs_async_write *rp; 1180 struct rfs_async_write *nrp; 1181 struct rfs_async_write *trp; 1182 struct rfs_async_write *lrp; 1183 int data_written; 1184 int iovcnt; 1185 mblk_t *m; 1186 struct iovec *iovp; 1187 struct iovec *niovp; 1188 struct iovec iov[MAXCLIOVECS]; 1189 int count; 1190 int rcount; 1191 uint_t off; 1192 uint_t len; 1193 struct rfs_async_write nrpsp; 1194 struct rfs_async_write_list nlpsp; 1195 ushort_t t_flag; 1196 cred_t *savecred; 1197 int in_crit = 0; 1198 caller_context_t ct; 1199 1200 if (!rfs_write_async) { 1201 rfs_write_sync(wa, ns, exi, req, cr, ro); 1202 return; 1203 } 1204 1205 /* 1206 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1207 * is considered an OK. 1208 */ 1209 ns->ns_status = RFSWRITE_INITVAL; 1210 1211 nrp = &nrpsp; 1212 nrp->wa = wa; 1213 nrp->ns = ns; 1214 nrp->req = req; 1215 nrp->cr = cr; 1216 nrp->ro = ro; 1217 nrp->thread = curthread; 1218 1219 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1220 1221 /* 1222 * Look to see if there is already a cluster started 1223 * for this file. 1224 */ 1225 mutex_enter(&rfs_async_write_lock); 1226 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { 1227 if (bcmp(&wa->wa_fhandle, lp->fhp, 1228 sizeof (fhandle_t)) == 0) 1229 break; 1230 } 1231 1232 /* 1233 * If lp is non-NULL, then there is already a cluster 1234 * started. We need to place ourselves in the cluster 1235 * list in the right place as determined by starting 1236 * offset. Conflicts with non-blocking mandatory locked 1237 * regions will be checked when the cluster is processed. 
1238 */ 1239 if (lp != NULL) { 1240 rp = lp->list; 1241 trp = NULL; 1242 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1243 trp = rp; 1244 rp = rp->list; 1245 } 1246 nrp->list = rp; 1247 if (trp == NULL) 1248 lp->list = nrp; 1249 else 1250 trp->list = nrp; 1251 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1252 cv_wait(&lp->cv, &rfs_async_write_lock); 1253 mutex_exit(&rfs_async_write_lock); 1254 1255 return; 1256 } 1257 1258 /* 1259 * No cluster started yet, start one and add ourselves 1260 * to the list of clusters. 1261 */ 1262 nrp->list = NULL; 1263 1264 nlp = &nlpsp; 1265 nlp->fhp = &wa->wa_fhandle; 1266 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1267 nlp->list = nrp; 1268 nlp->next = NULL; 1269 1270 if (rfs_async_write_head == NULL) { 1271 rfs_async_write_head = nlp; 1272 } else { 1273 lp = rfs_async_write_head; 1274 while (lp->next != NULL) 1275 lp = lp->next; 1276 lp->next = nlp; 1277 } 1278 mutex_exit(&rfs_async_write_lock); 1279 1280 /* 1281 * Convert the file handle common to all of the requests 1282 * in this cluster to a vnode. 1283 */ 1284 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1285 if (vp == NULL) { 1286 mutex_enter(&rfs_async_write_lock); 1287 if (rfs_async_write_head == nlp) 1288 rfs_async_write_head = nlp->next; 1289 else { 1290 lp = rfs_async_write_head; 1291 while (lp->next != nlp) 1292 lp = lp->next; 1293 lp->next = nlp->next; 1294 } 1295 t_flag = curthread->t_flag & T_WOULDBLOCK; 1296 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1297 rp->ns->ns_status = NFSERR_STALE; 1298 rp->thread->t_flag |= t_flag; 1299 } 1300 cv_broadcast(&nlp->cv); 1301 mutex_exit(&rfs_async_write_lock); 1302 1303 return; 1304 } 1305 1306 /* 1307 * Can only write regular files. Attempts to write any 1308 * other file types fail with EISDIR. 
1309 */ 1310 if (vp->v_type != VREG) { 1311 VN_RELE(vp); 1312 mutex_enter(&rfs_async_write_lock); 1313 if (rfs_async_write_head == nlp) 1314 rfs_async_write_head = nlp->next; 1315 else { 1316 lp = rfs_async_write_head; 1317 while (lp->next != nlp) 1318 lp = lp->next; 1319 lp->next = nlp->next; 1320 } 1321 t_flag = curthread->t_flag & T_WOULDBLOCK; 1322 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1323 rp->ns->ns_status = NFSERR_ISDIR; 1324 rp->thread->t_flag |= t_flag; 1325 } 1326 cv_broadcast(&nlp->cv); 1327 mutex_exit(&rfs_async_write_lock); 1328 1329 return; 1330 } 1331 1332 /* 1333 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1334 * deadlock with ufs. 1335 */ 1336 if (nbl_need_check(vp)) { 1337 nbl_start_crit(vp, RW_READER); 1338 in_crit = 1; 1339 } 1340 1341 ct.cc_sysid = 0; 1342 ct.cc_pid = 0; 1343 ct.cc_caller_id = nfs2_srv_caller_id; 1344 ct.cc_flags = CC_DONTBLOCK; 1345 1346 /* 1347 * Lock the file for writing. This operation provides 1348 * the delay which allows clusters to grow. 1349 */ 1350 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1351 1352 /* check if a monitor detected a delegation conflict */ 1353 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1354 if (in_crit) 1355 nbl_end_crit(vp); 1356 VN_RELE(vp); 1357 /* mark as wouldblock so response is dropped */ 1358 curthread->t_flag |= T_WOULDBLOCK; 1359 mutex_enter(&rfs_async_write_lock); 1360 if (rfs_async_write_head == nlp) 1361 rfs_async_write_head = nlp->next; 1362 else { 1363 lp = rfs_async_write_head; 1364 while (lp->next != nlp) 1365 lp = lp->next; 1366 lp->next = nlp->next; 1367 } 1368 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1369 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1370 rp->ns->ns_status = puterrno(error); 1371 rp->thread->t_flag |= T_WOULDBLOCK; 1372 } 1373 } 1374 cv_broadcast(&nlp->cv); 1375 mutex_exit(&rfs_async_write_lock); 1376 1377 return; 1378 } 1379 1380 /* 1381 * Disconnect this cluster from the list of clusters. 
1382 * The cluster that is being dealt with must be fixed 1383 * in size after this point, so there is no reason 1384 * to leave it on the list so that new requests can 1385 * find it. 1386 * 1387 * The algorithm is that the first write request will 1388 * create a cluster, convert the file handle to a 1389 * vnode pointer, and then lock the file for writing. 1390 * This request is not likely to be clustered with 1391 * any others. However, the next request will create 1392 * a new cluster and be blocked in VOP_RWLOCK while 1393 * the first request is being processed. This delay 1394 * will allow more requests to be clustered in this 1395 * second cluster. 1396 */ 1397 mutex_enter(&rfs_async_write_lock); 1398 if (rfs_async_write_head == nlp) 1399 rfs_async_write_head = nlp->next; 1400 else { 1401 lp = rfs_async_write_head; 1402 while (lp->next != nlp) 1403 lp = lp->next; 1404 lp->next = nlp->next; 1405 } 1406 mutex_exit(&rfs_async_write_lock); 1407 1408 /* 1409 * Step through the list of requests in this cluster. 1410 * We need to check permissions to make sure that all 1411 * of the requests have sufficient permission to write 1412 * the file. A cluster can be composed of requests 1413 * from different clients and different users on each 1414 * client. 1415 * 1416 * As a side effect, we also calculate the size of the 1417 * byte range that this cluster encompasses. 1418 */ 1419 rp = nlp->list; 1420 off = rp->wa->wa_offset; 1421 len = (uint_t)0; 1422 do { 1423 if (rdonly(rp->ro, vp)) { 1424 rp->ns->ns_status = NFSERR_ROFS; 1425 t_flag = curthread->t_flag & T_WOULDBLOCK; 1426 rp->thread->t_flag |= t_flag; 1427 continue; 1428 } 1429 1430 va.va_mask = AT_UID|AT_MODE; 1431 1432 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1433 1434 if (!error) { 1435 if (crgetuid(rp->cr) != va.va_uid) { 1436 /* 1437 * This is a kludge to allow writes of files 1438 * created with read only permission. The 1439 * owner of the file is always allowed to 1440 * write it. 
1441 */ 1442 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1443 } 1444 if (!error && MANDLOCK(vp, va.va_mode)) 1445 error = EACCES; 1446 } 1447 1448 /* 1449 * Check for a conflict with a nbmand-locked region. 1450 */ 1451 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1452 rp->wa->wa_count, 0, NULL)) { 1453 error = EACCES; 1454 } 1455 1456 if (error) { 1457 rp->ns->ns_status = puterrno(error); 1458 t_flag = curthread->t_flag & T_WOULDBLOCK; 1459 rp->thread->t_flag |= t_flag; 1460 continue; 1461 } 1462 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1463 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1464 } while ((rp = rp->list) != NULL); 1465 1466 /* 1467 * Step through the cluster attempting to gather as many 1468 * requests which are contiguous as possible. These 1469 * contiguous requests are handled via one call to VOP_WRITE 1470 * instead of different calls to VOP_WRITE. We also keep 1471 * track of the fact that any data was written. 1472 */ 1473 rp = nlp->list; 1474 data_written = 0; 1475 do { 1476 /* 1477 * Skip any requests which are already marked as having an 1478 * error. 1479 */ 1480 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1481 rp = rp->list; 1482 continue; 1483 } 1484 1485 /* 1486 * Count the number of iovec's which are required 1487 * to handle this set of requests. One iovec is 1488 * needed for each data buffer, whether addressed 1489 * by wa_data or by the b_rptr pointers in the 1490 * mblk chains. 
1491 */ 1492 iovcnt = 0; 1493 lrp = rp; 1494 for (;;) { 1495 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1496 iovcnt++; 1497 else { 1498 m = lrp->wa->wa_mblk; 1499 while (m != NULL) { 1500 iovcnt++; 1501 m = m->b_cont; 1502 } 1503 } 1504 if (lrp->list == NULL || 1505 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1506 lrp->wa->wa_offset + lrp->wa->wa_count != 1507 lrp->list->wa->wa_offset) { 1508 lrp = lrp->list; 1509 break; 1510 } 1511 lrp = lrp->list; 1512 } 1513 1514 if (iovcnt <= MAXCLIOVECS) { 1515 #ifdef DEBUG 1516 rfs_write_hits++; 1517 #endif 1518 niovp = iov; 1519 } else { 1520 #ifdef DEBUG 1521 rfs_write_misses++; 1522 #endif 1523 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1524 } 1525 /* 1526 * Put together the scatter/gather iovecs. 1527 */ 1528 iovp = niovp; 1529 trp = rp; 1530 count = 0; 1531 do { 1532 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1533 if (trp->wa->wa_rlist) { 1534 iovp->iov_base = 1535 (char *)((trp->wa->wa_rlist)-> 1536 u.c_daddr3); 1537 iovp->iov_len = trp->wa->wa_count; 1538 } else { 1539 iovp->iov_base = trp->wa->wa_data; 1540 iovp->iov_len = trp->wa->wa_count; 1541 } 1542 iovp++; 1543 } else { 1544 m = trp->wa->wa_mblk; 1545 rcount = trp->wa->wa_count; 1546 while (m != NULL) { 1547 iovp->iov_base = (caddr_t)m->b_rptr; 1548 iovp->iov_len = (m->b_wptr - m->b_rptr); 1549 rcount -= iovp->iov_len; 1550 if (rcount < 0) 1551 iovp->iov_len += rcount; 1552 iovp++; 1553 if (rcount <= 0) 1554 break; 1555 m = m->b_cont; 1556 } 1557 } 1558 count += trp->wa->wa_count; 1559 trp = trp->list; 1560 } while (trp != lrp); 1561 1562 uio.uio_iov = niovp; 1563 uio.uio_iovcnt = iovcnt; 1564 uio.uio_segflg = UIO_SYSSPACE; 1565 uio.uio_extflg = UIO_COPY_DEFAULT; 1566 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1567 uio.uio_resid = count; 1568 /* 1569 * The limit is checked on the client. We 1570 * should allow any size writes here. 
1571 */ 1572 uio.uio_llimit = curproc->p_fsz_ctl; 1573 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1574 if (rlimit < (rlim64_t)uio.uio_resid) 1575 uio.uio_resid = (uint_t)rlimit; 1576 1577 /* 1578 * For now we assume no append mode. 1579 */ 1580 1581 /* 1582 * We're changing creds because VM may fault 1583 * and we need the cred of the current 1584 * thread to be used if quota * checking is 1585 * enabled. 1586 */ 1587 savecred = curthread->t_cred; 1588 curthread->t_cred = cr; 1589 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1590 curthread->t_cred = savecred; 1591 1592 /* check if a monitor detected a delegation conflict */ 1593 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1594 /* mark as wouldblock so response is dropped */ 1595 curthread->t_flag |= T_WOULDBLOCK; 1596 1597 if (niovp != iov) 1598 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1599 1600 if (!error) { 1601 data_written = 1; 1602 /* 1603 * Get attributes again so we send the latest mod 1604 * time to the client side for its cache. 1605 */ 1606 va.va_mask = AT_ALL; /* now we want everything */ 1607 1608 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1609 1610 if (!error) 1611 acl_perm(vp, exi, &va, rp->cr); 1612 } 1613 1614 /* 1615 * Fill in the status responses for each request 1616 * which was just handled. Also, copy the latest 1617 * attributes in to the attribute responses if 1618 * appropriate. 1619 */ 1620 t_flag = curthread->t_flag & T_WOULDBLOCK; 1621 do { 1622 rp->thread->t_flag |= t_flag; 1623 /* check for overflows */ 1624 if (!error) { 1625 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1626 } 1627 rp->ns->ns_status = puterrno(error); 1628 rp = rp->list; 1629 } while (rp != lrp); 1630 } while (rp != NULL); 1631 1632 /* 1633 * If any data was written at all, then we need to flush 1634 * the data and metadata to stable storage. 
1635 */ 1636 if (data_written) { 1637 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1638 1639 if (!error) { 1640 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1641 } 1642 } 1643 1644 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1645 1646 if (in_crit) 1647 nbl_end_crit(vp); 1648 VN_RELE(vp); 1649 1650 t_flag = curthread->t_flag & T_WOULDBLOCK; 1651 mutex_enter(&rfs_async_write_lock); 1652 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1653 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1654 rp->ns->ns_status = puterrno(error); 1655 rp->thread->t_flag |= t_flag; 1656 } 1657 } 1658 cv_broadcast(&nlp->cv); 1659 mutex_exit(&rfs_async_write_lock); 1660 1661 } 1662 1663 void * 1664 rfs_write_getfh(struct nfswriteargs *wa) 1665 { 1666 return (&wa->wa_fhandle); 1667 } 1668 1669 /* 1670 * Create a file. 1671 * Creates a file with given attributes and returns those attributes 1672 * and an fhandle for the new file. 1673 */ 1674 void 1675 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1676 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1677 { 1678 int error; 1679 int lookuperr; 1680 int in_crit = 0; 1681 struct vattr va; 1682 vnode_t *vp; 1683 vnode_t *realvp; 1684 vnode_t *dvp; 1685 char *name = args->ca_da.da_name; 1686 vnode_t *tvp = NULL; 1687 int mode; 1688 int lookup_ok; 1689 bool_t trunc; 1690 struct sockaddr *ca; 1691 1692 /* 1693 * Disallow NULL paths 1694 */ 1695 if (name == NULL || *name == '\0') { 1696 dr->dr_status = NFSERR_ACCES; 1697 return; 1698 } 1699 1700 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1701 if (dvp == NULL) { 1702 dr->dr_status = NFSERR_STALE; 1703 return; 1704 } 1705 1706 error = sattr_to_vattr(args->ca_sa, &va); 1707 if (error) { 1708 dr->dr_status = puterrno(error); 1709 return; 1710 } 1711 1712 /* 1713 * Must specify the mode. 
1714 */ 1715 if (!(va.va_mask & AT_MODE)) { 1716 VN_RELE(dvp); 1717 dr->dr_status = NFSERR_INVAL; 1718 return; 1719 } 1720 1721 /* 1722 * This is a completely gross hack to make mknod 1723 * work over the wire until we can wack the protocol 1724 */ 1725 if ((va.va_mode & IFMT) == IFCHR) { 1726 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1727 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1728 else { 1729 va.va_type = VCHR; 1730 /* 1731 * uncompress the received dev_t 1732 * if the top half is zero indicating a request 1733 * from an `older style' OS. 1734 */ 1735 if ((va.va_size & 0xffff0000) == 0) 1736 va.va_rdev = nfsv2_expdev(va.va_size); 1737 else 1738 va.va_rdev = (dev_t)va.va_size; 1739 } 1740 va.va_mask &= ~AT_SIZE; 1741 } else if ((va.va_mode & IFMT) == IFBLK) { 1742 va.va_type = VBLK; 1743 /* 1744 * uncompress the received dev_t 1745 * if the top half is zero indicating a request 1746 * from an `older style' OS. 1747 */ 1748 if ((va.va_size & 0xffff0000) == 0) 1749 va.va_rdev = nfsv2_expdev(va.va_size); 1750 else 1751 va.va_rdev = (dev_t)va.va_size; 1752 va.va_mask &= ~AT_SIZE; 1753 } else if ((va.va_mode & IFMT) == IFSOCK) { 1754 va.va_type = VSOCK; 1755 } else { 1756 va.va_type = VREG; 1757 } 1758 va.va_mode &= ~IFMT; 1759 va.va_mask |= AT_TYPE; 1760 1761 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1762 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1763 MAXPATHLEN); 1764 if (name == NULL) { 1765 dr->dr_status = puterrno(EINVAL); 1766 return; 1767 } 1768 1769 /* 1770 * Why was the choice made to use VWRITE as the mode to the 1771 * call to VOP_CREATE ? This results in a bug. When a client 1772 * opens a file that already exists and is RDONLY, the second 1773 * open fails with an EACESS because of the mode. 1774 * bug ID 1054648. 
1775 */ 1776 lookup_ok = 0; 1777 mode = VWRITE; 1778 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1779 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1780 NULL, NULL, NULL); 1781 if (!error) { 1782 struct vattr at; 1783 1784 lookup_ok = 1; 1785 at.va_mask = AT_MODE; 1786 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1787 if (!error) 1788 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1789 VN_RELE(tvp); 1790 tvp = NULL; 1791 } 1792 } 1793 1794 if (!lookup_ok) { 1795 if (rdonly(ro, dvp)) { 1796 error = EROFS; 1797 } else if (va.va_type != VREG && va.va_type != VFIFO && 1798 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1799 error = EPERM; 1800 } else { 1801 error = 0; 1802 } 1803 } 1804 1805 /* 1806 * If file size is being modified on an already existing file 1807 * make sure that there are no conflicting non-blocking mandatory 1808 * locks in the region being manipulated. Return EACCES if there 1809 * are conflicting locks. 1810 */ 1811 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1812 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1813 NULL, NULL, NULL); 1814 1815 if (!lookuperr && 1816 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1817 VN_RELE(tvp); 1818 curthread->t_flag |= T_WOULDBLOCK; 1819 goto out; 1820 } 1821 1822 if (!lookuperr && nbl_need_check(tvp)) { 1823 /* 1824 * The file exists. Now check if it has any 1825 * conflicting non-blocking mandatory locks 1826 * in the region being changed. 
1827 */ 1828 struct vattr bva; 1829 u_offset_t offset; 1830 ssize_t length; 1831 1832 nbl_start_crit(tvp, RW_READER); 1833 in_crit = 1; 1834 1835 bva.va_mask = AT_SIZE; 1836 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1837 if (!error) { 1838 if (va.va_size < bva.va_size) { 1839 offset = va.va_size; 1840 length = bva.va_size - va.va_size; 1841 } else { 1842 offset = bva.va_size; 1843 length = va.va_size - bva.va_size; 1844 } 1845 if (length) { 1846 if (nbl_conflict(tvp, NBL_WRITE, 1847 offset, length, 0, NULL)) { 1848 error = EACCES; 1849 } 1850 } 1851 } 1852 if (error) { 1853 nbl_end_crit(tvp); 1854 VN_RELE(tvp); 1855 in_crit = 0; 1856 } 1857 } else if (tvp != NULL) { 1858 VN_RELE(tvp); 1859 } 1860 } 1861 1862 if (!error) { 1863 /* 1864 * If filesystem is shared with nosuid the remove any 1865 * setuid/setgid bits on create. 1866 */ 1867 if (va.va_type == VREG && 1868 exi->exi_export.ex_flags & EX_NOSUID) 1869 va.va_mode &= ~(VSUID | VSGID); 1870 1871 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1872 NULL, NULL); 1873 1874 if (!error) { 1875 1876 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1877 trunc = TRUE; 1878 else 1879 trunc = FALSE; 1880 1881 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1882 VN_RELE(vp); 1883 curthread->t_flag |= T_WOULDBLOCK; 1884 goto out; 1885 } 1886 va.va_mask = AT_ALL; 1887 1888 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1889 1890 /* check for overflows */ 1891 if (!error) { 1892 acl_perm(vp, exi, &va, cr); 1893 error = vattr_to_nattr(&va, &dr->dr_attr); 1894 if (!error) { 1895 error = makefh(&dr->dr_fhandle, vp, 1896 exi); 1897 } 1898 } 1899 /* 1900 * Force modified metadata out to stable storage. 
1901 * 1902 * if a underlying vp exists, pass it to VOP_FSYNC 1903 */ 1904 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1905 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1906 else 1907 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1908 VN_RELE(vp); 1909 } 1910 1911 if (in_crit) { 1912 nbl_end_crit(tvp); 1913 VN_RELE(tvp); 1914 } 1915 } 1916 1917 /* 1918 * Force modified data and metadata out to stable storage. 1919 */ 1920 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1921 1922 out: 1923 1924 VN_RELE(dvp); 1925 1926 dr->dr_status = puterrno(error); 1927 1928 if (name != args->ca_da.da_name) 1929 kmem_free(name, MAXPATHLEN); 1930 } 1931 void * 1932 rfs_create_getfh(struct nfscreatargs *args) 1933 { 1934 return (args->ca_da.da_fhandle); 1935 } 1936 1937 /* 1938 * Remove a file. 1939 * Remove named file from parent directory. 1940 */ 1941 /* ARGSUSED */ 1942 void 1943 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1944 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1945 { 1946 int error = 0; 1947 vnode_t *vp; 1948 vnode_t *targvp; 1949 int in_crit = 0; 1950 1951 /* 1952 * Disallow NULL paths 1953 */ 1954 if (da->da_name == NULL || *da->da_name == '\0') { 1955 *status = NFSERR_ACCES; 1956 return; 1957 } 1958 1959 vp = nfs_fhtovp(da->da_fhandle, exi); 1960 if (vp == NULL) { 1961 *status = NFSERR_STALE; 1962 return; 1963 } 1964 1965 if (rdonly(ro, vp)) { 1966 VN_RELE(vp); 1967 *status = NFSERR_ROFS; 1968 return; 1969 } 1970 1971 /* 1972 * Check for a conflict with a non-blocking mandatory share reservation. 1973 */ 1974 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1975 NULL, cr, NULL, NULL, NULL); 1976 if (error != 0) { 1977 VN_RELE(vp); 1978 *status = puterrno(error); 1979 return; 1980 } 1981 1982 /* 1983 * If the file is delegated to an v4 client, then initiate 1984 * recall and drop this request (by setting T_WOULDBLOCK). 
1985 * The client will eventually re-transmit the request and 1986 * (hopefully), by then, the v4 client will have returned 1987 * the delegation. 1988 */ 1989 1990 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 1991 VN_RELE(vp); 1992 VN_RELE(targvp); 1993 curthread->t_flag |= T_WOULDBLOCK; 1994 return; 1995 } 1996 1997 if (nbl_need_check(targvp)) { 1998 nbl_start_crit(targvp, RW_READER); 1999 in_crit = 1; 2000 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 2001 error = EACCES; 2002 goto out; 2003 } 2004 } 2005 2006 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0); 2007 2008 /* 2009 * Force modified data and metadata out to stable storage. 2010 */ 2011 (void) VOP_FSYNC(vp, 0, cr, NULL); 2012 2013 out: 2014 if (in_crit) 2015 nbl_end_crit(targvp); 2016 VN_RELE(targvp); 2017 VN_RELE(vp); 2018 2019 *status = puterrno(error); 2020 2021 } 2022 2023 void * 2024 rfs_remove_getfh(struct nfsdiropargs *da) 2025 { 2026 return (da->da_fhandle); 2027 } 2028 2029 /* 2030 * rename a file 2031 * Give a file (from) a new name (to). 
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	/* Resolve the source directory file handle. */
	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target must be in the same export as the source;
	 * cross-export renames are refused with NFSERR_XDEV below.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	/*
	 * Only the pointer value of to_exi is compared after the release;
	 * the export entry itself is not dereferenced again.
	 */
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	/* Resolve the target directory file handle. */
	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		/* Delegated to a v4 client: drop the request (WOULDBLOCK). */
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/*
	 * Guard against non-blocking mandatory locks on the source before
	 * performing the rename.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the vnode's cached pathname in sync with its new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
2181 */ 2182 /* ARGSUSED */ 2183 void 2184 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2185 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2186 { 2187 int error; 2188 vnode_t *fromvp; 2189 vnode_t *tovp; 2190 struct exportinfo *to_exi; 2191 fhandle_t *fh; 2192 2193 fromvp = nfs_fhtovp(args->la_from, exi); 2194 if (fromvp == NULL) { 2195 *status = NFSERR_STALE; 2196 return; 2197 } 2198 2199 fh = args->la_to.da_fhandle; 2200 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2201 if (to_exi == NULL) { 2202 VN_RELE(fromvp); 2203 *status = NFSERR_ACCES; 2204 return; 2205 } 2206 exi_rele(to_exi); 2207 2208 if (to_exi != exi) { 2209 VN_RELE(fromvp); 2210 *status = NFSERR_XDEV; 2211 return; 2212 } 2213 2214 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2215 if (tovp == NULL) { 2216 VN_RELE(fromvp); 2217 *status = NFSERR_STALE; 2218 return; 2219 } 2220 2221 if (tovp->v_type != VDIR) { 2222 VN_RELE(tovp); 2223 VN_RELE(fromvp); 2224 *status = NFSERR_NOTDIR; 2225 return; 2226 } 2227 /* 2228 * Disallow NULL paths 2229 */ 2230 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2231 VN_RELE(tovp); 2232 VN_RELE(fromvp); 2233 *status = NFSERR_ACCES; 2234 return; 2235 } 2236 2237 if (rdonly(ro, tovp)) { 2238 VN_RELE(tovp); 2239 VN_RELE(fromvp); 2240 *status = NFSERR_ROFS; 2241 return; 2242 } 2243 2244 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2245 2246 /* 2247 * Force modified data and metadata out to stable storage. 2248 */ 2249 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2250 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2251 2252 VN_RELE(tovp); 2253 VN_RELE(fromvp); 2254 2255 *status = puterrno(error); 2256 2257 } 2258 void * 2259 rfs_link_getfh(struct nfslinkargs *args) 2260 { 2261 return (args->la_from); 2262 } 2263 2264 /* 2265 * Symbolicly link to a file. 2266 * Create a file (to) with the given attributes which is a symbolic link 2267 * to the given path name (to). 
2268 */ 2269 void 2270 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2271 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2272 { 2273 int error; 2274 struct vattr va; 2275 vnode_t *vp; 2276 vnode_t *svp; 2277 int lerror; 2278 struct sockaddr *ca; 2279 char *name = NULL; 2280 2281 /* 2282 * Disallow NULL paths 2283 */ 2284 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2285 *status = NFSERR_ACCES; 2286 return; 2287 } 2288 2289 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2290 if (vp == NULL) { 2291 *status = NFSERR_STALE; 2292 return; 2293 } 2294 2295 if (rdonly(ro, vp)) { 2296 VN_RELE(vp); 2297 *status = NFSERR_ROFS; 2298 return; 2299 } 2300 2301 error = sattr_to_vattr(args->sla_sa, &va); 2302 if (error) { 2303 VN_RELE(vp); 2304 *status = puterrno(error); 2305 return; 2306 } 2307 2308 if (!(va.va_mask & AT_MODE)) { 2309 VN_RELE(vp); 2310 *status = NFSERR_INVAL; 2311 return; 2312 } 2313 2314 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2315 name = nfscmd_convname(ca, exi, args->sla_tnm, 2316 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2317 2318 if (name == NULL) { 2319 *status = NFSERR_ACCES; 2320 return; 2321 } 2322 2323 va.va_type = VLNK; 2324 va.va_mask |= AT_TYPE; 2325 2326 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2327 2328 /* 2329 * Force new data and metadata out to stable storage. 2330 */ 2331 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2332 NULL, cr, NULL, NULL, NULL); 2333 2334 if (!lerror) { 2335 (void) VOP_FSYNC(svp, 0, cr, NULL); 2336 VN_RELE(svp); 2337 } 2338 2339 /* 2340 * Force modified data and metadata out to stable storage. 
2341 */ 2342 (void) VOP_FSYNC(vp, 0, cr, NULL); 2343 2344 VN_RELE(vp); 2345 2346 *status = puterrno(error); 2347 if (name != args->sla_tnm) 2348 kmem_free(name, MAXPATHLEN); 2349 2350 } 2351 void * 2352 rfs_symlink_getfh(struct nfsslargs *args) 2353 { 2354 return (args->sla_from.da_fhandle); 2355 } 2356 2357 /* 2358 * Make a directory. 2359 * Create a directory with the given name, parent directory, and attributes. 2360 * Returns a file handle and attributes for the new directory. 2361 */ 2362 /* ARGSUSED */ 2363 void 2364 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2365 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2366 { 2367 int error; 2368 struct vattr va; 2369 vnode_t *dvp = NULL; 2370 vnode_t *vp; 2371 char *name = args->ca_da.da_name; 2372 2373 /* 2374 * Disallow NULL paths 2375 */ 2376 if (name == NULL || *name == '\0') { 2377 dr->dr_status = NFSERR_ACCES; 2378 return; 2379 } 2380 2381 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2382 if (vp == NULL) { 2383 dr->dr_status = NFSERR_STALE; 2384 return; 2385 } 2386 2387 if (rdonly(ro, vp)) { 2388 VN_RELE(vp); 2389 dr->dr_status = NFSERR_ROFS; 2390 return; 2391 } 2392 2393 error = sattr_to_vattr(args->ca_sa, &va); 2394 if (error) { 2395 VN_RELE(vp); 2396 dr->dr_status = puterrno(error); 2397 return; 2398 } 2399 2400 if (!(va.va_mask & AT_MODE)) { 2401 VN_RELE(vp); 2402 dr->dr_status = NFSERR_INVAL; 2403 return; 2404 } 2405 2406 va.va_type = VDIR; 2407 va.va_mask |= AT_TYPE; 2408 2409 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2410 2411 if (!error) { 2412 /* 2413 * Attribtutes of the newly created directory should 2414 * be returned to the client. 
2415 */ 2416 va.va_mask = AT_ALL; /* We want everything */ 2417 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2418 2419 /* check for overflows */ 2420 if (!error) { 2421 acl_perm(vp, exi, &va, cr); 2422 error = vattr_to_nattr(&va, &dr->dr_attr); 2423 if (!error) { 2424 error = makefh(&dr->dr_fhandle, dvp, exi); 2425 } 2426 } 2427 /* 2428 * Force new data and metadata out to stable storage. 2429 */ 2430 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2431 VN_RELE(dvp); 2432 } 2433 2434 /* 2435 * Force modified data and metadata out to stable storage. 2436 */ 2437 (void) VOP_FSYNC(vp, 0, cr, NULL); 2438 2439 VN_RELE(vp); 2440 2441 dr->dr_status = puterrno(error); 2442 2443 } 2444 void * 2445 rfs_mkdir_getfh(struct nfscreatargs *args) 2446 { 2447 return (args->ca_da.da_fhandle); 2448 } 2449 2450 /* 2451 * Remove a directory. 2452 * Remove the given directory name from the given parent directory. 2453 */ 2454 /* ARGSUSED */ 2455 void 2456 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2457 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2458 { 2459 int error; 2460 vnode_t *vp; 2461 2462 /* 2463 * Disallow NULL paths 2464 */ 2465 if (da->da_name == NULL || *da->da_name == '\0') { 2466 *status = NFSERR_ACCES; 2467 return; 2468 } 2469 2470 vp = nfs_fhtovp(da->da_fhandle, exi); 2471 if (vp == NULL) { 2472 *status = NFSERR_STALE; 2473 return; 2474 } 2475 2476 if (rdonly(ro, vp)) { 2477 VN_RELE(vp); 2478 *status = NFSERR_ROFS; 2479 return; 2480 } 2481 2482 /* 2483 * VOP_RMDIR takes a third argument (the current 2484 * directory of the process). That's because someone 2485 * wants to return EINVAL if one tries to remove ".". 2486 * Of course, NFS servers have no idea what their 2487 * clients' current directories are. We fake it by 2488 * supplying a vnode known to exist and illegal to 2489 * remove. 2490 */ 2491 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2492 2493 /* 2494 * Force modified data and metadata out to stable storage. 
2495 */ 2496 (void) VOP_FSYNC(vp, 0, cr, NULL); 2497 2498 VN_RELE(vp); 2499 2500 /* 2501 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2502 * if the directory is not empty. A System V NFS server 2503 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2504 * over the wire. 2505 */ 2506 if (error == EEXIST) 2507 *status = NFSERR_NOTEMPTY; 2508 else 2509 *status = puterrno(error); 2510 2511 } 2512 void * 2513 rfs_rmdir_getfh(struct nfsdiropargs *da) 2514 { 2515 return (da->da_fhandle); 2516 } 2517 2518 /* ARGSUSED */ 2519 void 2520 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2521 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2522 { 2523 int error; 2524 int iseof; 2525 struct iovec iov; 2526 struct uio uio; 2527 vnode_t *vp; 2528 char *ndata = NULL; 2529 struct sockaddr *ca; 2530 size_t nents; 2531 int ret; 2532 2533 vp = nfs_fhtovp(&rda->rda_fh, exi); 2534 if (vp == NULL) { 2535 rd->rd_entries = NULL; 2536 rd->rd_status = NFSERR_STALE; 2537 return; 2538 } 2539 2540 if (vp->v_type != VDIR) { 2541 VN_RELE(vp); 2542 rd->rd_entries = NULL; 2543 rd->rd_status = NFSERR_NOTDIR; 2544 return; 2545 } 2546 2547 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2548 2549 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2550 2551 if (error) { 2552 rd->rd_entries = NULL; 2553 goto bad; 2554 } 2555 2556 if (rda->rda_count == 0) { 2557 rd->rd_entries = NULL; 2558 rd->rd_size = 0; 2559 rd->rd_eof = FALSE; 2560 goto bad; 2561 } 2562 2563 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2564 2565 /* 2566 * Allocate data for entries. This will be freed by rfs_rddirfree. 
2567 */ 2568 rd->rd_bufsize = (uint_t)rda->rda_count; 2569 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2570 2571 /* 2572 * Set up io vector to read directory data 2573 */ 2574 iov.iov_base = (caddr_t)rd->rd_entries; 2575 iov.iov_len = rda->rda_count; 2576 uio.uio_iov = &iov; 2577 uio.uio_iovcnt = 1; 2578 uio.uio_segflg = UIO_SYSSPACE; 2579 uio.uio_extflg = UIO_COPY_CACHED; 2580 uio.uio_loffset = (offset_t)rda->rda_offset; 2581 uio.uio_resid = rda->rda_count; 2582 2583 /* 2584 * read directory 2585 */ 2586 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2587 2588 /* 2589 * Clean up 2590 */ 2591 if (!error) { 2592 /* 2593 * set size and eof 2594 */ 2595 if (uio.uio_resid == rda->rda_count) { 2596 rd->rd_size = 0; 2597 rd->rd_eof = TRUE; 2598 } else { 2599 rd->rd_size = (uint32_t)(rda->rda_count - 2600 uio.uio_resid); 2601 rd->rd_eof = iseof ? TRUE : FALSE; 2602 } 2603 } 2604 2605 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2606 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2607 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2608 rda->rda_count, &ndata); 2609 2610 if (ret != 0) { 2611 size_t dropbytes; 2612 /* 2613 * We had to drop one or more entries in order to fit 2614 * during the character conversion. We need to patch 2615 * up the size and eof info. 2616 */ 2617 if (rd->rd_eof) 2618 rd->rd_eof = FALSE; 2619 dropbytes = nfscmd_dropped_entrysize( 2620 (struct dirent64 *)rd->rd_entries, nents, ret); 2621 rd->rd_size -= dropbytes; 2622 } 2623 if (ndata == NULL) { 2624 ndata = (char *)rd->rd_entries; 2625 } else if (ndata != (char *)rd->rd_entries) { 2626 kmem_free(rd->rd_entries, rd->rd_bufsize); 2627 rd->rd_entries = (void *)ndata; 2628 rd->rd_bufsize = rda->rda_count; 2629 } 2630 2631 bad: 2632 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2633 2634 #if 0 /* notyet */ 2635 /* 2636 * Don't do this. 
It causes local disk writes when just 2637 * reading the file and the overhead is deemed larger 2638 * than the benefit. 2639 */ 2640 /* 2641 * Force modified metadata out to stable storage. 2642 */ 2643 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2644 #endif 2645 2646 VN_RELE(vp); 2647 2648 rd->rd_status = puterrno(error); 2649 2650 } 2651 void * 2652 rfs_readdir_getfh(struct nfsrddirargs *rda) 2653 { 2654 return (&rda->rda_fh); 2655 } 2656 void 2657 rfs_rddirfree(struct nfsrddirres *rd) 2658 { 2659 if (rd->rd_entries != NULL) 2660 kmem_free(rd->rd_entries, rd->rd_bufsize); 2661 } 2662 2663 /* ARGSUSED */ 2664 void 2665 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2666 struct svc_req *req, cred_t *cr, bool_t ro) 2667 { 2668 int error; 2669 struct statvfs64 sb; 2670 vnode_t *vp; 2671 2672 vp = nfs_fhtovp(fh, exi); 2673 if (vp == NULL) { 2674 fs->fs_status = NFSERR_STALE; 2675 return; 2676 } 2677 2678 error = VFS_STATVFS(vp->v_vfsp, &sb); 2679 2680 if (!error) { 2681 fs->fs_tsize = nfstsize(); 2682 fs->fs_bsize = sb.f_frsize; 2683 fs->fs_blocks = sb.f_blocks; 2684 fs->fs_bfree = sb.f_bfree; 2685 fs->fs_bavail = sb.f_bavail; 2686 } 2687 2688 VN_RELE(vp); 2689 2690 fs->fs_status = puterrno(error); 2691 2692 } 2693 void * 2694 rfs_statfs_getfh(fhandle_t *fh) 2695 { 2696 return (fh); 2697 } 2698 2699 static int 2700 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2701 { 2702 vap->va_mask = 0; 2703 2704 /* 2705 * There was a sign extension bug in some VFS based systems 2706 * which stored the mode as a short. When it would get 2707 * assigned to a u_long, no sign extension would occur. 2708 * It needed to, but this wasn't noticed because sa_mode 2709 * would then get assigned back to the short, thus ignoring 2710 * the upper 16 bits of sa_mode. 2711 * 2712 * To make this implementation work for both broken 2713 * clients and good clients, we check for both versions 2714 * of the mode. 
2715 */ 2716 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2717 sa->sa_mode != (uint32_t)-1) { 2718 vap->va_mask |= AT_MODE; 2719 vap->va_mode = sa->sa_mode; 2720 } 2721 if (sa->sa_uid != (uint32_t)-1) { 2722 vap->va_mask |= AT_UID; 2723 vap->va_uid = sa->sa_uid; 2724 } 2725 if (sa->sa_gid != (uint32_t)-1) { 2726 vap->va_mask |= AT_GID; 2727 vap->va_gid = sa->sa_gid; 2728 } 2729 if (sa->sa_size != (uint32_t)-1) { 2730 vap->va_mask |= AT_SIZE; 2731 vap->va_size = sa->sa_size; 2732 } 2733 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2734 sa->sa_atime.tv_usec != (int32_t)-1) { 2735 #ifndef _LP64 2736 /* return error if time overflow */ 2737 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2738 return (EOVERFLOW); 2739 #endif 2740 vap->va_mask |= AT_ATIME; 2741 /* 2742 * nfs protocol defines times as unsigned so don't extend sign, 2743 * unless sysadmin set nfs_allow_preepoch_time. 2744 */ 2745 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2746 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2747 } 2748 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2749 sa->sa_mtime.tv_usec != (int32_t)-1) { 2750 #ifndef _LP64 2751 /* return error if time overflow */ 2752 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2753 return (EOVERFLOW); 2754 #endif 2755 vap->va_mask |= AT_MTIME; 2756 /* 2757 * nfs protocol defines times as unsigned so don't extend sign, 2758 * unless sysadmin set nfs_allow_preepoch_time. 2759 */ 2760 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2761 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2762 } 2763 return (0); 2764 } 2765 2766 static enum nfsftype vt_to_nf[] = { 2767 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2768 }; 2769 2770 /* 2771 * check the following fields for overflow: nodeid, size, and time. 2772 * There could be a problem when converting 64-bit LP64 fields 2773 * into 32-bit ones. Return an error if there is an overflow. 
 */
/*
 * Convert a vattr to the NFSv2 over-the-wire attribute structure.
 * Returns EFBIG when the nodeid, or the size of a regular file or
 * directory, cannot be represented in 32 bits; EOVERFLOW when any of
 * the times do not fit the 32-bit wire format; otherwise 0.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/*
	 * A mode of all-ones means "unknown"; pass it through unchanged.
	 * Otherwise fold the over-the-wire file type bits into the mode.
	 */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	/* map the local "nobody" ids to the wire "nobody" ids */
	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* wire times carry microseconds; vattr carries nanoseconds */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2884 */ 2885 static void 2886 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2887 { 2888 vsecattr_t vsa; 2889 int aclcnt; 2890 aclent_t *aclentp; 2891 mode_t mask_perm; 2892 mode_t grp_perm; 2893 mode_t other_perm; 2894 mode_t other_orig; 2895 int error; 2896 2897 /* dont care default acl */ 2898 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2899 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2900 2901 if (!error) { 2902 aclcnt = vsa.vsa_aclcnt; 2903 if (aclcnt > MIN_ACL_ENTRIES) { 2904 /* non-trivial ACL */ 2905 aclentp = vsa.vsa_aclentp; 2906 if (exi->exi_export.ex_flags & EX_ACLOK) { 2907 /* maximal permissions */ 2908 grp_perm = 0; 2909 other_perm = 0; 2910 for (; aclcnt > 0; aclcnt--, aclentp++) { 2911 switch (aclentp->a_type) { 2912 case USER_OBJ: 2913 break; 2914 case USER: 2915 grp_perm |= 2916 aclentp->a_perm << 3; 2917 other_perm |= aclentp->a_perm; 2918 break; 2919 case GROUP_OBJ: 2920 grp_perm |= 2921 aclentp->a_perm << 3; 2922 break; 2923 case GROUP: 2924 other_perm |= aclentp->a_perm; 2925 break; 2926 case OTHER_OBJ: 2927 other_orig = aclentp->a_perm; 2928 break; 2929 case CLASS_OBJ: 2930 mask_perm = aclentp->a_perm; 2931 break; 2932 default: 2933 break; 2934 } 2935 } 2936 grp_perm &= mask_perm << 3; 2937 other_perm &= mask_perm; 2938 other_perm |= other_orig; 2939 2940 } else { 2941 /* minimal permissions */ 2942 grp_perm = 070; 2943 other_perm = 07; 2944 for (; aclcnt > 0; aclcnt--, aclentp++) { 2945 switch (aclentp->a_type) { 2946 case USER_OBJ: 2947 break; 2948 case USER: 2949 case CLASS_OBJ: 2950 grp_perm &= 2951 aclentp->a_perm << 3; 2952 other_perm &= 2953 aclentp->a_perm; 2954 break; 2955 case GROUP_OBJ: 2956 grp_perm &= 2957 aclentp->a_perm << 3; 2958 break; 2959 case GROUP: 2960 other_perm &= 2961 aclentp->a_perm; 2962 break; 2963 case OTHER_OBJ: 2964 other_perm &= 2965 aclentp->a_perm; 2966 break; 2967 default: 2968 break; 2969 } 2970 } 2971 } 2972 /* copy to va */ 2973 va->va_mode &= ~077; 2974 va->va_mode |= 
grp_perm | other_perm; 2975 } 2976 if (vsa.vsa_aclcnt) 2977 kmem_free(vsa.vsa_aclentp, 2978 vsa.vsa_aclcnt * sizeof (aclent_t)); 2979 } 2980 } 2981 2982 void 2983 rfs_srvrinit(void) 2984 { 2985 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2986 nfs2_srv_caller_id = fs_new_caller_id(); 2987 } 2988 2989 void 2990 rfs_srvrfini(void) 2991 { 2992 mutex_destroy(&rfs_async_write_lock); 2993 } 2994 2995 static int 2996 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2997 { 2998 struct clist *wcl; 2999 int wlist_len; 3000 uint32_t count = rr->rr_count; 3001 3002 wcl = ra->ra_wlist; 3003 3004 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 3005 return (FALSE); 3006 } 3007 3008 wcl = ra->ra_wlist; 3009 rr->rr_ok.rrok_wlist_len = wlist_len; 3010 rr->rr_ok.rrok_wlist = wcl; 3011 3012 return (TRUE); 3013 } 3014