/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/statvfs.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/dirent.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/policy.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/svc.h>

#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_cmd.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>

#include <sys/strsubr.h>

/*
 * These are the interface routines for the server side of the
 * Network File System.  See the NFS version 2 protocol specification
 * for a description of this interface.
 */

static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
    cred_t *);

/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define IFMT    0170000     /* type of file */
#define IFCHR   0020000     /* character special */
#define IFBLK   0060000     /* block special */
#define IFSOCK  0140000     /* socket */

u_longlong_t nfs2_srv_caller_id;

/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    vnode_t *vp;
    struct vattr va;

    vp = nfs_fhtovp(fhp, exi);
    if (vp == NULL) {
        ns->ns_status = NFSERR_STALE;
        return;
    }

    /*
     * Do the getattr.
     */
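    /*
     * rfs4_delegated_getattr() is used below instead of a bare
     * VOP_GETATTR() so that the attributes returned take any
     * outstanding NFSv4 delegation on this vnode into account
     * (a reading of the surrounding code, not of the v2 spec).
     */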
    va.va_mask = AT_ALL;    /* we want all the attributes */

    error = rfs4_delegated_getattr(vp, &va, 0, cr);

    /* check for overflows */
    if (!error) {
        /* Lie about the object type for a referral */
        if (vn_is_nfs_reparse(vp, cr))
            va.va_type = VLNK;

        acl_perm(vp, exi, &va, cr);
        error = vattr_to_nattr(&va, &ns->ns_attr);
    }

    VN_RELE(vp);

    ns->ns_status = puterrno(error);
}

void *
rfs_getattr_getfh(fhandle_t *fhp)
{
    return (fhp);
}

/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    int flag;
    int in_crit = 0;
    vnode_t *vp;
    struct vattr va;
    struct vattr bva;
    struct flock64 bf;
    caller_context_t ct;

    vp = nfs_fhtovp(&args->saa_fh, exi);
    if (vp == NULL) {
        ns->ns_status = NFSERR_STALE;
        return;
    }

    if (rdonly(ro, vp)) {
        VN_RELE(vp);
        ns->ns_status = NFSERR_ROFS;
        return;
    }

    error = sattr_to_vattr(&args->saa_sa, &va);
    if (error) {
        VN_RELE(vp);
        ns->ns_status = puterrno(error);
        return;
    }

    /*
     * If the client is requesting a change to the mtime,
     * but the nanosecond field is set to 1 billion, then
     * this is a flag to the server that it should set the
     * atime and mtime fields to the server's current time.
     * The 1 billion number actually came from the client
     * as 1 million, but the units in the over the wire
     * request are microseconds instead of nanoseconds.
     *
     * This is an overload of the protocol and should be
     * documented in the NFS Version 2 protocol specification.
     */
    if (va.va_mask & AT_MTIME) {
        if (va.va_mtime.tv_nsec == 1000000000) {
            gethrestime(&va.va_mtime);
            va.va_atime = va.va_mtime;
            va.va_mask |= AT_ATIME;
            flag = 0;
        } else
            flag = ATTR_UTIME;
    } else
        flag = 0;

    /*
     * If the filesystem is exported with nosuid, then mask off
     * the setuid and setgid bits.
     */
    if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
        (exi->exi_export.ex_flags & EX_NOSUID))
        va.va_mode &= ~(VSUID | VSGID);

    ct.cc_sysid = 0;
    ct.cc_pid = 0;
    ct.cc_caller_id = nfs2_srv_caller_id;
    ct.cc_flags = CC_DONTBLOCK;

    /*
     * We need to specially handle size changes because it is
     * possible for the client to create a file with modes
     * which indicate read-only, but with the file opened for
     * writing.  If the client then tries to set the size of
     * the file, then the normal access checking done in
     * VOP_SETATTR would prevent the client from doing so,
     * although it should be legal for it to do so.  To get
     * around this, we do the access checking for ourselves
     * and then use VOP_SPACE which doesn't do the access
     * checking which VOP_SETATTR does.  VOP_SPACE can only
     * operate on VREG files, let VOP_SETATTR handle the other
     * extremely rare cases.
     * Also the client should not be allowed to change the
     * size of the file if there is a conflicting non-blocking
     * mandatory lock in the region of change.
     */
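    /*
     * A note on the truncation below (a reading of the code, not
     * spec language): with l_whence = 0 (SEEK_SET), l_start = va_size
     * and l_len = 0, F_FREESP means "free all storage from l_start to
     * end of file", which has the effect of setting the file size to
     * va_size.  AT_SIZE is cleared from va_mask so that the later
     * VOP_SETATTR does not repeat the size change.
     */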
    if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
        if (nbl_need_check(vp)) {
            nbl_start_crit(vp, RW_READER);
            in_crit = 1;
        }

        bva.va_mask = AT_UID | AT_SIZE;

        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        if (error) {
            if (in_crit)
                nbl_end_crit(vp);
            VN_RELE(vp);
            ns->ns_status = puterrno(error);
            return;
        }

        if (in_crit) {
            u_offset_t offset;
            ssize_t length;

            if (va.va_size < bva.va_size) {
                offset = va.va_size;
                length = bva.va_size - va.va_size;
            } else {
                offset = bva.va_size;
                length = va.va_size - bva.va_size;
            }
            if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
                NULL)) {
                error = EACCES;
            }
        }

        if (crgetuid(cr) == bva.va_uid && !error &&
            va.va_size != bva.va_size) {
            va.va_mask &= ~AT_SIZE;
            bf.l_type = F_WRLCK;
            bf.l_whence = 0;
            bf.l_start = (off64_t)va.va_size;
            bf.l_len = 0;
            bf.l_sysid = 0;
            bf.l_pid = 0;

            error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
                (offset_t)va.va_size, cr, &ct);
        }
        if (in_crit)
            nbl_end_crit(vp);
    } else
        error = 0;

    /*
     * Do the setattr.
     */
    if (!error && va.va_mask) {
        error = VOP_SETATTR(vp, &va, flag, cr, &ct);
    }

    /*
     * check if the monitor on either vop_space or vop_setattr detected
     * a delegation conflict and if so, mark the thread flag as
     * wouldblock so that the response is dropped and the client will
     * try again.
     */
    if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
        VN_RELE(vp);
        curthread->t_flag |= T_WOULDBLOCK;
        return;
    }

    if (!error) {
        va.va_mask = AT_ALL;    /* get everything */

        error = rfs4_delegated_getattr(vp, &va, 0, cr);

        /* check for overflows */
        if (!error) {
            acl_perm(vp, exi, &va, cr);
            error = vattr_to_nattr(&va, &ns->ns_attr);
        }
    }

    ct.cc_flags = 0;

    /*
     * Force modified metadata out to stable storage.
     */
    (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

    VN_RELE(vp);

    ns->ns_status = puterrno(error);
}

void *
rfs_setattr_getfh(struct nfssaargs *args)
{
    return (&args->saa_fh);
}

/* Change and release @exip and @vpp only on success */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
    struct exportinfo *exi;
    vnode_t *vp = *vpp;
    fid_t fid;
    int error;

    VN_HOLD(vp);

    if ((error = traverse(&vp)) != 0) {
        VN_RELE(vp);
        return (error);
    }

    bzero(&fid, sizeof (fid));
    fid.fid_len = MAXFIDSZ;
    error = VOP_FID(vp, &fid, NULL);
    if (error) {
        VN_RELE(vp);
        return (error);
    }

    exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
    if (exi == NULL ||
        (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
        /*
         * It is not an error; the subdir is simply not exported,
         * or "nohide" is not set.
         */
        if (exi != NULL)
            exi_rele(exi);
        VN_RELE(vp);
    } else {
        /* go to submount */
        exi_rele(*exip);
        *exip = exi;

        VN_RELE(*vpp);
        *vpp = vp;
    }

    return (0);
}

/*
 * Given a mounted "dvp" and "exi", climb to the upper mountpoint,
 * correcting dvp/exi on the way.
 * Returns 0 on success.
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
    struct exportinfo *exi;
    vnode_t *dvp = *dvpp;

    ASSERT(dvp->v_flag & VROOT);

    VN_HOLD(dvp);
    dvp = untraverse(dvp);
    exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
    if (exi == NULL) {
        VN_RELE(dvp);
        return (-1);
    }

    exi_rele(*exip);
    *exip = exi;
    VN_RELE(*dvpp);
    *dvpp = dvp;

    return (0);
}

/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    vnode_t *dvp;
    vnode_t *vp;
    struct vattr va;
    fhandle_t *fhp = da->da_fhandle;
    struct sec_ol sec = {0, 0};
    bool_t publicfh_flag = FALSE, auth_weak = FALSE;
    char *name;
    struct sockaddr *ca;

    /*
     * Trusted Extension doesn't support NFSv2.  MOUNT
     * will reject v2 clients.  Need to prevent v2 client
     * access via WebNFS here.
     */
    if (is_system_labeled() && req->rq_vers == 2) {
        dr->dr_status = NFSERR_ACCES;
        return;
    }

    /*
     * Disallow NULL paths
     */
    if (da->da_name == NULL || *da->da_name == '\0') {
        dr->dr_status = NFSERR_ACCES;
        return;
    }

    /*
     * Allow lookups from the root - the default
     * location of the public filehandle.
     */
    if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
        dvp = rootdir;
        VN_HOLD(dvp);
    } else {
        dvp = nfs_fhtovp(fhp, exi);
        if (dvp == NULL) {
            dr->dr_status = NFSERR_STALE;
            return;
        }
    }

    exi_hold(exi);

    /*
     * Do not allow lookup beyond the root.
     * If the filehandle matches a filehandle of the exi,
     * then the ".." refers beyond the root of an exported filesystem.
     */
    if (strcmp(da->da_name, "..") == 0 &&
        EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
        if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
            (dvp->v_flag & VROOT)) {
            /*
             * special case for ".." and a 'nohide' exported root
             */
            if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
                error = NFSERR_ACCES;
                goto out;
            }
        } else {
            error = NFSERR_NOENT;
            goto out;
        }
    }

    ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
    name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
        MAXPATHLEN);

    if (name == NULL) {
        error = NFSERR_ACCES;
        goto out;
    }

    /*
     * If the public filehandle is used then allow
     * a multi-component lookup, i.e. evaluate
     * a pathname and follow symbolic links if
     * necessary.
     *
     * This may result in a vnode in another filesystem
     * which is OK as long as the filesystem is exported.
     */
    if (PUBLIC_FH2(fhp)) {
        publicfh_flag = TRUE;

        exi_rele(exi);

        error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
            &sec);
    } else {
        /*
         * Do a normal single component lookup.
         */
        error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
            NULL, NULL, NULL);
    }

    if (name != da->da_name)
        kmem_free(name, MAXPATHLEN);

    if (error == 0 && vn_ismntpt(vp)) {
        error = rfs_cross_mnt(&vp, &exi);
        if (error)
            VN_RELE(vp);
    }

    if (!error) {
        va.va_mask = AT_ALL;    /* we want everything */

        error = rfs4_delegated_getattr(vp, &va, 0, cr);

        /* check for overflows */
        if (!error) {
            acl_perm(vp, exi, &va, cr);
            error = vattr_to_nattr(&va, &dr->dr_attr);
            if (!error) {
                if (sec.sec_flags & SEC_QUERY)
                    error = makefh_ol(&dr->dr_fhandle, exi,
                        sec.sec_index);
                else {
                    error = makefh(&dr->dr_fhandle, vp,
                        exi);
                    if (!error && publicfh_flag &&
                        !chk_clnt_sec(exi, req))
                        auth_weak = TRUE;
                }
            }
        }
        VN_RELE(vp);
    }

out:
    VN_RELE(dvp);

    if (exi != NULL)
        exi_rele(exi);

    /*
     * If it's public fh, no 0x81, and client's flavor is
     * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
     * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
     */
    if (auth_weak)
        dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
    else
        dr->dr_status = puterrno(error);
}

void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
    return (da->da_fhandle);
}

/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    struct iovec iov;
    struct uio uio;
    vnode_t *vp;
    struct vattr va;
    struct sockaddr *ca;
    char *name = NULL;
    int is_referral = 0;

    vp = nfs_fhtovp(fhp, exi);
    if (vp == NULL) {
        rl->rl_data = NULL;
        rl->rl_status = NFSERR_STALE;
        return;
    }

    va.va_mask = AT_MODE;

    error = VOP_GETATTR(vp, &va, 0, cr, NULL);

    if (error) {
        VN_RELE(vp);
        rl->rl_data = NULL;
        rl->rl_status = puterrno(error);
        return;
    }

    if (MANDLOCK(vp, va.va_mode)) {
        VN_RELE(vp);
        rl->rl_data = NULL;
        rl->rl_status = NFSERR_ACCES;
        return;
    }

    /* We lied about the object type for a referral */
    if (vn_is_nfs_reparse(vp, cr))
        is_referral = 1;

    /*
     * XNFS and RFC1094 require us to return ENXIO if the argument
     * is not a link.  BUGID 1138002.
     */
    if (vp->v_type != VLNK && !is_referral) {
        VN_RELE(vp);
        rl->rl_data = NULL;
        rl->rl_status = NFSERR_NXIO;
        return;
    }

    /*
     * Allocate data for pathname.  This will be freed by rfs_rlfree.
     */
    rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

    if (is_referral) {
        char *s;
        size_t strsz;

        /* Get an artificial symlink based on a referral */
        s = build_symlink(vp, cr, &strsz);
        global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
        DTRACE_PROBE2(nfs2serv__func__referral__reflink,
            vnode_t *, vp, char *, s);
        if (s == NULL)
            error = EINVAL;
        else {
            error = 0;
            (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
            rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
            kmem_free(s, strsz);
        }
    } else {
        /*
         * Set up io vector to read sym link data
         */
        iov.iov_base = rl->rl_data;
        iov.iov_len = NFS_MAXPATHLEN;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_CACHED;
        uio.uio_loffset = (offset_t)0;
        uio.uio_resid = NFS_MAXPATHLEN;

        /*
         * Do the readlink.
         */
        error = VOP_READLINK(vp, &uio, cr, NULL);

        rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

        if (!error)
            rl->rl_data[rl->rl_count] = '\0';
    }

    VN_RELE(vp);

    ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
    name = nfscmd_convname(ca, exi, rl->rl_data,
        NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

    if (name != NULL && name != rl->rl_data) {
        kmem_free(rl->rl_data, NFS_MAXPATHLEN);
        rl->rl_data = name;
    }

    /*
     * XNFS and RFC1094 require us to return ENXIO if the argument
     * is not a link.  UFS returns EINVAL if this is the case, so
     * we do the mapping here.  BUGID 1138002.
     */
    if (error == EINVAL)
        rl->rl_status = NFSERR_NXIO;
    else
        rl->rl_status = puterrno(error);
}

void *
rfs_readlink_getfh(fhandle_t *fhp)
{
    return (fhp);
}

/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
    if (rl->rl_data != NULL)
        kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}

static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    vnode_t *vp;
    int error;
    struct vattr va;
    struct iovec iov;
    struct uio uio;
    mblk_t *mp;
    int alloc_err = 0;
    int in_crit = 0;
    caller_context_t ct;

    vp = nfs_fhtovp(&ra->ra_fhandle, exi);
    if (vp == NULL) {
        rr->rr_data = NULL;
        rr->rr_status = NFSERR_STALE;
        return;
    }

    if (vp->v_type != VREG) {
        VN_RELE(vp);
        rr->rr_data = NULL;
        rr->rr_status = NFSERR_ISDIR;
        return;
    }

    ct.cc_sysid = 0;
    ct.cc_pid = 0;
    ct.cc_caller_id = nfs2_srv_caller_id;
    ct.cc_flags = CC_DONTBLOCK;

    /*
     * Enter the critical region before calling VOP_RWLOCK
     * to avoid a deadlock with write requests.
     */
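    /*
     * Note the ordering: the write paths (rfs_write_sync/rfs_write)
     * also take the critical region first and VOP_RWLOCK second, so
     * taking them in the same order here avoids a lock-order deadlock
     * between readers and writers.
     */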
    if (nbl_need_check(vp)) {
        nbl_start_crit(vp, RW_READER);
        if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
            0, NULL)) {
            nbl_end_crit(vp);
            VN_RELE(vp);
            rr->rr_data = NULL;
            rr->rr_status = NFSERR_ACCES;
            return;
        }
        in_crit = 1;
    }

    error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

    /* check if a monitor detected a delegation conflict */
    if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
        VN_RELE(vp);
        /* mark as wouldblock so response is dropped */
        curthread->t_flag |= T_WOULDBLOCK;

        rr->rr_data = NULL;
        return;
    }

    va.va_mask = AT_ALL;

    error = VOP_GETATTR(vp, &va, 0, cr, &ct);

    if (error) {
        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
        if (in_crit)
            nbl_end_crit(vp);

        VN_RELE(vp);
        rr->rr_data = NULL;
        rr->rr_status = puterrno(error);

        return;
    }

    /*
     * This is a kludge to allow reading of files created
     * with no read permission.  The owner of the file
     * is always allowed to read it.
     */
    if (crgetuid(cr) != va.va_uid) {
        error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

        if (error) {
            /*
             * Exec is the same as read over the net because
             * of demand loading.
             */
            error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
        }
        if (error) {
            VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
            if (in_crit)
                nbl_end_crit(vp);
            VN_RELE(vp);
            rr->rr_data = NULL;
            rr->rr_status = puterrno(error);

            return;
        }
    }

    if (MANDLOCK(vp, va.va_mode)) {
        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
        if (in_crit)
            nbl_end_crit(vp);

        VN_RELE(vp);
        rr->rr_data = NULL;
        rr->rr_status = NFSERR_ACCES;

        return;
    }

    rr->rr_ok.rrok_wlist_len = 0;
    rr->rr_ok.rrok_wlist = NULL;

    if ((u_offset_t)ra->ra_offset >= va.va_size) {
        rr->rr_count = 0;
        rr->rr_data = NULL;
        /*
         * In this case, status is NFS_OK, but there is no data
         * to encode.  So set rr_mp to NULL.
         */
        rr->rr_mp = NULL;
        rr->rr_ok.rrok_wlist = ra->ra_wlist;
        if (rr->rr_ok.rrok_wlist)
            clist_zero_len(rr->rr_ok.rrok_wlist);
        goto done;
    }

    if (ra->ra_wlist) {
        mp = NULL;
        rr->rr_mp = NULL;
        (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
        if (ra->ra_count > iov.iov_len) {
            rr->rr_data = NULL;
            rr->rr_status = NFSERR_INVAL;
            goto done;
        }
    } else {
        /*
         * mp will contain the data to be sent out in the read reply.
         * This will be freed after the reply has been sent out (by the
         * driver).
         * Let's round up the data to a BYTES_PER_XDR_UNIT multiple,
         * so that the call to xdrmblk_putmblk() never fails.
         */
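        /*
         * For example (BYTES_PER_XDR_UNIT is 4, per XDR):
         * RNDUP(1) == 4, RNDUP(4) == 4, RNDUP(5) == 8.
         */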
        mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
            &alloc_err);
        ASSERT(mp != NULL);
        ASSERT(alloc_err == 0);

        rr->rr_mp = mp;

        /*
         * Set up io vector
         */
        iov.iov_base = (caddr_t)mp->b_datap->db_base;
        iov.iov_len = ra->ra_count;
    }

    uio.uio_iov = &iov;
    uio.uio_iovcnt = 1;
    uio.uio_segflg = UIO_SYSSPACE;
    uio.uio_extflg = UIO_COPY_CACHED;
    uio.uio_loffset = (offset_t)ra->ra_offset;
    uio.uio_resid = ra->ra_count;

    error = VOP_READ(vp, &uio, 0, cr, &ct);

    if (error) {
        if (mp)
            freeb(mp);

        /*
         * check if a monitor detected a delegation conflict and
         * mark as wouldblock so response is dropped
         */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
            curthread->t_flag |= T_WOULDBLOCK;
        else
            rr->rr_status = puterrno(error);

        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
        if (in_crit)
            nbl_end_crit(vp);

        VN_RELE(vp);
        rr->rr_data = NULL;

        return;
    }

    /*
     * Get attributes again so we can send the latest access
     * time to the client side for its cache.
     */
    va.va_mask = AT_ALL;

    error = VOP_GETATTR(vp, &va, 0, cr, &ct);

    if (error) {
        if (mp)
            freeb(mp);

        VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
        if (in_crit)
            nbl_end_crit(vp);

        VN_RELE(vp);
        rr->rr_data = NULL;
        rr->rr_status = puterrno(error);

        return;
    }

    rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

    if (mp) {
        rr->rr_data = (char *)mp->b_datap->db_base;
    } else {
        if (ra->ra_wlist) {
            rr->rr_data = (caddr_t)iov.iov_base;
            if (!rdma_setup_read_data2(ra, rr)) {
                rr->rr_data = NULL;
                rr->rr_status = puterrno(NFSERR_INVAL);
            }
        }
    }
done:
    VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
    if (in_crit)
        nbl_end_crit(vp);

    acl_perm(vp, exi, &va, cr);

    /* check for overflows */
    error = vattr_to_nattr(&va, &rr->rr_attr);

    VN_RELE(vp);

    rr->rr_status = puterrno(error);
}

/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
    mblk_t *mp;

    if (rr->rr_status == NFS_OK) {
        mp = rr->rr_mp;
        if (mp != NULL)
            freeb(mp);
    }
}

void *
rfs_read_getfh(struct nfsreadargs *ra)
{
    return (&ra->ra_fhandle);
}

#define MAX_IOVECS  12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 */
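/*
 * A protocol note (RFC 1094): NFS version 2 has no WRITE/COMMIT split,
 * so a write must be on stable storage before the reply is sent.  That
 * is why this routine passes FSYNC to VOP_WRITE, and why the clustering
 * path in rfs_write() flushes with VOP_PUTPAGE/VOP_FSYNC before
 * replying.
 */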
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    vnode_t *vp;
    rlim64_t rlimit;
    struct vattr va;
    struct uio uio;
    struct iovec iov[MAX_IOVECS];
    mblk_t *m;
    struct iovec *iovp;
    int iovcnt;
    cred_t *savecred;
    int in_crit = 0;
    caller_context_t ct;

    vp = nfs_fhtovp(&wa->wa_fhandle, exi);
    if (vp == NULL) {
        ns->ns_status = NFSERR_STALE;
        return;
    }

    if (rdonly(ro, vp)) {
        VN_RELE(vp);
        ns->ns_status = NFSERR_ROFS;
        return;
    }

    if (vp->v_type != VREG) {
        VN_RELE(vp);
        ns->ns_status = NFSERR_ISDIR;
        return;
    }

    ct.cc_sysid = 0;
    ct.cc_pid = 0;
    ct.cc_caller_id = nfs2_srv_caller_id;
    ct.cc_flags = CC_DONTBLOCK;

    va.va_mask = AT_UID|AT_MODE;

    error = VOP_GETATTR(vp, &va, 0, cr, &ct);

    if (error) {
        VN_RELE(vp);
        ns->ns_status = puterrno(error);

        return;
    }

    if (crgetuid(cr) != va.va_uid) {
        /*
         * This is a kludge to allow writes of files created
         * with read only permission.  The owner of the file
         * is always allowed to write it.
         */
        error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

        if (error) {
            VN_RELE(vp);
            ns->ns_status = puterrno(error);
            return;
        }
    }

    /*
     * Can't access a mandatory lock file.  This might cause
     * the NFS service thread to block forever waiting for a
     * lock to be released that will never be released.
     */
    if (MANDLOCK(vp, va.va_mode)) {
        VN_RELE(vp);
        ns->ns_status = NFSERR_ACCES;
        return;
    }

    /*
     * We have to enter the critical region before calling VOP_RWLOCK
     * to avoid a deadlock with ufs.
     */
    if (nbl_need_check(vp)) {
        nbl_start_crit(vp, RW_READER);
        in_crit = 1;
        if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
            wa->wa_count, 0, NULL)) {
            error = EACCES;
            goto out;
        }
    }

    error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

    /* check if a monitor detected a delegation conflict */
    if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
        VN_RELE(vp);
        /* mark as wouldblock so response is dropped */
        curthread->t_flag |= T_WOULDBLOCK;
        return;
    }

    if (wa->wa_data || wa->wa_rlist) {
        /* Do the RDMA thing if necessary */
        if (wa->wa_rlist) {
            iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
            iov[0].iov_len = wa->wa_count;
        } else {
            iov[0].iov_base = wa->wa_data;
            iov[0].iov_len = wa->wa_count;
        }
        uio.uio_iov = iov;
        uio.uio_iovcnt = 1;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = (offset_t)wa->wa_offset;
        uio.uio_resid = wa->wa_count;
        /*
         * The limit is checked on the client.  We
         * should allow any size writes here.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - wa->wa_offset;
        if (rlimit < (rlim64_t)uio.uio_resid)
            uio.uio_resid = (uint_t)rlimit;

        /*
         * for now we assume no append mode
         */
        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
        curthread->t_cred = savecred;
    } else {
        iovcnt = 0;
        for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
            iovcnt++;
        if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
            rfs_write_sync_hits++;
#endif
            iovp = iov;
        } else {
#ifdef DEBUG
            rfs_write_sync_misses++;
#endif
            iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
        }
        mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = (offset_t)wa->wa_offset;
        uio.uio_resid = wa->wa_count;
        /*
         * The limit is checked on the client.  We
         * should allow any size writes here.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - wa->wa_offset;
        if (rlimit < (rlim64_t)uio.uio_resid)
            uio.uio_resid = (uint_t)rlimit;

        /*
         * For now we assume no append mode.
         */
        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
        curthread->t_cred = savecred;

        if (iovp != iov)
            kmem_free(iovp, sizeof (*iovp) * iovcnt);
    }

    VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

    if (!error) {
        /*
         * Get attributes again so we send the latest mod
         * time to the client side for its cache.
         */
        va.va_mask = AT_ALL;    /* now we want everything */

        error = VOP_GETATTR(vp, &va, 0, cr, &ct);

        /* check for overflows */
        if (!error) {
            acl_perm(vp, exi, &va, cr);
            error = vattr_to_nattr(&va, &ns->ns_attr);
        }
    }

out:
    if (in_crit)
        nbl_end_crit(vp);
    VN_RELE(vp);

    /* check if a monitor detected a delegation conflict */
    if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
        /* mark as wouldblock so response is dropped */
        curthread->t_flag |= T_WOULDBLOCK;
    else
        ns->ns_status = puterrno(error);
}

struct rfs_async_write {
    struct nfswriteargs *wa;
    struct nfsattrstat *ns;
    struct svc_req *req;
    cred_t *cr;
    bool_t ro;
    kthread_t *thread;
    struct rfs_async_write *list;
};

struct rfs_async_write_list {
    fhandle_t *fhp;
    kcondvar_t cv;
    struct rfs_async_write *list;
    struct rfs_async_write_list *next;
};

static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;     /* enables write clustering if == 1 */

#define MAXCLIOVECS         42
#define RFSWRITE_INITVAL    (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 */
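/*
 * A sketch of the clustering scheme below (derived from the code): the
 * first request for a given file handle creates a "cluster" on
 * rfs_async_write_head and then blocks in VOP_RWLOCK.  Requests that
 * arrive for the same file handle in the meantime are queued on that
 * cluster, sorted by offset, and their threads sleep on the cluster's
 * condition variable.  The thread holding the lock writes contiguous
 * runs of requests with single VOP_WRITE calls, fills in every queued
 * response, and wakes the sleepers with cv_broadcast().
 */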
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    vnode_t *vp;
    rlim64_t rlimit;
    struct vattr va;
    struct uio uio;
    struct rfs_async_write_list *lp;
    struct rfs_async_write_list *nlp;
    struct rfs_async_write *rp;
    struct rfs_async_write *nrp;
    struct rfs_async_write *trp;
    struct rfs_async_write *lrp;
    int data_written;
    int iovcnt;
    mblk_t *m;
    struct iovec *iovp;
    struct iovec *niovp;
    struct iovec iov[MAXCLIOVECS];
    int count;
    int rcount;
    uint_t off;
    uint_t len;
    struct rfs_async_write nrpsp;
    struct rfs_async_write_list nlpsp;
    ushort_t t_flag;
    cred_t *savecred;
    int in_crit = 0;
    caller_context_t ct;

    if (!rfs_write_async) {
        rfs_write_sync(wa, ns, exi, req, cr, ro);
        return;
    }

    /*
     * Initialize status to RFSWRITE_INITVAL instead of 0, since a
     * value of 0 is considered OK.
     */
    ns->ns_status = RFSWRITE_INITVAL;

    nrp = &nrpsp;
    nrp->wa = wa;
    nrp->ns = ns;
    nrp->req = req;
    nrp->cr = cr;
    nrp->ro = ro;
    nrp->thread = curthread;

    ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

    /*
     * Look to see if there is already a cluster started
     * for this file.
     */
    mutex_enter(&rfs_async_write_lock);
    for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
        if (bcmp(&wa->wa_fhandle, lp->fhp,
            sizeof (fhandle_t)) == 0)
            break;
    }

    /*
     * If lp is non-NULL, then there is already a cluster
     * started.  We need to place ourselves in the cluster
     * list in the right place as determined by starting
     * offset.  Conflicts with non-blocking mandatory locked
     * regions will be checked when the cluster is processed.
     */
    if (lp != NULL) {
        rp = lp->list;
        trp = NULL;
        while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
            trp = rp;
            rp = rp->list;
        }
        nrp->list = rp;
        if (trp == NULL)
            lp->list = nrp;
        else
            trp->list = nrp;
        while (nrp->ns->ns_status == RFSWRITE_INITVAL)
            cv_wait(&lp->cv, &rfs_async_write_lock);
        mutex_exit(&rfs_async_write_lock);

        return;
    }

    /*
     * No cluster started yet, start one and add ourselves
     * to the list of clusters.
     */
    nrp->list = NULL;

    nlp = &nlpsp;
    nlp->fhp = &wa->wa_fhandle;
    cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
    nlp->list = nrp;
    nlp->next = NULL;

    if (rfs_async_write_head == NULL) {
        rfs_async_write_head = nlp;
    } else {
        lp = rfs_async_write_head;
        while (lp->next != NULL)
            lp = lp->next;
        lp->next = nlp;
    }
    mutex_exit(&rfs_async_write_lock);

    /*
     * Convert the file handle common to all of the requests
     * in this cluster to a vnode.
     */
    vp = nfs_fhtovp(&wa->wa_fhandle, exi);
    if (vp == NULL) {
        mutex_enter(&rfs_async_write_lock);
        if (rfs_async_write_head == nlp)
            rfs_async_write_head = nlp->next;
        else {
            lp = rfs_async_write_head;
            while (lp->next != nlp)
                lp = lp->next;
            lp->next = nlp->next;
        }
        t_flag = curthread->t_flag & T_WOULDBLOCK;
        for (rp = nlp->list; rp != NULL; rp = rp->list) {
            rp->ns->ns_status = NFSERR_STALE;
            rp->thread->t_flag |= t_flag;
        }
        cv_broadcast(&nlp->cv);
        mutex_exit(&rfs_async_write_lock);

        return;
    }

    /*
     * Can only write regular files.  Attempts to write any
     * other file types fail with EISDIR.
     */
    if (vp->v_type != VREG) {
        VN_RELE(vp);
        mutex_enter(&rfs_async_write_lock);
        if (rfs_async_write_head == nlp)
            rfs_async_write_head = nlp->next;
        else {
            lp = rfs_async_write_head;
            while (lp->next != nlp)
                lp = lp->next;
            lp->next = nlp->next;
        }
        t_flag = curthread->t_flag & T_WOULDBLOCK;
        for (rp = nlp->list; rp != NULL; rp = rp->list) {
            rp->ns->ns_status = NFSERR_ISDIR;
            rp->thread->t_flag |= t_flag;
        }
        cv_broadcast(&nlp->cv);
        mutex_exit(&rfs_async_write_lock);

        return;
    }

    /*
     * Enter the critical region before calling VOP_RWLOCK, to avoid a
     * deadlock with ufs.
     */
    if (nbl_need_check(vp)) {
        nbl_start_crit(vp, RW_READER);
        in_crit = 1;
    }

    ct.cc_sysid = 0;
    ct.cc_pid = 0;
    ct.cc_caller_id = nfs2_srv_caller_id;
    ct.cc_flags = CC_DONTBLOCK;

    /*
     * Lock the file for writing.  This operation provides
     * the delay which allows clusters to grow.
     */
    error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

    /* check if a monitor detected a delegation conflict */
    if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
        if (in_crit)
            nbl_end_crit(vp);
        VN_RELE(vp);
        /* mark as wouldblock so response is dropped */
        curthread->t_flag |= T_WOULDBLOCK;
        mutex_enter(&rfs_async_write_lock);
        if (rfs_async_write_head == nlp)
            rfs_async_write_head = nlp->next;
        else {
            lp = rfs_async_write_head;
            while (lp->next != nlp)
                lp = lp->next;
            lp->next = nlp->next;
        }
        for (rp = nlp->list; rp != NULL; rp = rp->list) {
            if (rp->ns->ns_status == RFSWRITE_INITVAL) {
                rp->ns->ns_status = puterrno(error);
                rp->thread->t_flag |= T_WOULDBLOCK;
            }
        }
        cv_broadcast(&nlp->cv);
        mutex_exit(&rfs_async_write_lock);

        return;
    }

    /*
     * Disconnect this cluster from the list of clusters.
     * The cluster that is being dealt with must be fixed
     * in size after this point, so there is no reason
     * to leave it on the list so that new requests can
     * find it.
     *
     * The algorithm is that the first write request will
     * create a cluster, convert the file handle to a
     * vnode pointer, and then lock the file for writing.
     * This request is not likely to be clustered with
     * any others.  However, the next request will create
     * a new cluster and be blocked in VOP_RWLOCK while
     * the first request is being processed.  This delay
     * will allow more requests to be clustered in this
     * second cluster.
     */
    mutex_enter(&rfs_async_write_lock);
    if (rfs_async_write_head == nlp)
        rfs_async_write_head = nlp->next;
    else {
        lp = rfs_async_write_head;
        while (lp->next != nlp)
            lp = lp->next;
        lp->next = nlp->next;
    }
    mutex_exit(&rfs_async_write_lock);

    /*
     * Step through the list of requests in this cluster.
     * We need to check permissions to make sure that all
     * of the requests have sufficient permission to write
     * the file.  A cluster can be composed of requests
     * from different clients and different users on each
     * client.
     *
     * As a side effect, we also calculate the size of the
     * byte range that this cluster encompasses.
     */
    rp = nlp->list;
    off = rp->wa->wa_offset;
    len = (uint_t)0;
    do {
        if (rdonly(rp->ro, vp)) {
            rp->ns->ns_status = NFSERR_ROFS;
            t_flag = curthread->t_flag & T_WOULDBLOCK;
            rp->thread->t_flag |= t_flag;
            continue;
        }

        va.va_mask = AT_UID|AT_MODE;

        error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

        if (!error) {
            if (crgetuid(rp->cr) != va.va_uid) {
                /*
                 * This is a kludge to allow writes of files
                 * created with read only permission.  The
                 * owner of the file is always allowed to
                 * write it.
                 */
                error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
            }
            if (!error && MANDLOCK(vp, va.va_mode))
                error = EACCES;
        }

        /*
         * Check for a conflict with a nbmand-locked region.
         */
        if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
            rp->wa->wa_count, 0, NULL)) {
            error = EACCES;
        }

        if (error) {
            rp->ns->ns_status = puterrno(error);
            t_flag = curthread->t_flag & T_WOULDBLOCK;
            rp->thread->t_flag |= t_flag;
            continue;
        }
        if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
            len = rp->wa->wa_offset + rp->wa->wa_count - off;
    } while ((rp = rp->list) != NULL);

    /*
     * Step through the cluster attempting to gather as many
     * requests which are contiguous as possible.  These
     * contiguous requests are handled via one call to VOP_WRITE
     * instead of separate calls to VOP_WRITE.  We also keep
     * track of the fact that any data was written.
     */
    rp = nlp->list;
    data_written = 0;
    do {
        /*
         * Skip any requests which are already marked as having an
         * error.
         */
        if (rp->ns->ns_status != RFSWRITE_INITVAL) {
            rp = rp->list;
            continue;
        }

        /*
         * Count the number of iovec's which are required
         * to handle this set of requests.  One iovec is
         * needed for each data buffer, whether addressed
         * by wa_data or by the b_rptr pointers in the
         * mblk chains.
         */
        iovcnt = 0;
        lrp = rp;
        for (;;) {
            if (lrp->wa->wa_data || lrp->wa->wa_rlist)
                iovcnt++;
            else {
                m = lrp->wa->wa_mblk;
                while (m != NULL) {
                    iovcnt++;
                    m = m->b_cont;
                }
            }
            if (lrp->list == NULL ||
                lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
                lrp->wa->wa_offset + lrp->wa->wa_count !=
                lrp->list->wa->wa_offset) {
                lrp = lrp->list;
                break;
            }
            lrp = lrp->list;
        }

        if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
            rfs_write_hits++;
#endif
            niovp = iov;
        } else {
#ifdef DEBUG
            rfs_write_misses++;
#endif
            niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
        }
        /*
         * Put together the scatter/gather iovecs.
         */
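        /*
         * Each request contributes either a single iovec (its
         * wa_data buffer or RDMA chunk) or one iovec per mblk in
         * its chain, with iov_len clamped so that no more than
         * wa_count bytes are taken from any one request.
         */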
        iovp = niovp;
        trp = rp;
        count = 0;
        do {
            if (trp->wa->wa_data || trp->wa->wa_rlist) {
                if (trp->wa->wa_rlist) {
                    iovp->iov_base =
                        (char *)((trp->wa->wa_rlist)->
                        u.c_daddr3);
                    iovp->iov_len = trp->wa->wa_count;
                } else {
                    iovp->iov_base = trp->wa->wa_data;
                    iovp->iov_len = trp->wa->wa_count;
                }
                iovp++;
            } else {
                m = trp->wa->wa_mblk;
                rcount = trp->wa->wa_count;
                while (m != NULL) {
                    iovp->iov_base = (caddr_t)m->b_rptr;
                    iovp->iov_len = (m->b_wptr - m->b_rptr);
                    rcount -= iovp->iov_len;
                    if (rcount < 0)
                        iovp->iov_len += rcount;
                    iovp++;
                    if (rcount <= 0)
                        break;
                    m = m->b_cont;
                }
            }
            count += trp->wa->wa_count;
            trp = trp->list;
        } while (trp != lrp);

        uio.uio_iov = niovp;
        uio.uio_iovcnt = iovcnt;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = (offset_t)rp->wa->wa_offset;
        uio.uio_resid = count;
        /*
         * The limit is checked on the client.  We
         * should allow any size writes here.
         */
        uio.uio_llimit = curproc->p_fsz_ctl;
        rlimit = uio.uio_llimit - rp->wa->wa_offset;
        if (rlimit < (rlim64_t)uio.uio_resid)
            uio.uio_resid = (uint_t)rlimit;

        /*
         * For now we assume no append mode.
         */

        /*
         * We're changing creds because VM may fault
         * and we need the cred of the current
         * thread to be used if quota checking is
         * enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
        curthread->t_cred = savecred;

        /* check if a monitor detected a delegation conflict */
        if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
            /* mark as wouldblock so response is dropped */
            curthread->t_flag |= T_WOULDBLOCK;

        if (niovp != iov)
            kmem_free(niovp, sizeof (*niovp) * iovcnt);

        if (!error) {
            data_written = 1;
            /*
             * Get attributes again so we send the latest mod
             * time to the client side for its cache.
             */
            va.va_mask = AT_ALL;    /* now we want everything */

            error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

            if (!error)
                acl_perm(vp, exi, &va, rp->cr);
        }

        /*
         * Fill in the status responses for each request
         * which was just handled.  Also, copy the latest
         * attributes in to the attribute responses if
         * appropriate.
         */
        t_flag = curthread->t_flag & T_WOULDBLOCK;
        do {
            rp->thread->t_flag |= t_flag;
            /* check for overflows */
            if (!error) {
                error = vattr_to_nattr(&va, &rp->ns->ns_attr);
            }
            rp->ns->ns_status = puterrno(error);
            rp = rp->list;
        } while (rp != lrp);
    } while (rp != NULL);

    /*
     * If any data was written at all, then we need to flush
     * the data and metadata to stable storage.
     */
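    /*
     * off/len were computed during the permission pass above and span
     * the byte range of the requests that passed those checks, so the
     * single VOP_PUTPAGE below covers everything that was written.
     */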
    if (data_written) {
        error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

        if (!error) {
            error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
        }
    }

    VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

    if (in_crit)
        nbl_end_crit(vp);
    VN_RELE(vp);

    t_flag = curthread->t_flag & T_WOULDBLOCK;
    mutex_enter(&rfs_async_write_lock);
    for (rp = nlp->list; rp != NULL; rp = rp->list) {
        if (rp->ns->ns_status == RFSWRITE_INITVAL) {
            rp->ns->ns_status = puterrno(error);
            rp->thread->t_flag |= t_flag;
        }
    }
    cv_broadcast(&nlp->cv);
    mutex_exit(&rfs_async_write_lock);
}

void *
rfs_write_getfh(struct nfswriteargs *wa)
{
    return (&wa->wa_fhandle);
}

/*
 * Create a file.
 * Creates a file with given attributes and returns those attributes
 * and an fhandle for the new file.
 */
void
rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error;
    int lookuperr;
    int in_crit = 0;
    struct vattr va;
    vnode_t *vp;
    vnode_t *realvp;
    vnode_t *dvp;
    char *name = args->ca_da.da_name;
    vnode_t *tvp = NULL;
    int mode;
    int lookup_ok;
    bool_t trunc;
    struct sockaddr *ca;

    /*
     * Disallow NULL paths
     */
    if (name == NULL || *name == '\0') {
        dr->dr_status = NFSERR_ACCES;
        return;
    }

    dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
    if (dvp == NULL) {
        dr->dr_status = NFSERR_STALE;
        return;
    }

    error = sattr_to_vattr(args->ca_sa, &va);
    if (error) {
        dr->dr_status = puterrno(error);
        return;
    }

    /*
     * Must specify the mode.
     */
    if (!(va.va_mask & AT_MODE)) {
        VN_RELE(dvp);
        dr->dr_status = NFSERR_INVAL;
        return;
    }

    /*
     * This is a completely gross hack to make mknod
     * work over the wire until we can wack the protocol
     */
    if ((va.va_mode & IFMT) == IFCHR) {
        if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
            va.va_type = VFIFO;     /* xtra kludge for named pipe */
        else {
            va.va_type = VCHR;
            /*
             * uncompress the received dev_t
             * if the top half is zero indicating a request
             * from an `older style' OS.
             */
            if ((va.va_size & 0xffff0000) == 0)
                va.va_rdev = nfsv2_expdev(va.va_size);
            else
                va.va_rdev = (dev_t)va.va_size;
        }
        va.va_mask &= ~AT_SIZE;
    } else if ((va.va_mode & IFMT) == IFBLK) {
        va.va_type = VBLK;
        /*
         * uncompress the received dev_t
         * if the top half is zero indicating a request
         * from an `older style' OS.
         */
        if ((va.va_size & 0xffff0000) == 0)
            va.va_rdev = nfsv2_expdev(va.va_size);
        else
            va.va_rdev = (dev_t)va.va_size;
        va.va_mask &= ~AT_SIZE;
    } else if ((va.va_mode & IFMT) == IFSOCK) {
        va.va_type = VSOCK;
    } else {
        va.va_type = VREG;
    }
    va.va_mode &= ~IFMT;
    va.va_mask |= AT_TYPE;

    ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
    name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
        MAXPATHLEN);
    if (name == NULL) {
        dr->dr_status = puterrno(EINVAL);
        return;
    }

    /*
     * Why was the choice made to use VWRITE as the mode to the
     * call to VOP_CREATE?  This results in a bug.  When a client
     * opens a file that already exists and is RDONLY, the second
     * open fails with EACCES because of the mode.
     * Bug ID 1054648.
     */
    lookup_ok = 0;
    mode = VWRITE;
    if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
        error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
            NULL, NULL, NULL);
        if (!error) {
            struct vattr at;

            lookup_ok = 1;
            at.va_mask = AT_MODE;
            error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
            if (!error)
                mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
            VN_RELE(tvp);
            tvp = NULL;
        }
    }

    if (!lookup_ok) {
        if (rdonly(ro, dvp)) {
            error = EROFS;
        } else if (va.va_type != VREG && va.va_type != VFIFO &&
            va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
            error = EPERM;
        } else {
            error = 0;
        }
    }

    /*
     * If the file size is being modified on an already existing file,
     * make sure that there are no conflicting non-blocking mandatory
     * locks in the region being manipulated.  Return EACCES if there
     * are conflicting locks.
     */
    if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
        lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
            NULL, NULL, NULL);

        if (!lookuperr &&
            rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
            VN_RELE(tvp);
            curthread->t_flag |= T_WOULDBLOCK;
            goto out;
        }

        if (!lookuperr && nbl_need_check(tvp)) {
            /*
             * The file exists.  Now check if it has any
             * conflicting non-blocking mandatory locks
             * in the region being changed.
             */
            struct vattr bva;
            u_offset_t offset;
            ssize_t length;

            nbl_start_crit(tvp, RW_READER);
            in_crit = 1;

            bva.va_mask = AT_SIZE;
            error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
            if (!error) {
                if (va.va_size < bva.va_size) {
                    offset = va.va_size;
                    length = bva.va_size - va.va_size;
                } else {
                    offset = bva.va_size;
                    length = va.va_size - bva.va_size;
                }
                if (length) {
                    if (nbl_conflict(tvp, NBL_WRITE,
                        offset, length, 0, NULL)) {
                        error = EACCES;
                    }
                }
            }
            if (error) {
                nbl_end_crit(tvp);
                VN_RELE(tvp);
                in_crit = 0;
            }
        } else if (tvp != NULL) {
            VN_RELE(tvp);
        }
    }

    if (!error) {
        /*
         * If the filesystem is shared with nosuid, then remove
         * any setuid/setgid bits on create.
         */
        if (va.va_type == VREG &&
            exi->exi_export.ex_flags & EX_NOSUID)
            va.va_mode &= ~(VSUID | VSGID);

        error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
            NULL, NULL);

        if (!error) {

            if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
                trunc = TRUE;
            else
                trunc = FALSE;

            if (rfs4_check_delegated(FWRITE, vp, trunc)) {
                VN_RELE(vp);
                curthread->t_flag |= T_WOULDBLOCK;
                goto out;
            }
            va.va_mask = AT_ALL;

            error = VOP_GETATTR(vp, &va, 0, cr, NULL);

            /* check for overflows */
            if (!error) {
                acl_perm(vp, exi, &va, cr);
                error = vattr_to_nattr(&va, &dr->dr_attr);
                if (!error) {
                    error = makefh(&dr->dr_fhandle, vp,
                        exi);
                }
            }
            /*
             * Force modified metadata out to stable storage.
             *
             * If an underlying vp exists, pass it to VOP_FSYNC.
             */
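            /*
             * For special files (e.g. the devices created above)
             * vp may be a node stacked over the filesystem vnode;
             * VOP_REALVP exposes the underlying vnode so the
             * metadata sync hits the right object (our reading of
             * the code below).
             */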
            if (VOP_REALVP(vp, &realvp, NULL) == 0)
                (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
            else
                (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
            VN_RELE(vp);
        }

        if (in_crit) {
            nbl_end_crit(tvp);
            VN_RELE(tvp);
        }
    }

    /*
     * Force modified data and metadata out to stable storage.
     */
    (void) VOP_FSYNC(dvp, 0, cr, NULL);

out:

    VN_RELE(dvp);

    dr->dr_status = puterrno(error);

    if (name != args->ca_da.da_name)
        kmem_free(name, MAXPATHLEN);
}

void *
rfs_create_getfh(struct nfscreatargs *args)
{
    return (args->ca_da.da_fhandle);
}

/*
 * Remove a file.
 * Remove named file from parent directory.
 */
/* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error = 0;
    vnode_t *vp;
    vnode_t *targvp;
    int in_crit = 0;

    /*
     * Disallow NULL paths
     */
    if (da->da_name == NULL || *da->da_name == '\0') {
        *status = NFSERR_ACCES;
        return;
    }

    vp = nfs_fhtovp(da->da_fhandle, exi);
    if (vp == NULL) {
        *status = NFSERR_STALE;
        return;
    }

    if (rdonly(ro, vp)) {
        VN_RELE(vp);
        *status = NFSERR_ROFS;
        return;
    }

    /*
     * Check for a conflict with a non-blocking mandatory share
     * reservation.
     */
    error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
        NULL, cr, NULL, NULL, NULL);
    if (error != 0) {
        VN_RELE(vp);
        *status = puterrno(error);
        return;
    }

    /*
     * If the file is delegated to a v4 client, then initiate
     * recall and drop this request (by setting T_WOULDBLOCK).
     * The client will eventually re-transmit the request and
     * (hopefully), by then, the v4 client will have returned
     * the delegation.
     */
    if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
        VN_RELE(vp);
        VN_RELE(targvp);
        curthread->t_flag |= T_WOULDBLOCK;
        return;
    }

    if (nbl_need_check(targvp)) {
        nbl_start_crit(targvp, RW_READER);
        in_crit = 1;
        if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
            error = EACCES;
            goto out;
        }
    }

    error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

    /*
     * Force modified data and metadata out to stable storage.
     */
    (void) VOP_FSYNC(vp, 0, cr, NULL);

out:
    if (in_crit)
        nbl_end_crit(targvp);
    VN_RELE(targvp);
    VN_RELE(vp);

    *status = puterrno(error);
}

void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
    return (da->da_fhandle);
}

/*
 * Rename a file.
 * Give a file (from) a new name (to).
 */
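/*
 * Note, from the checks below rather than the v2 spec: both directory
 * file handles must resolve to the same export; a rename across
 * exports is rejected with NFSERR_XDEV even if both live in the same
 * filesystem.
 */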
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
    int error = 0;
    vnode_t *fromvp;
    vnode_t *tovp;
    struct exportinfo *to_exi;
    fhandle_t *fh;
    vnode_t *srcvp;
    vnode_t *targvp;
    int in_crit = 0;

    fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
    if (fromvp == NULL) {
        *status = NFSERR_STALE;
        return;
    }

    fh = args->rna_to.da_fhandle;
    to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
    if (to_exi == NULL) {
        VN_RELE(fromvp);
        *status = NFSERR_ACCES;
        return;
    }
    exi_rele(to_exi);

    if (to_exi != exi) {
        VN_RELE(fromvp);
        *status = NFSERR_XDEV;
        return;
    }

    tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
    if (tovp == NULL) {
        VN_RELE(fromvp);
        *status = NFSERR_STALE;
        return;
    }

    if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
        VN_RELE(tovp);
        VN_RELE(fromvp);
        *status = NFSERR_NOTDIR;
        return;
    }

    /*
     * Disallow NULL paths
     */
    if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
        args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
        VN_RELE(tovp);
        VN_RELE(fromvp);
        *status = NFSERR_ACCES;
        return;
    }

    if (rdonly(ro, tovp)) {
        VN_RELE(tovp);
        VN_RELE(fromvp);
        *status = NFSERR_ROFS;
        return;
    }

    /*
     * Check for a conflict with a non-blocking mandatory share
     * reservation.
     */
    error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
        NULL, cr, NULL, NULL, NULL);
    if (error != 0) {
        VN_RELE(tovp);
        VN_RELE(fromvp);
        *status = puterrno(error);
        return;
    }

    /* Check for delegations on the source file */

    if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
        VN_RELE(tovp);
        VN_RELE(fromvp);
        VN_RELE(srcvp);
        curthread->t_flag |= T_WOULDBLOCK;
        return;
    }

    /* Check for delegation on the file being renamed over, if it exists */

    if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
        VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
        NULL, NULL, NULL) == 0) {

        if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
            VN_RELE(tovp);
            VN_RELE(fromvp);
            VN_RELE(srcvp);
            VN_RELE(targvp);
            curthread->t_flag |= T_WOULDBLOCK;
            return;
        }
        VN_RELE(targvp);
    }

    if (nbl_need_check(srcvp)) {
        nbl_start_crit(srcvp, RW_READER);
        in_crit = 1;
        if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
            error = EACCES;
            goto out;
        }
    }

    error = VOP_RENAME(fromvp, args->rna_from.da_name,
        tovp, args->rna_to.da_name, cr, NULL, 0);

    if (error == 0)
        vn_renamepath(tovp, srcvp, args->rna_to.da_name,
            strlen(args->rna_to.da_name));

    /*
     * Force modified data and metadata out to stable storage.
     */
    (void) VOP_FSYNC(tovp, 0, cr, NULL);
    (void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
    if (in_crit)
        nbl_end_crit(srcvp);
    VN_RELE(srcvp);
    VN_RELE(tovp);
    VN_RELE(fromvp);

    *status = puterrno(error);
}

void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
    return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths.
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);
}

void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}

/*
 * Symbolically link to a file.
 * Create a file (from) with the given attributes which is a symbolic
 * link to the given target path name (to).
 */
void
rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *vp;
	vnode_t *svp;
	int lerror;
	struct sockaddr *ca;
	char *name = NULL;

	/*
	 * Disallow NULL paths.
	 */
	if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->sla_sa, &va);
	if (error) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		*status = NFSERR_INVAL;
		return;
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->sla_tnm,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN);

	if (name == NULL) {
		/* Drop the hold taken by nfs_fhtovp() before bailing. */
		VN_RELE(vp);
		*status = NFSERR_ACCES;
		return;
	}

	va.va_type = VLNK;
	va.va_mask |= AT_TYPE;

	error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
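
	/*
	 * Editorial note (an explanation, not a behavior change): this
	 * VOP_SYMLINK interface does not hand back the vnode of the
	 * newly created link, so the only way to flush the new object
	 * itself is to look it up again by name, as done below.  The
	 * lookup error is deliberately ignored; at worst the fsync of
	 * the new link is skipped while the directory fsync still runs.
	 */
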
	/*
	 * Force new data and metadata out to stable storage.
	 */
	lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);

	if (!lerror) {
		(void) VOP_FSYNC(svp, 0, cr, NULL);
		VN_RELE(svp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	*status = puterrno(error);
	if (name != args->sla_tnm)
		kmem_free(name, MAXPATHLEN);
}

void *
rfs_symlink_getfh(struct nfsslargs *args)
{
	return (args->sla_from.da_fhandle);
}

/*
 * Make a directory.
 * Create a directory with the given name, parent directory, and attributes.
 * Returns a file handle and attributes for the new directory.
 */
/* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;
	vnode_t *vp;
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths.
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attributes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL;	/* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);
}

void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;

	/*
	 * Disallow NULL paths.
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}
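
	/*
	 * An observation (presumably the reason there is no delegation
	 * or share-reservation check here, unlike rfs_remove): NFSv4
	 * delegations are granted only on regular files via OPEN, so
	 * removing a directory cannot conflict with one.
	 */
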
	/*
	 * VOP_RMDIR takes a third argument (the current directory of
	 * the process).  That's because someone wants to return EINVAL
	 * if one tries to remove ".".  Of course, NFS servers have no
	 * idea what their clients' current directories are.  We fake it
	 * by supplying a vnode known to exist and illegal to remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);
}

void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up I/O vector to read directory data.
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * Read the directory.
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	if (!error) {
		/*
		 * Set size and eof.  An unchanged resid means nothing
		 * was read, so this must be end of directory.
		 */
		if (uio.uio_resid == rda->rda_count) {
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);
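
	/*
	 * Editorial note: nfscmd_convdirplus() rewrites the entry names
	 * for exports configured with a client character-set mapping.
	 * Entries whose converted names no longer fit in the client's
	 * buffer are dropped, which the block below compensates for.
	 * (The conversion machinery itself lives behind nfs/nfs_cmd.h.)
	 */
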
	if (ret != 0) {
		size_t dropbytes;

		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);
}

void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}

void
rfs_rddirfree(struct nfsrddirres *rd)
{
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}

/* ARGSUSED */
void
rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct statvfs64 sb;
	vnode_t *vp;

	vp = nfs_fhtovp(fh, exi);
	if (vp == NULL) {
		fs->fs_status = NFSERR_STALE;
		return;
	}

	error = VFS_STATVFS(vp->v_vfsp, &sb);

	if (!error) {
		fs->fs_tsize = nfstsize();
		fs->fs_bsize = sb.f_frsize;
		fs->fs_blocks = sb.f_blocks;
		fs->fs_bfree = sb.f_bfree;
		fs->fs_bavail = sb.f_bavail;
	}

	VN_RELE(vp);

	fs->fs_status = puterrno(error);
}

void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}

static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
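
	/*
	 * Editorial note: per the NFS version 2 protocol, sattr fields
	 * the client does not want to set are sent as all ones (-1),
	 * which is why every field is compared against -1 above before
	 * being merged into the vattr.  The timestamps below arrive in
	 * microseconds and are widened to the vattr's nanoseconds,
	 * e.g. tv_usec = 500000 becomes tv_nsec = 500000000.
	 */
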
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * The NFS protocol defines times as unsigned, so don't
		 * extend the sign unless the sysadmin has set
		 * nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * The NFS protocol defines times as unsigned, so don't
		 * extend the sign unless the sysadmin has set
		 * nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}

static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};

/*
 * Check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	if (vap->va_mode == (unsigned short)-1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are values bigger than 32 bits supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
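
	/*
	 * Illustrative aside on the dev_t handling below (the exact
	 * packing lives in nfsv2_cmpdev()/cmpldev(), not here): the old
	 * SunOS 4 format has only 8 bits each of major and minor, so a
	 * device such as major 13, minor 2 presumably travels as the
	 * 16-bit value (13 << 8) | 2, while anything wider must be sent
	 * in the compressed 32-bit form instead.
	 */
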
	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type.  (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 * If you are porting NFS to a non-Sun server, you probably
	 * don't want to include the following block of code.  The
	 * over-the-wire special file types will be changing with the
	 * NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * ACL v2 support: returns approximate permissions.
 *	default:  returns minimal permissions (more restrictive)
 *	aclok:    returns maximal permissions (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has a minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * the CLASS_OBJ entry is always the same as the GROUP_OBJ entry.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* We don't care about the default ACL here. */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}

void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}
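
/*
 * Editorial note: the caller id allocated above is stamped into the
 * caller_context_t of server-initiated vnode operations (see the
 * cc_caller_id assignments earlier in this file, alongside
 * CC_DONTBLOCK), so a filesystem can tell the NFS server's own calls
 * apart, e.g. when resolving non-blocking mandatory lock conflicts.
 */
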
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}

static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
	struct clist *wcl;
	int wlist_len;
	uint32_t count = rr->rr_count;

	wcl = ra->ra_wlist;

	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
		return (FALSE);
	}

	wcl = ra->ra_wlist;
	rr->rr_ok.rrok_wlist_len = wlist_len;
	rr->rr_ok.rrok_wlist = wcl;

	return (TRUE);
}
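
/*
 * Editorial note on rdma_setup_read_data2(): for NFS-over-RDMA reads
 * the client supplies a write chunk list with its call.  The helper
 * above maps rr_count bytes of the read reply onto that list so the
 * transport can RDMA-write the data directly into the client's
 * buffers, leaving the reply itself to carry only the chunk-list
 * metadata (the usual RPC-over-RDMA arrangement).
 */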