1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 30 * All rights reserved. 31 */ 32 33 /* 34 * Copyright 2018 Nexenta Systems, Inc. 35 * Copyright (c) 2016 by Delphix. All rights reserved. 36 */ 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/cred.h> 42 #include <sys/buf.h> 43 #include <sys/vfs.h> 44 #include <sys/vnode.h> 45 #include <sys/uio.h> 46 #include <sys/stat.h> 47 #include <sys/errno.h> 48 #include <sys/sysmacros.h> 49 #include <sys/statvfs.h> 50 #include <sys/kmem.h> 51 #include <sys/kstat.h> 52 #include <sys/dirent.h> 53 #include <sys/cmn_err.h> 54 #include <sys/debug.h> 55 #include <sys/vtrace.h> 56 #include <sys/mode.h> 57 #include <sys/acl.h> 58 #include <sys/nbmlock.h> 59 #include <sys/policy.h> 60 #include <sys/sdt.h> 61 62 #include <rpc/types.h> 63 #include <rpc/auth.h> 64 #include <rpc/svc.h> 65 66 #include <nfs/nfs.h> 67 #include <nfs/export.h> 68 #include <nfs/nfs_cmd.h> 69 70 #include <vm/hat.h> 71 #include <vm/as.h> 72 #include <vm/seg.h> 73 #include <vm/seg_map.h> 74 #include <vm/seg_kmem.h> 75 76 #include <sys/strsubr.h> 77 78 struct rfs_async_write_list; 79 80 /* 81 * Zone globals of NFSv2 server 82 */ 83 typedef struct nfs_srv { 84 kmutex_t async_write_lock; 85 struct rfs_async_write_list *async_write_head; 86 87 /* 88 * enables write clustering if == 1 89 */ 90 int write_async; 91 } nfs_srv_t; 92 93 /* 94 * These are the interface routines for the server side of the 95 * Network File System. See the NFS version 2 protocol specification 96 * for a description of this interface. 97 */ 98 99 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 100 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 101 cred_t *); 102 103 104 /* 105 * Some "over the wire" UNIX file types. These are encoded 106 * into the mode. This needs to be fixed in the next rev. 107 */ 108 #define IFMT 0170000 /* type of file */ 109 #define IFCHR 0020000 /* character special */ 110 #define IFBLK 0060000 /* block special */ 111 #define IFSOCK 0140000 /* socket */ 112 113 u_longlong_t nfs2_srv_caller_id; 114 115 static nfs_srv_t * 116 nfs_get_srv(void) 117 { 118 nfs_globals_t *ng = nfs_srv_getzg(); 119 nfs_srv_t *srv = ng->nfs_srv; 120 ASSERT(srv != NULL); 121 return (srv); 122 } 123 124 /* 125 * Get file attributes. 126 * Returns the current attributes of the file with the given fhandle. 127 */ 128 /* ARGSUSED */ 129 void 130 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 131 struct svc_req *req, cred_t *cr, bool_t ro) 132 { 133 int error; 134 vnode_t *vp; 135 struct vattr va; 136 137 vp = nfs_fhtovp(fhp, exi); 138 if (vp == NULL) { 139 ns->ns_status = NFSERR_STALE; 140 return; 141 } 142 143 /* 144 * Do the getattr. 145 */ 146 va.va_mask = AT_ALL; /* we want all the attributes */ 147 148 error = rfs4_delegated_getattr(vp, &va, 0, cr); 149 150 /* check for overflows */ 151 if (!error) { 152 /* Lie about the object type for a referral */ 153 if (vn_is_nfs_reparse(vp, cr)) 154 va.va_type = VLNK; 155 156 acl_perm(vp, exi, &va, cr); 157 error = vattr_to_nattr(&va, &ns->ns_attr); 158 } 159 160 VN_RELE(vp); 161 162 ns->ns_status = puterrno(error); 163 } 164 void * 165 rfs_getattr_getfh(fhandle_t *fhp) 166 { 167 return (fhp); 168 } 169 170 /* 171 * Set file attributes. 172 * Sets the attributes of the file with the given fhandle. Returns 173 * the new attributes. 174 */ 175 /* ARGSUSED */ 176 void 177 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 178 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 179 { 180 int error; 181 int flag; 182 int in_crit = 0; 183 vnode_t *vp; 184 struct vattr va; 185 struct vattr bva; 186 struct flock64 bf; 187 caller_context_t ct; 188 189 190 vp = nfs_fhtovp(&args->saa_fh, exi); 191 if (vp == NULL) { 192 ns->ns_status = NFSERR_STALE; 193 return; 194 } 195 196 if (rdonly(ro, vp)) { 197 VN_RELE(vp); 198 ns->ns_status = NFSERR_ROFS; 199 return; 200 } 201 202 error = sattr_to_vattr(&args->saa_sa, &va); 203 if (error) { 204 VN_RELE(vp); 205 ns->ns_status = puterrno(error); 206 return; 207 } 208 209 /* 210 * If the client is requesting a change to the mtime, 211 * but the nanosecond field is set to 1 billion, then 212 * this is a flag to the server that it should set the 213 * atime and mtime fields to the server's current time. 214 * The 1 billion number actually came from the client 215 * as 1 million, but the units in the over the wire 216 * request are microseconds instead of nanoseconds. 217 * 218 * This is an overload of the protocol and should be 219 * documented in the NFS Version 2 protocol specification. 220 */ 221 if (va.va_mask & AT_MTIME) { 222 if (va.va_mtime.tv_nsec == 1000000000) { 223 gethrestime(&va.va_mtime); 224 va.va_atime = va.va_mtime; 225 va.va_mask |= AT_ATIME; 226 flag = 0; 227 } else 228 flag = ATTR_UTIME; 229 } else 230 flag = 0; 231 232 /* 233 * If the filesystem is exported with nosuid, then mask off 234 * the setuid and setgid bits. 235 */ 236 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 237 (exi->exi_export.ex_flags & EX_NOSUID)) 238 va.va_mode &= ~(VSUID | VSGID); 239 240 ct.cc_sysid = 0; 241 ct.cc_pid = 0; 242 ct.cc_caller_id = nfs2_srv_caller_id; 243 ct.cc_flags = CC_DONTBLOCK; 244 245 /* 246 * We need to specially handle size changes because it is 247 * possible for the client to create a file with modes 248 * which indicate read-only, but with the file opened for 249 * writing. If the client then tries to set the size of 250 * the file, then the normal access checking done in 251 * VOP_SETATTR would prevent the client from doing so, 252 * although it should be legal for it to do so. To get 253 * around this, we do the access checking for ourselves 254 * and then use VOP_SPACE which doesn't do the access 255 * checking which VOP_SETATTR does. VOP_SPACE can only 256 * operate on VREG files, let VOP_SETATTR handle the other 257 * extremely rare cases. 258 * Also the client should not be allowed to change the 259 * size of the file if there is a conflicting non-blocking 260 * mandatory lock in the region of change. 261 */ 262 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 263 if (nbl_need_check(vp)) { 264 nbl_start_crit(vp, RW_READER); 265 in_crit = 1; 266 } 267 268 bva.va_mask = AT_UID | AT_SIZE; 269 270 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 271 272 if (error) { 273 if (in_crit) 274 nbl_end_crit(vp); 275 VN_RELE(vp); 276 ns->ns_status = puterrno(error); 277 return; 278 } 279 280 if (in_crit) { 281 u_offset_t offset; 282 ssize_t length; 283 284 if (va.va_size < bva.va_size) { 285 offset = va.va_size; 286 length = bva.va_size - va.va_size; 287 } else { 288 offset = bva.va_size; 289 length = va.va_size - bva.va_size; 290 } 291 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 292 NULL)) { 293 error = EACCES; 294 } 295 } 296 297 if (crgetuid(cr) == bva.va_uid && !error && 298 va.va_size != bva.va_size) { 299 va.va_mask &= ~AT_SIZE; 300 bf.l_type = F_WRLCK; 301 bf.l_whence = 0; 302 bf.l_start = (off64_t)va.va_size; 303 bf.l_len = 0; 304 bf.l_sysid = 0; 305 bf.l_pid = 0; 306 307 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 308 (offset_t)va.va_size, cr, &ct); 309 } 310 if (in_crit) 311 nbl_end_crit(vp); 312 } else 313 error = 0; 314 315 /* 316 * Do the setattr. 317 */ 318 if (!error && va.va_mask) { 319 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 320 } 321 322 /* 323 * check if the monitor on either vop_space or vop_setattr detected 324 * a delegation conflict and if so, mark the thread flag as 325 * wouldblock so that the response is dropped and the client will 326 * try again. 327 */ 328 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 329 VN_RELE(vp); 330 curthread->t_flag |= T_WOULDBLOCK; 331 return; 332 } 333 334 if (!error) { 335 va.va_mask = AT_ALL; /* get everything */ 336 337 error = rfs4_delegated_getattr(vp, &va, 0, cr); 338 339 /* check for overflows */ 340 if (!error) { 341 acl_perm(vp, exi, &va, cr); 342 error = vattr_to_nattr(&va, &ns->ns_attr); 343 } 344 } 345 346 ct.cc_flags = 0; 347 348 /* 349 * Force modified metadata out to stable storage. 350 */ 351 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 352 353 VN_RELE(vp); 354 355 ns->ns_status = puterrno(error); 356 } 357 void * 358 rfs_setattr_getfh(struct nfssaargs *args) 359 { 360 return (&args->saa_fh); 361 } 362 363 /* Change and release @exip and @vpp only in success */ 364 int 365 rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip) 366 { 367 struct exportinfo *exi; 368 vnode_t *vp = *vpp; 369 fid_t fid; 370 int error; 371 372 VN_HOLD(vp); 373 374 if ((error = traverse(&vp)) != 0) { 375 VN_RELE(vp); 376 return (error); 377 } 378 379 bzero(&fid, sizeof (fid)); 380 fid.fid_len = MAXFIDSZ; 381 error = VOP_FID(vp, &fid, NULL); 382 if (error) { 383 VN_RELE(vp); 384 return (error); 385 } 386 387 exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid); 388 if (exi == NULL || 389 (exi->exi_export.ex_flags & EX_NOHIDE) == 0) { 390 /* 391 * It is not error, just subdir is not exported 392 * or "nohide" is not set 393 */ 394 if (exi != NULL) 395 exi_rele(exi); 396 VN_RELE(vp); 397 } else { 398 /* go to submount */ 399 exi_rele(*exip); 400 *exip = exi; 401 402 VN_RELE(*vpp); 403 *vpp = vp; 404 } 405 406 return (0); 407 } 408 409 /* 410 * Given mounted "dvp" and "exi", go upper mountpoint 411 * with dvp/exi correction 412 * Return 0 in success 413 */ 414 int 415 rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr) 416 { 417 struct exportinfo *exi; 418 vnode_t *dvp = *dvpp; 419 vnode_t *zone_rootvp; 420 421 zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp; 422 ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp)); 423 424 VN_HOLD(dvp); 425 dvp = untraverse(dvp, zone_rootvp); 426 exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE); 427 if (exi == NULL) { 428 VN_RELE(dvp); 429 return (-1); 430 } 431 432 ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid); 433 exi_rele(*exip); 434 *exip = exi; 435 VN_RELE(*dvpp); 436 *dvpp = dvp; 437 438 return (0); 439 } 440 /* 441 * Directory lookup. 442 * Returns an fhandle and file attributes for file name in a directory. 443 */ 444 /* ARGSUSED */ 445 void 446 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 447 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 448 { 449 int error; 450 vnode_t *dvp; 451 vnode_t *vp; 452 struct vattr va; 453 fhandle_t *fhp = da->da_fhandle; 454 struct sec_ol sec = {0, 0}; 455 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 456 char *name; 457 struct sockaddr *ca; 458 459 /* 460 * Trusted Extension doesn't support NFSv2. MOUNT 461 * will reject v2 clients. Need to prevent v2 client 462 * access via WebNFS here. 463 */ 464 if (is_system_labeled() && req->rq_vers == 2) { 465 dr->dr_status = NFSERR_ACCES; 466 return; 467 } 468 469 /* 470 * Disallow NULL paths 471 */ 472 if (da->da_name == NULL || *da->da_name == '\0') { 473 dr->dr_status = NFSERR_ACCES; 474 return; 475 } 476 477 /* 478 * Allow lookups from the root - the default 479 * location of the public filehandle. 480 */ 481 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 482 dvp = ZONE_ROOTVP(); 483 VN_HOLD(dvp); 484 } else { 485 dvp = nfs_fhtovp(fhp, exi); 486 if (dvp == NULL) { 487 dr->dr_status = NFSERR_STALE; 488 return; 489 } 490 } 491 492 exi_hold(exi); 493 ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); 494 495 /* 496 * Not allow lookup beyond root. 497 * If the filehandle matches a filehandle of the exi, 498 * then the ".." refers beyond the root of an exported filesystem. 499 */ 500 if (strcmp(da->da_name, "..") == 0 && 501 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 502 if ((exi->exi_export.ex_flags & EX_NOHIDE) && 503 ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) { 504 /* 505 * special case for ".." and 'nohide'exported root 506 */ 507 if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) { 508 error = NFSERR_ACCES; 509 goto out; 510 } 511 } else { 512 error = NFSERR_NOENT; 513 goto out; 514 } 515 } 516 517 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 518 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 519 MAXPATHLEN); 520 521 if (name == NULL) { 522 error = NFSERR_ACCES; 523 goto out; 524 } 525 526 /* 527 * If the public filehandle is used then allow 528 * a multi-component lookup, i.e. evaluate 529 * a pathname and follow symbolic links if 530 * necessary. 531 * 532 * This may result in a vnode in another filesystem 533 * which is OK as long as the filesystem is exported. 534 */ 535 if (PUBLIC_FH2(fhp)) { 536 publicfh_flag = TRUE; 537 538 exi_rele(exi); 539 exi = NULL; 540 541 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 542 &sec); 543 } else { 544 /* 545 * Do a normal single component lookup. 546 */ 547 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 548 NULL, NULL, NULL); 549 } 550 551 if (name != da->da_name) 552 kmem_free(name, MAXPATHLEN); 553 554 if (error == 0 && vn_ismntpt(vp)) { 555 error = rfs_cross_mnt(&vp, &exi); 556 if (error) 557 VN_RELE(vp); 558 } 559 560 if (!error) { 561 va.va_mask = AT_ALL; /* we want everything */ 562 563 error = rfs4_delegated_getattr(vp, &va, 0, cr); 564 565 /* check for overflows */ 566 if (!error) { 567 acl_perm(vp, exi, &va, cr); 568 error = vattr_to_nattr(&va, &dr->dr_attr); 569 if (!error) { 570 if (sec.sec_flags & SEC_QUERY) 571 error = makefh_ol(&dr->dr_fhandle, exi, 572 sec.sec_index); 573 else { 574 error = makefh(&dr->dr_fhandle, vp, 575 exi); 576 if (!error && publicfh_flag && 577 !chk_clnt_sec(exi, req)) 578 auth_weak = TRUE; 579 } 580 } 581 } 582 VN_RELE(vp); 583 } 584 585 out: 586 VN_RELE(dvp); 587 588 if (exi != NULL) 589 exi_rele(exi); 590 591 /* 592 * If it's public fh, no 0x81, and client's flavor is 593 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 594 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 595 */ 596 if (auth_weak) 597 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 598 else 599 dr->dr_status = puterrno(error); 600 } 601 void * 602 rfs_lookup_getfh(struct nfsdiropargs *da) 603 { 604 return (da->da_fhandle); 605 } 606 607 /* 608 * Read symbolic link. 609 * Returns the string in the symbolic link at the given fhandle. 610 */ 611 /* ARGSUSED */ 612 void 613 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 614 struct svc_req *req, cred_t *cr, bool_t ro) 615 { 616 int error; 617 struct iovec iov; 618 struct uio uio; 619 vnode_t *vp; 620 struct vattr va; 621 struct sockaddr *ca; 622 char *name = NULL; 623 int is_referral = 0; 624 625 vp = nfs_fhtovp(fhp, exi); 626 if (vp == NULL) { 627 rl->rl_data = NULL; 628 rl->rl_status = NFSERR_STALE; 629 return; 630 } 631 632 va.va_mask = AT_MODE; 633 634 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 635 636 if (error) { 637 VN_RELE(vp); 638 rl->rl_data = NULL; 639 rl->rl_status = puterrno(error); 640 return; 641 } 642 643 if (MANDLOCK(vp, va.va_mode)) { 644 VN_RELE(vp); 645 rl->rl_data = NULL; 646 rl->rl_status = NFSERR_ACCES; 647 return; 648 } 649 650 /* We lied about the object type for a referral */ 651 if (vn_is_nfs_reparse(vp, cr)) 652 is_referral = 1; 653 654 /* 655 * XNFS and RFC1094 require us to return ENXIO if argument 656 * is not a link. BUGID 1138002. 657 */ 658 if (vp->v_type != VLNK && !is_referral) { 659 VN_RELE(vp); 660 rl->rl_data = NULL; 661 rl->rl_status = NFSERR_NXIO; 662 return; 663 } 664 665 /* 666 * Allocate data for pathname. This will be freed by rfs_rlfree. 667 */ 668 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 669 670 if (is_referral) { 671 char *s; 672 size_t strsz; 673 kstat_named_t *stat = 674 exi->exi_ne->ne_globals->svstat[NFS_VERSION]; 675 676 /* Get an artificial symlink based on a referral */ 677 s = build_symlink(vp, cr, &strsz); 678 stat[NFS_REFERLINKS].value.ui64++; 679 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 680 vnode_t *, vp, char *, s); 681 if (s == NULL) 682 error = EINVAL; 683 else { 684 error = 0; 685 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 686 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 687 kmem_free(s, strsz); 688 } 689 690 } else { 691 692 /* 693 * Set up io vector to read sym link data 694 */ 695 iov.iov_base = rl->rl_data; 696 iov.iov_len = NFS_MAXPATHLEN; 697 uio.uio_iov = &iov; 698 uio.uio_iovcnt = 1; 699 uio.uio_segflg = UIO_SYSSPACE; 700 uio.uio_extflg = UIO_COPY_CACHED; 701 uio.uio_loffset = (offset_t)0; 702 uio.uio_resid = NFS_MAXPATHLEN; 703 704 /* 705 * Do the readlink. 706 */ 707 error = VOP_READLINK(vp, &uio, cr, NULL); 708 709 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 710 711 if (!error) 712 rl->rl_data[rl->rl_count] = '\0'; 713 714 } 715 716 717 VN_RELE(vp); 718 719 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 720 name = nfscmd_convname(ca, exi, rl->rl_data, 721 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 722 723 if (name != NULL && name != rl->rl_data) { 724 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 725 rl->rl_data = name; 726 } 727 728 /* 729 * XNFS and RFC1094 require us to return ENXIO if argument 730 * is not a link. UFS returns EINVAL if this is the case, 731 * so we do the mapping here. BUGID 1138002. 732 */ 733 if (error == EINVAL) 734 rl->rl_status = NFSERR_NXIO; 735 else 736 rl->rl_status = puterrno(error); 737 738 } 739 void * 740 rfs_readlink_getfh(fhandle_t *fhp) 741 { 742 return (fhp); 743 } 744 /* 745 * Free data allocated by rfs_readlink 746 */ 747 void 748 rfs_rlfree(struct nfsrdlnres *rl) 749 { 750 if (rl->rl_data != NULL) 751 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 752 } 753 754 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 755 756 /* 757 * Read data. 758 * Returns some data read from the file at the given fhandle. 759 */ 760 /* ARGSUSED */ 761 void 762 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 763 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 764 { 765 vnode_t *vp; 766 int error; 767 struct vattr va; 768 struct iovec iov; 769 struct uio uio; 770 mblk_t *mp; 771 int alloc_err = 0; 772 int in_crit = 0; 773 caller_context_t ct; 774 775 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 776 if (vp == NULL) { 777 rr->rr_data = NULL; 778 rr->rr_status = NFSERR_STALE; 779 return; 780 } 781 782 if (vp->v_type != VREG) { 783 VN_RELE(vp); 784 rr->rr_data = NULL; 785 rr->rr_status = NFSERR_ISDIR; 786 return; 787 } 788 789 ct.cc_sysid = 0; 790 ct.cc_pid = 0; 791 ct.cc_caller_id = nfs2_srv_caller_id; 792 ct.cc_flags = CC_DONTBLOCK; 793 794 /* 795 * Enter the critical region before calling VOP_RWLOCK 796 * to avoid a deadlock with write requests. 797 */ 798 if (nbl_need_check(vp)) { 799 nbl_start_crit(vp, RW_READER); 800 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 801 0, NULL)) { 802 nbl_end_crit(vp); 803 VN_RELE(vp); 804 rr->rr_data = NULL; 805 rr->rr_status = NFSERR_ACCES; 806 return; 807 } 808 in_crit = 1; 809 } 810 811 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 812 813 /* check if a monitor detected a delegation conflict */ 814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 815 if (in_crit) 816 nbl_end_crit(vp); 817 VN_RELE(vp); 818 /* mark as wouldblock so response is dropped */ 819 curthread->t_flag |= T_WOULDBLOCK; 820 821 rr->rr_data = NULL; 822 return; 823 } 824 825 va.va_mask = AT_ALL; 826 827 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 828 829 if (error) { 830 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 831 if (in_crit) 832 nbl_end_crit(vp); 833 834 VN_RELE(vp); 835 rr->rr_data = NULL; 836 rr->rr_status = puterrno(error); 837 838 return; 839 } 840 841 /* 842 * This is a kludge to allow reading of files created 843 * with no read permission. The owner of the file 844 * is always allowed to read it. 845 */ 846 if (crgetuid(cr) != va.va_uid) { 847 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 848 849 if (error) { 850 /* 851 * Exec is the same as read over the net because 852 * of demand loading. 853 */ 854 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 855 } 856 if (error) { 857 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 858 if (in_crit) 859 nbl_end_crit(vp); 860 VN_RELE(vp); 861 rr->rr_data = NULL; 862 rr->rr_status = puterrno(error); 863 864 return; 865 } 866 } 867 868 if (MANDLOCK(vp, va.va_mode)) { 869 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 870 if (in_crit) 871 nbl_end_crit(vp); 872 873 VN_RELE(vp); 874 rr->rr_data = NULL; 875 rr->rr_status = NFSERR_ACCES; 876 877 return; 878 } 879 880 rr->rr_ok.rrok_wlist_len = 0; 881 rr->rr_ok.rrok_wlist = NULL; 882 883 if ((u_offset_t)ra->ra_offset >= va.va_size) { 884 rr->rr_count = 0; 885 rr->rr_data = NULL; 886 /* 887 * In this case, status is NFS_OK, but there is no data 888 * to encode. So set rr_mp to NULL. 889 */ 890 rr->rr_mp = NULL; 891 rr->rr_ok.rrok_wlist = ra->ra_wlist; 892 if (rr->rr_ok.rrok_wlist) 893 clist_zero_len(rr->rr_ok.rrok_wlist); 894 goto done; 895 } 896 897 if (ra->ra_wlist) { 898 mp = NULL; 899 rr->rr_mp = NULL; 900 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 901 if (ra->ra_count > iov.iov_len) { 902 rr->rr_data = NULL; 903 rr->rr_status = NFSERR_INVAL; 904 goto done; 905 } 906 } else { 907 /* 908 * mp will contain the data to be sent out in the read reply. 909 * This will be freed after the reply has been sent out (by the 910 * driver). 911 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 912 * that the call to xdrmblk_putmblk() never fails. 913 */ 914 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 915 &alloc_err); 916 ASSERT(mp != NULL); 917 ASSERT(alloc_err == 0); 918 919 rr->rr_mp = mp; 920 921 /* 922 * Set up io vector 923 */ 924 iov.iov_base = (caddr_t)mp->b_datap->db_base; 925 iov.iov_len = ra->ra_count; 926 } 927 928 uio.uio_iov = &iov; 929 uio.uio_iovcnt = 1; 930 uio.uio_segflg = UIO_SYSSPACE; 931 uio.uio_extflg = UIO_COPY_CACHED; 932 uio.uio_loffset = (offset_t)ra->ra_offset; 933 uio.uio_resid = ra->ra_count; 934 935 error = VOP_READ(vp, &uio, 0, cr, &ct); 936 937 if (error) { 938 if (mp) 939 freeb(mp); 940 941 /* 942 * check if a monitor detected a delegation conflict and 943 * mark as wouldblock so response is dropped 944 */ 945 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 946 curthread->t_flag |= T_WOULDBLOCK; 947 else 948 rr->rr_status = puterrno(error); 949 950 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 951 if (in_crit) 952 nbl_end_crit(vp); 953 954 VN_RELE(vp); 955 rr->rr_data = NULL; 956 957 return; 958 } 959 960 /* 961 * Get attributes again so we can send the latest access 962 * time to the client side for its cache. 963 */ 964 va.va_mask = AT_ALL; 965 966 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 967 968 if (error) { 969 if (mp) 970 freeb(mp); 971 972 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 973 if (in_crit) 974 nbl_end_crit(vp); 975 976 VN_RELE(vp); 977 rr->rr_data = NULL; 978 rr->rr_status = puterrno(error); 979 980 return; 981 } 982 983 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 984 985 if (mp) { 986 rr->rr_data = (char *)mp->b_datap->db_base; 987 } else { 988 if (ra->ra_wlist) { 989 rr->rr_data = (caddr_t)iov.iov_base; 990 if (!rdma_setup_read_data2(ra, rr)) { 991 rr->rr_data = NULL; 992 rr->rr_status = puterrno(NFSERR_INVAL); 993 } 994 } 995 } 996 done: 997 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 998 if (in_crit) 999 nbl_end_crit(vp); 1000 1001 acl_perm(vp, exi, &va, cr); 1002 1003 /* check for overflows */ 1004 error = vattr_to_nattr(&va, &rr->rr_attr); 1005 1006 VN_RELE(vp); 1007 1008 rr->rr_status = puterrno(error); 1009 } 1010 1011 /* 1012 * Free data allocated by rfs_read 1013 */ 1014 void 1015 rfs_rdfree(struct nfsrdresult *rr) 1016 { 1017 mblk_t *mp; 1018 1019 if (rr->rr_status == NFS_OK) { 1020 mp = rr->rr_mp; 1021 if (mp != NULL) 1022 freeb(mp); 1023 } 1024 } 1025 1026 void * 1027 rfs_read_getfh(struct nfsreadargs *ra) 1028 { 1029 return (&ra->ra_fhandle); 1030 } 1031 1032 #define MAX_IOVECS 12 1033 1034 #ifdef DEBUG 1035 static int rfs_write_sync_hits = 0; 1036 static int rfs_write_sync_misses = 0; 1037 #endif 1038 1039 /* 1040 * Write data to file. 1041 * Returns attributes of a file after writing some data to it. 1042 * 1043 * Any changes made here, especially in error handling might have 1044 * to also be done in rfs_write (which clusters write requests). 1045 */ 1046 /* ARGSUSED */ 1047 void 1048 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 1049 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1050 { 1051 int error; 1052 vnode_t *vp; 1053 rlim64_t rlimit; 1054 struct vattr va; 1055 struct uio uio; 1056 struct iovec iov[MAX_IOVECS]; 1057 mblk_t *m; 1058 struct iovec *iovp; 1059 int iovcnt; 1060 cred_t *savecred; 1061 int in_crit = 0; 1062 caller_context_t ct; 1063 1064 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1065 if (vp == NULL) { 1066 ns->ns_status = NFSERR_STALE; 1067 return; 1068 } 1069 1070 if (rdonly(ro, vp)) { 1071 VN_RELE(vp); 1072 ns->ns_status = NFSERR_ROFS; 1073 return; 1074 } 1075 1076 if (vp->v_type != VREG) { 1077 VN_RELE(vp); 1078 ns->ns_status = NFSERR_ISDIR; 1079 return; 1080 } 1081 1082 ct.cc_sysid = 0; 1083 ct.cc_pid = 0; 1084 ct.cc_caller_id = nfs2_srv_caller_id; 1085 ct.cc_flags = CC_DONTBLOCK; 1086 1087 va.va_mask = AT_UID|AT_MODE; 1088 1089 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1090 1091 if (error) { 1092 VN_RELE(vp); 1093 ns->ns_status = puterrno(error); 1094 1095 return; 1096 } 1097 1098 if (crgetuid(cr) != va.va_uid) { 1099 /* 1100 * This is a kludge to allow writes of files created 1101 * with read only permission. The owner of the file 1102 * is always allowed to write it. 1103 */ 1104 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 1105 1106 if (error) { 1107 VN_RELE(vp); 1108 ns->ns_status = puterrno(error); 1109 return; 1110 } 1111 } 1112 1113 /* 1114 * Can't access a mandatory lock file. This might cause 1115 * the NFS service thread to block forever waiting for a 1116 * lock to be released that will never be released. 1117 */ 1118 if (MANDLOCK(vp, va.va_mode)) { 1119 VN_RELE(vp); 1120 ns->ns_status = NFSERR_ACCES; 1121 return; 1122 } 1123 1124 /* 1125 * We have to enter the critical region before calling VOP_RWLOCK 1126 * to avoid a deadlock with ufs. 1127 */ 1128 if (nbl_need_check(vp)) { 1129 nbl_start_crit(vp, RW_READER); 1130 in_crit = 1; 1131 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 1132 wa->wa_count, 0, NULL)) { 1133 error = EACCES; 1134 goto out; 1135 } 1136 } 1137 1138 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1139 1140 /* check if a monitor detected a delegation conflict */ 1141 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1142 goto out; 1143 } 1144 1145 if (wa->wa_data || wa->wa_rlist) { 1146 /* Do the RDMA thing if necessary */ 1147 if (wa->wa_rlist) { 1148 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1149 iov[0].iov_len = wa->wa_count; 1150 } else { 1151 iov[0].iov_base = wa->wa_data; 1152 iov[0].iov_len = wa->wa_count; 1153 } 1154 uio.uio_iov = iov; 1155 uio.uio_iovcnt = 1; 1156 uio.uio_segflg = UIO_SYSSPACE; 1157 uio.uio_extflg = UIO_COPY_DEFAULT; 1158 uio.uio_loffset = (offset_t)wa->wa_offset; 1159 uio.uio_resid = wa->wa_count; 1160 /* 1161 * The limit is checked on the client. We 1162 * should allow any size writes here. 1163 */ 1164 uio.uio_llimit = curproc->p_fsz_ctl; 1165 rlimit = uio.uio_llimit - wa->wa_offset; 1166 if (rlimit < (rlim64_t)uio.uio_resid) 1167 uio.uio_resid = (uint_t)rlimit; 1168 1169 /* 1170 * for now we assume no append mode 1171 */ 1172 /* 1173 * We're changing creds because VM may fault and we need 1174 * the cred of the current thread to be used if quota 1175 * checking is enabled. 1176 */ 1177 savecred = curthread->t_cred; 1178 curthread->t_cred = cr; 1179 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1180 curthread->t_cred = savecred; 1181 } else { 1182 1183 iovcnt = 0; 1184 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1185 iovcnt++; 1186 if (iovcnt <= MAX_IOVECS) { 1187 #ifdef DEBUG 1188 rfs_write_sync_hits++; 1189 #endif 1190 iovp = iov; 1191 } else { 1192 #ifdef DEBUG 1193 rfs_write_sync_misses++; 1194 #endif 1195 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1196 } 1197 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1198 uio.uio_iov = iovp; 1199 uio.uio_iovcnt = iovcnt; 1200 uio.uio_segflg = UIO_SYSSPACE; 1201 uio.uio_extflg = UIO_COPY_DEFAULT; 1202 uio.uio_loffset = (offset_t)wa->wa_offset; 1203 uio.uio_resid = wa->wa_count; 1204 /* 1205 * The limit is checked on the client. We 1206 * should allow any size writes here. 1207 */ 1208 uio.uio_llimit = curproc->p_fsz_ctl; 1209 rlimit = uio.uio_llimit - wa->wa_offset; 1210 if (rlimit < (rlim64_t)uio.uio_resid) 1211 uio.uio_resid = (uint_t)rlimit; 1212 1213 /* 1214 * For now we assume no append mode. 1215 */ 1216 /* 1217 * We're changing creds because VM may fault and we need 1218 * the cred of the current thread to be used if quota 1219 * checking is enabled. 1220 */ 1221 savecred = curthread->t_cred; 1222 curthread->t_cred = cr; 1223 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1224 curthread->t_cred = savecred; 1225 1226 if (iovp != iov) 1227 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1228 } 1229 1230 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1231 1232 if (!error) { 1233 /* 1234 * Get attributes again so we send the latest mod 1235 * time to the client side for its cache. 1236 */ 1237 va.va_mask = AT_ALL; /* now we want everything */ 1238 1239 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1240 1241 /* check for overflows */ 1242 if (!error) { 1243 acl_perm(vp, exi, &va, cr); 1244 error = vattr_to_nattr(&va, &ns->ns_attr); 1245 } 1246 } 1247 1248 out: 1249 if (in_crit) 1250 nbl_end_crit(vp); 1251 VN_RELE(vp); 1252 1253 /* check if a monitor detected a delegation conflict */ 1254 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1255 /* mark as wouldblock so response is dropped */ 1256 curthread->t_flag |= T_WOULDBLOCK; 1257 else 1258 ns->ns_status = puterrno(error); 1259 1260 } 1261 1262 struct rfs_async_write { 1263 struct nfswriteargs *wa; 1264 struct nfsattrstat *ns; 1265 struct svc_req *req; 1266 cred_t *cr; 1267 bool_t ro; 1268 kthread_t *thread; 1269 struct rfs_async_write *list; 1270 }; 1271 1272 struct rfs_async_write_list { 1273 fhandle_t *fhp; 1274 kcondvar_t cv; 1275 struct rfs_async_write *list; 1276 struct rfs_async_write_list *next; 1277 }; 1278 1279 #define MAXCLIOVECS 42 1280 #define RFSWRITE_INITVAL (enum nfsstat) -1 1281 1282 #ifdef DEBUG 1283 static int rfs_write_hits = 0; 1284 static int rfs_write_misses = 0; 1285 #endif 1286 1287 /* 1288 * Write data to file. 1289 * Returns attributes of a file after writing some data to it. 1290 */ 1291 void 1292 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1293 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1294 { 1295 int error; 1296 vnode_t *vp; 1297 rlim64_t rlimit; 1298 struct vattr va; 1299 struct uio uio; 1300 struct rfs_async_write_list *lp; 1301 struct rfs_async_write_list *nlp; 1302 struct rfs_async_write *rp; 1303 struct rfs_async_write *nrp; 1304 struct rfs_async_write *trp; 1305 struct rfs_async_write *lrp; 1306 int data_written; 1307 int iovcnt; 1308 mblk_t *m; 1309 struct iovec *iovp; 1310 struct iovec *niovp; 1311 struct iovec iov[MAXCLIOVECS]; 1312 int count; 1313 int rcount; 1314 uint_t off; 1315 uint_t len; 1316 struct rfs_async_write nrpsp; 1317 struct rfs_async_write_list nlpsp; 1318 ushort_t t_flag; 1319 cred_t *savecred; 1320 int in_crit = 0; 1321 caller_context_t ct; 1322 nfs_srv_t *nsrv; 1323 1324 ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id); 1325 nsrv = nfs_get_srv(); 1326 if (!nsrv->write_async) { 1327 rfs_write_sync(wa, ns, exi, req, cr, ro); 1328 return; 1329 } 1330 1331 /* 1332 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1333 * is considered an OK. 1334 */ 1335 ns->ns_status = RFSWRITE_INITVAL; 1336 1337 nrp = &nrpsp; 1338 nrp->wa = wa; 1339 nrp->ns = ns; 1340 nrp->req = req; 1341 nrp->cr = cr; 1342 nrp->ro = ro; 1343 nrp->thread = curthread; 1344 1345 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1346 1347 /* 1348 * Look to see if there is already a cluster started 1349 * for this file. 1350 */ 1351 mutex_enter(&nsrv->async_write_lock); 1352 for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) { 1353 if (bcmp(&wa->wa_fhandle, lp->fhp, 1354 sizeof (fhandle_t)) == 0) 1355 break; 1356 } 1357 1358 /* 1359 * If lp is non-NULL, then there is already a cluster 1360 * started. We need to place ourselves in the cluster 1361 * list in the right place as determined by starting 1362 * offset. Conflicts with non-blocking mandatory locked 1363 * regions will be checked when the cluster is processed. 1364 */ 1365 if (lp != NULL) { 1366 rp = lp->list; 1367 trp = NULL; 1368 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1369 trp = rp; 1370 rp = rp->list; 1371 } 1372 nrp->list = rp; 1373 if (trp == NULL) 1374 lp->list = nrp; 1375 else 1376 trp->list = nrp; 1377 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1378 cv_wait(&lp->cv, &nsrv->async_write_lock); 1379 mutex_exit(&nsrv->async_write_lock); 1380 1381 return; 1382 } 1383 1384 /* 1385 * No cluster started yet, start one and add ourselves 1386 * to the list of clusters. 1387 */ 1388 nrp->list = NULL; 1389 1390 nlp = &nlpsp; 1391 nlp->fhp = &wa->wa_fhandle; 1392 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1393 nlp->list = nrp; 1394 nlp->next = NULL; 1395 1396 if (nsrv->async_write_head == NULL) { 1397 nsrv->async_write_head = nlp; 1398 } else { 1399 lp = nsrv->async_write_head; 1400 while (lp->next != NULL) 1401 lp = lp->next; 1402 lp->next = nlp; 1403 } 1404 mutex_exit(&nsrv->async_write_lock); 1405 1406 /* 1407 * Convert the file handle common to all of the requests 1408 * in this cluster to a vnode. 1409 */ 1410 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1411 if (vp == NULL) { 1412 mutex_enter(&nsrv->async_write_lock); 1413 if (nsrv->async_write_head == nlp) 1414 nsrv->async_write_head = nlp->next; 1415 else { 1416 lp = nsrv->async_write_head; 1417 while (lp->next != nlp) 1418 lp = lp->next; 1419 lp->next = nlp->next; 1420 } 1421 t_flag = curthread->t_flag & T_WOULDBLOCK; 1422 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1423 rp->ns->ns_status = NFSERR_STALE; 1424 rp->thread->t_flag |= t_flag; 1425 } 1426 cv_broadcast(&nlp->cv); 1427 mutex_exit(&nsrv->async_write_lock); 1428 1429 return; 1430 } 1431 1432 /* 1433 * Can only write regular files. Attempts to write any 1434 * other file types fail with EISDIR. 1435 */ 1436 if (vp->v_type != VREG) { 1437 VN_RELE(vp); 1438 mutex_enter(&nsrv->async_write_lock); 1439 if (nsrv->async_write_head == nlp) 1440 nsrv->async_write_head = nlp->next; 1441 else { 1442 lp = nsrv->async_write_head; 1443 while (lp->next != nlp) 1444 lp = lp->next; 1445 lp->next = nlp->next; 1446 } 1447 t_flag = curthread->t_flag & T_WOULDBLOCK; 1448 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1449 rp->ns->ns_status = NFSERR_ISDIR; 1450 rp->thread->t_flag |= t_flag; 1451 } 1452 cv_broadcast(&nlp->cv); 1453 mutex_exit(&nsrv->async_write_lock); 1454 1455 return; 1456 } 1457 1458 /* 1459 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1460 * deadlock with ufs. 1461 */ 1462 if (nbl_need_check(vp)) { 1463 nbl_start_crit(vp, RW_READER); 1464 in_crit = 1; 1465 } 1466 1467 ct.cc_sysid = 0; 1468 ct.cc_pid = 0; 1469 ct.cc_caller_id = nfs2_srv_caller_id; 1470 ct.cc_flags = CC_DONTBLOCK; 1471 1472 /* 1473 * Lock the file for writing. This operation provides 1474 * the delay which allows clusters to grow. 1475 */ 1476 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1477 1478 /* check if a monitor detected a delegation conflict */ 1479 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1480 if (in_crit) 1481 nbl_end_crit(vp); 1482 VN_RELE(vp); 1483 /* mark as wouldblock so response is dropped */ 1484 curthread->t_flag |= T_WOULDBLOCK; 1485 mutex_enter(&nsrv->async_write_lock); 1486 if (nsrv->async_write_head == nlp) 1487 nsrv->async_write_head = nlp->next; 1488 else { 1489 lp = nsrv->async_write_head; 1490 while (lp->next != nlp) 1491 lp = lp->next; 1492 lp->next = nlp->next; 1493 } 1494 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1495 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1496 rp->ns->ns_status = puterrno(error); 1497 rp->thread->t_flag |= T_WOULDBLOCK; 1498 } 1499 } 1500 cv_broadcast(&nlp->cv); 1501 mutex_exit(&nsrv->async_write_lock); 1502 1503 return; 1504 } 1505 1506 /* 1507 * Disconnect this cluster from the list of clusters. 1508 * The cluster that is being dealt with must be fixed 1509 * in size after this point, so there is no reason 1510 * to leave it on the list so that new requests can 1511 * find it. 1512 * 1513 * The algorithm is that the first write request will 1514 * create a cluster, convert the file handle to a 1515 * vnode pointer, and then lock the file for writing. 1516 * This request is not likely to be clustered with 1517 * any others. However, the next request will create 1518 * a new cluster and be blocked in VOP_RWLOCK while 1519 * the first request is being processed. This delay 1520 * will allow more requests to be clustered in this 1521 * second cluster. 1522 */ 1523 mutex_enter(&nsrv->async_write_lock); 1524 if (nsrv->async_write_head == nlp) 1525 nsrv->async_write_head = nlp->next; 1526 else { 1527 lp = nsrv->async_write_head; 1528 while (lp->next != nlp) 1529 lp = lp->next; 1530 lp->next = nlp->next; 1531 } 1532 mutex_exit(&nsrv->async_write_lock); 1533 1534 /* 1535 * Step through the list of requests in this cluster. 1536 * We need to check permissions to make sure that all 1537 * of the requests have sufficient permission to write 1538 * the file. A cluster can be composed of requests 1539 * from different clients and different users on each 1540 * client. 1541 * 1542 * As a side effect, we also calculate the size of the 1543 * byte range that this cluster encompasses. 1544 */ 1545 rp = nlp->list; 1546 off = rp->wa->wa_offset; 1547 len = (uint_t)0; 1548 do { 1549 if (rdonly(rp->ro, vp)) { 1550 rp->ns->ns_status = NFSERR_ROFS; 1551 t_flag = curthread->t_flag & T_WOULDBLOCK; 1552 rp->thread->t_flag |= t_flag; 1553 continue; 1554 } 1555 1556 va.va_mask = AT_UID|AT_MODE; 1557 1558 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1559 1560 if (!error) { 1561 if (crgetuid(rp->cr) != va.va_uid) { 1562 /* 1563 * This is a kludge to allow writes of files 1564 * created with read only permission. The 1565 * owner of the file is always allowed to 1566 * write it. 1567 */ 1568 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1569 } 1570 if (!error && MANDLOCK(vp, va.va_mode)) 1571 error = EACCES; 1572 } 1573 1574 /* 1575 * Check for a conflict with a nbmand-locked region. 1576 */ 1577 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1578 rp->wa->wa_count, 0, NULL)) { 1579 error = EACCES; 1580 } 1581 1582 if (error) { 1583 rp->ns->ns_status = puterrno(error); 1584 t_flag = curthread->t_flag & T_WOULDBLOCK; 1585 rp->thread->t_flag |= t_flag; 1586 continue; 1587 } 1588 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1589 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1590 } while ((rp = rp->list) != NULL); 1591 1592 /* 1593 * Step through the cluster attempting to gather as many 1594 * requests which are contiguous as possible. These 1595 * contiguous requests are handled via one call to VOP_WRITE 1596 * instead of different calls to VOP_WRITE. We also keep 1597 * track of the fact that any data was written. 1598 */ 1599 rp = nlp->list; 1600 data_written = 0; 1601 do { 1602 /* 1603 * Skip any requests which are already marked as having an 1604 * error. 1605 */ 1606 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1607 rp = rp->list; 1608 continue; 1609 } 1610 1611 /* 1612 * Count the number of iovec's which are required 1613 * to handle this set of requests. One iovec is 1614 * needed for each data buffer, whether addressed 1615 * by wa_data or by the b_rptr pointers in the 1616 * mblk chains. 1617 */ 1618 iovcnt = 0; 1619 lrp = rp; 1620 for (;;) { 1621 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1622 iovcnt++; 1623 else { 1624 m = lrp->wa->wa_mblk; 1625 while (m != NULL) { 1626 iovcnt++; 1627 m = m->b_cont; 1628 } 1629 } 1630 if (lrp->list == NULL || 1631 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1632 lrp->wa->wa_offset + lrp->wa->wa_count != 1633 lrp->list->wa->wa_offset) { 1634 lrp = lrp->list; 1635 break; 1636 } 1637 lrp = lrp->list; 1638 } 1639 1640 if (iovcnt <= MAXCLIOVECS) { 1641 #ifdef DEBUG 1642 rfs_write_hits++; 1643 #endif 1644 niovp = iov; 1645 } else { 1646 #ifdef DEBUG 1647 rfs_write_misses++; 1648 #endif 1649 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1650 } 1651 /* 1652 * Put together the scatter/gather iovecs. 1653 */ 1654 iovp = niovp; 1655 trp = rp; 1656 count = 0; 1657 do { 1658 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1659 if (trp->wa->wa_rlist) { 1660 iovp->iov_base = 1661 (char *)((trp->wa->wa_rlist)-> 1662 u.c_daddr3); 1663 iovp->iov_len = trp->wa->wa_count; 1664 } else { 1665 iovp->iov_base = trp->wa->wa_data; 1666 iovp->iov_len = trp->wa->wa_count; 1667 } 1668 iovp++; 1669 } else { 1670 m = trp->wa->wa_mblk; 1671 rcount = trp->wa->wa_count; 1672 while (m != NULL) { 1673 iovp->iov_base = (caddr_t)m->b_rptr; 1674 iovp->iov_len = (m->b_wptr - m->b_rptr); 1675 rcount -= iovp->iov_len; 1676 if (rcount < 0) 1677 iovp->iov_len += rcount; 1678 iovp++; 1679 if (rcount <= 0) 1680 break; 1681 m = m->b_cont; 1682 } 1683 } 1684 count += trp->wa->wa_count; 1685 trp = trp->list; 1686 } while (trp != lrp); 1687 1688 uio.uio_iov = niovp; 1689 uio.uio_iovcnt = iovcnt; 1690 uio.uio_segflg = UIO_SYSSPACE; 1691 uio.uio_extflg = UIO_COPY_DEFAULT; 1692 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1693 uio.uio_resid = count; 1694 /* 1695 * The limit is checked on the client. We 1696 * should allow any size writes here. 1697 */ 1698 uio.uio_llimit = curproc->p_fsz_ctl; 1699 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1700 if (rlimit < (rlim64_t)uio.uio_resid) 1701 uio.uio_resid = (uint_t)rlimit; 1702 1703 /* 1704 * For now we assume no append mode. 1705 */ 1706 1707 /* 1708 * We're changing creds because VM may fault 1709 * and we need the cred of the current 1710 * thread to be used if quota * checking is 1711 * enabled. 1712 */ 1713 savecred = curthread->t_cred; 1714 curthread->t_cred = cr; 1715 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1716 curthread->t_cred = savecred; 1717 1718 /* check if a monitor detected a delegation conflict */ 1719 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1720 /* mark as wouldblock so response is dropped */ 1721 curthread->t_flag |= T_WOULDBLOCK; 1722 1723 if (niovp != iov) 1724 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1725 1726 if (!error) { 1727 data_written = 1; 1728 /* 1729 * Get attributes again so we send the latest mod 1730 * time to the client side for its cache. 1731 */ 1732 va.va_mask = AT_ALL; /* now we want everything */ 1733 1734 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1735 1736 if (!error) 1737 acl_perm(vp, exi, &va, rp->cr); 1738 } 1739 1740 /* 1741 * Fill in the status responses for each request 1742 * which was just handled. Also, copy the latest 1743 * attributes in to the attribute responses if 1744 * appropriate. 1745 */ 1746 t_flag = curthread->t_flag & T_WOULDBLOCK; 1747 do { 1748 rp->thread->t_flag |= t_flag; 1749 /* check for overflows */ 1750 if (!error) { 1751 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1752 } 1753 rp->ns->ns_status = puterrno(error); 1754 rp = rp->list; 1755 } while (rp != lrp); 1756 } while (rp != NULL); 1757 1758 /* 1759 * If any data was written at all, then we need to flush 1760 * the data and metadata to stable storage. 1761 */ 1762 if (data_written) { 1763 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1764 1765 if (!error) { 1766 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1767 } 1768 } 1769 1770 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1771 1772 if (in_crit) 1773 nbl_end_crit(vp); 1774 VN_RELE(vp); 1775 1776 t_flag = curthread->t_flag & T_WOULDBLOCK; 1777 mutex_enter(&nsrv->async_write_lock); 1778 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1779 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1780 rp->ns->ns_status = puterrno(error); 1781 rp->thread->t_flag |= t_flag; 1782 } 1783 } 1784 cv_broadcast(&nlp->cv); 1785 mutex_exit(&nsrv->async_write_lock); 1786 1787 } 1788 1789 void * 1790 rfs_write_getfh(struct nfswriteargs *wa) 1791 { 1792 return (&wa->wa_fhandle); 1793 } 1794 1795 /* 1796 * Create a file. 1797 * Creates a file with given attributes and returns those attributes 1798 * and an fhandle for the new file. 1799 */ 1800 void 1801 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1802 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1803 { 1804 int error; 1805 int lookuperr; 1806 int in_crit = 0; 1807 struct vattr va; 1808 vnode_t *vp; 1809 vnode_t *realvp; 1810 vnode_t *dvp; 1811 char *name = args->ca_da.da_name; 1812 vnode_t *tvp = NULL; 1813 int mode; 1814 int lookup_ok; 1815 bool_t trunc; 1816 struct sockaddr *ca; 1817 1818 /* 1819 * Disallow NULL paths 1820 */ 1821 if (name == NULL || *name == '\0') { 1822 dr->dr_status = NFSERR_ACCES; 1823 return; 1824 } 1825 1826 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1827 if (dvp == NULL) { 1828 dr->dr_status = NFSERR_STALE; 1829 return; 1830 } 1831 1832 error = sattr_to_vattr(args->ca_sa, &va); 1833 if (error) { 1834 dr->dr_status = puterrno(error); 1835 return; 1836 } 1837 1838 /* 1839 * Must specify the mode. 1840 */ 1841 if (!(va.va_mask & AT_MODE)) { 1842 VN_RELE(dvp); 1843 dr->dr_status = NFSERR_INVAL; 1844 return; 1845 } 1846 1847 /* 1848 * This is a completely gross hack to make mknod 1849 * work over the wire until we can wack the protocol 1850 */ 1851 if ((va.va_mode & IFMT) == IFCHR) { 1852 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1853 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1854 else { 1855 va.va_type = VCHR; 1856 /* 1857 * uncompress the received dev_t 1858 * if the top half is zero indicating a request 1859 * from an `older style' OS. 1860 */ 1861 if ((va.va_size & 0xffff0000) == 0) 1862 va.va_rdev = nfsv2_expdev(va.va_size); 1863 else 1864 va.va_rdev = (dev_t)va.va_size; 1865 } 1866 va.va_mask &= ~AT_SIZE; 1867 } else if ((va.va_mode & IFMT) == IFBLK) { 1868 va.va_type = VBLK; 1869 /* 1870 * uncompress the received dev_t 1871 * if the top half is zero indicating a request 1872 * from an `older style' OS. 1873 */ 1874 if ((va.va_size & 0xffff0000) == 0) 1875 va.va_rdev = nfsv2_expdev(va.va_size); 1876 else 1877 va.va_rdev = (dev_t)va.va_size; 1878 va.va_mask &= ~AT_SIZE; 1879 } else if ((va.va_mode & IFMT) == IFSOCK) { 1880 va.va_type = VSOCK; 1881 } else { 1882 va.va_type = VREG; 1883 } 1884 va.va_mode &= ~IFMT; 1885 va.va_mask |= AT_TYPE; 1886 1887 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1888 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1889 MAXPATHLEN); 1890 if (name == NULL) { 1891 dr->dr_status = puterrno(EINVAL); 1892 return; 1893 } 1894 1895 /* 1896 * Why was the choice made to use VWRITE as the mode to the 1897 * call to VOP_CREATE ? This results in a bug. When a client 1898 * opens a file that already exists and is RDONLY, the second 1899 * open fails with an EACESS because of the mode. 1900 * bug ID 1054648. 1901 */ 1902 lookup_ok = 0; 1903 mode = VWRITE; 1904 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1905 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1906 NULL, NULL, NULL); 1907 if (!error) { 1908 struct vattr at; 1909 1910 lookup_ok = 1; 1911 at.va_mask = AT_MODE; 1912 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1913 if (!error) 1914 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1915 VN_RELE(tvp); 1916 tvp = NULL; 1917 } 1918 } 1919 1920 if (!lookup_ok) { 1921 if (rdonly(ro, dvp)) { 1922 error = EROFS; 1923 } else if (va.va_type != VREG && va.va_type != VFIFO && 1924 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1925 error = EPERM; 1926 } else { 1927 error = 0; 1928 } 1929 } 1930 1931 /* 1932 * If file size is being modified on an already existing file 1933 * make sure that there are no conflicting non-blocking mandatory 1934 * locks in the region being manipulated. Return EACCES if there 1935 * are conflicting locks. 1936 */ 1937 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1938 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1939 NULL, NULL, NULL); 1940 1941 if (!lookuperr && 1942 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1943 VN_RELE(tvp); 1944 curthread->t_flag |= T_WOULDBLOCK; 1945 goto out; 1946 } 1947 1948 if (!lookuperr && nbl_need_check(tvp)) { 1949 /* 1950 * The file exists. Now check if it has any 1951 * conflicting non-blocking mandatory locks 1952 * in the region being changed. 1953 */ 1954 struct vattr bva; 1955 u_offset_t offset; 1956 ssize_t length; 1957 1958 nbl_start_crit(tvp, RW_READER); 1959 in_crit = 1; 1960 1961 bva.va_mask = AT_SIZE; 1962 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1963 if (!error) { 1964 if (va.va_size < bva.va_size) { 1965 offset = va.va_size; 1966 length = bva.va_size - va.va_size; 1967 } else { 1968 offset = bva.va_size; 1969 length = va.va_size - bva.va_size; 1970 } 1971 if (length) { 1972 if (nbl_conflict(tvp, NBL_WRITE, 1973 offset, length, 0, NULL)) { 1974 error = EACCES; 1975 } 1976 } 1977 } 1978 if (error) { 1979 nbl_end_crit(tvp); 1980 VN_RELE(tvp); 1981 in_crit = 0; 1982 } 1983 } else if (tvp != NULL) { 1984 VN_RELE(tvp); 1985 } 1986 } 1987 1988 if (!error) { 1989 /* 1990 * If filesystem is shared with nosuid the remove any 1991 * setuid/setgid bits on create. 1992 */ 1993 if (va.va_type == VREG && 1994 exi->exi_export.ex_flags & EX_NOSUID) 1995 va.va_mode &= ~(VSUID | VSGID); 1996 1997 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1998 NULL, NULL); 1999 2000 if (!error) { 2001 2002 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 2003 trunc = TRUE; 2004 else 2005 trunc = FALSE; 2006 2007 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 2008 VN_RELE(vp); 2009 curthread->t_flag |= T_WOULDBLOCK; 2010 goto out; 2011 } 2012 va.va_mask = AT_ALL; 2013 2014 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 2015 2016 /* check for overflows */ 2017 if (!error) { 2018 acl_perm(vp, exi, &va, cr); 2019 error = vattr_to_nattr(&va, &dr->dr_attr); 2020 if (!error) { 2021 error = makefh(&dr->dr_fhandle, vp, 2022 exi); 2023 } 2024 } 2025 /* 2026 * Force modified metadata out to stable storage. 2027 * 2028 * if a underlying vp exists, pass it to VOP_FSYNC 2029 */ 2030 if (VOP_REALVP(vp, &realvp, NULL) == 0) 2031 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 2032 else 2033 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2034 VN_RELE(vp); 2035 } 2036 2037 if (in_crit) { 2038 nbl_end_crit(tvp); 2039 VN_RELE(tvp); 2040 } 2041 } 2042 2043 /* 2044 * Force modified data and metadata out to stable storage. 2045 */ 2046 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2047 2048 out: 2049 2050 VN_RELE(dvp); 2051 2052 dr->dr_status = puterrno(error); 2053 2054 if (name != args->ca_da.da_name) 2055 kmem_free(name, MAXPATHLEN); 2056 } 2057 void * 2058 rfs_create_getfh(struct nfscreatargs *args) 2059 { 2060 return (args->ca_da.da_fhandle); 2061 } 2062 2063 /* 2064 * Remove a file. 2065 * Remove named file from parent directory. 2066 */ 2067 /* ARGSUSED */ 2068 void 2069 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 2070 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2071 { 2072 int error = 0; 2073 vnode_t *vp; 2074 vnode_t *targvp; 2075 int in_crit = 0; 2076 2077 /* 2078 * Disallow NULL paths 2079 */ 2080 if (da->da_name == NULL || *da->da_name == '\0') { 2081 *status = NFSERR_ACCES; 2082 return; 2083 } 2084 2085 vp = nfs_fhtovp(da->da_fhandle, exi); 2086 if (vp == NULL) { 2087 *status = NFSERR_STALE; 2088 return; 2089 } 2090 2091 if (rdonly(ro, vp)) { 2092 VN_RELE(vp); 2093 *status = NFSERR_ROFS; 2094 return; 2095 } 2096 2097 /* 2098 * Check for a conflict with a non-blocking mandatory share reservation. 2099 */ 2100 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 2101 NULL, cr, NULL, NULL, NULL); 2102 if (error != 0) { 2103 VN_RELE(vp); 2104 *status = puterrno(error); 2105 return; 2106 } 2107 2108 /* 2109 * If the file is delegated to an v4 client, then initiate 2110 * recall and drop this request (by setting T_WOULDBLOCK). 2111 * The client will eventually re-transmit the request and 2112 * (hopefully), by then, the v4 client will have returned 2113 * the delegation. 2114 */ 2115 2116 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2117 VN_RELE(vp); 2118 VN_RELE(targvp); 2119 curthread->t_flag |= T_WOULDBLOCK; 2120 return; 2121 } 2122 2123 if (nbl_need_check(targvp)) { 2124 nbl_start_crit(targvp, RW_READER); 2125 in_crit = 1; 2126 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 2127 error = EACCES; 2128 goto out; 2129 } 2130 } 2131 2132 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0); 2133 2134 /* 2135 * Force modified data and metadata out to stable storage. 2136 */ 2137 (void) VOP_FSYNC(vp, 0, cr, NULL); 2138 2139 out: 2140 if (in_crit) 2141 nbl_end_crit(targvp); 2142 VN_RELE(targvp); 2143 VN_RELE(vp); 2144 2145 *status = puterrno(error); 2146 2147 } 2148 2149 void * 2150 rfs_remove_getfh(struct nfsdiropargs *da) 2151 { 2152 return (da->da_fhandle); 2153 } 2154 2155 /* 2156 * rename a file 2157 * Give a file (from) a new name (to). 2158 */ 2159 /* ARGSUSED */ 2160 void 2161 rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, 2162 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2163 { 2164 int error = 0; 2165 vnode_t *fromvp; 2166 vnode_t *tovp; 2167 struct exportinfo *to_exi; 2168 fhandle_t *fh; 2169 vnode_t *srcvp; 2170 vnode_t *targvp; 2171 int in_crit = 0; 2172 2173 fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi); 2174 if (fromvp == NULL) { 2175 *status = NFSERR_STALE; 2176 return; 2177 } 2178 2179 fh = args->rna_to.da_fhandle; 2180 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2181 if (to_exi == NULL) { 2182 VN_RELE(fromvp); 2183 *status = NFSERR_ACCES; 2184 return; 2185 } 2186 exi_rele(to_exi); 2187 2188 if (to_exi != exi) { 2189 VN_RELE(fromvp); 2190 *status = NFSERR_XDEV; 2191 return; 2192 } 2193 2194 tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi); 2195 if (tovp == NULL) { 2196 VN_RELE(fromvp); 2197 *status = NFSERR_STALE; 2198 return; 2199 } 2200 2201 if (fromvp->v_type != VDIR || tovp->v_type != VDIR) { 2202 VN_RELE(tovp); 2203 VN_RELE(fromvp); 2204 *status = NFSERR_NOTDIR; 2205 return; 2206 } 2207 2208 /* 2209 * Disallow NULL paths 2210 */ 2211 if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' || 2212 args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') { 2213 VN_RELE(tovp); 2214 VN_RELE(fromvp); 2215 *status = NFSERR_ACCES; 2216 return; 2217 } 2218 2219 if (rdonly(ro, tovp)) { 2220 VN_RELE(tovp); 2221 VN_RELE(fromvp); 2222 *status = NFSERR_ROFS; 2223 return; 2224 } 2225 2226 /* 2227 * Check for a conflict with a non-blocking mandatory share reservation. 2228 */ 2229 error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0, 2230 NULL, cr, NULL, NULL, NULL); 2231 if (error != 0) { 2232 VN_RELE(tovp); 2233 VN_RELE(fromvp); 2234 *status = puterrno(error); 2235 return; 2236 } 2237 2238 /* Check for delegations on the source file */ 2239 2240 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) { 2241 VN_RELE(tovp); 2242 VN_RELE(fromvp); 2243 VN_RELE(srcvp); 2244 curthread->t_flag |= T_WOULDBLOCK; 2245 return; 2246 } 2247 2248 /* Check for delegation on the file being renamed over, if it exists */ 2249 2250 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && 2251 VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, 2252 NULL, NULL, NULL) == 0) { 2253 2254 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 2255 VN_RELE(tovp); 2256 VN_RELE(fromvp); 2257 VN_RELE(srcvp); 2258 VN_RELE(targvp); 2259 curthread->t_flag |= T_WOULDBLOCK; 2260 return; 2261 } 2262 VN_RELE(targvp); 2263 } 2264 2265 2266 if (nbl_need_check(srcvp)) { 2267 nbl_start_crit(srcvp, RW_READER); 2268 in_crit = 1; 2269 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { 2270 error = EACCES; 2271 goto out; 2272 } 2273 } 2274 2275 error = VOP_RENAME(fromvp, args->rna_from.da_name, 2276 tovp, args->rna_to.da_name, cr, NULL, 0); 2277 2278 if (error == 0) 2279 vn_renamepath(tovp, srcvp, args->rna_to.da_name, 2280 strlen(args->rna_to.da_name)); 2281 2282 /* 2283 * Force modified data and metadata out to stable storage. 2284 */ 2285 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2286 (void) VOP_FSYNC(fromvp, 0, cr, NULL); 2287 2288 out: 2289 if (in_crit) 2290 nbl_end_crit(srcvp); 2291 VN_RELE(srcvp); 2292 VN_RELE(tovp); 2293 VN_RELE(fromvp); 2294 2295 *status = puterrno(error); 2296 2297 } 2298 void * 2299 rfs_rename_getfh(struct nfsrnmargs *args) 2300 { 2301 return (args->rna_from.da_fhandle); 2302 } 2303 2304 /* 2305 * Link to a file. 2306 * Create a file (to) which is a hard link to the given file (from). 2307 */ 2308 /* ARGSUSED */ 2309 void 2310 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2311 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2312 { 2313 int error; 2314 vnode_t *fromvp; 2315 vnode_t *tovp; 2316 struct exportinfo *to_exi; 2317 fhandle_t *fh; 2318 2319 fromvp = nfs_fhtovp(args->la_from, exi); 2320 if (fromvp == NULL) { 2321 *status = NFSERR_STALE; 2322 return; 2323 } 2324 2325 fh = args->la_to.da_fhandle; 2326 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2327 if (to_exi == NULL) { 2328 VN_RELE(fromvp); 2329 *status = NFSERR_ACCES; 2330 return; 2331 } 2332 exi_rele(to_exi); 2333 2334 if (to_exi != exi) { 2335 VN_RELE(fromvp); 2336 *status = NFSERR_XDEV; 2337 return; 2338 } 2339 2340 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2341 if (tovp == NULL) { 2342 VN_RELE(fromvp); 2343 *status = NFSERR_STALE; 2344 return; 2345 } 2346 2347 if (tovp->v_type != VDIR) { 2348 VN_RELE(tovp); 2349 VN_RELE(fromvp); 2350 *status = NFSERR_NOTDIR; 2351 return; 2352 } 2353 /* 2354 * Disallow NULL paths 2355 */ 2356 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2357 VN_RELE(tovp); 2358 VN_RELE(fromvp); 2359 *status = NFSERR_ACCES; 2360 return; 2361 } 2362 2363 if (rdonly(ro, tovp)) { 2364 VN_RELE(tovp); 2365 VN_RELE(fromvp); 2366 *status = NFSERR_ROFS; 2367 return; 2368 } 2369 2370 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2371 2372 /* 2373 * Force modified data and metadata out to stable storage. 2374 */ 2375 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2376 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2377 2378 VN_RELE(tovp); 2379 VN_RELE(fromvp); 2380 2381 *status = puterrno(error); 2382 2383 } 2384 void * 2385 rfs_link_getfh(struct nfslinkargs *args) 2386 { 2387 return (args->la_from); 2388 } 2389 2390 /* 2391 * Symbolicly link to a file. 2392 * Create a file (to) with the given attributes which is a symbolic link 2393 * to the given path name (to). 2394 */ 2395 void 2396 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2397 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2398 { 2399 int error; 2400 struct vattr va; 2401 vnode_t *vp; 2402 vnode_t *svp; 2403 int lerror; 2404 struct sockaddr *ca; 2405 char *name = NULL; 2406 2407 /* 2408 * Disallow NULL paths 2409 */ 2410 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2411 *status = NFSERR_ACCES; 2412 return; 2413 } 2414 2415 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2416 if (vp == NULL) { 2417 *status = NFSERR_STALE; 2418 return; 2419 } 2420 2421 if (rdonly(ro, vp)) { 2422 VN_RELE(vp); 2423 *status = NFSERR_ROFS; 2424 return; 2425 } 2426 2427 error = sattr_to_vattr(args->sla_sa, &va); 2428 if (error) { 2429 VN_RELE(vp); 2430 *status = puterrno(error); 2431 return; 2432 } 2433 2434 if (!(va.va_mask & AT_MODE)) { 2435 VN_RELE(vp); 2436 *status = NFSERR_INVAL; 2437 return; 2438 } 2439 2440 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2441 name = nfscmd_convname(ca, exi, args->sla_tnm, 2442 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2443 2444 if (name == NULL) { 2445 *status = NFSERR_ACCES; 2446 return; 2447 } 2448 2449 va.va_type = VLNK; 2450 va.va_mask |= AT_TYPE; 2451 2452 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2453 2454 /* 2455 * Force new data and metadata out to stable storage. 2456 */ 2457 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2458 NULL, cr, NULL, NULL, NULL); 2459 2460 if (!lerror) { 2461 (void) VOP_FSYNC(svp, 0, cr, NULL); 2462 VN_RELE(svp); 2463 } 2464 2465 /* 2466 * Force modified data and metadata out to stable storage. 2467 */ 2468 (void) VOP_FSYNC(vp, 0, cr, NULL); 2469 2470 VN_RELE(vp); 2471 2472 *status = puterrno(error); 2473 if (name != args->sla_tnm) 2474 kmem_free(name, MAXPATHLEN); 2475 2476 } 2477 void * 2478 rfs_symlink_getfh(struct nfsslargs *args) 2479 { 2480 return (args->sla_from.da_fhandle); 2481 } 2482 2483 /* 2484 * Make a directory. 2485 * Create a directory with the given name, parent directory, and attributes. 2486 * Returns a file handle and attributes for the new directory. 2487 */ 2488 /* ARGSUSED */ 2489 void 2490 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2491 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2492 { 2493 int error; 2494 struct vattr va; 2495 vnode_t *dvp = NULL; 2496 vnode_t *vp; 2497 char *name = args->ca_da.da_name; 2498 2499 /* 2500 * Disallow NULL paths 2501 */ 2502 if (name == NULL || *name == '\0') { 2503 dr->dr_status = NFSERR_ACCES; 2504 return; 2505 } 2506 2507 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2508 if (vp == NULL) { 2509 dr->dr_status = NFSERR_STALE; 2510 return; 2511 } 2512 2513 if (rdonly(ro, vp)) { 2514 VN_RELE(vp); 2515 dr->dr_status = NFSERR_ROFS; 2516 return; 2517 } 2518 2519 error = sattr_to_vattr(args->ca_sa, &va); 2520 if (error) { 2521 VN_RELE(vp); 2522 dr->dr_status = puterrno(error); 2523 return; 2524 } 2525 2526 if (!(va.va_mask & AT_MODE)) { 2527 VN_RELE(vp); 2528 dr->dr_status = NFSERR_INVAL; 2529 return; 2530 } 2531 2532 va.va_type = VDIR; 2533 va.va_mask |= AT_TYPE; 2534 2535 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2536 2537 if (!error) { 2538 /* 2539 * Attribtutes of the newly created directory should 2540 * be returned to the client. 2541 */ 2542 va.va_mask = AT_ALL; /* We want everything */ 2543 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2544 2545 /* check for overflows */ 2546 if (!error) { 2547 acl_perm(vp, exi, &va, cr); 2548 error = vattr_to_nattr(&va, &dr->dr_attr); 2549 if (!error) { 2550 error = makefh(&dr->dr_fhandle, dvp, exi); 2551 } 2552 } 2553 /* 2554 * Force new data and metadata out to stable storage. 2555 */ 2556 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2557 VN_RELE(dvp); 2558 } 2559 2560 /* 2561 * Force modified data and metadata out to stable storage. 2562 */ 2563 (void) VOP_FSYNC(vp, 0, cr, NULL); 2564 2565 VN_RELE(vp); 2566 2567 dr->dr_status = puterrno(error); 2568 2569 } 2570 void * 2571 rfs_mkdir_getfh(struct nfscreatargs *args) 2572 { 2573 return (args->ca_da.da_fhandle); 2574 } 2575 2576 /* 2577 * Remove a directory. 2578 * Remove the given directory name from the given parent directory. 2579 */ 2580 /* ARGSUSED */ 2581 void 2582 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2583 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2584 { 2585 int error; 2586 vnode_t *vp; 2587 2588 /* 2589 * Disallow NULL paths 2590 */ 2591 if (da->da_name == NULL || *da->da_name == '\0') { 2592 *status = NFSERR_ACCES; 2593 return; 2594 } 2595 2596 vp = nfs_fhtovp(da->da_fhandle, exi); 2597 if (vp == NULL) { 2598 *status = NFSERR_STALE; 2599 return; 2600 } 2601 2602 if (rdonly(ro, vp)) { 2603 VN_RELE(vp); 2604 *status = NFSERR_ROFS; 2605 return; 2606 } 2607 2608 /* 2609 * VOP_RMDIR takes a third argument (the current 2610 * directory of the process). That's because someone 2611 * wants to return EINVAL if one tries to remove ".". 2612 * Of course, NFS servers have no idea what their 2613 * clients' current directories are. We fake it by 2614 * supplying a vnode known to exist and illegal to 2615 * remove. 2616 */ 2617 error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0); 2618 2619 /* 2620 * Force modified data and metadata out to stable storage. 2621 */ 2622 (void) VOP_FSYNC(vp, 0, cr, NULL); 2623 2624 VN_RELE(vp); 2625 2626 /* 2627 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2628 * if the directory is not empty. A System V NFS server 2629 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2630 * over the wire. 2631 */ 2632 if (error == EEXIST) 2633 *status = NFSERR_NOTEMPTY; 2634 else 2635 *status = puterrno(error); 2636 2637 } 2638 void * 2639 rfs_rmdir_getfh(struct nfsdiropargs *da) 2640 { 2641 return (da->da_fhandle); 2642 } 2643 2644 /* ARGSUSED */ 2645 void 2646 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2647 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2648 { 2649 int error; 2650 int iseof; 2651 struct iovec iov; 2652 struct uio uio; 2653 vnode_t *vp; 2654 char *ndata = NULL; 2655 struct sockaddr *ca; 2656 size_t nents; 2657 int ret; 2658 2659 vp = nfs_fhtovp(&rda->rda_fh, exi); 2660 if (vp == NULL) { 2661 rd->rd_entries = NULL; 2662 rd->rd_status = NFSERR_STALE; 2663 return; 2664 } 2665 2666 if (vp->v_type != VDIR) { 2667 VN_RELE(vp); 2668 rd->rd_entries = NULL; 2669 rd->rd_status = NFSERR_NOTDIR; 2670 return; 2671 } 2672 2673 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2674 2675 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2676 2677 if (error) { 2678 rd->rd_entries = NULL; 2679 goto bad; 2680 } 2681 2682 if (rda->rda_count == 0) { 2683 rd->rd_entries = NULL; 2684 rd->rd_size = 0; 2685 rd->rd_eof = FALSE; 2686 goto bad; 2687 } 2688 2689 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2690 2691 /* 2692 * Allocate data for entries. This will be freed by rfs_rddirfree. 2693 */ 2694 rd->rd_bufsize = (uint_t)rda->rda_count; 2695 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2696 2697 /* 2698 * Set up io vector to read directory data 2699 */ 2700 iov.iov_base = (caddr_t)rd->rd_entries; 2701 iov.iov_len = rda->rda_count; 2702 uio.uio_iov = &iov; 2703 uio.uio_iovcnt = 1; 2704 uio.uio_segflg = UIO_SYSSPACE; 2705 uio.uio_extflg = UIO_COPY_CACHED; 2706 uio.uio_loffset = (offset_t)rda->rda_offset; 2707 uio.uio_resid = rda->rda_count; 2708 2709 /* 2710 * read directory 2711 */ 2712 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2713 2714 /* 2715 * Clean up 2716 */ 2717 if (!error) { 2718 /* 2719 * set size and eof 2720 */ 2721 if (uio.uio_resid == rda->rda_count) { 2722 rd->rd_size = 0; 2723 rd->rd_eof = TRUE; 2724 } else { 2725 rd->rd_size = (uint32_t)(rda->rda_count - 2726 uio.uio_resid); 2727 rd->rd_eof = iseof ? TRUE : FALSE; 2728 } 2729 } 2730 2731 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2732 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2733 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2734 rda->rda_count, &ndata); 2735 2736 if (ret != 0) { 2737 size_t dropbytes; 2738 /* 2739 * We had to drop one or more entries in order to fit 2740 * during the character conversion. We need to patch 2741 * up the size and eof info. 2742 */ 2743 if (rd->rd_eof) 2744 rd->rd_eof = FALSE; 2745 dropbytes = nfscmd_dropped_entrysize( 2746 (struct dirent64 *)rd->rd_entries, nents, ret); 2747 rd->rd_size -= dropbytes; 2748 } 2749 if (ndata == NULL) { 2750 ndata = (char *)rd->rd_entries; 2751 } else if (ndata != (char *)rd->rd_entries) { 2752 kmem_free(rd->rd_entries, rd->rd_bufsize); 2753 rd->rd_entries = (void *)ndata; 2754 rd->rd_bufsize = rda->rda_count; 2755 } 2756 2757 bad: 2758 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2759 2760 #if 0 /* notyet */ 2761 /* 2762 * Don't do this. It causes local disk writes when just 2763 * reading the file and the overhead is deemed larger 2764 * than the benefit. 2765 */ 2766 /* 2767 * Force modified metadata out to stable storage. 2768 */ 2769 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2770 #endif 2771 2772 VN_RELE(vp); 2773 2774 rd->rd_status = puterrno(error); 2775 2776 } 2777 void * 2778 rfs_readdir_getfh(struct nfsrddirargs *rda) 2779 { 2780 return (&rda->rda_fh); 2781 } 2782 void 2783 rfs_rddirfree(struct nfsrddirres *rd) 2784 { 2785 if (rd->rd_entries != NULL) 2786 kmem_free(rd->rd_entries, rd->rd_bufsize); 2787 } 2788 2789 /* ARGSUSED */ 2790 void 2791 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2792 struct svc_req *req, cred_t *cr, bool_t ro) 2793 { 2794 int error; 2795 struct statvfs64 sb; 2796 vnode_t *vp; 2797 2798 vp = nfs_fhtovp(fh, exi); 2799 if (vp == NULL) { 2800 fs->fs_status = NFSERR_STALE; 2801 return; 2802 } 2803 2804 error = VFS_STATVFS(vp->v_vfsp, &sb); 2805 2806 if (!error) { 2807 fs->fs_tsize = nfstsize(); 2808 fs->fs_bsize = sb.f_frsize; 2809 fs->fs_blocks = sb.f_blocks; 2810 fs->fs_bfree = sb.f_bfree; 2811 fs->fs_bavail = sb.f_bavail; 2812 } 2813 2814 VN_RELE(vp); 2815 2816 fs->fs_status = puterrno(error); 2817 2818 } 2819 void * 2820 rfs_statfs_getfh(fhandle_t *fh) 2821 { 2822 return (fh); 2823 } 2824 2825 static int 2826 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2827 { 2828 vap->va_mask = 0; 2829 2830 /* 2831 * There was a sign extension bug in some VFS based systems 2832 * which stored the mode as a short. When it would get 2833 * assigned to a u_long, no sign extension would occur. 2834 * It needed to, but this wasn't noticed because sa_mode 2835 * would then get assigned back to the short, thus ignoring 2836 * the upper 16 bits of sa_mode. 2837 * 2838 * To make this implementation work for both broken 2839 * clients and good clients, we check for both versions 2840 * of the mode. 2841 */ 2842 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2843 sa->sa_mode != (uint32_t)-1) { 2844 vap->va_mask |= AT_MODE; 2845 vap->va_mode = sa->sa_mode; 2846 } 2847 if (sa->sa_uid != (uint32_t)-1) { 2848 vap->va_mask |= AT_UID; 2849 vap->va_uid = sa->sa_uid; 2850 } 2851 if (sa->sa_gid != (uint32_t)-1) { 2852 vap->va_mask |= AT_GID; 2853 vap->va_gid = sa->sa_gid; 2854 } 2855 if (sa->sa_size != (uint32_t)-1) { 2856 vap->va_mask |= AT_SIZE; 2857 vap->va_size = sa->sa_size; 2858 } 2859 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2860 sa->sa_atime.tv_usec != (int32_t)-1) { 2861 #ifndef _LP64 2862 /* return error if time overflow */ 2863 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2864 return (EOVERFLOW); 2865 #endif 2866 vap->va_mask |= AT_ATIME; 2867 /* 2868 * nfs protocol defines times as unsigned so don't extend sign, 2869 * unless sysadmin set nfs_allow_preepoch_time. 2870 */ 2871 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2872 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2873 } 2874 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2875 sa->sa_mtime.tv_usec != (int32_t)-1) { 2876 #ifndef _LP64 2877 /* return error if time overflow */ 2878 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2879 return (EOVERFLOW); 2880 #endif 2881 vap->va_mask |= AT_MTIME; 2882 /* 2883 * nfs protocol defines times as unsigned so don't extend sign, 2884 * unless sysadmin set nfs_allow_preepoch_time. 2885 */ 2886 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2887 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2888 } 2889 return (0); 2890 } 2891 2892 static const enum nfsftype vt_to_nf[] = { 2893 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2894 }; 2895 2896 /* 2897 * check the following fields for overflow: nodeid, size, and time. 2898 * There could be a problem when converting 64-bit LP64 fields 2899 * into 32-bit ones. Return an error if there is an overflow. 2900 */ 2901 int 2902 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2903 { 2904 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2905 na->na_type = vt_to_nf[vap->va_type]; 2906 2907 if (vap->va_mode == (unsigned short) -1) 2908 na->na_mode = (uint32_t)-1; 2909 else 2910 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2911 2912 if (vap->va_uid == (unsigned short)(-1)) 2913 na->na_uid = (uint32_t)(-1); 2914 else if (vap->va_uid == UID_NOBODY) 2915 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2916 else 2917 na->na_uid = vap->va_uid; 2918 2919 if (vap->va_gid == (unsigned short)(-1)) 2920 na->na_gid = (uint32_t)-1; 2921 else if (vap->va_gid == GID_NOBODY) 2922 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2923 else 2924 na->na_gid = vap->va_gid; 2925 2926 /* 2927 * Do we need to check fsid for overflow? It is 64-bit in the 2928 * vattr, but are bigger than 32 bit values supported? 2929 */ 2930 na->na_fsid = vap->va_fsid; 2931 2932 na->na_nodeid = vap->va_nodeid; 2933 2934 /* 2935 * Check to make sure that the nodeid is representable over the 2936 * wire without losing bits. 2937 */ 2938 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2939 return (EFBIG); 2940 na->na_nlink = vap->va_nlink; 2941 2942 /* 2943 * Check for big files here, instead of at the caller. See 2944 * comments in cstat for large special file explanation. 2945 */ 2946 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2947 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2948 return (EFBIG); 2949 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2950 /* UNKNOWN_SIZE | OVERFLOW */ 2951 na->na_size = MAXOFF32_T; 2952 } else 2953 na->na_size = vap->va_size; 2954 } else 2955 na->na_size = vap->va_size; 2956 2957 /* 2958 * If the vnode times overflow the 32-bit times that NFS2 2959 * uses on the wire then return an error. 2960 */ 2961 if (!NFS_VAP_TIME_OK(vap)) { 2962 return (EOVERFLOW); 2963 } 2964 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2965 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2966 2967 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2968 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2969 2970 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2971 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2972 2973 /* 2974 * If the dev_t will fit into 16 bits then compress 2975 * it, otherwise leave it alone. See comments in 2976 * nfs_client.c. 2977 */ 2978 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2979 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2980 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2981 else 2982 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2983 2984 na->na_blocks = vap->va_nblocks; 2985 na->na_blocksize = vap->va_blksize; 2986 2987 /* 2988 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2989 * over-the-wire protocols for named-pipe vnodes. It remaps the 2990 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2991 * 2992 * BUYER BEWARE: 2993 * If you are porting the NFS to a non-Sun server, you probably 2994 * don't want to include the following block of code. The 2995 * over-the-wire special file types will be changing with the 2996 * NFS Protocol Revision. 2997 */ 2998 if (vap->va_type == VFIFO) 2999 NA_SETFIFO(na); 3000 return (0); 3001 } 3002 3003 /* 3004 * acl v2 support: returns approximate permission. 3005 * default: returns minimal permission (more restrictive) 3006 * aclok: returns maximal permission (less restrictive) 3007 * This routine changes the permissions that are alaredy in *va. 3008 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 3009 * CLASS_OBJ is always the same as GROUP_OBJ entry. 3010 */ 3011 static void 3012 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 3013 { 3014 vsecattr_t vsa; 3015 int aclcnt; 3016 aclent_t *aclentp; 3017 mode_t mask_perm; 3018 mode_t grp_perm; 3019 mode_t other_perm; 3020 mode_t other_orig; 3021 int error; 3022 3023 /* dont care default acl */ 3024 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 3025 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 3026 3027 if (!error) { 3028 aclcnt = vsa.vsa_aclcnt; 3029 if (aclcnt > MIN_ACL_ENTRIES) { 3030 /* non-trivial ACL */ 3031 aclentp = vsa.vsa_aclentp; 3032 if (exi->exi_export.ex_flags & EX_ACLOK) { 3033 /* maximal permissions */ 3034 grp_perm = 0; 3035 other_perm = 0; 3036 for (; aclcnt > 0; aclcnt--, aclentp++) { 3037 switch (aclentp->a_type) { 3038 case USER_OBJ: 3039 break; 3040 case USER: 3041 grp_perm |= 3042 aclentp->a_perm << 3; 3043 other_perm |= aclentp->a_perm; 3044 break; 3045 case GROUP_OBJ: 3046 grp_perm |= 3047 aclentp->a_perm << 3; 3048 break; 3049 case GROUP: 3050 other_perm |= aclentp->a_perm; 3051 break; 3052 case OTHER_OBJ: 3053 other_orig = aclentp->a_perm; 3054 break; 3055 case CLASS_OBJ: 3056 mask_perm = aclentp->a_perm; 3057 break; 3058 default: 3059 break; 3060 } 3061 } 3062 grp_perm &= mask_perm << 3; 3063 other_perm &= mask_perm; 3064 other_perm |= other_orig; 3065 3066 } else { 3067 /* minimal permissions */ 3068 grp_perm = 070; 3069 other_perm = 07; 3070 for (; aclcnt > 0; aclcnt--, aclentp++) { 3071 switch (aclentp->a_type) { 3072 case USER_OBJ: 3073 break; 3074 case USER: 3075 case CLASS_OBJ: 3076 grp_perm &= 3077 aclentp->a_perm << 3; 3078 other_perm &= 3079 aclentp->a_perm; 3080 break; 3081 case GROUP_OBJ: 3082 grp_perm &= 3083 aclentp->a_perm << 3; 3084 break; 3085 case GROUP: 3086 other_perm &= 3087 aclentp->a_perm; 3088 break; 3089 case OTHER_OBJ: 3090 other_perm &= 3091 aclentp->a_perm; 3092 break; 3093 default: 3094 break; 3095 } 3096 } 3097 } 3098 /* copy to va */ 3099 va->va_mode &= ~077; 3100 va->va_mode |= grp_perm | other_perm; 3101 } 3102 if (vsa.vsa_aclcnt) 3103 kmem_free(vsa.vsa_aclentp, 3104 vsa.vsa_aclcnt * sizeof (aclent_t)); 3105 } 3106 } 3107 3108 void 3109 rfs_srvrinit(void) 3110 { 3111 nfs2_srv_caller_id = fs_new_caller_id(); 3112 } 3113 3114 void 3115 rfs_srvrfini(void) 3116 { 3117 } 3118 3119 /* ARGSUSED */ 3120 void 3121 rfs_srv_zone_init(nfs_globals_t *ng) 3122 { 3123 nfs_srv_t *ns; 3124 3125 ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); 3126 3127 mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL); 3128 ns->write_async = 1; 3129 3130 ng->nfs_srv = ns; 3131 } 3132 3133 /* ARGSUSED */ 3134 void 3135 rfs_srv_zone_fini(nfs_globals_t *ng) 3136 { 3137 nfs_srv_t *ns = ng->nfs_srv; 3138 3139 ng->nfs_srv = NULL; 3140 3141 mutex_destroy(&ns->async_write_lock); 3142 kmem_free(ns, sizeof (*ns)); 3143 } 3144 3145 static int 3146 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 3147 { 3148 struct clist *wcl; 3149 int wlist_len; 3150 uint32_t count = rr->rr_count; 3151 3152 wcl = ra->ra_wlist; 3153 3154 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 3155 return (FALSE); 3156 } 3157 3158 wcl = ra->ra_wlist; 3159 rr->rr_ok.rrok_wlist_len = wlist_len; 3160 rr->rr_ok.rrok_wlist = wcl; 3161 3162 return (TRUE); 3163 } 3164