1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 30 * All rights reserved. 31 */ 32 33 /* 34 * Copyright 2018 Nexenta Systems, Inc. 35 * Copyright (c) 2016 by Delphix. All rights reserved. 
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/statvfs.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/dirent.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/policy.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/svc.h>

#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_cmd.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>

#include <sys/strsubr.h>

/* Forward declaration; full definition appears with the write-cluster code. */
struct rfs_async_write_list;

/*
 * Zone globals of NFSv2 server.
 * One instance exists per zone running an NFS server (obtained via
 * nfs_get_srv()), so write-cluster state is kept per zone rather than
 * in file-scope globals.
 */
typedef struct nfs_srv {
	/* protects async_write_head and the cluster lists hung off it */
	kmutex_t	async_write_lock;
	/* list of in-progress write clusters, one entry per file handle */
	struct rfs_async_write_list *async_write_head;

	/*
	 * enables write clustering if == 1
	 */
	int		write_async;
} nfs_srv_t;

/*
 * These are the interface routines for the server side of the
 * Network File System.  See the NFS version 2 protocol specification
 * for a description of this interface.
 */

static int	sattr_to_vattr(struct nfssattr *, struct vattr *);
static void	acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
			cred_t *);


/*
 * Some "over the wire" UNIX file types.  These are encoded
 * into the mode.  This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/* Caller id used to tag caller_context_t's from this NFSv2 server. */
u_longlong_t nfs2_srv_caller_id;

/*
 * Return the current zone's NFSv2 server state.  The server must
 * already be initialized for this zone (asserted below).
 */
static nfs_srv_t *
nfs_get_srv(void)
{
	nfs_globals_t *ng = nfs_srv_getzg();
	nfs_srv_t *srv = ng->nfs_srv;
	ASSERT(srv != NULL);
	return (srv);
}

/*
 * Get file attributes.
 * Returns the current attributes of the file with the given fhandle.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle to a held vnode; NULL means stale. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		/* adjust reported mode for ACL-exported filesystems */
		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/* Return the file handle embedded in a GETATTR request (for dispatch). */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}

/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;	/* 1 while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes the file has now ("before") */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* refuse attribute changes on read-only exports */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region that changes is [min(old,new) size,
			 * max(old,new) size); check it for mandatory-lock
			 * conflicts before truncating/extending.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner bypasses the mode check: do the size change via
		 * VOP_SPACE and strip AT_SIZE so VOP_SETATTR below won't
		 * redo (and access-check) it.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/* Return the file handle embedded in a SETATTR request (for dispatch). */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}

/*
 * Cross into a filesystem mounted on top of *vpp, if that filesystem is
 * exported with "nohide".  Change and release @exip and @vpp only in
 * success; on failure or when the submount is not crossable the caller's
 * references are left untouched (note: a failed traverse/VOP_FID returns
 * the error, while "not exported"/"no nohide" returns 0 without change).
 */
int
rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
{
	struct exportinfo *exi;
	vnode_t *vp = *vpp;
	fid_t fid;
	int error;

	/* extra hold: traverse() consumes/replaces its vnode argument */
	VN_HOLD(vp);

	if ((error = traverse(&vp)) != 0) {
		VN_RELE(vp);
		return (error);
	}

	bzero(&fid, sizeof (fid));
	fid.fid_len = MAXFIDSZ;
	error = VOP_FID(vp, &fid, NULL);
	if (error) {
		VN_RELE(vp);
		return (error);
	}

	exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
	if (exi == NULL ||
	    (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
		/*
		 * It is not error, just subdir is not exported
		 * or "nohide" is not set
		 */
		if (exi != NULL)
			exi_rele(exi);
		VN_RELE(vp);
	} else {
		/* go to submount: hand our exi/vp references to the caller */
		exi_rele(*exip);
		*exip = exi;

		VN_RELE(*vpp);
		*vpp = vp;
	}

	return (0);
}

/*
 * Given mounted "dvp" and "exi", go upper mountpoint
 * with dvp/exi correction
 * Return 0 in success
 */
int
rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
	struct exportinfo *exi;
	vnode_t *dvp = *dvpp;
	vnode_t *zone_rootvp;

	zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
	/* only called on a filesystem root (or the zone root itself) */
	ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));

	VN_HOLD(dvp);
	/* step to the covered vnode underneath this mount */
	dvp = untraverse(dvp, zone_rootvp);
	exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
	if (exi == NULL) {
		VN_RELE(dvp);
		return (-1);
	}

	ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
	/* swap the caller's exi/dvp references for the new ones */
	exi_rele(*exip);
	*exip = exi;
	VN_RELE(*dvpp);
	*dvpp = dvp;

	return (0);
}
/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;		/* directory being searched */
	vnode_t *vp;		/* result of the lookup */
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = ZONE_ROOTVP();
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Take our own reference; it may be swapped by the crossmnt
	 * helpers below and is dropped at "out".
	 * NOTE(review): exi is dereferenced here unconditionally, yet the
	 * EX_PUBLIC test above guards against exi == NULL — presumably the
	 * dispatcher never passes NULL here; confirm against callers.
	 */
	exi_hold(exi);
	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
			/*
			 * special case for ".." and 'nohide'exported root
			 */
			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
				error = NFSERR_ACCES;
				goto out;
			}
		} else {
			error = NFSERR_NOENT;
			goto out;
		}
	}

	/* translate the name per the client's configured charset, if any */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		error = NFSERR_ACCES;
		goto out;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;

		exi_rele(exi);
		exi = NULL;

		/* on success this returns a new held exi for the target */
		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	/* nfscmd_convname returns its input unchanged when no conversion */
	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);

	if (error == 0 && vn_ismntpt(vp)) {
		error = rfs_cross_mnt(&vp, &exi);
		if (error)
			VN_RELE(vp);
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

out:
	VN_RELE(dvp);

	if (exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}

/* Return the directory file handle from a LOOKUP request (for dispatch). */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* avoid blocking forever on a mandatory-locked object */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;
		kstat_named_t *stat =
		    exi->exi_ne->ne_globals->svstat[NFS_VERSION];

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		stat[NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link target is exactly
		 * NFS_MAXPATHLEN bytes, uio_resid is 0 and this writes
		 * rl_data[NFS_MAXPATHLEN], one byte past the buffer —
		 * confirm whether VOP_READLINK can fill the buffer
		 * completely on any exported filesystem.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* translate the link text for the client's charset, if configured */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}

/* Return the file handle from a READLINK request (for dispatch). */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}

static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;		/* reply data; freed by rfs_rdfree */
	int alloc_err = 0;
	int in_crit = 0;	/* 1 while inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission.  The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* read at/past EOF: succeed with zero bytes */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA case: read directly into the client's write chunk */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for its cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* bytes actually read = requested minus what VOP_READ left over */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}

/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}

/* Return the file handle from a READ request (for dispatch). */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}

/* iovecs kept on the stack before falling back to kmem_alloc */
#define	MAX_IOVECS	12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;	/* writes served from stack iovecs */
static int rfs_write_sync_misses = 0;	/* writes that needed kmem_alloc */
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 */
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* stack iovecs; heap if too many */
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;	/* 1 while inside the nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission.  The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file.  This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		goto out;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {
		/* data arrived as an mblk chain: build one iovec per mblk */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for its cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}

/* One pending WRITE request queued in a per-file write cluster. */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* the request's arguments */
	struct nfsattrstat *ns;		/* where to store the reply */
	struct svc_req *req;
	cred_t *cr;
	bool_t ro;			/* read-only export flag */
	kthread_t *thread;		/* service thread parked on this entry */
	struct rfs_async_write *list;	/* next request in the cluster */
};

/* A cluster of pending writes to one file, keyed by file handle. */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* the file being written */
	kcondvar_t cv;			/* broadcast when the cluster is done */
	struct rfs_async_write *list;	/* requests, sorted by offset */
	struct rfs_async_write_list *next;
};

/*
 * NOTE(review): these file-scope globals look superseded by the
 * per-zone state in nfs_srv_t (async_write_lock/async_write_head/
 * write_async), which is what rfs_write() actually uses below —
 * confirm these are unreferenced and candidates for removal.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
/* sentinel: "reply not yet filled in" (0 would mean NFS_OK) */
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* clustered writes using stack iovecs */
static int rfs_write_misses = 0;	/* clustered writes needing kmem_alloc */
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
1294 */ 1295 void 1296 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1297 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1298 { 1299 int error; 1300 vnode_t *vp; 1301 rlim64_t rlimit; 1302 struct vattr va; 1303 struct uio uio; 1304 struct rfs_async_write_list *lp; 1305 struct rfs_async_write_list *nlp; 1306 struct rfs_async_write *rp; 1307 struct rfs_async_write *nrp; 1308 struct rfs_async_write *trp; 1309 struct rfs_async_write *lrp; 1310 int data_written; 1311 int iovcnt; 1312 mblk_t *m; 1313 struct iovec *iovp; 1314 struct iovec *niovp; 1315 struct iovec iov[MAXCLIOVECS]; 1316 int count; 1317 int rcount; 1318 uint_t off; 1319 uint_t len; 1320 struct rfs_async_write nrpsp; 1321 struct rfs_async_write_list nlpsp; 1322 ushort_t t_flag; 1323 cred_t *savecred; 1324 int in_crit = 0; 1325 caller_context_t ct; 1326 nfs_srv_t *nsrv; 1327 1328 ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id); 1329 nsrv = nfs_get_srv(); 1330 if (!nsrv->write_async) { 1331 rfs_write_sync(wa, ns, exi, req, cr, ro); 1332 return; 1333 } 1334 1335 /* 1336 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1337 * is considered an OK. 1338 */ 1339 ns->ns_status = RFSWRITE_INITVAL; 1340 1341 nrp = &nrpsp; 1342 nrp->wa = wa; 1343 nrp->ns = ns; 1344 nrp->req = req; 1345 nrp->cr = cr; 1346 nrp->ro = ro; 1347 nrp->thread = curthread; 1348 1349 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 1350 1351 /* 1352 * Look to see if there is already a cluster started 1353 * for this file. 1354 */ 1355 mutex_enter(&nsrv->async_write_lock); 1356 for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) { 1357 if (bcmp(&wa->wa_fhandle, lp->fhp, 1358 sizeof (fhandle_t)) == 0) 1359 break; 1360 } 1361 1362 /* 1363 * If lp is non-NULL, then there is already a cluster 1364 * started. We need to place ourselves in the cluster 1365 * list in the right place as determined by starting 1366 * offset. 
Conflicts with non-blocking mandatory locked 1367 * regions will be checked when the cluster is processed. 1368 */ 1369 if (lp != NULL) { 1370 rp = lp->list; 1371 trp = NULL; 1372 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1373 trp = rp; 1374 rp = rp->list; 1375 } 1376 nrp->list = rp; 1377 if (trp == NULL) 1378 lp->list = nrp; 1379 else 1380 trp->list = nrp; 1381 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1382 cv_wait(&lp->cv, &nsrv->async_write_lock); 1383 mutex_exit(&nsrv->async_write_lock); 1384 1385 return; 1386 } 1387 1388 /* 1389 * No cluster started yet, start one and add ourselves 1390 * to the list of clusters. 1391 */ 1392 nrp->list = NULL; 1393 1394 nlp = &nlpsp; 1395 nlp->fhp = &wa->wa_fhandle; 1396 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1397 nlp->list = nrp; 1398 nlp->next = NULL; 1399 1400 if (nsrv->async_write_head == NULL) { 1401 nsrv->async_write_head = nlp; 1402 } else { 1403 lp = nsrv->async_write_head; 1404 while (lp->next != NULL) 1405 lp = lp->next; 1406 lp->next = nlp; 1407 } 1408 mutex_exit(&nsrv->async_write_lock); 1409 1410 /* 1411 * Convert the file handle common to all of the requests 1412 * in this cluster to a vnode. 1413 */ 1414 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1415 if (vp == NULL) { 1416 mutex_enter(&nsrv->async_write_lock); 1417 if (nsrv->async_write_head == nlp) 1418 nsrv->async_write_head = nlp->next; 1419 else { 1420 lp = nsrv->async_write_head; 1421 while (lp->next != nlp) 1422 lp = lp->next; 1423 lp->next = nlp->next; 1424 } 1425 t_flag = curthread->t_flag & T_WOULDBLOCK; 1426 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1427 rp->ns->ns_status = NFSERR_STALE; 1428 rp->thread->t_flag |= t_flag; 1429 } 1430 cv_broadcast(&nlp->cv); 1431 mutex_exit(&nsrv->async_write_lock); 1432 1433 return; 1434 } 1435 1436 /* 1437 * Can only write regular files. Attempts to write any 1438 * other file types fail with EISDIR. 
1439 */ 1440 if (vp->v_type != VREG) { 1441 VN_RELE(vp); 1442 mutex_enter(&nsrv->async_write_lock); 1443 if (nsrv->async_write_head == nlp) 1444 nsrv->async_write_head = nlp->next; 1445 else { 1446 lp = nsrv->async_write_head; 1447 while (lp->next != nlp) 1448 lp = lp->next; 1449 lp->next = nlp->next; 1450 } 1451 t_flag = curthread->t_flag & T_WOULDBLOCK; 1452 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1453 rp->ns->ns_status = NFSERR_ISDIR; 1454 rp->thread->t_flag |= t_flag; 1455 } 1456 cv_broadcast(&nlp->cv); 1457 mutex_exit(&nsrv->async_write_lock); 1458 1459 return; 1460 } 1461 1462 /* 1463 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1464 * deadlock with ufs. 1465 */ 1466 if (nbl_need_check(vp)) { 1467 nbl_start_crit(vp, RW_READER); 1468 in_crit = 1; 1469 } 1470 1471 ct.cc_sysid = 0; 1472 ct.cc_pid = 0; 1473 ct.cc_caller_id = nfs2_srv_caller_id; 1474 ct.cc_flags = CC_DONTBLOCK; 1475 1476 /* 1477 * Lock the file for writing. This operation provides 1478 * the delay which allows clusters to grow. 
1479 */ 1480 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1481 1482 /* check if a monitor detected a delegation conflict */ 1483 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1484 if (in_crit) 1485 nbl_end_crit(vp); 1486 VN_RELE(vp); 1487 /* mark as wouldblock so response is dropped */ 1488 curthread->t_flag |= T_WOULDBLOCK; 1489 mutex_enter(&nsrv->async_write_lock); 1490 if (nsrv->async_write_head == nlp) 1491 nsrv->async_write_head = nlp->next; 1492 else { 1493 lp = nsrv->async_write_head; 1494 while (lp->next != nlp) 1495 lp = lp->next; 1496 lp->next = nlp->next; 1497 } 1498 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1499 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1500 rp->ns->ns_status = puterrno(error); 1501 rp->thread->t_flag |= T_WOULDBLOCK; 1502 } 1503 } 1504 cv_broadcast(&nlp->cv); 1505 mutex_exit(&nsrv->async_write_lock); 1506 1507 return; 1508 } 1509 1510 /* 1511 * Disconnect this cluster from the list of clusters. 1512 * The cluster that is being dealt with must be fixed 1513 * in size after this point, so there is no reason 1514 * to leave it on the list so that new requests can 1515 * find it. 1516 * 1517 * The algorithm is that the first write request will 1518 * create a cluster, convert the file handle to a 1519 * vnode pointer, and then lock the file for writing. 1520 * This request is not likely to be clustered with 1521 * any others. However, the next request will create 1522 * a new cluster and be blocked in VOP_RWLOCK while 1523 * the first request is being processed. This delay 1524 * will allow more requests to be clustered in this 1525 * second cluster. 1526 */ 1527 mutex_enter(&nsrv->async_write_lock); 1528 if (nsrv->async_write_head == nlp) 1529 nsrv->async_write_head = nlp->next; 1530 else { 1531 lp = nsrv->async_write_head; 1532 while (lp->next != nlp) 1533 lp = lp->next; 1534 lp->next = nlp->next; 1535 } 1536 mutex_exit(&nsrv->async_write_lock); 1537 1538 /* 1539 * Step through the list of requests in this cluster. 
1540 * We need to check permissions to make sure that all 1541 * of the requests have sufficient permission to write 1542 * the file. A cluster can be composed of requests 1543 * from different clients and different users on each 1544 * client. 1545 * 1546 * As a side effect, we also calculate the size of the 1547 * byte range that this cluster encompasses. 1548 */ 1549 rp = nlp->list; 1550 off = rp->wa->wa_offset; 1551 len = (uint_t)0; 1552 do { 1553 if (rdonly(rp->ro, vp)) { 1554 rp->ns->ns_status = NFSERR_ROFS; 1555 t_flag = curthread->t_flag & T_WOULDBLOCK; 1556 rp->thread->t_flag |= t_flag; 1557 continue; 1558 } 1559 1560 va.va_mask = AT_UID|AT_MODE; 1561 1562 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1563 1564 if (!error) { 1565 if (crgetuid(rp->cr) != va.va_uid) { 1566 /* 1567 * This is a kludge to allow writes of files 1568 * created with read only permission. The 1569 * owner of the file is always allowed to 1570 * write it. 1571 */ 1572 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1573 } 1574 if (!error && MANDLOCK(vp, va.va_mode)) 1575 error = EACCES; 1576 } 1577 1578 /* 1579 * Check for a conflict with a nbmand-locked region. 1580 */ 1581 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1582 rp->wa->wa_count, 0, NULL)) { 1583 error = EACCES; 1584 } 1585 1586 if (error) { 1587 rp->ns->ns_status = puterrno(error); 1588 t_flag = curthread->t_flag & T_WOULDBLOCK; 1589 rp->thread->t_flag |= t_flag; 1590 continue; 1591 } 1592 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1593 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1594 } while ((rp = rp->list) != NULL); 1595 1596 /* 1597 * Step through the cluster attempting to gather as many 1598 * requests which are contiguous as possible. These 1599 * contiguous requests are handled via one call to VOP_WRITE 1600 * instead of different calls to VOP_WRITE. We also keep 1601 * track of the fact that any data was written. 
1602 */ 1603 rp = nlp->list; 1604 data_written = 0; 1605 do { 1606 /* 1607 * Skip any requests which are already marked as having an 1608 * error. 1609 */ 1610 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1611 rp = rp->list; 1612 continue; 1613 } 1614 1615 /* 1616 * Count the number of iovec's which are required 1617 * to handle this set of requests. One iovec is 1618 * needed for each data buffer, whether addressed 1619 * by wa_data or by the b_rptr pointers in the 1620 * mblk chains. 1621 */ 1622 iovcnt = 0; 1623 lrp = rp; 1624 for (;;) { 1625 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1626 iovcnt++; 1627 else { 1628 m = lrp->wa->wa_mblk; 1629 while (m != NULL) { 1630 iovcnt++; 1631 m = m->b_cont; 1632 } 1633 } 1634 if (lrp->list == NULL || 1635 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1636 lrp->wa->wa_offset + lrp->wa->wa_count != 1637 lrp->list->wa->wa_offset) { 1638 lrp = lrp->list; 1639 break; 1640 } 1641 lrp = lrp->list; 1642 } 1643 1644 if (iovcnt <= MAXCLIOVECS) { 1645 #ifdef DEBUG 1646 rfs_write_hits++; 1647 #endif 1648 niovp = iov; 1649 } else { 1650 #ifdef DEBUG 1651 rfs_write_misses++; 1652 #endif 1653 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1654 } 1655 /* 1656 * Put together the scatter/gather iovecs. 
1657 */ 1658 iovp = niovp; 1659 trp = rp; 1660 count = 0; 1661 do { 1662 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1663 if (trp->wa->wa_rlist) { 1664 iovp->iov_base = 1665 (char *)((trp->wa->wa_rlist)-> 1666 u.c_daddr3); 1667 iovp->iov_len = trp->wa->wa_count; 1668 } else { 1669 iovp->iov_base = trp->wa->wa_data; 1670 iovp->iov_len = trp->wa->wa_count; 1671 } 1672 iovp++; 1673 } else { 1674 m = trp->wa->wa_mblk; 1675 rcount = trp->wa->wa_count; 1676 while (m != NULL) { 1677 iovp->iov_base = (caddr_t)m->b_rptr; 1678 iovp->iov_len = (m->b_wptr - m->b_rptr); 1679 rcount -= iovp->iov_len; 1680 if (rcount < 0) 1681 iovp->iov_len += rcount; 1682 iovp++; 1683 if (rcount <= 0) 1684 break; 1685 m = m->b_cont; 1686 } 1687 } 1688 count += trp->wa->wa_count; 1689 trp = trp->list; 1690 } while (trp != lrp); 1691 1692 uio.uio_iov = niovp; 1693 uio.uio_iovcnt = iovcnt; 1694 uio.uio_segflg = UIO_SYSSPACE; 1695 uio.uio_extflg = UIO_COPY_DEFAULT; 1696 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1697 uio.uio_resid = count; 1698 /* 1699 * The limit is checked on the client. We 1700 * should allow any size writes here. 1701 */ 1702 uio.uio_llimit = curproc->p_fsz_ctl; 1703 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1704 if (rlimit < (rlim64_t)uio.uio_resid) 1705 uio.uio_resid = (uint_t)rlimit; 1706 1707 /* 1708 * For now we assume no append mode. 1709 */ 1710 1711 /* 1712 * We're changing creds because VM may fault 1713 * and we need the cred of the current 1714 * thread to be used if quota * checking is 1715 * enabled. 
1716 */ 1717 savecred = curthread->t_cred; 1718 curthread->t_cred = cr; 1719 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1720 curthread->t_cred = savecred; 1721 1722 /* check if a monitor detected a delegation conflict */ 1723 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1724 /* mark as wouldblock so response is dropped */ 1725 curthread->t_flag |= T_WOULDBLOCK; 1726 1727 if (niovp != iov) 1728 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1729 1730 if (!error) { 1731 data_written = 1; 1732 /* 1733 * Get attributes again so we send the latest mod 1734 * time to the client side for its cache. 1735 */ 1736 va.va_mask = AT_ALL; /* now we want everything */ 1737 1738 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1739 1740 if (!error) 1741 acl_perm(vp, exi, &va, rp->cr); 1742 } 1743 1744 /* 1745 * Fill in the status responses for each request 1746 * which was just handled. Also, copy the latest 1747 * attributes in to the attribute responses if 1748 * appropriate. 1749 */ 1750 t_flag = curthread->t_flag & T_WOULDBLOCK; 1751 do { 1752 rp->thread->t_flag |= t_flag; 1753 /* check for overflows */ 1754 if (!error) { 1755 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1756 } 1757 rp->ns->ns_status = puterrno(error); 1758 rp = rp->list; 1759 } while (rp != lrp); 1760 } while (rp != NULL); 1761 1762 /* 1763 * If any data was written at all, then we need to flush 1764 * the data and metadata to stable storage. 
1765 */ 1766 if (data_written) { 1767 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1768 1769 if (!error) { 1770 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1771 } 1772 } 1773 1774 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1775 1776 if (in_crit) 1777 nbl_end_crit(vp); 1778 VN_RELE(vp); 1779 1780 t_flag = curthread->t_flag & T_WOULDBLOCK; 1781 mutex_enter(&nsrv->async_write_lock); 1782 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1783 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1784 rp->ns->ns_status = puterrno(error); 1785 rp->thread->t_flag |= t_flag; 1786 } 1787 } 1788 cv_broadcast(&nlp->cv); 1789 mutex_exit(&nsrv->async_write_lock); 1790 1791 } 1792 1793 void * 1794 rfs_write_getfh(struct nfswriteargs *wa) 1795 { 1796 return (&wa->wa_fhandle); 1797 } 1798 1799 /* 1800 * Create a file. 1801 * Creates a file with given attributes and returns those attributes 1802 * and an fhandle for the new file. 1803 */ 1804 void 1805 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1806 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1807 { 1808 int error; 1809 int lookuperr; 1810 int in_crit = 0; 1811 struct vattr va; 1812 vnode_t *vp; 1813 vnode_t *realvp; 1814 vnode_t *dvp; 1815 char *name = args->ca_da.da_name; 1816 vnode_t *tvp = NULL; 1817 int mode; 1818 int lookup_ok; 1819 bool_t trunc; 1820 struct sockaddr *ca; 1821 1822 /* 1823 * Disallow NULL paths 1824 */ 1825 if (name == NULL || *name == '\0') { 1826 dr->dr_status = NFSERR_ACCES; 1827 return; 1828 } 1829 1830 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1831 if (dvp == NULL) { 1832 dr->dr_status = NFSERR_STALE; 1833 return; 1834 } 1835 1836 error = sattr_to_vattr(args->ca_sa, &va); 1837 if (error) { 1838 dr->dr_status = puterrno(error); 1839 return; 1840 } 1841 1842 /* 1843 * Must specify the mode. 
1844 */ 1845 if (!(va.va_mask & AT_MODE)) { 1846 VN_RELE(dvp); 1847 dr->dr_status = NFSERR_INVAL; 1848 return; 1849 } 1850 1851 /* 1852 * This is a completely gross hack to make mknod 1853 * work over the wire until we can wack the protocol 1854 */ 1855 if ((va.va_mode & IFMT) == IFCHR) { 1856 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1857 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1858 else { 1859 va.va_type = VCHR; 1860 /* 1861 * uncompress the received dev_t 1862 * if the top half is zero indicating a request 1863 * from an `older style' OS. 1864 */ 1865 if ((va.va_size & 0xffff0000) == 0) 1866 va.va_rdev = nfsv2_expdev(va.va_size); 1867 else 1868 va.va_rdev = (dev_t)va.va_size; 1869 } 1870 va.va_mask &= ~AT_SIZE; 1871 } else if ((va.va_mode & IFMT) == IFBLK) { 1872 va.va_type = VBLK; 1873 /* 1874 * uncompress the received dev_t 1875 * if the top half is zero indicating a request 1876 * from an `older style' OS. 1877 */ 1878 if ((va.va_size & 0xffff0000) == 0) 1879 va.va_rdev = nfsv2_expdev(va.va_size); 1880 else 1881 va.va_rdev = (dev_t)va.va_size; 1882 va.va_mask &= ~AT_SIZE; 1883 } else if ((va.va_mode & IFMT) == IFSOCK) { 1884 va.va_type = VSOCK; 1885 } else { 1886 va.va_type = VREG; 1887 } 1888 va.va_mode &= ~IFMT; 1889 va.va_mask |= AT_TYPE; 1890 1891 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1892 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1893 MAXPATHLEN); 1894 if (name == NULL) { 1895 dr->dr_status = puterrno(EINVAL); 1896 return; 1897 } 1898 1899 /* 1900 * Why was the choice made to use VWRITE as the mode to the 1901 * call to VOP_CREATE ? This results in a bug. When a client 1902 * opens a file that already exists and is RDONLY, the second 1903 * open fails with an EACESS because of the mode. 1904 * bug ID 1054648. 
1905 */ 1906 lookup_ok = 0; 1907 mode = VWRITE; 1908 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1909 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1910 NULL, NULL, NULL); 1911 if (!error) { 1912 struct vattr at; 1913 1914 lookup_ok = 1; 1915 at.va_mask = AT_MODE; 1916 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1917 if (!error) 1918 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1919 VN_RELE(tvp); 1920 tvp = NULL; 1921 } 1922 } 1923 1924 if (!lookup_ok) { 1925 if (rdonly(ro, dvp)) { 1926 error = EROFS; 1927 } else if (va.va_type != VREG && va.va_type != VFIFO && 1928 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1929 error = EPERM; 1930 } else { 1931 error = 0; 1932 } 1933 } 1934 1935 /* 1936 * If file size is being modified on an already existing file 1937 * make sure that there are no conflicting non-blocking mandatory 1938 * locks in the region being manipulated. Return EACCES if there 1939 * are conflicting locks. 1940 */ 1941 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1942 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1943 NULL, NULL, NULL); 1944 1945 if (!lookuperr && 1946 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1947 VN_RELE(tvp); 1948 curthread->t_flag |= T_WOULDBLOCK; 1949 goto out; 1950 } 1951 1952 if (!lookuperr && nbl_need_check(tvp)) { 1953 /* 1954 * The file exists. Now check if it has any 1955 * conflicting non-blocking mandatory locks 1956 * in the region being changed. 
1957 */ 1958 struct vattr bva; 1959 u_offset_t offset; 1960 ssize_t length; 1961 1962 nbl_start_crit(tvp, RW_READER); 1963 in_crit = 1; 1964 1965 bva.va_mask = AT_SIZE; 1966 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1967 if (!error) { 1968 if (va.va_size < bva.va_size) { 1969 offset = va.va_size; 1970 length = bva.va_size - va.va_size; 1971 } else { 1972 offset = bva.va_size; 1973 length = va.va_size - bva.va_size; 1974 } 1975 if (length) { 1976 if (nbl_conflict(tvp, NBL_WRITE, 1977 offset, length, 0, NULL)) { 1978 error = EACCES; 1979 } 1980 } 1981 } 1982 if (error) { 1983 nbl_end_crit(tvp); 1984 VN_RELE(tvp); 1985 in_crit = 0; 1986 } 1987 } else if (tvp != NULL) { 1988 VN_RELE(tvp); 1989 } 1990 } 1991 1992 if (!error) { 1993 /* 1994 * If filesystem is shared with nosuid the remove any 1995 * setuid/setgid bits on create. 1996 */ 1997 if (va.va_type == VREG && 1998 exi->exi_export.ex_flags & EX_NOSUID) 1999 va.va_mode &= ~(VSUID | VSGID); 2000 2001 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 2002 NULL, NULL); 2003 2004 if (!error) { 2005 2006 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 2007 trunc = TRUE; 2008 else 2009 trunc = FALSE; 2010 2011 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 2012 VN_RELE(vp); 2013 curthread->t_flag |= T_WOULDBLOCK; 2014 goto out; 2015 } 2016 va.va_mask = AT_ALL; 2017 2018 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 2019 2020 /* check for overflows */ 2021 if (!error) { 2022 acl_perm(vp, exi, &va, cr); 2023 error = vattr_to_nattr(&va, &dr->dr_attr); 2024 if (!error) { 2025 error = makefh(&dr->dr_fhandle, vp, 2026 exi); 2027 } 2028 } 2029 /* 2030 * Force modified metadata out to stable storage. 
2031 * 2032 * if a underlying vp exists, pass it to VOP_FSYNC 2033 */ 2034 if (VOP_REALVP(vp, &realvp, NULL) == 0) 2035 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 2036 else 2037 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2038 VN_RELE(vp); 2039 } 2040 2041 if (in_crit) { 2042 nbl_end_crit(tvp); 2043 VN_RELE(tvp); 2044 } 2045 } 2046 2047 /* 2048 * Force modified data and metadata out to stable storage. 2049 */ 2050 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2051 2052 out: 2053 2054 VN_RELE(dvp); 2055 2056 dr->dr_status = puterrno(error); 2057 2058 if (name != args->ca_da.da_name) 2059 kmem_free(name, MAXPATHLEN); 2060 } 2061 void * 2062 rfs_create_getfh(struct nfscreatargs *args) 2063 { 2064 return (args->ca_da.da_fhandle); 2065 } 2066 2067 /* 2068 * Remove a file. 2069 * Remove named file from parent directory. 2070 */ 2071 /* ARGSUSED */ 2072 void 2073 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 2074 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2075 { 2076 int error = 0; 2077 vnode_t *vp; 2078 vnode_t *targvp; 2079 int in_crit = 0; 2080 2081 /* 2082 * Disallow NULL paths 2083 */ 2084 if (da->da_name == NULL || *da->da_name == '\0') { 2085 *status = NFSERR_ACCES; 2086 return; 2087 } 2088 2089 vp = nfs_fhtovp(da->da_fhandle, exi); 2090 if (vp == NULL) { 2091 *status = NFSERR_STALE; 2092 return; 2093 } 2094 2095 if (rdonly(ro, vp)) { 2096 VN_RELE(vp); 2097 *status = NFSERR_ROFS; 2098 return; 2099 } 2100 2101 /* 2102 * Check for a conflict with a non-blocking mandatory share reservation. 2103 */ 2104 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 2105 NULL, cr, NULL, NULL, NULL); 2106 if (error != 0) { 2107 VN_RELE(vp); 2108 *status = puterrno(error); 2109 return; 2110 } 2111 2112 /* 2113 * If the file is delegated to an v4 client, then initiate 2114 * recall and drop this request (by setting T_WOULDBLOCK). 
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

/*
 * Return a pointer to the file handle embedded in the REMOVE arguments.
 */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Both directories must live in the same export; a cross-export
	 * rename is rejected with NFSERR_XDEV below.  exi_rele() is safe
	 * before the pointer comparison: only the address is compared.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share
	 * reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}

/*
 * Return a pointer to the source file handle in the RENAME arguments.
 */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
2311 */ 2312 /* ARGSUSED */ 2313 void 2314 rfs_link(struct nfslinkargs *args, enum nfsstat *status, 2315 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2316 { 2317 int error; 2318 vnode_t *fromvp; 2319 vnode_t *tovp; 2320 struct exportinfo *to_exi; 2321 fhandle_t *fh; 2322 2323 fromvp = nfs_fhtovp(args->la_from, exi); 2324 if (fromvp == NULL) { 2325 *status = NFSERR_STALE; 2326 return; 2327 } 2328 2329 fh = args->la_to.da_fhandle; 2330 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); 2331 if (to_exi == NULL) { 2332 VN_RELE(fromvp); 2333 *status = NFSERR_ACCES; 2334 return; 2335 } 2336 exi_rele(to_exi); 2337 2338 if (to_exi != exi) { 2339 VN_RELE(fromvp); 2340 *status = NFSERR_XDEV; 2341 return; 2342 } 2343 2344 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi); 2345 if (tovp == NULL) { 2346 VN_RELE(fromvp); 2347 *status = NFSERR_STALE; 2348 return; 2349 } 2350 2351 if (tovp->v_type != VDIR) { 2352 VN_RELE(tovp); 2353 VN_RELE(fromvp); 2354 *status = NFSERR_NOTDIR; 2355 return; 2356 } 2357 /* 2358 * Disallow NULL paths 2359 */ 2360 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') { 2361 VN_RELE(tovp); 2362 VN_RELE(fromvp); 2363 *status = NFSERR_ACCES; 2364 return; 2365 } 2366 2367 if (rdonly(ro, tovp)) { 2368 VN_RELE(tovp); 2369 VN_RELE(fromvp); 2370 *status = NFSERR_ROFS; 2371 return; 2372 } 2373 2374 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0); 2375 2376 /* 2377 * Force modified data and metadata out to stable storage. 2378 */ 2379 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2380 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2381 2382 VN_RELE(tovp); 2383 VN_RELE(fromvp); 2384 2385 *status = puterrno(error); 2386 2387 } 2388 void * 2389 rfs_link_getfh(struct nfslinkargs *args) 2390 { 2391 return (args->la_from); 2392 } 2393 2394 /* 2395 * Symbolicly link to a file. 2396 * Create a file (to) with the given attributes which is a symbolic link 2397 * to the given path name (to). 
2398 */ 2399 void 2400 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2401 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2402 { 2403 int error; 2404 struct vattr va; 2405 vnode_t *vp; 2406 vnode_t *svp; 2407 int lerror; 2408 struct sockaddr *ca; 2409 char *name = NULL; 2410 2411 /* 2412 * Disallow NULL paths 2413 */ 2414 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2415 *status = NFSERR_ACCES; 2416 return; 2417 } 2418 2419 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2420 if (vp == NULL) { 2421 *status = NFSERR_STALE; 2422 return; 2423 } 2424 2425 if (rdonly(ro, vp)) { 2426 VN_RELE(vp); 2427 *status = NFSERR_ROFS; 2428 return; 2429 } 2430 2431 error = sattr_to_vattr(args->sla_sa, &va); 2432 if (error) { 2433 VN_RELE(vp); 2434 *status = puterrno(error); 2435 return; 2436 } 2437 2438 if (!(va.va_mask & AT_MODE)) { 2439 VN_RELE(vp); 2440 *status = NFSERR_INVAL; 2441 return; 2442 } 2443 2444 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2445 name = nfscmd_convname(ca, exi, args->sla_tnm, 2446 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2447 2448 if (name == NULL) { 2449 *status = NFSERR_ACCES; 2450 return; 2451 } 2452 2453 va.va_type = VLNK; 2454 va.va_mask |= AT_TYPE; 2455 2456 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2457 2458 /* 2459 * Force new data and metadata out to stable storage. 2460 */ 2461 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2462 NULL, cr, NULL, NULL, NULL); 2463 2464 if (!lerror) { 2465 (void) VOP_FSYNC(svp, 0, cr, NULL); 2466 VN_RELE(svp); 2467 } 2468 2469 /* 2470 * Force modified data and metadata out to stable storage. 
2471 */ 2472 (void) VOP_FSYNC(vp, 0, cr, NULL); 2473 2474 VN_RELE(vp); 2475 2476 *status = puterrno(error); 2477 if (name != args->sla_tnm) 2478 kmem_free(name, MAXPATHLEN); 2479 2480 } 2481 void * 2482 rfs_symlink_getfh(struct nfsslargs *args) 2483 { 2484 return (args->sla_from.da_fhandle); 2485 } 2486 2487 /* 2488 * Make a directory. 2489 * Create a directory with the given name, parent directory, and attributes. 2490 * Returns a file handle and attributes for the new directory. 2491 */ 2492 /* ARGSUSED */ 2493 void 2494 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2495 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2496 { 2497 int error; 2498 struct vattr va; 2499 vnode_t *dvp = NULL; 2500 vnode_t *vp; 2501 char *name = args->ca_da.da_name; 2502 2503 /* 2504 * Disallow NULL paths 2505 */ 2506 if (name == NULL || *name == '\0') { 2507 dr->dr_status = NFSERR_ACCES; 2508 return; 2509 } 2510 2511 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2512 if (vp == NULL) { 2513 dr->dr_status = NFSERR_STALE; 2514 return; 2515 } 2516 2517 if (rdonly(ro, vp)) { 2518 VN_RELE(vp); 2519 dr->dr_status = NFSERR_ROFS; 2520 return; 2521 } 2522 2523 error = sattr_to_vattr(args->ca_sa, &va); 2524 if (error) { 2525 VN_RELE(vp); 2526 dr->dr_status = puterrno(error); 2527 return; 2528 } 2529 2530 if (!(va.va_mask & AT_MODE)) { 2531 VN_RELE(vp); 2532 dr->dr_status = NFSERR_INVAL; 2533 return; 2534 } 2535 2536 va.va_type = VDIR; 2537 va.va_mask |= AT_TYPE; 2538 2539 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2540 2541 if (!error) { 2542 /* 2543 * Attribtutes of the newly created directory should 2544 * be returned to the client. 
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is handed vp (the parent
			 * directory) while the attributes in va were fetched
			 * from dvp (the new directory); dvp looks like the
			 * intended vnode here -- confirm against rfs_create(),
			 * which passes the newly created vnode.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}

/*
 * Return a pointer to the parent file handle in the MKDIR arguments.
 */
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}

/*
 * Remove a directory.
 * Remove the given directory name from the given parent directory.
 */
/* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}

/*
 * Return a pointer to the file handle embedded in the RMDIR arguments.
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read entries from a directory, converting names for the client's
 * character set as needed; results are freed via rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
2697 */ 2698 rd->rd_bufsize = (uint_t)rda->rda_count; 2699 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2700 2701 /* 2702 * Set up io vector to read directory data 2703 */ 2704 iov.iov_base = (caddr_t)rd->rd_entries; 2705 iov.iov_len = rda->rda_count; 2706 uio.uio_iov = &iov; 2707 uio.uio_iovcnt = 1; 2708 uio.uio_segflg = UIO_SYSSPACE; 2709 uio.uio_extflg = UIO_COPY_CACHED; 2710 uio.uio_loffset = (offset_t)rda->rda_offset; 2711 uio.uio_resid = rda->rda_count; 2712 2713 /* 2714 * read directory 2715 */ 2716 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2717 2718 /* 2719 * Clean up 2720 */ 2721 if (!error) { 2722 /* 2723 * set size and eof 2724 */ 2725 if (uio.uio_resid == rda->rda_count) { 2726 rd->rd_size = 0; 2727 rd->rd_eof = TRUE; 2728 } else { 2729 rd->rd_size = (uint32_t)(rda->rda_count - 2730 uio.uio_resid); 2731 rd->rd_eof = iseof ? TRUE : FALSE; 2732 } 2733 } 2734 2735 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2736 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2737 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2738 rda->rda_count, &ndata); 2739 2740 if (ret != 0) { 2741 size_t dropbytes; 2742 /* 2743 * We had to drop one or more entries in order to fit 2744 * during the character conversion. We need to patch 2745 * up the size and eof info. 2746 */ 2747 if (rd->rd_eof) 2748 rd->rd_eof = FALSE; 2749 dropbytes = nfscmd_dropped_entrysize( 2750 (struct dirent64 *)rd->rd_entries, nents, ret); 2751 rd->rd_size -= dropbytes; 2752 } 2753 if (ndata == NULL) { 2754 ndata = (char *)rd->rd_entries; 2755 } else if (ndata != (char *)rd->rd_entries) { 2756 kmem_free(rd->rd_entries, rd->rd_bufsize); 2757 rd->rd_entries = (void *)ndata; 2758 rd->rd_bufsize = rda->rda_count; 2759 } 2760 2761 bad: 2762 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2763 2764 #if 0 /* notyet */ 2765 /* 2766 * Don't do this. 
It causes local disk writes when just 2767 * reading the file and the overhead is deemed larger 2768 * than the benefit. 2769 */ 2770 /* 2771 * Force modified metadata out to stable storage. 2772 */ 2773 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2774 #endif 2775 2776 VN_RELE(vp); 2777 2778 rd->rd_status = puterrno(error); 2779 2780 } 2781 void * 2782 rfs_readdir_getfh(struct nfsrddirargs *rda) 2783 { 2784 return (&rda->rda_fh); 2785 } 2786 void 2787 rfs_rddirfree(struct nfsrddirres *rd) 2788 { 2789 if (rd->rd_entries != NULL) 2790 kmem_free(rd->rd_entries, rd->rd_bufsize); 2791 } 2792 2793 /* ARGSUSED */ 2794 void 2795 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2796 struct svc_req *req, cred_t *cr, bool_t ro) 2797 { 2798 int error; 2799 struct statvfs64 sb; 2800 vnode_t *vp; 2801 2802 vp = nfs_fhtovp(fh, exi); 2803 if (vp == NULL) { 2804 fs->fs_status = NFSERR_STALE; 2805 return; 2806 } 2807 2808 error = VFS_STATVFS(vp->v_vfsp, &sb); 2809 2810 if (!error) { 2811 fs->fs_tsize = nfstsize(); 2812 fs->fs_bsize = sb.f_frsize; 2813 fs->fs_blocks = sb.f_blocks; 2814 fs->fs_bfree = sb.f_bfree; 2815 fs->fs_bavail = sb.f_bavail; 2816 } 2817 2818 VN_RELE(vp); 2819 2820 fs->fs_status = puterrno(error); 2821 2822 } 2823 void * 2824 rfs_statfs_getfh(fhandle_t *fh) 2825 { 2826 return (fh); 2827 } 2828 2829 static int 2830 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2831 { 2832 vap->va_mask = 0; 2833 2834 /* 2835 * There was a sign extension bug in some VFS based systems 2836 * which stored the mode as a short. When it would get 2837 * assigned to a u_long, no sign extension would occur. 2838 * It needed to, but this wasn't noticed because sa_mode 2839 * would then get assigned back to the short, thus ignoring 2840 * the upper 16 bits of sa_mode. 2841 * 2842 * To make this implementation work for both broken 2843 * clients and good clients, we check for both versions 2844 * of the mode. 
2845 */ 2846 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2847 sa->sa_mode != (uint32_t)-1) { 2848 vap->va_mask |= AT_MODE; 2849 vap->va_mode = sa->sa_mode; 2850 } 2851 if (sa->sa_uid != (uint32_t)-1) { 2852 vap->va_mask |= AT_UID; 2853 vap->va_uid = sa->sa_uid; 2854 } 2855 if (sa->sa_gid != (uint32_t)-1) { 2856 vap->va_mask |= AT_GID; 2857 vap->va_gid = sa->sa_gid; 2858 } 2859 if (sa->sa_size != (uint32_t)-1) { 2860 vap->va_mask |= AT_SIZE; 2861 vap->va_size = sa->sa_size; 2862 } 2863 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2864 sa->sa_atime.tv_usec != (int32_t)-1) { 2865 #ifndef _LP64 2866 /* return error if time overflow */ 2867 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2868 return (EOVERFLOW); 2869 #endif 2870 vap->va_mask |= AT_ATIME; 2871 /* 2872 * nfs protocol defines times as unsigned so don't extend sign, 2873 * unless sysadmin set nfs_allow_preepoch_time. 2874 */ 2875 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2876 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2877 } 2878 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2879 sa->sa_mtime.tv_usec != (int32_t)-1) { 2880 #ifndef _LP64 2881 /* return error if time overflow */ 2882 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2883 return (EOVERFLOW); 2884 #endif 2885 vap->va_mask |= AT_MTIME; 2886 /* 2887 * nfs protocol defines times as unsigned so don't extend sign, 2888 * unless sysadmin set nfs_allow_preepoch_time. 2889 */ 2890 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2891 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2892 } 2893 return (0); 2894 } 2895 2896 static const enum nfsftype vt_to_nf[] = { 2897 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2898 }; 2899 2900 /* 2901 * check the following fields for overflow: nodeid, size, and time. 2902 * There could be a problem when converting 64-bit LP64 fields 2903 * into 32-bit ones. Return an error if there is an overflow. 
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* Preserve the "unknown" sentinel; otherwise merge type bits in */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* NFSv2 wire times are in microseconds; vattr carries nanoseconds */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type.  (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}

/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/*
				 * maximal permissions: union the permissions
				 * of every entry, then apply the ACL mask.
				 *
				 * NOTE(review): mask_perm and other_orig are
				 * only assigned when CLASS_OBJ/OTHER_OBJ
				 * entries appear in the list; this relies on
				 * a well-formed POSIX-draft ACL always
				 * containing both — confirm.
				 */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				/* apply the mask, then restore OTHER_OBJ bits */
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/*
				 * minimal permissions: start from full group
				 * and other bits and intersect with every
				 * entry, so the result is no more permissive
				 * than any applicable entry.
				 */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		/* VOP_GETSECATTR allocated the entry list; release it */
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}

/*
 * One-time NFSv2 server module initialization.
 */
void
rfs_srvrinit(void)
{
	nfs2_srv_caller_id = fs_new_caller_id();
}

/*
 * NFSv2 server module teardown; nothing to undo at present.
 */
void
rfs_srvrfini(void)
{
}

/*
 * Per-zone NFSv2 server state setup; attaches an nfs_srv_t to the
 * zone globals with write clustering enabled by default.
 */
/* ARGSUSED */
void
rfs_srv_zone_init(nfs_globals_t *ng)
{
	nfs_srv_t *ns;

	ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);

	mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	ns->write_async = 1;

	ng->nfs_srv = ns;
}

/*
 * Per-zone NFSv2 server state teardown; detaches and frees the
 * nfs_srv_t created by rfs_srv_zone_init().
 */
/* ARGSUSED */
void
rfs_srv_zone_fini(nfs_globals_t *ng)
{
	nfs_srv_t *ns = ng->nfs_srv;

	ng->nfs_srv = NULL;

	mutex_destroy(&ns->async_write_lock);
	kmem_free(ns, sizeof (*ns));
}

/*
 * Set up the RDMA write chunk list for an NFSv2 READ reply so the
 * data can be transferred directly into the client's memory.
 * Returns TRUE on success, FALSE if the chunk setup fails.
 */
static int
rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
{
	struct clist *wcl;
	int wlist_len;
	uint32_t count = rr->rr_count;

	wcl = ra->ra_wlist;

	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
		return (FALSE);
	}

	wcl = ra->ra_wlist;
	rr->rr_ok.rrok_wlist_len = wlist_len;
	rr->rr_ok.rrok_wlist = wcl;

	return (TRUE);
}