1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/capability.h> 38 39 /* 40 * Functions that perform the vfs operations required by the routines in 41 * nfsd_serv.c. 
It is hoped that this change will make the server more 42 * portable. 43 */ 44 45 #include <fs/nfs/nfsport.h> 46 #include <sys/hash.h> 47 #include <sys/sysctl.h> 48 #include <nlm/nlm_prot.h> 49 #include <nlm/nlm.h> 50 51 FEATURE(nfsd, "NFSv4 server"); 52 53 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 54 extern int nfsrv_useacl; 55 extern int newnfs_numnfsd; 56 extern struct mount nfsv4root_mnt; 57 extern struct nfsrv_stablefirst nfsrv_stablefirst; 58 extern void (*nfsd_call_servertimer)(void); 59 extern SVCPOOL *nfsrvd_pool; 60 extern struct nfsv4lock nfsd_suspend_lock; 61 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 62 NFSDLOCKMUTEX; 63 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 64 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 65 struct mtx nfsrc_udpmtx; 66 struct mtx nfs_v4root_mutex; 67 struct nfsrvfh nfs_rootfh, nfs_pubfh; 68 int nfs_pubfhset = 0, nfs_rootfhset = 0; 69 struct proc *nfsd_master_proc = NULL; 70 static pid_t nfsd_master_pid = (pid_t)-1; 71 static char nfsd_master_comm[MAXCOMLEN + 1]; 72 static struct timeval nfsd_master_start; 73 static uint32_t nfsv4_sysid = 0; 74 75 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 76 struct ucred *); 77 78 int nfsrv_enable_crossmntpt = 1; 79 static int nfs_commit_blks; 80 static int nfs_commit_miss; 81 extern int nfsrv_issuedelegs; 82 extern int nfsrv_dolocallocks; 83 84 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server"); 85 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 86 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 87 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 88 0, ""); 89 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 90 0, ""); 91 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 92 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 93 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 94 
&nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 95 96 #define MAX_REORDERED_RPC 16 97 #define NUM_HEURISTIC 1031 98 #define NHUSE_INIT 64 99 #define NHUSE_INC 16 100 #define NHUSE_MAX 2048 101 102 static struct nfsheur { 103 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 104 off_t nh_nextoff; /* next offset for sequential detection */ 105 int nh_use; /* use count for selection */ 106 int nh_seqcount; /* heuristic */ 107 } nfsheur[NUM_HEURISTIC]; 108 109 110 /* 111 * Heuristic to detect sequential operation. 112 */ 113 static struct nfsheur * 114 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 115 { 116 struct nfsheur *nh; 117 int hi, try; 118 119 /* Locate best candidate. */ 120 try = 32; 121 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 122 nh = &nfsheur[hi]; 123 while (try--) { 124 if (nfsheur[hi].nh_vp == vp) { 125 nh = &nfsheur[hi]; 126 break; 127 } 128 if (nfsheur[hi].nh_use > 0) 129 --nfsheur[hi].nh_use; 130 hi = (hi + 1) % NUM_HEURISTIC; 131 if (nfsheur[hi].nh_use < nh->nh_use) 132 nh = &nfsheur[hi]; 133 } 134 135 /* Initialize hint if this is a new file. */ 136 if (nh->nh_vp != vp) { 137 nh->nh_vp = vp; 138 nh->nh_nextoff = uio->uio_offset; 139 nh->nh_use = NHUSE_INIT; 140 if (uio->uio_offset == 0) 141 nh->nh_seqcount = 4; 142 else 143 nh->nh_seqcount = 1; 144 } 145 146 /* Calculate heuristic. */ 147 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 148 uio->uio_offset == nh->nh_nextoff) { 149 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 150 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 151 if (nh->nh_seqcount > IO_SEQMAX) 152 nh->nh_seqcount = IO_SEQMAX; 153 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 154 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 155 /* Probably a reordered RPC, leave seqcount alone. 
*/ 156 } else if (nh->nh_seqcount > 1) { 157 nh->nh_seqcount /= 2; 158 } else { 159 nh->nh_seqcount = 0; 160 } 161 nh->nh_use += NHUSE_INC; 162 if (nh->nh_use > NHUSE_MAX) 163 nh->nh_use = NHUSE_MAX; 164 return (nh); 165 } 166 167 /* 168 * Get attributes into nfsvattr structure. 169 */ 170 int 171 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 172 struct thread *p, int vpislocked) 173 { 174 int error, lockedit = 0; 175 176 if (vpislocked == 0) { 177 /* 178 * When vpislocked == 0, the vnode is either exclusively 179 * locked by this thread or not locked by this thread. 180 * As such, shared lock it, if not exclusively locked. 181 */ 182 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 183 lockedit = 1; 184 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 185 } 186 } 187 error = VOP_GETATTR(vp, &nvap->na_vattr, cred); 188 if (lockedit != 0) 189 NFSVOPUNLOCK(vp, 0); 190 191 NFSEXITCODE(error); 192 return (error); 193 } 194 195 /* 196 * Get a file handle for a vnode. 197 */ 198 int 199 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 200 { 201 int error; 202 203 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 204 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 205 error = VOP_VPTOFH(vp, &fhp->fh_fid); 206 207 NFSEXITCODE(error); 208 return (error); 209 } 210 211 /* 212 * Perform access checking for vnodes obtained from file handles that would 213 * refer to files already opened by a Unix client. You cannot just use 214 * vn_writechk() and VOP_ACCESSX() for two reasons. 215 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 216 * case. 217 * 2 - The owner is to be given access irrespective of mode bits for some 218 * operations, so that processes that chmod after opening a file don't 219 * break. 
220 */ 221 int 222 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 223 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 224 u_int32_t *supportedtypep) 225 { 226 struct vattr vattr; 227 int error = 0, getret = 0; 228 229 if (vpislocked == 0) { 230 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 231 error = EPERM; 232 goto out; 233 } 234 } 235 if (accmode & VWRITE) { 236 /* Just vn_writechk() changed to check rdonly */ 237 /* 238 * Disallow write attempts on read-only file systems; 239 * unless the file is a socket or a block or character 240 * device resident on the file system. 241 */ 242 if (NFSVNO_EXRDONLY(exp) || 243 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 244 switch (vp->v_type) { 245 case VREG: 246 case VDIR: 247 case VLNK: 248 error = EROFS; 249 default: 250 break; 251 } 252 } 253 /* 254 * If there's shared text associated with 255 * the inode, try to free it up once. If 256 * we fail, we can't allow writing. 257 */ 258 if (VOP_IS_TEXT(vp) && error == 0) 259 error = ETXTBSY; 260 } 261 if (error != 0) { 262 if (vpislocked == 0) 263 NFSVOPUNLOCK(vp, 0); 264 goto out; 265 } 266 267 /* 268 * Should the override still be applied when ACLs are enabled? 269 */ 270 error = VOP_ACCESSX(vp, accmode, cred, p); 271 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 272 /* 273 * Try again with VEXPLICIT_DENY, to see if the test for 274 * deletion is supported. 275 */ 276 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 277 if (error == 0) { 278 if (vp->v_type == VDIR) { 279 accmode &= ~(VDELETE | VDELETE_CHILD); 280 accmode |= VWRITE; 281 error = VOP_ACCESSX(vp, accmode, cred, p); 282 } else if (supportedtypep != NULL) { 283 *supportedtypep &= ~NFSACCESS_DELETE; 284 } 285 } 286 } 287 288 /* 289 * Allow certain operations for the owner (reads and writes 290 * on files that are already open). 
291 */ 292 if (override != NFSACCCHK_NOOVERRIDE && 293 (error == EPERM || error == EACCES)) { 294 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 295 error = 0; 296 else if (override & NFSACCCHK_ALLOWOWNER) { 297 getret = VOP_GETATTR(vp, &vattr, cred); 298 if (getret == 0 && cred->cr_uid == vattr.va_uid) 299 error = 0; 300 } 301 } 302 if (vpislocked == 0) 303 NFSVOPUNLOCK(vp, 0); 304 305 out: 306 NFSEXITCODE(error); 307 return (error); 308 } 309 310 /* 311 * Set attribute(s) vnop. 312 */ 313 int 314 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 315 struct thread *p, struct nfsexstuff *exp) 316 { 317 int error; 318 319 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 320 NFSEXITCODE(error); 321 return (error); 322 } 323 324 /* 325 * Set up nameidata for a lookup() call and do it. 326 */ 327 int 328 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 329 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, 330 struct vnode **retdirp) 331 { 332 struct componentname *cnp = &ndp->ni_cnd; 333 int i; 334 struct iovec aiov; 335 struct uio auio; 336 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 337 int error = 0, crossmnt; 338 char *cp; 339 340 *retdirp = NULL; 341 cnp->cn_nameptr = cnp->cn_pnbuf; 342 ndp->ni_strictrelative = 0; 343 /* 344 * Extract and set starting directory. 345 */ 346 if (dp->v_type != VDIR) { 347 if (islocked) 348 vput(dp); 349 else 350 vrele(dp); 351 nfsvno_relpathbuf(ndp); 352 error = ENOTDIR; 353 goto out1; 354 } 355 if (islocked) 356 NFSVOPUNLOCK(dp, 0); 357 VREF(dp); 358 *retdirp = dp; 359 if (NFSVNO_EXRDONLY(exp)) 360 cnp->cn_flags |= RDONLY; 361 ndp->ni_segflg = UIO_SYSSPACE; 362 crossmnt = 1; 363 364 if (nd->nd_flag & ND_PUBLOOKUP) { 365 ndp->ni_loopcnt = 0; 366 if (cnp->cn_pnbuf[0] == '/') { 367 vrele(dp); 368 /* 369 * Check for degenerate pathnames here, since lookup() 370 * panics on them. 
371 */ 372 for (i = 1; i < ndp->ni_pathlen; i++) 373 if (cnp->cn_pnbuf[i] != '/') 374 break; 375 if (i == ndp->ni_pathlen) { 376 error = NFSERR_ACCES; 377 goto out; 378 } 379 dp = rootvnode; 380 VREF(dp); 381 } 382 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 383 (nd->nd_flag & ND_NFSV4) == 0) { 384 /* 385 * Only cross mount points for NFSv4 when doing a 386 * mount while traversing the file system above 387 * the mount point, unless nfsrv_enable_crossmntpt is set. 388 */ 389 cnp->cn_flags |= NOCROSSMOUNT; 390 crossmnt = 0; 391 } 392 393 /* 394 * Initialize for scan, set ni_startdir and bump ref on dp again 395 * because lookup() will dereference ni_startdir. 396 */ 397 398 cnp->cn_thread = p; 399 ndp->ni_startdir = dp; 400 ndp->ni_rootdir = rootvnode; 401 ndp->ni_topdir = NULL; 402 403 if (!lockleaf) 404 cnp->cn_flags |= LOCKLEAF; 405 for (;;) { 406 cnp->cn_nameptr = cnp->cn_pnbuf; 407 /* 408 * Call lookup() to do the real work. If an error occurs, 409 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 410 * we do not have to dereference anything before returning. 411 * In either case ni_startdir will be dereferenced and NULLed 412 * out. 413 */ 414 error = lookup(ndp); 415 if (error) 416 break; 417 418 /* 419 * Check for encountering a symbolic link. Trivial 420 * termination occurs if no symlink encountered. 
421 */ 422 if ((cnp->cn_flags & ISSYMLINK) == 0) { 423 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) 424 nfsvno_relpathbuf(ndp); 425 if (ndp->ni_vp && !lockleaf) 426 NFSVOPUNLOCK(ndp->ni_vp, 0); 427 break; 428 } 429 430 /* 431 * Validate symlink 432 */ 433 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 434 NFSVOPUNLOCK(ndp->ni_dvp, 0); 435 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 436 error = EINVAL; 437 goto badlink2; 438 } 439 440 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 441 error = ELOOP; 442 goto badlink2; 443 } 444 if (ndp->ni_pathlen > 1) 445 cp = uma_zalloc(namei_zone, M_WAITOK); 446 else 447 cp = cnp->cn_pnbuf; 448 aiov.iov_base = cp; 449 aiov.iov_len = MAXPATHLEN; 450 auio.uio_iov = &aiov; 451 auio.uio_iovcnt = 1; 452 auio.uio_offset = 0; 453 auio.uio_rw = UIO_READ; 454 auio.uio_segflg = UIO_SYSSPACE; 455 auio.uio_td = NULL; 456 auio.uio_resid = MAXPATHLEN; 457 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 458 if (error) { 459 badlink1: 460 if (ndp->ni_pathlen > 1) 461 uma_zfree(namei_zone, cp); 462 badlink2: 463 vrele(ndp->ni_dvp); 464 vput(ndp->ni_vp); 465 break; 466 } 467 linklen = MAXPATHLEN - auio.uio_resid; 468 if (linklen == 0) { 469 error = ENOENT; 470 goto badlink1; 471 } 472 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 473 error = ENAMETOOLONG; 474 goto badlink1; 475 } 476 477 /* 478 * Adjust or replace path 479 */ 480 if (ndp->ni_pathlen > 1) { 481 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 482 uma_zfree(namei_zone, cnp->cn_pnbuf); 483 cnp->cn_pnbuf = cp; 484 } else 485 cnp->cn_pnbuf[linklen] = '\0'; 486 ndp->ni_pathlen += linklen; 487 488 /* 489 * Cleanup refs for next loop and check if root directory 490 * should replace current directory. Normally ni_dvp 491 * becomes the new base directory and is cleaned up when 492 * we loop. Explicitly null pointers after invalidation 493 * to clarify operation. 
494 */ 495 vput(ndp->ni_vp); 496 ndp->ni_vp = NULL; 497 498 if (cnp->cn_pnbuf[0] == '/') { 499 vrele(ndp->ni_dvp); 500 ndp->ni_dvp = ndp->ni_rootdir; 501 VREF(ndp->ni_dvp); 502 } 503 ndp->ni_startdir = ndp->ni_dvp; 504 ndp->ni_dvp = NULL; 505 } 506 if (!lockleaf) 507 cnp->cn_flags &= ~LOCKLEAF; 508 509 out: 510 if (error) { 511 nfsvno_relpathbuf(ndp); 512 ndp->ni_vp = NULL; 513 ndp->ni_dvp = NULL; 514 ndp->ni_startdir = NULL; 515 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 516 ndp->ni_dvp = NULL; 517 } 518 519 out1: 520 NFSEXITCODE2(error, nd); 521 return (error); 522 } 523 524 /* 525 * Set up a pathname buffer and return a pointer to it and, optionally 526 * set a hash pointer. 527 */ 528 void 529 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 530 { 531 struct componentname *cnp = &ndp->ni_cnd; 532 533 cnp->cn_flags |= (NOMACCHECK | HASBUF); 534 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 535 if (hashpp != NULL) 536 *hashpp = NULL; 537 *bufpp = cnp->cn_pnbuf; 538 } 539 540 /* 541 * Release the above path buffer, if not released by nfsvno_namei(). 542 */ 543 void 544 nfsvno_relpathbuf(struct nameidata *ndp) 545 { 546 547 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) 548 panic("nfsrelpath"); 549 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 550 ndp->ni_cnd.cn_flags &= ~HASBUF; 551 } 552 553 /* 554 * Readlink vnode op into an mbuf list. 
555 */ 556 int 557 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, 558 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 559 { 560 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; 561 struct iovec *ivp = iv; 562 struct uio io, *uiop = &io; 563 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; 564 int i, len, tlen, error = 0; 565 566 len = 0; 567 i = 0; 568 while (len < NFS_MAXPATHLEN) { 569 NFSMGET(mp); 570 MCLGET(mp, M_WAITOK); 571 mp->m_len = NFSMSIZ(mp); 572 if (len == 0) { 573 mp3 = mp2 = mp; 574 } else { 575 mp2->m_next = mp; 576 mp2 = mp; 577 } 578 if ((len + mp->m_len) > NFS_MAXPATHLEN) { 579 mp->m_len = NFS_MAXPATHLEN - len; 580 len = NFS_MAXPATHLEN; 581 } else { 582 len += mp->m_len; 583 } 584 ivp->iov_base = mtod(mp, caddr_t); 585 ivp->iov_len = mp->m_len; 586 i++; 587 ivp++; 588 } 589 uiop->uio_iov = iv; 590 uiop->uio_iovcnt = i; 591 uiop->uio_offset = 0; 592 uiop->uio_resid = len; 593 uiop->uio_rw = UIO_READ; 594 uiop->uio_segflg = UIO_SYSSPACE; 595 uiop->uio_td = NULL; 596 error = VOP_READLINK(vp, uiop, cred); 597 if (error) { 598 m_freem(mp3); 599 *lenp = 0; 600 goto out; 601 } 602 if (uiop->uio_resid > 0) { 603 len -= uiop->uio_resid; 604 tlen = NFSM_RNDUP(len); 605 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); 606 } 607 *lenp = len; 608 *mpp = mp3; 609 *mpendp = mp; 610 611 out: 612 NFSEXITCODE(error); 613 return (error); 614 } 615 616 /* 617 * Read vnode op call into mbuf list. 618 */ 619 int 620 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 621 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) 622 { 623 struct mbuf *m; 624 int i; 625 struct iovec *iv; 626 struct iovec *iv2; 627 int error = 0, len, left, siz, tlen, ioflag = 0; 628 struct mbuf *m2 = NULL, *m3; 629 struct uio io, *uiop = &io; 630 struct nfsheur *nh; 631 632 len = left = NFSM_RNDUP(cnt); 633 m3 = NULL; 634 /* 635 * Generate the mbuf list with the uio_iov ref. to it. 
636 */ 637 i = 0; 638 while (left > 0) { 639 NFSMGET(m); 640 MCLGET(m, M_WAITOK); 641 m->m_len = 0; 642 siz = min(M_TRAILINGSPACE(m), left); 643 left -= siz; 644 i++; 645 if (m3) 646 m2->m_next = m; 647 else 648 m3 = m; 649 m2 = m; 650 } 651 MALLOC(iv, struct iovec *, i * sizeof (struct iovec), 652 M_TEMP, M_WAITOK); 653 uiop->uio_iov = iv2 = iv; 654 m = m3; 655 left = len; 656 i = 0; 657 while (left > 0) { 658 if (m == NULL) 659 panic("nfsvno_read iov"); 660 siz = min(M_TRAILINGSPACE(m), left); 661 if (siz > 0) { 662 iv->iov_base = mtod(m, caddr_t) + m->m_len; 663 iv->iov_len = siz; 664 m->m_len += siz; 665 left -= siz; 666 iv++; 667 i++; 668 } 669 m = m->m_next; 670 } 671 uiop->uio_iovcnt = i; 672 uiop->uio_offset = off; 673 uiop->uio_resid = len; 674 uiop->uio_rw = UIO_READ; 675 uiop->uio_segflg = UIO_SYSSPACE; 676 nh = nfsrv_sequential_heuristic(uiop, vp); 677 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 678 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 679 FREE((caddr_t)iv2, M_TEMP); 680 if (error) { 681 m_freem(m3); 682 *mpp = NULL; 683 goto out; 684 } 685 nh->nh_nextoff = uiop->uio_offset; 686 tlen = len - uiop->uio_resid; 687 cnt = cnt < tlen ? cnt : tlen; 688 tlen = NFSM_RNDUP(cnt); 689 if (tlen == 0) { 690 m_freem(m3); 691 m3 = NULL; 692 } else if (len != tlen || tlen != cnt) 693 nfsrv_adj(m3, len - tlen, tlen - cnt); 694 *mpp = m3; 695 *mpendp = m2; 696 697 out: 698 NFSEXITCODE(error); 699 return (error); 700 } 701 702 /* 703 * Write vnode op from an mbuf list. 
704 */ 705 int 706 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, 707 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 708 { 709 struct iovec *ivp; 710 int i, len; 711 struct iovec *iv; 712 int ioflags, error; 713 struct uio io, *uiop = &io; 714 struct nfsheur *nh; 715 716 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, 717 M_WAITOK); 718 uiop->uio_iov = iv = ivp; 719 uiop->uio_iovcnt = cnt; 720 i = mtod(mp, caddr_t) + mp->m_len - cp; 721 len = retlen; 722 while (len > 0) { 723 if (mp == NULL) 724 panic("nfsvno_write"); 725 if (i > 0) { 726 i = min(i, len); 727 ivp->iov_base = cp; 728 ivp->iov_len = i; 729 ivp++; 730 len -= i; 731 } 732 mp = mp->m_next; 733 if (mp) { 734 i = mp->m_len; 735 cp = mtod(mp, caddr_t); 736 } 737 } 738 739 if (stable == NFSWRITE_UNSTABLE) 740 ioflags = IO_NODELOCKED; 741 else 742 ioflags = (IO_SYNC | IO_NODELOCKED); 743 uiop->uio_resid = retlen; 744 uiop->uio_rw = UIO_WRITE; 745 uiop->uio_segflg = UIO_SYSSPACE; 746 NFSUIOPROC(uiop, p); 747 uiop->uio_offset = off; 748 nh = nfsrv_sequential_heuristic(uiop, vp); 749 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 750 error = VOP_WRITE(vp, uiop, ioflags, cred); 751 if (error == 0) 752 nh->nh_nextoff = uiop->uio_offset; 753 FREE((caddr_t)iv, M_TEMP); 754 755 NFSEXITCODE(error); 756 return (error); 757 } 758 759 /* 760 * Common code for creating a regular file (plus special files for V2). 
761 */ 762 int 763 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 764 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 765 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) 766 { 767 u_quad_t tempsize; 768 int error; 769 770 error = nd->nd_repstat; 771 if (!error && ndp->ni_vp == NULL) { 772 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 773 vrele(ndp->ni_startdir); 774 error = VOP_CREATE(ndp->ni_dvp, 775 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 776 vput(ndp->ni_dvp); 777 nfsvno_relpathbuf(ndp); 778 if (!error) { 779 if (*exclusive_flagp) { 780 *exclusive_flagp = 0; 781 NFSVNO_ATTRINIT(nvap); 782 nvap->na_atime.tv_sec = cverf[0]; 783 nvap->na_atime.tv_nsec = cverf[1]; 784 error = VOP_SETATTR(ndp->ni_vp, 785 &nvap->na_vattr, nd->nd_cred); 786 } 787 } 788 /* 789 * NFS V2 Only. nfsrvd_mknod() does this for V3. 790 * (This implies, just get out on an error.) 791 */ 792 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 793 nvap->na_type == VFIFO) { 794 if (nvap->na_type == VCHR && rdev == 0xffffffff) 795 nvap->na_type = VFIFO; 796 if (nvap->na_type != VFIFO && 797 (error = priv_check_cred(nd->nd_cred, 798 PRIV_VFS_MKNOD_DEV, 0))) { 799 vrele(ndp->ni_startdir); 800 nfsvno_relpathbuf(ndp); 801 vput(ndp->ni_dvp); 802 goto out; 803 } 804 nvap->na_rdev = rdev; 805 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 806 &ndp->ni_cnd, &nvap->na_vattr); 807 vput(ndp->ni_dvp); 808 nfsvno_relpathbuf(ndp); 809 vrele(ndp->ni_startdir); 810 if (error) 811 goto out; 812 } else { 813 vrele(ndp->ni_startdir); 814 nfsvno_relpathbuf(ndp); 815 vput(ndp->ni_dvp); 816 error = ENXIO; 817 goto out; 818 } 819 *vpp = ndp->ni_vp; 820 } else { 821 /* 822 * Handle cases where error is already set and/or 823 * the file exists. 
824 * 1 - clean up the lookup 825 * 2 - iff !error and na_size set, truncate it 826 */ 827 vrele(ndp->ni_startdir); 828 nfsvno_relpathbuf(ndp); 829 *vpp = ndp->ni_vp; 830 if (ndp->ni_dvp == *vpp) 831 vrele(ndp->ni_dvp); 832 else 833 vput(ndp->ni_dvp); 834 if (!error && nvap->na_size != VNOVAL) { 835 error = nfsvno_accchk(*vpp, VWRITE, 836 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 837 NFSACCCHK_VPISLOCKED, NULL); 838 if (!error) { 839 tempsize = nvap->na_size; 840 NFSVNO_ATTRINIT(nvap); 841 nvap->na_size = tempsize; 842 error = VOP_SETATTR(*vpp, 843 &nvap->na_vattr, nd->nd_cred); 844 } 845 } 846 if (error) 847 vput(*vpp); 848 } 849 850 out: 851 NFSEXITCODE(error); 852 return (error); 853 } 854 855 /* 856 * Do a mknod vnode op. 857 */ 858 int 859 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 860 struct thread *p) 861 { 862 int error = 0; 863 enum vtype vtyp; 864 865 vtyp = nvap->na_type; 866 /* 867 * Iff doesn't exist, create it. 868 */ 869 if (ndp->ni_vp) { 870 vrele(ndp->ni_startdir); 871 nfsvno_relpathbuf(ndp); 872 vput(ndp->ni_dvp); 873 vrele(ndp->ni_vp); 874 error = EEXIST; 875 goto out; 876 } 877 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 878 vrele(ndp->ni_startdir); 879 nfsvno_relpathbuf(ndp); 880 vput(ndp->ni_dvp); 881 error = NFSERR_BADTYPE; 882 goto out; 883 } 884 if (vtyp == VSOCK) { 885 vrele(ndp->ni_startdir); 886 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 887 &ndp->ni_cnd, &nvap->na_vattr); 888 vput(ndp->ni_dvp); 889 nfsvno_relpathbuf(ndp); 890 } else { 891 if (nvap->na_type != VFIFO && 892 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { 893 vrele(ndp->ni_startdir); 894 nfsvno_relpathbuf(ndp); 895 vput(ndp->ni_dvp); 896 goto out; 897 } 898 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 899 &ndp->ni_cnd, &nvap->na_vattr); 900 vput(ndp->ni_dvp); 901 nfsvno_relpathbuf(ndp); 902 vrele(ndp->ni_startdir); 903 /* 904 * Since VOP_MKNOD returns the ni_vp, I can't 905 * see any reason to do the 
lookup. 906 */ 907 } 908 909 out: 910 NFSEXITCODE(error); 911 return (error); 912 } 913 914 /* 915 * Mkdir vnode op. 916 */ 917 int 918 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 919 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 920 { 921 int error = 0; 922 923 if (ndp->ni_vp != NULL) { 924 if (ndp->ni_dvp == ndp->ni_vp) 925 vrele(ndp->ni_dvp); 926 else 927 vput(ndp->ni_dvp); 928 vrele(ndp->ni_vp); 929 nfsvno_relpathbuf(ndp); 930 error = EEXIST; 931 goto out; 932 } 933 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 934 &nvap->na_vattr); 935 vput(ndp->ni_dvp); 936 nfsvno_relpathbuf(ndp); 937 938 out: 939 NFSEXITCODE(error); 940 return (error); 941 } 942 943 /* 944 * symlink vnode op. 945 */ 946 int 947 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 948 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 949 struct nfsexstuff *exp) 950 { 951 int error = 0; 952 953 if (ndp->ni_vp) { 954 vrele(ndp->ni_startdir); 955 nfsvno_relpathbuf(ndp); 956 if (ndp->ni_dvp == ndp->ni_vp) 957 vrele(ndp->ni_dvp); 958 else 959 vput(ndp->ni_dvp); 960 vrele(ndp->ni_vp); 961 error = EEXIST; 962 goto out; 963 } 964 965 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 966 &nvap->na_vattr, pathcp); 967 vput(ndp->ni_dvp); 968 vrele(ndp->ni_startdir); 969 nfsvno_relpathbuf(ndp); 970 /* 971 * Although FreeBSD still had the lookup code in 972 * it for 7/current, there doesn't seem to be any 973 * point, since VOP_SYMLINK() returns the ni_vp. 974 * Just vput it for v2. 975 */ 976 if (!not_v2 && !error) 977 vput(ndp->ni_vp); 978 979 out: 980 NFSEXITCODE(error); 981 return (error); 982 } 983 984 /* 985 * Parse symbolic link arguments. 986 * This function has an ugly side effect. It will MALLOC() an area for 987 * the symlink and set iov_base to point to it, only if it succeeds. 988 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 989 * be FREE'd later. 
990 */ 991 int 992 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 993 struct thread *p, char **pathcpp, int *lenp) 994 { 995 u_int32_t *tl; 996 char *pathcp = NULL; 997 int error = 0, len; 998 struct nfsv2_sattr *sp; 999 1000 *pathcpp = NULL; 1001 *lenp = 0; 1002 if ((nd->nd_flag & ND_NFSV3) && 1003 (error = nfsrv_sattr(nd, nvap, NULL, NULL, p))) 1004 goto nfsmout; 1005 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1006 len = fxdr_unsigned(int, *tl); 1007 if (len > NFS_MAXPATHLEN || len <= 0) { 1008 error = EBADRPC; 1009 goto nfsmout; 1010 } 1011 MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK); 1012 error = nfsrv_mtostr(nd, pathcp, len); 1013 if (error) 1014 goto nfsmout; 1015 if (nd->nd_flag & ND_NFSV2) { 1016 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1017 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1018 } 1019 *pathcpp = pathcp; 1020 *lenp = len; 1021 NFSEXITCODE2(0, nd); 1022 return (0); 1023 nfsmout: 1024 if (pathcp) 1025 free(pathcp, M_TEMP); 1026 NFSEXITCODE2(error, nd); 1027 return (error); 1028 } 1029 1030 /* 1031 * Remove a non-directory object. 1032 */ 1033 int 1034 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1035 struct thread *p, struct nfsexstuff *exp) 1036 { 1037 struct vnode *vp; 1038 int error = 0; 1039 1040 vp = ndp->ni_vp; 1041 if (vp->v_type == VDIR) 1042 error = NFSERR_ISDIR; 1043 else if (is_v4) 1044 error = nfsrv_checkremove(vp, 1, p); 1045 if (!error) 1046 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1047 if (ndp->ni_dvp == vp) 1048 vrele(ndp->ni_dvp); 1049 else 1050 vput(ndp->ni_dvp); 1051 vput(vp); 1052 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1053 nfsvno_relpathbuf(ndp); 1054 NFSEXITCODE(error); 1055 return (error); 1056 } 1057 1058 /* 1059 * Remove a directory. 
1060 */ 1061 int 1062 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1063 struct thread *p, struct nfsexstuff *exp) 1064 { 1065 struct vnode *vp; 1066 int error = 0; 1067 1068 vp = ndp->ni_vp; 1069 if (vp->v_type != VDIR) { 1070 error = ENOTDIR; 1071 goto out; 1072 } 1073 /* 1074 * No rmdir "." please. 1075 */ 1076 if (ndp->ni_dvp == vp) { 1077 error = EINVAL; 1078 goto out; 1079 } 1080 /* 1081 * The root of a mounted filesystem cannot be deleted. 1082 */ 1083 if (vp->v_vflag & VV_ROOT) 1084 error = EBUSY; 1085 out: 1086 if (!error) 1087 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1088 if (ndp->ni_dvp == vp) 1089 vrele(ndp->ni_dvp); 1090 else 1091 vput(ndp->ni_dvp); 1092 vput(vp); 1093 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1094 nfsvno_relpathbuf(ndp); 1095 NFSEXITCODE(error); 1096 return (error); 1097 } 1098 1099 /* 1100 * Rename vnode op. 1101 */ 1102 int 1103 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1104 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1105 { 1106 struct vnode *fvp, *tvp, *tdvp; 1107 int error = 0; 1108 1109 fvp = fromndp->ni_vp; 1110 if (ndstat) { 1111 vrele(fromndp->ni_dvp); 1112 vrele(fvp); 1113 error = ndstat; 1114 goto out1; 1115 } 1116 tdvp = tondp->ni_dvp; 1117 tvp = tondp->ni_vp; 1118 if (tvp != NULL) { 1119 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1120 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1121 goto out; 1122 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1123 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1124 goto out; 1125 } 1126 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1127 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1128 goto out; 1129 } 1130 1131 /* 1132 * A rename to '.' or '..' results in a prematurely 1133 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1134 * here. 
1135 */ 1136 if ((tondp->ni_cnd.cn_namelen == 1 && 1137 tondp->ni_cnd.cn_nameptr[0] == '.') || 1138 (tondp->ni_cnd.cn_namelen == 2 && 1139 tondp->ni_cnd.cn_nameptr[0] == '.' && 1140 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1141 error = EINVAL; 1142 goto out; 1143 } 1144 } 1145 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1146 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1147 goto out; 1148 } 1149 if (fvp->v_mount != tdvp->v_mount) { 1150 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1151 goto out; 1152 } 1153 if (fvp == tdvp) { 1154 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1155 goto out; 1156 } 1157 if (fvp == tvp) { 1158 /* 1159 * If source and destination are the same, there is nothing to 1160 * do. Set error to -1 to indicate this. 1161 */ 1162 error = -1; 1163 goto out; 1164 } 1165 if (ndflag & ND_NFSV4) { 1166 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1167 error = nfsrv_checkremove(fvp, 0, p); 1168 NFSVOPUNLOCK(fvp, 0); 1169 } else 1170 error = EPERM; 1171 if (tvp && !error) 1172 error = nfsrv_checkremove(tvp, 1, p); 1173 } else { 1174 /* 1175 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1176 * that the NFSv4 client won't be confused by the rename. 1177 * Since nfsd_recalldelegation() can only be called on an 1178 * unlocked vnode at this point and fvp is the file that will 1179 * still exist after the rename, just do fvp. 
1180 */ 1181 nfsd_recalldelegation(fvp, p); 1182 } 1183 out: 1184 if (!error) { 1185 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1186 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1187 &tondp->ni_cnd); 1188 } else { 1189 if (tdvp == tvp) 1190 vrele(tdvp); 1191 else 1192 vput(tdvp); 1193 if (tvp) 1194 vput(tvp); 1195 vrele(fromndp->ni_dvp); 1196 vrele(fvp); 1197 if (error == -1) 1198 error = 0; 1199 } 1200 vrele(tondp->ni_startdir); 1201 nfsvno_relpathbuf(tondp); 1202 out1: 1203 vrele(fromndp->ni_startdir); 1204 nfsvno_relpathbuf(fromndp); 1205 NFSEXITCODE(error); 1206 return (error); 1207 } 1208 1209 /* 1210 * Link vnode op. 1211 */ 1212 int 1213 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1214 struct thread *p, struct nfsexstuff *exp) 1215 { 1216 struct vnode *xp; 1217 int error = 0; 1218 1219 xp = ndp->ni_vp; 1220 if (xp != NULL) { 1221 error = EEXIST; 1222 } else { 1223 xp = ndp->ni_dvp; 1224 if (vp->v_mount != xp->v_mount) 1225 error = EXDEV; 1226 } 1227 if (!error) { 1228 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1229 if ((vp->v_iflag & VI_DOOMED) == 0) 1230 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1231 else 1232 error = EPERM; 1233 if (ndp->ni_dvp == vp) 1234 vrele(ndp->ni_dvp); 1235 else 1236 vput(ndp->ni_dvp); 1237 NFSVOPUNLOCK(vp, 0); 1238 } else { 1239 if (ndp->ni_dvp == ndp->ni_vp) 1240 vrele(ndp->ni_dvp); 1241 else 1242 vput(ndp->ni_dvp); 1243 if (ndp->ni_vp) 1244 vrele(ndp->ni_vp); 1245 } 1246 nfsvno_relpathbuf(ndp); 1247 NFSEXITCODE(error); 1248 return (error); 1249 } 1250 1251 /* 1252 * Do the fsync() appropriate for the commit. 1253 */ 1254 int 1255 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1256 struct thread *td) 1257 { 1258 int error = 0; 1259 1260 /* 1261 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1262 * file is done. At this time VOP_FSYNC does not accept offset and 1263 * byte count parameters so call VOP_FSYNC the whole file for now. 
1264 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1265 */ 1266 if (cnt == 0 || cnt > MAX_COMMIT_COUNT) { 1267 /* 1268 * Give up and do the whole thing 1269 */ 1270 if (vp->v_object && 1271 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1272 VM_OBJECT_WLOCK(vp->v_object); 1273 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1274 VM_OBJECT_WUNLOCK(vp->v_object); 1275 } 1276 error = VOP_FSYNC(vp, MNT_WAIT, td); 1277 } else { 1278 /* 1279 * Locate and synchronously write any buffers that fall 1280 * into the requested range. Note: we are assuming that 1281 * f_iosize is a power of 2. 1282 */ 1283 int iosize = vp->v_mount->mnt_stat.f_iosize; 1284 int iomask = iosize - 1; 1285 struct bufobj *bo; 1286 daddr_t lblkno; 1287 1288 /* 1289 * Align to iosize boundry, super-align to page boundry. 1290 */ 1291 if (off & iomask) { 1292 cnt += off & iomask; 1293 off &= ~(u_quad_t)iomask; 1294 } 1295 if (off & PAGE_MASK) { 1296 cnt += off & PAGE_MASK; 1297 off &= ~(u_quad_t)PAGE_MASK; 1298 } 1299 lblkno = off / iosize; 1300 1301 if (vp->v_object && 1302 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1303 VM_OBJECT_WLOCK(vp->v_object); 1304 vm_object_page_clean(vp->v_object, off, off + cnt, 1305 OBJPC_SYNC); 1306 VM_OBJECT_WUNLOCK(vp->v_object); 1307 } 1308 1309 bo = &vp->v_bufobj; 1310 BO_LOCK(bo); 1311 while (cnt > 0) { 1312 struct buf *bp; 1313 1314 /* 1315 * If we have a buffer and it is marked B_DELWRI we 1316 * have to lock and write it. Otherwise the prior 1317 * write is assumed to have already been committed. 1318 * 1319 * gbincore() can return invalid buffers now so we 1320 * have to check that bit as well (though B_DELWRI 1321 * should not be set if B_INVAL is set there could be 1322 * a race here since we haven't locked the buffer). 
1323 */ 1324 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1325 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1326 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1327 BO_LOCK(bo); 1328 continue; /* retry */ 1329 } 1330 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1331 B_DELWRI) { 1332 bremfree(bp); 1333 bp->b_flags &= ~B_ASYNC; 1334 bwrite(bp); 1335 ++nfs_commit_miss; 1336 } else 1337 BUF_UNLOCK(bp); 1338 BO_LOCK(bo); 1339 } 1340 ++nfs_commit_blks; 1341 if (cnt < iosize) 1342 break; 1343 cnt -= iosize; 1344 ++lblkno; 1345 } 1346 BO_UNLOCK(bo); 1347 } 1348 NFSEXITCODE(error); 1349 return (error); 1350 } 1351 1352 /* 1353 * Statfs vnode op. 1354 */ 1355 int 1356 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1357 { 1358 int error; 1359 1360 error = VFS_STATFS(vp->v_mount, sf); 1361 if (error == 0) { 1362 /* 1363 * Since NFS handles these values as unsigned on the 1364 * wire, there is no way to represent negative values, 1365 * so set them to 0. Without this, they will appear 1366 * to be very large positive values for clients like 1367 * Solaris10. 1368 */ 1369 if (sf->f_bavail < 0) 1370 sf->f_bavail = 0; 1371 if (sf->f_ffree < 0) 1372 sf->f_ffree = 0; 1373 } 1374 NFSEXITCODE(error); 1375 return (error); 1376 } 1377 1378 /* 1379 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1380 * must handle nfsrv_opencheck() calls after any other access checks. 
1381 */ 1382 void 1383 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1384 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1385 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1386 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, 1387 struct nfsexstuff *exp, struct vnode **vpp) 1388 { 1389 struct vnode *vp = NULL; 1390 u_quad_t tempsize; 1391 struct nfsexstuff nes; 1392 1393 if (ndp->ni_vp == NULL) 1394 nd->nd_repstat = nfsrv_opencheck(clientid, 1395 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1396 if (!nd->nd_repstat) { 1397 if (ndp->ni_vp == NULL) { 1398 vrele(ndp->ni_startdir); 1399 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1400 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1401 vput(ndp->ni_dvp); 1402 nfsvno_relpathbuf(ndp); 1403 if (!nd->nd_repstat) { 1404 if (*exclusive_flagp) { 1405 *exclusive_flagp = 0; 1406 NFSVNO_ATTRINIT(nvap); 1407 nvap->na_atime.tv_sec = cverf[0]; 1408 nvap->na_atime.tv_nsec = cverf[1]; 1409 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1410 &nvap->na_vattr, cred); 1411 } else { 1412 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1413 aclp, p, attrbitp, exp); 1414 } 1415 } 1416 vp = ndp->ni_vp; 1417 } else { 1418 if (ndp->ni_startdir) 1419 vrele(ndp->ni_startdir); 1420 nfsvno_relpathbuf(ndp); 1421 vp = ndp->ni_vp; 1422 if (create == NFSV4OPEN_CREATE) { 1423 if (ndp->ni_dvp == vp) 1424 vrele(ndp->ni_dvp); 1425 else 1426 vput(ndp->ni_dvp); 1427 } 1428 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1429 if (ndp->ni_cnd.cn_flags & RDONLY) 1430 NFSVNO_SETEXRDONLY(&nes); 1431 else 1432 NFSVNO_EXINIT(&nes); 1433 nd->nd_repstat = nfsvno_accchk(vp, 1434 VWRITE, cred, &nes, p, 1435 NFSACCCHK_NOOVERRIDE, 1436 NFSACCCHK_VPISLOCKED, NULL); 1437 nd->nd_repstat = nfsrv_opencheck(clientid, 1438 stateidp, stp, vp, nd, p, nd->nd_repstat); 1439 if (!nd->nd_repstat) { 1440 tempsize = nvap->na_size; 1441 NFSVNO_ATTRINIT(nvap); 1442 nvap->na_size = tempsize; 1443 nd->nd_repstat = 
VOP_SETATTR(vp, 1444 &nvap->na_vattr, cred); 1445 } 1446 } else if (vp->v_type == VREG) { 1447 nd->nd_repstat = nfsrv_opencheck(clientid, 1448 stateidp, stp, vp, nd, p, nd->nd_repstat); 1449 } 1450 } 1451 } else { 1452 if (ndp->ni_cnd.cn_flags & HASBUF) 1453 nfsvno_relpathbuf(ndp); 1454 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1455 vrele(ndp->ni_startdir); 1456 if (ndp->ni_dvp == ndp->ni_vp) 1457 vrele(ndp->ni_dvp); 1458 else 1459 vput(ndp->ni_dvp); 1460 if (ndp->ni_vp) 1461 vput(ndp->ni_vp); 1462 } 1463 } 1464 *vpp = vp; 1465 1466 NFSEXITCODE2(0, nd); 1467 } 1468 1469 /* 1470 * Updates the file rev and sets the mtime and ctime 1471 * to the current clock time, returning the va_filerev and va_Xtime 1472 * values. 1473 * Return ESTALE to indicate the vnode is VI_DOOMED. 1474 */ 1475 int 1476 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1477 struct ucred *cred, struct thread *p) 1478 { 1479 struct vattr va; 1480 1481 VATTR_NULL(&va); 1482 vfs_timestamp(&va.va_mtime); 1483 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1484 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1485 if ((vp->v_iflag & VI_DOOMED) != 0) 1486 return (ESTALE); 1487 } 1488 (void) VOP_SETATTR(vp, &va, cred); 1489 (void) nfsvno_getattr(vp, nvap, cred, p, 1); 1490 return (0); 1491 } 1492 1493 /* 1494 * Glue routine to nfsv4_fillattr(). 1495 */ 1496 int 1497 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1498 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1499 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1500 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1501 { 1502 int error; 1503 1504 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 1505 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 1506 mounted_on_fileno); 1507 NFSEXITCODE2(0, nd); 1508 return (error); 1509 } 1510 1511 /* Since the Readdir vnode ops vary, put the entire functions in here. 
*/
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count break out of loop
 *	The reply is built with the NFSM_BUILD macro and nfsm_strtom().
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other os'.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 * The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of.. just name strings and file id's or the
 *	entire reply rpc or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now, including the status longwords
 *	that are not a part of the dir. "entry" structures, but are in
 *	the rpc.
 *
 * vp is the directory vnode; it is released with vput() on every path
 * except the early return taken when nd->nd_repstat is already set.
 * isdgram is not used here.
 * Returns 0 with the NFS status left in nd->nd_repstat, or "error" via
 * the nfsmout label (presumably reached from NFSM_DISSECT() when the
 * request is too short - the macro is defined elsewhere).
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;
	int not_zfs;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * NFSv2 sends a 32-bit offset cookie followed by the count; the
	 * other versions send a 64-bit cookie and a 64-bit verifier first.
	 * Either way tl is left pointing at the count word.
	 */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		/* verf is only consulted by the disabled (#if 0) check below. */
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	/* Clamp an oversized or negative count to the server maximum. */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);
	/* Round the read size up to a multiple of DIRBLKSIZ. */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
		    p, 1);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (nd->nd_repstat == 0 && cnt == 0) {
		if (nd->nd_flag & ND_NFSV2)
			/* NFSv2 does not have NFSERR_TOOSMALL */
			nd->nd_repstat = EPERM;
		else
			nd->nd_repstat = NFSERR_TOOSMALL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* strcmp() result: 0 means the directory lives on ZFS. */
	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
again:
	/*
	 * (Re)read a buffer of directory entries starting at "off".
	 * Cookies left over from a previous pass are freed first.
	 */
	eofflag = 0;
	if (cookies) {
		free((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* Shrink siz to the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	/* A successful read that produced no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vput(vp);
		free((caddr_t)rbuf, M_TEMP);
		if (cookies)
			free((caddr_t)cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* "no entry follows", then "eof". */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		FREE((caddr_t)rbuf, M_TEMP);
		FREE((caddr_t)cookies, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 * Since the offset cookies don't monotonically increase for ZFS,
	 * this is not done when ZFS is the file system.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/*
	 * Everything in this buffer was skipped: read again with the full
	 * buffer size, continuing from where VOP_READDIR() stopped.
	 */
	if (cpos >= cend || ncookies == 0) {
		siz = fullsiz;
		toff = off;
		goto again;
	}
	vput(vp);

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			/* Account for the entry before it is emitted. */
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			if (dirlen > cnt) {
				/* Does not fit, so this cannot be eof. */
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				/* The extra zero word is the high half of a 64-bit fileid. */
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
			}
			*tl = txdr_unsigned(dp->d_fileno);
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				/* Likewise the high half of the 64-bit cookie. */
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
			} else
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(*cookiep);
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Unprocessed records remain in the buffer, so this is not eof. */
	if (cpos < cend)
		eofflag = 0;
	/* Trailer: "no more entries in this reply", then the eof flag. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	FREE((caddr_t)rbuf, M_TEMP);
	FREE((caddr_t)cookies, M_TEMP);

out:
	/* Normal exit: the NFS status travels in nd->nd_repstat. */
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Readdirplus for V3 and Readdir for V4.
*/
/*
 * The request, as dissected below, holds a 64-bit directory offset
 * cookie, a 64-bit cookie verifier, a 32-bit "siz" hint and a 32-bit
 * "cnt" reply limit; for NFSv4 a requested-attribute bitmap follows.
 * vp is the directory vnode.  It is released on every path (vput(), or
 * vrele() once it has been unlocked) except the early return taken when
 * nd->nd_repstat is already set.
 * Returns 0 with the NFS status left in nd->nd_repstat, or "error" via
 * the nfsmout label when the request cannot be parsed.
 */
int
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct vnode *nvp;
	fhandle_t nfh;
	struct nfsvattr nva, at, *nvap = &nva;
	struct mbuf *mb0, *mb1;
	struct nfsreferral *refp;
	int nlen, r, error = 0, getret = 1, usevget = 1;
	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
	caddr_t bpos0, bpos1;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	nfsattrbit_t attrbits, rderrbits, savbits;
	struct uio io;
	struct iovec iv;
	struct componentname cn;
	int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
	struct mount *mp, *new_mp;
	uint64_t mounted_on_fileno;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
	off = fxdr_hyper(tl);
	toff = off;
	tl += 2;
	verf = fxdr_hyper(tl);
	tl += 2;
	siz = fxdr_unsigned(int, *tl++);
	cnt = fxdr_unsigned(int, *tl);

	/*
	 * Use the server's maximum data transfer size as the upper bound
	 * on reply datalen.
	 */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);

	/*
	 * siz is a "hint" of how much directory information (name, fileid,
	 * cookie) should be in the reply. At least one client "hints" 0,
	 * so I set it to cnt for that case. I also round it up to the
	 * next multiple of DIRBLKSIZ.
	 */
	if (siz <= 0)
		siz = cnt;
	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));

	/*
	 * savbits keeps the attribute bitmap exactly as requested, attrbits
	 * is that bitmap with the not-fillable bits cleared and rderrbits
	 * holds only the RDATTRERROR bit.  savbits and rderrbits are set up
	 * for NFSv4 only.
	 */
	if (nd->nd_flag & ND_NFSV4) {
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSSET_ATTRBIT(&savbits, &attrbits);
		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
		NFSZERO_ATTRBIT(&rderrbits);
		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
	} else {
		NFSZERO_ATTRBIT(&attrbits);
	}
	fullsiz = siz;
	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
	if (!nd->nd_repstat) {
	    if (off && verf != at.na_filerev) {
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
#if 0
		if (nd->nd_flag & ND_NFSV4) {
			nd->nd_repstat = NFSERR_NOTSAME;
		} else {
			nd->nd_repstat = NFSERR_BAD_COOKIE;
		}
#endif
	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
		/* NFSv4: a zero cookie must come with a zero verifier. */
		nd->nd_repstat = NFSERR_BAD_COOKIE;
	    }
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (!nd->nd_repstat && cnt == 0)
		nd->nd_repstat = NFSERR_TOOSMALL;
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* strcmp() result: 0 means the directory lives on ZFS. */
	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");

	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
again:
	/*
	 * (Re)read a buffer of directory entries starting at "off".
	 * Cookies left over from a previous pass are freed first.
	 */
	eofflag = 0;
	if (cookies) {
		free((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* Shrink siz to the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);

	/* A successful read that produced no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (!nd->nd_repstat)
		nd->nd_repstat = getret;
	if (nd->nd_repstat) {
		vput(vp);
		if (cookies)
			free((caddr_t)cookies, M_TEMP);
		free((caddr_t)rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		tl += 2;
		/* "no entry follows", then "eof". */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free((caddr_t)cookies, M_TEMP);
		free((caddr_t)rbuf, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 * Since the offset cookies don't monotonically increase for ZFS,
	 * this is not done when ZFS is the file system.
	 * For NFSv4, "." and ".." are skipped here as well.
	 */
	while (cpos < cend && ncookies > 0 &&
	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	   (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
	   ((nd->nd_flag & ND_NFSV4) &&
	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/*
	 * Everything in this buffer was skipped: read again with the full
	 * buffer size, continuing from where VOP_READDIR() stopped.
	 */
	if (cpos >= cend || ncookies == 0) {
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * Busy the file system so that the mount point won't go away
	 * and, as such, VFS_VGET() can be used safely.
	 * The directory vnode is unlocked (but still referenced) from
	 * here on, which is why later releases use vrele() and not vput().
	 */
	mp = vp->v_mount;
	vfs_ref(mp);
	NFSVOPUNLOCK(vp, 0);
	nd->nd_repstat = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (nd->nd_repstat != 0) {
		vrele(vp);
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * Check to see if entries in this directory can be safely acquired
	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
	 * automount of the snapshot directory that is required will
	 * be done.
	 * This needs to be done here for NFSv4, since NFSv4 never does
	 * a VFS_VGET() for "." or "..".
	 */
	if (not_zfs == 0) {
		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
		if (r == EOPNOTSUPP) {
			/* Set up the fixed part of cn for VOP_LOOKUP(). */
			usevget = 0;
			cn.cn_nameiop = LOOKUP;
			cn.cn_lkflags = LK_SHARED | LK_RETRY;
			cn.cn_cred = nd->nd_cred;
			cn.cn_thread = p;
		} else if (r == 0)
			vput(nvp);
	}

	/*
	 * Save this position, in case there is an error before one entry
	 * is created.
	 */
	mb0 = nd->nd_mb;
	bpos0 = nd->nd_bpos;

	/*
	 * Fill in the first part of the reply.
	 * dirlen is the reply length in bytes and cannot exceed cnt.
	 * (Include the two booleans at the end of the reply in dirlen now,
	 * so we recognize when we have exceeded cnt.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
		nfsrv_postopattr(nd, getret, &at);
	} else {
		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
	txdr_hyper(at.na_filerev, tl);

	/*
	 * Save this position, in case there is an empty reply needed.
	 */
	mb1 = nd->nd_mb;
	bpos1 = nd->nd_bpos;

	/* Loop through the records and build reply */
	entrycnt = 0;
	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
		nlen = dp->d_namlen;
		/*
		 * Skip empty, whiteout and over-long entries; for anything
		 * other than NFSv3 also skip "." and "..".
		 */
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
		    nlen <= NFS_MAXNAMLEN &&
		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
		      || (nlen == 1 && dp->d_name[0] != '.'))) {
			/*
			 * Save the current position in the reply, in case
			 * this entry exceeds cnt.
			 */
			mb1 = nd->nd_mb;
			bpos1 = nd->nd_bpos;

			/*
			 * For readdir_and_lookup get the vnode using
			 * the file number.
			 */
			nvp = NULL;
			refp = NULL;
			r = 0;
			at_root = 0;
			needs_unbusy = 0;
			new_mp = mp;
			mounted_on_fileno = (uint64_t)dp->d_fileno;
			if ((nd->nd_flag & ND_NFSV3) ||
			    NFSNONZERO_ATTRBIT(&savbits)) {
				if (nd->nd_flag & ND_NFSV4)
					refp = nfsv4root_getreferral(NULL,
					    vp, dp->d_fileno);
				if (refp == NULL) {
					if (usevget)
						r = VFS_VGET(mp, dp->d_fileno,
						    LK_SHARED, &nvp);
					else
						r = EOPNOTSUPP;
					if (r == EOPNOTSUPP) {
						/*
						 * No usable VFS_VGET(): look
						 * the name up instead and
						 * remember that for the
						 * remaining entries.
						 */
						if (usevget) {
							usevget = 0;
							cn.cn_nameiop = LOOKUP;
							cn.cn_lkflags =
							    LK_SHARED |
							    LK_RETRY;
							cn.cn_cred =
							    nd->nd_cred;
							cn.cn_thread = p;
						}
						cn.cn_nameptr = dp->d_name;
						cn.cn_namelen = nlen;
						cn.cn_flags = ISLASTCN |
						    NOFOLLOW | LOCKLEAF;
						if (nlen == 2 &&
						    dp->d_name[0] == '.' &&
						    dp->d_name[1] == '.')
							cn.cn_flags |=
							    ISDOTDOT;
						if (NFSVOPLOCK(vp, LK_SHARED)
						    != 0) {
							nd->nd_repstat = EPERM;
							break;
						}
						/*
						 * ".." of a file system root
						 * is answered with the
						 * directory itself; vp stays
						 * locked and is released
						 * through nvp later.
						 */
						if ((vp->v_vflag & VV_ROOT) != 0
						    && (cn.cn_flags & ISDOTDOT)
						    != 0) {
							vref(vp);
							nvp = vp;
							r = 0;
						} else {
							r = VOP_LOOKUP(vp, &nvp,
							    &cn);
							if (vp != nvp)
								NFSVOPUNLOCK(vp,
								    0);
						}
					}

					/*
					 * For NFSv4, check to see if nvp is
					 * a mount point and get the mount
					 * point vnode, as required.
					 */
					if (r == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    nvp->v_mountedhere != NULL) {
						new_mp = nvp->v_mountedhere;
						r = vfs_busy(new_mp, 0);
						vput(nvp);
						nvp = NULL;
						if (r == 0) {
							r = VFS_ROOT(new_mp,
							    LK_SHARED, &nvp);
							needs_unbusy = 1;
							if (r == 0)
								at_root = 1;
						}
					}
				}
				if (!r) {
				    /* Fetch the file handle and attributes. */
				    if (refp == NULL &&
					((nd->nd_flag & ND_NFSV3) ||
					 NFSNONZERO_ATTRBIT(&attrbits))) {
					r = nfsvno_getfh(nvp, &nfh, p);
					if (!r)
					    r = nfsvno_getattr(nvp, nvap,
						nd->nd_cred, p, 1);
					if (r == 0 && not_zfs == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    vp->v_mount != nvp->v_mount) {
					    /*
					     * For a ZFS snapshot, there is a
					     * pseudo mount that does not set
					     * v_mountedhere, so it needs to
					     * be detected via a different
					     * mount structure.
					     */
					    at_root = 1;
					    if (new_mp == mp)
						new_mp = nvp->v_mount;
					}
				    }
				} else {
				    nvp = NULL;
				}
				if (r) {
				    /*
				     * Without RDATTRERROR requested the
				     * failure ends the whole reply; with it
				     * the entry is still built below.
				     */
				    if (!NFSISSET_ATTRBIT(&attrbits,
					NFSATTRBIT_RDATTRERROR)) {
					if (nvp != NULL)
						vput(nvp);
					if (needs_unbusy != 0)
						vfs_unbusy(new_mp);
					nd->nd_repstat = r;
					break;
				    }
				}
			}

			/*
			 * Build the directory record xdr
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
				*tl = txdr_unsigned(dp->d_fileno);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
				*tl = txdr_unsigned(*cookiep);
				nfsrv_postopattr(nd, 0, nvap);
				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
				if (nvp != NULL)
					vput(nvp);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
				*tl = txdr_unsigned(*cookiep);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				/*
				 * nvp is unlocked here and only its
				 * reference is dropped (vrele) afterwards.
				 */
				if (nvp != NULL) {
					supports_nfsv4acls =
					    nfs_supportsnfsv4acls(nvp);
					NFSVOPUNLOCK(nvp, 0);
				} else
					supports_nfsv4acls = 0;
				if (refp != NULL) {
					dirlen += nfsrv_putreferralattr(nd,
					    &savbits, refp, 0,
					    &nd->nd_repstat);
					if (nd->nd_repstat) {
						if (nvp != NULL)
							vrele(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						break;
					}
				} else if (r) {
					/* Failed entry: only rderrbits is filled. */
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &rderrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				} else {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &attrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				}
				if (nvp != NULL)
					vrele(nvp);
				dirlen += (3 * NFSX_UNSIGNED);
			}
			if (needs_unbusy != 0)
				vfs_unbusy(new_mp);
			/* Count only entries that fit within cnt. */
			if (dirlen <= cnt)
				entrycnt++;
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vfs_unbusy(mp);

	/*
	 * If dirlen > cnt, we must strip off the last entry. If that
	 * results in an empty reply, report NFSERR_TOOSMALL.
	 * On an error the reply is trimmed back to mb0/bpos0, the position
	 * saved before any of the reply body was built.
	 */
	if (dirlen > cnt || nd->nd_repstat) {
		if (!nd->nd_repstat && entrycnt == 0)
			nd->nd_repstat = NFSERR_TOOSMALL;
		if (nd->nd_repstat)
			newnfs_trimtrailing(nd, mb0, bpos0);
		else
			newnfs_trimtrailing(nd, mb1, bpos1);
		eofflag = 0;
	} else if (cpos < cend)
		eofflag = 0;
	if (!nd->nd_repstat) {
		/* Trailer: "no more entries in this reply", then eof. */
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		if (eofflag)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
	}
	FREE((caddr_t)cookies, M_TEMP);
	FREE((caddr_t)rbuf, M_TEMP);

out:
	/* Normal exit: the NFS status travels in nd->nd_repstat. */
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Get the settable attributes out of the mbuf list.
 * (Return 0 or EBADRPC)
 */
int
nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
{
	u_int32_t *tl;
	struct nfsv2_sattr *sp;
	int error = 0, toclient = 0;

	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
	case ND_NFSV2:
		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
		/*
		 * Some old clients didn't fill in the high order 16bits.
* --> check the low order 2 bytes for 0xffff
		 */
		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
			nvap->na_mode = nfstov_mode(sp->sa_mode);
		/* For the fields below, an all-ones XDR value means "not set". */
		if (sp->sa_uid != newnfs_xdrneg1)
			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
		if (sp->sa_gid != newnfs_xdrneg1)
			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
		if (sp->sa_size != newnfs_xdrneg1)
			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
#ifdef notyet
			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
#else
			/* Only the seconds of the access time are used. */
			nvap->na_atime.tv_sec =
				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
			nvap->na_atime.tv_nsec = 0;
#endif
		}
		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
		break;
	case ND_NFSV3:
		/*
		 * Mode, uid, gid and size are each preceded by a boolean
		 * that says whether a value follows.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_mode = nfstov_mode(*tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			nvap->na_size = fxdr_hyper(tl);
		}
		/*
		 * Access time: TOCLIENT carries an explicit time, TOSERVER
		 * asks for the server's current time; any other selector
		 * leaves na_atime untouched.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		switch (fxdr_unsigned(int, *tl)) {
		case NFSV3SATTRTIME_TOCLIENT:
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &nvap->na_atime);
			toclient = 1;
			break;
		case NFSV3SATTRTIME_TOSERVER:
			vfs_timestamp(&nvap->na_atime);
			nvap->na_vaflags |= VA_UTIMES_NULL;
			break;
		};
		/*
		 * Modify time, same encoding.  VA_UTIMES_NULL ends up set
		 * only when neither time was supplied explicitly by the
		 * client.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		switch (fxdr_unsigned(int, *tl)) {
		case NFSV3SATTRTIME_TOCLIENT:
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &nvap->na_mtime);
			nvap->na_vaflags &= ~VA_UTIMES_NULL;
			break;
		case NFSV3SATTRTIME_TOSERVER:
			vfs_timestamp(&nvap->na_mtime);
			if (!toclient)
				nvap->na_vaflags |= VA_UTIMES_NULL;
			break;
		};
		break;
	case ND_NFSV4:
		/* The NFSv4 attribute list is parsed by nfsv4_sattr(). */
		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
	};
nfsmout:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Handle the settable attributes for V4.
 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
 */
int
nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
{
	u_int32_t *tl;
	int attrsum = 0;
	int i, j;
	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
	int toclient = 0;
	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
	uid_t uid;
	gid_t gid;

	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
	if (error)
		goto nfsmout;
	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
	attrsize = fxdr_unsigned(int, *tl);

	/*
	 * Loop around getting the settable attributes. If an unsupported
	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2409 */ 2410 if (retnotsup) { 2411 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2412 bitpos = NFSATTRBIT_MAX; 2413 } else { 2414 bitpos = 0; 2415 } 2416 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2417 if (attrsum > attrsize) { 2418 error = NFSERR_BADXDR; 2419 goto nfsmout; 2420 } 2421 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2422 switch (bitpos) { 2423 case NFSATTRBIT_SIZE: 2424 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2425 nvap->na_size = fxdr_hyper(tl); 2426 attrsum += NFSX_HYPER; 2427 break; 2428 case NFSATTRBIT_ACL: 2429 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2430 p); 2431 if (error) 2432 goto nfsmout; 2433 if (aceerr && !nd->nd_repstat) 2434 nd->nd_repstat = aceerr; 2435 attrsum += aclsize; 2436 break; 2437 case NFSATTRBIT_ARCHIVE: 2438 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2439 if (!nd->nd_repstat) 2440 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2441 attrsum += NFSX_UNSIGNED; 2442 break; 2443 case NFSATTRBIT_HIDDEN: 2444 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2445 if (!nd->nd_repstat) 2446 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2447 attrsum += NFSX_UNSIGNED; 2448 break; 2449 case NFSATTRBIT_MIMETYPE: 2450 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2451 i = fxdr_unsigned(int, *tl); 2452 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2453 if (error) 2454 goto nfsmout; 2455 if (!nd->nd_repstat) 2456 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2457 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2458 break; 2459 case NFSATTRBIT_MODE: 2460 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2461 nvap->na_mode = nfstov_mode(*tl); 2462 attrsum += NFSX_UNSIGNED; 2463 break; 2464 case NFSATTRBIT_OWNER: 2465 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2466 j = fxdr_unsigned(int, *tl); 2467 if (j < 0) { 2468 error = NFSERR_BADXDR; 2469 goto nfsmout; 2470 } 2471 if (j > NFSV4_SMALLSTR) 2472 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2473 else 2474 cp = namestr; 2475 error = nfsrv_mtostr(nd, cp, j); 2476 if (error) { 2477 if (j > NFSV4_SMALLSTR) 2478 free(cp, 
M_NFSSTRING); 2479 goto nfsmout; 2480 } 2481 if (!nd->nd_repstat) { 2482 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, 2483 p); 2484 if (!nd->nd_repstat) 2485 nvap->na_uid = uid; 2486 } 2487 if (j > NFSV4_SMALLSTR) 2488 free(cp, M_NFSSTRING); 2489 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2490 break; 2491 case NFSATTRBIT_OWNERGROUP: 2492 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2493 j = fxdr_unsigned(int, *tl); 2494 if (j < 0) { 2495 error = NFSERR_BADXDR; 2496 goto nfsmout; 2497 } 2498 if (j > NFSV4_SMALLSTR) 2499 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2500 else 2501 cp = namestr; 2502 error = nfsrv_mtostr(nd, cp, j); 2503 if (error) { 2504 if (j > NFSV4_SMALLSTR) 2505 free(cp, M_NFSSTRING); 2506 goto nfsmout; 2507 } 2508 if (!nd->nd_repstat) { 2509 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, 2510 p); 2511 if (!nd->nd_repstat) 2512 nvap->na_gid = gid; 2513 } 2514 if (j > NFSV4_SMALLSTR) 2515 free(cp, M_NFSSTRING); 2516 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2517 break; 2518 case NFSATTRBIT_SYSTEM: 2519 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2520 if (!nd->nd_repstat) 2521 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2522 attrsum += NFSX_UNSIGNED; 2523 break; 2524 case NFSATTRBIT_TIMEACCESSSET: 2525 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2526 attrsum += NFSX_UNSIGNED; 2527 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2528 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2529 fxdr_nfsv4time(tl, &nvap->na_atime); 2530 toclient = 1; 2531 attrsum += NFSX_V4TIME; 2532 } else { 2533 vfs_timestamp(&nvap->na_atime); 2534 nvap->na_vaflags |= VA_UTIMES_NULL; 2535 } 2536 break; 2537 case NFSATTRBIT_TIMEBACKUP: 2538 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2539 if (!nd->nd_repstat) 2540 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2541 attrsum += NFSX_V4TIME; 2542 break; 2543 case NFSATTRBIT_TIMECREATE: 2544 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2545 if (!nd->nd_repstat) 2546 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2547 attrsum += NFSX_V4TIME; 
2548 break; 2549 case NFSATTRBIT_TIMEMODIFYSET: 2550 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2551 attrsum += NFSX_UNSIGNED; 2552 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2553 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2554 fxdr_nfsv4time(tl, &nvap->na_mtime); 2555 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2556 attrsum += NFSX_V4TIME; 2557 } else { 2558 vfs_timestamp(&nvap->na_mtime); 2559 if (!toclient) 2560 nvap->na_vaflags |= VA_UTIMES_NULL; 2561 } 2562 break; 2563 default: 2564 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2565 /* 2566 * set bitpos so we drop out of the loop. 2567 */ 2568 bitpos = NFSATTRBIT_MAX; 2569 break; 2570 }; 2571 } 2572 2573 /* 2574 * some clients pad the attrlist, so we need to skip over the 2575 * padding. 2576 */ 2577 if (attrsum > attrsize) { 2578 error = NFSERR_BADXDR; 2579 } else { 2580 attrsize = NFSM_RNDUP(attrsize); 2581 if (attrsum < attrsize) 2582 error = nfsm_advance(nd, attrsize - attrsum, -1); 2583 } 2584 nfsmout: 2585 NFSEXITCODE2(error, nd); 2586 return (error); 2587 } 2588 2589 /* 2590 * Check/setup export credentials. 2591 */ 2592 int 2593 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 2594 struct ucred *credanon) 2595 { 2596 int error = 0; 2597 2598 /* 2599 * Check/setup credentials. 2600 */ 2601 if (nd->nd_flag & ND_GSS) 2602 exp->nes_exflag &= ~MNT_EXPORTANON; 2603 2604 /* 2605 * Check to see if the operation is allowed for this security flavor. 2606 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 2607 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 2608 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 2609 */ 2610 if (nfsvno_testexp(nd, exp) && 2611 nd->nd_procnum != NFSV4OP_SECINFO && 2612 nd->nd_procnum != NFSPROC_FSINFO) { 2613 if (nd->nd_flag & ND_NFSV4) 2614 error = NFSERR_WRONGSEC; 2615 else 2616 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 2617 goto out; 2618 } 2619 2620 /* 2621 * Check to see if the file system is exported V4 only. 
2622 */ 2623 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 2624 error = NFSERR_PROGNOTV4; 2625 goto out; 2626 } 2627 2628 /* 2629 * Now, map the user credentials. 2630 * (Note that ND_AUTHNONE will only be set for an NFSv3 2631 * Fsinfo RPC. If set for anything else, this code might need 2632 * to change.) 2633 */ 2634 if (NFSVNO_EXPORTED(exp) && 2635 ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) || 2636 NFSVNO_EXPORTANON(exp) || 2637 (nd->nd_flag & ND_AUTHNONE))) { 2638 nd->nd_cred->cr_uid = credanon->cr_uid; 2639 nd->nd_cred->cr_gid = credanon->cr_gid; 2640 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 2641 credanon->cr_groups); 2642 } 2643 2644 out: 2645 NFSEXITCODE2(error, nd); 2646 return (error); 2647 } 2648 2649 /* 2650 * Check exports. 2651 */ 2652 int 2653 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 2654 struct ucred **credp) 2655 { 2656 int i, error, *secflavors; 2657 2658 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2659 &exp->nes_numsecflavor, &secflavors); 2660 if (error) { 2661 if (nfs_rootfhset) { 2662 exp->nes_exflag = 0; 2663 exp->nes_numsecflavor = 0; 2664 error = 0; 2665 } 2666 } else { 2667 /* Copy the security flavors. */ 2668 for (i = 0; i < exp->nes_numsecflavor; i++) 2669 exp->nes_secflavors[i] = secflavors[i]; 2670 } 2671 NFSEXITCODE(error); 2672 return (error); 2673 } 2674 2675 /* 2676 * Get a vnode for a file handle and export stuff. 2677 */ 2678 int 2679 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 2680 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 2681 struct ucred **credp) 2682 { 2683 int i, error, *secflavors; 2684 2685 *credp = NULL; 2686 exp->nes_numsecflavor = 0; 2687 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 2688 if (error != 0) 2689 /* Make sure the server replies ESTALE to the client. 
*/ 2690 error = ESTALE; 2691 if (nam && !error) { 2692 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2693 &exp->nes_numsecflavor, &secflavors); 2694 if (error) { 2695 if (nfs_rootfhset) { 2696 exp->nes_exflag = 0; 2697 exp->nes_numsecflavor = 0; 2698 error = 0; 2699 } else { 2700 vput(*vpp); 2701 } 2702 } else { 2703 /* Copy the security flavors. */ 2704 for (i = 0; i < exp->nes_numsecflavor; i++) 2705 exp->nes_secflavors[i] = secflavors[i]; 2706 } 2707 } 2708 NFSEXITCODE(error); 2709 return (error); 2710 } 2711 2712 /* 2713 * nfsd_fhtovp() - convert a fh to a vnode ptr 2714 * - look up fsid in mount list (if not found ret error) 2715 * - get vp and export rights by calling nfsvno_fhtovp() 2716 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 2717 * for AUTH_SYS 2718 * - if mpp != NULL, return the mount point so that it can 2719 * be used for vn_finished_write() by the caller 2720 */ 2721 void 2722 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 2723 struct vnode **vpp, struct nfsexstuff *exp, 2724 struct mount **mpp, int startwrite, struct thread *p) 2725 { 2726 struct mount *mp; 2727 struct ucred *credanon; 2728 fhandle_t *fhp; 2729 2730 fhp = (fhandle_t *)nfp->nfsrvfh_data; 2731 /* 2732 * Check for the special case of the nfsv4root_fh. 2733 */ 2734 mp = vfs_busyfs(&fhp->fh_fsid); 2735 if (mpp != NULL) 2736 *mpp = mp; 2737 if (mp == NULL) { 2738 *vpp = NULL; 2739 nd->nd_repstat = ESTALE; 2740 goto out; 2741 } 2742 2743 if (startwrite) { 2744 vn_start_write(NULL, mpp, V_WAIT); 2745 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 2746 lktype = LK_EXCLUSIVE; 2747 } 2748 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 2749 &credanon); 2750 vfs_unbusy(mp); 2751 2752 /* 2753 * For NFSv4 without a pseudo root fs, unexported file handles 2754 * can be returned, so that Lookup works everywhere. 
2755 */ 2756 if (!nd->nd_repstat && exp->nes_exflag == 0 && 2757 !(nd->nd_flag & ND_NFSV4)) { 2758 vput(*vpp); 2759 nd->nd_repstat = EACCES; 2760 } 2761 2762 /* 2763 * Personally, I've never seen any point in requiring a 2764 * reserved port#, since only in the rare case where the 2765 * clients are all boxes with secure system priviledges, 2766 * does it provide any enhanced security, but... some people 2767 * believe it to be useful and keep putting this code back in. 2768 * (There is also some "security checker" out there that 2769 * complains if the nfs server doesn't enforce this.) 2770 * However, note the following: 2771 * RFC3530 (NFSv4) specifies that a reserved port# not be 2772 * required. 2773 * RFC2623 recommends that, if a reserved port# is checked for, 2774 * that there be a way to turn that off--> ifdef'd. 2775 */ 2776 #ifdef NFS_REQRSVPORT 2777 if (!nd->nd_repstat) { 2778 struct sockaddr_in *saddr; 2779 struct sockaddr_in6 *saddr6; 2780 2781 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 2782 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 2783 if (!(nd->nd_flag & ND_NFSV4) && 2784 ((saddr->sin_family == AF_INET && 2785 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 2786 (saddr6->sin6_family == AF_INET6 && 2787 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 2788 vput(*vpp); 2789 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 2790 } 2791 } 2792 #endif /* NFS_REQRSVPORT */ 2793 2794 /* 2795 * Check/setup credentials. 2796 */ 2797 if (!nd->nd_repstat) { 2798 nd->nd_saveduid = nd->nd_cred->cr_uid; 2799 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 2800 if (nd->nd_repstat) 2801 vput(*vpp); 2802 } 2803 if (credanon != NULL) 2804 crfree(credanon); 2805 if (nd->nd_repstat) { 2806 if (startwrite) 2807 vn_finished_write(mp); 2808 *vpp = NULL; 2809 if (mpp != NULL) 2810 *mpp = NULL; 2811 } 2812 2813 out: 2814 NFSEXITCODE2(0, nd); 2815 } 2816 2817 /* 2818 * glue for fp. 
2819 */ 2820 static int 2821 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 2822 { 2823 struct filedesc *fdp; 2824 struct file *fp; 2825 int error = 0; 2826 2827 fdp = p->td_proc->p_fd; 2828 if (fd < 0 || fd >= fdp->fd_nfiles || 2829 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 2830 error = EBADF; 2831 goto out; 2832 } 2833 *fpp = fp; 2834 2835 out: 2836 NFSEXITCODE(error); 2837 return (error); 2838 } 2839 2840 /* 2841 * Called from nfssvc() to update the exports list. Just call 2842 * vfs_export(). This has to be done, since the v4 root fake fs isn't 2843 * in the mount list. 2844 */ 2845 int 2846 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 2847 { 2848 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 2849 int error = 0; 2850 struct nameidata nd; 2851 fhandle_t fh; 2852 2853 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 2854 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 2855 nfs_rootfhset = 0; 2856 else if (error == 0) { 2857 if (nfsexargp->fspec == NULL) { 2858 error = EPERM; 2859 goto out; 2860 } 2861 /* 2862 * If fspec != NULL, this is the v4root path. 2863 */ 2864 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 2865 nfsexargp->fspec, p); 2866 if ((error = namei(&nd)) != 0) 2867 goto out; 2868 error = nfsvno_getfh(nd.ni_vp, &fh, p); 2869 vrele(nd.ni_vp); 2870 if (!error) { 2871 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 2872 NFSBCOPY((caddr_t)&fh, 2873 nfs_rootfh.nfsrvfh_data, 2874 sizeof (fhandle_t)); 2875 nfs_rootfhset = 1; 2876 } 2877 } 2878 2879 out: 2880 NFSEXITCODE(error); 2881 return (error); 2882 } 2883 2884 /* 2885 * This function needs to test to see if the system is near its limit 2886 * for memory allocation via malloc() or mget() and return True iff 2887 * either of these resources are near their limit. 2888 * XXX (For now, this is just a stub.) 
2889 */ 2890 int nfsrv_testmalloclimit = 0; 2891 int 2892 nfsrv_mallocmget_limit(void) 2893 { 2894 static int printmesg = 0; 2895 static int testval = 1; 2896 2897 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 2898 if ((printmesg++ % 100) == 0) 2899 printf("nfsd: malloc/mget near limit\n"); 2900 return (1); 2901 } 2902 return (0); 2903 } 2904 2905 /* 2906 * BSD specific initialization of a mount point. 2907 */ 2908 void 2909 nfsd_mntinit(void) 2910 { 2911 static int inited = 0; 2912 2913 if (inited) 2914 return; 2915 inited = 1; 2916 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 2917 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 2918 TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); 2919 nfsv4root_mnt.mnt_export = NULL; 2920 TAILQ_INIT(&nfsv4root_opt); 2921 TAILQ_INIT(&nfsv4root_newopt); 2922 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 2923 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 2924 nfsv4root_mnt.mnt_nvnodelistsize = 0; 2925 nfsv4root_mnt.mnt_activevnodelistsize = 0; 2926 } 2927 2928 /* 2929 * Get a vnode for a file handle, without checking exports, etc. 2930 */ 2931 struct vnode * 2932 nfsvno_getvp(fhandle_t *fhp) 2933 { 2934 struct mount *mp; 2935 struct vnode *vp; 2936 int error; 2937 2938 mp = vfs_busyfs(&fhp->fh_fsid); 2939 if (mp == NULL) 2940 return (NULL); 2941 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 2942 vfs_unbusy(mp); 2943 if (error) 2944 return (NULL); 2945 return (vp); 2946 } 2947 2948 /* 2949 * Do a local VOP_ADVLOCK(). 2950 */ 2951 int 2952 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 2953 u_int64_t end, struct thread *td) 2954 { 2955 int error = 0; 2956 struct flock fl; 2957 u_int64_t tlen; 2958 2959 if (nfsrv_dolocallocks == 0) 2960 goto out; 2961 2962 /* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. 
*/ 2963 if ((vp->v_iflag & VI_DOOMED) != 0) { 2964 error = EPERM; 2965 goto out; 2966 } 2967 2968 fl.l_whence = SEEK_SET; 2969 fl.l_type = ftype; 2970 fl.l_start = (off_t)first; 2971 if (end == NFS64BITSSET) { 2972 fl.l_len = 0; 2973 } else { 2974 tlen = end - first; 2975 fl.l_len = (off_t)tlen; 2976 } 2977 /* 2978 * For FreeBSD8, the l_pid and l_sysid must be set to the same 2979 * values for all calls, so that all locks will be held by the 2980 * nfsd server. (The nfsd server handles conflicts between the 2981 * various clients.) 2982 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 2983 * bytes, so it can't be put in l_sysid. 2984 */ 2985 if (nfsv4_sysid == 0) 2986 nfsv4_sysid = nlm_acquire_next_sysid(); 2987 fl.l_pid = (pid_t)0; 2988 fl.l_sysid = (int)nfsv4_sysid; 2989 2990 NFSVOPUNLOCK(vp, 0); 2991 if (ftype == F_UNLCK) 2992 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 2993 (F_POSIX | F_REMOTE)); 2994 else 2995 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 2996 (F_POSIX | F_REMOTE)); 2997 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 2998 2999 out: 3000 NFSEXITCODE(error); 3001 return (error); 3002 } 3003 3004 /* 3005 * Check the nfsv4 root exports. 
3006 */ 3007 int 3008 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3009 { 3010 struct ucred *credanon; 3011 int exflags, error = 0, numsecflavor, *secflavors, i; 3012 3013 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3014 &credanon, &numsecflavor, &secflavors); 3015 if (error) { 3016 error = NFSERR_PROGUNAVAIL; 3017 goto out; 3018 } 3019 if (credanon != NULL) 3020 crfree(credanon); 3021 for (i = 0; i < numsecflavor; i++) { 3022 if (secflavors[i] == AUTH_SYS) 3023 nd->nd_flag |= ND_EXAUTHSYS; 3024 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3025 nd->nd_flag |= ND_EXGSS; 3026 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3027 nd->nd_flag |= ND_EXGSSINTEGRITY; 3028 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3029 nd->nd_flag |= ND_EXGSSPRIVACY; 3030 } 3031 3032 out: 3033 NFSEXITCODE(error); 3034 return (error); 3035 } 3036 3037 /* 3038 * Nfs server psuedo system call for the nfsd's 3039 */ 3040 /* 3041 * MPSAFE 3042 */ 3043 static int 3044 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3045 { 3046 struct file *fp; 3047 struct nfsd_addsock_args sockarg; 3048 struct nfsd_nfsd_args nfsdarg; 3049 cap_rights_t rights; 3050 int error; 3051 3052 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3053 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3054 if (error) 3055 goto out; 3056 /* 3057 * Since we don't know what rights might be required, 3058 * pretend that we need them all. It is better to be too 3059 * careful than too reckless. 
3060 */ 3061 error = fget(td, sockarg.sock, 3062 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 3063 if (error != 0) 3064 goto out; 3065 if (fp->f_type != DTYPE_SOCKET) { 3066 fdrop(fp, td); 3067 error = EPERM; 3068 goto out; 3069 } 3070 error = nfsrvd_addsock(fp); 3071 fdrop(fp, td); 3072 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3073 if (uap->argp == NULL) { 3074 error = EINVAL; 3075 goto out; 3076 } 3077 error = copyin(uap->argp, (caddr_t)&nfsdarg, 3078 sizeof (nfsdarg)); 3079 if (error) 3080 goto out; 3081 error = nfsrvd_nfsd(td, &nfsdarg); 3082 } else { 3083 error = nfssvc_srvcall(td, uap, td->td_ucred); 3084 } 3085 3086 out: 3087 NFSEXITCODE(error); 3088 return (error); 3089 } 3090 3091 static int 3092 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3093 { 3094 struct nfsex_args export; 3095 struct file *fp = NULL; 3096 int stablefd, len; 3097 struct nfsd_clid adminrevoke; 3098 struct nfsd_dumplist dumplist; 3099 struct nfsd_dumpclients *dumpclients; 3100 struct nfsd_dumplocklist dumplocklist; 3101 struct nfsd_dumplocks *dumplocks; 3102 struct nameidata nd; 3103 vnode_t vp; 3104 int error = EINVAL, igotlock; 3105 struct proc *procp; 3106 static int suspend_nfsd = 0; 3107 3108 if (uap->flag & NFSSVC_PUBLICFH) { 3109 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3110 sizeof (fhandle_t)); 3111 error = copyin(uap->argp, 3112 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3113 if (!error) 3114 nfs_pubfhset = 1; 3115 } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { 3116 error = copyin(uap->argp,(caddr_t)&export, 3117 sizeof (struct nfsex_args)); 3118 if (!error) 3119 error = nfsrv_v4rootexport(&export, cred, p); 3120 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 3121 nfs_pubfhset = 0; 3122 error = 0; 3123 } else if (uap->flag & NFSSVC_STABLERESTART) { 3124 error = copyin(uap->argp, (caddr_t)&stablefd, 3125 sizeof (int)); 3126 if (!error) 3127 error = fp_getfvp(p, stablefd, &fp, &vp); 3128 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != 
(FREAD | FWRITE)) 3129 error = EBADF; 3130 if (!error && newnfs_numnfsd != 0) 3131 error = EPERM; 3132 if (!error) { 3133 nfsrv_stablefirst.nsf_fp = fp; 3134 nfsrv_setupstable(p); 3135 } 3136 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 3137 error = copyin(uap->argp, (caddr_t)&adminrevoke, 3138 sizeof (struct nfsd_clid)); 3139 if (!error) 3140 error = nfsrv_adminrevoke(&adminrevoke, p); 3141 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 3142 error = copyin(uap->argp, (caddr_t)&dumplist, 3143 sizeof (struct nfsd_dumplist)); 3144 if (!error && (dumplist.ndl_size < 1 || 3145 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 3146 error = EPERM; 3147 if (!error) { 3148 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 3149 dumpclients = (struct nfsd_dumpclients *)malloc(len, 3150 M_TEMP, M_WAITOK); 3151 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 3152 error = copyout(dumpclients, 3153 CAST_USER_ADDR_T(dumplist.ndl_list), len); 3154 free((caddr_t)dumpclients, M_TEMP); 3155 } 3156 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 3157 error = copyin(uap->argp, (caddr_t)&dumplocklist, 3158 sizeof (struct nfsd_dumplocklist)); 3159 if (!error && (dumplocklist.ndllck_size < 1 || 3160 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 3161 error = EPERM; 3162 if (!error) 3163 error = nfsrv_lookupfilename(&nd, 3164 dumplocklist.ndllck_fname, p); 3165 if (!error) { 3166 len = sizeof (struct nfsd_dumplocks) * 3167 dumplocklist.ndllck_size; 3168 dumplocks = (struct nfsd_dumplocks *)malloc(len, 3169 M_TEMP, M_WAITOK); 3170 nfsrv_dumplocks(nd.ni_vp, dumplocks, 3171 dumplocklist.ndllck_size, p); 3172 vput(nd.ni_vp); 3173 error = copyout(dumplocks, 3174 CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); 3175 free((caddr_t)dumplocks, M_TEMP); 3176 } 3177 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 3178 procp = p->td_proc; 3179 PROC_LOCK(procp); 3180 nfsd_master_pid = procp->p_pid; 3181 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 3182 nfsd_master_start = procp->p_stats->p_start; 
3183 nfsd_master_proc = procp; 3184 PROC_UNLOCK(procp); 3185 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 3186 NFSLOCKV4ROOTMUTEX(); 3187 if (suspend_nfsd == 0) { 3188 /* Lock out all nfsd threads */ 3189 do { 3190 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, 3191 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 3192 } while (igotlock == 0 && suspend_nfsd == 0); 3193 suspend_nfsd = 1; 3194 } 3195 NFSUNLOCKV4ROOTMUTEX(); 3196 error = 0; 3197 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 3198 NFSLOCKV4ROOTMUTEX(); 3199 if (suspend_nfsd != 0) { 3200 nfsv4_unlock(&nfsd_suspend_lock, 0); 3201 suspend_nfsd = 0; 3202 } 3203 NFSUNLOCKV4ROOTMUTEX(); 3204 error = 0; 3205 } 3206 3207 NFSEXITCODE(error); 3208 return (error); 3209 } 3210 3211 /* 3212 * Check exports. 3213 * Returns 0 if ok, 1 otherwise. 3214 */ 3215 int 3216 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 3217 { 3218 int i; 3219 3220 /* 3221 * This seems odd, but allow the case where the security flavor 3222 * list is empty. This happens when NFSv4 is traversing non-exported 3223 * file systems. Exported file systems should always have a non-empty 3224 * security flavor list. 3225 */ 3226 if (exp->nes_numsecflavor == 0) 3227 return (0); 3228 3229 for (i = 0; i < exp->nes_numsecflavor; i++) { 3230 /* 3231 * The tests for privacy and integrity must be first, 3232 * since ND_GSS is set for everything but AUTH_SYS. 3233 */ 3234 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 3235 (nd->nd_flag & ND_GSSPRIVACY)) 3236 return (0); 3237 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 3238 (nd->nd_flag & ND_GSSINTEGRITY)) 3239 return (0); 3240 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 3241 (nd->nd_flag & ND_GSS)) 3242 return (0); 3243 if (exp->nes_secflavors[i] == AUTH_SYS && 3244 (nd->nd_flag & ND_GSS) == 0) 3245 return (0); 3246 } 3247 return (1); 3248 } 3249 3250 /* 3251 * Calculate a hash value for the fid in a file handle. 
3252 */ 3253 uint32_t 3254 nfsrv_hashfh(fhandle_t *fhp) 3255 { 3256 uint32_t hashval; 3257 3258 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 3259 return (hashval); 3260 } 3261 3262 /* 3263 * Signal the userland master nfsd to backup the stable restart file. 3264 */ 3265 void 3266 nfsrv_backupstable(void) 3267 { 3268 struct proc *procp; 3269 3270 if (nfsd_master_proc != NULL) { 3271 procp = pfind(nfsd_master_pid); 3272 /* Try to make sure it is the correct process. */ 3273 if (procp == nfsd_master_proc && 3274 procp->p_stats->p_start.tv_sec == 3275 nfsd_master_start.tv_sec && 3276 procp->p_stats->p_start.tv_usec == 3277 nfsd_master_start.tv_usec && 3278 strcmp(procp->p_comm, nfsd_master_comm) == 0) 3279 kern_psignal(procp, SIGUSR2); 3280 else 3281 nfsd_master_proc = NULL; 3282 3283 if (procp != NULL) 3284 PROC_UNLOCK(procp); 3285 } 3286 } 3287 3288 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 3289 3290 /* 3291 * Called once to initialize data structures... 
3292 */ 3293 static int 3294 nfsd_modevent(module_t mod, int type, void *data) 3295 { 3296 int error = 0, i; 3297 static int loaded = 0; 3298 3299 switch (type) { 3300 case MOD_LOAD: 3301 if (loaded) 3302 goto out; 3303 newnfs_portinit(); 3304 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 3305 snprintf(nfsrchash_table[i].lock_name, 3306 sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d", 3307 i); 3308 mtx_init(&nfsrchash_table[i].mtx, 3309 nfsrchash_table[i].lock_name, NULL, MTX_DEF); 3310 snprintf(nfsrcahash_table[i].lock_name, 3311 sizeof(nfsrcahash_table[i].lock_name), "nfsrc_tcpa%d", 3312 i); 3313 mtx_init(&nfsrcahash_table[i].mtx, 3314 nfsrcahash_table[i].lock_name, NULL, MTX_DEF); 3315 } 3316 mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF); 3317 mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF); 3318 mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL, 3319 MTX_DEF); 3320 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 3321 nfsrvd_initcache(); 3322 nfsd_init(); 3323 NFSD_LOCK(); 3324 nfsrvd_init(0); 3325 NFSD_UNLOCK(); 3326 nfsd_mntinit(); 3327 #ifdef VV_DISABLEDELEG 3328 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 3329 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 3330 #endif 3331 nfsd_call_servertimer = nfsrv_servertimer; 3332 nfsd_call_nfsd = nfssvc_nfsd; 3333 loaded = 1; 3334 break; 3335 3336 case MOD_UNLOAD: 3337 if (newnfs_numnfsd != 0) { 3338 error = EBUSY; 3339 break; 3340 } 3341 3342 #ifdef VV_DISABLEDELEG 3343 vn_deleg_ops.vndeleg_recall = NULL; 3344 vn_deleg_ops.vndeleg_disable = NULL; 3345 #endif 3346 nfsd_call_servertimer = NULL; 3347 nfsd_call_nfsd = NULL; 3348 3349 /* Clean out all NFSv4 state. */ 3350 nfsrv_throwawayallstate(curthread); 3351 3352 /* Clean the NFS server reply cache */ 3353 nfsrvd_cleancache(); 3354 3355 /* Free up the krpc server pool. 
*/ 3356 if (nfsrvd_pool != NULL) 3357 svcpool_destroy(nfsrvd_pool); 3358 3359 /* and get rid of the locks */ 3360 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 3361 mtx_destroy(&nfsrchash_table[i].mtx); 3362 mtx_destroy(&nfsrcahash_table[i].mtx); 3363 } 3364 mtx_destroy(&nfsrc_udpmtx); 3365 mtx_destroy(&nfs_v4root_mutex); 3366 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 3367 lockdestroy(&nfsv4root_mnt.mnt_explock); 3368 loaded = 0; 3369 break; 3370 default: 3371 error = EOPNOTSUPP; 3372 break; 3373 } 3374 3375 out: 3376 NFSEXITCODE(error); 3377 return (error); 3378 } 3379 static moduledata_t nfsd_mod = { 3380 "nfsd", 3381 nfsd_modevent, 3382 NULL, 3383 }; 3384 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 3385 3386 /* So that loader and kldload(2) can find us, wherever we are.. */ 3387 MODULE_VERSION(nfsd, 1); 3388 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 3389 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); 3390 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 3391 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 3392 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 3393 3394