1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <sys/hash.h> 50 #include <sys/sysctl.h> 51 #include <nlm/nlm_prot.h> 52 #include <nlm/nlm.h> 53 54 FEATURE(nfsd, "NFSv4 server"); 55 56 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 57 extern int nfsrv_useacl; 58 extern int newnfs_numnfsd; 59 extern struct mount nfsv4root_mnt; 60 extern struct nfsrv_stablefirst nfsrv_stablefirst; 61 extern void (*nfsd_call_servertimer)(void); 62 extern SVCPOOL *nfsrvd_pool; 63 extern struct nfsv4lock nfsd_suspend_lock; 64 extern struct nfsclienthashhead *nfsclienthash; 65 extern struct nfslockhashhead *nfslockhash; 66 extern struct nfssessionhash *nfssessionhash; 67 extern int nfsrv_sessionhashsize; 68 extern struct nfsstatsv1 nfsstatsv1; 69 extern struct nfslayouthash *nfslayouthash; 70 extern int nfsrv_layouthashsize; 71 extern struct mtx nfsrv_dslock_mtx; 72 extern int nfs_pnfsiothreads; 73 extern struct nfsdontlisthead nfsrv_dontlisthead; 74 extern volatile int nfsrv_dontlistlen; 75 extern volatile int nfsrv_devidcnt; 76 extern int nfsrv_maxpnfsmirror; 77 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 78 NFSDLOCKMUTEX; 79 NFSSTATESPINLOCK; 80 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 81 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 82 struct mtx nfsrc_udpmtx; 83 struct mtx nfs_v4root_mutex; 84 struct mtx nfsrv_dontlistlock_mtx; 85 struct mtx nfsrv_recalllock_mtx; 86 struct nfsrvfh nfs_rootfh, nfs_pubfh; 87 int nfs_pubfhset = 0, nfs_rootfhset = 0; 88 struct proc *nfsd_master_proc = NULL; 89 int nfsd_debuglevel = 0; 90 static pid_t nfsd_master_pid = (pid_t)-1; 91 static char nfsd_master_comm[MAXCOMLEN + 1]; 92 static struct timeval 
nfsd_master_start; 93 static uint32_t nfsv4_sysid = 0; 94 static fhandle_t zerofh; 95 96 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 97 struct ucred *); 98 99 int nfsrv_enable_crossmntpt = 1; 100 static int nfs_commit_blks; 101 static int nfs_commit_miss; 102 extern int nfsrv_issuedelegs; 103 extern int nfsrv_dolocallocks; 104 extern int nfsd_enable_stringtouid; 105 extern struct nfsdevicehead nfsrv_devidhead; 106 107 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 108 NFSPROC_T *); 109 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 110 int *, char *, fhandle_t *); 111 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 112 NFSPROC_T *); 113 static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int, 114 struct ucred *, struct thread *, int, struct mbuf **, char *, 115 struct mbuf **, struct nfsvattr *, struct acl *); 116 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 117 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 118 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 119 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 120 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 121 char *, int *); 122 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 123 struct vnode *, struct nfsmount **, int, struct acl *, int *); 124 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 125 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 126 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 127 struct vnode *, struct nfsmount *, struct nfsvattr *); 128 static int nfsrv_putfhname(fhandle_t *, char *); 129 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 130 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 131 static void nfsrv_pnfssetfh(struct vnode *, struct 
pnfsdsfile *, 132 struct vnode *, NFSPROC_T *); 133 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 134 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 135 NFSPROC_T *); 136 static int nfsrv_pnfsstatfs(struct statfs *); 137 138 int nfs_pnfsio(task_fn_t *, void *); 139 140 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); 141 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 142 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 143 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 144 0, ""); 145 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 146 0, ""); 147 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 148 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 149 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 150 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 151 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 152 0, "Debug level for NFS server"); 153 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 154 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); 155 static int nfsrv_pnfsgetdsattr = 1; 156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 157 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 158 159 /* 160 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 161 * not running. 162 * The dsN subdirectories for the increased values must have been created 163 * on all DS servers before this increase is done. 
164 */ 165 u_int nfsrv_dsdirsize = 20; 166 static int 167 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 168 { 169 int error, newdsdirsize; 170 171 newdsdirsize = nfsrv_dsdirsize; 172 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 173 if (error != 0 || req->newptr == NULL) 174 return (error); 175 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 176 newnfs_numnfsd != 0) 177 return (EINVAL); 178 nfsrv_dsdirsize = newdsdirsize; 179 return (0); 180 } 181 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0, 182 sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", 183 "Number of dsN subdirs on the DS servers"); 184 185 #define MAX_REORDERED_RPC 16 186 #define NUM_HEURISTIC 1031 187 #define NHUSE_INIT 64 188 #define NHUSE_INC 16 189 #define NHUSE_MAX 2048 190 191 static struct nfsheur { 192 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 193 off_t nh_nextoff; /* next offset for sequential detection */ 194 int nh_use; /* use count for selection */ 195 int nh_seqcount; /* heuristic */ 196 } nfsheur[NUM_HEURISTIC]; 197 198 199 /* 200 * Heuristic to detect sequential operation. 201 */ 202 static struct nfsheur * 203 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 204 { 205 struct nfsheur *nh; 206 int hi, try; 207 208 /* Locate best candidate. */ 209 try = 32; 210 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 211 nh = &nfsheur[hi]; 212 while (try--) { 213 if (nfsheur[hi].nh_vp == vp) { 214 nh = &nfsheur[hi]; 215 break; 216 } 217 if (nfsheur[hi].nh_use > 0) 218 --nfsheur[hi].nh_use; 219 hi = (hi + 1) % NUM_HEURISTIC; 220 if (nfsheur[hi].nh_use < nh->nh_use) 221 nh = &nfsheur[hi]; 222 } 223 224 /* Initialize hint if this is a new file. */ 225 if (nh->nh_vp != vp) { 226 nh->nh_vp = vp; 227 nh->nh_nextoff = uio->uio_offset; 228 nh->nh_use = NHUSE_INIT; 229 if (uio->uio_offset == 0) 230 nh->nh_seqcount = 4; 231 else 232 nh->nh_seqcount = 1; 233 } 234 235 /* Calculate heuristic. 
*/ 236 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 237 uio->uio_offset == nh->nh_nextoff) { 238 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 239 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 240 if (nh->nh_seqcount > IO_SEQMAX) 241 nh->nh_seqcount = IO_SEQMAX; 242 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 243 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 244 /* Probably a reordered RPC, leave seqcount alone. */ 245 } else if (nh->nh_seqcount > 1) { 246 nh->nh_seqcount /= 2; 247 } else { 248 nh->nh_seqcount = 0; 249 } 250 nh->nh_use += NHUSE_INC; 251 if (nh->nh_use > NHUSE_MAX) 252 nh->nh_use = NHUSE_MAX; 253 return (nh); 254 } 255 256 /* 257 * Get attributes into nfsvattr structure. 258 */ 259 int 260 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 261 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 262 nfsattrbit_t *attrbitp) 263 { 264 int error, gotattr, lockedit = 0; 265 struct nfsvattr na; 266 267 if (vpislocked == 0) { 268 /* 269 * When vpislocked == 0, the vnode is either exclusively 270 * locked by this thread or not locked by this thread. 271 * As such, shared lock it, if not exclusively locked. 272 */ 273 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 274 lockedit = 1; 275 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 276 } 277 } 278 279 /* 280 * Acquire the Change, Size and TimeModify attributes, as required. 281 * This needs to be done for regular files if: 282 * - non-NFSv4 RPCs or 283 * - when attrbitp == NULL or 284 * - an NFSv4 RPC with any of the above attributes in attrbitp. 285 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 286 * these attributes. nfsrv_proxyds() will return an error if the 287 * server is not a pNFS one. 
288 */ 289 gotattr = 0; 290 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 291 (nd->nd_flag & ND_NFSV4) == 0 || 292 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 293 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 294 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 295 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY))) { 296 error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p, 297 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); 298 if (error == 0) 299 gotattr = 1; 300 } 301 302 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 303 if (lockedit != 0) 304 NFSVOPUNLOCK(vp, 0); 305 306 /* 307 * If we got the Change, Size and Modify Time from the DS, 308 * replace them. 309 */ 310 if (gotattr != 0) { 311 nvap->na_atime = na.na_atime; 312 nvap->na_mtime = na.na_mtime; 313 nvap->na_filerev = na.na_filerev; 314 nvap->na_size = na.na_size; 315 } 316 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 317 error, (uintmax_t)na.na_filerev); 318 319 NFSEXITCODE(error); 320 return (error); 321 } 322 323 /* 324 * Get a file handle for a vnode. 325 */ 326 int 327 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 328 { 329 int error; 330 331 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 332 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 333 error = VOP_VPTOFH(vp, &fhp->fh_fid); 334 335 NFSEXITCODE(error); 336 return (error); 337 } 338 339 /* 340 * Perform access checking for vnodes obtained from file handles that would 341 * refer to files already opened by a Unix client. You cannot just use 342 * vn_writechk() and VOP_ACCESSX() for two reasons. 343 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 344 * case. 345 * 2 - The owner is to be given access irrespective of mode bits for some 346 * operations, so that processes that chmod after opening a file don't 347 * break. 
348 */ 349 int 350 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 351 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 352 u_int32_t *supportedtypep) 353 { 354 struct vattr vattr; 355 int error = 0, getret = 0; 356 357 if (vpislocked == 0) { 358 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 359 error = EPERM; 360 goto out; 361 } 362 } 363 if (accmode & VWRITE) { 364 /* Just vn_writechk() changed to check rdonly */ 365 /* 366 * Disallow write attempts on read-only file systems; 367 * unless the file is a socket or a block or character 368 * device resident on the file system. 369 */ 370 if (NFSVNO_EXRDONLY(exp) || 371 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 372 switch (vp->v_type) { 373 case VREG: 374 case VDIR: 375 case VLNK: 376 error = EROFS; 377 default: 378 break; 379 } 380 } 381 /* 382 * If there's shared text associated with 383 * the inode, try to free it up once. If 384 * we fail, we can't allow writing. 385 */ 386 if (VOP_IS_TEXT(vp) && error == 0) 387 error = ETXTBSY; 388 } 389 if (error != 0) { 390 if (vpislocked == 0) 391 NFSVOPUNLOCK(vp, 0); 392 goto out; 393 } 394 395 /* 396 * Should the override still be applied when ACLs are enabled? 397 */ 398 error = VOP_ACCESSX(vp, accmode, cred, p); 399 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 400 /* 401 * Try again with VEXPLICIT_DENY, to see if the test for 402 * deletion is supported. 403 */ 404 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 405 if (error == 0) { 406 if (vp->v_type == VDIR) { 407 accmode &= ~(VDELETE | VDELETE_CHILD); 408 accmode |= VWRITE; 409 error = VOP_ACCESSX(vp, accmode, cred, p); 410 } else if (supportedtypep != NULL) { 411 *supportedtypep &= ~NFSACCESS_DELETE; 412 } 413 } 414 } 415 416 /* 417 * Allow certain operations for the owner (reads and writes 418 * on files that are already open). 
419 */ 420 if (override != NFSACCCHK_NOOVERRIDE && 421 (error == EPERM || error == EACCES)) { 422 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 423 error = 0; 424 else if (override & NFSACCCHK_ALLOWOWNER) { 425 getret = VOP_GETATTR(vp, &vattr, cred); 426 if (getret == 0 && cred->cr_uid == vattr.va_uid) 427 error = 0; 428 } 429 } 430 if (vpislocked == 0) 431 NFSVOPUNLOCK(vp, 0); 432 433 out: 434 NFSEXITCODE(error); 435 return (error); 436 } 437 438 /* 439 * Set attribute(s) vnop. 440 */ 441 int 442 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 443 struct thread *p, struct nfsexstuff *exp) 444 { 445 int error; 446 447 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 448 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 449 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 450 nvap->na_vattr.va_size != VNOVAL || 451 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 452 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 453 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 454 /* For a pNFS server, set the attributes on the DS file. */ 455 error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR, 456 NULL, NULL, NULL, nvap, NULL); 457 if (error == ENOENT) 458 error = 0; 459 } 460 NFSEXITCODE(error); 461 return (error); 462 } 463 464 /* 465 * Set up nameidata for a lookup() call and do it. 466 */ 467 int 468 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 469 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, 470 struct vnode **retdirp) 471 { 472 struct componentname *cnp = &ndp->ni_cnd; 473 int i; 474 struct iovec aiov; 475 struct uio auio; 476 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 477 int error = 0; 478 char *cp; 479 480 *retdirp = NULL; 481 cnp->cn_nameptr = cnp->cn_pnbuf; 482 ndp->ni_lcf = 0; 483 /* 484 * Extract and set starting directory. 
485 */ 486 if (dp->v_type != VDIR) { 487 if (islocked) 488 vput(dp); 489 else 490 vrele(dp); 491 nfsvno_relpathbuf(ndp); 492 error = ENOTDIR; 493 goto out1; 494 } 495 if (islocked) 496 NFSVOPUNLOCK(dp, 0); 497 VREF(dp); 498 *retdirp = dp; 499 if (NFSVNO_EXRDONLY(exp)) 500 cnp->cn_flags |= RDONLY; 501 ndp->ni_segflg = UIO_SYSSPACE; 502 503 if (nd->nd_flag & ND_PUBLOOKUP) { 504 ndp->ni_loopcnt = 0; 505 if (cnp->cn_pnbuf[0] == '/') { 506 vrele(dp); 507 /* 508 * Check for degenerate pathnames here, since lookup() 509 * panics on them. 510 */ 511 for (i = 1; i < ndp->ni_pathlen; i++) 512 if (cnp->cn_pnbuf[i] != '/') 513 break; 514 if (i == ndp->ni_pathlen) { 515 error = NFSERR_ACCES; 516 goto out; 517 } 518 dp = rootvnode; 519 VREF(dp); 520 } 521 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 522 (nd->nd_flag & ND_NFSV4) == 0) { 523 /* 524 * Only cross mount points for NFSv4 when doing a 525 * mount while traversing the file system above 526 * the mount point, unless nfsrv_enable_crossmntpt is set. 527 */ 528 cnp->cn_flags |= NOCROSSMOUNT; 529 } 530 531 /* 532 * Initialize for scan, set ni_startdir and bump ref on dp again 533 * because lookup() will dereference ni_startdir. 534 */ 535 536 cnp->cn_thread = p; 537 ndp->ni_startdir = dp; 538 ndp->ni_rootdir = rootvnode; 539 ndp->ni_topdir = NULL; 540 541 if (!lockleaf) 542 cnp->cn_flags |= LOCKLEAF; 543 for (;;) { 544 cnp->cn_nameptr = cnp->cn_pnbuf; 545 /* 546 * Call lookup() to do the real work. If an error occurs, 547 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 548 * we do not have to dereference anything before returning. 549 * In either case ni_startdir will be dereferenced and NULLed 550 * out. 551 */ 552 error = lookup(ndp); 553 if (error) 554 break; 555 556 /* 557 * Check for encountering a symbolic link. Trivial 558 * termination occurs if no symlink encountered. 
559 */ 560 if ((cnp->cn_flags & ISSYMLINK) == 0) { 561 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) 562 nfsvno_relpathbuf(ndp); 563 if (ndp->ni_vp && !lockleaf) 564 NFSVOPUNLOCK(ndp->ni_vp, 0); 565 break; 566 } 567 568 /* 569 * Validate symlink 570 */ 571 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 572 NFSVOPUNLOCK(ndp->ni_dvp, 0); 573 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 574 error = EINVAL; 575 goto badlink2; 576 } 577 578 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 579 error = ELOOP; 580 goto badlink2; 581 } 582 if (ndp->ni_pathlen > 1) 583 cp = uma_zalloc(namei_zone, M_WAITOK); 584 else 585 cp = cnp->cn_pnbuf; 586 aiov.iov_base = cp; 587 aiov.iov_len = MAXPATHLEN; 588 auio.uio_iov = &aiov; 589 auio.uio_iovcnt = 1; 590 auio.uio_offset = 0; 591 auio.uio_rw = UIO_READ; 592 auio.uio_segflg = UIO_SYSSPACE; 593 auio.uio_td = NULL; 594 auio.uio_resid = MAXPATHLEN; 595 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 596 if (error) { 597 badlink1: 598 if (ndp->ni_pathlen > 1) 599 uma_zfree(namei_zone, cp); 600 badlink2: 601 vrele(ndp->ni_dvp); 602 vput(ndp->ni_vp); 603 break; 604 } 605 linklen = MAXPATHLEN - auio.uio_resid; 606 if (linklen == 0) { 607 error = ENOENT; 608 goto badlink1; 609 } 610 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 611 error = ENAMETOOLONG; 612 goto badlink1; 613 } 614 615 /* 616 * Adjust or replace path 617 */ 618 if (ndp->ni_pathlen > 1) { 619 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 620 uma_zfree(namei_zone, cnp->cn_pnbuf); 621 cnp->cn_pnbuf = cp; 622 } else 623 cnp->cn_pnbuf[linklen] = '\0'; 624 ndp->ni_pathlen += linklen; 625 626 /* 627 * Cleanup refs for next loop and check if root directory 628 * should replace current directory. Normally ni_dvp 629 * becomes the new base directory and is cleaned up when 630 * we loop. Explicitly null pointers after invalidation 631 * to clarify operation. 
632 */ 633 vput(ndp->ni_vp); 634 ndp->ni_vp = NULL; 635 636 if (cnp->cn_pnbuf[0] == '/') { 637 vrele(ndp->ni_dvp); 638 ndp->ni_dvp = ndp->ni_rootdir; 639 VREF(ndp->ni_dvp); 640 } 641 ndp->ni_startdir = ndp->ni_dvp; 642 ndp->ni_dvp = NULL; 643 } 644 if (!lockleaf) 645 cnp->cn_flags &= ~LOCKLEAF; 646 647 out: 648 if (error) { 649 nfsvno_relpathbuf(ndp); 650 ndp->ni_vp = NULL; 651 ndp->ni_dvp = NULL; 652 ndp->ni_startdir = NULL; 653 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 654 ndp->ni_dvp = NULL; 655 } 656 657 out1: 658 NFSEXITCODE2(error, nd); 659 return (error); 660 } 661 662 /* 663 * Set up a pathname buffer and return a pointer to it and, optionally 664 * set a hash pointer. 665 */ 666 void 667 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 668 { 669 struct componentname *cnp = &ndp->ni_cnd; 670 671 cnp->cn_flags |= (NOMACCHECK | HASBUF); 672 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 673 if (hashpp != NULL) 674 *hashpp = NULL; 675 *bufpp = cnp->cn_pnbuf; 676 } 677 678 /* 679 * Release the above path buffer, if not released by nfsvno_namei(). 680 */ 681 void 682 nfsvno_relpathbuf(struct nameidata *ndp) 683 { 684 685 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) 686 panic("nfsrelpath"); 687 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 688 ndp->ni_cnd.cn_flags &= ~HASBUF; 689 } 690 691 /* 692 * Readlink vnode op into an mbuf list. 
693 */ 694 int 695 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, 696 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 697 { 698 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; 699 struct iovec *ivp = iv; 700 struct uio io, *uiop = &io; 701 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; 702 int i, len, tlen, error = 0; 703 704 len = 0; 705 i = 0; 706 while (len < NFS_MAXPATHLEN) { 707 NFSMGET(mp); 708 MCLGET(mp, M_WAITOK); 709 mp->m_len = M_SIZE(mp); 710 if (len == 0) { 711 mp3 = mp2 = mp; 712 } else { 713 mp2->m_next = mp; 714 mp2 = mp; 715 } 716 if ((len + mp->m_len) > NFS_MAXPATHLEN) { 717 mp->m_len = NFS_MAXPATHLEN - len; 718 len = NFS_MAXPATHLEN; 719 } else { 720 len += mp->m_len; 721 } 722 ivp->iov_base = mtod(mp, caddr_t); 723 ivp->iov_len = mp->m_len; 724 i++; 725 ivp++; 726 } 727 uiop->uio_iov = iv; 728 uiop->uio_iovcnt = i; 729 uiop->uio_offset = 0; 730 uiop->uio_resid = len; 731 uiop->uio_rw = UIO_READ; 732 uiop->uio_segflg = UIO_SYSSPACE; 733 uiop->uio_td = NULL; 734 error = VOP_READLINK(vp, uiop, cred); 735 if (error) { 736 m_freem(mp3); 737 *lenp = 0; 738 goto out; 739 } 740 if (uiop->uio_resid > 0) { 741 len -= uiop->uio_resid; 742 tlen = NFSM_RNDUP(len); 743 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); 744 } 745 *lenp = len; 746 *mpp = mp3; 747 *mpendp = mp; 748 749 out: 750 NFSEXITCODE(error); 751 return (error); 752 } 753 754 /* 755 * Read vnode op call into mbuf list. 756 */ 757 int 758 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 759 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) 760 { 761 struct mbuf *m; 762 int i; 763 struct iovec *iv; 764 struct iovec *iv2; 765 int error = 0, len, left, siz, tlen, ioflag = 0; 766 struct mbuf *m2 = NULL, *m3; 767 struct uio io, *uiop = &io; 768 struct nfsheur *nh; 769 770 /* 771 * Attempt to read from a DS file. A return of ENOENT implies 772 * there is no DS file to read. 
773 */ 774 error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 775 NULL, mpendp, NULL, NULL); 776 if (error != ENOENT) 777 return (error); 778 779 len = left = NFSM_RNDUP(cnt); 780 m3 = NULL; 781 /* 782 * Generate the mbuf list with the uio_iov ref. to it. 783 */ 784 i = 0; 785 while (left > 0) { 786 NFSMGET(m); 787 MCLGET(m, M_WAITOK); 788 m->m_len = 0; 789 siz = min(M_TRAILINGSPACE(m), left); 790 left -= siz; 791 i++; 792 if (m3) 793 m2->m_next = m; 794 else 795 m3 = m; 796 m2 = m; 797 } 798 iv = malloc(i * sizeof (struct iovec), 799 M_TEMP, M_WAITOK); 800 uiop->uio_iov = iv2 = iv; 801 m = m3; 802 left = len; 803 i = 0; 804 while (left > 0) { 805 if (m == NULL) 806 panic("nfsvno_read iov"); 807 siz = min(M_TRAILINGSPACE(m), left); 808 if (siz > 0) { 809 iv->iov_base = mtod(m, caddr_t) + m->m_len; 810 iv->iov_len = siz; 811 m->m_len += siz; 812 left -= siz; 813 iv++; 814 i++; 815 } 816 m = m->m_next; 817 } 818 uiop->uio_iovcnt = i; 819 uiop->uio_offset = off; 820 uiop->uio_resid = len; 821 uiop->uio_rw = UIO_READ; 822 uiop->uio_segflg = UIO_SYSSPACE; 823 uiop->uio_td = NULL; 824 nh = nfsrv_sequential_heuristic(uiop, vp); 825 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 826 /* XXX KDM make this more systematic? */ 827 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 828 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 829 free(iv2, M_TEMP); 830 if (error) { 831 m_freem(m3); 832 *mpp = NULL; 833 goto out; 834 } 835 nh->nh_nextoff = uiop->uio_offset; 836 tlen = len - uiop->uio_resid; 837 cnt = cnt < tlen ? cnt : tlen; 838 tlen = NFSM_RNDUP(cnt); 839 if (tlen == 0) { 840 m_freem(m3); 841 m3 = NULL; 842 } else if (len != tlen || tlen != cnt) 843 nfsrv_adj(m3, len - tlen, tlen - cnt); 844 *mpp = m3; 845 *mpendp = m2; 846 847 out: 848 NFSEXITCODE(error); 849 return (error); 850 } 851 852 /* 853 * Write vnode op from an mbuf list. 
854 */ 855 int 856 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, 857 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 858 { 859 struct iovec *ivp; 860 int i, len; 861 struct iovec *iv; 862 int ioflags, error; 863 struct uio io, *uiop = &io; 864 struct nfsheur *nh; 865 866 /* 867 * Attempt to write to a DS file. A return of ENOENT implies 868 * there is no DS file to write. 869 */ 870 error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS, 871 &mp, cp, NULL, NULL, NULL); 872 if (error != ENOENT) { 873 *stable = NFSWRITE_FILESYNC; 874 return (error); 875 } 876 877 ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 878 M_WAITOK); 879 uiop->uio_iov = iv = ivp; 880 uiop->uio_iovcnt = cnt; 881 i = mtod(mp, caddr_t) + mp->m_len - cp; 882 len = retlen; 883 while (len > 0) { 884 if (mp == NULL) 885 panic("nfsvno_write"); 886 if (i > 0) { 887 i = min(i, len); 888 ivp->iov_base = cp; 889 ivp->iov_len = i; 890 ivp++; 891 len -= i; 892 } 893 mp = mp->m_next; 894 if (mp) { 895 i = mp->m_len; 896 cp = mtod(mp, caddr_t); 897 } 898 } 899 900 if (*stable == NFSWRITE_UNSTABLE) 901 ioflags = IO_NODELOCKED; 902 else 903 ioflags = (IO_SYNC | IO_NODELOCKED); 904 uiop->uio_resid = retlen; 905 uiop->uio_rw = UIO_WRITE; 906 uiop->uio_segflg = UIO_SYSSPACE; 907 NFSUIOPROC(uiop, p); 908 uiop->uio_offset = off; 909 nh = nfsrv_sequential_heuristic(uiop, vp); 910 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 911 /* XXX KDM make this more systematic? */ 912 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 913 error = VOP_WRITE(vp, uiop, ioflags, cred); 914 if (error == 0) 915 nh->nh_nextoff = uiop->uio_offset; 916 free(iv, M_TEMP); 917 918 NFSEXITCODE(error); 919 return (error); 920 } 921 922 /* 923 * Common code for creating a regular file (plus special files for V2). 
924 */ 925 int 926 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 927 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 928 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) 929 { 930 u_quad_t tempsize; 931 int error; 932 933 error = nd->nd_repstat; 934 if (!error && ndp->ni_vp == NULL) { 935 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 936 vrele(ndp->ni_startdir); 937 error = VOP_CREATE(ndp->ni_dvp, 938 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 939 /* For a pNFS server, create the data file on a DS. */ 940 if (error == 0 && nvap->na_type == VREG) { 941 /* 942 * Create a data file on a DS for a pNFS server. 943 * This function just returns if not 944 * running a pNFS DS or the creation fails. 945 */ 946 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 947 nd->nd_cred, p); 948 } 949 vput(ndp->ni_dvp); 950 nfsvno_relpathbuf(ndp); 951 if (!error) { 952 if (*exclusive_flagp) { 953 *exclusive_flagp = 0; 954 NFSVNO_ATTRINIT(nvap); 955 nvap->na_atime.tv_sec = cverf[0]; 956 nvap->na_atime.tv_nsec = cverf[1]; 957 error = VOP_SETATTR(ndp->ni_vp, 958 &nvap->na_vattr, nd->nd_cred); 959 if (error != 0) { 960 vput(ndp->ni_vp); 961 ndp->ni_vp = NULL; 962 error = NFSERR_NOTSUPP; 963 } 964 } 965 } 966 /* 967 * NFS V2 Only. nfsrvd_mknod() does this for V3. 968 * (This implies, just get out on an error.) 
969 */ 970 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 971 nvap->na_type == VFIFO) { 972 if (nvap->na_type == VCHR && rdev == 0xffffffff) 973 nvap->na_type = VFIFO; 974 if (nvap->na_type != VFIFO && 975 (error = priv_check_cred(nd->nd_cred, 976 PRIV_VFS_MKNOD_DEV, 0))) { 977 vrele(ndp->ni_startdir); 978 nfsvno_relpathbuf(ndp); 979 vput(ndp->ni_dvp); 980 goto out; 981 } 982 nvap->na_rdev = rdev; 983 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 984 &ndp->ni_cnd, &nvap->na_vattr); 985 vput(ndp->ni_dvp); 986 nfsvno_relpathbuf(ndp); 987 vrele(ndp->ni_startdir); 988 if (error) 989 goto out; 990 } else { 991 vrele(ndp->ni_startdir); 992 nfsvno_relpathbuf(ndp); 993 vput(ndp->ni_dvp); 994 error = ENXIO; 995 goto out; 996 } 997 *vpp = ndp->ni_vp; 998 } else { 999 /* 1000 * Handle cases where error is already set and/or 1001 * the file exists. 1002 * 1 - clean up the lookup 1003 * 2 - iff !error and na_size set, truncate it 1004 */ 1005 vrele(ndp->ni_startdir); 1006 nfsvno_relpathbuf(ndp); 1007 *vpp = ndp->ni_vp; 1008 if (ndp->ni_dvp == *vpp) 1009 vrele(ndp->ni_dvp); 1010 else 1011 vput(ndp->ni_dvp); 1012 if (!error && nvap->na_size != VNOVAL) { 1013 error = nfsvno_accchk(*vpp, VWRITE, 1014 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1015 NFSACCCHK_VPISLOCKED, NULL); 1016 if (!error) { 1017 tempsize = nvap->na_size; 1018 NFSVNO_ATTRINIT(nvap); 1019 nvap->na_size = tempsize; 1020 error = VOP_SETATTR(*vpp, 1021 &nvap->na_vattr, nd->nd_cred); 1022 } 1023 } 1024 if (error) 1025 vput(*vpp); 1026 } 1027 1028 out: 1029 NFSEXITCODE(error); 1030 return (error); 1031 } 1032 1033 /* 1034 * Do a mknod vnode op. 1035 */ 1036 int 1037 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1038 struct thread *p) 1039 { 1040 int error = 0; 1041 enum vtype vtyp; 1042 1043 vtyp = nvap->na_type; 1044 /* 1045 * Iff doesn't exist, create it. 
1046 */ 1047 if (ndp->ni_vp) { 1048 vrele(ndp->ni_startdir); 1049 nfsvno_relpathbuf(ndp); 1050 vput(ndp->ni_dvp); 1051 vrele(ndp->ni_vp); 1052 error = EEXIST; 1053 goto out; 1054 } 1055 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1056 vrele(ndp->ni_startdir); 1057 nfsvno_relpathbuf(ndp); 1058 vput(ndp->ni_dvp); 1059 error = NFSERR_BADTYPE; 1060 goto out; 1061 } 1062 if (vtyp == VSOCK) { 1063 vrele(ndp->ni_startdir); 1064 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1065 &ndp->ni_cnd, &nvap->na_vattr); 1066 vput(ndp->ni_dvp); 1067 nfsvno_relpathbuf(ndp); 1068 } else { 1069 if (nvap->na_type != VFIFO && 1070 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { 1071 vrele(ndp->ni_startdir); 1072 nfsvno_relpathbuf(ndp); 1073 vput(ndp->ni_dvp); 1074 goto out; 1075 } 1076 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1077 &ndp->ni_cnd, &nvap->na_vattr); 1078 vput(ndp->ni_dvp); 1079 nfsvno_relpathbuf(ndp); 1080 vrele(ndp->ni_startdir); 1081 /* 1082 * Since VOP_MKNOD returns the ni_vp, I can't 1083 * see any reason to do the lookup. 1084 */ 1085 } 1086 1087 out: 1088 NFSEXITCODE(error); 1089 return (error); 1090 } 1091 1092 /* 1093 * Mkdir vnode op. 1094 */ 1095 int 1096 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1097 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1098 { 1099 int error = 0; 1100 1101 if (ndp->ni_vp != NULL) { 1102 if (ndp->ni_dvp == ndp->ni_vp) 1103 vrele(ndp->ni_dvp); 1104 else 1105 vput(ndp->ni_dvp); 1106 vrele(ndp->ni_vp); 1107 nfsvno_relpathbuf(ndp); 1108 error = EEXIST; 1109 goto out; 1110 } 1111 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1112 &nvap->na_vattr); 1113 vput(ndp->ni_dvp); 1114 nfsvno_relpathbuf(ndp); 1115 1116 out: 1117 NFSEXITCODE(error); 1118 return (error); 1119 } 1120 1121 /* 1122 * symlink vnode op. 
1123 */ 1124 int 1125 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1126 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1127 struct nfsexstuff *exp) 1128 { 1129 int error = 0; 1130 1131 if (ndp->ni_vp) { 1132 vrele(ndp->ni_startdir); 1133 nfsvno_relpathbuf(ndp); 1134 if (ndp->ni_dvp == ndp->ni_vp) 1135 vrele(ndp->ni_dvp); 1136 else 1137 vput(ndp->ni_dvp); 1138 vrele(ndp->ni_vp); 1139 error = EEXIST; 1140 goto out; 1141 } 1142 1143 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1144 &nvap->na_vattr, pathcp); 1145 vput(ndp->ni_dvp); 1146 vrele(ndp->ni_startdir); 1147 nfsvno_relpathbuf(ndp); 1148 /* 1149 * Although FreeBSD still had the lookup code in 1150 * it for 7/current, there doesn't seem to be any 1151 * point, since VOP_SYMLINK() returns the ni_vp. 1152 * Just vput it for v2. 1153 */ 1154 if (!not_v2 && !error) 1155 vput(ndp->ni_vp); 1156 1157 out: 1158 NFSEXITCODE(error); 1159 return (error); 1160 } 1161 1162 /* 1163 * Parse symbolic link arguments. 1164 * This function has an ugly side effect. It will malloc() an area for 1165 * the symlink and set iov_base to point to it, only if it succeeds. 1166 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1167 * be FREE'd later. 
1168 */ 1169 int 1170 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1171 struct thread *p, char **pathcpp, int *lenp) 1172 { 1173 u_int32_t *tl; 1174 char *pathcp = NULL; 1175 int error = 0, len; 1176 struct nfsv2_sattr *sp; 1177 1178 *pathcpp = NULL; 1179 *lenp = 0; 1180 if ((nd->nd_flag & ND_NFSV3) && 1181 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1182 goto nfsmout; 1183 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1184 len = fxdr_unsigned(int, *tl); 1185 if (len > NFS_MAXPATHLEN || len <= 0) { 1186 error = EBADRPC; 1187 goto nfsmout; 1188 } 1189 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1190 error = nfsrv_mtostr(nd, pathcp, len); 1191 if (error) 1192 goto nfsmout; 1193 if (nd->nd_flag & ND_NFSV2) { 1194 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1195 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1196 } 1197 *pathcpp = pathcp; 1198 *lenp = len; 1199 NFSEXITCODE2(0, nd); 1200 return (0); 1201 nfsmout: 1202 if (pathcp) 1203 free(pathcp, M_TEMP); 1204 NFSEXITCODE2(error, nd); 1205 return (error); 1206 } 1207 1208 /* 1209 * Remove a non-directory object. 
1210 */ 1211 int 1212 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1213 struct thread *p, struct nfsexstuff *exp) 1214 { 1215 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1216 int error = 0, mirrorcnt; 1217 char fname[PNFS_FILENAME_LEN + 1]; 1218 fhandle_t fh; 1219 1220 vp = ndp->ni_vp; 1221 dsdvp[0] = NULL; 1222 if (vp->v_type == VDIR) 1223 error = NFSERR_ISDIR; 1224 else if (is_v4) 1225 error = nfsrv_checkremove(vp, 1, p); 1226 if (error == 0) 1227 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1228 if (!error) 1229 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1230 if (error == 0 && dsdvp[0] != NULL) 1231 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1232 if (ndp->ni_dvp == vp) 1233 vrele(ndp->ni_dvp); 1234 else 1235 vput(ndp->ni_dvp); 1236 vput(vp); 1237 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1238 nfsvno_relpathbuf(ndp); 1239 NFSEXITCODE(error); 1240 return (error); 1241 } 1242 1243 /* 1244 * Remove a directory. 1245 */ 1246 int 1247 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1248 struct thread *p, struct nfsexstuff *exp) 1249 { 1250 struct vnode *vp; 1251 int error = 0; 1252 1253 vp = ndp->ni_vp; 1254 if (vp->v_type != VDIR) { 1255 error = ENOTDIR; 1256 goto out; 1257 } 1258 /* 1259 * No rmdir "." please. 1260 */ 1261 if (ndp->ni_dvp == vp) { 1262 error = EINVAL; 1263 goto out; 1264 } 1265 /* 1266 * The root of a mounted filesystem cannot be deleted. 1267 */ 1268 if (vp->v_vflag & VV_ROOT) 1269 error = EBUSY; 1270 out: 1271 if (!error) 1272 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1273 if (ndp->ni_dvp == vp) 1274 vrele(ndp->ni_dvp); 1275 else 1276 vput(ndp->ni_dvp); 1277 vput(vp); 1278 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1279 nfsvno_relpathbuf(ndp); 1280 NFSEXITCODE(error); 1281 return (error); 1282 } 1283 1284 /* 1285 * Rename vnode op. 
1286 */ 1287 int 1288 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1289 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1290 { 1291 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1292 int error = 0, mirrorcnt; 1293 char fname[PNFS_FILENAME_LEN + 1]; 1294 fhandle_t fh; 1295 1296 dsdvp[0] = NULL; 1297 fvp = fromndp->ni_vp; 1298 if (ndstat) { 1299 vrele(fromndp->ni_dvp); 1300 vrele(fvp); 1301 error = ndstat; 1302 goto out1; 1303 } 1304 tdvp = tondp->ni_dvp; 1305 tvp = tondp->ni_vp; 1306 if (tvp != NULL) { 1307 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1308 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1309 goto out; 1310 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1311 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1312 goto out; 1313 } 1314 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1315 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1316 goto out; 1317 } 1318 1319 /* 1320 * A rename to '.' or '..' results in a prematurely 1321 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1322 * here. 1323 */ 1324 if ((tondp->ni_cnd.cn_namelen == 1 && 1325 tondp->ni_cnd.cn_nameptr[0] == '.') || 1326 (tondp->ni_cnd.cn_namelen == 2 && 1327 tondp->ni_cnd.cn_nameptr[0] == '.' && 1328 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1329 error = EINVAL; 1330 goto out; 1331 } 1332 } 1333 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1334 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1335 goto out; 1336 } 1337 if (fvp->v_mount != tdvp->v_mount) { 1338 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1339 goto out; 1340 } 1341 if (fvp == tdvp) { 1342 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1343 goto out; 1344 } 1345 if (fvp == tvp) { 1346 /* 1347 * If source and destination are the same, there is nothing to 1348 * do. Set error to -1 to indicate this. 
1349 */ 1350 error = -1; 1351 goto out; 1352 } 1353 if (ndflag & ND_NFSV4) { 1354 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1355 error = nfsrv_checkremove(fvp, 0, p); 1356 NFSVOPUNLOCK(fvp, 0); 1357 } else 1358 error = EPERM; 1359 if (tvp && !error) 1360 error = nfsrv_checkremove(tvp, 1, p); 1361 } else { 1362 /* 1363 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1364 * that the NFSv4 client won't be confused by the rename. 1365 * Since nfsd_recalldelegation() can only be called on an 1366 * unlocked vnode at this point and fvp is the file that will 1367 * still exist after the rename, just do fvp. 1368 */ 1369 nfsd_recalldelegation(fvp, p); 1370 } 1371 if (error == 0 && tvp != NULL) { 1372 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1373 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1374 " dsdvp=%p\n", dsdvp[0]); 1375 } 1376 out: 1377 if (!error) { 1378 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1379 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1380 &tondp->ni_cnd); 1381 } else { 1382 if (tdvp == tvp) 1383 vrele(tdvp); 1384 else 1385 vput(tdvp); 1386 if (tvp) 1387 vput(tvp); 1388 vrele(fromndp->ni_dvp); 1389 vrele(fvp); 1390 if (error == -1) 1391 error = 0; 1392 } 1393 1394 /* 1395 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1396 * if the rename succeeded, the DS file for the tvp needs to be 1397 * removed. 1398 */ 1399 if (error == 0 && dsdvp[0] != NULL) { 1400 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1401 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1402 } 1403 1404 vrele(tondp->ni_startdir); 1405 nfsvno_relpathbuf(tondp); 1406 out1: 1407 vrele(fromndp->ni_startdir); 1408 nfsvno_relpathbuf(fromndp); 1409 NFSEXITCODE(error); 1410 return (error); 1411 } 1412 1413 /* 1414 * Link vnode op. 
1415 */ 1416 int 1417 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1418 struct thread *p, struct nfsexstuff *exp) 1419 { 1420 struct vnode *xp; 1421 int error = 0; 1422 1423 xp = ndp->ni_vp; 1424 if (xp != NULL) { 1425 error = EEXIST; 1426 } else { 1427 xp = ndp->ni_dvp; 1428 if (vp->v_mount != xp->v_mount) 1429 error = EXDEV; 1430 } 1431 if (!error) { 1432 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1433 if ((vp->v_iflag & VI_DOOMED) == 0) 1434 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1435 else 1436 error = EPERM; 1437 if (ndp->ni_dvp == vp) 1438 vrele(ndp->ni_dvp); 1439 else 1440 vput(ndp->ni_dvp); 1441 NFSVOPUNLOCK(vp, 0); 1442 } else { 1443 if (ndp->ni_dvp == ndp->ni_vp) 1444 vrele(ndp->ni_dvp); 1445 else 1446 vput(ndp->ni_dvp); 1447 if (ndp->ni_vp) 1448 vrele(ndp->ni_vp); 1449 } 1450 nfsvno_relpathbuf(ndp); 1451 NFSEXITCODE(error); 1452 return (error); 1453 } 1454 1455 /* 1456 * Do the fsync() appropriate for the commit. 1457 */ 1458 int 1459 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1460 struct thread *td) 1461 { 1462 int error = 0; 1463 1464 /* 1465 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1466 * file is done. At this time VOP_FSYNC does not accept offset and 1467 * byte count parameters so call VOP_FSYNC the whole file for now. 1468 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1469 * File systems that do not use the buffer cache (as indicated 1470 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 
1471 */ 1472 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1473 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1474 /* 1475 * Give up and do the whole thing 1476 */ 1477 if (vp->v_object && 1478 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1479 VM_OBJECT_WLOCK(vp->v_object); 1480 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1481 VM_OBJECT_WUNLOCK(vp->v_object); 1482 } 1483 error = VOP_FSYNC(vp, MNT_WAIT, td); 1484 } else { 1485 /* 1486 * Locate and synchronously write any buffers that fall 1487 * into the requested range. Note: we are assuming that 1488 * f_iosize is a power of 2. 1489 */ 1490 int iosize = vp->v_mount->mnt_stat.f_iosize; 1491 int iomask = iosize - 1; 1492 struct bufobj *bo; 1493 daddr_t lblkno; 1494 1495 /* 1496 * Align to iosize boundary, super-align to page boundary. 1497 */ 1498 if (off & iomask) { 1499 cnt += off & iomask; 1500 off &= ~(u_quad_t)iomask; 1501 } 1502 if (off & PAGE_MASK) { 1503 cnt += off & PAGE_MASK; 1504 off &= ~(u_quad_t)PAGE_MASK; 1505 } 1506 lblkno = off / iosize; 1507 1508 if (vp->v_object && 1509 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1510 VM_OBJECT_WLOCK(vp->v_object); 1511 vm_object_page_clean(vp->v_object, off, off + cnt, 1512 OBJPC_SYNC); 1513 VM_OBJECT_WUNLOCK(vp->v_object); 1514 } 1515 1516 bo = &vp->v_bufobj; 1517 BO_LOCK(bo); 1518 while (cnt > 0) { 1519 struct buf *bp; 1520 1521 /* 1522 * If we have a buffer and it is marked B_DELWRI we 1523 * have to lock and write it. Otherwise the prior 1524 * write is assumed to have already been committed. 1525 * 1526 * gbincore() can return invalid buffers now so we 1527 * have to check that bit as well (though B_DELWRI 1528 * should not be set if B_INVAL is set there could be 1529 * a race here since we haven't locked the buffer). 
1530 */ 1531 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1532 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1533 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1534 BO_LOCK(bo); 1535 continue; /* retry */ 1536 } 1537 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1538 B_DELWRI) { 1539 bremfree(bp); 1540 bp->b_flags &= ~B_ASYNC; 1541 bwrite(bp); 1542 ++nfs_commit_miss; 1543 } else 1544 BUF_UNLOCK(bp); 1545 BO_LOCK(bo); 1546 } 1547 ++nfs_commit_blks; 1548 if (cnt < iosize) 1549 break; 1550 cnt -= iosize; 1551 ++lblkno; 1552 } 1553 BO_UNLOCK(bo); 1554 } 1555 NFSEXITCODE(error); 1556 return (error); 1557 } 1558 1559 /* 1560 * Statfs vnode op. 1561 */ 1562 int 1563 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1564 { 1565 struct statfs *tsf; 1566 int error; 1567 1568 tsf = NULL; 1569 if (nfsrv_devidcnt > 0) { 1570 /* For a pNFS service, get the DS numbers. */ 1571 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1572 error = nfsrv_pnfsstatfs(tsf); 1573 if (error != 0) { 1574 free(tsf, M_TEMP); 1575 tsf = NULL; 1576 } 1577 } 1578 error = VFS_STATFS(vp->v_mount, sf); 1579 if (error == 0) { 1580 if (tsf != NULL) { 1581 sf->f_blocks = tsf->f_blocks; 1582 sf->f_bavail = tsf->f_bavail; 1583 sf->f_bfree = tsf->f_bfree; 1584 sf->f_bsize = tsf->f_bsize; 1585 } 1586 /* 1587 * Since NFS handles these values as unsigned on the 1588 * wire, there is no way to represent negative values, 1589 * so set them to 0. Without this, they will appear 1590 * to be very large positive values for clients like 1591 * Solaris10. 1592 */ 1593 if (sf->f_bavail < 0) 1594 sf->f_bavail = 0; 1595 if (sf->f_ffree < 0) 1596 sf->f_ffree = 0; 1597 } 1598 free(tsf, M_TEMP); 1599 NFSEXITCODE(error); 1600 return (error); 1601 } 1602 1603 /* 1604 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1605 * must handle nfsrv_opencheck() calls after any other access checks. 
1606 */ 1607 void 1608 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1609 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1610 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1611 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, 1612 struct nfsexstuff *exp, struct vnode **vpp) 1613 { 1614 struct vnode *vp = NULL; 1615 u_quad_t tempsize; 1616 struct nfsexstuff nes; 1617 1618 if (ndp->ni_vp == NULL) 1619 nd->nd_repstat = nfsrv_opencheck(clientid, 1620 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1621 if (!nd->nd_repstat) { 1622 if (ndp->ni_vp == NULL) { 1623 vrele(ndp->ni_startdir); 1624 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1625 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1626 /* For a pNFS server, create the data file on a DS. */ 1627 if (nd->nd_repstat == 0) { 1628 /* 1629 * Create a data file on a DS for a pNFS server. 1630 * This function just returns if not 1631 * running a pNFS DS or the creation fails. 
1632 */ 1633 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1634 cred, p); 1635 } 1636 vput(ndp->ni_dvp); 1637 nfsvno_relpathbuf(ndp); 1638 if (!nd->nd_repstat) { 1639 if (*exclusive_flagp) { 1640 *exclusive_flagp = 0; 1641 NFSVNO_ATTRINIT(nvap); 1642 nvap->na_atime.tv_sec = cverf[0]; 1643 nvap->na_atime.tv_nsec = cverf[1]; 1644 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1645 &nvap->na_vattr, cred); 1646 if (nd->nd_repstat != 0) { 1647 vput(ndp->ni_vp); 1648 ndp->ni_vp = NULL; 1649 nd->nd_repstat = NFSERR_NOTSUPP; 1650 } else 1651 NFSSETBIT_ATTRBIT(attrbitp, 1652 NFSATTRBIT_TIMEACCESS); 1653 } else { 1654 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1655 aclp, p, attrbitp, exp); 1656 } 1657 } 1658 vp = ndp->ni_vp; 1659 } else { 1660 if (ndp->ni_startdir) 1661 vrele(ndp->ni_startdir); 1662 nfsvno_relpathbuf(ndp); 1663 vp = ndp->ni_vp; 1664 if (create == NFSV4OPEN_CREATE) { 1665 if (ndp->ni_dvp == vp) 1666 vrele(ndp->ni_dvp); 1667 else 1668 vput(ndp->ni_dvp); 1669 } 1670 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1671 if (ndp->ni_cnd.cn_flags & RDONLY) 1672 NFSVNO_SETEXRDONLY(&nes); 1673 else 1674 NFSVNO_EXINIT(&nes); 1675 nd->nd_repstat = nfsvno_accchk(vp, 1676 VWRITE, cred, &nes, p, 1677 NFSACCCHK_NOOVERRIDE, 1678 NFSACCCHK_VPISLOCKED, NULL); 1679 nd->nd_repstat = nfsrv_opencheck(clientid, 1680 stateidp, stp, vp, nd, p, nd->nd_repstat); 1681 if (!nd->nd_repstat) { 1682 tempsize = nvap->na_size; 1683 NFSVNO_ATTRINIT(nvap); 1684 nvap->na_size = tempsize; 1685 nd->nd_repstat = VOP_SETATTR(vp, 1686 &nvap->na_vattr, cred); 1687 } 1688 } else if (vp->v_type == VREG) { 1689 nd->nd_repstat = nfsrv_opencheck(clientid, 1690 stateidp, stp, vp, nd, p, nd->nd_repstat); 1691 } 1692 } 1693 } else { 1694 if (ndp->ni_cnd.cn_flags & HASBUF) 1695 nfsvno_relpathbuf(ndp); 1696 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1697 vrele(ndp->ni_startdir); 1698 if (ndp->ni_dvp == ndp->ni_vp) 1699 vrele(ndp->ni_dvp); 1700 else 1701 vput(ndp->ni_dvp); 1702 if (ndp->ni_vp) 1703 
vput(ndp->ni_vp); 1704 } 1705 } 1706 *vpp = vp; 1707 1708 NFSEXITCODE2(0, nd); 1709 } 1710 1711 /* 1712 * Updates the file rev and sets the mtime and ctime 1713 * to the current clock time, returning the va_filerev and va_Xtime 1714 * values. 1715 * Return ESTALE to indicate the vnode is VI_DOOMED. 1716 */ 1717 int 1718 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1719 struct nfsrv_descript *nd, struct thread *p) 1720 { 1721 struct vattr va; 1722 1723 VATTR_NULL(&va); 1724 vfs_timestamp(&va.va_mtime); 1725 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1726 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1727 if ((vp->v_iflag & VI_DOOMED) != 0) 1728 return (ESTALE); 1729 } 1730 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1731 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1732 return (0); 1733 } 1734 1735 /* 1736 * Glue routine to nfsv4_fillattr(). 1737 */ 1738 int 1739 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1740 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1741 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1742 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1743 { 1744 struct statfs *sf; 1745 int error; 1746 1747 sf = NULL; 1748 if (nfsrv_devidcnt > 0 && 1749 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1750 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 1751 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 1752 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 1753 error = nfsrv_pnfsstatfs(sf); 1754 if (error != 0) { 1755 free(sf, M_TEMP); 1756 sf = NULL; 1757 } 1758 } 1759 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 1760 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 1761 mounted_on_fileno, sf); 1762 free(sf, M_TEMP); 1763 NFSEXITCODE2(0, nd); 1764 return (error); 1765 } 1766 1767 /* Since the Readdir vnode ops vary, put the entire functions in here. 
*/ 1768 /* 1769 * nfs readdir service 1770 * - mallocs what it thinks is enough to read 1771 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR 1772 * - calls VOP_READDIR() 1773 * - loops around building the reply 1774 * if the output generated exceeds count break out of loop 1775 * The NFSM_CLGET macro is used here so that the reply will be packed 1776 * tightly in mbuf clusters. 1777 * - it trims out records with d_fileno == 0 1778 * this doesn't matter for Unix clients, but they might confuse clients 1779 * for other os'. 1780 * - it trims out records with d_type == DT_WHT 1781 * these cannot be seen through NFS (unless we extend the protocol) 1782 * The alternate call nfsrvd_readdirplus() does lookups as well. 1783 * PS: The NFS protocol spec. does not clarify what the "count" byte 1784 * argument is a count of.. just name strings and file id's or the 1785 * entire reply rpc or ... 1786 * I tried just file name and id sizes and it confused the Sun client, 1787 * so I am using the full rpc size now. The "paranoia.." comment refers 1788 * to including the status longwords that are not a part of the dir. 1789 * "entry" structures, but are in the rpc. 
1790 */ 1791 int 1792 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 1793 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 1794 { 1795 struct dirent *dp; 1796 u_int32_t *tl; 1797 int dirlen; 1798 char *cpos, *cend, *rbuf; 1799 struct nfsvattr at; 1800 int nlen, error = 0, getret = 1; 1801 int siz, cnt, fullsiz, eofflag, ncookies; 1802 u_int64_t off, toff, verf __unused; 1803 u_long *cookies = NULL, *cookiep; 1804 struct uio io; 1805 struct iovec iv; 1806 int is_ufs; 1807 1808 if (nd->nd_repstat) { 1809 nfsrv_postopattr(nd, getret, &at); 1810 goto out; 1811 } 1812 if (nd->nd_flag & ND_NFSV2) { 1813 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1814 off = fxdr_unsigned(u_quad_t, *tl++); 1815 } else { 1816 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 1817 off = fxdr_hyper(tl); 1818 tl += 2; 1819 verf = fxdr_hyper(tl); 1820 tl += 2; 1821 } 1822 toff = off; 1823 cnt = fxdr_unsigned(int, *tl); 1824 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 1825 cnt = NFS_SRVMAXDATA(nd); 1826 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 1827 fullsiz = siz; 1828 if (nd->nd_flag & ND_NFSV3) { 1829 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 1830 NULL); 1831 #if 0 1832 /* 1833 * va_filerev is not sufficient as a cookie verifier, 1834 * since it is not supposed to change when entries are 1835 * removed/added unless that offset cookies returned to 1836 * the client are no longer valid. 
1837 */ 1838 if (!nd->nd_repstat && toff && verf != at.na_filerev) 1839 nd->nd_repstat = NFSERR_BAD_COOKIE; 1840 #endif 1841 } 1842 if (!nd->nd_repstat && vp->v_type != VDIR) 1843 nd->nd_repstat = NFSERR_NOTDIR; 1844 if (nd->nd_repstat == 0 && cnt == 0) { 1845 if (nd->nd_flag & ND_NFSV2) 1846 /* NFSv2 does not have NFSERR_TOOSMALL */ 1847 nd->nd_repstat = EPERM; 1848 else 1849 nd->nd_repstat = NFSERR_TOOSMALL; 1850 } 1851 if (!nd->nd_repstat) 1852 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 1853 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1854 NFSACCCHK_VPISLOCKED, NULL); 1855 if (nd->nd_repstat) { 1856 vput(vp); 1857 if (nd->nd_flag & ND_NFSV3) 1858 nfsrv_postopattr(nd, getret, &at); 1859 goto out; 1860 } 1861 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 1862 rbuf = malloc(siz, M_TEMP, M_WAITOK); 1863 again: 1864 eofflag = 0; 1865 if (cookies) { 1866 free(cookies, M_TEMP); 1867 cookies = NULL; 1868 } 1869 1870 iv.iov_base = rbuf; 1871 iv.iov_len = siz; 1872 io.uio_iov = &iv; 1873 io.uio_iovcnt = 1; 1874 io.uio_offset = (off_t)off; 1875 io.uio_resid = siz; 1876 io.uio_segflg = UIO_SYSSPACE; 1877 io.uio_rw = UIO_READ; 1878 io.uio_td = NULL; 1879 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 1880 &cookies); 1881 off = (u_int64_t)io.uio_offset; 1882 if (io.uio_resid) 1883 siz -= io.uio_resid; 1884 1885 if (!cookies && !nd->nd_repstat) 1886 nd->nd_repstat = NFSERR_PERM; 1887 if (nd->nd_flag & ND_NFSV3) { 1888 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 1889 if (!nd->nd_repstat) 1890 nd->nd_repstat = getret; 1891 } 1892 1893 /* 1894 * Handles the failed cases. nd->nd_repstat == 0 past here. 
1895 */ 1896 if (nd->nd_repstat) { 1897 vput(vp); 1898 free(rbuf, M_TEMP); 1899 if (cookies) 1900 free(cookies, M_TEMP); 1901 if (nd->nd_flag & ND_NFSV3) 1902 nfsrv_postopattr(nd, getret, &at); 1903 goto out; 1904 } 1905 /* 1906 * If nothing read, return eof 1907 * rpc reply 1908 */ 1909 if (siz == 0) { 1910 vput(vp); 1911 if (nd->nd_flag & ND_NFSV2) { 1912 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1913 } else { 1914 nfsrv_postopattr(nd, getret, &at); 1915 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 1916 txdr_hyper(at.na_filerev, tl); 1917 tl += 2; 1918 } 1919 *tl++ = newnfs_false; 1920 *tl = newnfs_true; 1921 free(rbuf, M_TEMP); 1922 free(cookies, M_TEMP); 1923 goto out; 1924 } 1925 1926 /* 1927 * Check for degenerate cases of nothing useful read. 1928 * If so go try again 1929 */ 1930 cpos = rbuf; 1931 cend = rbuf + siz; 1932 dp = (struct dirent *)cpos; 1933 cookiep = cookies; 1934 1935 /* 1936 * For some reason FreeBSD's ufs_readdir() chooses to back the 1937 * directory offset up to a block boundary, so it is necessary to 1938 * skip over the records that precede the requested offset. This 1939 * requires the assumption that file offset cookies monotonically 1940 * increase. 1941 */ 1942 while (cpos < cend && ncookies > 0 && 1943 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 1944 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 1945 cpos += dp->d_reclen; 1946 dp = (struct dirent *)cpos; 1947 cookiep++; 1948 ncookies--; 1949 } 1950 if (cpos >= cend || ncookies == 0) { 1951 siz = fullsiz; 1952 toff = off; 1953 goto again; 1954 } 1955 vput(vp); 1956 1957 /* 1958 * dirlen is the size of the reply, including all XDR and must 1959 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 1960 * if the XDR should be included in "count", but to be safe, we do. 1961 * (Include the two booleans at the end of the reply in dirlen now.) 
1962 */ 1963 if (nd->nd_flag & ND_NFSV3) { 1964 nfsrv_postopattr(nd, getret, &at); 1965 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1966 txdr_hyper(at.na_filerev, tl); 1967 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 1968 } else { 1969 dirlen = 2 * NFSX_UNSIGNED; 1970 } 1971 1972 /* Loop through the records and build reply */ 1973 while (cpos < cend && ncookies > 0) { 1974 nlen = dp->d_namlen; 1975 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 1976 nlen <= NFS_MAXNAMLEN) { 1977 if (nd->nd_flag & ND_NFSV3) 1978 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1979 else 1980 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1981 if (dirlen > cnt) { 1982 eofflag = 0; 1983 break; 1984 } 1985 1986 /* 1987 * Build the directory record xdr from 1988 * the dirent entry. 1989 */ 1990 if (nd->nd_flag & ND_NFSV3) { 1991 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1992 *tl++ = newnfs_true; 1993 *tl++ = 0; 1994 } else { 1995 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1996 *tl++ = newnfs_true; 1997 } 1998 *tl = txdr_unsigned(dp->d_fileno); 1999 (void) nfsm_strtom(nd, dp->d_name, nlen); 2000 if (nd->nd_flag & ND_NFSV3) { 2001 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2002 *tl++ = 0; 2003 } else 2004 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2005 *tl = txdr_unsigned(*cookiep); 2006 } 2007 cpos += dp->d_reclen; 2008 dp = (struct dirent *)cpos; 2009 cookiep++; 2010 ncookies--; 2011 } 2012 if (cpos < cend) 2013 eofflag = 0; 2014 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2015 *tl++ = newnfs_false; 2016 if (eofflag) 2017 *tl = newnfs_true; 2018 else 2019 *tl = newnfs_false; 2020 free(rbuf, M_TEMP); 2021 free(cookies, M_TEMP); 2022 2023 out: 2024 NFSEXITCODE2(0, nd); 2025 return (0); 2026 nfsmout: 2027 vput(vp); 2028 NFSEXITCODE2(error, nd); 2029 return (error); 2030 } 2031 2032 /* 2033 * Readdirplus for V3 and Readdir for V4. 
2034 */ 2035 int 2036 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2037 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 2038 { 2039 struct dirent *dp; 2040 u_int32_t *tl; 2041 int dirlen; 2042 char *cpos, *cend, *rbuf; 2043 struct vnode *nvp; 2044 fhandle_t nfh; 2045 struct nfsvattr nva, at, *nvap = &nva; 2046 struct mbuf *mb0, *mb1; 2047 struct nfsreferral *refp; 2048 int nlen, r, error = 0, getret = 1, usevget = 1; 2049 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2050 caddr_t bpos0, bpos1; 2051 u_int64_t off, toff, verf; 2052 u_long *cookies = NULL, *cookiep; 2053 nfsattrbit_t attrbits, rderrbits, savbits; 2054 struct uio io; 2055 struct iovec iv; 2056 struct componentname cn; 2057 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2058 struct mount *mp, *new_mp; 2059 uint64_t mounted_on_fileno; 2060 2061 if (nd->nd_repstat) { 2062 nfsrv_postopattr(nd, getret, &at); 2063 goto out; 2064 } 2065 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2066 off = fxdr_hyper(tl); 2067 toff = off; 2068 tl += 2; 2069 verf = fxdr_hyper(tl); 2070 tl += 2; 2071 siz = fxdr_unsigned(int, *tl++); 2072 cnt = fxdr_unsigned(int, *tl); 2073 2074 /* 2075 * Use the server's maximum data transfer size as the upper bound 2076 * on reply datalen. 2077 */ 2078 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2079 cnt = NFS_SRVMAXDATA(nd); 2080 2081 /* 2082 * siz is a "hint" of how much directory information (name, fileid, 2083 * cookie) should be in the reply. At least one client "hints" 0, 2084 * so I set it to cnt for that case. I also round it up to the 2085 * next multiple of DIRBLKSIZ. 
2086 */ 2087 if (siz <= 0) 2088 siz = cnt; 2089 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2090 2091 if (nd->nd_flag & ND_NFSV4) { 2092 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2093 if (error) 2094 goto nfsmout; 2095 NFSSET_ATTRBIT(&savbits, &attrbits); 2096 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); 2097 NFSZERO_ATTRBIT(&rderrbits); 2098 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2099 } else { 2100 NFSZERO_ATTRBIT(&attrbits); 2101 } 2102 fullsiz = siz; 2103 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2104 #if 0 2105 if (!nd->nd_repstat) { 2106 if (off && verf != at.na_filerev) { 2107 /* 2108 * va_filerev is not sufficient as a cookie verifier, 2109 * since it is not supposed to change when entries are 2110 * removed/added unless that offset cookies returned to 2111 * the client are no longer valid. 2112 */ 2113 if (nd->nd_flag & ND_NFSV4) { 2114 nd->nd_repstat = NFSERR_NOTSAME; 2115 } else { 2116 nd->nd_repstat = NFSERR_BAD_COOKIE; 2117 } 2118 } 2119 } 2120 #endif 2121 if (!nd->nd_repstat && vp->v_type != VDIR) 2122 nd->nd_repstat = NFSERR_NOTDIR; 2123 if (!nd->nd_repstat && cnt == 0) 2124 nd->nd_repstat = NFSERR_TOOSMALL; 2125 if (!nd->nd_repstat) 2126 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2127 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2128 NFSACCCHK_VPISLOCKED, NULL); 2129 if (nd->nd_repstat) { 2130 vput(vp); 2131 if (nd->nd_flag & ND_NFSV3) 2132 nfsrv_postopattr(nd, getret, &at); 2133 goto out; 2134 } 2135 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2136 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2137 2138 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2139 again: 2140 eofflag = 0; 2141 if (cookies) { 2142 free(cookies, M_TEMP); 2143 cookies = NULL; 2144 } 2145 2146 iv.iov_base = rbuf; 2147 iv.iov_len = siz; 2148 io.uio_iov = &iv; 2149 io.uio_iovcnt = 1; 2150 io.uio_offset = (off_t)off; 2151 io.uio_resid = siz; 2152 io.uio_segflg = UIO_SYSSPACE; 2153 io.uio_rw = UIO_READ; 2154 
io.uio_td = NULL; 2155 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2156 &cookies); 2157 off = (u_int64_t)io.uio_offset; 2158 if (io.uio_resid) 2159 siz -= io.uio_resid; 2160 2161 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2162 2163 if (!cookies && !nd->nd_repstat) 2164 nd->nd_repstat = NFSERR_PERM; 2165 if (!nd->nd_repstat) 2166 nd->nd_repstat = getret; 2167 if (nd->nd_repstat) { 2168 vput(vp); 2169 if (cookies) 2170 free(cookies, M_TEMP); 2171 free(rbuf, M_TEMP); 2172 if (nd->nd_flag & ND_NFSV3) 2173 nfsrv_postopattr(nd, getret, &at); 2174 goto out; 2175 } 2176 /* 2177 * If nothing read, return eof 2178 * rpc reply 2179 */ 2180 if (siz == 0) { 2181 vput(vp); 2182 if (nd->nd_flag & ND_NFSV3) 2183 nfsrv_postopattr(nd, getret, &at); 2184 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2185 txdr_hyper(at.na_filerev, tl); 2186 tl += 2; 2187 *tl++ = newnfs_false; 2188 *tl = newnfs_true; 2189 free(cookies, M_TEMP); 2190 free(rbuf, M_TEMP); 2191 goto out; 2192 } 2193 2194 /* 2195 * Check for degenerate cases of nothing useful read. 2196 * If so go try again 2197 */ 2198 cpos = rbuf; 2199 cend = rbuf + siz; 2200 dp = (struct dirent *)cpos; 2201 cookiep = cookies; 2202 2203 /* 2204 * For some reason FreeBSD's ufs_readdir() chooses to back the 2205 * directory offset up to a block boundary, so it is necessary to 2206 * skip over the records that precede the requested offset. This 2207 * requires the assumption that file offset cookies monotonically 2208 * increase. 2209 */ 2210 while (cpos < cend && ncookies > 0 && 2211 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2212 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2213 ((nd->nd_flag & ND_NFSV4) && 2214 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2215 (dp->d_namlen==2 && dp->d_name[0]=='.' 
&& dp->d_name[1]=='.'))))) { 2216 cpos += dp->d_reclen; 2217 dp = (struct dirent *)cpos; 2218 cookiep++; 2219 ncookies--; 2220 } 2221 if (cpos >= cend || ncookies == 0) { 2222 siz = fullsiz; 2223 toff = off; 2224 goto again; 2225 } 2226 2227 /* 2228 * Busy the file system so that the mount point won't go away 2229 * and, as such, VFS_VGET() can be used safely. 2230 */ 2231 mp = vp->v_mount; 2232 vfs_ref(mp); 2233 NFSVOPUNLOCK(vp, 0); 2234 nd->nd_repstat = vfs_busy(mp, 0); 2235 vfs_rel(mp); 2236 if (nd->nd_repstat != 0) { 2237 vrele(vp); 2238 free(cookies, M_TEMP); 2239 free(rbuf, M_TEMP); 2240 if (nd->nd_flag & ND_NFSV3) 2241 nfsrv_postopattr(nd, getret, &at); 2242 goto out; 2243 } 2244 2245 /* 2246 * Check to see if entries in this directory can be safely acquired 2247 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2248 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2249 * automount of the snapshot directory that is required will 2250 * be done. 2251 * This needs to be done here for NFSv4, since NFSv4 never does 2252 * a VFS_VGET() for "." or "..". 2253 */ 2254 if (is_zfs == 1) { 2255 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2256 if (r == EOPNOTSUPP) { 2257 usevget = 0; 2258 cn.cn_nameiop = LOOKUP; 2259 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2260 cn.cn_cred = nd->nd_cred; 2261 cn.cn_thread = p; 2262 } else if (r == 0) 2263 vput(nvp); 2264 } 2265 2266 /* 2267 * Save this position, in case there is an error before one entry 2268 * is created. 2269 */ 2270 mb0 = nd->nd_mb; 2271 bpos0 = nd->nd_bpos; 2272 2273 /* 2274 * Fill in the first part of the reply. 2275 * dirlen is the reply length in bytes and cannot exceed cnt. 2276 * (Include the two booleans at the end of the reply in dirlen now, 2277 * so we recognize when we have exceeded cnt.) 
2278 */ 2279 if (nd->nd_flag & ND_NFSV3) { 2280 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2281 nfsrv_postopattr(nd, getret, &at); 2282 } else { 2283 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2284 } 2285 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2286 txdr_hyper(at.na_filerev, tl); 2287 2288 /* 2289 * Save this position, in case there is an empty reply needed. 2290 */ 2291 mb1 = nd->nd_mb; 2292 bpos1 = nd->nd_bpos; 2293 2294 /* Loop through the records and build reply */ 2295 entrycnt = 0; 2296 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2297 nlen = dp->d_namlen; 2298 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2299 nlen <= NFS_MAXNAMLEN && 2300 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2301 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2302 || (nlen == 1 && dp->d_name[0] != '.'))) { 2303 /* 2304 * Save the current position in the reply, in case 2305 * this entry exceeds cnt. 2306 */ 2307 mb1 = nd->nd_mb; 2308 bpos1 = nd->nd_bpos; 2309 2310 /* 2311 * For readdir_and_lookup get the vnode using 2312 * the file number. 2313 */ 2314 nvp = NULL; 2315 refp = NULL; 2316 r = 0; 2317 at_root = 0; 2318 needs_unbusy = 0; 2319 new_mp = mp; 2320 mounted_on_fileno = (uint64_t)dp->d_fileno; 2321 if ((nd->nd_flag & ND_NFSV3) || 2322 NFSNONZERO_ATTRBIT(&savbits)) { 2323 if (nd->nd_flag & ND_NFSV4) 2324 refp = nfsv4root_getreferral(NULL, 2325 vp, dp->d_fileno); 2326 if (refp == NULL) { 2327 if (usevget) 2328 r = VFS_VGET(mp, dp->d_fileno, 2329 LK_SHARED, &nvp); 2330 else 2331 r = EOPNOTSUPP; 2332 if (r == EOPNOTSUPP) { 2333 if (usevget) { 2334 usevget = 0; 2335 cn.cn_nameiop = LOOKUP; 2336 cn.cn_lkflags = 2337 LK_SHARED | 2338 LK_RETRY; 2339 cn.cn_cred = 2340 nd->nd_cred; 2341 cn.cn_thread = p; 2342 } 2343 cn.cn_nameptr = dp->d_name; 2344 cn.cn_namelen = nlen; 2345 cn.cn_flags = ISLASTCN | 2346 NOFOLLOW | LOCKLEAF; 2347 if (nlen == 2 && 2348 dp->d_name[0] == '.' 
&& 2349 dp->d_name[1] == '.') 2350 cn.cn_flags |= 2351 ISDOTDOT; 2352 if (NFSVOPLOCK(vp, LK_SHARED) 2353 != 0) { 2354 nd->nd_repstat = EPERM; 2355 break; 2356 } 2357 if ((vp->v_vflag & VV_ROOT) != 0 2358 && (cn.cn_flags & ISDOTDOT) 2359 != 0) { 2360 vref(vp); 2361 nvp = vp; 2362 r = 0; 2363 } else { 2364 r = VOP_LOOKUP(vp, &nvp, 2365 &cn); 2366 if (vp != nvp) 2367 NFSVOPUNLOCK(vp, 2368 0); 2369 } 2370 } 2371 2372 /* 2373 * For NFSv4, check to see if nvp is 2374 * a mount point and get the mount 2375 * point vnode, as required. 2376 */ 2377 if (r == 0 && 2378 nfsrv_enable_crossmntpt != 0 && 2379 (nd->nd_flag & ND_NFSV4) != 0 && 2380 nvp->v_type == VDIR && 2381 nvp->v_mountedhere != NULL) { 2382 new_mp = nvp->v_mountedhere; 2383 r = vfs_busy(new_mp, 0); 2384 vput(nvp); 2385 nvp = NULL; 2386 if (r == 0) { 2387 r = VFS_ROOT(new_mp, 2388 LK_SHARED, &nvp); 2389 needs_unbusy = 1; 2390 if (r == 0) 2391 at_root = 1; 2392 } 2393 } 2394 } 2395 if (!r) { 2396 if (refp == NULL && 2397 ((nd->nd_flag & ND_NFSV3) || 2398 NFSNONZERO_ATTRBIT(&attrbits))) { 2399 r = nfsvno_getfh(nvp, &nfh, p); 2400 if (!r) 2401 r = nfsvno_getattr(nvp, nvap, nd, p, 2402 1, &attrbits); 2403 if (r == 0 && is_zfs == 1 && 2404 nfsrv_enable_crossmntpt != 0 && 2405 (nd->nd_flag & ND_NFSV4) != 0 && 2406 nvp->v_type == VDIR && 2407 vp->v_mount != nvp->v_mount) { 2408 /* 2409 * For a ZFS snapshot, there is a 2410 * pseudo mount that does not set 2411 * v_mountedhere, so it needs to 2412 * be detected via a different 2413 * mount structure. 
2414 */ 2415 at_root = 1; 2416 if (new_mp == mp) 2417 new_mp = nvp->v_mount; 2418 } 2419 } 2420 } else { 2421 nvp = NULL; 2422 } 2423 if (r) { 2424 if (!NFSISSET_ATTRBIT(&attrbits, 2425 NFSATTRBIT_RDATTRERROR)) { 2426 if (nvp != NULL) 2427 vput(nvp); 2428 if (needs_unbusy != 0) 2429 vfs_unbusy(new_mp); 2430 nd->nd_repstat = r; 2431 break; 2432 } 2433 } 2434 } 2435 2436 /* 2437 * Build the directory record xdr 2438 */ 2439 if (nd->nd_flag & ND_NFSV3) { 2440 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2441 *tl++ = newnfs_true; 2442 *tl++ = 0; 2443 *tl = txdr_unsigned(dp->d_fileno); 2444 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2445 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2446 *tl++ = 0; 2447 *tl = txdr_unsigned(*cookiep); 2448 nfsrv_postopattr(nd, 0, nvap); 2449 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2450 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2451 if (nvp != NULL) 2452 vput(nvp); 2453 } else { 2454 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2455 *tl++ = newnfs_true; 2456 *tl++ = 0; 2457 *tl = txdr_unsigned(*cookiep); 2458 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2459 if (nvp != NULL) { 2460 supports_nfsv4acls = 2461 nfs_supportsnfsv4acls(nvp); 2462 NFSVOPUNLOCK(nvp, 0); 2463 } else 2464 supports_nfsv4acls = 0; 2465 if (refp != NULL) { 2466 dirlen += nfsrv_putreferralattr(nd, 2467 &savbits, refp, 0, 2468 &nd->nd_repstat); 2469 if (nd->nd_repstat) { 2470 if (nvp != NULL) 2471 vrele(nvp); 2472 if (needs_unbusy != 0) 2473 vfs_unbusy(new_mp); 2474 break; 2475 } 2476 } else if (r) { 2477 dirlen += nfsvno_fillattr(nd, new_mp, 2478 nvp, nvap, &nfh, r, &rderrbits, 2479 nd->nd_cred, p, isdgram, 0, 2480 supports_nfsv4acls, at_root, 2481 mounted_on_fileno); 2482 } else { 2483 dirlen += nfsvno_fillattr(nd, new_mp, 2484 nvp, nvap, &nfh, r, &attrbits, 2485 nd->nd_cred, p, isdgram, 0, 2486 supports_nfsv4acls, at_root, 2487 mounted_on_fileno); 2488 } 2489 if (nvp != NULL) 2490 vrele(nvp); 2491 dirlen += (3 * NFSX_UNSIGNED); 2492 } 2493 if 
(needs_unbusy != 0) 2494 vfs_unbusy(new_mp); 2495 if (dirlen <= cnt) 2496 entrycnt++; 2497 } 2498 cpos += dp->d_reclen; 2499 dp = (struct dirent *)cpos; 2500 cookiep++; 2501 ncookies--; 2502 } 2503 vrele(vp); 2504 vfs_unbusy(mp); 2505 2506 /* 2507 * If dirlen > cnt, we must strip off the last entry. If that 2508 * results in an empty reply, report NFSERR_TOOSMALL. 2509 */ 2510 if (dirlen > cnt || nd->nd_repstat) { 2511 if (!nd->nd_repstat && entrycnt == 0) 2512 nd->nd_repstat = NFSERR_TOOSMALL; 2513 if (nd->nd_repstat) { 2514 newnfs_trimtrailing(nd, mb0, bpos0); 2515 if (nd->nd_flag & ND_NFSV3) 2516 nfsrv_postopattr(nd, getret, &at); 2517 } else 2518 newnfs_trimtrailing(nd, mb1, bpos1); 2519 eofflag = 0; 2520 } else if (cpos < cend) 2521 eofflag = 0; 2522 if (!nd->nd_repstat) { 2523 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2524 *tl++ = newnfs_false; 2525 if (eofflag) 2526 *tl = newnfs_true; 2527 else 2528 *tl = newnfs_false; 2529 } 2530 free(cookies, M_TEMP); 2531 free(rbuf, M_TEMP); 2532 2533 out: 2534 NFSEXITCODE2(0, nd); 2535 return (0); 2536 nfsmout: 2537 vput(vp); 2538 NFSEXITCODE2(error, nd); 2539 return (error); 2540 } 2541 2542 /* 2543 * Get the settable attributes out of the mbuf list. 2544 * (Return 0 or EBADRPC) 2545 */ 2546 int 2547 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2548 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2549 { 2550 u_int32_t *tl; 2551 struct nfsv2_sattr *sp; 2552 int error = 0, toclient = 0; 2553 2554 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2555 case ND_NFSV2: 2556 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2557 /* 2558 * Some old clients didn't fill in the high order 16bits. 
2559 * --> check the low order 2 bytes for 0xffff 2560 */ 2561 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2562 nvap->na_mode = nfstov_mode(sp->sa_mode); 2563 if (sp->sa_uid != newnfs_xdrneg1) 2564 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2565 if (sp->sa_gid != newnfs_xdrneg1) 2566 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2567 if (sp->sa_size != newnfs_xdrneg1) 2568 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2569 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2570 #ifdef notyet 2571 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2572 #else 2573 nvap->na_atime.tv_sec = 2574 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2575 nvap->na_atime.tv_nsec = 0; 2576 #endif 2577 } 2578 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2579 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2580 break; 2581 case ND_NFSV3: 2582 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2583 if (*tl == newnfs_true) { 2584 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2585 nvap->na_mode = nfstov_mode(*tl); 2586 } 2587 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2588 if (*tl == newnfs_true) { 2589 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2590 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2591 } 2592 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2593 if (*tl == newnfs_true) { 2594 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2595 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2596 } 2597 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2598 if (*tl == newnfs_true) { 2599 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2600 nvap->na_size = fxdr_hyper(tl); 2601 } 2602 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2603 switch (fxdr_unsigned(int, *tl)) { 2604 case NFSV3SATTRTIME_TOCLIENT: 2605 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2606 fxdr_nfsv3time(tl, &nvap->na_atime); 2607 toclient = 1; 2608 break; 2609 case NFSV3SATTRTIME_TOSERVER: 2610 vfs_timestamp(&nvap->na_atime); 2611 nvap->na_vaflags |= VA_UTIMES_NULL; 2612 break; 2613 } 2614 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2615 switch (fxdr_unsigned(int, *tl)) { 2616 case NFSV3SATTRTIME_TOCLIENT: 2617 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2618 fxdr_nfsv3time(tl, &nvap->na_mtime); 2619 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2620 break; 2621 case NFSV3SATTRTIME_TOSERVER: 2622 vfs_timestamp(&nvap->na_mtime); 2623 if (!toclient) 2624 nvap->na_vaflags |= VA_UTIMES_NULL; 2625 break; 2626 } 2627 break; 2628 case ND_NFSV4: 2629 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2630 } 2631 nfsmout: 2632 NFSEXITCODE2(error, nd); 2633 return (error); 2634 } 2635 2636 /* 2637 * Handle the setable attributes for V4. 2638 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2639 */ 2640 int 2641 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2642 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2643 { 2644 u_int32_t *tl; 2645 int attrsum = 0; 2646 int i, j; 2647 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2648 int toclient = 0; 2649 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2650 uid_t uid; 2651 gid_t gid; 2652 2653 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2654 if (error) 2655 goto nfsmout; 2656 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2657 attrsize = fxdr_unsigned(int, *tl); 2658 2659 /* 2660 * Loop around getting the setable attributes. If an unsupported 2661 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 2662 */ 2663 if (retnotsup) { 2664 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2665 bitpos = NFSATTRBIT_MAX; 2666 } else { 2667 bitpos = 0; 2668 } 2669 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2670 if (attrsum > attrsize) { 2671 error = NFSERR_BADXDR; 2672 goto nfsmout; 2673 } 2674 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2675 switch (bitpos) { 2676 case NFSATTRBIT_SIZE: 2677 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2678 if (vp != NULL && vp->v_type != VREG) { 2679 error = (vp->v_type == VDIR) ? 
NFSERR_ISDIR : 2680 NFSERR_INVAL; 2681 goto nfsmout; 2682 } 2683 nvap->na_size = fxdr_hyper(tl); 2684 attrsum += NFSX_HYPER; 2685 break; 2686 case NFSATTRBIT_ACL: 2687 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2688 p); 2689 if (error) 2690 goto nfsmout; 2691 if (aceerr && !nd->nd_repstat) 2692 nd->nd_repstat = aceerr; 2693 attrsum += aclsize; 2694 break; 2695 case NFSATTRBIT_ARCHIVE: 2696 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2697 if (!nd->nd_repstat) 2698 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2699 attrsum += NFSX_UNSIGNED; 2700 break; 2701 case NFSATTRBIT_HIDDEN: 2702 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2703 if (!nd->nd_repstat) 2704 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2705 attrsum += NFSX_UNSIGNED; 2706 break; 2707 case NFSATTRBIT_MIMETYPE: 2708 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2709 i = fxdr_unsigned(int, *tl); 2710 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2711 if (error) 2712 goto nfsmout; 2713 if (!nd->nd_repstat) 2714 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2715 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2716 break; 2717 case NFSATTRBIT_MODE: 2718 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2719 nvap->na_mode = nfstov_mode(*tl); 2720 attrsum += NFSX_UNSIGNED; 2721 break; 2722 case NFSATTRBIT_OWNER: 2723 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2724 j = fxdr_unsigned(int, *tl); 2725 if (j < 0) { 2726 error = NFSERR_BADXDR; 2727 goto nfsmout; 2728 } 2729 if (j > NFSV4_SMALLSTR) 2730 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2731 else 2732 cp = namestr; 2733 error = nfsrv_mtostr(nd, cp, j); 2734 if (error) { 2735 if (j > NFSV4_SMALLSTR) 2736 free(cp, M_NFSSTRING); 2737 goto nfsmout; 2738 } 2739 if (!nd->nd_repstat) { 2740 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, 2741 p); 2742 if (!nd->nd_repstat) 2743 nvap->na_uid = uid; 2744 } 2745 if (j > NFSV4_SMALLSTR) 2746 free(cp, M_NFSSTRING); 2747 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2748 break; 2749 case NFSATTRBIT_OWNERGROUP: 2750 NFSM_DISSECT(tl, 
u_int32_t *, NFSX_UNSIGNED); 2751 j = fxdr_unsigned(int, *tl); 2752 if (j < 0) { 2753 error = NFSERR_BADXDR; 2754 goto nfsmout; 2755 } 2756 if (j > NFSV4_SMALLSTR) 2757 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2758 else 2759 cp = namestr; 2760 error = nfsrv_mtostr(nd, cp, j); 2761 if (error) { 2762 if (j > NFSV4_SMALLSTR) 2763 free(cp, M_NFSSTRING); 2764 goto nfsmout; 2765 } 2766 if (!nd->nd_repstat) { 2767 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, 2768 p); 2769 if (!nd->nd_repstat) 2770 nvap->na_gid = gid; 2771 } 2772 if (j > NFSV4_SMALLSTR) 2773 free(cp, M_NFSSTRING); 2774 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2775 break; 2776 case NFSATTRBIT_SYSTEM: 2777 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2778 if (!nd->nd_repstat) 2779 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2780 attrsum += NFSX_UNSIGNED; 2781 break; 2782 case NFSATTRBIT_TIMEACCESSSET: 2783 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2784 attrsum += NFSX_UNSIGNED; 2785 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2786 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2787 fxdr_nfsv4time(tl, &nvap->na_atime); 2788 toclient = 1; 2789 attrsum += NFSX_V4TIME; 2790 } else { 2791 vfs_timestamp(&nvap->na_atime); 2792 nvap->na_vaflags |= VA_UTIMES_NULL; 2793 } 2794 break; 2795 case NFSATTRBIT_TIMEBACKUP: 2796 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2797 if (!nd->nd_repstat) 2798 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2799 attrsum += NFSX_V4TIME; 2800 break; 2801 case NFSATTRBIT_TIMECREATE: 2802 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2803 if (!nd->nd_repstat) 2804 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2805 attrsum += NFSX_V4TIME; 2806 break; 2807 case NFSATTRBIT_TIMEMODIFYSET: 2808 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2809 attrsum += NFSX_UNSIGNED; 2810 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2811 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2812 fxdr_nfsv4time(tl, &nvap->na_mtime); 2813 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2814 attrsum += NFSX_V4TIME; 2815 } 
else { 2816 vfs_timestamp(&nvap->na_mtime); 2817 if (!toclient) 2818 nvap->na_vaflags |= VA_UTIMES_NULL; 2819 } 2820 break; 2821 default: 2822 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2823 /* 2824 * set bitpos so we drop out of the loop. 2825 */ 2826 bitpos = NFSATTRBIT_MAX; 2827 break; 2828 } 2829 } 2830 2831 /* 2832 * some clients pad the attrlist, so we need to skip over the 2833 * padding. 2834 */ 2835 if (attrsum > attrsize) { 2836 error = NFSERR_BADXDR; 2837 } else { 2838 attrsize = NFSM_RNDUP(attrsize); 2839 if (attrsum < attrsize) 2840 error = nfsm_advance(nd, attrsize - attrsum, -1); 2841 } 2842 nfsmout: 2843 NFSEXITCODE2(error, nd); 2844 return (error); 2845 } 2846 2847 /* 2848 * Check/setup export credentials. 2849 */ 2850 int 2851 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 2852 struct ucred *credanon) 2853 { 2854 int error = 0; 2855 2856 /* 2857 * Check/setup credentials. 2858 */ 2859 if (nd->nd_flag & ND_GSS) 2860 exp->nes_exflag &= ~MNT_EXPORTANON; 2861 2862 /* 2863 * Check to see if the operation is allowed for this security flavor. 2864 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 2865 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 2866 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 2867 */ 2868 if (nfsvno_testexp(nd, exp) && 2869 nd->nd_procnum != NFSV4OP_SECINFO && 2870 nd->nd_procnum != NFSPROC_FSINFO) { 2871 if (nd->nd_flag & ND_NFSV4) 2872 error = NFSERR_WRONGSEC; 2873 else 2874 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 2875 goto out; 2876 } 2877 2878 /* 2879 * Check to see if the file system is exported V4 only. 2880 */ 2881 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 2882 error = NFSERR_PROGNOTV4; 2883 goto out; 2884 } 2885 2886 /* 2887 * Now, map the user credentials. 2888 * (Note that ND_AUTHNONE will only be set for an NFSv3 2889 * Fsinfo RPC. If set for anything else, this code might need 2890 * to change.) 
2891 */ 2892 if (NFSVNO_EXPORTED(exp)) { 2893 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 2894 NFSVNO_EXPORTANON(exp) || 2895 (nd->nd_flag & ND_AUTHNONE) != 0) { 2896 nd->nd_cred->cr_uid = credanon->cr_uid; 2897 nd->nd_cred->cr_gid = credanon->cr_gid; 2898 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 2899 credanon->cr_groups); 2900 } else if ((nd->nd_flag & ND_GSS) == 0) { 2901 /* 2902 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 2903 * if there is a replacement credential with a group 2904 * list set up by "nfsuserd -manage-gids". 2905 * If there is no replacement, nfsrv_getgrpscred() 2906 * simply returns its argument. 2907 */ 2908 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 2909 } 2910 } 2911 2912 out: 2913 NFSEXITCODE2(error, nd); 2914 return (error); 2915 } 2916 2917 /* 2918 * Check exports. 2919 */ 2920 int 2921 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 2922 struct ucred **credp) 2923 { 2924 int i, error, *secflavors; 2925 2926 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2927 &exp->nes_numsecflavor, &secflavors); 2928 if (error) { 2929 if (nfs_rootfhset) { 2930 exp->nes_exflag = 0; 2931 exp->nes_numsecflavor = 0; 2932 error = 0; 2933 } 2934 } else { 2935 /* Copy the security flavors. */ 2936 for (i = 0; i < exp->nes_numsecflavor; i++) 2937 exp->nes_secflavors[i] = secflavors[i]; 2938 } 2939 NFSEXITCODE(error); 2940 return (error); 2941 } 2942 2943 /* 2944 * Get a vnode for a file handle and export stuff. 2945 */ 2946 int 2947 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 2948 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 2949 struct ucred **credp) 2950 { 2951 int i, error, *secflavors; 2952 2953 *credp = NULL; 2954 exp->nes_numsecflavor = 0; 2955 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 2956 if (error != 0) 2957 /* Make sure the server replies ESTALE to the client. 
*/ 2958 error = ESTALE; 2959 if (nam && !error) { 2960 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2961 &exp->nes_numsecflavor, &secflavors); 2962 if (error) { 2963 if (nfs_rootfhset) { 2964 exp->nes_exflag = 0; 2965 exp->nes_numsecflavor = 0; 2966 error = 0; 2967 } else { 2968 vput(*vpp); 2969 } 2970 } else { 2971 /* Copy the security flavors. */ 2972 for (i = 0; i < exp->nes_numsecflavor; i++) 2973 exp->nes_secflavors[i] = secflavors[i]; 2974 } 2975 } 2976 NFSEXITCODE(error); 2977 return (error); 2978 } 2979 2980 /* 2981 * nfsd_fhtovp() - convert a fh to a vnode ptr 2982 * - look up fsid in mount list (if not found ret error) 2983 * - get vp and export rights by calling nfsvno_fhtovp() 2984 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 2985 * for AUTH_SYS 2986 * - if mpp != NULL, return the mount point so that it can 2987 * be used for vn_finished_write() by the caller 2988 */ 2989 void 2990 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 2991 struct vnode **vpp, struct nfsexstuff *exp, 2992 struct mount **mpp, int startwrite, struct thread *p) 2993 { 2994 struct mount *mp; 2995 struct ucred *credanon; 2996 fhandle_t *fhp; 2997 2998 fhp = (fhandle_t *)nfp->nfsrvfh_data; 2999 /* 3000 * Check for the special case of the nfsv4root_fh. 3001 */ 3002 mp = vfs_busyfs(&fhp->fh_fsid); 3003 if (mpp != NULL) 3004 *mpp = mp; 3005 if (mp == NULL) { 3006 *vpp = NULL; 3007 nd->nd_repstat = ESTALE; 3008 goto out; 3009 } 3010 3011 if (startwrite) { 3012 vn_start_write(NULL, mpp, V_WAIT); 3013 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3014 lktype = LK_EXCLUSIVE; 3015 } 3016 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3017 &credanon); 3018 vfs_unbusy(mp); 3019 3020 /* 3021 * For NFSv4 without a pseudo root fs, unexported file handles 3022 * can be returned, so that Lookup works everywhere. 
3023 */ 3024 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3025 !(nd->nd_flag & ND_NFSV4)) { 3026 vput(*vpp); 3027 nd->nd_repstat = EACCES; 3028 } 3029 3030 /* 3031 * Personally, I've never seen any point in requiring a 3032 * reserved port#, since only in the rare case where the 3033 * clients are all boxes with secure system privileges, 3034 * does it provide any enhanced security, but... some people 3035 * believe it to be useful and keep putting this code back in. 3036 * (There is also some "security checker" out there that 3037 * complains if the nfs server doesn't enforce this.) 3038 * However, note the following: 3039 * RFC3530 (NFSv4) specifies that a reserved port# not be 3040 * required. 3041 * RFC2623 recommends that, if a reserved port# is checked for, 3042 * that there be a way to turn that off--> ifdef'd. 3043 */ 3044 #ifdef NFS_REQRSVPORT 3045 if (!nd->nd_repstat) { 3046 struct sockaddr_in *saddr; 3047 struct sockaddr_in6 *saddr6; 3048 3049 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3050 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3051 if (!(nd->nd_flag & ND_NFSV4) && 3052 ((saddr->sin_family == AF_INET && 3053 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3054 (saddr6->sin6_family == AF_INET6 && 3055 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3056 vput(*vpp); 3057 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3058 } 3059 } 3060 #endif /* NFS_REQRSVPORT */ 3061 3062 /* 3063 * Check/setup credentials. 3064 */ 3065 if (!nd->nd_repstat) { 3066 nd->nd_saveduid = nd->nd_cred->cr_uid; 3067 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 3068 if (nd->nd_repstat) 3069 vput(*vpp); 3070 } 3071 if (credanon != NULL) 3072 crfree(credanon); 3073 if (nd->nd_repstat) { 3074 if (startwrite) 3075 vn_finished_write(mp); 3076 *vpp = NULL; 3077 if (mpp != NULL) 3078 *mpp = NULL; 3079 } 3080 3081 out: 3082 NFSEXITCODE2(0, nd); 3083 } 3084 3085 /* 3086 * glue for fp. 
3087 */ 3088 static int 3089 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3090 { 3091 struct filedesc *fdp; 3092 struct file *fp; 3093 int error = 0; 3094 3095 fdp = p->td_proc->p_fd; 3096 if (fd < 0 || fd >= fdp->fd_nfiles || 3097 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3098 error = EBADF; 3099 goto out; 3100 } 3101 *fpp = fp; 3102 3103 out: 3104 NFSEXITCODE(error); 3105 return (error); 3106 } 3107 3108 /* 3109 * Called from nfssvc() to update the exports list. Just call 3110 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3111 * in the mount list. 3112 */ 3113 int 3114 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3115 { 3116 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3117 int error = 0; 3118 struct nameidata nd; 3119 fhandle_t fh; 3120 3121 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3122 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3123 nfs_rootfhset = 0; 3124 else if (error == 0) { 3125 if (nfsexargp->fspec == NULL) { 3126 error = EPERM; 3127 goto out; 3128 } 3129 /* 3130 * If fspec != NULL, this is the v4root path. 3131 */ 3132 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 3133 nfsexargp->fspec, p); 3134 if ((error = namei(&nd)) != 0) 3135 goto out; 3136 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3137 vrele(nd.ni_vp); 3138 if (!error) { 3139 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3140 NFSBCOPY((caddr_t)&fh, 3141 nfs_rootfh.nfsrvfh_data, 3142 sizeof (fhandle_t)); 3143 nfs_rootfhset = 1; 3144 } 3145 } 3146 3147 out: 3148 NFSEXITCODE(error); 3149 return (error); 3150 } 3151 3152 /* 3153 * This function needs to test to see if the system is near its limit 3154 * for memory allocation via malloc() or mget() and return True iff 3155 * either of these resources are near their limit. 3156 * XXX (For now, this is just a stub.) 
3157 */ 3158 int nfsrv_testmalloclimit = 0; 3159 int 3160 nfsrv_mallocmget_limit(void) 3161 { 3162 static int printmesg = 0; 3163 static int testval = 1; 3164 3165 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3166 if ((printmesg++ % 100) == 0) 3167 printf("nfsd: malloc/mget near limit\n"); 3168 return (1); 3169 } 3170 return (0); 3171 } 3172 3173 /* 3174 * BSD specific initialization of a mount point. 3175 */ 3176 void 3177 nfsd_mntinit(void) 3178 { 3179 static int inited = 0; 3180 3181 if (inited) 3182 return; 3183 inited = 1; 3184 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3185 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 3186 TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); 3187 nfsv4root_mnt.mnt_export = NULL; 3188 TAILQ_INIT(&nfsv4root_opt); 3189 TAILQ_INIT(&nfsv4root_newopt); 3190 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 3191 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 3192 nfsv4root_mnt.mnt_nvnodelistsize = 0; 3193 nfsv4root_mnt.mnt_activevnodelistsize = 0; 3194 } 3195 3196 /* 3197 * Get a vnode for a file handle, without checking exports, etc. 3198 */ 3199 struct vnode * 3200 nfsvno_getvp(fhandle_t *fhp) 3201 { 3202 struct mount *mp; 3203 struct vnode *vp; 3204 int error; 3205 3206 mp = vfs_busyfs(&fhp->fh_fsid); 3207 if (mp == NULL) 3208 return (NULL); 3209 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3210 vfs_unbusy(mp); 3211 if (error) 3212 return (NULL); 3213 return (vp); 3214 } 3215 3216 /* 3217 * Do a local VOP_ADVLOCK(). 
3218 */ 3219 int 3220 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3221 u_int64_t end, struct thread *td) 3222 { 3223 int error = 0; 3224 struct flock fl; 3225 u_int64_t tlen; 3226 3227 if (nfsrv_dolocallocks == 0) 3228 goto out; 3229 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3230 3231 fl.l_whence = SEEK_SET; 3232 fl.l_type = ftype; 3233 fl.l_start = (off_t)first; 3234 if (end == NFS64BITSSET) { 3235 fl.l_len = 0; 3236 } else { 3237 tlen = end - first; 3238 fl.l_len = (off_t)tlen; 3239 } 3240 /* 3241 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3242 * values for all calls, so that all locks will be held by the 3243 * nfsd server. (The nfsd server handles conflicts between the 3244 * various clients.) 3245 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3246 * bytes, so it can't be put in l_sysid. 3247 */ 3248 if (nfsv4_sysid == 0) 3249 nfsv4_sysid = nlm_acquire_next_sysid(); 3250 fl.l_pid = (pid_t)0; 3251 fl.l_sysid = (int)nfsv4_sysid; 3252 3253 if (ftype == F_UNLCK) 3254 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3255 (F_POSIX | F_REMOTE)); 3256 else 3257 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3258 (F_POSIX | F_REMOTE)); 3259 3260 out: 3261 NFSEXITCODE(error); 3262 return (error); 3263 } 3264 3265 /* 3266 * Check the nfsv4 root exports. 
3267 */ 3268 int 3269 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3270 { 3271 struct ucred *credanon; 3272 int exflags, error = 0, numsecflavor, *secflavors, i; 3273 3274 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3275 &credanon, &numsecflavor, &secflavors); 3276 if (error) { 3277 error = NFSERR_PROGUNAVAIL; 3278 goto out; 3279 } 3280 if (credanon != NULL) 3281 crfree(credanon); 3282 for (i = 0; i < numsecflavor; i++) { 3283 if (secflavors[i] == AUTH_SYS) 3284 nd->nd_flag |= ND_EXAUTHSYS; 3285 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3286 nd->nd_flag |= ND_EXGSS; 3287 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3288 nd->nd_flag |= ND_EXGSSINTEGRITY; 3289 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3290 nd->nd_flag |= ND_EXGSSPRIVACY; 3291 } 3292 3293 out: 3294 NFSEXITCODE(error); 3295 return (error); 3296 } 3297 3298 /* 3299 * Nfs server pseudo system call for the nfsd's 3300 */ 3301 /* 3302 * MPSAFE 3303 */ 3304 static int 3305 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3306 { 3307 struct file *fp; 3308 struct nfsd_addsock_args sockarg; 3309 struct nfsd_nfsd_args nfsdarg; 3310 struct nfsd_nfsd_oargs onfsdarg; 3311 struct nfsd_pnfsd_args pnfsdarg; 3312 struct vnode *vp, *nvp, *curdvp; 3313 struct pnfsdsfile *pf; 3314 struct nfsdevice *ds, *fds; 3315 cap_rights_t rights; 3316 int buflen, error, ret; 3317 char *buf, *cp, *cp2, *cp3; 3318 char fname[PNFS_FILENAME_LEN + 1]; 3319 3320 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3321 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3322 if (error) 3323 goto out; 3324 /* 3325 * Since we don't know what rights might be required, 3326 * pretend that we need them all. It is better to be too 3327 * careful than too reckless. 
3328 */ 3329 error = fget(td, sockarg.sock, 3330 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 3331 if (error != 0) 3332 goto out; 3333 if (fp->f_type != DTYPE_SOCKET) { 3334 fdrop(fp, td); 3335 error = EPERM; 3336 goto out; 3337 } 3338 error = nfsrvd_addsock(fp); 3339 fdrop(fp, td); 3340 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3341 if (uap->argp == NULL) { 3342 error = EINVAL; 3343 goto out; 3344 } 3345 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3346 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3347 if (error == 0) { 3348 nfsdarg.principal = onfsdarg.principal; 3349 nfsdarg.minthreads = onfsdarg.minthreads; 3350 nfsdarg.maxthreads = onfsdarg.maxthreads; 3351 nfsdarg.version = 1; 3352 nfsdarg.addr = NULL; 3353 nfsdarg.addrlen = 0; 3354 nfsdarg.dnshost = NULL; 3355 nfsdarg.dnshostlen = 0; 3356 nfsdarg.dspath = NULL; 3357 nfsdarg.dspathlen = 0; 3358 nfsdarg.mdspath = NULL; 3359 nfsdarg.mdspathlen = 0; 3360 nfsdarg.mirrorcnt = 1; 3361 } 3362 } else 3363 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3364 if (error) 3365 goto out; 3366 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3367 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3368 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3369 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 3370 nfsdarg.mirrorcnt >= 1 && 3371 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3372 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3373 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 3374 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3375 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 3376 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3377 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 3378 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3379 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3380 if (error != 0) { 3381 free(cp, M_TEMP); 3382 goto out; 3383 } 3384 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. 
*/ 3385 nfsdarg.addr = cp; 3386 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3387 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3388 if (error != 0) { 3389 free(nfsdarg.addr, M_TEMP); 3390 free(cp, M_TEMP); 3391 goto out; 3392 } 3393 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3394 nfsdarg.dnshost = cp; 3395 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3396 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3397 if (error != 0) { 3398 free(nfsdarg.addr, M_TEMP); 3399 free(nfsdarg.dnshost, M_TEMP); 3400 free(cp, M_TEMP); 3401 goto out; 3402 } 3403 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3404 nfsdarg.dspath = cp; 3405 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 3406 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 3407 if (error != 0) { 3408 free(nfsdarg.addr, M_TEMP); 3409 free(nfsdarg.dnshost, M_TEMP); 3410 free(nfsdarg.dspath, M_TEMP); 3411 free(cp, M_TEMP); 3412 goto out; 3413 } 3414 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ 3415 nfsdarg.mdspath = cp; 3416 } else { 3417 nfsdarg.addr = NULL; 3418 nfsdarg.addrlen = 0; 3419 nfsdarg.dnshost = NULL; 3420 nfsdarg.dnshostlen = 0; 3421 nfsdarg.dspath = NULL; 3422 nfsdarg.dspathlen = 0; 3423 nfsdarg.mdspath = NULL; 3424 nfsdarg.mdspathlen = 0; 3425 nfsdarg.mirrorcnt = 1; 3426 } 3427 error = nfsrvd_nfsd(td, &nfsdarg); 3428 free(nfsdarg.addr, M_TEMP); 3429 free(nfsdarg.dnshost, M_TEMP); 3430 free(nfsdarg.dspath, M_TEMP); 3431 free(nfsdarg.mdspath, M_TEMP); 3432 } else if (uap->flag & NFSSVC_PNFSDS) { 3433 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3434 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 3435 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 3436 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3437 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3438 NULL); 3439 if (error == 0) 3440 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 3441 free(cp, M_TEMP); 3442 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3443 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3444 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3445 buf = malloc(buflen, M_TEMP, M_WAITOK); 3446 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3447 NULL); 3448 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3449 if (error == 0 && pnfsdarg.dspath != NULL) { 3450 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3451 error = copyinstr(pnfsdarg.dspath, cp2, 3452 PATH_MAX + 1, NULL); 3453 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3454 error); 3455 } else 3456 cp2 = NULL; 3457 if (error == 0 && pnfsdarg.curdspath != NULL) { 3458 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3459 error = copyinstr(pnfsdarg.curdspath, cp3, 3460 PATH_MAX + 1, NULL); 3461 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3462 error); 3463 } else 3464 cp3 = NULL; 3465 curdvp = NULL; 3466 fds = NULL; 3467 if (error == 0) 3468 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3469 &buflen, fname, td, &vp, &nvp, &pf, &ds, 3470 &fds); 3471 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 3472 
			if (error == 0) {
				if (pf->dsf_dir >= nfsrv_dsdirsize) {
					printf("copymr: dsdir out of range\n");
					pf->dsf_dir = 0;
				}
				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
				error = nfsrv_copymr(vp, nvp,
				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
				    (struct pnfsdsfile *)buf,
				    buflen / sizeof(*pf), td->td_ucred, td);
				vput(vp);
				vput(nvp);
				if (fds != NULL && error == 0) {
					/*
					 * NOTE(review): nfsrv_dsremove() itself
					 * does NFSVOPLOCK(dvp, LK_EXCLUSIVE) on
					 * the directory it is handed; confirm
					 * that taking the lock here as well is
					 * intended.
					 */
					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
					ret = vn_lock(curdvp, LK_EXCLUSIVE);
					if (ret == 0) {
						nfsrv_dsremove(curdvp, fname,
						    td->td_ucred, td);
						NFSVOPUNLOCK(curdvp, 0);
					}
				}
				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
			}
			free(cp, M_TEMP);
			free(cp2, M_TEMP);
			free(cp3, M_TEMP);
			free(buf, M_TEMP);
		}
	} else {
		error = nfssvc_srvcall(td, uap, td->td_ucred);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Handle the nfssvc() requests that are selected by a single bit in
 * uap->flag and take their argument (if any) from uap->argp.
 * The flag bits are tested in the order coded below and only the first
 * matching branch is executed.
 * Returns 0 or an errno.  "error" starts out as EINVAL, so that is what
 * is returned when none of the flag bits tested here is set.
 */
static int
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	struct nfsex_args export;
	struct file *fp = NULL;
	int stablefd, len;
	struct nfsd_clid adminrevoke;
	struct nfsd_dumplist dumplist;
	struct nfsd_dumpclients *dumpclients;
	struct nfsd_dumplocklist dumplocklist;
	struct nfsd_dumplocks *dumplocks;
	struct nameidata nd;
	vnode_t vp;
	int error = EINVAL, igotlock;
	struct proc *procp;
	/* Non-zero while this function holds nfsd_suspend_lock. */
	static int suspend_nfsd = 0;

	if (uap->flag & NFSSVC_PUBLICFH) {
		/* Copy in the public file handle and mark it as set. */
		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
		    sizeof (fhandle_t));
		error = copyin(uap->argp,
		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
		if (!error)
			nfs_pubfhset = 1;
	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
		/* Copy in the export arguments for the NFSv4 root. */
		error = copyin(uap->argp,(caddr_t)&export,
		    sizeof (struct nfsex_args));
		if (!error)
			error = nfsrv_v4rootexport(&export, cred, p);
	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
		nfs_pubfhset = 0;
		error = 0;
	} else if (uap->flag & NFSSVC_STABLERESTART) {
		/*
		 * The argument is a file descriptor for the stable restart
		 * file.  It must be open for both reading and writing and
		 * is only accepted while newnfs_numnfsd is 0.
		 * NOTE(review): when one of the checks after fp_getfvp()
		 * fails, fp is not released in this function; confirm
		 * whether a file reference is leaked on those paths.
		 */
		error = copyin(uap->argp, (caddr_t)&stablefd,
		    sizeof (int));
		if (!error)
			error = fp_getfvp(p, stablefd, &fp, &vp);
		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
			error = EBADF;
		if (!error && newnfs_numnfsd != 0)
			error = EPERM;
		if (!error) {
			nfsrv_stablefirst.nsf_fp = fp;
			nfsrv_setupstable(p);
		}
	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
		error = copyin(uap->argp, (caddr_t)&adminrevoke,
		    sizeof (struct nfsd_clid));
		if (!error)
			error = nfsrv_adminrevoke(&adminrevoke, p);
	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
		/*
		 * ndl_size is the number of entries requested and must be
		 * in the range 1..NFSRV_MAXDUMPLIST.  The entries are
		 * built in a temporary buffer and copied out to ndl_list.
		 */
		error = copyin(uap->argp, (caddr_t)&dumplist,
		    sizeof (struct nfsd_dumplist));
		if (!error && (dumplist.ndl_size < 1 ||
			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error) {
		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
			M_TEMP, M_WAITOK);
		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
		    error = copyout(dumpclients,
			CAST_USER_ADDR_T(dumplist.ndl_list), len);
		    free(dumpclients, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
		/*
		 * Same as NFSSVC_DUMPCLIENTS, but for the locks on the file
		 * named by ndllck_fname, which is looked up first.
		 */
		error = copyin(uap->argp, (caddr_t)&dumplocklist,
		    sizeof (struct nfsd_dumplocklist));
		if (!error && (dumplocklist.ndllck_size < 1 ||
			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error)
			error = nfsrv_lookupfilename(&nd,
				dumplocklist.ndllck_fname, p);
		if (!error) {
			len = sizeof (struct nfsd_dumplocks) *
				dumplocklist.ndllck_size;
			dumplocks = (struct nfsd_dumplocks *)malloc(len,
				M_TEMP, M_WAITOK);
			nfsrv_dumplocks(nd.ni_vp, dumplocks,
			    dumplocklist.ndllck_size, p);
			vput(nd.ni_vp);
			error = copyout(dumplocks,
			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
			free(dumplocks, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
		/*
		 * Record the calling process (pid, command name, start time
		 * and proc pointer) as the master nfsd.  These are the
		 * values nfsrv_backupstable() compares against before it
		 * signals the process.
		 * NOTE(review): this branch never assigns "error", so the
		 * initial EINVAL is returned; confirm that is intended.
		 */
		procp = p->td_proc;
		PROC_LOCK(procp);
		nfsd_master_pid = procp->p_pid;
		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
		nfsd_master_start = procp->p_stats->p_start;
		nfsd_master_proc = procp;
		PROC_UNLOCK(procp);
	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd == 0) {
			/* Lock out all nfsd threads */
			do {
				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
			} while (igotlock == 0 && suspend_nfsd == 0);
			suspend_nfsd = 1;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
		/* Release nfsd_suspend_lock if a suspend is in effect. */
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd != 0) {
			nfsv4_unlock(&nfsd_suspend_lock, 0);
			suspend_nfsd = 0;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	}

	NFSEXITCODE(error);
	return (error);
}

/*
 * Check exports.
 * Returns 0 if ok, 1 otherwise.
 * The request described by nd is accepted if any entry in the export's
 * security flavor list matches the ND_GSS* bits set in nd->nd_flag.
 */
int
nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
{
	int i;

	/*
	 * This seems odd, but allow the case where the security flavor
	 * list is empty. This happens when NFSv4 is traversing non-exported
	 * file systems. Exported file systems should always have a non-empty
	 * security flavor list.
	 */
	if (exp->nes_numsecflavor == 0)
		return (0);

	for (i = 0; i < exp->nes_numsecflavor; i++) {
		/*
		 * The tests for privacy and integrity must be first,
		 * since ND_GSS is set for everything but AUTH_SYS.
		 */
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
		    (nd->nd_flag & ND_GSSPRIVACY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
		    (nd->nd_flag & ND_GSSINTEGRITY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
		    (nd->nd_flag & ND_GSS))
			return (0);
		if (exp->nes_secflavors[i] == AUTH_SYS &&
		    (nd->nd_flag & ND_GSS) == 0)
			return (0);
	}
	return (1);
}

/*
 * Calculate a hash value for the fid in a file handle.
 * Only the fh_fid member is hashed, using hash32_buf() with a seed of 0.
 */
uint32_t
nfsrv_hashfh(fhandle_t *fhp)
{
	uint32_t hashval;

	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
	return (hashval);
}

/*
 * Calculate a hash value for the sessionid.
 * NFSX_V4SESSIONID bytes are hashed, using hash32_buf() with a seed of 0.
 */
uint32_t
nfsrv_hashsessionid(uint8_t *sessionid)
{
	uint32_t hashval;

	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
	return (hashval);
}

/*
 * Signal the userland master nfsd to backup the stable restart file.
 * Does nothing unless a master nfsd has been recorded in nfsd_master_proc.
 */
void
nfsrv_backupstable(void)
{
	struct proc *procp;

	if (nfsd_master_proc != NULL) {
		procp = pfind(nfsd_master_pid);
		/* Try to make sure it is the correct process. */
		if (procp == nfsd_master_proc &&
		    procp->p_stats->p_start.tv_sec ==
		    nfsd_master_start.tv_sec &&
		    procp->p_stats->p_start.tv_usec ==
		    nfsd_master_start.tv_usec &&
		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
			kern_psignal(procp, SIGUSR2);
		else
			/* No match, so forget the recorded master nfsd. */
			nfsd_master_proc = NULL;

		/* Presumably pfind() returned the process locked. */
		if (procp != NULL)
			PROC_UNLOCK(procp);
	}
}

/*
 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
 */
struct nfsrvdscreate {
	int			done;		/* Set to 1 by start_dscreate() when finished. */
	int			inprog;		/* Polled with "done" by the waiting thread. */
	struct task		tsk;		/* Its address is the tsleep() channel. */
	struct ucred		*tcred;		/* Credentials passed to nfsrv_dscreate(). */
	struct vnode		*dvp;		/* DS directory to create the file in. */
	NFSPROC_T		*p;
	struct pnfsdsfile	*pf;		/* Filled in by nfsrv_dscreate(). */
	int			err;		/* Return value of nfsrv_dscreate(). */
	fhandle_t		fh;		/* MDS file handle, used to make the name. */
	struct vattr		va;		/* Attributes for VOP_SETATTR(). */
	struct vattr		createva;	/* Attributes for VOP_CREATE(). */
};

/*
 * Create one data file in the DS directory dvp.
 * vap    - attributes handed to VOP_CREATE().
 * nvap   - attributes handed to VOP_SETATTR() on the new file.
 * fhp    - MDS file handle; used to generate the file name when fnamep
 *          is NULL.
 * pf     - filled in with the DS file handle, the DS mount's socket
 *          address and the file name upon success.
 * dsa    - if non-NULL, filled in from VOP_GETATTR() on the new file.
 * fnamep - if non-NULL, the name to use for the file.
 * nvpp   - if non-NULL and no error occurred, the new vnode is returned
 *          here; otherwise the new vnode is vput()'d before returning.
 * Returns 0 or an errno.
 */
int
nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
    fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
    char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
{
	struct vnode *nvp;
	struct nameidata named;
	struct vattr va;
	char *bufp;
	u_long *hashp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error;

	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
	named.ni_cnd.cn_thread = p;
	named.ni_cnd.cn_nameptr = bufp;
	if (fnamep != NULL) {
		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
		named.ni_cnd.cn_namelen = strlen(bufp);
	} else
		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);

	/* Create the data file in the DS mount. */
	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
		NFSVOPUNLOCK(dvp, 0);
		if (error == 0) {
			/* Set the ownership of the file. */
			error = VOP_SETATTR(nvp, nvap, tcred);
			NFSD_DEBUG(4, "nfsrv_dscreate:"
			    " setattr-uid=%d\n", error);
			/* nvp is released here, so it is not used below. */
			if (error != 0)
				vput(nvp);
		}
		if (error != 0)
			printf("pNFS: pnfscreate failed=%d\n", error);
	} else
		printf("pNFS: pnfscreate vnlock=%d\n", error);
	if (error == 0) {
		np = VTONFS(nvp);
		nmp = VFSTONFS(nvp->v_mount);
		/*
		 * Sanity check that the new file is on an "nfs" mount whose
		 * socket address fits in a sockaddr_in6 and whose file
		 * handle is NFSX_MYFH bytes long.
		 */
		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
		    != 0 || nmp->nm_nam->sa_len > sizeof(
		    struct sockaddr_in6) ||
		    np->n_fhp->nfh_len != NFSX_MYFH) {
			printf("Bad DS file: fstype=%s salen=%d"
			    " fhlen=%d\n",
			    nvp->v_mount->mnt_vfc->vfc_name,
			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
			error = ENOENT;
		}

		/* Set extattrs for the DS on the MDS file. */
		if (error == 0) {
			if (dsa != NULL) {
				error = VOP_GETATTR(nvp, &va, tcred);
				if (error == 0) {
					dsa->dsa_filerev = va.va_filerev;
					dsa->dsa_size = va.va_size;
					dsa->dsa_atime = va.va_atime;
					dsa->dsa_mtime = va.va_mtime;
				}
			}
			if (error == 0) {
				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
				    NFSX_MYFH);
				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
				    nmp->nm_nam->sa_len);
				NFSBCOPY(named.ni_cnd.cn_nameptr,
				    pf->dsf_filename,
				    sizeof(pf->dsf_filename));
			}
		} else
			/* This is only reached for the "Bad DS file" case. */
			printf("pNFS: pnfscreate can't get DS"
			    " attr=%d\n", error);
		if (nvpp != NULL && error == 0)
			*nvpp = nvp;
		else
			vput(nvp);
	}
	nfsvno_relpathbuf(&named);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_dscreate().
 */
static void
start_dscreate(void *arg, int pending)
{
	struct nfsrvdscreate *dsc;

	/* arg is the struct nfsrvdscreate set up by nfsrv_pnfscreate(). */
	dsc = (struct nfsrvdscreate *)arg;
	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
	    dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
	/* The waiting thread polls "done" to learn that "err" is valid. */
	dsc->done = 1;
	NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
}

/*
 * Create a pNFS data file on the Data Server(s).
 * vp is the MDS file's vnode and vap holds the attributes passed to
 * VOP_CREATE() for each DS file.  Upon success, the "pnfsd.dsfile" and
 * "pnfsd.dsattr" system extended attributes are set on vp.
 * Nothing is returned; failures are only reported via printf().
 */
static void
nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrvdscreate *dsc, *tdsc;
	struct nfsdevice *ds, *tds, *fds;
	struct mount *mp;
	struct pnfsdsfile *pf, *tpf;
	struct pnfsdsattr dsattr;
	struct vattr va;
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsmount *nmp;
	fhandle_t fh;
	uid_t vauid;
	gid_t vagid;
	u_short vamode;
	struct ucred *tcred;
	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
	int failpos, timo;

	/* Get a DS server directory in a round-robin order. */
	mirrorcnt = 1;
	mp = vp->v_mount;
	ds = fds = NULL;
	NFSDDSLOCK();
	/*
	 * Search for the first entry that handles this MDS fs, but use the
	 * first entry for all MDS fs's otherwise.
	 */
	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
		if (tds->nfsdev_nmp != NULL) {
			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
				ds = tds;
			else if (tds->nfsdev_mdsisset != 0 &&
			    mp->mnt_stat.f_fsid.val[0] ==
			    tds->nfsdev_mdsfsid.val[0] &&
			    mp->mnt_stat.f_fsid.val[1] ==
			    tds->nfsdev_mdsfsid.val[1]) {
				ds = fds = tds;
				break;
			}
		}
	}
	if (ds == NULL) {
		NFSDDSUNLOCK();
		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
		return;
	}
	i = dsdir[0] = ds->nfsdev_nextdir;
	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
	dvp[0] = ds->nfsdev_dsdir[i];
	/*
	 * Pick additional mirrors from the entries that follow ds in the
	 * list, up to nfsrv_maxpnfsmirror in total.  The same kind of entry
	 * is used as for ds (fds != NULL means MDS fs specific) and the same
	 * directory index i is used on every mirror.
	 */
	tds = TAILQ_NEXT(ds, nfsdev_list);
	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
			if (tds->nfsdev_nmp != NULL &&
			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
			      mp->mnt_stat.f_fsid.val[0] ==
			      tds->nfsdev_mdsfsid.val[0] &&
			      mp->mnt_stat.f_fsid.val[1] ==
			      tds->nfsdev_mdsfsid.val[1]))) {
				dsdir[mirrorcnt] = i;
				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
				mirrorcnt++;
				if (mirrorcnt >= nfsrv_maxpnfsmirror)
					break;
			}
		}
	}
	/* Put at end of list to implement round-robin usage. */
	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
	NFSDDSUNLOCK();
	/*
	 * One nfsrvdscreate per additional mirror; the last mirror is always
	 * done by this thread.  M_ZERO leaves inprog/done/err as 0 for any
	 * entry that is never set up, which the wait loop below relies on.
	 */
	dsc = NULL;
	if (mirrorcnt > 1)
		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
		    M_WAITOK | M_ZERO);
	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
	    M_ZERO);

	error = nfsvno_getfh(vp, &fh, p);
	if (error == 0)
		error = VOP_GETATTR(vp, &va, cred);
	if (error == 0) {
		/* Set the attributes for "vp" to Setattr the DS vp. */
		vauid = va.va_uid;
		vagid = va.va_gid;
		vamode = va.va_mode;
		VATTR_NULL(&va);
		va.va_uid = vauid;
		va.va_gid = vagid;
		va.va_mode = vamode;
		va.va_size = 0;
	} else
		printf("pNFS: pnfscreate getfh+attr=%d\n", error);

	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
	    cred->cr_gid);
	/* Make data file name based on FH. */
	tcred = newnfs_getcred();

	/*
	 * Create the file on each DS mirror, using kernel process(es) for the
	 * additional mirrors.
	 */
	failpos = -1;
	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
		tpf->dsf_dir = dsdir[i];
		tdsc->tcred = tcred;
		tdsc->p = p;
		tdsc->pf = tpf;
		tdsc->createva = *vap;
		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
		tdsc->va = va;
		tdsc->dvp = dvp[i];
		tdsc->done = 0;
		tdsc->inprog = 0;
		tdsc->err = 0;
		/* EIO forces the in-line create when there are no threads. */
		ret = EIO;
		if (nfs_pnfsiothreads != 0) {
			ret = nfs_pnfsio(start_dscreate, tdsc);
			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
		}
		if (ret != 0) {
			/* Could not hand it off, so do it in this thread. */
			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
			    NULL, tcred, p, NULL);
			if (ret != 0) {
				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
				    error));
				/* Only the first failed mirror is recorded. */
				if (failpos == -1 && nfsds_failerr(ret))
					failpos = i;
				else
					error = ret;
			}
		}
	}
	if (error == 0) {
		/* The last mirror also fetches the attributes (dsattr). */
		tpf->dsf_dir = dsdir[mirrorcnt - 1];
		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
		    &dsattr, NULL, tcred, p, NULL);
		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
			failpos = mirrorcnt - 1;
			error = 0;
		}
	}
	timo = hz / 50;		/* Wait for 20msec. */
	if (timo < 1)
		timo = 1;
	/* Wait for kernel task(s) to complete. */
	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
		while (tdsc->inprog != 0 && tdsc->done == 0)
			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
		if (tdsc->err != 0) {
			if (failpos == -1 && nfsds_failerr(tdsc->err))
				failpos = i;
			else if (error == 0)
				error = tdsc->err;
		}
	}

	/*
	 * If failpos has been set, that mirror has failed, so it needs
	 * to be disabled.
	 */
	if (failpos >= 0) {
		nmp = VFSTONFS(dvp[failpos]->v_mount);
		NFSLOCKMNT(nmp);
		/* Skip this if the mount is already being handled. */
		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
		     NFSMNTP_CANCELRPCS)) == 0) {
			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
			NFSUNLOCKMNT(nmp);
			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
			    ds);
			if (ds != NULL)
				nfsrv_killrpcs(nmp);
			NFSLOCKMNT(nmp);
			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
			wakeup(nmp);
		}
		NFSUNLOCKMNT(nmp);
	}

	NFSFREECRED(tcred);
	if (error == 0) {
		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");

		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
		    mirrorcnt, nfsrv_maxpnfsmirror);
		/*
		 * For all mirrors that couldn't be created, fill in the
		 * *pf structure, but with an IP address == 0.0.0.0.
		 */
		tpf = pf + mirrorcnt;
		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
			*tpf = *pf;
			tpf->dsf_sin.sin_family = AF_INET;
			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
			tpf->dsf_sin.sin_addr.s_addr = 0;
			tpf->dsf_sin.sin_port = 0;
		}

		error = vn_start_write(vp, &mp, V_WAIT);
		if (error == 0) {
			error = vn_extattr_set(vp, IO_NODELOCKED,
			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
			    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
			if (error == 0)
				error = vn_extattr_set(vp, IO_NODELOCKED,
				    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
				    sizeof(dsattr), (char *)&dsattr, p);
			vn_finished_write(mp);
			if (error != 0)
				printf("pNFS: pnfscreate setextattr=%d\n",
				    error);
		} else
			printf("pNFS: pnfscreate startwrite=%d\n", error);
	} else
		printf("pNFS: pnfscreate=%d\n", error);
	free(pf, M_TEMP);
	free(dsc, M_TEMP);
}

/*
 * Get the information needed to remove the pNFS Data Server file from the
 * Metadata file.  Upon success, ddvp is set non-NULL to the locked
 * DS directory vnode.  The caller must unlock *ddvp when done with it.
 * ("ddvp" above refers to the dvpp argument.  dvpp[0] is set to NULL on
 * entry, so it stays NULL for every early return.)
 * NOTE(review): nfsrv_dsgetsockmnt() is called with a lock type of 0 here;
 * confirm that the returned directory vnode(s) really are locked.
 */
static void
nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
    int *mirrorcntp, char *fname, fhandle_t *fhp)
{
	struct vattr va;
	struct ucred *tcred;
	char *buf;
	int buflen, error;

	dvpp[0] = NULL;
	/* If not an exported regular file or not a pNFS server, just return. */
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    nfsrv_devidcnt == 0)
		return;

	/* Check to see if this is the last hard link. */
	tcred = newnfs_getcred();
	error = VOP_GETATTR(vp, &va, tcred);
	NFSFREECRED(tcred);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
		return;
	}
	if (va.va_nlink > 1)
		return;

	error = nfsvno_getfh(vp, fhp, p);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
		return;
	}

	/* buf is only scratch space for the extended attribute's value. */
	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	/* Get the directory vnode for the DS mount and the file handle. */
	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
	    NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
	free(buf, M_TEMP);
	if (error != 0)
		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
}

/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
4119 */ 4120 struct nfsrvdsremove { 4121 int done; 4122 int inprog; 4123 struct task tsk; 4124 struct ucred *tcred; 4125 struct vnode *dvp; 4126 NFSPROC_T *p; 4127 int err; 4128 char fname[PNFS_FILENAME_LEN + 1]; 4129 }; 4130 4131 static int 4132 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4133 NFSPROC_T *p) 4134 { 4135 struct nameidata named; 4136 struct vnode *nvp; 4137 char *bufp; 4138 u_long *hashp; 4139 int error; 4140 4141 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4142 if (error != 0) 4143 return (error); 4144 named.ni_cnd.cn_nameiop = DELETE; 4145 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4146 named.ni_cnd.cn_cred = tcred; 4147 named.ni_cnd.cn_thread = p; 4148 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 4149 nfsvno_setpathbuf(&named, &bufp, &hashp); 4150 named.ni_cnd.cn_nameptr = bufp; 4151 named.ni_cnd.cn_namelen = strlen(fname); 4152 strlcpy(bufp, fname, NAME_MAX); 4153 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4154 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4155 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4156 if (error == 0) { 4157 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4158 vput(nvp); 4159 } 4160 NFSVOPUNLOCK(dvp, 0); 4161 nfsvno_relpathbuf(&named); 4162 if (error != 0) 4163 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4164 return (error); 4165 } 4166 4167 /* 4168 * Start up the thread that will execute nfsrv_dsremove(). 4169 */ 4170 static void 4171 start_dsremove(void *arg, int pending) 4172 { 4173 struct nfsrvdsremove *dsrm; 4174 4175 dsrm = (struct nfsrvdsremove *)arg; 4176 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4177 dsrm->p); 4178 dsrm->done = 1; 4179 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4180 } 4181 4182 /* 4183 * Remove a pNFS data file from a Data Server. 4184 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4185 * removed to set up the dvp and fill in the FH. 
4186 */ 4187 static void 4188 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4189 NFSPROC_T *p) 4190 { 4191 struct ucred *tcred; 4192 struct nfsrvdsremove *dsrm, *tdsrm; 4193 struct nfsdevice *ds; 4194 struct nfsmount *nmp; 4195 int failpos, i, ret, timo; 4196 4197 tcred = newnfs_getcred(); 4198 dsrm = NULL; 4199 if (mirrorcnt > 1) 4200 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4201 /* 4202 * Remove the file on each DS mirror, using kernel process(es) for the 4203 * additional mirrors. 4204 */ 4205 failpos = -1; 4206 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4207 tdsrm->tcred = tcred; 4208 tdsrm->p = p; 4209 tdsrm->dvp = dvp[i]; 4210 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4211 tdsrm->inprog = 0; 4212 tdsrm->done = 0; 4213 tdsrm->err = 0; 4214 ret = EIO; 4215 if (nfs_pnfsiothreads != 0) { 4216 ret = nfs_pnfsio(start_dsremove, tdsrm); 4217 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4218 } 4219 if (ret != 0) { 4220 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4221 if (failpos == -1 && nfsds_failerr(ret)) 4222 failpos = i; 4223 } 4224 } 4225 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4226 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4227 failpos = mirrorcnt - 1; 4228 timo = hz / 50; /* Wait for 20msec. */ 4229 if (timo < 1) 4230 timo = 1; 4231 /* Wait for kernel task(s) to complete. */ 4232 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4233 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4234 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4235 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4236 failpos = i; 4237 } 4238 4239 /* 4240 * If failpos has been set, that mirror has failed, so it needs 4241 * to be disabled. 
4242 */ 4243 if (failpos >= 0) { 4244 nmp = VFSTONFS(dvp[failpos]->v_mount); 4245 NFSLOCKMNT(nmp); 4246 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4247 NFSMNTP_CANCELRPCS)) == 0) { 4248 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4249 NFSUNLOCKMNT(nmp); 4250 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4251 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4252 ds); 4253 if (ds != NULL) 4254 nfsrv_killrpcs(nmp); 4255 NFSLOCKMNT(nmp); 4256 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4257 wakeup(nmp); 4258 } 4259 NFSUNLOCKMNT(nmp); 4260 } 4261 4262 /* Get rid all layouts for the file. */ 4263 nfsrv_freefilelayouts(fhp); 4264 4265 NFSFREECRED(tcred); 4266 free(dsrm, M_TEMP); 4267 } 4268 4269 /* 4270 * Generate a file name based on the file handle and put it in *bufp. 4271 * Return the number of bytes generated. 4272 */ 4273 static int 4274 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4275 { 4276 int i; 4277 uint8_t *cp; 4278 const uint8_t *hexdigits = "0123456789abcdef"; 4279 4280 cp = (uint8_t *)fhp; 4281 for (i = 0; i < sizeof(*fhp); i++) { 4282 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4283 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4284 } 4285 bufp[2 * i] = '\0'; 4286 return (2 * i); 4287 } 4288 4289 /* 4290 * Update the Metadata file's attributes from the DS file when a Read/Write 4291 * layout is returned. 4292 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4293 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4294 */ 4295 int 4296 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4297 { 4298 struct ucred *tcred; 4299 int error; 4300 4301 /* Do this as root so that it won't fail with EACCES. */ 4302 tcred = newnfs_getcred(); 4303 error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4304 NULL, NULL, NULL, nap, NULL); 4305 NFSFREECRED(tcred); 4306 return (error); 4307 } 4308 4309 /* 4310 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 
 * This is just nfsrv_proxyds() with procedure == NFSPROC_SETACL.
 */
static int
nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
    NFSPROC_T *p)
{
	int error;

	error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL,
	    NULL, NULL, NULL, NULL, aclp);
	return (error);
}

/*
 * Perform the operation selected by ioproc against the Data Server
 * file(s) for the MDS file vp.
 * ioproc selects the DS RPC that is done:
 *   NFSPROC_READDS  - nfsrv_readdsrpc() using off, cnt, mpp and mpp2.
 *   NFSPROC_WRITEDS - nfsrv_writedsrpc() using off, cnt, mpp and cp.
 *   NFSPROC_SETATTR - nfsrv_setattrdsrpc() using nap.
 *   NFSPROC_SETACL  - nfsrv_setacldsrpc() using aclp.
 *   anything else   - nfsrv_getattrdsrpc() filling in nap.
 * nd is only passed along to nfsrv_checkdsattr() for NFSPROC_GETATTR.
 * Returns ENOENT if vp is not a regular file, is not on an exported
 * file system or there are no DSs, as well as when the DS information
 * cannot be acquired.  Otherwise the result of the DS RPC is returned.
 */
static int
nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt,
    struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp,
    char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp)
{
	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
	fhandle_t fh[NFSDEV_MAXMIRRORS];
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsdevice *ds;
	struct pnfsdsattr dsattr;
	char *buf;
	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;

	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
	/*
	 * If not a regular file, not exported or not a pNFS server,
	 * just return ENOENT.
	 */
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    nfsrv_devidcnt == 0)
		return (ENOENT);

	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	error = 0;

	/*
	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
	 * from the MetaData file's extended attribute.
	 */
	if (ioproc == NFSPROC_GETATTR) {
		error = vn_extattr_get(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
		    p);
		if (error == 0 && buflen != sizeof(dsattr))
			error = ENXIO;
		if (error == 0) {
			NFSBCOPY(buf, &dsattr, buflen);
			nap->na_filerev = dsattr.dsa_filerev;
			nap->na_size = dsattr.dsa_size;
			nap->na_atime = dsattr.dsa_atime;
			nap->na_mtime = dsattr.dsa_mtime;

			/*
			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
			 * returns 0, just return now.  nfsrv_checkdsattr()
			 * returns 0 if there is no Read/Write layout
			 * plus either an Open/Write_access or Write
			 * delegation issued to a client for the file.
			 */
			if (nfsrv_pnfsgetdsattr == 0 ||
			    nfsrv_checkdsattr(nd, vp, p) == 0) {
				free(buf, M_TEMP);
				return (error);
			}
		}

		/*
		 * Clear ENOATTR so the code below will attempt to do a
		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
		 * the extended attribute.
		 */
		if (error == ENOATTR)
			error = 0;
	}

	/*
	 * origmircnt remembers the mirror count of the first attempt, which
	 * bounds the number of retries done via "tryagain".
	 */
	origmircnt = -1;
	trycnt = 0;
tryagain:
	if (error == 0) {
		buflen = 1024;
		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
		    NULL, NULL);
		if (error == 0) {
			for (i = 0; i < mirrorcnt; i++)
				nmp[i] = VFSTONFS(dvp[i]->v_mount);
		} else
			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
	} else
		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
	if (error == 0) {
		failpos = -1;
		if (origmircnt == -1)
			origmircnt = mirrorcnt;
		/*
		 * If failpos is set to a mirror#, then that mirror has
		 * failed and will be disabled. For Read and Getattr, the
		 * function only tries one mirror, so if that mirror has
		 * failed, it will need to be retried. As such, increment
		 * trycnt for these cases.
		 * For Write, Setattr and Setacl, the function tries all
		 * mirrors and will not return an error for the case where
		 * one mirror has failed. For these cases, the functioning
		 * mirror(s) will have been modified, so a retry isn't
		 * necessary. These functions will set failpos for the
		 * failed mirror#.
		 */
		if (ioproc == NFSPROC_READDS) {
			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
			    mpp, mpp2);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * read is required.
				 */
				failpos = 0;
				error = 0;
				trycnt++;
			}
		} else if (ioproc == NFSPROC_WRITEDS)
			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
		else if (ioproc == NFSPROC_SETATTR)
			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, nap, &failpos);
		else if (ioproc == NFSPROC_SETACL)
			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, aclp, &failpos);
		else {
			/* Getattr is done against the last mirror. */
			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
			    vp, nmp[mirrorcnt - 1], nap);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * getattr is required.
				 */
				failpos = mirrorcnt - 1;
				error = 0;
				trycnt++;
			}
		}
		ds = NULL;
		if (failpos >= 0) {
			failnmp = nmp[failpos];
			NFSLOCKMNT(failnmp);
			/* Skip this if the mount is already being handled. */
			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
			     NFSMNTP_CANCELRPCS)) == 0) {
				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
				NFSUNLOCKMNT(failnmp);
				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
				    failnmp, p);
				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
				    failpos, ds);
				if (ds != NULL)
					nfsrv_killrpcs(failnmp);
				NFSLOCKMNT(failnmp);
				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
				wakeup(failnmp);
			}
			NFSUNLOCKMNT(failnmp);
		}
		/*
		 * Presumably nfsrv_dsgetsockmnt() returned these locked,
		 * since it was called with LK_SHARED.
		 */
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dvp[i], 0);
		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
		    trycnt);
		/* Try the Read/Getattr again if a mirror was deleted. */
		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
			goto tryagain;
	} else {
		/* Return ENOENT for any Extended Attribute error. */
		error = ENOENT;
	}
	free(buf, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
	return (error);
}

/*
 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 * attribute.
 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
 *           to be checked.  If it points to a NULL nmp, then it returns
 *           a suitable destination.
 * curnmp - If non-NULL, it is the source mount for the copy.
 */
int
nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
    int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
    char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
    struct nfsmount *curnmp, int *ippos, int *dsdirp)
{
	struct vnode *dvp, *nvp, **tdvpp;
	struct mount *mp;
	struct nfsmount *nmp, *newnmp;
	struct sockaddr *sad;
	struct sockaddr_in *sin;
	struct nfsdevice *ds, *tds, *fndds;
	struct pnfsdsfile *pf;
	uint32_t dsdir;
	int error, fhiszero, fnd, gotone, i, mirrorcnt;

	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
	*mirrorcntp = 1;
	tdvpp = dvpp;
	if (nvpp != NULL)
		*nvpp = NULL;
	if (dvpp != NULL)
		*dvpp = NULL;
	if (ippos != NULL)
		*ippos = -1;
	if (newnmpp != NULL)
		newnmp = *newnmpp;
	else
		newnmp = NULL;
	mp = vp->v_mount;
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsfile", buflenp, buf, p);
	mirrorcnt = *buflenp / sizeof(*pf);
	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
	    *buflenp != sizeof(*pf) * mirrorcnt))
		error = ENOATTR;

	pf = (struct pnfsdsfile *)buf;
	/* If curnmp != NULL, check for a match in the mirror list.
*/ 4540 if (curnmp != NULL && error == 0) { 4541 fnd = 0; 4542 for (i = 0; i < mirrorcnt; i++, pf++) { 4543 sad = (struct sockaddr *)&pf->dsf_sin; 4544 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 4545 if (ippos != NULL) 4546 *ippos = i; 4547 fnd = 1; 4548 break; 4549 } 4550 } 4551 if (fnd == 0) 4552 error = ENXIO; 4553 } 4554 4555 gotone = 0; 4556 pf = (struct pnfsdsfile *)buf; 4557 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 4558 error); 4559 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 4560 fhiszero = 0; 4561 sad = (struct sockaddr *)&pf->dsf_sin; 4562 sin = &pf->dsf_sin; 4563 dsdir = pf->dsf_dir; 4564 if (dsdir >= nfsrv_dsdirsize) { 4565 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 4566 error = ENOATTR; 4567 } else if (nvpp != NULL && newnmp != NULL && 4568 nfsaddr2_match(sad, newnmp->nm_nam)) 4569 error = EEXIST; 4570 if (error == 0) { 4571 if (ippos != NULL && curnmp == NULL && 4572 sad->sa_family == AF_INET && 4573 sin->sin_addr.s_addr == 0) 4574 *ippos = i; 4575 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 4576 fhiszero = 1; 4577 /* Use the socket address to find the mount point. */ 4578 fndds = NULL; 4579 NFSDDSLOCK(); 4580 /* Find a match for the IP address. */ 4581 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 4582 if (ds->nfsdev_nmp != NULL) { 4583 dvp = ds->nfsdev_dvp; 4584 nmp = VFSTONFS(dvp->v_mount); 4585 if (nmp != ds->nfsdev_nmp) 4586 printf("different2 nmp %p %p\n", 4587 nmp, ds->nfsdev_nmp); 4588 if (nfsaddr2_match(sad, nmp->nm_nam)) { 4589 fndds = ds; 4590 break; 4591 } 4592 } 4593 } 4594 if (fndds != NULL && newnmpp != NULL && 4595 newnmp == NULL) { 4596 /* Search for a place to make a mirror copy. 
*/ 4597 TAILQ_FOREACH(tds, &nfsrv_devidhead, 4598 nfsdev_list) { 4599 if (tds->nfsdev_nmp != NULL && 4600 fndds != tds && 4601 ((tds->nfsdev_mdsisset == 0 && 4602 fndds->nfsdev_mdsisset == 0) || 4603 (tds->nfsdev_mdsisset != 0 && 4604 fndds->nfsdev_mdsisset != 0 && 4605 tds->nfsdev_mdsfsid.val[0] == 4606 mp->mnt_stat.f_fsid.val[0] && 4607 tds->nfsdev_mdsfsid.val[1] == 4608 mp->mnt_stat.f_fsid.val[1]))) { 4609 *newnmpp = tds->nfsdev_nmp; 4610 break; 4611 } 4612 } 4613 if (tds != NULL) { 4614 /* 4615 * Move this entry to the end of the 4616 * list, so it won't be selected as 4617 * easily the next time. 4618 */ 4619 TAILQ_REMOVE(&nfsrv_devidhead, tds, 4620 nfsdev_list); 4621 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 4622 nfsdev_list); 4623 } 4624 } 4625 NFSDDSUNLOCK(); 4626 if (fndds != NULL) { 4627 dvp = fndds->nfsdev_dsdir[dsdir]; 4628 if (lktype != 0 || fhiszero != 0 || 4629 (nvpp != NULL && *nvpp == NULL)) { 4630 if (fhiszero != 0) 4631 error = vn_lock(dvp, 4632 LK_EXCLUSIVE); 4633 else if (lktype != 0) 4634 error = vn_lock(dvp, lktype); 4635 else 4636 error = vn_lock(dvp, LK_SHARED); 4637 /* 4638 * If the file handle is all 0's, try to 4639 * do a Lookup against the DS to acquire 4640 * it. 4641 * If dvpp == NULL or the Lookup fails, 4642 * unlock dvp after the call. 
4643 */ 4644 if (error == 0 && (fhiszero != 0 || 4645 (nvpp != NULL && *nvpp == NULL))) { 4646 error = nfsrv_pnfslookupds(vp, 4647 dvp, pf, &nvp, p); 4648 if (error == 0) { 4649 if (fhiszero != 0) 4650 nfsrv_pnfssetfh( 4651 vp, pf, 4652 nvp, p); 4653 if (nvpp != NULL && 4654 *nvpp == NULL) { 4655 *nvpp = nvp; 4656 *dsdirp = dsdir; 4657 } else 4658 vput(nvp); 4659 } 4660 if (error != 0 || lktype == 0) 4661 NFSVOPUNLOCK(dvp, 0); 4662 } 4663 } 4664 if (error == 0) { 4665 gotone++; 4666 NFSD_DEBUG(4, "gotone=%d\n", gotone); 4667 if (devid != NULL) { 4668 NFSBCOPY(fndds->nfsdev_deviceid, 4669 devid, NFSX_V4DEVICEID); 4670 devid += NFSX_V4DEVICEID; 4671 } 4672 if (dvpp != NULL) 4673 *tdvpp++ = dvp; 4674 if (fhp != NULL) 4675 NFSBCOPY(&pf->dsf_fh, fhp++, 4676 NFSX_MYFH); 4677 if (fnamep != NULL && gotone == 1) 4678 strlcpy(fnamep, 4679 pf->dsf_filename, 4680 sizeof(pf->dsf_filename)); 4681 } else 4682 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 4683 "err=%d\n", error); 4684 } 4685 } 4686 } 4687 if (error == 0 && gotone == 0) 4688 error = ENOENT; 4689 4690 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 4691 error); 4692 if (error == 0) 4693 *mirrorcntp = gotone; 4694 else { 4695 if (gotone > 0 && dvpp != NULL) { 4696 /* 4697 * If the error didn't occur on the first one and 4698 * dvpp != NULL, the one(s) prior to the failure will 4699 * have locked dvp's that need to be unlocked. 4700 */ 4701 for (i = 0; i < gotone; i++) { 4702 NFSVOPUNLOCK(*dvpp, 0); 4703 *dvpp++ = NULL; 4704 } 4705 } 4706 /* 4707 * If it found the vnode to be copied from before a failure, 4708 * it needs to be vput()'d. 4709 */ 4710 if (nvpp != NULL && *nvpp != NULL) { 4711 vput(*nvpp); 4712 *nvpp = NULL; 4713 } 4714 } 4715 return (error); 4716 } 4717 4718 /* 4719 * Set the extended attribute for the Change attribute. 
*/
/*
 * Copies na_filerev, na_size, na_atime and na_mtime from "nap" into a
 * struct pnfsdsattr and stores it as the "pnfsd.dsattr" system namespace
 * extended attribute of vp.  vp must be exclusively locked (asserted).
 * Returns 0 or the error from vn_start_write()/vn_extattr_set(); a
 * failure is also reported via printf().
 */
static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
	struct pnfsdsattr dsattr;
	struct mount *mp;
	int error;

	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
	error = vn_start_write(vp, &mp, V_WAIT);
	if (error == 0) {
		dsattr.dsa_filerev = nap->na_filerev;
		dsattr.dsa_size = nap->na_size;
		dsattr.dsa_atime = nap->na_atime;
		dsattr.dsa_mtime = nap->na_mtime;
		error = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
		    sizeof(dsattr), (char *)&dsattr, p);
		vn_finished_write(mp);
	}
	if (error != 0)
		printf("pNFS: setextattr=%d\n", error);
	return (error);
}

/*
 * Do a Read RPC of "len" bytes at offset "off" against the DS mount "nmp"
 * for the file handle "fhp".
 * When data is returned, *mpp is set to the reply mbuf chain trimmed down
 * to the read data (plus XDR padding) and *mpendp to the last mbuf of that
 * chain.  When zero bytes are returned, *mpp is left NULL and *mpendp is
 * not set.
 * Returns 0, the error from newnfs_request(), the reply status of the RPC
 * or ENOENT if the reply mbuf list is malformed.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* Read arguments: 64bit offset followed by the 32bit count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		/*
		 * NOTE(review): NFSM_DISSECT()/NFSM_STRSIZ() presumably branch
		 * to nfsmout upon a parse failure; the macro definitions are
		 * not visible here.  The first word dissected is not used.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			/* Ran off the end of the chain before tlen bytes. */
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the chain to the caller, so it isn't freed. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}

/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 */
struct nfsrvwritedsdorpc {
	int			done;	/* Set to 1 once the RPC is done. */
	int			inprog;	/* Polled by the issuing thread. */
	struct task		tsk;
	fhandle_t		fh;
	off_t			off;
	int			len;
	struct nfsmount		*nmp;
	struct ucred		*cred;
	NFSPROC_T		*p;
	struct mbuf		*m;	/* Copy of the data to be written. */
	int			err;	/* Result of the RPC. */
};

/*
 * Do a FileSync Write RPC of the mbuf chain "m" ("len" bytes at "off")
 * against the DS mount "nmp", followed by a Getattr in the same request.
 * The chain "m" is linked onto the request.
 * nap - Passed to nfsv4_loadattr() to receive the attributes from the
 *       reply; start_writedsdorpc() passes NULL for it.
 * Returns 0, the error from newnfs_request()/nfsv4_loadattr(), the reply
 * status or NFSERR_IO if the DS did not commit the data FileSync.
 */
static int
nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
    struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int commit, error, retlen;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	txdr_hyper(off, tl);
	tl += 2;
	/*
	 * Do all writes FileSync, since the server doesn't hold onto dirty
	 * buffers.  Since clients should be accessing the DS servers directly
	 * using the pNFS layouts, this just needs to work correctly as a
	 * fallback.
	 */
	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
	*tl = txdr_unsigned(len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);

	/* Put data in mbuf chain. */
	nd->nd_mb->m_next = m;

	/* Set nd_mb and nd_bpos to end of data. */
	while (m->m_next != NULL)
		m = m->m_next;
	nd->nd_mb = m;
	nd->nd_bpos = mtod(m, char *) + m->m_len;
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);

	/* Do a Getattr for Size, Change, Access Time and Modify Time. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		/* Write reply: count, committed and the write verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		retlen = fxdr_unsigned(int, *tl++);
		commit = fxdr_unsigned(int, *tl);
		if (commit != NFSWRITE_FILESYNC)
			error = NFSERR_IO;
		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
		    retlen, commit, error);
	} else
		error = nd->nd_repstat;
	/* We have no use for the Write Verifier since we use FileSync. */

	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_writedsdorpc().
4976 */ 4977 static void 4978 start_writedsdorpc(void *arg, int pending) 4979 { 4980 struct nfsrvwritedsdorpc *drpc; 4981 4982 drpc = (struct nfsrvwritedsdorpc *)arg; 4983 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 4984 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 4985 drpc->done = 1; 4986 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 4987 } 4988 4989 static int 4990 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 4991 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 4992 struct mbuf **mpp, char *cp, int *failposp) 4993 { 4994 struct nfsrvwritedsdorpc *drpc, *tdrpc; 4995 struct nfsvattr na; 4996 struct mbuf *m; 4997 int error, i, offs, ret, timo; 4998 4999 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5000 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5001 drpc = NULL; 5002 if (mirrorcnt > 1) 5003 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5004 M_WAITOK); 5005 5006 /* Calculate offset in mbuf chain that data starts. */ 5007 offs = cp - mtod(*mpp, char *); 5008 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5009 5010 /* 5011 * Do the write RPC for every DS, using a separate kernel process 5012 * for every DS except the last one. 
5013 */ 5014 error = 0; 5015 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5016 tdrpc->done = 0; 5017 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5018 tdrpc->off = off; 5019 tdrpc->len = len; 5020 tdrpc->nmp = *nmpp; 5021 tdrpc->cred = cred; 5022 tdrpc->p = p; 5023 tdrpc->inprog = 0; 5024 tdrpc->err = 0; 5025 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5026 ret = EIO; 5027 if (nfs_pnfsiothreads != 0) { 5028 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5029 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5030 ret); 5031 } 5032 if (ret != 0) { 5033 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5034 tdrpc->m, cred, p); 5035 if (nfsds_failerr(ret) && *failposp == -1) 5036 *failposp = i; 5037 else if (error == 0 && ret != 0) 5038 error = ret; 5039 } 5040 nmpp++; 5041 fhp++; 5042 } 5043 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5044 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5045 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5046 *failposp = mirrorcnt - 1; 5047 else if (error == 0 && ret != 0) 5048 error = ret; 5049 if (error == 0) 5050 error = nfsrv_setextattr(vp, &na, p); 5051 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5052 tdrpc = drpc; 5053 timo = hz / 50; /* Wait for 20msec. */ 5054 if (timo < 1) 5055 timo = 1; 5056 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5057 /* Wait for RPCs on separate threads to complete. 
*/ 5058 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5059 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5060 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5061 *failposp = i; 5062 else if (error == 0 && tdrpc->err != 0) 5063 error = tdrpc->err; 5064 } 5065 free(drpc, M_TEMP); 5066 return (error); 5067 } 5068 5069 static int 5070 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5071 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5072 struct nfsvattr *dsnap) 5073 { 5074 uint32_t *tl; 5075 struct nfsrv_descript *nd; 5076 nfsv4stateid_t st; 5077 nfsattrbit_t attrbits; 5078 int error; 5079 5080 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5081 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5082 /* 5083 * Use a stateid where other is an alternating 01010 pattern and 5084 * seqid is 0xffffffff. This value is not defined as special by 5085 * the RFC and is used by the FreeBSD NFS server to indicate an 5086 * MDS->DS proxy operation. 5087 */ 5088 st.other[0] = 0x55555555; 5089 st.other[1] = 0x55555555; 5090 st.other[2] = 0x55555555; 5091 st.seqid = 0xffffffff; 5092 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5093 NULL, NULL, 0, 0); 5094 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5095 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5096 5097 /* Do a Getattr for Size, Change, Access Time and Modify Time. 
*/ 5098 NFSZERO_ATTRBIT(&attrbits); 5099 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5100 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5101 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5102 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5103 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5104 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5105 (void) nfsrv_putattrbit(nd, &attrbits); 5106 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5107 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5108 if (error != 0) { 5109 free(nd, M_TEMP); 5110 return (error); 5111 } 5112 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5113 nd->nd_repstat); 5114 /* Get rid of weak cache consistency data for now. */ 5115 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5116 (ND_NFSV4 | ND_V4WCCATTR)) { 5117 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5118 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5119 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5120 if (error != 0) 5121 goto nfsmout; 5122 /* 5123 * Get rid of Op# and status for next op. 5124 */ 5125 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5126 if (*++tl != 0) 5127 nd->nd_flag |= ND_NOMOREDATA; 5128 } 5129 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5130 if (error != 0) 5131 goto nfsmout; 5132 if (nd->nd_repstat != 0) 5133 error = nd->nd_repstat; 5134 /* 5135 * Get the Change, Size, Access Time and Modify Time attributes and set 5136 * on the Metadata file, so its attributes will be what the file's 5137 * would be if it had been written. 
5138 */ 5139 if (error == 0) { 5140 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5141 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5142 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5143 } 5144 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5145 nfsmout: 5146 m_freem(nd->nd_mrep); 5147 free(nd, M_TEMP); 5148 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5149 return (error); 5150 } 5151 5152 struct nfsrvsetattrdsdorpc { 5153 int done; 5154 int inprog; 5155 struct task tsk; 5156 fhandle_t fh; 5157 struct nfsmount *nmp; 5158 struct vnode *vp; 5159 struct ucred *cred; 5160 NFSPROC_T *p; 5161 struct nfsvattr na; 5162 struct nfsvattr dsna; 5163 int err; 5164 }; 5165 5166 /* 5167 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5168 */ 5169 static void 5170 start_setattrdsdorpc(void *arg, int pending) 5171 { 5172 struct nfsrvsetattrdsdorpc *drpc; 5173 5174 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5175 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5176 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5177 drpc->done = 1; 5178 } 5179 5180 static int 5181 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5182 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5183 struct nfsvattr *nap, int *failposp) 5184 { 5185 struct nfsrvsetattrdsdorpc *drpc, *tdrpc; 5186 struct nfsvattr na; 5187 int error, i, ret, timo; 5188 5189 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5190 drpc = NULL; 5191 if (mirrorcnt > 1) 5192 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5193 M_WAITOK); 5194 5195 /* 5196 * Do the setattr RPC for every DS, using a separate kernel process 5197 * for every DS except the last one. 
5198 */ 5199 error = 0; 5200 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5201 tdrpc->done = 0; 5202 tdrpc->inprog = 0; 5203 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5204 tdrpc->nmp = *nmpp; 5205 tdrpc->vp = vp; 5206 tdrpc->cred = cred; 5207 tdrpc->p = p; 5208 tdrpc->na = *nap; 5209 tdrpc->err = 0; 5210 ret = EIO; 5211 if (nfs_pnfsiothreads != 0) { 5212 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 5213 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 5214 ret); 5215 } 5216 if (ret != 0) { 5217 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 5218 &na); 5219 if (nfsds_failerr(ret) && *failposp == -1) 5220 *failposp = i; 5221 else if (error == 0 && ret != 0) 5222 error = ret; 5223 } 5224 nmpp++; 5225 fhp++; 5226 } 5227 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 5228 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5229 *failposp = mirrorcnt - 1; 5230 else if (error == 0 && ret != 0) 5231 error = ret; 5232 if (error == 0) 5233 error = nfsrv_setextattr(vp, &na, p); 5234 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 5235 tdrpc = drpc; 5236 timo = hz / 50; /* Wait for 20msec. */ 5237 if (timo < 1) 5238 timo = 1; 5239 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5240 /* Wait for RPCs on separate threads to complete. */ 5241 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5242 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 5243 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5244 *failposp = i; 5245 else if (error == 0 && tdrpc->err != 0) 5246 error = tdrpc->err; 5247 } 5248 free(drpc, M_TEMP); 5249 return (error); 5250 } 5251 5252 /* 5253 * Do a Setattr of an NFSv4 ACL on the DS file. 
*/
/*
 * The request sets only the ACL attribute, taken from "aclp", for the file
 * handle "fhp" on the DS mount "nmp".
 * Returns 0, the error from newnfs_request() or the reply status.
 */
static int
nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
    struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
{
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	nfsattrbit_t attrbits;
	int error;

	NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
	/*
	 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
	 * so passing in the metadata "vp" will be ok, since it is of
	 * the same type (VREG).
	 */
	nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
	    NULL, 0, 0, 0, 0, 0, NULL);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
	    nd->nd_repstat);
	/* The reply is not parsed; only its status is returned. */
	error = nd->nd_repstat;
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	return (error);
}

/*
 * Arguments for a nfsrv_setacldsdorpc() call, so that it can be executed
 * by start_setacldsdorpc().
 */
struct nfsrvsetacldsdorpc {
	int			done;	/* Set to 1 once the RPC is done. */
	int			inprog;	/* Polled by the issuing thread. */
	struct task		tsk;
	fhandle_t		fh;
	struct nfsmount		*nmp;
	struct vnode		*vp;
	struct ucred		*cred;
	NFSPROC_T		*p;
	struct acl		*aclp;
	int			err;	/* Result of the RPC. */
};

/*
 * Start up the thread that will execute nfsrv_setacldsdorpc().
 */
static void
start_setacldsdorpc(void *arg, int pending)
{
	struct nfsrvsetacldsdorpc *drpc;

	drpc = (struct nfsrvsetacldsdorpc *)arg;
	drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
	    drpc->vp, drpc->nmp, drpc->aclp);
	drpc->done = 1;
}

/*
 * Set the ACL "aclp" on all "mirrorcnt" DSs for the file.
 * fhp, nmpp - Arrays of mirrorcnt file handles and DS mounts.
 * failposp - If a DS fails with an error for which nfsds_failerr() is true
 *            and *failposp is still -1, it is set to that mirror's index
 *            instead of the error being returned.
 * Returns 0 or the first error that was not recorded in *failposp.
 */
static int
nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
    struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
    int *failposp)
{
	struct nfsrvsetacldsdorpc *drpc, *tdrpc;
	int error, i, ret, timo;

	NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
	drpc = NULL;
	if (mirrorcnt > 1)
		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
		    M_WAITOK);

	/*
	 * Do the setattr RPC for every DS, using a separate kernel process
	 * for every DS except the last one.
	 */
	error = 0;
	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
		tdrpc->done = 0;
		tdrpc->inprog = 0;
		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
		tdrpc->nmp = *nmpp;
		tdrpc->vp = vp;
		tdrpc->cred = cred;
		tdrpc->p = p;
		tdrpc->aclp = aclp;
		tdrpc->err = 0;
		ret = EIO;
		if (nfs_pnfsiothreads != 0) {
			ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
			NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
			    ret);
		}
		/* Could not hand it to a thread, so do the RPC here. */
		if (ret != 0) {
			ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
			    aclp);
			if (nfsds_failerr(ret) && *failposp == -1)
				*failposp = i;
			else if (error == 0 && ret != 0)
				error = ret;
		}
		nmpp++;
		fhp++;
	}
	/* The last (or only) mirror is always done by this thread. */
	ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
		*failposp = mirrorcnt - 1;
	else if (error == 0 && ret != 0)
		error = ret;
	NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
	tdrpc = drpc;
	timo = hz / 50;		/* Wait for 20msec. */
	if (timo < 1)
		timo = 1;
	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
		/* Wait for RPCs on separate threads to complete. */
		while (tdrpc->inprog != 0 && tdrpc->done == 0)
			tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
			*failposp = i;
		else if (error == 0 && tdrpc->err != 0)
			error = tdrpc->err;
	}
	free(drpc, M_TEMP);
	return (error);
}

/*
 * Getattr call to the DS for the Modify, Size and Change attributes.
5400 */ 5401 static int 5402 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5403 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 5404 { 5405 struct nfsrv_descript *nd; 5406 int error; 5407 nfsattrbit_t attrbits; 5408 5409 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 5410 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5411 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 5412 sizeof(fhandle_t), NULL, NULL, 0, 0); 5413 NFSZERO_ATTRBIT(&attrbits); 5414 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5415 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5416 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5417 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5418 (void) nfsrv_putattrbit(nd, &attrbits); 5419 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5420 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5421 if (error != 0) { 5422 free(nd, M_TEMP); 5423 return (error); 5424 } 5425 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 5426 nd->nd_repstat); 5427 if (nd->nd_repstat == 0) { 5428 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 5429 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 5430 NULL, NULL); 5431 /* 5432 * We can only save the updated values in the extended 5433 * attribute if the vp is exclusively locked. 5434 * This should happen when any of the following operations 5435 * occur on the vnode: 5436 * Close, Delegreturn, LayoutCommit, LayoutReturn 5437 * As such, the updated extended attribute should get saved 5438 * before nfsrv_checkdsattr() returns 0 and allows the cached 5439 * attributes to be returned without calling this function. 
5440 */ 5441 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 5442 error = nfsrv_setextattr(vp, nap, p); 5443 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 5444 error); 5445 } 5446 } else 5447 error = nd->nd_repstat; 5448 m_freem(nd->nd_mrep); 5449 free(nd, M_TEMP); 5450 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 5451 return (error); 5452 } 5453 5454 /* 5455 * Get the device id and file handle for a DS file. 5456 */ 5457 int 5458 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 5459 fhandle_t *fhp, char *devid) 5460 { 5461 int buflen, error; 5462 char *buf; 5463 5464 buflen = 1024; 5465 buf = malloc(buflen, M_TEMP, M_WAITOK); 5466 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 5467 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 5468 free(buf, M_TEMP); 5469 return (error); 5470 } 5471 5472 /* 5473 * Do a Lookup against the DS for the filename. 5474 */ 5475 static int 5476 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 5477 struct vnode **nvpp, NFSPROC_T *p) 5478 { 5479 struct nameidata named; 5480 struct ucred *tcred; 5481 char *bufp; 5482 u_long *hashp; 5483 struct vnode *nvp; 5484 int error; 5485 5486 tcred = newnfs_getcred(); 5487 named.ni_cnd.cn_nameiop = LOOKUP; 5488 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 5489 named.ni_cnd.cn_cred = tcred; 5490 named.ni_cnd.cn_thread = p; 5491 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 5492 nfsvno_setpathbuf(&named, &bufp, &hashp); 5493 named.ni_cnd.cn_nameptr = bufp; 5494 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 5495 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 5496 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 5497 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 5498 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 5499 NFSFREECRED(tcred); 5500 nfsvno_relpathbuf(&named); 5501 if (error == 0) 5502 *nvpp = nvp; 5503 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", 
error); 5504 return (error); 5505 } 5506 5507 /* 5508 * Set the file handle to the correct one. 5509 */ 5510 static void 5511 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp, 5512 NFSPROC_T *p) 5513 { 5514 struct mount *mp; 5515 struct nfsnode *np; 5516 int ret; 5517 5518 np = VTONFS(nvp); 5519 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 5520 /* 5521 * We can only do a setextattr for an exclusively 5522 * locked vp. Instead of trying to upgrade a shared 5523 * lock, just leave dsf_fh zeroed out and it will 5524 * keep doing this lookup until it is done with an 5525 * exclusively locked vp. 5526 */ 5527 if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) { 5528 ret = vn_start_write(vp, &mp, V_WAIT); 5529 NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n", 5530 ret); 5531 if (ret == 0) { 5532 ret = vn_extattr_set(vp, IO_NODELOCKED, 5533 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 5534 sizeof(*pf), (char *)pf, p); 5535 vn_finished_write(mp); 5536 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft " 5537 "vn_extattr_set=%d\n", ret); 5538 } 5539 } 5540 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 5541 } 5542 5543 /* 5544 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 5545 * when the DS has failed. 5546 */ 5547 void 5548 nfsrv_killrpcs(struct nfsmount *nmp) 5549 { 5550 5551 /* 5552 * Call newnfs_nmcancelreqs() to cause 5553 * any RPCs in progress on the mount point to 5554 * fail. 5555 * This will cause any process waiting for an 5556 * RPC to complete while holding a vnode lock 5557 * on the mounted-on vnode (such as "df" or 5558 * a non-forced "umount") to fail. 5559 * This will unlock the mounted-on vnode so 5560 * a forced dismount can succeed. 5561 * The NFSMNTP_CANCELRPCS flag should be set when this function is 5562 * called. 5563 */ 5564 newnfs_nmcancelreqs(nmp); 5565 } 5566 5567 /* 5568 * Sum up the statfs info for each of the DSs, so that the client will 5569 * receive the total for all DSs. 
5570 */ 5571 static int 5572 nfsrv_pnfsstatfs(struct statfs *sf) 5573 { 5574 struct statfs *tsf; 5575 struct nfsdevice *ds; 5576 struct vnode **dvpp, **tdvpp, *dvp; 5577 uint64_t tot; 5578 int cnt, error = 0, i; 5579 5580 if (nfsrv_devidcnt <= 0) 5581 return (ENXIO); 5582 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 5583 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 5584 5585 /* Get an array of the dvps for the DSs. */ 5586 tdvpp = dvpp; 5587 i = 0; 5588 NFSDDSLOCK(); 5589 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5590 if (ds->nfsdev_nmp != NULL) { 5591 if (++i > nfsrv_devidcnt) 5592 break; 5593 *tdvpp++ = ds->nfsdev_dvp; 5594 } 5595 } 5596 NFSDDSUNLOCK(); 5597 cnt = i; 5598 5599 /* Do a VFS_STATFS() for each of the DSs and sum them up. */ 5600 tdvpp = dvpp; 5601 for (i = 0; i < cnt && error == 0; i++) { 5602 dvp = *tdvpp++; 5603 error = VFS_STATFS(dvp->v_mount, tsf); 5604 if (error == 0) { 5605 if (sf->f_bsize == 0) { 5606 if (tsf->f_bsize > 0) 5607 sf->f_bsize = tsf->f_bsize; 5608 else 5609 sf->f_bsize = 8192; 5610 } 5611 if (tsf->f_blocks > 0) { 5612 if (sf->f_bsize != tsf->f_bsize) { 5613 tot = tsf->f_blocks * tsf->f_bsize; 5614 sf->f_blocks += (tot / sf->f_bsize); 5615 } else 5616 sf->f_blocks += tsf->f_blocks; 5617 } 5618 if (tsf->f_bfree > 0) { 5619 if (sf->f_bsize != tsf->f_bsize) { 5620 tot = tsf->f_bfree * tsf->f_bsize; 5621 sf->f_bfree += (tot / sf->f_bsize); 5622 } else 5623 sf->f_bfree += tsf->f_bfree; 5624 } 5625 if (tsf->f_bavail > 0) { 5626 if (sf->f_bsize != tsf->f_bsize) { 5627 tot = tsf->f_bavail * tsf->f_bsize; 5628 sf->f_bavail += (tot / sf->f_bsize); 5629 } else 5630 sf->f_bavail += tsf->f_bavail; 5631 } 5632 } 5633 } 5634 free(tsf, M_TEMP); 5635 free(dvpp, M_TEMP); 5636 return (error); 5637 } 5638 5639 /* 5640 * Set an NFSv4 acl. 
5641 */ 5642 int 5643 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 5644 { 5645 int error; 5646 5647 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 5648 error = NFSERR_ATTRNOTSUPP; 5649 goto out; 5650 } 5651 /* 5652 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 5653 * Make sure it has enough room for that - splitting every entry 5654 * into two and appending "canonical six" entries at the end. 5655 * Cribbed out of kern/vfs_acl.c - Rick M. 5656 */ 5657 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 5658 error = NFSERR_ATTRNOTSUPP; 5659 goto out; 5660 } 5661 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 5662 if (error == 0) { 5663 error = nfsrv_dssetacl(vp, aclp, cred, p); 5664 if (error == ENOENT) 5665 error = 0; 5666 } 5667 5668 out: 5669 NFSEXITCODE(error); 5670 return (error); 5671 } 5672 5673 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 5674 5675 /* 5676 * Called once to initialize data structures... 
5677 */ 5678 static int 5679 nfsd_modevent(module_t mod, int type, void *data) 5680 { 5681 int error = 0, i; 5682 static int loaded = 0; 5683 5684 switch (type) { 5685 case MOD_LOAD: 5686 if (loaded) 5687 goto out; 5688 newnfs_portinit(); 5689 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5690 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, 5691 MTX_DEF); 5692 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, 5693 MTX_DEF); 5694 } 5695 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 5696 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 5697 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 5698 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 5699 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 5700 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 5701 nfsrvd_initcache(); 5702 nfsd_init(); 5703 NFSD_LOCK(); 5704 nfsrvd_init(0); 5705 NFSD_UNLOCK(); 5706 nfsd_mntinit(); 5707 #ifdef VV_DISABLEDELEG 5708 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 5709 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 5710 #endif 5711 nfsd_call_servertimer = nfsrv_servertimer; 5712 nfsd_call_nfsd = nfssvc_nfsd; 5713 loaded = 1; 5714 break; 5715 5716 case MOD_UNLOAD: 5717 if (newnfs_numnfsd != 0) { 5718 error = EBUSY; 5719 break; 5720 } 5721 5722 #ifdef VV_DISABLEDELEG 5723 vn_deleg_ops.vndeleg_recall = NULL; 5724 vn_deleg_ops.vndeleg_disable = NULL; 5725 #endif 5726 nfsd_call_servertimer = NULL; 5727 nfsd_call_nfsd = NULL; 5728 5729 /* Clean out all NFSv4 state. */ 5730 nfsrv_throwawayallstate(curthread); 5731 5732 /* Clean the NFS server reply cache */ 5733 nfsrvd_cleancache(); 5734 5735 /* Free up the krpc server pool. 
*/ 5736 if (nfsrvd_pool != NULL) 5737 svcpool_destroy(nfsrvd_pool); 5738 5739 /* and get rid of the locks */ 5740 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5741 mtx_destroy(&nfsrchash_table[i].mtx); 5742 mtx_destroy(&nfsrcahash_table[i].mtx); 5743 } 5744 mtx_destroy(&nfsrc_udpmtx); 5745 mtx_destroy(&nfs_v4root_mutex); 5746 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 5747 mtx_destroy(&nfsrv_dontlistlock_mtx); 5748 mtx_destroy(&nfsrv_recalllock_mtx); 5749 for (i = 0; i < nfsrv_sessionhashsize; i++) 5750 mtx_destroy(&nfssessionhash[i].mtx); 5751 if (nfslayouthash != NULL) { 5752 for (i = 0; i < nfsrv_layouthashsize; i++) 5753 mtx_destroy(&nfslayouthash[i].mtx); 5754 free(nfslayouthash, M_NFSDSESSION); 5755 } 5756 lockdestroy(&nfsv4root_mnt.mnt_explock); 5757 free(nfsclienthash, M_NFSDCLIENT); 5758 free(nfslockhash, M_NFSDLOCKFILE); 5759 free(nfssessionhash, M_NFSDSESSION); 5760 loaded = 0; 5761 break; 5762 default: 5763 error = EOPNOTSUPP; 5764 break; 5765 } 5766 5767 out: 5768 NFSEXITCODE(error); 5769 return (error); 5770 } 5771 static moduledata_t nfsd_mod = { 5772 "nfsd", 5773 nfsd_modevent, 5774 NULL, 5775 }; 5776 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 5777 5778 /* So that loader and kldload(2) can find us, wherever we are.. */ 5779 MODULE_VERSION(nfsd, 1); 5780 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 5781 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); 5782 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 5783 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 5784 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 5785 5786