1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <sys/hash.h> 50 #include <sys/sysctl.h> 51 #include <nlm/nlm_prot.h> 52 #include <nlm/nlm.h> 53 54 FEATURE(nfsd, "NFSv4 server"); 55 56 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 57 extern int nfsrv_useacl; 58 extern int newnfs_numnfsd; 59 extern struct mount nfsv4root_mnt; 60 extern struct nfsrv_stablefirst nfsrv_stablefirst; 61 extern void (*nfsd_call_servertimer)(void); 62 extern SVCPOOL *nfsrvd_pool; 63 extern struct nfsv4lock nfsd_suspend_lock; 64 extern struct nfsclienthashhead *nfsclienthash; 65 extern struct nfslockhashhead *nfslockhash; 66 extern struct nfssessionhash *nfssessionhash; 67 extern int nfsrv_sessionhashsize; 68 extern struct nfsstatsv1 nfsstatsv1; 69 extern struct nfslayouthash *nfslayouthash; 70 extern int nfsrv_layouthashsize; 71 extern struct mtx nfsrv_dslock_mtx; 72 extern int nfs_pnfsiothreads; 73 extern struct nfsdontlisthead nfsrv_dontlisthead; 74 extern volatile int nfsrv_dontlistlen; 75 extern volatile int nfsrv_devidcnt; 76 extern int nfsrv_maxpnfsmirror; 77 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 78 NFSDLOCKMUTEX; 79 NFSSTATESPINLOCK; 80 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 81 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 82 struct mtx nfsrc_udpmtx; 83 struct mtx nfs_v4root_mutex; 84 struct mtx nfsrv_dontlistlock_mtx; 85 struct mtx nfsrv_recalllock_mtx; 86 struct nfsrvfh nfs_rootfh, nfs_pubfh; 87 int nfs_pubfhset = 0, nfs_rootfhset = 0; 88 struct proc *nfsd_master_proc = NULL; 89 int nfsd_debuglevel = 0; 90 static pid_t nfsd_master_pid = (pid_t)-1; 91 static char nfsd_master_comm[MAXCOMLEN + 1]; 92 static struct timeval 
nfsd_master_start; 93 static uint32_t nfsv4_sysid = 0; 94 static fhandle_t zerofh; 95 96 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 97 struct ucred *); 98 99 int nfsrv_enable_crossmntpt = 1; 100 static int nfs_commit_blks; 101 static int nfs_commit_miss; 102 extern int nfsrv_issuedelegs; 103 extern int nfsrv_dolocallocks; 104 extern int nfsd_enable_stringtouid; 105 extern struct nfsdevicehead nfsrv_devidhead; 106 107 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 108 NFSPROC_T *); 109 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 110 int *, char *, fhandle_t *); 111 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 112 NFSPROC_T *); 113 static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int, 114 struct ucred *, struct thread *, int, struct mbuf **, char *, 115 struct mbuf **, struct nfsvattr *, struct acl *); 116 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 117 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 118 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 119 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 120 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 121 char *, int *); 122 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 123 struct vnode *, struct nfsmount **, int, struct acl *, int *); 124 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 125 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 126 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 127 struct vnode *, struct nfsmount *, struct nfsvattr *); 128 static int nfsrv_putfhname(fhandle_t *, char *); 129 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 130 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 131 static void nfsrv_pnfssetfh(struct vnode *, struct 
pnfsdsfile *, 132 struct vnode *, NFSPROC_T *); 133 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 134 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 135 NFSPROC_T *); 136 static int nfsrv_pnfsstatfs(struct statfs *); 137 138 int nfs_pnfsio(task_fn_t *, void *); 139 140 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); 141 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 142 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 143 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 144 0, ""); 145 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 146 0, ""); 147 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 148 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 149 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 150 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 151 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 152 0, "Debug level for NFS server"); 153 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 154 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); 155 static int nfsrv_pnfsgetdsattr = 1; 156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 157 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 158 159 /* 160 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 161 * not running. 162 * The dsN subdirectories for the increased values must have been created 163 * on all DS servers before this increase is done. 
164 */ 165 u_int nfsrv_dsdirsize = 20; 166 static int 167 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 168 { 169 int error, newdsdirsize; 170 171 newdsdirsize = nfsrv_dsdirsize; 172 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 173 if (error != 0 || req->newptr == NULL) 174 return (error); 175 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 176 newnfs_numnfsd != 0) 177 return (EINVAL); 178 nfsrv_dsdirsize = newdsdirsize; 179 return (0); 180 } 181 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0, 182 sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", 183 "Number of dsN subdirs on the DS servers"); 184 185 #define MAX_REORDERED_RPC 16 186 #define NUM_HEURISTIC 1031 187 #define NHUSE_INIT 64 188 #define NHUSE_INC 16 189 #define NHUSE_MAX 2048 190 191 static struct nfsheur { 192 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 193 off_t nh_nextoff; /* next offset for sequential detection */ 194 int nh_use; /* use count for selection */ 195 int nh_seqcount; /* heuristic */ 196 } nfsheur[NUM_HEURISTIC]; 197 198 199 /* 200 * Heuristic to detect sequential operation. 201 */ 202 static struct nfsheur * 203 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 204 { 205 struct nfsheur *nh; 206 int hi, try; 207 208 /* Locate best candidate. */ 209 try = 32; 210 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 211 nh = &nfsheur[hi]; 212 while (try--) { 213 if (nfsheur[hi].nh_vp == vp) { 214 nh = &nfsheur[hi]; 215 break; 216 } 217 if (nfsheur[hi].nh_use > 0) 218 --nfsheur[hi].nh_use; 219 hi = (hi + 1) % NUM_HEURISTIC; 220 if (nfsheur[hi].nh_use < nh->nh_use) 221 nh = &nfsheur[hi]; 222 } 223 224 /* Initialize hint if this is a new file. */ 225 if (nh->nh_vp != vp) { 226 nh->nh_vp = vp; 227 nh->nh_nextoff = uio->uio_offset; 228 nh->nh_use = NHUSE_INIT; 229 if (uio->uio_offset == 0) 230 nh->nh_seqcount = 4; 231 else 232 nh->nh_seqcount = 1; 233 } 234 235 /* Calculate heuristic. 
*/ 236 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 237 uio->uio_offset == nh->nh_nextoff) { 238 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 239 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 240 if (nh->nh_seqcount > IO_SEQMAX) 241 nh->nh_seqcount = IO_SEQMAX; 242 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 243 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 244 /* Probably a reordered RPC, leave seqcount alone. */ 245 } else if (nh->nh_seqcount > 1) { 246 nh->nh_seqcount /= 2; 247 } else { 248 nh->nh_seqcount = 0; 249 } 250 nh->nh_use += NHUSE_INC; 251 if (nh->nh_use > NHUSE_MAX) 252 nh->nh_use = NHUSE_MAX; 253 return (nh); 254 } 255 256 /* 257 * Get attributes into nfsvattr structure. 258 */ 259 int 260 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 261 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 262 nfsattrbit_t *attrbitp) 263 { 264 int error, gotattr, lockedit = 0; 265 struct nfsvattr na; 266 267 if (vpislocked == 0) { 268 /* 269 * When vpislocked == 0, the vnode is either exclusively 270 * locked by this thread or not locked by this thread. 271 * As such, shared lock it, if not exclusively locked. 272 */ 273 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 274 lockedit = 1; 275 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 276 } 277 } 278 279 /* 280 * Acquire the Change, Size and TimeModify attributes, as required. 281 * This needs to be done for regular files if: 282 * - non-NFSv4 RPCs or 283 * - when attrbitp == NULL or 284 * - an NFSv4 RPC with any of the above attributes in attrbitp. 285 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 286 * these attributes. nfsrv_proxyds() will return an error if the 287 * server is not a pNFS one. 
288 */ 289 gotattr = 0; 290 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 291 (nd->nd_flag & ND_NFSV4) == 0 || 292 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 293 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 294 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 295 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY))) { 296 error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p, 297 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); 298 if (error == 0) 299 gotattr = 1; 300 } 301 302 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 303 if (lockedit != 0) 304 NFSVOPUNLOCK(vp, 0); 305 306 /* 307 * If we got the Change, Size and Modify Time from the DS, 308 * replace them. 309 */ 310 if (gotattr != 0) { 311 nvap->na_atime = na.na_atime; 312 nvap->na_mtime = na.na_mtime; 313 nvap->na_filerev = na.na_filerev; 314 nvap->na_size = na.na_size; 315 } 316 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 317 error, (uintmax_t)na.na_filerev); 318 319 NFSEXITCODE(error); 320 return (error); 321 } 322 323 /* 324 * Get a file handle for a vnode. 325 */ 326 int 327 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 328 { 329 int error; 330 331 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 332 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 333 error = VOP_VPTOFH(vp, &fhp->fh_fid); 334 335 NFSEXITCODE(error); 336 return (error); 337 } 338 339 /* 340 * Perform access checking for vnodes obtained from file handles that would 341 * refer to files already opened by a Unix client. You cannot just use 342 * vn_writechk() and VOP_ACCESSX() for two reasons. 343 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 344 * case. 345 * 2 - The owner is to be given access irrespective of mode bits for some 346 * operations, so that processes that chmod after opening a file don't 347 * break. 
 */
int
nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
    u_int32_t *supportedtypep)
{
	struct vattr vattr;
	int error = 0, getret = 0;

	/* Share lock the vnode for the checks when the caller has not. */
	if (vpislocked == 0) {
		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
			error = EPERM;
			goto out;
		}
	}
	if (accmode & VWRITE) {
		/* Just vn_writechk() changed to check rdonly */
		/*
		 * Disallow write attempts on read-only file systems;
		 * unless the file is a socket or a block or character
		 * device resident on the file system.
		 */
		if (NFSVNO_EXRDONLY(exp) ||
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			switch (vp->v_type) {
			case VREG:
			case VDIR:
			case VLNK:
				error = EROFS;
			default:
				break;
			}
		}
		/*
		 * If there's shared text associated with
		 * the inode, try to free it up once.  If
		 * we fail, we can't allow writing.
		 */
		if (VOP_IS_TEXT(vp) && error == 0)
			error = ETXTBSY;
	}
	if (error != 0) {
		if (vpislocked == 0)
			NFSVOPUNLOCK(vp, 0);
		goto out;
	}

	/*
	 * Should the override still be applied when ACLs are enabled?
	 */
	error = VOP_ACCESSX(vp, accmode, cred, p);
	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
		/*
		 * Try again with VEXPLICIT_DENY, to see if the test for
		 * deletion is supported.
		 */
		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
		if (error == 0) {
			if (vp->v_type == VDIR) {
				/*
				 * For a directory, redo the check with
				 * VWRITE substituted for the delete bits.
				 */
				accmode &= ~(VDELETE | VDELETE_CHILD);
				accmode |= VWRITE;
				error = VOP_ACCESSX(vp, accmode, cred, p);
			} else if (supportedtypep != NULL) {
				/* Tell the caller DELETE is not supported. */
				*supportedtypep &= ~NFSACCESS_DELETE;
			}
		}
	}

	/*
	 * Allow certain operations for the owner (reads and writes
	 * on files that are already open).
	 */
	if (override != NFSACCCHK_NOOVERRIDE &&
	    (error == EPERM || error == EACCES)) {
		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
			error = 0;
		else if (override & NFSACCCHK_ALLOWOWNER) {
			getret = VOP_GETATTR(vp, &vattr, cred);
			if (getret == 0 && cred->cr_uid == vattr.va_uid)
				error = 0;
		}
	}
	if (vpislocked == 0)
		NFSVOPUNLOCK(vp, 0);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set attribute(s) vnop.
 * After a successful VOP_SETATTR() that set any of uid, gid, size, mode,
 * atime or mtime, the same attributes are pushed to the DS file via
 * nfsrv_proxyds().  An ENOENT return from nfsrv_proxyds() is mapped to
 * success.
 */
int
nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	int error;

	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
	    nvap->na_vattr.va_size != VNOVAL ||
	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
		/* For a pNFS server, set the attributes on the DS file. */
		error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR,
		    NULL, NULL, NULL, nvap, NULL);
		if (error == ENOENT)
			error = 0;
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set up nameidata for a lookup() call and do it.
 * dp is the starting directory; islocked says whether the caller holds
 * its lock.  On return *retdirp is either NULL (dp was not a directory)
 * or dp, with an additional reference acquired here via VREF().
 * Symbolic links are only followed for ND_PUBLOOKUP requests; any other
 * request that encounters one fails with EINVAL.
 * On error the path buffer has been released and ni_vp, ni_dvp and
 * ni_startdir are set to NULL.
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp, 0);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			/* Absolute path: start from rootvnode instead of dp. */
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * LOCKLEAF is always set for the lookup() call; when the caller did
	 * not ask for it, the leaf is unlocked below and the flag is cleared
	 * again after the loop.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp, 0);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp, 0);
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * When more path follows the link, read the link into a
		 * fresh buffer so the remainder can be appended to it;
		 * otherwise read it straight into the existing path buffer.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/*
			 * badlink1: also frees the separately allocated
			 * link buffer.  badlink2: only drops the vnodes.
			 */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Set up a pathname buffer and return a pointer to it and, optionally
 * set a hash pointer.
 * The buffer is allocated from namei_zone and HASBUF is set in cn_flags
 * so that nfsvno_relpathbuf() knows there is a buffer to release.
 * When hashpp is not NULL, *hashpp is set to NULL.
 */
void
nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
{
	struct componentname *cnp = &ndp->ni_cnd;

	cnp->cn_flags |= (NOMACCHECK | HASBUF);
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	if (hashpp != NULL)
		*hashpp = NULL;
	*bufpp = cnp->cn_pnbuf;
}

/*
 * Release the above path buffer, if not released by nfsvno_namei().
 * Panics if HASBUF is not set, i.e. there is no buffer to release.
 */
void
nfsvno_relpathbuf(struct nameidata *ndp)
{

	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
		panic("nfsrelpath");
	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
	ndp->ni_cnd.cn_flags &= ~HASBUF;
}

/*
 * Readlink vnode op into an mbuf list.
693 */ 694 int 695 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, 696 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 697 { 698 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; 699 struct iovec *ivp = iv; 700 struct uio io, *uiop = &io; 701 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; 702 int i, len, tlen, error = 0; 703 704 len = 0; 705 i = 0; 706 while (len < NFS_MAXPATHLEN) { 707 NFSMGET(mp); 708 MCLGET(mp, M_WAITOK); 709 mp->m_len = M_SIZE(mp); 710 if (len == 0) { 711 mp3 = mp2 = mp; 712 } else { 713 mp2->m_next = mp; 714 mp2 = mp; 715 } 716 if ((len + mp->m_len) > NFS_MAXPATHLEN) { 717 mp->m_len = NFS_MAXPATHLEN - len; 718 len = NFS_MAXPATHLEN; 719 } else { 720 len += mp->m_len; 721 } 722 ivp->iov_base = mtod(mp, caddr_t); 723 ivp->iov_len = mp->m_len; 724 i++; 725 ivp++; 726 } 727 uiop->uio_iov = iv; 728 uiop->uio_iovcnt = i; 729 uiop->uio_offset = 0; 730 uiop->uio_resid = len; 731 uiop->uio_rw = UIO_READ; 732 uiop->uio_segflg = UIO_SYSSPACE; 733 uiop->uio_td = NULL; 734 error = VOP_READLINK(vp, uiop, cred); 735 if (error) { 736 m_freem(mp3); 737 *lenp = 0; 738 goto out; 739 } 740 if (uiop->uio_resid > 0) { 741 len -= uiop->uio_resid; 742 tlen = NFSM_RNDUP(len); 743 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); 744 } 745 *lenp = len; 746 *mpp = mp3; 747 *mpendp = mp; 748 749 out: 750 NFSEXITCODE(error); 751 return (error); 752 } 753 754 /* 755 * Read vnode op call into mbuf list. 756 */ 757 int 758 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 759 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) 760 { 761 struct mbuf *m; 762 int i; 763 struct iovec *iv; 764 struct iovec *iv2; 765 int error = 0, len, left, siz, tlen, ioflag = 0; 766 struct mbuf *m2 = NULL, *m3; 767 struct uio io, *uiop = &io; 768 struct nfsheur *nh; 769 770 /* 771 * Attempt to read from a DS file. A return of ENOENT implies 772 * there is no DS file to read. 
773 */ 774 error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 775 NULL, mpendp, NULL, NULL); 776 if (error != ENOENT) 777 return (error); 778 779 len = left = NFSM_RNDUP(cnt); 780 m3 = NULL; 781 /* 782 * Generate the mbuf list with the uio_iov ref. to it. 783 */ 784 i = 0; 785 while (left > 0) { 786 NFSMGET(m); 787 MCLGET(m, M_WAITOK); 788 m->m_len = 0; 789 siz = min(M_TRAILINGSPACE(m), left); 790 left -= siz; 791 i++; 792 if (m3) 793 m2->m_next = m; 794 else 795 m3 = m; 796 m2 = m; 797 } 798 iv = malloc(i * sizeof (struct iovec), 799 M_TEMP, M_WAITOK); 800 uiop->uio_iov = iv2 = iv; 801 m = m3; 802 left = len; 803 i = 0; 804 while (left > 0) { 805 if (m == NULL) 806 panic("nfsvno_read iov"); 807 siz = min(M_TRAILINGSPACE(m), left); 808 if (siz > 0) { 809 iv->iov_base = mtod(m, caddr_t) + m->m_len; 810 iv->iov_len = siz; 811 m->m_len += siz; 812 left -= siz; 813 iv++; 814 i++; 815 } 816 m = m->m_next; 817 } 818 uiop->uio_iovcnt = i; 819 uiop->uio_offset = off; 820 uiop->uio_resid = len; 821 uiop->uio_rw = UIO_READ; 822 uiop->uio_segflg = UIO_SYSSPACE; 823 uiop->uio_td = NULL; 824 nh = nfsrv_sequential_heuristic(uiop, vp); 825 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 826 /* XXX KDM make this more systematic? */ 827 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 828 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 829 free(iv2, M_TEMP); 830 if (error) { 831 m_freem(m3); 832 *mpp = NULL; 833 goto out; 834 } 835 nh->nh_nextoff = uiop->uio_offset; 836 tlen = len - uiop->uio_resid; 837 cnt = cnt < tlen ? cnt : tlen; 838 tlen = NFSM_RNDUP(cnt); 839 if (tlen == 0) { 840 m_freem(m3); 841 m3 = NULL; 842 } else if (len != tlen || tlen != cnt) 843 nfsrv_adj(m3, len - tlen, tlen - cnt); 844 *mpp = m3; 845 *mpendp = m2; 846 847 out: 848 NFSEXITCODE(error); 849 return (error); 850 } 851 852 /* 853 * Write vnode op from an mbuf list. 
854 */ 855 int 856 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, 857 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 858 { 859 struct iovec *ivp; 860 int i, len; 861 struct iovec *iv; 862 int ioflags, error; 863 struct uio io, *uiop = &io; 864 struct nfsheur *nh; 865 866 /* 867 * Attempt to write to a DS file. A return of ENOENT implies 868 * there is no DS file to write. 869 */ 870 error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS, 871 &mp, cp, NULL, NULL, NULL); 872 if (error != ENOENT) { 873 *stable = NFSWRITE_FILESYNC; 874 return (error); 875 } 876 877 ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 878 M_WAITOK); 879 uiop->uio_iov = iv = ivp; 880 uiop->uio_iovcnt = cnt; 881 i = mtod(mp, caddr_t) + mp->m_len - cp; 882 len = retlen; 883 while (len > 0) { 884 if (mp == NULL) 885 panic("nfsvno_write"); 886 if (i > 0) { 887 i = min(i, len); 888 ivp->iov_base = cp; 889 ivp->iov_len = i; 890 ivp++; 891 len -= i; 892 } 893 mp = mp->m_next; 894 if (mp) { 895 i = mp->m_len; 896 cp = mtod(mp, caddr_t); 897 } 898 } 899 900 if (*stable == NFSWRITE_UNSTABLE) 901 ioflags = IO_NODELOCKED; 902 else 903 ioflags = (IO_SYNC | IO_NODELOCKED); 904 uiop->uio_resid = retlen; 905 uiop->uio_rw = UIO_WRITE; 906 uiop->uio_segflg = UIO_SYSSPACE; 907 NFSUIOPROC(uiop, p); 908 uiop->uio_offset = off; 909 nh = nfsrv_sequential_heuristic(uiop, vp); 910 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 911 /* XXX KDM make this more systematic? */ 912 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 913 error = VOP_WRITE(vp, uiop, ioflags, cred); 914 if (error == 0) 915 nh->nh_nextoff = uiop->uio_offset; 916 free(iv, M_TEMP); 917 918 NFSEXITCODE(error); 919 return (error); 920 } 921 922 /* 923 * Common code for creating a regular file (plus special files for V2). 
 * The starting error is nd->nd_repstat.  When it is 0 and the lookup
 * found no existing object (ndp->ni_vp == NULL) the object is created,
 * otherwise the lookup is cleaned up and an existing file is truncated
 * when na_size is set.  *vpp is set to ndp->ni_vp on the paths that
 * reach the assignments below.
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    nd->nd_cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: store the create
					 * verifier in the access time.  If
					 * that fails, the new vnode is
					 * released and NFSERR_NOTSUPP is
					 * returned.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
		    nvap->na_type == VFIFO) {
			/* A VCHR with rdev 0xffffffff is treated as a FIFO. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred,
			    PRIV_VFS_MKNOD_DEV, 0))) {
				vrele(ndp->ni_startdir);
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			vrele(ndp->ni_startdir);
			if (error)
				goto out;
		} else {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = VOP_SETATTR(*vpp,
				    &nvap->na_vattr, nd->nd_cred);
			}
		}
		/*
		 * NOTE(review): this branch is also taken when error was
		 * already set and ni_vp is NULL, in which case *vpp is NULL
		 * here - confirm that callers never do that.
		 */
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do a mknod vnode op.
 * Fails with EEXIST when the lookup found an existing object and with
 * NFSERR_BADTYPE when na_type is not VCHR, VBLK, VSOCK or VFIFO.
 * A VSOCK is created via VOP_CREATE(), the other types via VOP_MKNOD();
 * device nodes (anything but VFIFO) require PRIV_VFS_MKNOD_DEV.
 * All paths release ni_startdir, the path buffer and ni_dvp.
 */
int
nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p)
{
	int error = 0;
	enum vtype vtyp;

	vtyp = nvap->na_type;
	/*
	 * Iff doesn't exist, create it.
	 */
	if (ndp->ni_vp) {
		/*
		 * NOTE(review): unlike nfsvno_mkdir(), nfsvno_symlink() and
		 * nfsvno_createsub(), this path does not special-case
		 * ni_dvp == ni_vp (vrele() instead of vput()) - confirm
		 * whether that case can occur here.
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		error = EEXIST;
		goto out;
	}
	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		error = NFSERR_BADTYPE;
		goto out;
	}
	if (vtyp == VSOCK) {
		vrele(ndp->ni_startdir);
		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
	} else {
		if (nvap->na_type != VFIFO &&
		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			goto out;
		}
		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
		vrele(ndp->ni_startdir);
		/*
		 * Since VOP_MKNOD returns the ni_vp, I can't
		 * see any reason to do the lookup.
		 */
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Mkdir vnode op.
 * Fails with EEXIST when the lookup found an existing object, otherwise
 * returns the result of VOP_MKDIR().  ni_dvp and the path buffer are
 * released on both paths.
 */
int
nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
{
	int error = 0;

	if (ndp->ni_vp != NULL) {
		/* Only a reference is dropped when ni_dvp is ni_vp. */
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		nfsvno_relpathbuf(ndp);
		error = EEXIST;
		goto out;
	}
	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
	    &nvap->na_vattr);
	vput(ndp->ni_dvp);
	nfsvno_relpathbuf(ndp);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * symlink vnode op.
1123 */ 1124 int 1125 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1126 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1127 struct nfsexstuff *exp) 1128 { 1129 int error = 0; 1130 1131 if (ndp->ni_vp) { 1132 vrele(ndp->ni_startdir); 1133 nfsvno_relpathbuf(ndp); 1134 if (ndp->ni_dvp == ndp->ni_vp) 1135 vrele(ndp->ni_dvp); 1136 else 1137 vput(ndp->ni_dvp); 1138 vrele(ndp->ni_vp); 1139 error = EEXIST; 1140 goto out; 1141 } 1142 1143 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1144 &nvap->na_vattr, pathcp); 1145 vput(ndp->ni_dvp); 1146 vrele(ndp->ni_startdir); 1147 nfsvno_relpathbuf(ndp); 1148 /* 1149 * Although FreeBSD still had the lookup code in 1150 * it for 7/current, there doesn't seem to be any 1151 * point, since VOP_SYMLINK() returns the ni_vp. 1152 * Just vput it for v2. 1153 */ 1154 if (!not_v2 && !error) 1155 vput(ndp->ni_vp); 1156 1157 out: 1158 NFSEXITCODE(error); 1159 return (error); 1160 } 1161 1162 /* 1163 * Parse symbolic link arguments. 1164 * This function has an ugly side effect. It will malloc() an area for 1165 * the symlink and set iov_base to point to it, only if it succeeds. 1166 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1167 * be FREE'd later. 
1168 */ 1169 int 1170 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1171 struct thread *p, char **pathcpp, int *lenp) 1172 { 1173 u_int32_t *tl; 1174 char *pathcp = NULL; 1175 int error = 0, len; 1176 struct nfsv2_sattr *sp; 1177 1178 *pathcpp = NULL; 1179 *lenp = 0; 1180 if ((nd->nd_flag & ND_NFSV3) && 1181 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1182 goto nfsmout; 1183 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1184 len = fxdr_unsigned(int, *tl); 1185 if (len > NFS_MAXPATHLEN || len <= 0) { 1186 error = EBADRPC; 1187 goto nfsmout; 1188 } 1189 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1190 error = nfsrv_mtostr(nd, pathcp, len); 1191 if (error) 1192 goto nfsmout; 1193 if (nd->nd_flag & ND_NFSV2) { 1194 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1195 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1196 } 1197 *pathcpp = pathcp; 1198 *lenp = len; 1199 NFSEXITCODE2(0, nd); 1200 return (0); 1201 nfsmout: 1202 if (pathcp) 1203 free(pathcp, M_TEMP); 1204 NFSEXITCODE2(error, nd); 1205 return (error); 1206 } 1207 1208 /* 1209 * Remove a non-directory object. 
1210 */ 1211 int 1212 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1213 struct thread *p, struct nfsexstuff *exp) 1214 { 1215 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1216 int error = 0, mirrorcnt; 1217 char fname[PNFS_FILENAME_LEN + 1]; 1218 fhandle_t fh; 1219 1220 vp = ndp->ni_vp; 1221 dsdvp[0] = NULL; 1222 if (vp->v_type == VDIR) 1223 error = NFSERR_ISDIR; 1224 else if (is_v4) 1225 error = nfsrv_checkremove(vp, 1, p); 1226 if (error == 0) 1227 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1228 if (!error) 1229 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1230 if (error == 0 && dsdvp[0] != NULL) 1231 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1232 if (ndp->ni_dvp == vp) 1233 vrele(ndp->ni_dvp); 1234 else 1235 vput(ndp->ni_dvp); 1236 vput(vp); 1237 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1238 nfsvno_relpathbuf(ndp); 1239 NFSEXITCODE(error); 1240 return (error); 1241 } 1242 1243 /* 1244 * Remove a directory. 1245 */ 1246 int 1247 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1248 struct thread *p, struct nfsexstuff *exp) 1249 { 1250 struct vnode *vp; 1251 int error = 0; 1252 1253 vp = ndp->ni_vp; 1254 if (vp->v_type != VDIR) { 1255 error = ENOTDIR; 1256 goto out; 1257 } 1258 /* 1259 * No rmdir "." please. 1260 */ 1261 if (ndp->ni_dvp == vp) { 1262 error = EINVAL; 1263 goto out; 1264 } 1265 /* 1266 * The root of a mounted filesystem cannot be deleted. 1267 */ 1268 if (vp->v_vflag & VV_ROOT) 1269 error = EBUSY; 1270 out: 1271 if (!error) 1272 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1273 if (ndp->ni_dvp == vp) 1274 vrele(ndp->ni_dvp); 1275 else 1276 vput(ndp->ni_dvp); 1277 vput(vp); 1278 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1279 nfsvno_relpathbuf(ndp); 1280 NFSEXITCODE(error); 1281 return (error); 1282 } 1283 1284 /* 1285 * Rename vnode op. 
1286 */ 1287 int 1288 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1289 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1290 { 1291 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1292 int error = 0, mirrorcnt; 1293 char fname[PNFS_FILENAME_LEN + 1]; 1294 fhandle_t fh; 1295 1296 dsdvp[0] = NULL; 1297 fvp = fromndp->ni_vp; 1298 if (ndstat) { 1299 vrele(fromndp->ni_dvp); 1300 vrele(fvp); 1301 error = ndstat; 1302 goto out1; 1303 } 1304 tdvp = tondp->ni_dvp; 1305 tvp = tondp->ni_vp; 1306 if (tvp != NULL) { 1307 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1308 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1309 goto out; 1310 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1311 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1312 goto out; 1313 } 1314 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1315 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1316 goto out; 1317 } 1318 1319 /* 1320 * A rename to '.' or '..' results in a prematurely 1321 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1322 * here. 1323 */ 1324 if ((tondp->ni_cnd.cn_namelen == 1 && 1325 tondp->ni_cnd.cn_nameptr[0] == '.') || 1326 (tondp->ni_cnd.cn_namelen == 2 && 1327 tondp->ni_cnd.cn_nameptr[0] == '.' && 1328 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1329 error = EINVAL; 1330 goto out; 1331 } 1332 } 1333 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1334 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1335 goto out; 1336 } 1337 if (fvp->v_mount != tdvp->v_mount) { 1338 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1339 goto out; 1340 } 1341 if (fvp == tdvp) { 1342 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1343 goto out; 1344 } 1345 if (fvp == tvp) { 1346 /* 1347 * If source and destination are the same, there is nothing to 1348 * do. Set error to -1 to indicate this. 
1349 */ 1350 error = -1; 1351 goto out; 1352 } 1353 if (ndflag & ND_NFSV4) { 1354 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1355 error = nfsrv_checkremove(fvp, 0, p); 1356 NFSVOPUNLOCK(fvp, 0); 1357 } else 1358 error = EPERM; 1359 if (tvp && !error) 1360 error = nfsrv_checkremove(tvp, 1, p); 1361 } else { 1362 /* 1363 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1364 * that the NFSv4 client won't be confused by the rename. 1365 * Since nfsd_recalldelegation() can only be called on an 1366 * unlocked vnode at this point and fvp is the file that will 1367 * still exist after the rename, just do fvp. 1368 */ 1369 nfsd_recalldelegation(fvp, p); 1370 } 1371 if (error == 0 && tvp != NULL) { 1372 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1373 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1374 " dsdvp=%p\n", dsdvp[0]); 1375 } 1376 out: 1377 if (!error) { 1378 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1379 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1380 &tondp->ni_cnd); 1381 } else { 1382 if (tdvp == tvp) 1383 vrele(tdvp); 1384 else 1385 vput(tdvp); 1386 if (tvp) 1387 vput(tvp); 1388 vrele(fromndp->ni_dvp); 1389 vrele(fvp); 1390 if (error == -1) 1391 error = 0; 1392 } 1393 1394 /* 1395 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1396 * if the rename succeeded, the DS file for the tvp needs to be 1397 * removed. 1398 */ 1399 if (error == 0 && dsdvp[0] != NULL) { 1400 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1401 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1402 } 1403 1404 vrele(tondp->ni_startdir); 1405 nfsvno_relpathbuf(tondp); 1406 out1: 1407 vrele(fromndp->ni_startdir); 1408 nfsvno_relpathbuf(fromndp); 1409 NFSEXITCODE(error); 1410 return (error); 1411 } 1412 1413 /* 1414 * Link vnode op. 
1415 */ 1416 int 1417 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1418 struct thread *p, struct nfsexstuff *exp) 1419 { 1420 struct vnode *xp; 1421 int error = 0; 1422 1423 xp = ndp->ni_vp; 1424 if (xp != NULL) { 1425 error = EEXIST; 1426 } else { 1427 xp = ndp->ni_dvp; 1428 if (vp->v_mount != xp->v_mount) 1429 error = EXDEV; 1430 } 1431 if (!error) { 1432 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1433 if ((vp->v_iflag & VI_DOOMED) == 0) 1434 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1435 else 1436 error = EPERM; 1437 if (ndp->ni_dvp == vp) 1438 vrele(ndp->ni_dvp); 1439 else 1440 vput(ndp->ni_dvp); 1441 NFSVOPUNLOCK(vp, 0); 1442 } else { 1443 if (ndp->ni_dvp == ndp->ni_vp) 1444 vrele(ndp->ni_dvp); 1445 else 1446 vput(ndp->ni_dvp); 1447 if (ndp->ni_vp) 1448 vrele(ndp->ni_vp); 1449 } 1450 nfsvno_relpathbuf(ndp); 1451 NFSEXITCODE(error); 1452 return (error); 1453 } 1454 1455 /* 1456 * Do the fsync() appropriate for the commit. 1457 */ 1458 int 1459 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1460 struct thread *td) 1461 { 1462 int error = 0; 1463 1464 /* 1465 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1466 * file is done. At this time VOP_FSYNC does not accept offset and 1467 * byte count parameters so call VOP_FSYNC the whole file for now. 1468 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1469 * File systems that do not use the buffer cache (as indicated 1470 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 
1471 */ 1472 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1473 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1474 /* 1475 * Give up and do the whole thing 1476 */ 1477 if (vp->v_object && 1478 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1479 VM_OBJECT_WLOCK(vp->v_object); 1480 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1481 VM_OBJECT_WUNLOCK(vp->v_object); 1482 } 1483 error = VOP_FSYNC(vp, MNT_WAIT, td); 1484 } else { 1485 /* 1486 * Locate and synchronously write any buffers that fall 1487 * into the requested range. Note: we are assuming that 1488 * f_iosize is a power of 2. 1489 */ 1490 int iosize = vp->v_mount->mnt_stat.f_iosize; 1491 int iomask = iosize - 1; 1492 struct bufobj *bo; 1493 daddr_t lblkno; 1494 1495 /* 1496 * Align to iosize boundary, super-align to page boundary. 1497 */ 1498 if (off & iomask) { 1499 cnt += off & iomask; 1500 off &= ~(u_quad_t)iomask; 1501 } 1502 if (off & PAGE_MASK) { 1503 cnt += off & PAGE_MASK; 1504 off &= ~(u_quad_t)PAGE_MASK; 1505 } 1506 lblkno = off / iosize; 1507 1508 if (vp->v_object && 1509 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1510 VM_OBJECT_WLOCK(vp->v_object); 1511 vm_object_page_clean(vp->v_object, off, off + cnt, 1512 OBJPC_SYNC); 1513 VM_OBJECT_WUNLOCK(vp->v_object); 1514 } 1515 1516 bo = &vp->v_bufobj; 1517 BO_LOCK(bo); 1518 while (cnt > 0) { 1519 struct buf *bp; 1520 1521 /* 1522 * If we have a buffer and it is marked B_DELWRI we 1523 * have to lock and write it. Otherwise the prior 1524 * write is assumed to have already been committed. 1525 * 1526 * gbincore() can return invalid buffers now so we 1527 * have to check that bit as well (though B_DELWRI 1528 * should not be set if B_INVAL is set there could be 1529 * a race here since we haven't locked the buffer). 
1530 */ 1531 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1532 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1533 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1534 BO_LOCK(bo); 1535 continue; /* retry */ 1536 } 1537 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1538 B_DELWRI) { 1539 bremfree(bp); 1540 bp->b_flags &= ~B_ASYNC; 1541 bwrite(bp); 1542 ++nfs_commit_miss; 1543 } else 1544 BUF_UNLOCK(bp); 1545 BO_LOCK(bo); 1546 } 1547 ++nfs_commit_blks; 1548 if (cnt < iosize) 1549 break; 1550 cnt -= iosize; 1551 ++lblkno; 1552 } 1553 BO_UNLOCK(bo); 1554 } 1555 NFSEXITCODE(error); 1556 return (error); 1557 } 1558 1559 /* 1560 * Statfs vnode op. 1561 */ 1562 int 1563 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1564 { 1565 struct statfs *tsf; 1566 int error; 1567 1568 tsf = NULL; 1569 if (nfsrv_devidcnt > 0) { 1570 /* For a pNFS service, get the DS numbers. */ 1571 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1572 error = nfsrv_pnfsstatfs(tsf); 1573 if (error != 0) { 1574 free(tsf, M_TEMP); 1575 tsf = NULL; 1576 } 1577 } 1578 error = VFS_STATFS(vp->v_mount, sf); 1579 if (error == 0) { 1580 if (tsf != NULL) { 1581 sf->f_blocks = tsf->f_blocks; 1582 sf->f_bavail = tsf->f_bavail; 1583 sf->f_bfree = tsf->f_bfree; 1584 sf->f_bsize = tsf->f_bsize; 1585 } 1586 /* 1587 * Since NFS handles these values as unsigned on the 1588 * wire, there is no way to represent negative values, 1589 * so set them to 0. Without this, they will appear 1590 * to be very large positive values for clients like 1591 * Solaris10. 1592 */ 1593 if (sf->f_bavail < 0) 1594 sf->f_bavail = 0; 1595 if (sf->f_ffree < 0) 1596 sf->f_ffree = 0; 1597 } 1598 free(tsf, M_TEMP); 1599 NFSEXITCODE(error); 1600 return (error); 1601 } 1602 1603 /* 1604 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1605 * must handle nfsrv_opencheck() calls after any other access checks. 
1606 */ 1607 void 1608 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1609 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1610 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1611 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, 1612 struct nfsexstuff *exp, struct vnode **vpp) 1613 { 1614 struct vnode *vp = NULL; 1615 u_quad_t tempsize; 1616 struct nfsexstuff nes; 1617 1618 if (ndp->ni_vp == NULL) 1619 nd->nd_repstat = nfsrv_opencheck(clientid, 1620 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1621 if (!nd->nd_repstat) { 1622 if (ndp->ni_vp == NULL) { 1623 vrele(ndp->ni_startdir); 1624 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1625 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1626 /* For a pNFS server, create the data file on a DS. */ 1627 if (nd->nd_repstat == 0) { 1628 /* 1629 * Create a data file on a DS for a pNFS server. 1630 * This function just returns if not 1631 * running a pNFS DS or the creation fails. 
1632 */ 1633 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1634 cred, p); 1635 } 1636 vput(ndp->ni_dvp); 1637 nfsvno_relpathbuf(ndp); 1638 if (!nd->nd_repstat) { 1639 if (*exclusive_flagp) { 1640 *exclusive_flagp = 0; 1641 NFSVNO_ATTRINIT(nvap); 1642 nvap->na_atime.tv_sec = cverf[0]; 1643 nvap->na_atime.tv_nsec = cverf[1]; 1644 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1645 &nvap->na_vattr, cred); 1646 if (nd->nd_repstat != 0) { 1647 vput(ndp->ni_vp); 1648 ndp->ni_vp = NULL; 1649 nd->nd_repstat = NFSERR_NOTSUPP; 1650 } else 1651 NFSSETBIT_ATTRBIT(attrbitp, 1652 NFSATTRBIT_TIMEACCESS); 1653 } else { 1654 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1655 aclp, p, attrbitp, exp); 1656 } 1657 } 1658 vp = ndp->ni_vp; 1659 } else { 1660 if (ndp->ni_startdir) 1661 vrele(ndp->ni_startdir); 1662 nfsvno_relpathbuf(ndp); 1663 vp = ndp->ni_vp; 1664 if (create == NFSV4OPEN_CREATE) { 1665 if (ndp->ni_dvp == vp) 1666 vrele(ndp->ni_dvp); 1667 else 1668 vput(ndp->ni_dvp); 1669 } 1670 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1671 if (ndp->ni_cnd.cn_flags & RDONLY) 1672 NFSVNO_SETEXRDONLY(&nes); 1673 else 1674 NFSVNO_EXINIT(&nes); 1675 nd->nd_repstat = nfsvno_accchk(vp, 1676 VWRITE, cred, &nes, p, 1677 NFSACCCHK_NOOVERRIDE, 1678 NFSACCCHK_VPISLOCKED, NULL); 1679 nd->nd_repstat = nfsrv_opencheck(clientid, 1680 stateidp, stp, vp, nd, p, nd->nd_repstat); 1681 if (!nd->nd_repstat) { 1682 tempsize = nvap->na_size; 1683 NFSVNO_ATTRINIT(nvap); 1684 nvap->na_size = tempsize; 1685 nd->nd_repstat = VOP_SETATTR(vp, 1686 &nvap->na_vattr, cred); 1687 } 1688 } else if (vp->v_type == VREG) { 1689 nd->nd_repstat = nfsrv_opencheck(clientid, 1690 stateidp, stp, vp, nd, p, nd->nd_repstat); 1691 } 1692 } 1693 } else { 1694 if (ndp->ni_cnd.cn_flags & HASBUF) 1695 nfsvno_relpathbuf(ndp); 1696 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1697 vrele(ndp->ni_startdir); 1698 if (ndp->ni_dvp == ndp->ni_vp) 1699 vrele(ndp->ni_dvp); 1700 else 1701 vput(ndp->ni_dvp); 1702 if (ndp->ni_vp) 1703 
vput(ndp->ni_vp); 1704 } 1705 } 1706 *vpp = vp; 1707 1708 NFSEXITCODE2(0, nd); 1709 } 1710 1711 /* 1712 * Updates the file rev and sets the mtime and ctime 1713 * to the current clock time, returning the va_filerev and va_Xtime 1714 * values. 1715 * Return ESTALE to indicate the vnode is VI_DOOMED. 1716 */ 1717 int 1718 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1719 struct nfsrv_descript *nd, struct thread *p) 1720 { 1721 struct vattr va; 1722 1723 VATTR_NULL(&va); 1724 vfs_timestamp(&va.va_mtime); 1725 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1726 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1727 if ((vp->v_iflag & VI_DOOMED) != 0) 1728 return (ESTALE); 1729 } 1730 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1731 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1732 return (0); 1733 } 1734 1735 /* 1736 * Glue routine to nfsv4_fillattr(). 1737 */ 1738 int 1739 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1740 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1741 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1742 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1743 { 1744 struct statfs *sf; 1745 int error; 1746 1747 sf = NULL; 1748 if (nfsrv_devidcnt > 0 && 1749 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1750 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 1751 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 1752 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 1753 error = nfsrv_pnfsstatfs(sf); 1754 if (error != 0) { 1755 free(sf, M_TEMP); 1756 sf = NULL; 1757 } 1758 } 1759 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 1760 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 1761 mounted_on_fileno, sf); 1762 free(sf, M_TEMP); 1763 NFSEXITCODE2(0, nd); 1764 return (error); 1765 } 1766 1767 /* Since the Readdir vnode ops vary, put the entire functions in here. 
 */
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count, break out of the loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but such records might
 *	confuse clients for other operating systems.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of: just the name strings and file ids, the
 *	entire reply rpc, or something else.
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
1790 */ 1791 int 1792 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 1793 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 1794 { 1795 struct dirent *dp; 1796 u_int32_t *tl; 1797 int dirlen; 1798 char *cpos, *cend, *rbuf; 1799 struct nfsvattr at; 1800 int nlen, error = 0, getret = 1; 1801 int siz, cnt, fullsiz, eofflag, ncookies; 1802 u_int64_t off, toff, verf; 1803 u_long *cookies = NULL, *cookiep; 1804 struct uio io; 1805 struct iovec iv; 1806 int is_ufs; 1807 1808 if (nd->nd_repstat) { 1809 nfsrv_postopattr(nd, getret, &at); 1810 goto out; 1811 } 1812 if (nd->nd_flag & ND_NFSV2) { 1813 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1814 off = fxdr_unsigned(u_quad_t, *tl++); 1815 } else { 1816 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 1817 off = fxdr_hyper(tl); 1818 tl += 2; 1819 verf = fxdr_hyper(tl); 1820 tl += 2; 1821 } 1822 toff = off; 1823 cnt = fxdr_unsigned(int, *tl); 1824 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 1825 cnt = NFS_SRVMAXDATA(nd); 1826 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 1827 fullsiz = siz; 1828 if (nd->nd_flag & ND_NFSV3) { 1829 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 1830 NULL); 1831 #if 0 1832 /* 1833 * va_filerev is not sufficient as a cookie verifier, 1834 * since it is not supposed to change when entries are 1835 * removed/added unless that offset cookies returned to 1836 * the client are no longer valid. 
1837 */ 1838 if (!nd->nd_repstat && toff && verf != at.na_filerev) 1839 nd->nd_repstat = NFSERR_BAD_COOKIE; 1840 #endif 1841 } 1842 if (!nd->nd_repstat && vp->v_type != VDIR) 1843 nd->nd_repstat = NFSERR_NOTDIR; 1844 if (nd->nd_repstat == 0 && cnt == 0) { 1845 if (nd->nd_flag & ND_NFSV2) 1846 /* NFSv2 does not have NFSERR_TOOSMALL */ 1847 nd->nd_repstat = EPERM; 1848 else 1849 nd->nd_repstat = NFSERR_TOOSMALL; 1850 } 1851 if (!nd->nd_repstat) 1852 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 1853 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1854 NFSACCCHK_VPISLOCKED, NULL); 1855 if (nd->nd_repstat) { 1856 vput(vp); 1857 if (nd->nd_flag & ND_NFSV3) 1858 nfsrv_postopattr(nd, getret, &at); 1859 goto out; 1860 } 1861 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 1862 rbuf = malloc(siz, M_TEMP, M_WAITOK); 1863 again: 1864 eofflag = 0; 1865 if (cookies) { 1866 free(cookies, M_TEMP); 1867 cookies = NULL; 1868 } 1869 1870 iv.iov_base = rbuf; 1871 iv.iov_len = siz; 1872 io.uio_iov = &iv; 1873 io.uio_iovcnt = 1; 1874 io.uio_offset = (off_t)off; 1875 io.uio_resid = siz; 1876 io.uio_segflg = UIO_SYSSPACE; 1877 io.uio_rw = UIO_READ; 1878 io.uio_td = NULL; 1879 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 1880 &cookies); 1881 off = (u_int64_t)io.uio_offset; 1882 if (io.uio_resid) 1883 siz -= io.uio_resid; 1884 1885 if (!cookies && !nd->nd_repstat) 1886 nd->nd_repstat = NFSERR_PERM; 1887 if (nd->nd_flag & ND_NFSV3) { 1888 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 1889 if (!nd->nd_repstat) 1890 nd->nd_repstat = getret; 1891 } 1892 1893 /* 1894 * Handles the failed cases. nd->nd_repstat == 0 past here. 
1895 */ 1896 if (nd->nd_repstat) { 1897 vput(vp); 1898 free(rbuf, M_TEMP); 1899 if (cookies) 1900 free(cookies, M_TEMP); 1901 if (nd->nd_flag & ND_NFSV3) 1902 nfsrv_postopattr(nd, getret, &at); 1903 goto out; 1904 } 1905 /* 1906 * If nothing read, return eof 1907 * rpc reply 1908 */ 1909 if (siz == 0) { 1910 vput(vp); 1911 if (nd->nd_flag & ND_NFSV2) { 1912 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1913 } else { 1914 nfsrv_postopattr(nd, getret, &at); 1915 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 1916 txdr_hyper(at.na_filerev, tl); 1917 tl += 2; 1918 } 1919 *tl++ = newnfs_false; 1920 *tl = newnfs_true; 1921 free(rbuf, M_TEMP); 1922 free(cookies, M_TEMP); 1923 goto out; 1924 } 1925 1926 /* 1927 * Check for degenerate cases of nothing useful read. 1928 * If so go try again 1929 */ 1930 cpos = rbuf; 1931 cend = rbuf + siz; 1932 dp = (struct dirent *)cpos; 1933 cookiep = cookies; 1934 1935 /* 1936 * For some reason FreeBSD's ufs_readdir() chooses to back the 1937 * directory offset up to a block boundary, so it is necessary to 1938 * skip over the records that precede the requested offset. This 1939 * requires the assumption that file offset cookies monotonically 1940 * increase. 1941 */ 1942 while (cpos < cend && ncookies > 0 && 1943 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 1944 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 1945 cpos += dp->d_reclen; 1946 dp = (struct dirent *)cpos; 1947 cookiep++; 1948 ncookies--; 1949 } 1950 if (cpos >= cend || ncookies == 0) { 1951 siz = fullsiz; 1952 toff = off; 1953 goto again; 1954 } 1955 vput(vp); 1956 1957 /* 1958 * dirlen is the size of the reply, including all XDR and must 1959 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 1960 * if the XDR should be included in "count", but to be safe, we do. 1961 * (Include the two booleans at the end of the reply in dirlen now.) 
1962 */ 1963 if (nd->nd_flag & ND_NFSV3) { 1964 nfsrv_postopattr(nd, getret, &at); 1965 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1966 txdr_hyper(at.na_filerev, tl); 1967 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 1968 } else { 1969 dirlen = 2 * NFSX_UNSIGNED; 1970 } 1971 1972 /* Loop through the records and build reply */ 1973 while (cpos < cend && ncookies > 0) { 1974 nlen = dp->d_namlen; 1975 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 1976 nlen <= NFS_MAXNAMLEN) { 1977 if (nd->nd_flag & ND_NFSV3) 1978 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1979 else 1980 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1981 if (dirlen > cnt) { 1982 eofflag = 0; 1983 break; 1984 } 1985 1986 /* 1987 * Build the directory record xdr from 1988 * the dirent entry. 1989 */ 1990 if (nd->nd_flag & ND_NFSV3) { 1991 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1992 *tl++ = newnfs_true; 1993 *tl++ = 0; 1994 } else { 1995 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1996 *tl++ = newnfs_true; 1997 } 1998 *tl = txdr_unsigned(dp->d_fileno); 1999 (void) nfsm_strtom(nd, dp->d_name, nlen); 2000 if (nd->nd_flag & ND_NFSV3) { 2001 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2002 *tl++ = 0; 2003 } else 2004 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2005 *tl = txdr_unsigned(*cookiep); 2006 } 2007 cpos += dp->d_reclen; 2008 dp = (struct dirent *)cpos; 2009 cookiep++; 2010 ncookies--; 2011 } 2012 if (cpos < cend) 2013 eofflag = 0; 2014 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2015 *tl++ = newnfs_false; 2016 if (eofflag) 2017 *tl = newnfs_true; 2018 else 2019 *tl = newnfs_false; 2020 free(rbuf, M_TEMP); 2021 free(cookies, M_TEMP); 2022 2023 out: 2024 NFSEXITCODE2(0, nd); 2025 return (0); 2026 nfsmout: 2027 vput(vp); 2028 NFSEXITCODE2(error, nd); 2029 return (error); 2030 } 2031 2032 /* 2033 * Readdirplus for V3 and Readdir for V4. 
2034 */ 2035 int 2036 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2037 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 2038 { 2039 struct dirent *dp; 2040 u_int32_t *tl; 2041 int dirlen; 2042 char *cpos, *cend, *rbuf; 2043 struct vnode *nvp; 2044 fhandle_t nfh; 2045 struct nfsvattr nva, at, *nvap = &nva; 2046 struct mbuf *mb0, *mb1; 2047 struct nfsreferral *refp; 2048 int nlen, r, error = 0, getret = 1, usevget = 1; 2049 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2050 caddr_t bpos0, bpos1; 2051 u_int64_t off, toff, verf; 2052 u_long *cookies = NULL, *cookiep; 2053 nfsattrbit_t attrbits, rderrbits, savbits; 2054 struct uio io; 2055 struct iovec iv; 2056 struct componentname cn; 2057 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2058 struct mount *mp, *new_mp; 2059 uint64_t mounted_on_fileno; 2060 2061 if (nd->nd_repstat) { 2062 nfsrv_postopattr(nd, getret, &at); 2063 goto out; 2064 } 2065 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2066 off = fxdr_hyper(tl); 2067 toff = off; 2068 tl += 2; 2069 verf = fxdr_hyper(tl); 2070 tl += 2; 2071 siz = fxdr_unsigned(int, *tl++); 2072 cnt = fxdr_unsigned(int, *tl); 2073 2074 /* 2075 * Use the server's maximum data transfer size as the upper bound 2076 * on reply datalen. 2077 */ 2078 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2079 cnt = NFS_SRVMAXDATA(nd); 2080 2081 /* 2082 * siz is a "hint" of how much directory information (name, fileid, 2083 * cookie) should be in the reply. At least one client "hints" 0, 2084 * so I set it to cnt for that case. I also round it up to the 2085 * next multiple of DIRBLKSIZ. 
2086 */ 2087 if (siz <= 0) 2088 siz = cnt; 2089 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2090 2091 if (nd->nd_flag & ND_NFSV4) { 2092 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2093 if (error) 2094 goto nfsmout; 2095 NFSSET_ATTRBIT(&savbits, &attrbits); 2096 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); 2097 NFSZERO_ATTRBIT(&rderrbits); 2098 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2099 } else { 2100 NFSZERO_ATTRBIT(&attrbits); 2101 } 2102 fullsiz = siz; 2103 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2104 if (!nd->nd_repstat) { 2105 if (off && verf != at.na_filerev) { 2106 /* 2107 * va_filerev is not sufficient as a cookie verifier, 2108 * since it is not supposed to change when entries are 2109 * removed/added unless that offset cookies returned to 2110 * the client are no longer valid. 2111 */ 2112 #if 0 2113 if (nd->nd_flag & ND_NFSV4) { 2114 nd->nd_repstat = NFSERR_NOTSAME; 2115 } else { 2116 nd->nd_repstat = NFSERR_BAD_COOKIE; 2117 } 2118 #endif 2119 } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) { 2120 nd->nd_repstat = NFSERR_BAD_COOKIE; 2121 } 2122 } 2123 if (!nd->nd_repstat && vp->v_type != VDIR) 2124 nd->nd_repstat = NFSERR_NOTDIR; 2125 if (!nd->nd_repstat && cnt == 0) 2126 nd->nd_repstat = NFSERR_TOOSMALL; 2127 if (!nd->nd_repstat) 2128 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2129 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2130 NFSACCCHK_VPISLOCKED, NULL); 2131 if (nd->nd_repstat) { 2132 vput(vp); 2133 if (nd->nd_flag & ND_NFSV3) 2134 nfsrv_postopattr(nd, getret, &at); 2135 goto out; 2136 } 2137 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2138 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2139 2140 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2141 again: 2142 eofflag = 0; 2143 if (cookies) { 2144 free(cookies, M_TEMP); 2145 cookies = NULL; 2146 } 2147 2148 iv.iov_base = rbuf; 2149 iv.iov_len = siz; 2150 io.uio_iov = &iv; 2151 io.uio_iovcnt = 1; 2152 io.uio_offset 
= (off_t)off; 2153 io.uio_resid = siz; 2154 io.uio_segflg = UIO_SYSSPACE; 2155 io.uio_rw = UIO_READ; 2156 io.uio_td = NULL; 2157 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2158 &cookies); 2159 off = (u_int64_t)io.uio_offset; 2160 if (io.uio_resid) 2161 siz -= io.uio_resid; 2162 2163 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2164 2165 if (!cookies && !nd->nd_repstat) 2166 nd->nd_repstat = NFSERR_PERM; 2167 if (!nd->nd_repstat) 2168 nd->nd_repstat = getret; 2169 if (nd->nd_repstat) { 2170 vput(vp); 2171 if (cookies) 2172 free(cookies, M_TEMP); 2173 free(rbuf, M_TEMP); 2174 if (nd->nd_flag & ND_NFSV3) 2175 nfsrv_postopattr(nd, getret, &at); 2176 goto out; 2177 } 2178 /* 2179 * If nothing read, return eof 2180 * rpc reply 2181 */ 2182 if (siz == 0) { 2183 vput(vp); 2184 if (nd->nd_flag & ND_NFSV3) 2185 nfsrv_postopattr(nd, getret, &at); 2186 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2187 txdr_hyper(at.na_filerev, tl); 2188 tl += 2; 2189 *tl++ = newnfs_false; 2190 *tl = newnfs_true; 2191 free(cookies, M_TEMP); 2192 free(rbuf, M_TEMP); 2193 goto out; 2194 } 2195 2196 /* 2197 * Check for degenerate cases of nothing useful read. 2198 * If so go try again 2199 */ 2200 cpos = rbuf; 2201 cend = rbuf + siz; 2202 dp = (struct dirent *)cpos; 2203 cookiep = cookies; 2204 2205 /* 2206 * For some reason FreeBSD's ufs_readdir() chooses to back the 2207 * directory offset up to a block boundary, so it is necessary to 2208 * skip over the records that precede the requested offset. This 2209 * requires the assumption that file offset cookies monotonically 2210 * increase. 2211 */ 2212 while (cpos < cend && ncookies > 0 && 2213 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2214 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2215 ((nd->nd_flag & ND_NFSV4) && 2216 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2217 (dp->d_namlen==2 && dp->d_name[0]=='.' 
&& dp->d_name[1]=='.'))))) { 2218 cpos += dp->d_reclen; 2219 dp = (struct dirent *)cpos; 2220 cookiep++; 2221 ncookies--; 2222 } 2223 if (cpos >= cend || ncookies == 0) { 2224 siz = fullsiz; 2225 toff = off; 2226 goto again; 2227 } 2228 2229 /* 2230 * Busy the file system so that the mount point won't go away 2231 * and, as such, VFS_VGET() can be used safely. 2232 */ 2233 mp = vp->v_mount; 2234 vfs_ref(mp); 2235 NFSVOPUNLOCK(vp, 0); 2236 nd->nd_repstat = vfs_busy(mp, 0); 2237 vfs_rel(mp); 2238 if (nd->nd_repstat != 0) { 2239 vrele(vp); 2240 free(cookies, M_TEMP); 2241 free(rbuf, M_TEMP); 2242 if (nd->nd_flag & ND_NFSV3) 2243 nfsrv_postopattr(nd, getret, &at); 2244 goto out; 2245 } 2246 2247 /* 2248 * Check to see if entries in this directory can be safely acquired 2249 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2250 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2251 * automount of the snapshot directory that is required will 2252 * be done. 2253 * This needs to be done here for NFSv4, since NFSv4 never does 2254 * a VFS_VGET() for "." or "..". 2255 */ 2256 if (is_zfs == 1) { 2257 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2258 if (r == EOPNOTSUPP) { 2259 usevget = 0; 2260 cn.cn_nameiop = LOOKUP; 2261 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2262 cn.cn_cred = nd->nd_cred; 2263 cn.cn_thread = p; 2264 } else if (r == 0) 2265 vput(nvp); 2266 } 2267 2268 /* 2269 * Save this position, in case there is an error before one entry 2270 * is created. 2271 */ 2272 mb0 = nd->nd_mb; 2273 bpos0 = nd->nd_bpos; 2274 2275 /* 2276 * Fill in the first part of the reply. 2277 * dirlen is the reply length in bytes and cannot exceed cnt. 2278 * (Include the two booleans at the end of the reply in dirlen now, 2279 * so we recognize when we have exceeded cnt.) 
2280 */ 2281 if (nd->nd_flag & ND_NFSV3) { 2282 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2283 nfsrv_postopattr(nd, getret, &at); 2284 } else { 2285 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2286 } 2287 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2288 txdr_hyper(at.na_filerev, tl); 2289 2290 /* 2291 * Save this position, in case there is an empty reply needed. 2292 */ 2293 mb1 = nd->nd_mb; 2294 bpos1 = nd->nd_bpos; 2295 2296 /* Loop through the records and build reply */ 2297 entrycnt = 0; 2298 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2299 nlen = dp->d_namlen; 2300 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2301 nlen <= NFS_MAXNAMLEN && 2302 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2303 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2304 || (nlen == 1 && dp->d_name[0] != '.'))) { 2305 /* 2306 * Save the current position in the reply, in case 2307 * this entry exceeds cnt. 2308 */ 2309 mb1 = nd->nd_mb; 2310 bpos1 = nd->nd_bpos; 2311 2312 /* 2313 * For readdir_and_lookup get the vnode using 2314 * the file number. 2315 */ 2316 nvp = NULL; 2317 refp = NULL; 2318 r = 0; 2319 at_root = 0; 2320 needs_unbusy = 0; 2321 new_mp = mp; 2322 mounted_on_fileno = (uint64_t)dp->d_fileno; 2323 if ((nd->nd_flag & ND_NFSV3) || 2324 NFSNONZERO_ATTRBIT(&savbits)) { 2325 if (nd->nd_flag & ND_NFSV4) 2326 refp = nfsv4root_getreferral(NULL, 2327 vp, dp->d_fileno); 2328 if (refp == NULL) { 2329 if (usevget) 2330 r = VFS_VGET(mp, dp->d_fileno, 2331 LK_SHARED, &nvp); 2332 else 2333 r = EOPNOTSUPP; 2334 if (r == EOPNOTSUPP) { 2335 if (usevget) { 2336 usevget = 0; 2337 cn.cn_nameiop = LOOKUP; 2338 cn.cn_lkflags = 2339 LK_SHARED | 2340 LK_RETRY; 2341 cn.cn_cred = 2342 nd->nd_cred; 2343 cn.cn_thread = p; 2344 } 2345 cn.cn_nameptr = dp->d_name; 2346 cn.cn_namelen = nlen; 2347 cn.cn_flags = ISLASTCN | 2348 NOFOLLOW | LOCKLEAF; 2349 if (nlen == 2 && 2350 dp->d_name[0] == '.' 
&& 2351 dp->d_name[1] == '.') 2352 cn.cn_flags |= 2353 ISDOTDOT; 2354 if (NFSVOPLOCK(vp, LK_SHARED) 2355 != 0) { 2356 nd->nd_repstat = EPERM; 2357 break; 2358 } 2359 if ((vp->v_vflag & VV_ROOT) != 0 2360 && (cn.cn_flags & ISDOTDOT) 2361 != 0) { 2362 vref(vp); 2363 nvp = vp; 2364 r = 0; 2365 } else { 2366 r = VOP_LOOKUP(vp, &nvp, 2367 &cn); 2368 if (vp != nvp) 2369 NFSVOPUNLOCK(vp, 2370 0); 2371 } 2372 } 2373 2374 /* 2375 * For NFSv4, check to see if nvp is 2376 * a mount point and get the mount 2377 * point vnode, as required. 2378 */ 2379 if (r == 0 && 2380 nfsrv_enable_crossmntpt != 0 && 2381 (nd->nd_flag & ND_NFSV4) != 0 && 2382 nvp->v_type == VDIR && 2383 nvp->v_mountedhere != NULL) { 2384 new_mp = nvp->v_mountedhere; 2385 r = vfs_busy(new_mp, 0); 2386 vput(nvp); 2387 nvp = NULL; 2388 if (r == 0) { 2389 r = VFS_ROOT(new_mp, 2390 LK_SHARED, &nvp); 2391 needs_unbusy = 1; 2392 if (r == 0) 2393 at_root = 1; 2394 } 2395 } 2396 } 2397 if (!r) { 2398 if (refp == NULL && 2399 ((nd->nd_flag & ND_NFSV3) || 2400 NFSNONZERO_ATTRBIT(&attrbits))) { 2401 r = nfsvno_getfh(nvp, &nfh, p); 2402 if (!r) 2403 r = nfsvno_getattr(nvp, nvap, nd, p, 2404 1, &attrbits); 2405 if (r == 0 && is_zfs == 1 && 2406 nfsrv_enable_crossmntpt != 0 && 2407 (nd->nd_flag & ND_NFSV4) != 0 && 2408 nvp->v_type == VDIR && 2409 vp->v_mount != nvp->v_mount) { 2410 /* 2411 * For a ZFS snapshot, there is a 2412 * pseudo mount that does not set 2413 * v_mountedhere, so it needs to 2414 * be detected via a different 2415 * mount structure. 
2416 */ 2417 at_root = 1; 2418 if (new_mp == mp) 2419 new_mp = nvp->v_mount; 2420 } 2421 } 2422 } else { 2423 nvp = NULL; 2424 } 2425 if (r) { 2426 if (!NFSISSET_ATTRBIT(&attrbits, 2427 NFSATTRBIT_RDATTRERROR)) { 2428 if (nvp != NULL) 2429 vput(nvp); 2430 if (needs_unbusy != 0) 2431 vfs_unbusy(new_mp); 2432 nd->nd_repstat = r; 2433 break; 2434 } 2435 } 2436 } 2437 2438 /* 2439 * Build the directory record xdr 2440 */ 2441 if (nd->nd_flag & ND_NFSV3) { 2442 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2443 *tl++ = newnfs_true; 2444 *tl++ = 0; 2445 *tl = txdr_unsigned(dp->d_fileno); 2446 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2447 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2448 *tl++ = 0; 2449 *tl = txdr_unsigned(*cookiep); 2450 nfsrv_postopattr(nd, 0, nvap); 2451 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2452 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2453 if (nvp != NULL) 2454 vput(nvp); 2455 } else { 2456 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2457 *tl++ = newnfs_true; 2458 *tl++ = 0; 2459 *tl = txdr_unsigned(*cookiep); 2460 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2461 if (nvp != NULL) { 2462 supports_nfsv4acls = 2463 nfs_supportsnfsv4acls(nvp); 2464 NFSVOPUNLOCK(nvp, 0); 2465 } else 2466 supports_nfsv4acls = 0; 2467 if (refp != NULL) { 2468 dirlen += nfsrv_putreferralattr(nd, 2469 &savbits, refp, 0, 2470 &nd->nd_repstat); 2471 if (nd->nd_repstat) { 2472 if (nvp != NULL) 2473 vrele(nvp); 2474 if (needs_unbusy != 0) 2475 vfs_unbusy(new_mp); 2476 break; 2477 } 2478 } else if (r) { 2479 dirlen += nfsvno_fillattr(nd, new_mp, 2480 nvp, nvap, &nfh, r, &rderrbits, 2481 nd->nd_cred, p, isdgram, 0, 2482 supports_nfsv4acls, at_root, 2483 mounted_on_fileno); 2484 } else { 2485 dirlen += nfsvno_fillattr(nd, new_mp, 2486 nvp, nvap, &nfh, r, &attrbits, 2487 nd->nd_cred, p, isdgram, 0, 2488 supports_nfsv4acls, at_root, 2489 mounted_on_fileno); 2490 } 2491 if (nvp != NULL) 2492 vrele(nvp); 2493 dirlen += (3 * NFSX_UNSIGNED); 2494 } 2495 if 
(needs_unbusy != 0) 2496 vfs_unbusy(new_mp); 2497 if (dirlen <= cnt) 2498 entrycnt++; 2499 } 2500 cpos += dp->d_reclen; 2501 dp = (struct dirent *)cpos; 2502 cookiep++; 2503 ncookies--; 2504 } 2505 vrele(vp); 2506 vfs_unbusy(mp); 2507 2508 /* 2509 * If dirlen > cnt, we must strip off the last entry. If that 2510 * results in an empty reply, report NFSERR_TOOSMALL. 2511 */ 2512 if (dirlen > cnt || nd->nd_repstat) { 2513 if (!nd->nd_repstat && entrycnt == 0) 2514 nd->nd_repstat = NFSERR_TOOSMALL; 2515 if (nd->nd_repstat) { 2516 newnfs_trimtrailing(nd, mb0, bpos0); 2517 if (nd->nd_flag & ND_NFSV3) 2518 nfsrv_postopattr(nd, getret, &at); 2519 } else 2520 newnfs_trimtrailing(nd, mb1, bpos1); 2521 eofflag = 0; 2522 } else if (cpos < cend) 2523 eofflag = 0; 2524 if (!nd->nd_repstat) { 2525 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2526 *tl++ = newnfs_false; 2527 if (eofflag) 2528 *tl = newnfs_true; 2529 else 2530 *tl = newnfs_false; 2531 } 2532 free(cookies, M_TEMP); 2533 free(rbuf, M_TEMP); 2534 2535 out: 2536 NFSEXITCODE2(0, nd); 2537 return (0); 2538 nfsmout: 2539 vput(vp); 2540 NFSEXITCODE2(error, nd); 2541 return (error); 2542 } 2543 2544 /* 2545 * Get the settable attributes out of the mbuf list. 2546 * (Return 0 or EBADRPC) 2547 */ 2548 int 2549 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2550 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2551 { 2552 u_int32_t *tl; 2553 struct nfsv2_sattr *sp; 2554 int error = 0, toclient = 0; 2555 2556 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2557 case ND_NFSV2: 2558 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2559 /* 2560 * Some old clients didn't fill in the high order 16bits. 
2561 * --> check the low order 2 bytes for 0xffff 2562 */ 2563 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2564 nvap->na_mode = nfstov_mode(sp->sa_mode); 2565 if (sp->sa_uid != newnfs_xdrneg1) 2566 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2567 if (sp->sa_gid != newnfs_xdrneg1) 2568 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2569 if (sp->sa_size != newnfs_xdrneg1) 2570 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2571 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2572 #ifdef notyet 2573 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2574 #else 2575 nvap->na_atime.tv_sec = 2576 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2577 nvap->na_atime.tv_nsec = 0; 2578 #endif 2579 } 2580 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2581 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2582 break; 2583 case ND_NFSV3: 2584 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2585 if (*tl == newnfs_true) { 2586 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2587 nvap->na_mode = nfstov_mode(*tl); 2588 } 2589 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2590 if (*tl == newnfs_true) { 2591 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2592 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2593 } 2594 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2595 if (*tl == newnfs_true) { 2596 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2597 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2598 } 2599 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2600 if (*tl == newnfs_true) { 2601 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2602 nvap->na_size = fxdr_hyper(tl); 2603 } 2604 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2605 switch (fxdr_unsigned(int, *tl)) { 2606 case NFSV3SATTRTIME_TOCLIENT: 2607 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2608 fxdr_nfsv3time(tl, &nvap->na_atime); 2609 toclient = 1; 2610 break; 2611 case NFSV3SATTRTIME_TOSERVER: 2612 vfs_timestamp(&nvap->na_atime); 2613 nvap->na_vaflags |= VA_UTIMES_NULL; 2614 break; 2615 } 2616 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2617 switch (fxdr_unsigned(int, *tl)) { 2618 case NFSV3SATTRTIME_TOCLIENT: 2619 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2620 fxdr_nfsv3time(tl, &nvap->na_mtime); 2621 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2622 break; 2623 case NFSV3SATTRTIME_TOSERVER: 2624 vfs_timestamp(&nvap->na_mtime); 2625 if (!toclient) 2626 nvap->na_vaflags |= VA_UTIMES_NULL; 2627 break; 2628 } 2629 break; 2630 case ND_NFSV4: 2631 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2632 } 2633 nfsmout: 2634 NFSEXITCODE2(error, nd); 2635 return (error); 2636 } 2637 2638 /* 2639 * Handle the setable attributes for V4. 2640 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2641 */ 2642 int 2643 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2644 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2645 { 2646 u_int32_t *tl; 2647 int attrsum = 0; 2648 int i, j; 2649 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2650 int toclient = 0; 2651 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2652 uid_t uid; 2653 gid_t gid; 2654 2655 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2656 if (error) 2657 goto nfsmout; 2658 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2659 attrsize = fxdr_unsigned(int, *tl); 2660 2661 /* 2662 * Loop around getting the setable attributes. If an unsupported 2663 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 2664 */ 2665 if (retnotsup) { 2666 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2667 bitpos = NFSATTRBIT_MAX; 2668 } else { 2669 bitpos = 0; 2670 } 2671 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2672 if (attrsum > attrsize) { 2673 error = NFSERR_BADXDR; 2674 goto nfsmout; 2675 } 2676 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2677 switch (bitpos) { 2678 case NFSATTRBIT_SIZE: 2679 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2680 if (vp != NULL && vp->v_type != VREG) { 2681 error = (vp->v_type == VDIR) ? 
NFSERR_ISDIR : 2682 NFSERR_INVAL; 2683 goto nfsmout; 2684 } 2685 nvap->na_size = fxdr_hyper(tl); 2686 attrsum += NFSX_HYPER; 2687 break; 2688 case NFSATTRBIT_ACL: 2689 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2690 p); 2691 if (error) 2692 goto nfsmout; 2693 if (aceerr && !nd->nd_repstat) 2694 nd->nd_repstat = aceerr; 2695 attrsum += aclsize; 2696 break; 2697 case NFSATTRBIT_ARCHIVE: 2698 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2699 if (!nd->nd_repstat) 2700 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2701 attrsum += NFSX_UNSIGNED; 2702 break; 2703 case NFSATTRBIT_HIDDEN: 2704 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2705 if (!nd->nd_repstat) 2706 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2707 attrsum += NFSX_UNSIGNED; 2708 break; 2709 case NFSATTRBIT_MIMETYPE: 2710 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2711 i = fxdr_unsigned(int, *tl); 2712 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2713 if (error) 2714 goto nfsmout; 2715 if (!nd->nd_repstat) 2716 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2717 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2718 break; 2719 case NFSATTRBIT_MODE: 2720 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2721 nvap->na_mode = nfstov_mode(*tl); 2722 attrsum += NFSX_UNSIGNED; 2723 break; 2724 case NFSATTRBIT_OWNER: 2725 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2726 j = fxdr_unsigned(int, *tl); 2727 if (j < 0) { 2728 error = NFSERR_BADXDR; 2729 goto nfsmout; 2730 } 2731 if (j > NFSV4_SMALLSTR) 2732 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2733 else 2734 cp = namestr; 2735 error = nfsrv_mtostr(nd, cp, j); 2736 if (error) { 2737 if (j > NFSV4_SMALLSTR) 2738 free(cp, M_NFSSTRING); 2739 goto nfsmout; 2740 } 2741 if (!nd->nd_repstat) { 2742 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, 2743 p); 2744 if (!nd->nd_repstat) 2745 nvap->na_uid = uid; 2746 } 2747 if (j > NFSV4_SMALLSTR) 2748 free(cp, M_NFSSTRING); 2749 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2750 break; 2751 case NFSATTRBIT_OWNERGROUP: 2752 NFSM_DISSECT(tl, 
u_int32_t *, NFSX_UNSIGNED); 2753 j = fxdr_unsigned(int, *tl); 2754 if (j < 0) { 2755 error = NFSERR_BADXDR; 2756 goto nfsmout; 2757 } 2758 if (j > NFSV4_SMALLSTR) 2759 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2760 else 2761 cp = namestr; 2762 error = nfsrv_mtostr(nd, cp, j); 2763 if (error) { 2764 if (j > NFSV4_SMALLSTR) 2765 free(cp, M_NFSSTRING); 2766 goto nfsmout; 2767 } 2768 if (!nd->nd_repstat) { 2769 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, 2770 p); 2771 if (!nd->nd_repstat) 2772 nvap->na_gid = gid; 2773 } 2774 if (j > NFSV4_SMALLSTR) 2775 free(cp, M_NFSSTRING); 2776 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2777 break; 2778 case NFSATTRBIT_SYSTEM: 2779 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2780 if (!nd->nd_repstat) 2781 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2782 attrsum += NFSX_UNSIGNED; 2783 break; 2784 case NFSATTRBIT_TIMEACCESSSET: 2785 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2786 attrsum += NFSX_UNSIGNED; 2787 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2788 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2789 fxdr_nfsv4time(tl, &nvap->na_atime); 2790 toclient = 1; 2791 attrsum += NFSX_V4TIME; 2792 } else { 2793 vfs_timestamp(&nvap->na_atime); 2794 nvap->na_vaflags |= VA_UTIMES_NULL; 2795 } 2796 break; 2797 case NFSATTRBIT_TIMEBACKUP: 2798 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2799 if (!nd->nd_repstat) 2800 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2801 attrsum += NFSX_V4TIME; 2802 break; 2803 case NFSATTRBIT_TIMECREATE: 2804 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2805 if (!nd->nd_repstat) 2806 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2807 attrsum += NFSX_V4TIME; 2808 break; 2809 case NFSATTRBIT_TIMEMODIFYSET: 2810 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2811 attrsum += NFSX_UNSIGNED; 2812 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2813 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2814 fxdr_nfsv4time(tl, &nvap->na_mtime); 2815 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2816 attrsum += NFSX_V4TIME; 2817 } 
else { 2818 vfs_timestamp(&nvap->na_mtime); 2819 if (!toclient) 2820 nvap->na_vaflags |= VA_UTIMES_NULL; 2821 } 2822 break; 2823 default: 2824 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2825 /* 2826 * set bitpos so we drop out of the loop. 2827 */ 2828 bitpos = NFSATTRBIT_MAX; 2829 break; 2830 } 2831 } 2832 2833 /* 2834 * some clients pad the attrlist, so we need to skip over the 2835 * padding. 2836 */ 2837 if (attrsum > attrsize) { 2838 error = NFSERR_BADXDR; 2839 } else { 2840 attrsize = NFSM_RNDUP(attrsize); 2841 if (attrsum < attrsize) 2842 error = nfsm_advance(nd, attrsize - attrsum, -1); 2843 } 2844 nfsmout: 2845 NFSEXITCODE2(error, nd); 2846 return (error); 2847 } 2848 2849 /* 2850 * Check/setup export credentials. 2851 */ 2852 int 2853 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 2854 struct ucred *credanon) 2855 { 2856 int error = 0; 2857 2858 /* 2859 * Check/setup credentials. 2860 */ 2861 if (nd->nd_flag & ND_GSS) 2862 exp->nes_exflag &= ~MNT_EXPORTANON; 2863 2864 /* 2865 * Check to see if the operation is allowed for this security flavor. 2866 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 2867 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 2868 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 2869 */ 2870 if (nfsvno_testexp(nd, exp) && 2871 nd->nd_procnum != NFSV4OP_SECINFO && 2872 nd->nd_procnum != NFSPROC_FSINFO) { 2873 if (nd->nd_flag & ND_NFSV4) 2874 error = NFSERR_WRONGSEC; 2875 else 2876 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 2877 goto out; 2878 } 2879 2880 /* 2881 * Check to see if the file system is exported V4 only. 2882 */ 2883 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 2884 error = NFSERR_PROGNOTV4; 2885 goto out; 2886 } 2887 2888 /* 2889 * Now, map the user credentials. 2890 * (Note that ND_AUTHNONE will only be set for an NFSv3 2891 * Fsinfo RPC. If set for anything else, this code might need 2892 * to change.) 
2893 */ 2894 if (NFSVNO_EXPORTED(exp)) { 2895 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 2896 NFSVNO_EXPORTANON(exp) || 2897 (nd->nd_flag & ND_AUTHNONE) != 0) { 2898 nd->nd_cred->cr_uid = credanon->cr_uid; 2899 nd->nd_cred->cr_gid = credanon->cr_gid; 2900 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 2901 credanon->cr_groups); 2902 } else if ((nd->nd_flag & ND_GSS) == 0) { 2903 /* 2904 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 2905 * if there is a replacement credential with a group 2906 * list set up by "nfsuserd -manage-gids". 2907 * If there is no replacement, nfsrv_getgrpscred() 2908 * simply returns its argument. 2909 */ 2910 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 2911 } 2912 } 2913 2914 out: 2915 NFSEXITCODE2(error, nd); 2916 return (error); 2917 } 2918 2919 /* 2920 * Check exports. 2921 */ 2922 int 2923 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 2924 struct ucred **credp) 2925 { 2926 int i, error, *secflavors; 2927 2928 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2929 &exp->nes_numsecflavor, &secflavors); 2930 if (error) { 2931 if (nfs_rootfhset) { 2932 exp->nes_exflag = 0; 2933 exp->nes_numsecflavor = 0; 2934 error = 0; 2935 } 2936 } else { 2937 /* Copy the security flavors. */ 2938 for (i = 0; i < exp->nes_numsecflavor; i++) 2939 exp->nes_secflavors[i] = secflavors[i]; 2940 } 2941 NFSEXITCODE(error); 2942 return (error); 2943 } 2944 2945 /* 2946 * Get a vnode for a file handle and export stuff. 2947 */ 2948 int 2949 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 2950 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 2951 struct ucred **credp) 2952 { 2953 int i, error, *secflavors; 2954 2955 *credp = NULL; 2956 exp->nes_numsecflavor = 0; 2957 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 2958 if (error != 0) 2959 /* Make sure the server replies ESTALE to the client. 
*/ 2960 error = ESTALE; 2961 if (nam && !error) { 2962 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2963 &exp->nes_numsecflavor, &secflavors); 2964 if (error) { 2965 if (nfs_rootfhset) { 2966 exp->nes_exflag = 0; 2967 exp->nes_numsecflavor = 0; 2968 error = 0; 2969 } else { 2970 vput(*vpp); 2971 } 2972 } else { 2973 /* Copy the security flavors. */ 2974 for (i = 0; i < exp->nes_numsecflavor; i++) 2975 exp->nes_secflavors[i] = secflavors[i]; 2976 } 2977 } 2978 NFSEXITCODE(error); 2979 return (error); 2980 } 2981 2982 /* 2983 * nfsd_fhtovp() - convert a fh to a vnode ptr 2984 * - look up fsid in mount list (if not found ret error) 2985 * - get vp and export rights by calling nfsvno_fhtovp() 2986 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 2987 * for AUTH_SYS 2988 * - if mpp != NULL, return the mount point so that it can 2989 * be used for vn_finished_write() by the caller 2990 */ 2991 void 2992 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 2993 struct vnode **vpp, struct nfsexstuff *exp, 2994 struct mount **mpp, int startwrite, struct thread *p) 2995 { 2996 struct mount *mp; 2997 struct ucred *credanon; 2998 fhandle_t *fhp; 2999 3000 fhp = (fhandle_t *)nfp->nfsrvfh_data; 3001 /* 3002 * Check for the special case of the nfsv4root_fh. 3003 */ 3004 mp = vfs_busyfs(&fhp->fh_fsid); 3005 if (mpp != NULL) 3006 *mpp = mp; 3007 if (mp == NULL) { 3008 *vpp = NULL; 3009 nd->nd_repstat = ESTALE; 3010 goto out; 3011 } 3012 3013 if (startwrite) { 3014 vn_start_write(NULL, mpp, V_WAIT); 3015 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3016 lktype = LK_EXCLUSIVE; 3017 } 3018 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3019 &credanon); 3020 vfs_unbusy(mp); 3021 3022 /* 3023 * For NFSv4 without a pseudo root fs, unexported file handles 3024 * can be returned, so that Lookup works everywhere. 
3025 */ 3026 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3027 !(nd->nd_flag & ND_NFSV4)) { 3028 vput(*vpp); 3029 nd->nd_repstat = EACCES; 3030 } 3031 3032 /* 3033 * Personally, I've never seen any point in requiring a 3034 * reserved port#, since only in the rare case where the 3035 * clients are all boxes with secure system privileges, 3036 * does it provide any enhanced security, but... some people 3037 * believe it to be useful and keep putting this code back in. 3038 * (There is also some "security checker" out there that 3039 * complains if the nfs server doesn't enforce this.) 3040 * However, note the following: 3041 * RFC3530 (NFSv4) specifies that a reserved port# not be 3042 * required. 3043 * RFC2623 recommends that, if a reserved port# is checked for, 3044 * that there be a way to turn that off--> ifdef'd. 3045 */ 3046 #ifdef NFS_REQRSVPORT 3047 if (!nd->nd_repstat) { 3048 struct sockaddr_in *saddr; 3049 struct sockaddr_in6 *saddr6; 3050 3051 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3052 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3053 if (!(nd->nd_flag & ND_NFSV4) && 3054 ((saddr->sin_family == AF_INET && 3055 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3056 (saddr6->sin6_family == AF_INET6 && 3057 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3058 vput(*vpp); 3059 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3060 } 3061 } 3062 #endif /* NFS_REQRSVPORT */ 3063 3064 /* 3065 * Check/setup credentials. 3066 */ 3067 if (!nd->nd_repstat) { 3068 nd->nd_saveduid = nd->nd_cred->cr_uid; 3069 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 3070 if (nd->nd_repstat) 3071 vput(*vpp); 3072 } 3073 if (credanon != NULL) 3074 crfree(credanon); 3075 if (nd->nd_repstat) { 3076 if (startwrite) 3077 vn_finished_write(mp); 3078 *vpp = NULL; 3079 if (mpp != NULL) 3080 *mpp = NULL; 3081 } 3082 3083 out: 3084 NFSEXITCODE2(0, nd); 3085 } 3086 3087 /* 3088 * glue for fp. 
3089 */ 3090 static int 3091 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3092 { 3093 struct filedesc *fdp; 3094 struct file *fp; 3095 int error = 0; 3096 3097 fdp = p->td_proc->p_fd; 3098 if (fd < 0 || fd >= fdp->fd_nfiles || 3099 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3100 error = EBADF; 3101 goto out; 3102 } 3103 *fpp = fp; 3104 3105 out: 3106 NFSEXITCODE(error); 3107 return (error); 3108 } 3109 3110 /* 3111 * Called from nfssvc() to update the exports list. Just call 3112 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3113 * in the mount list. 3114 */ 3115 int 3116 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3117 { 3118 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3119 int error = 0; 3120 struct nameidata nd; 3121 fhandle_t fh; 3122 3123 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3124 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3125 nfs_rootfhset = 0; 3126 else if (error == 0) { 3127 if (nfsexargp->fspec == NULL) { 3128 error = EPERM; 3129 goto out; 3130 } 3131 /* 3132 * If fspec != NULL, this is the v4root path. 3133 */ 3134 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 3135 nfsexargp->fspec, p); 3136 if ((error = namei(&nd)) != 0) 3137 goto out; 3138 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3139 vrele(nd.ni_vp); 3140 if (!error) { 3141 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3142 NFSBCOPY((caddr_t)&fh, 3143 nfs_rootfh.nfsrvfh_data, 3144 sizeof (fhandle_t)); 3145 nfs_rootfhset = 1; 3146 } 3147 } 3148 3149 out: 3150 NFSEXITCODE(error); 3151 return (error); 3152 } 3153 3154 /* 3155 * This function needs to test to see if the system is near its limit 3156 * for memory allocation via malloc() or mget() and return True iff 3157 * either of these resources are near their limit. 3158 * XXX (For now, this is just a stub.) 
3159 */ 3160 int nfsrv_testmalloclimit = 0; 3161 int 3162 nfsrv_mallocmget_limit(void) 3163 { 3164 static int printmesg = 0; 3165 static int testval = 1; 3166 3167 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3168 if ((printmesg++ % 100) == 0) 3169 printf("nfsd: malloc/mget near limit\n"); 3170 return (1); 3171 } 3172 return (0); 3173 } 3174 3175 /* 3176 * BSD specific initialization of a mount point. 3177 */ 3178 void 3179 nfsd_mntinit(void) 3180 { 3181 static int inited = 0; 3182 3183 if (inited) 3184 return; 3185 inited = 1; 3186 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3187 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 3188 TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); 3189 nfsv4root_mnt.mnt_export = NULL; 3190 TAILQ_INIT(&nfsv4root_opt); 3191 TAILQ_INIT(&nfsv4root_newopt); 3192 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 3193 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 3194 nfsv4root_mnt.mnt_nvnodelistsize = 0; 3195 nfsv4root_mnt.mnt_activevnodelistsize = 0; 3196 } 3197 3198 /* 3199 * Get a vnode for a file handle, without checking exports, etc. 3200 */ 3201 struct vnode * 3202 nfsvno_getvp(fhandle_t *fhp) 3203 { 3204 struct mount *mp; 3205 struct vnode *vp; 3206 int error; 3207 3208 mp = vfs_busyfs(&fhp->fh_fsid); 3209 if (mp == NULL) 3210 return (NULL); 3211 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3212 vfs_unbusy(mp); 3213 if (error) 3214 return (NULL); 3215 return (vp); 3216 } 3217 3218 /* 3219 * Do a local VOP_ADVLOCK(). 
3220 */ 3221 int 3222 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3223 u_int64_t end, struct thread *td) 3224 { 3225 int error = 0; 3226 struct flock fl; 3227 u_int64_t tlen; 3228 3229 if (nfsrv_dolocallocks == 0) 3230 goto out; 3231 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3232 3233 fl.l_whence = SEEK_SET; 3234 fl.l_type = ftype; 3235 fl.l_start = (off_t)first; 3236 if (end == NFS64BITSSET) { 3237 fl.l_len = 0; 3238 } else { 3239 tlen = end - first; 3240 fl.l_len = (off_t)tlen; 3241 } 3242 /* 3243 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3244 * values for all calls, so that all locks will be held by the 3245 * nfsd server. (The nfsd server handles conflicts between the 3246 * various clients.) 3247 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3248 * bytes, so it can't be put in l_sysid. 3249 */ 3250 if (nfsv4_sysid == 0) 3251 nfsv4_sysid = nlm_acquire_next_sysid(); 3252 fl.l_pid = (pid_t)0; 3253 fl.l_sysid = (int)nfsv4_sysid; 3254 3255 if (ftype == F_UNLCK) 3256 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3257 (F_POSIX | F_REMOTE)); 3258 else 3259 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3260 (F_POSIX | F_REMOTE)); 3261 3262 out: 3263 NFSEXITCODE(error); 3264 return (error); 3265 } 3266 3267 /* 3268 * Check the nfsv4 root exports. 
3269 */ 3270 int 3271 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3272 { 3273 struct ucred *credanon; 3274 int exflags, error = 0, numsecflavor, *secflavors, i; 3275 3276 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3277 &credanon, &numsecflavor, &secflavors); 3278 if (error) { 3279 error = NFSERR_PROGUNAVAIL; 3280 goto out; 3281 } 3282 if (credanon != NULL) 3283 crfree(credanon); 3284 for (i = 0; i < numsecflavor; i++) { 3285 if (secflavors[i] == AUTH_SYS) 3286 nd->nd_flag |= ND_EXAUTHSYS; 3287 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3288 nd->nd_flag |= ND_EXGSS; 3289 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3290 nd->nd_flag |= ND_EXGSSINTEGRITY; 3291 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3292 nd->nd_flag |= ND_EXGSSPRIVACY; 3293 } 3294 3295 out: 3296 NFSEXITCODE(error); 3297 return (error); 3298 } 3299 3300 /* 3301 * Nfs server pseudo system call for the nfsd's 3302 */ 3303 /* 3304 * MPSAFE 3305 */ 3306 static int 3307 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3308 { 3309 struct file *fp; 3310 struct nfsd_addsock_args sockarg; 3311 struct nfsd_nfsd_args nfsdarg; 3312 struct nfsd_nfsd_oargs onfsdarg; 3313 struct nfsd_pnfsd_args pnfsdarg; 3314 struct vnode *vp, *nvp, *curdvp; 3315 struct pnfsdsfile *pf; 3316 struct nfsdevice *ds, *fds; 3317 cap_rights_t rights; 3318 int buflen, error, ret; 3319 char *buf, *cp, *cp2, *cp3; 3320 char fname[PNFS_FILENAME_LEN + 1]; 3321 3322 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3323 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3324 if (error) 3325 goto out; 3326 /* 3327 * Since we don't know what rights might be required, 3328 * pretend that we need them all. It is better to be too 3329 * careful than too reckless. 
3330 */ 3331 error = fget(td, sockarg.sock, 3332 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 3333 if (error != 0) 3334 goto out; 3335 if (fp->f_type != DTYPE_SOCKET) { 3336 fdrop(fp, td); 3337 error = EPERM; 3338 goto out; 3339 } 3340 error = nfsrvd_addsock(fp); 3341 fdrop(fp, td); 3342 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3343 if (uap->argp == NULL) { 3344 error = EINVAL; 3345 goto out; 3346 } 3347 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3348 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3349 if (error == 0) { 3350 nfsdarg.principal = onfsdarg.principal; 3351 nfsdarg.minthreads = onfsdarg.minthreads; 3352 nfsdarg.maxthreads = onfsdarg.maxthreads; 3353 nfsdarg.version = 1; 3354 nfsdarg.addr = NULL; 3355 nfsdarg.addrlen = 0; 3356 nfsdarg.dnshost = NULL; 3357 nfsdarg.dnshostlen = 0; 3358 nfsdarg.mirrorcnt = 1; 3359 } 3360 } else 3361 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3362 if (error) 3363 goto out; 3364 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3365 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3366 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3367 nfsdarg.mirrorcnt >= 1 && 3368 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3369 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3370 nfsdarg.dspath != NULL) { 3371 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3372 " mirrorcnt=%d\n", nfsdarg.addrlen, 3373 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3374 nfsdarg.mirrorcnt); 3375 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3376 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3377 if (error != 0) { 3378 free(cp, M_TEMP); 3379 goto out; 3380 } 3381 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. 
*/ 3382 nfsdarg.addr = cp; 3383 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3384 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3385 if (error != 0) { 3386 free(nfsdarg.addr, M_TEMP); 3387 free(cp, M_TEMP); 3388 goto out; 3389 } 3390 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3391 nfsdarg.dnshost = cp; 3392 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3393 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3394 if (error != 0) { 3395 free(nfsdarg.addr, M_TEMP); 3396 free(nfsdarg.dnshost, M_TEMP); 3397 free(cp, M_TEMP); 3398 goto out; 3399 } 3400 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3401 nfsdarg.dspath = cp; 3402 } else { 3403 nfsdarg.addr = NULL; 3404 nfsdarg.addrlen = 0; 3405 nfsdarg.dnshost = NULL; 3406 nfsdarg.dnshostlen = 0; 3407 nfsdarg.dspath = NULL; 3408 nfsdarg.dspathlen = 0; 3409 nfsdarg.mirrorcnt = 1; 3410 } 3411 error = nfsrvd_nfsd(td, &nfsdarg); 3412 free(nfsdarg.addr, M_TEMP); 3413 free(nfsdarg.dnshost, M_TEMP); 3414 free(nfsdarg.dspath, M_TEMP); 3415 } else if (uap->flag & NFSSVC_PNFSDS) { 3416 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3417 if (error == 0 && pnfsdarg.op == PNFSDOP_DELDSSERVER) { 3418 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3419 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3420 NULL); 3421 if (error == 0) 3422 error = nfsrv_deldsserver(cp, td); 3423 free(cp, M_TEMP); 3424 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3425 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3426 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3427 buf = malloc(buflen, M_TEMP, M_WAITOK); 3428 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3429 NULL); 3430 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3431 if (error == 0 && pnfsdarg.dspath != NULL) { 3432 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3433 error = copyinstr(pnfsdarg.dspath, cp2, 3434 PATH_MAX + 1, NULL); 3435 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3436 error); 3437 } else 3438 cp2 = 
NULL; 3439 if (error == 0 && pnfsdarg.curdspath != NULL) { 3440 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3441 error = copyinstr(pnfsdarg.curdspath, cp3, 3442 PATH_MAX + 1, NULL); 3443 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3444 error); 3445 } else 3446 cp3 = NULL; 3447 curdvp = NULL; 3448 fds = NULL; 3449 if (error == 0) 3450 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3451 &buflen, fname, td, &vp, &nvp, &pf, &ds, 3452 &fds); 3453 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 3454 if (error == 0) { 3455 if (pf->dsf_dir >= nfsrv_dsdirsize) { 3456 printf("copymr: dsdir out of range\n"); 3457 pf->dsf_dir = 0; 3458 } 3459 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 3460 error = nfsrv_copymr(vp, nvp, 3461 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 3462 (struct pnfsdsfile *)buf, 3463 buflen / sizeof(*pf), td->td_ucred, td); 3464 vput(vp); 3465 vput(nvp); 3466 if (fds != NULL && error == 0) { 3467 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 3468 ret = vn_lock(curdvp, LK_EXCLUSIVE); 3469 if (ret == 0) { 3470 nfsrv_dsremove(curdvp, fname, 3471 td->td_ucred, td); 3472 NFSVOPUNLOCK(curdvp, 0); 3473 } 3474 } 3475 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 3476 } 3477 free(cp, M_TEMP); 3478 free(cp2, M_TEMP); 3479 free(cp3, M_TEMP); 3480 free(buf, M_TEMP); 3481 } 3482 } else { 3483 error = nfssvc_srvcall(td, uap, td->td_ucred); 3484 } 3485 3486 out: 3487 NFSEXITCODE(error); 3488 return (error); 3489 } 3490 3491 static int 3492 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3493 { 3494 struct nfsex_args export; 3495 struct file *fp = NULL; 3496 int stablefd, len; 3497 struct nfsd_clid adminrevoke; 3498 struct nfsd_dumplist dumplist; 3499 struct nfsd_dumpclients *dumpclients; 3500 struct nfsd_dumplocklist dumplocklist; 3501 struct nfsd_dumplocks *dumplocks; 3502 struct nameidata nd; 3503 vnode_t vp; 3504 int error = EINVAL, igotlock; 3505 struct proc *procp; 3506 static int suspend_nfsd = 0; 3507 3508 if (uap->flag & NFSSVC_PUBLICFH) { 3509 
NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3510 sizeof (fhandle_t)); 3511 error = copyin(uap->argp, 3512 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3513 if (!error) 3514 nfs_pubfhset = 1; 3515 } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { 3516 error = copyin(uap->argp,(caddr_t)&export, 3517 sizeof (struct nfsex_args)); 3518 if (!error) 3519 error = nfsrv_v4rootexport(&export, cred, p); 3520 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 3521 nfs_pubfhset = 0; 3522 error = 0; 3523 } else if (uap->flag & NFSSVC_STABLERESTART) { 3524 error = copyin(uap->argp, (caddr_t)&stablefd, 3525 sizeof (int)); 3526 if (!error) 3527 error = fp_getfvp(p, stablefd, &fp, &vp); 3528 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 3529 error = EBADF; 3530 if (!error && newnfs_numnfsd != 0) 3531 error = EPERM; 3532 if (!error) { 3533 nfsrv_stablefirst.nsf_fp = fp; 3534 nfsrv_setupstable(p); 3535 } 3536 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 3537 error = copyin(uap->argp, (caddr_t)&adminrevoke, 3538 sizeof (struct nfsd_clid)); 3539 if (!error) 3540 error = nfsrv_adminrevoke(&adminrevoke, p); 3541 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 3542 error = copyin(uap->argp, (caddr_t)&dumplist, 3543 sizeof (struct nfsd_dumplist)); 3544 if (!error && (dumplist.ndl_size < 1 || 3545 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 3546 error = EPERM; 3547 if (!error) { 3548 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 3549 dumpclients = (struct nfsd_dumpclients *)malloc(len, 3550 M_TEMP, M_WAITOK); 3551 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 3552 error = copyout(dumpclients, 3553 CAST_USER_ADDR_T(dumplist.ndl_list), len); 3554 free(dumpclients, M_TEMP); 3555 } 3556 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 3557 error = copyin(uap->argp, (caddr_t)&dumplocklist, 3558 sizeof (struct nfsd_dumplocklist)); 3559 if (!error && (dumplocklist.ndllck_size < 1 || 3560 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 3561 error = EPERM; 3562 if (!error) 3563 
error = nfsrv_lookupfilename(&nd, 3564 dumplocklist.ndllck_fname, p); 3565 if (!error) { 3566 len = sizeof (struct nfsd_dumplocks) * 3567 dumplocklist.ndllck_size; 3568 dumplocks = (struct nfsd_dumplocks *)malloc(len, 3569 M_TEMP, M_WAITOK); 3570 nfsrv_dumplocks(nd.ni_vp, dumplocks, 3571 dumplocklist.ndllck_size, p); 3572 vput(nd.ni_vp); 3573 error = copyout(dumplocks, 3574 CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); 3575 free(dumplocks, M_TEMP); 3576 } 3577 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 3578 procp = p->td_proc; 3579 PROC_LOCK(procp); 3580 nfsd_master_pid = procp->p_pid; 3581 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 3582 nfsd_master_start = procp->p_stats->p_start; 3583 nfsd_master_proc = procp; 3584 PROC_UNLOCK(procp); 3585 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 3586 NFSLOCKV4ROOTMUTEX(); 3587 if (suspend_nfsd == 0) { 3588 /* Lock out all nfsd threads */ 3589 do { 3590 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, 3591 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 3592 } while (igotlock == 0 && suspend_nfsd == 0); 3593 suspend_nfsd = 1; 3594 } 3595 NFSUNLOCKV4ROOTMUTEX(); 3596 error = 0; 3597 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 3598 NFSLOCKV4ROOTMUTEX(); 3599 if (suspend_nfsd != 0) { 3600 nfsv4_unlock(&nfsd_suspend_lock, 0); 3601 suspend_nfsd = 0; 3602 } 3603 NFSUNLOCKV4ROOTMUTEX(); 3604 error = 0; 3605 } 3606 3607 NFSEXITCODE(error); 3608 return (error); 3609 } 3610 3611 /* 3612 * Check exports. 3613 * Returns 0 if ok, 1 otherwise. 3614 */ 3615 int 3616 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 3617 { 3618 int i; 3619 3620 /* 3621 * This seems odd, but allow the case where the security flavor 3622 * list is empty. This happens when NFSv4 is traversing non-exported 3623 * file systems. Exported file systems should always have a non-empty 3624 * security flavor list. 
3625 */ 3626 if (exp->nes_numsecflavor == 0) 3627 return (0); 3628 3629 for (i = 0; i < exp->nes_numsecflavor; i++) { 3630 /* 3631 * The tests for privacy and integrity must be first, 3632 * since ND_GSS is set for everything but AUTH_SYS. 3633 */ 3634 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 3635 (nd->nd_flag & ND_GSSPRIVACY)) 3636 return (0); 3637 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 3638 (nd->nd_flag & ND_GSSINTEGRITY)) 3639 return (0); 3640 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 3641 (nd->nd_flag & ND_GSS)) 3642 return (0); 3643 if (exp->nes_secflavors[i] == AUTH_SYS && 3644 (nd->nd_flag & ND_GSS) == 0) 3645 return (0); 3646 } 3647 return (1); 3648 } 3649 3650 /* 3651 * Calculate a hash value for the fid in a file handle. 3652 */ 3653 uint32_t 3654 nfsrv_hashfh(fhandle_t *fhp) 3655 { 3656 uint32_t hashval; 3657 3658 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 3659 return (hashval); 3660 } 3661 3662 /* 3663 * Calculate a hash value for the sessionid. 3664 */ 3665 uint32_t 3666 nfsrv_hashsessionid(uint8_t *sessionid) 3667 { 3668 uint32_t hashval; 3669 3670 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 3671 return (hashval); 3672 } 3673 3674 /* 3675 * Signal the userland master nfsd to backup the stable restart file. 3676 */ 3677 void 3678 nfsrv_backupstable(void) 3679 { 3680 struct proc *procp; 3681 3682 if (nfsd_master_proc != NULL) { 3683 procp = pfind(nfsd_master_pid); 3684 /* Try to make sure it is the correct process. */ 3685 if (procp == nfsd_master_proc && 3686 procp->p_stats->p_start.tv_sec == 3687 nfsd_master_start.tv_sec && 3688 procp->p_stats->p_start.tv_usec == 3689 nfsd_master_start.tv_usec && 3690 strcmp(procp->p_comm, nfsd_master_comm) == 0) 3691 kern_psignal(procp, SIGUSR2); 3692 else 3693 nfsd_master_proc = NULL; 3694 3695 if (procp != NULL) 3696 PROC_UNLOCK(procp); 3697 } 3698 } 3699 3700 /* 3701 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 
3702 * The arguments are in a structure, so that they can be passed through 3703 * taskqueue for a kernel process to execute this function. 3704 */ 3705 struct nfsrvdscreate { 3706 int done; 3707 int inprog; 3708 struct task tsk; 3709 struct ucred *tcred; 3710 struct vnode *dvp; 3711 NFSPROC_T *p; 3712 struct pnfsdsfile *pf; 3713 int err; 3714 fhandle_t fh; 3715 struct vattr va; 3716 struct vattr createva; 3717 }; 3718 3719 int 3720 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 3721 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 3722 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 3723 { 3724 struct vnode *nvp; 3725 struct nameidata named; 3726 struct vattr va; 3727 char *bufp; 3728 u_long *hashp; 3729 struct nfsnode *np; 3730 struct nfsmount *nmp; 3731 int error; 3732 3733 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 3734 LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); 3735 nfsvno_setpathbuf(&named, &bufp, &hashp); 3736 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 3737 named.ni_cnd.cn_thread = p; 3738 named.ni_cnd.cn_nameptr = bufp; 3739 if (fnamep != NULL) { 3740 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 3741 named.ni_cnd.cn_namelen = strlen(bufp); 3742 } else 3743 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 3744 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 3745 3746 /* Create the date file in the DS mount. */ 3747 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 3748 if (error == 0) { 3749 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 3750 NFSVOPUNLOCK(dvp, 0); 3751 if (error == 0) { 3752 /* Set the ownership of the file. 
*/ 3753 error = VOP_SETATTR(nvp, nvap, tcred); 3754 NFSD_DEBUG(4, "nfsrv_dscreate:" 3755 " setattr-uid=%d\n", error); 3756 if (error != 0) 3757 vput(nvp); 3758 } 3759 if (error != 0) 3760 printf("pNFS: pnfscreate failed=%d\n", error); 3761 } else 3762 printf("pNFS: pnfscreate vnlock=%d\n", error); 3763 if (error == 0) { 3764 np = VTONFS(nvp); 3765 nmp = VFSTONFS(nvp->v_mount); 3766 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 3767 != 0 || nmp->nm_nam->sa_len > sizeof( 3768 struct sockaddr_in6) || 3769 np->n_fhp->nfh_len != NFSX_MYFH) { 3770 printf("Bad DS file: fstype=%s salen=%d" 3771 " fhlen=%d\n", 3772 nvp->v_mount->mnt_vfc->vfc_name, 3773 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 3774 error = ENOENT; 3775 } 3776 3777 /* Set extattrs for the DS on the MDS file. */ 3778 if (error == 0) { 3779 if (dsa != NULL) { 3780 error = VOP_GETATTR(nvp, &va, tcred); 3781 if (error == 0) { 3782 dsa->dsa_filerev = va.va_filerev; 3783 dsa->dsa_size = va.va_size; 3784 dsa->dsa_atime = va.va_atime; 3785 dsa->dsa_mtime = va.va_mtime; 3786 } 3787 } 3788 if (error == 0) { 3789 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 3790 NFSX_MYFH); 3791 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 3792 nmp->nm_nam->sa_len); 3793 NFSBCOPY(named.ni_cnd.cn_nameptr, 3794 pf->dsf_filename, 3795 sizeof(pf->dsf_filename)); 3796 } 3797 } else 3798 printf("pNFS: pnfscreate can't get DS" 3799 " attr=%d\n", error); 3800 if (nvpp != NULL && error == 0) 3801 *nvpp = nvp; 3802 else 3803 vput(nvp); 3804 } 3805 nfsvno_relpathbuf(&named); 3806 return (error); 3807 } 3808 3809 /* 3810 * Start up the thread that will execute nfsrv_dscreate(). 
3811 */ 3812 static void 3813 start_dscreate(void *arg, int pending) 3814 { 3815 struct nfsrvdscreate *dsc; 3816 3817 dsc = (struct nfsrvdscreate *)arg; 3818 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 3819 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 3820 dsc->done = 1; 3821 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 3822 } 3823 3824 /* 3825 * Create a pNFS data file on the Data Server(s). 3826 */ 3827 static void 3828 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 3829 NFSPROC_T *p) 3830 { 3831 struct nfsrvdscreate *dsc, *tdsc; 3832 struct nfsdevice *ds, *mds; 3833 struct mount *mp; 3834 struct pnfsdsfile *pf, *tpf; 3835 struct pnfsdsattr dsattr; 3836 struct vattr va; 3837 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 3838 struct nfsmount *nmp; 3839 fhandle_t fh; 3840 uid_t vauid; 3841 gid_t vagid; 3842 u_short vamode; 3843 struct ucred *tcred; 3844 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; 3845 int failpos, timo; 3846 3847 /* Get a DS server directory in a round-robin order. */ 3848 mirrorcnt = 1; 3849 NFSDDSLOCK(); 3850 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 3851 if (ds->nfsdev_nmp != NULL) 3852 break; 3853 } 3854 if (ds == NULL) { 3855 NFSDDSUNLOCK(); 3856 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 3857 return; 3858 } 3859 i = dsdir[0] = ds->nfsdev_nextdir; 3860 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 3861 dvp[0] = ds->nfsdev_dsdir[i]; 3862 mds = TAILQ_NEXT(ds, nfsdev_list); 3863 if (nfsrv_maxpnfsmirror > 1 && mds != NULL) { 3864 TAILQ_FOREACH_FROM(mds, &nfsrv_devidhead, nfsdev_list) { 3865 if (mds->nfsdev_nmp != NULL) { 3866 dsdir[mirrorcnt] = i; 3867 dvp[mirrorcnt] = mds->nfsdev_dsdir[i]; 3868 mirrorcnt++; 3869 if (mirrorcnt >= nfsrv_maxpnfsmirror) 3870 break; 3871 } 3872 } 3873 } 3874 /* Put at end of list to implement round-robin usage. 
*/ 3875 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 3876 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 3877 NFSDDSUNLOCK(); 3878 dsc = NULL; 3879 if (mirrorcnt > 1) 3880 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 3881 M_WAITOK | M_ZERO); 3882 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | 3883 M_ZERO); 3884 3885 error = nfsvno_getfh(vp, &fh, p); 3886 if (error == 0) 3887 error = VOP_GETATTR(vp, &va, cred); 3888 if (error == 0) { 3889 /* Set the attributes for "vp" to Setattr the DS vp. */ 3890 vauid = va.va_uid; 3891 vagid = va.va_gid; 3892 vamode = va.va_mode; 3893 VATTR_NULL(&va); 3894 va.va_uid = vauid; 3895 va.va_gid = vagid; 3896 va.va_mode = vamode; 3897 va.va_size = 0; 3898 } else 3899 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 3900 3901 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 3902 cred->cr_gid); 3903 /* Make data file name based on FH. */ 3904 tcred = newnfs_getcred(); 3905 3906 /* 3907 * Create the file on each DS mirror, using kernel process(es) for the 3908 * additional mirrors. 
3909 */ 3910 failpos = -1; 3911 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 3912 tpf->dsf_dir = dsdir[i]; 3913 tdsc->tcred = tcred; 3914 tdsc->p = p; 3915 tdsc->pf = tpf; 3916 tdsc->createva = *vap; 3917 tdsc->fh = fh; 3918 tdsc->va = va; 3919 tdsc->dvp = dvp[i]; 3920 tdsc->done = 0; 3921 tdsc->inprog = 0; 3922 tdsc->err = 0; 3923 ret = EIO; 3924 if (nfs_pnfsiothreads != 0) { 3925 ret = nfs_pnfsio(start_dscreate, tdsc); 3926 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 3927 } 3928 if (ret != 0) { 3929 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 3930 NULL, tcred, p, NULL); 3931 if (ret != 0) { 3932 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 3933 error)); 3934 if (failpos == -1 && nfsds_failerr(ret)) 3935 failpos = i; 3936 else 3937 error = ret; 3938 } 3939 } 3940 } 3941 if (error == 0) { 3942 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 3943 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 3944 &dsattr, NULL, tcred, p, NULL); 3945 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 3946 failpos = mirrorcnt - 1; 3947 error = 0; 3948 } 3949 } 3950 timo = hz / 50; /* Wait for 20msec. */ 3951 if (timo < 1) 3952 timo = 1; 3953 /* Wait for kernel task(s) to complete. */ 3954 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 3955 while (tdsc->inprog != 0 && tdsc->done == 0) 3956 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 3957 if (tdsc->err != 0) { 3958 if (failpos == -1 && nfsds_failerr(tdsc->err)) 3959 failpos = i; 3960 else if (error == 0) 3961 error = tdsc->err; 3962 } 3963 } 3964 3965 /* 3966 * If failpos has been set, that mirror has failed, so it needs 3967 * to be disabled. 
3968 */ 3969 if (failpos >= 0) { 3970 nmp = VFSTONFS(dvp[failpos]->v_mount); 3971 NFSLOCKMNT(nmp); 3972 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 3973 NFSMNTP_CANCELRPCS)) == 0) { 3974 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 3975 NFSUNLOCKMNT(nmp); 3976 ds = nfsrv_deldsnmp(nmp, p); 3977 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 3978 ds); 3979 if (ds != NULL) 3980 nfsrv_killrpcs(nmp); 3981 NFSLOCKMNT(nmp); 3982 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 3983 wakeup(nmp); 3984 } 3985 NFSUNLOCKMNT(nmp); 3986 } 3987 3988 NFSFREECRED(tcred); 3989 if (error == 0) { 3990 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 3991 3992 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", 3993 mirrorcnt, nfsrv_maxpnfsmirror); 3994 /* 3995 * For all mirrors that couldn't be created, fill in the 3996 * *pf structure, but with an IP address == 0.0.0.0. 3997 */ 3998 tpf = pf + mirrorcnt; 3999 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { 4000 *tpf = *pf; 4001 tpf->dsf_sin.sin_family = AF_INET; 4002 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); 4003 tpf->dsf_sin.sin_addr.s_addr = 0; 4004 tpf->dsf_sin.sin_port = 0; 4005 } 4006 4007 error = vn_start_write(vp, &mp, V_WAIT); 4008 if (error == 0) { 4009 error = vn_extattr_set(vp, IO_NODELOCKED, 4010 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 4011 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); 4012 if (error == 0) 4013 error = vn_extattr_set(vp, IO_NODELOCKED, 4014 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4015 sizeof(dsattr), (char *)&dsattr, p); 4016 vn_finished_write(mp); 4017 if (error != 0) 4018 printf("pNFS: pnfscreate setextattr=%d\n", 4019 error); 4020 } else 4021 printf("pNFS: pnfscreate startwrite=%d\n", error); 4022 } else 4023 printf("pNFS: pnfscreate=%d\n", error); 4024 free(pf, M_TEMP); 4025 free(dsc, M_TEMP); 4026 } 4027 4028 /* 4029 * Get the information needed to remove the pNFS Data Server file from the 4030 * Metadata file. 
Upon success, dvpp[0] is set non-NULL to the locked
 * DS directory vnode.  The caller must unlock dvpp[0] when done with it.
 * NOTE(review): the lookup below is done with a lock type of 0 and
 * nfsrv_dsremove() locks the directory itself, so the "locked" wording
 * above may be stale -- confirm.
 * When nothing needs to be removed (not a regular exported file, no pNFS
 * DSs configured, other hard links remain, or an error occurred), dvpp[0]
 * is left NULL.
 */
static void
nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
    int *mirrorcntp, char *fname, fhandle_t *fhp)
{
	struct vattr attrs;
	struct ucred *rootcred;
	char *xattrbuf;
	int error, xattrlen;

	*dvpp = NULL;

	/* If not an exported regular file or not a pNFS server, just return. */
	if (vp->v_type != VREG)
		return;
	if ((vp->v_mount->mnt_flag & MNT_EXPORTED) == 0)
		return;
	if (nfsrv_devidcnt == 0)
		return;

	/* Check to see if this is the last hard link. */
	rootcred = newnfs_getcred();
	error = VOP_GETATTR(vp, &attrs, rootcred);
	NFSFREECRED(rootcred);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
		return;
	}
	if (attrs.va_nlink > 1)
		return;

	error = nfsvno_getfh(vp, fhp, p);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
		return;
	}

	/* Get the directory vnode for the DS mount and the file handle. */
	xattrlen = 1024;
	xattrbuf = malloc(xattrlen, M_TEMP, M_WAITOK);
	error = nfsrv_dsgetsockmnt(vp, 0, xattrbuf, &xattrlen, mirrorcntp, p,
	    dvpp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
	free(xattrbuf, M_TEMP);
	if (error != 0)
		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
}

/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
4079 */ 4080 struct nfsrvdsremove { 4081 int done; 4082 int inprog; 4083 struct task tsk; 4084 struct ucred *tcred; 4085 struct vnode *dvp; 4086 NFSPROC_T *p; 4087 int err; 4088 char fname[PNFS_FILENAME_LEN + 1]; 4089 }; 4090 4091 static int 4092 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4093 NFSPROC_T *p) 4094 { 4095 struct nameidata named; 4096 struct vnode *nvp; 4097 char *bufp; 4098 u_long *hashp; 4099 int error; 4100 4101 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4102 if (error != 0) 4103 return (error); 4104 named.ni_cnd.cn_nameiop = DELETE; 4105 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4106 named.ni_cnd.cn_cred = tcred; 4107 named.ni_cnd.cn_thread = p; 4108 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 4109 nfsvno_setpathbuf(&named, &bufp, &hashp); 4110 named.ni_cnd.cn_nameptr = bufp; 4111 named.ni_cnd.cn_namelen = strlen(fname); 4112 strlcpy(bufp, fname, NAME_MAX); 4113 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4114 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4115 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4116 if (error == 0) { 4117 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4118 vput(nvp); 4119 } 4120 NFSVOPUNLOCK(dvp, 0); 4121 nfsvno_relpathbuf(&named); 4122 if (error != 0) 4123 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4124 return (error); 4125 } 4126 4127 /* 4128 * Start up the thread that will execute nfsrv_dsremove(). 4129 */ 4130 static void 4131 start_dsremove(void *arg, int pending) 4132 { 4133 struct nfsrvdsremove *dsrm; 4134 4135 dsrm = (struct nfsrvdsremove *)arg; 4136 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4137 dsrm->p); 4138 dsrm->done = 1; 4139 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4140 } 4141 4142 /* 4143 * Remove a pNFS data file from a Data Server. 4144 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4145 * removed to set up the dvp and fill in the FH. 
4146 */ 4147 static void 4148 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4149 NFSPROC_T *p) 4150 { 4151 struct ucred *tcred; 4152 struct nfsrvdsremove *dsrm, *tdsrm; 4153 struct nfsdevice *ds; 4154 struct nfsmount *nmp; 4155 int failpos, i, ret, timo; 4156 4157 tcred = newnfs_getcred(); 4158 dsrm = NULL; 4159 if (mirrorcnt > 1) 4160 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4161 /* 4162 * Remove the file on each DS mirror, using kernel process(es) for the 4163 * additional mirrors. 4164 */ 4165 failpos = -1; 4166 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4167 tdsrm->tcred = tcred; 4168 tdsrm->p = p; 4169 tdsrm->dvp = dvp[i]; 4170 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4171 tdsrm->inprog = 0; 4172 tdsrm->done = 0; 4173 tdsrm->err = 0; 4174 ret = EIO; 4175 if (nfs_pnfsiothreads != 0) { 4176 ret = nfs_pnfsio(start_dsremove, tdsrm); 4177 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4178 } 4179 if (ret != 0) { 4180 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4181 if (failpos == -1 && nfsds_failerr(ret)) 4182 failpos = i; 4183 } 4184 } 4185 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4186 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4187 failpos = mirrorcnt - 1; 4188 timo = hz / 50; /* Wait for 20msec. */ 4189 if (timo < 1) 4190 timo = 1; 4191 /* Wait for kernel task(s) to complete. */ 4192 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4193 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4194 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4195 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4196 failpos = i; 4197 } 4198 4199 /* 4200 * If failpos has been set, that mirror has failed, so it needs 4201 * to be disabled. 
4202 */ 4203 if (failpos >= 0) { 4204 nmp = VFSTONFS(dvp[failpos]->v_mount); 4205 NFSLOCKMNT(nmp); 4206 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4207 NFSMNTP_CANCELRPCS)) == 0) { 4208 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4209 NFSUNLOCKMNT(nmp); 4210 ds = nfsrv_deldsnmp(nmp, p); 4211 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4212 ds); 4213 if (ds != NULL) 4214 nfsrv_killrpcs(nmp); 4215 NFSLOCKMNT(nmp); 4216 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4217 wakeup(nmp); 4218 } 4219 NFSUNLOCKMNT(nmp); 4220 } 4221 4222 /* Get rid all layouts for the file. */ 4223 nfsrv_freefilelayouts(fhp); 4224 4225 NFSFREECRED(tcred); 4226 free(dsrm, M_TEMP); 4227 } 4228 4229 /* 4230 * Generate a file name based on the file handle and put it in *bufp. 4231 * Return the number of bytes generated. 4232 */ 4233 static int 4234 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4235 { 4236 int i; 4237 uint8_t *cp; 4238 const uint8_t *hexdigits = "0123456789abcdef"; 4239 4240 cp = (uint8_t *)fhp; 4241 for (i = 0; i < sizeof(*fhp); i++) { 4242 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4243 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4244 } 4245 bufp[2 * i] = '\0'; 4246 return (2 * i); 4247 } 4248 4249 /* 4250 * Update the Metadata file's attributes from the DS file when a Read/Write 4251 * layout is returned. 4252 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4253 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4254 */ 4255 int 4256 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4257 { 4258 struct ucred *tcred; 4259 int error; 4260 4261 /* Do this as root so that it won't fail with EACCES. */ 4262 tcred = newnfs_getcred(); 4263 error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4264 NULL, NULL, NULL, nap, NULL); 4265 NFSFREECRED(tcred); 4266 return (error); 4267 } 4268 4269 /* 4270 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 
4271 */ 4272 static int 4273 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4274 NFSPROC_T *p) 4275 { 4276 int error; 4277 4278 error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL, 4279 NULL, NULL, NULL, NULL, aclp); 4280 return (error); 4281 } 4282 4283 static int 4284 nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt, 4285 struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, 4286 char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp) 4287 { 4288 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4289 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4290 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4291 struct nfsdevice *ds; 4292 struct pnfsdsattr dsattr; 4293 char *buf; 4294 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4295 4296 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4297 /* 4298 * If not a regular file, not exported or not a pNFS server, 4299 * just return ENOENT. 4300 */ 4301 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4302 nfsrv_devidcnt == 0) 4303 return (ENOENT); 4304 4305 buflen = 1024; 4306 buf = malloc(buflen, M_TEMP, M_WAITOK); 4307 error = 0; 4308 4309 /* 4310 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4311 * from the MetaData file's extended attribute. 4312 */ 4313 if (ioproc == NFSPROC_GETATTR) { 4314 error = vn_extattr_get(vp, IO_NODELOCKED, 4315 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4316 p); 4317 if (error == 0 && buflen != sizeof(dsattr)) 4318 error = ENXIO; 4319 if (error == 0) { 4320 NFSBCOPY(buf, &dsattr, buflen); 4321 nap->na_filerev = dsattr.dsa_filerev; 4322 nap->na_size = dsattr.dsa_size; 4323 nap->na_atime = dsattr.dsa_atime; 4324 nap->na_mtime = dsattr.dsa_mtime; 4325 4326 /* 4327 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4328 * returns 0, just return now. 
nfsrv_checkdsattr() 4329 * returns 0 if there is no Read/Write layout 4330 * plus either an Open/Write_access or Write 4331 * delegation issued to a client for the file. 4332 */ 4333 if (nfsrv_pnfsgetdsattr == 0 || 4334 nfsrv_checkdsattr(nd, vp, p) == 0) { 4335 free(buf, M_TEMP); 4336 return (error); 4337 } 4338 } 4339 4340 /* 4341 * Clear ENOATTR so the code below will attempt to do a 4342 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4343 * the extended attribute. 4344 */ 4345 if (error == ENOATTR) 4346 error = 0; 4347 } 4348 4349 origmircnt = -1; 4350 trycnt = 0; 4351 tryagain: 4352 if (error == 0) { 4353 buflen = 1024; 4354 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4355 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4356 NULL, NULL); 4357 if (error == 0) { 4358 for (i = 0; i < mirrorcnt; i++) 4359 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4360 } else 4361 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4362 } else 4363 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4364 if (error == 0) { 4365 failpos = -1; 4366 if (origmircnt == -1) 4367 origmircnt = mirrorcnt; 4368 /* 4369 * If failpos is set to a mirror#, then that mirror has 4370 * failed and will be disabled. For Read and Getattr, the 4371 * function only tries one mirror, so if that mirror has 4372 * failed, it will need to be retried. As such, increment 4373 * tryitagain for these cases. 4374 * For Write, Setattr and Setacl, the function tries all 4375 * mirrors and will not return an error for the case where 4376 * one mirror has failed. For these cases, the functioning 4377 * mirror(s) will have been modified, so a retry isn't 4378 * necessary. These functions will set failpos for the 4379 * failed mirror#. 
4380 */ 4381 if (ioproc == NFSPROC_READDS) { 4382 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4383 mpp, mpp2); 4384 if (nfsds_failerr(error) && mirrorcnt > 1) { 4385 /* 4386 * Setting failpos will cause the mirror 4387 * to be disabled and then a retry of this 4388 * read is required. 4389 */ 4390 failpos = 0; 4391 error = 0; 4392 trycnt++; 4393 } 4394 } else if (ioproc == NFSPROC_WRITEDS) 4395 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4396 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4397 else if (ioproc == NFSPROC_SETATTR) 4398 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4399 mirrorcnt, nap, &failpos); 4400 else if (ioproc == NFSPROC_SETACL) 4401 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4402 mirrorcnt, aclp, &failpos); 4403 else { 4404 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4405 vp, nmp[mirrorcnt - 1], nap); 4406 if (nfsds_failerr(error) && mirrorcnt > 1) { 4407 /* 4408 * Setting failpos will cause the mirror 4409 * to be disabled and then a retry of this 4410 * getattr is required. 4411 */ 4412 failpos = mirrorcnt - 1; 4413 error = 0; 4414 trycnt++; 4415 } 4416 } 4417 ds = NULL; 4418 if (failpos >= 0) { 4419 failnmp = nmp[failpos]; 4420 NFSLOCKMNT(failnmp); 4421 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4422 NFSMNTP_CANCELRPCS)) == 0) { 4423 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4424 NFSUNLOCKMNT(failnmp); 4425 ds = nfsrv_deldsnmp(failnmp, p); 4426 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 4427 failpos, ds); 4428 if (ds != NULL) 4429 nfsrv_killrpcs(failnmp); 4430 NFSLOCKMNT(failnmp); 4431 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4432 wakeup(failnmp); 4433 } 4434 NFSUNLOCKMNT(failnmp); 4435 } 4436 for (i = 0; i < mirrorcnt; i++) 4437 NFSVOPUNLOCK(dvp[i], 0); 4438 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 4439 trycnt); 4440 /* Try the Read/Getattr again if a mirror was deleted. 
 */
		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
			goto tryagain;
	} else {
		/* Return ENOENT for any Extended Attribute error. */
		error = ENOENT;
	}
	free(buf, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
	return (error);
}

/*
 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 * attribute.
 * vp - The vnode whose "pnfsd.dsfile" extended attribute is read.  It must
 *     be locked (asserted).
 * lktype - If non-zero, the lock type used to lock each DS directory vnode,
 *     which is then left locked on success.  If zero, a DS directory vnode
 *     is only locked around the Lookup and is unlocked afterwards.
 * buf, buflenp - Buffer (and its length) that receives the extended
 *     attribute, which is an array of struct pnfsdsfile, one per mirror.
 * mirrorcntp - Set to 1 on entry and to the number of mirrors found on
 *     success.
 * dvpp - If non-NULL, receives the DS directory vnode for each mirror found.
 * fhp - If non-NULL, receives the DS file handle for each mirror found.
 * devid - If non-NULL, receives the device id for each mirror found.
 * fnamep - If non-NULL, receives the DS file name of the first mirror found.
 * nvpp - If non-NULL, receives the vnode returned by the Lookup for the
 *     first mirror it is done for and *dsdirp is set to its dsdir index.
 *     NOTE(review): dsdirp is dereferenced without a NULL check whenever
 *     nvpp is non-NULL; presumably callers always pass both - confirm.
 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
 *     to be checked. If it points to a NULL nmp, then it returns
 *     a suitable destination.
 * curnmp - If non-NULL, it is the source mount for the copy.
 * ippos - If non-NULL, set to -1 and then to the index of the mirror that
 *     matches curnmp or, when curnmp is NULL, to the index of a mirror
 *     entry with an AF_INET address of 0.
 *
 * Returns 0 on success.  ENOATTR is returned for a malformed attribute,
 * ENXIO when curnmp is not in the mirror list, EEXIST when the destination
 * is already a mirror and ENOENT when no mirror could be found.  Errors from
 * vn_extattr_get(), vn_lock() and nfsrv_pnfslookupds() are passed through.
 */
int
nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
    int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
    char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
    struct nfsmount *curnmp, int *ippos, int *dsdirp)
{
	struct vnode *dvp, *nvp, **tdvpp;
	struct nfsmount *nmp, *newnmp;
	struct sockaddr *sad;
	struct sockaddr_in *sin;
	struct nfsdevice *ds, *fndds;
	struct pnfsdsfile *pf;
	uint32_t dsdir;
	int error, fhiszero, fnd, gotone, i, mirrorcnt;

	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
	/* Set the output arguments to their "nothing found" values. */
	*mirrorcntp = 1;
	tdvpp = dvpp;
	if (nvpp != NULL)
		*nvpp = NULL;
	if (dvpp != NULL)
		*dvpp = NULL;
	if (ippos != NULL)
		*ippos = -1;
	if (newnmpp != NULL)
		newnmp = *newnmpp;
	else
		newnmp = NULL;
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsfile", buflenp, buf, p);
	/*
	 * The attribute must be a whole number of pnfsdsfile entries,
	 * between 1 and NFSDEV_MAXMIRRORS of them.
	 */
	mirrorcnt = *buflenp / sizeof(*pf);
	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
	    *buflenp != sizeof(*pf) * mirrorcnt))
		error = ENOATTR;

	pf = (struct pnfsdsfile *)buf;
	/* If curnmp != NULL, check for a match in the mirror list. */
	if (curnmp != NULL && error == 0) {
		fnd = 0;
		for (i = 0; i < mirrorcnt; i++, pf++) {
			sad = (struct sockaddr *)&pf->dsf_sin;
			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
				if (ippos != NULL)
					*ippos = i;
				fnd = 1;
				break;
			}
		}
		if (fnd == 0)
			error = ENXIO;
	}

	/* Now, process each of the mirror entries, starting at the first. */
	gotone = 0;
	pf = (struct pnfsdsfile *)buf;
	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
	    error);
	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
		fhiszero = 0;
		sad = (struct sockaddr *)&pf->dsf_sin;
		sin = &pf->dsf_sin;
		dsdir = pf->dsf_dir;
		if (dsdir >= nfsrv_dsdirsize) {
			/* dsdir is used to index nfsdev_dsdir[] below. */
			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
			error = ENOATTR;
		} else if (nvpp != NULL && newnmp != NULL &&
		    nfsaddr2_match(sad, newnmp->nm_nam))
			/* The requested destination is already a mirror. */
			error = EEXIST;
		if (error == 0) {
			if (ippos != NULL && curnmp == NULL &&
			    sad->sa_family == AF_INET &&
			    sin->sin_addr.s_addr == 0)
				*ippos = i;
			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
				fhiszero = 1;
			/* Use the socket address to find the mount point. */
			fndds = NULL;
			NFSDDSLOCK();
			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
				if (ds->nfsdev_nmp != NULL) {
					dvp = ds->nfsdev_dvp;
					nmp = VFSTONFS(dvp->v_mount);
					if (nmp != ds->nfsdev_nmp)
						printf("different2 nmp %p %p\n",
						    nmp, ds->nfsdev_nmp);
					if (nfsaddr2_match(sad, nmp->nm_nam))
						fndds = ds;
					else if (newnmpp != NULL &&
					    newnmp == NULL &&
					    (*newnmpp == NULL || fndds == NULL))
						/*
						 * Return a destination for the
						 * copy in newnmpp. Choose the
						 * last valid one before the
						 * source mirror, so it isn't
						 * always the first one.
						 */
						*newnmpp = nmp;
				}
			}
			NFSDDSUNLOCK();
			if (fndds != NULL) {
				dvp = fndds->nfsdev_dsdir[dsdir];
				if (lktype != 0 || fhiszero != 0 ||
				    (nvpp != NULL && *nvpp == NULL)) {
					/*
					 * An exclusive lock is used when the
					 * file handle needs to be acquired,
					 * otherwise the caller's lock type or
					 * a shared lock.
					 */
					if (fhiszero != 0)
						error = vn_lock(dvp,
						    LK_EXCLUSIVE);
					else if (lktype != 0)
						error = vn_lock(dvp, lktype);
					else
						error = vn_lock(dvp, LK_SHARED);
					/*
					 * If the file handle is all 0's, try to
					 * do a Lookup against the DS to acquire
					 * it.
					 * If dvpp == NULL or the Lookup fails,
					 * unlock dvp after the call.
					 */
					if (error == 0 && (fhiszero != 0 ||
					    (nvpp != NULL && *nvpp == NULL))) {
						error = nfsrv_pnfslookupds(vp,
						    dvp, pf, &nvp, p);
						if (error == 0) {
							if (fhiszero != 0)
								nfsrv_pnfssetfh(
								    vp, pf,
								    nvp, p);
							if (nvpp != NULL &&
							    *nvpp == NULL) {
								*nvpp = nvp;
								*dsdirp = dsdir;
							} else
								vput(nvp);
						}
						if (error != 0 || lktype == 0)
							NFSVOPUNLOCK(dvp, 0);
					}
				}
				if (error == 0) {
					/* Fill in the results for this mirror. */
					gotone++;
					NFSD_DEBUG(4, "gotone=%d\n", gotone);
					if (devid != NULL) {
						NFSBCOPY(fndds->nfsdev_deviceid,
						    devid, NFSX_V4DEVICEID);
						devid += NFSX_V4DEVICEID;
					}
					if (dvpp != NULL)
						*tdvpp++ = dvp;
					if (fhp != NULL)
						NFSBCOPY(&pf->dsf_fh, fhp++,
						    NFSX_MYFH);
					if (fnamep != NULL && gotone == 1)
						strlcpy(fnamep,
						    pf->dsf_filename,
						    sizeof(pf->dsf_filename));
				} else
					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
					    "err=%d\n", error);
			}
		}
	}
	if (error == 0 && gotone == 0)
		error = ENOENT;

	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
	    error);
	if (error == 0)
		*mirrorcntp = gotone;
	else {
		if (gotone > 0 && dvpp != NULL) {
			/*
			 * If the error didn't occur on the first one and
			 * dvpp != NULL, the one(s) prior to the failure will
			 * have locked dvp's that need to be unlocked.
			 */
			for (i = 0; i < gotone; i++) {
				NFSVOPUNLOCK(*dvpp, 0);
				*dvpp++ = NULL;
			}
		}
		/*
		 * If it found the vnode to be copied from before a failure,
		 * it needs to be vput()'d.
		 */
		if (nvpp != NULL && *nvpp != NULL) {
			vput(*nvpp);
			*nvpp = NULL;
		}
	}
	return (error);
}

/*
 * Set the extended attribute for the Change attribute.
 * The file revision (Change), size, access time and modify time in "nap"
 * are stored in the "pnfsd.dsattr" system extended attribute of "vp".
 * The vnode must be exclusively locked (asserted).
 * Returns 0 or the error from vn_start_write() or vn_extattr_set().  A
 * failure is also reported via printf().
 */
static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
	struct pnfsdsattr dsattr;
	struct mount *mp;
	int error;

	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
	error = vn_start_write(vp, &mp, V_WAIT);
	if (error == 0) {
		dsattr.dsa_filerev = nap->na_filerev;
		dsattr.dsa_size = nap->na_size;
		dsattr.dsa_atime = nap->na_atime;
		dsattr.dsa_mtime = nap->na_mtime;
		error = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
		    sizeof(dsattr), (char *)&dsattr, p);
		vn_finished_write(mp);
	}
	if (error != 0)
		printf("pNFS: setextattr=%d\n", error);
	return (error);
}

/*
 * Do a Read RPC of "len" bytes at offset "off" against the DS mounted at
 * "nmp", for the DS file with file handle "fhp".
 * When data is returned, *mpp is set to the reply mbuf chain trimmed so
 * that it holds only the read data plus its XDR padding, and *mpendp is set
 * to the last mbuf of that chain.  Otherwise *mpp is NULL and *mpendp is
 * not set (this includes a successful read of zero bytes).
 * Returns 0, the error from newnfs_request(), the NFS status of the reply
 * or ENOENT if the reply mbuf chain is not as expected.
 * NOTE(review): the NFSM_DISSECT()/NFSM_STRSIZ() macros presumably set
 * error and jump to nfsmout for a malformed reply - confirm.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* The Read arguments are a 64bit offset followed by a 32bit count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the chain to the caller, so it is not freed. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}

/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 */
struct nfsrvwritedsdorpc {
	int			done;	/* Set to 1 when the RPC is done. */
	int			inprog;	/* Non-zero while queued/running. */
	struct task		tsk;
	fhandle_t		fh;	/* File handle of the DS file. */
	off_t			off;	/* Byte offset of the write. */
	int			len;	/* Byte count of the write. */
	struct nfsmount		*nmp;	/* Mount point of the DS. */
	struct ucred		*cred;
	NFSPROC_T		*p;
	struct mbuf		*m;	/* Copy of the data to be written. */
	int			err;	/* Result of the RPC. */
};

/*
 * Do a Write RPC followed by a Getattr against the DS mounted at "nmp".
 * The mbuf chain "m" holds the data and is linked onto the end of the
 * request.  It must not be NULL.
 * The attributes in the reply are loaded into "nap".  Callers in this file
 * pass a NULL nap when the attributes are not needed.
 * Returns 0, the error from newnfs_request(), the NFS status of the reply,
 * NFSERR_IO if the DS did not do the write FileSync or an error from
 * parsing the reply.
 */
static int
nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
    struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int commit, error, retlen;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	txdr_hyper(off, tl);
	tl += 2;
	/*
	 * Do all writes FileSync, since the server doesn't hold onto dirty
	 * buffers.  Since clients should be accessing the DS servers directly
	 * using the pNFS layouts, this just needs to work correctly as a
	 * fallback.
	 */
	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
	*tl = txdr_unsigned(len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);

	/* Put data in mbuf chain. */
	nd->nd_mb->m_next = m;

	/* Set nd_mb and nd_bpos to end of data. */
	while (m->m_next != NULL)
		m = m->m_next;
	nd->nd_mb = m;
	nd->nd_bpos = mtod(m, char *) + m->m_len;
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);

	/* Do a Getattr for Size, Change, Access Time and Modify Time. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		/* There is no reply to free when the request fails. */
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		/* The Write reply is the count, commit level and verifier. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		retlen = fxdr_unsigned(int, *tl++);
		commit = fxdr_unsigned(int, *tl);
		if (commit != NFSWRITE_FILESYNC)
			error = NFSERR_IO;
		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
		    retlen, commit, error);
	} else
		error = nd->nd_repstat;
	/* We have no use for the Write Verifier since we use FileSync. */

	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_writedsdorpc().
4910 */ 4911 static void 4912 start_writedsdorpc(void *arg, int pending) 4913 { 4914 struct nfsrvwritedsdorpc *drpc; 4915 4916 drpc = (struct nfsrvwritedsdorpc *)arg; 4917 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 4918 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 4919 drpc->done = 1; 4920 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 4921 } 4922 4923 static int 4924 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 4925 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 4926 struct mbuf **mpp, char *cp, int *failposp) 4927 { 4928 struct nfsrvwritedsdorpc *drpc, *tdrpc; 4929 struct nfsvattr na; 4930 struct mbuf *m; 4931 int error, i, offs, ret, timo; 4932 4933 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 4934 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 4935 drpc = NULL; 4936 if (mirrorcnt > 1) 4937 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 4938 M_WAITOK); 4939 4940 /* Calculate offset in mbuf chain that data starts. */ 4941 offs = cp - mtod(*mpp, char *); 4942 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 4943 4944 /* 4945 * Do the write RPC for every DS, using a separate kernel process 4946 * for every DS except the last one. 
4947 */ 4948 error = 0; 4949 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 4950 tdrpc->done = 0; 4951 tdrpc->fh = *fhp; 4952 tdrpc->off = off; 4953 tdrpc->len = len; 4954 tdrpc->nmp = *nmpp; 4955 tdrpc->cred = cred; 4956 tdrpc->p = p; 4957 tdrpc->inprog = 0; 4958 tdrpc->err = 0; 4959 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 4960 ret = EIO; 4961 if (nfs_pnfsiothreads != 0) { 4962 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 4963 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 4964 ret); 4965 } 4966 if (ret != 0) { 4967 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 4968 tdrpc->m, cred, p); 4969 if (nfsds_failerr(ret) && *failposp == -1) 4970 *failposp = i; 4971 else if (error == 0 && ret != 0) 4972 error = ret; 4973 } 4974 nmpp++; 4975 fhp++; 4976 } 4977 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 4978 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 4979 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 4980 *failposp = mirrorcnt - 1; 4981 else if (error == 0 && ret != 0) 4982 error = ret; 4983 if (error == 0) 4984 error = nfsrv_setextattr(vp, &na, p); 4985 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 4986 tdrpc = drpc; 4987 timo = hz / 50; /* Wait for 20msec. */ 4988 if (timo < 1) 4989 timo = 1; 4990 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 4991 /* Wait for RPCs on separate threads to complete. 
*/ 4992 while (tdrpc->inprog != 0 && tdrpc->done == 0) 4993 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 4994 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 4995 *failposp = i; 4996 else if (error == 0 && tdrpc->err != 0) 4997 error = tdrpc->err; 4998 } 4999 free(drpc, M_TEMP); 5000 return (error); 5001 } 5002 5003 static int 5004 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5005 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5006 struct nfsvattr *dsnap) 5007 { 5008 uint32_t *tl; 5009 struct nfsrv_descript *nd; 5010 nfsv4stateid_t st; 5011 nfsattrbit_t attrbits; 5012 int error; 5013 5014 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5015 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5016 /* 5017 * Use a stateid where other is an alternating 01010 pattern and 5018 * seqid is 0xffffffff. This value is not defined as special by 5019 * the RFC and is used by the FreeBSD NFS server to indicate an 5020 * MDS->DS proxy operation. 5021 */ 5022 st.other[0] = 0x55555555; 5023 st.other[1] = 0x55555555; 5024 st.other[2] = 0x55555555; 5025 st.seqid = 0xffffffff; 5026 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5027 NULL, NULL, 0, 0); 5028 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5029 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5030 5031 /* Do a Getattr for Size, Change, Access Time and Modify Time. 
*/ 5032 NFSZERO_ATTRBIT(&attrbits); 5033 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5034 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5035 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5036 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5037 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5038 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5039 (void) nfsrv_putattrbit(nd, &attrbits); 5040 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5041 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5042 if (error != 0) { 5043 free(nd, M_TEMP); 5044 return (error); 5045 } 5046 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5047 nd->nd_repstat); 5048 /* Get rid of weak cache consistency data for now. */ 5049 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5050 (ND_NFSV4 | ND_V4WCCATTR)) { 5051 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5052 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5053 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5054 if (error != 0) 5055 goto nfsmout; 5056 /* 5057 * Get rid of Op# and status for next op. 5058 */ 5059 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5060 if (*++tl != 0) 5061 nd->nd_flag |= ND_NOMOREDATA; 5062 } 5063 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5064 if (error != 0) 5065 goto nfsmout; 5066 if (nd->nd_repstat != 0) 5067 error = nd->nd_repstat; 5068 /* 5069 * Get the Change, Size, Access Time and Modify Time attributes and set 5070 * on the Metadata file, so its attributes will be what the file's 5071 * would be if it had been written. 
5072 */ 5073 if (error == 0) { 5074 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5075 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5076 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5077 } 5078 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5079 nfsmout: 5080 m_freem(nd->nd_mrep); 5081 free(nd, M_TEMP); 5082 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5083 return (error); 5084 } 5085 5086 struct nfsrvsetattrdsdorpc { 5087 int done; 5088 int inprog; 5089 struct task tsk; 5090 fhandle_t fh; 5091 struct nfsmount *nmp; 5092 struct vnode *vp; 5093 struct ucred *cred; 5094 NFSPROC_T *p; 5095 struct nfsvattr na; 5096 struct nfsvattr dsna; 5097 int err; 5098 }; 5099 5100 /* 5101 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5102 */ 5103 static void 5104 start_setattrdsdorpc(void *arg, int pending) 5105 { 5106 struct nfsrvsetattrdsdorpc *drpc; 5107 5108 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5109 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5110 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5111 drpc->done = 1; 5112 } 5113 5114 static int 5115 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5116 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5117 struct nfsvattr *nap, int *failposp) 5118 { 5119 struct nfsrvsetattrdsdorpc *drpc, *tdrpc; 5120 struct nfsvattr na; 5121 int error, i, ret, timo; 5122 5123 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5124 drpc = NULL; 5125 if (mirrorcnt > 1) 5126 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5127 M_WAITOK); 5128 5129 /* 5130 * Do the setattr RPC for every DS, using a separate kernel process 5131 * for every DS except the last one. 
5132 */ 5133 error = 0; 5134 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5135 tdrpc->done = 0; 5136 tdrpc->inprog = 0; 5137 tdrpc->fh = *fhp; 5138 tdrpc->nmp = *nmpp; 5139 tdrpc->vp = vp; 5140 tdrpc->cred = cred; 5141 tdrpc->p = p; 5142 tdrpc->na = *nap; 5143 tdrpc->err = 0; 5144 ret = EIO; 5145 if (nfs_pnfsiothreads != 0) { 5146 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 5147 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 5148 ret); 5149 } 5150 if (ret != 0) { 5151 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 5152 &na); 5153 if (nfsds_failerr(ret) && *failposp == -1) 5154 *failposp = i; 5155 else if (error == 0 && ret != 0) 5156 error = ret; 5157 } 5158 nmpp++; 5159 fhp++; 5160 } 5161 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 5162 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5163 *failposp = mirrorcnt - 1; 5164 else if (error == 0 && ret != 0) 5165 error = ret; 5166 if (error == 0) 5167 error = nfsrv_setextattr(vp, &na, p); 5168 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 5169 tdrpc = drpc; 5170 timo = hz / 50; /* Wait for 20msec. */ 5171 if (timo < 1) 5172 timo = 1; 5173 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5174 /* Wait for RPCs on separate threads to complete. */ 5175 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5176 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 5177 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5178 *failposp = i; 5179 else if (error == 0 && tdrpc->err != 0) 5180 error = tdrpc->err; 5181 } 5182 free(drpc, M_TEMP); 5183 return (error); 5184 } 5185 5186 /* 5187 * Do a Setattr of an NFSv4 ACL on the DS file. 
5188 */ 5189 static int 5190 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5191 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 5192 { 5193 struct nfsrv_descript *nd; 5194 nfsv4stateid_t st; 5195 nfsattrbit_t attrbits; 5196 int error; 5197 5198 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 5199 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5200 /* 5201 * Use a stateid where other is an alternating 01010 pattern and 5202 * seqid is 0xffffffff. This value is not defined as special by 5203 * the RFC and is used by the FreeBSD NFS server to indicate an 5204 * MDS->DS proxy operation. 5205 */ 5206 st.other[0] = 0x55555555; 5207 st.other[1] = 0x55555555; 5208 st.other[2] = 0x55555555; 5209 st.seqid = 0xffffffff; 5210 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5211 NULL, NULL, 0, 0); 5212 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5213 NFSZERO_ATTRBIT(&attrbits); 5214 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 5215 /* 5216 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 5217 * so passing in the metadata "vp" will be ok, since it is of 5218 * the same type (VREG). 
5219 */ 5220 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 5221 NULL, 0, 0, 0, 0, 0, NULL); 5222 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5223 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5224 if (error != 0) { 5225 free(nd, M_TEMP); 5226 return (error); 5227 } 5228 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 5229 nd->nd_repstat); 5230 error = nd->nd_repstat; 5231 m_freem(nd->nd_mrep); 5232 free(nd, M_TEMP); 5233 return (error); 5234 } 5235 5236 struct nfsrvsetacldsdorpc { 5237 int done; 5238 int inprog; 5239 struct task tsk; 5240 fhandle_t fh; 5241 struct nfsmount *nmp; 5242 struct vnode *vp; 5243 struct ucred *cred; 5244 NFSPROC_T *p; 5245 struct acl *aclp; 5246 int err; 5247 }; 5248 5249 /* 5250 * Start up the thread that will execute nfsrv_setacldsdorpc(). 5251 */ 5252 static void 5253 start_setacldsdorpc(void *arg, int pending) 5254 { 5255 struct nfsrvsetacldsdorpc *drpc; 5256 5257 drpc = (struct nfsrvsetacldsdorpc *)arg; 5258 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 5259 drpc->vp, drpc->nmp, drpc->aclp); 5260 drpc->done = 1; 5261 } 5262 5263 static int 5264 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5265 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 5266 int *failposp) 5267 { 5268 struct nfsrvsetacldsdorpc *drpc, *tdrpc; 5269 int error, i, ret, timo; 5270 5271 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 5272 drpc = NULL; 5273 if (mirrorcnt > 1) 5274 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5275 M_WAITOK); 5276 5277 /* 5278 * Do the setattr RPC for every DS, using a separate kernel process 5279 * for every DS except the last one. 
5280 */ 5281 error = 0; 5282 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5283 tdrpc->done = 0; 5284 tdrpc->inprog = 0; 5285 tdrpc->fh = *fhp; 5286 tdrpc->nmp = *nmpp; 5287 tdrpc->vp = vp; 5288 tdrpc->cred = cred; 5289 tdrpc->p = p; 5290 tdrpc->aclp = aclp; 5291 tdrpc->err = 0; 5292 ret = EIO; 5293 if (nfs_pnfsiothreads != 0) { 5294 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 5295 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 5296 ret); 5297 } 5298 if (ret != 0) { 5299 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 5300 aclp); 5301 if (nfsds_failerr(ret) && *failposp == -1) 5302 *failposp = i; 5303 else if (error == 0 && ret != 0) 5304 error = ret; 5305 } 5306 nmpp++; 5307 fhp++; 5308 } 5309 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 5310 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5311 *failposp = mirrorcnt - 1; 5312 else if (error == 0 && ret != 0) 5313 error = ret; 5314 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 5315 tdrpc = drpc; 5316 timo = hz / 50; /* Wait for 20msec. */ 5317 if (timo < 1) 5318 timo = 1; 5319 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5320 /* Wait for RPCs on separate threads to complete. */ 5321 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5322 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 5323 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5324 *failposp = i; 5325 else if (error == 0 && tdrpc->err != 0) 5326 error = tdrpc->err; 5327 } 5328 free(drpc, M_TEMP); 5329 return (error); 5330 } 5331 5332 /* 5333 * Getattr call to the DS for the Modify, Size and Change attributes. 
5334 */ 5335 static int 5336 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5337 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 5338 { 5339 struct nfsrv_descript *nd; 5340 int error; 5341 nfsattrbit_t attrbits; 5342 5343 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 5344 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5345 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 5346 sizeof(fhandle_t), NULL, NULL, 0, 0); 5347 NFSZERO_ATTRBIT(&attrbits); 5348 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5349 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5350 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5351 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5352 (void) nfsrv_putattrbit(nd, &attrbits); 5353 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5354 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5355 if (error != 0) { 5356 free(nd, M_TEMP); 5357 return (error); 5358 } 5359 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 5360 nd->nd_repstat); 5361 if (nd->nd_repstat == 0) { 5362 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 5363 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 5364 NULL, NULL); 5365 /* 5366 * We can only save the updated values in the extended 5367 * attribute if the vp is exclusively locked. 5368 * This should happen when any of the following operations 5369 * occur on the vnode: 5370 * Close, Delegreturn, LayoutCommit, LayoutReturn 5371 * As such, the updated extended attribute should get saved 5372 * before nfsrv_checkdsattr() returns 0 and allows the cached 5373 * attributes to be returned without calling this function. 
5374 */ 5375 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 5376 error = nfsrv_setextattr(vp, nap, p); 5377 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 5378 error); 5379 } 5380 } else 5381 error = nd->nd_repstat; 5382 m_freem(nd->nd_mrep); 5383 free(nd, M_TEMP); 5384 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 5385 return (error); 5386 } 5387 5388 /* 5389 * Get the device id and file handle for a DS file. 5390 */ 5391 int 5392 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 5393 fhandle_t *fhp, char *devid) 5394 { 5395 int buflen, error; 5396 char *buf; 5397 5398 buflen = 1024; 5399 buf = malloc(buflen, M_TEMP, M_WAITOK); 5400 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 5401 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 5402 free(buf, M_TEMP); 5403 return (error); 5404 } 5405 5406 /* 5407 * Do a Lookup against the DS for the filename. 5408 */ 5409 static int 5410 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 5411 struct vnode **nvpp, NFSPROC_T *p) 5412 { 5413 struct nameidata named; 5414 struct ucred *tcred; 5415 char *bufp; 5416 u_long *hashp; 5417 struct vnode *nvp; 5418 int error; 5419 5420 tcred = newnfs_getcred(); 5421 named.ni_cnd.cn_nameiop = LOOKUP; 5422 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 5423 named.ni_cnd.cn_cred = tcred; 5424 named.ni_cnd.cn_thread = p; 5425 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 5426 nfsvno_setpathbuf(&named, &bufp, &hashp); 5427 named.ni_cnd.cn_nameptr = bufp; 5428 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 5429 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 5430 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 5431 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 5432 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 5433 NFSFREECRED(tcred); 5434 nfsvno_relpathbuf(&named); 5435 if (error == 0) 5436 *nvpp = nvp; 5437 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", 
error); 5438 return (error); 5439 } 5440 5441 /* 5442 * Set the file handle to the correct one. 5443 */ 5444 static void 5445 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp, 5446 NFSPROC_T *p) 5447 { 5448 struct mount *mp; 5449 struct nfsnode *np; 5450 int ret; 5451 5452 np = VTONFS(nvp); 5453 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 5454 /* 5455 * We can only do a setextattr for an exclusively 5456 * locked vp. Instead of trying to upgrade a shared 5457 * lock, just leave dsf_fh zeroed out and it will 5458 * keep doing this lookup until it is done with an 5459 * exclusively locked vp. 5460 */ 5461 if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) { 5462 ret = vn_start_write(vp, &mp, V_WAIT); 5463 NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n", 5464 ret); 5465 if (ret == 0) { 5466 ret = vn_extattr_set(vp, IO_NODELOCKED, 5467 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 5468 sizeof(*pf), (char *)pf, p); 5469 vn_finished_write(mp); 5470 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft " 5471 "vn_extattr_set=%d\n", ret); 5472 } 5473 } 5474 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 5475 } 5476 5477 /* 5478 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 5479 * when the DS has failed. 5480 */ 5481 void 5482 nfsrv_killrpcs(struct nfsmount *nmp) 5483 { 5484 5485 /* 5486 * Call newnfs_nmcancelreqs() to cause 5487 * any RPCs in progress on the mount point to 5488 * fail. 5489 * This will cause any process waiting for an 5490 * RPC to complete while holding a vnode lock 5491 * on the mounted-on vnode (such as "df" or 5492 * a non-forced "umount") to fail. 5493 * This will unlock the mounted-on vnode so 5494 * a forced dismount can succeed. 5495 * The NFSMNTP_CANCELRPCS flag should be set when this function is 5496 * called. 5497 */ 5498 newnfs_nmcancelreqs(nmp); 5499 } 5500 5501 /* 5502 * Sum up the statfs info for each of the DSs, so that the client will 5503 * receive the total for all DSs. 
5504 */ 5505 static int 5506 nfsrv_pnfsstatfs(struct statfs *sf) 5507 { 5508 struct statfs *tsf; 5509 struct nfsdevice *ds; 5510 struct vnode **dvpp, **tdvpp, *dvp; 5511 uint64_t tot; 5512 int cnt, error = 0, i; 5513 5514 if (nfsrv_devidcnt <= 0) 5515 return (ENXIO); 5516 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 5517 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 5518 5519 /* Get an array of the dvps for the DSs. */ 5520 tdvpp = dvpp; 5521 i = 0; 5522 NFSDDSLOCK(); 5523 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5524 if (ds->nfsdev_nmp != NULL) { 5525 if (++i > nfsrv_devidcnt) 5526 break; 5527 *tdvpp++ = ds->nfsdev_dvp; 5528 } 5529 } 5530 NFSDDSUNLOCK(); 5531 cnt = i; 5532 5533 /* Do a VFS_STATFS() for each of the DSs and sum them up. */ 5534 tdvpp = dvpp; 5535 for (i = 0; i < cnt && error == 0; i++) { 5536 dvp = *tdvpp++; 5537 error = VFS_STATFS(dvp->v_mount, tsf); 5538 if (error == 0) { 5539 if (sf->f_bsize == 0) { 5540 if (tsf->f_bsize > 0) 5541 sf->f_bsize = tsf->f_bsize; 5542 else 5543 sf->f_bsize = 8192; 5544 } 5545 if (tsf->f_blocks > 0) { 5546 if (sf->f_bsize != tsf->f_bsize) { 5547 tot = tsf->f_blocks * tsf->f_bsize; 5548 sf->f_blocks += (tot / sf->f_bsize); 5549 } else 5550 sf->f_blocks += tsf->f_blocks; 5551 } 5552 if (tsf->f_bfree > 0) { 5553 if (sf->f_bsize != tsf->f_bsize) { 5554 tot = tsf->f_bfree * tsf->f_bsize; 5555 sf->f_bfree += (tot / sf->f_bsize); 5556 } else 5557 sf->f_bfree += tsf->f_bfree; 5558 } 5559 if (tsf->f_bavail > 0) { 5560 if (sf->f_bsize != tsf->f_bsize) { 5561 tot = tsf->f_bavail * tsf->f_bsize; 5562 sf->f_bavail += (tot / sf->f_bsize); 5563 } else 5564 sf->f_bavail += tsf->f_bavail; 5565 } 5566 } 5567 } 5568 free(tsf, M_TEMP); 5569 free(dvpp, M_TEMP); 5570 return (error); 5571 } 5572 5573 /* 5574 * Set an NFSv4 acl. 
5575 */ 5576 int 5577 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 5578 { 5579 int error; 5580 5581 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 5582 error = NFSERR_ATTRNOTSUPP; 5583 goto out; 5584 } 5585 /* 5586 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 5587 * Make sure it has enough room for that - splitting every entry 5588 * into two and appending "canonical six" entries at the end. 5589 * Cribbed out of kern/vfs_acl.c - Rick M. 5590 */ 5591 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 5592 error = NFSERR_ATTRNOTSUPP; 5593 goto out; 5594 } 5595 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 5596 if (error == 0) { 5597 error = nfsrv_dssetacl(vp, aclp, cred, p); 5598 if (error == ENOENT) 5599 error = 0; 5600 } 5601 5602 out: 5603 NFSEXITCODE(error); 5604 return (error); 5605 } 5606 5607 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 5608 5609 /* 5610 * Called once to initialize data structures... 
5611 */ 5612 static int 5613 nfsd_modevent(module_t mod, int type, void *data) 5614 { 5615 int error = 0, i; 5616 static int loaded = 0; 5617 5618 switch (type) { 5619 case MOD_LOAD: 5620 if (loaded) 5621 goto out; 5622 newnfs_portinit(); 5623 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5624 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, 5625 MTX_DEF); 5626 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, 5627 MTX_DEF); 5628 } 5629 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 5630 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 5631 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 5632 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 5633 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 5634 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 5635 nfsrvd_initcache(); 5636 nfsd_init(); 5637 NFSD_LOCK(); 5638 nfsrvd_init(0); 5639 NFSD_UNLOCK(); 5640 nfsd_mntinit(); 5641 #ifdef VV_DISABLEDELEG 5642 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 5643 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 5644 #endif 5645 nfsd_call_servertimer = nfsrv_servertimer; 5646 nfsd_call_nfsd = nfssvc_nfsd; 5647 loaded = 1; 5648 break; 5649 5650 case MOD_UNLOAD: 5651 if (newnfs_numnfsd != 0) { 5652 error = EBUSY; 5653 break; 5654 } 5655 5656 #ifdef VV_DISABLEDELEG 5657 vn_deleg_ops.vndeleg_recall = NULL; 5658 vn_deleg_ops.vndeleg_disable = NULL; 5659 #endif 5660 nfsd_call_servertimer = NULL; 5661 nfsd_call_nfsd = NULL; 5662 5663 /* Clean out all NFSv4 state. */ 5664 nfsrv_throwawayallstate(curthread); 5665 5666 /* Clean the NFS server reply cache */ 5667 nfsrvd_cleancache(); 5668 5669 /* Free up the krpc server pool. 
*/ 5670 if (nfsrvd_pool != NULL) 5671 svcpool_destroy(nfsrvd_pool); 5672 5673 /* and get rid of the locks */ 5674 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5675 mtx_destroy(&nfsrchash_table[i].mtx); 5676 mtx_destroy(&nfsrcahash_table[i].mtx); 5677 } 5678 mtx_destroy(&nfsrc_udpmtx); 5679 mtx_destroy(&nfs_v4root_mutex); 5680 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 5681 mtx_destroy(&nfsrv_dontlistlock_mtx); 5682 mtx_destroy(&nfsrv_recalllock_mtx); 5683 for (i = 0; i < nfsrv_sessionhashsize; i++) 5684 mtx_destroy(&nfssessionhash[i].mtx); 5685 if (nfslayouthash != NULL) { 5686 for (i = 0; i < nfsrv_layouthashsize; i++) 5687 mtx_destroy(&nfslayouthash[i].mtx); 5688 free(nfslayouthash, M_NFSDSESSION); 5689 } 5690 lockdestroy(&nfsv4root_mnt.mnt_explock); 5691 free(nfsclienthash, M_NFSDCLIENT); 5692 free(nfslockhash, M_NFSDLOCKFILE); 5693 free(nfssessionhash, M_NFSDSESSION); 5694 loaded = 0; 5695 break; 5696 default: 5697 error = EOPNOTSUPP; 5698 break; 5699 } 5700 5701 out: 5702 NFSEXITCODE(error); 5703 return (error); 5704 } 5705 static moduledata_t nfsd_mod = { 5706 "nfsd", 5707 nfsd_modevent, 5708 NULL, 5709 }; 5710 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 5711 5712 /* So that loader and kldload(2) can find us, wherever we are.. */ 5713 MODULE_VERSION(nfsd, 1); 5714 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 5715 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); 5716 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 5717 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 5718 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 5719 5720