1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <sys/hash.h> 50 #include <sys/sysctl.h> 51 #include <nlm/nlm_prot.h> 52 #include <nlm/nlm.h> 53 54 FEATURE(nfsd, "NFSv4 server"); 55 56 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 57 extern int nfsrv_useacl; 58 extern int newnfs_numnfsd; 59 extern struct mount nfsv4root_mnt; 60 extern struct nfsrv_stablefirst nfsrv_stablefirst; 61 extern void (*nfsd_call_servertimer)(void); 62 extern SVCPOOL *nfsrvd_pool; 63 extern struct nfsv4lock nfsd_suspend_lock; 64 extern struct nfsclienthashhead *nfsclienthash; 65 extern struct nfslockhashhead *nfslockhash; 66 extern struct nfssessionhash *nfssessionhash; 67 extern int nfsrv_sessionhashsize; 68 extern struct nfsstatsv1 nfsstatsv1; 69 extern struct nfslayouthash *nfslayouthash; 70 extern int nfsrv_layouthashsize; 71 extern struct mtx nfsrv_dslock_mtx; 72 extern int nfs_pnfsiothreads; 73 extern struct nfsdontlisthead nfsrv_dontlisthead; 74 extern volatile int nfsrv_dontlistlen; 75 extern volatile int nfsrv_devidcnt; 76 extern int nfsrv_maxpnfsmirror; 77 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 78 NFSDLOCKMUTEX; 79 NFSSTATESPINLOCK; 80 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 81 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 82 struct mtx nfsrc_udpmtx; 83 struct mtx nfs_v4root_mutex; 84 struct mtx nfsrv_dontlistlock_mtx; 85 struct mtx nfsrv_recalllock_mtx; 86 struct nfsrvfh nfs_rootfh, nfs_pubfh; 87 int nfs_pubfhset = 0, nfs_rootfhset = 0; 88 struct proc *nfsd_master_proc = NULL; 89 int nfsd_debuglevel = 0; 90 static pid_t nfsd_master_pid = (pid_t)-1; 91 static char nfsd_master_comm[MAXCOMLEN + 1]; 92 static struct timeval nfsd_master_start; 93 static uint32_t nfsv4_sysid = 0; 94 static fhandle_t zerofh; 95 96 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 97 struct ucred *); 98 99 int nfsrv_enable_crossmntpt = 1; 100 static int nfs_commit_blks; 101 static int nfs_commit_miss; 102 extern int nfsrv_issuedelegs; 103 extern int nfsrv_dolocallocks; 104 extern int nfsd_enable_stringtouid; 105 extern struct nfsdevicehead nfsrv_devidhead; 106 107 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 108 NFSPROC_T *); 109 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 110 int *, char *, fhandle_t *); 111 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 112 NFSPROC_T *); 113 static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int, 114 struct ucred *, struct thread *, int, struct mbuf **, char *, 115 struct mbuf **, struct nfsvattr *, struct acl *); 116 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 117 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 118 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 119 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 120 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 121 char *, int *); 122 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 123 struct vnode *, struct nfsmount **, int, struct acl *, int *); 124 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 125 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 126 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 127 struct vnode *, struct nfsmount *, struct nfsvattr *); 128 static int nfsrv_putfhname(fhandle_t *, char *); 129 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 130 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 131 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, 132 struct vnode *, NFSPROC_T *); 133 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 134 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 135 NFSPROC_T *); 136 static int nfsrv_pnfsstatfs(struct statfs *); 137 138 int nfs_pnfsio(task_fn_t *, void *); 139 140 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); 141 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 142 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 143 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 144 0, ""); 145 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 146 0, ""); 147 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 148 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 149 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 150 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 151 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 152 0, "Debug level for NFS server"); 153 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 154 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); 155 static int nfsrv_pnfsgetdsattr = 1; 156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 157 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 158 159 /* 160 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 161 * not running. 162 * The dsN subdirectories for the increased values must have been created 163 * on all DS servers before this increase is done. 164 */ 165 u_int nfsrv_dsdirsize = 20; 166 static int 167 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 168 { 169 int error, newdsdirsize; 170 171 newdsdirsize = nfsrv_dsdirsize; 172 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 173 if (error != 0 || req->newptr == NULL) 174 return (error); 175 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 176 newnfs_numnfsd != 0) 177 return (EINVAL); 178 nfsrv_dsdirsize = newdsdirsize; 179 return (0); 180 } 181 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0, 182 sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", 183 "Number of dsN subdirs on the DS servers"); 184 185 #define MAX_REORDERED_RPC 16 186 #define NUM_HEURISTIC 1031 187 #define NHUSE_INIT 64 188 #define NHUSE_INC 16 189 #define NHUSE_MAX 2048 190 191 static struct nfsheur { 192 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 193 off_t nh_nextoff; /* next offset for sequential detection */ 194 int nh_use; /* use count for selection */ 195 int nh_seqcount; /* heuristic */ 196 } nfsheur[NUM_HEURISTIC]; 197 198 199 /* 200 * Heuristic to detect sequential operation. 201 */ 202 static struct nfsheur * 203 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 204 { 205 struct nfsheur *nh; 206 int hi, try; 207 208 /* Locate best candidate. */ 209 try = 32; 210 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 211 nh = &nfsheur[hi]; 212 while (try--) { 213 if (nfsheur[hi].nh_vp == vp) { 214 nh = &nfsheur[hi]; 215 break; 216 } 217 if (nfsheur[hi].nh_use > 0) 218 --nfsheur[hi].nh_use; 219 hi = (hi + 1) % NUM_HEURISTIC; 220 if (nfsheur[hi].nh_use < nh->nh_use) 221 nh = &nfsheur[hi]; 222 } 223 224 /* Initialize hint if this is a new file. */ 225 if (nh->nh_vp != vp) { 226 nh->nh_vp = vp; 227 nh->nh_nextoff = uio->uio_offset; 228 nh->nh_use = NHUSE_INIT; 229 if (uio->uio_offset == 0) 230 nh->nh_seqcount = 4; 231 else 232 nh->nh_seqcount = 1; 233 } 234 235 /* Calculate heuristic. */ 236 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 237 uio->uio_offset == nh->nh_nextoff) { 238 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 239 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 240 if (nh->nh_seqcount > IO_SEQMAX) 241 nh->nh_seqcount = IO_SEQMAX; 242 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 243 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 244 /* Probably a reordered RPC, leave seqcount alone. */ 245 } else if (nh->nh_seqcount > 1) { 246 nh->nh_seqcount /= 2; 247 } else { 248 nh->nh_seqcount = 0; 249 } 250 nh->nh_use += NHUSE_INC; 251 if (nh->nh_use > NHUSE_MAX) 252 nh->nh_use = NHUSE_MAX; 253 return (nh); 254 } 255 256 /* 257 * Get attributes into nfsvattr structure. 258 */ 259 int 260 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 261 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 262 nfsattrbit_t *attrbitp) 263 { 264 int error, gotattr, lockedit = 0; 265 struct nfsvattr na; 266 267 if (vpislocked == 0) { 268 /* 269 * When vpislocked == 0, the vnode is either exclusively 270 * locked by this thread or not locked by this thread. 271 * As such, shared lock it, if not exclusively locked. 272 */ 273 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 274 lockedit = 1; 275 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 276 } 277 } 278 279 /* 280 * Acquire the Change, Size and TimeModify attributes, as required. 281 * This needs to be done for regular files if: 282 * - non-NFSv4 RPCs or 283 * - when attrbitp == NULL or 284 * - an NFSv4 RPC with any of the above attributes in attrbitp. 285 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 286 * these attributes. nfsrv_proxyds() will return an error if the 287 * server is not a pNFS one. 288 */ 289 gotattr = 0; 290 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 291 (nd->nd_flag & ND_NFSV4) == 0 || 292 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 293 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 294 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 295 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY))) { 296 error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p, 297 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); 298 if (error == 0) 299 gotattr = 1; 300 } 301 302 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 303 if (lockedit != 0) 304 NFSVOPUNLOCK(vp, 0); 305 306 /* 307 * If we got the Change, Size and Modify Time from the DS, 308 * replace them. 309 */ 310 if (gotattr != 0) { 311 nvap->na_atime = na.na_atime; 312 nvap->na_mtime = na.na_mtime; 313 nvap->na_filerev = na.na_filerev; 314 nvap->na_size = na.na_size; 315 } 316 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 317 error, (uintmax_t)na.na_filerev); 318 319 NFSEXITCODE(error); 320 return (error); 321 } 322 323 /* 324 * Get a file handle for a vnode. 325 */ 326 int 327 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 328 { 329 int error; 330 331 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 332 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 333 error = VOP_VPTOFH(vp, &fhp->fh_fid); 334 335 NFSEXITCODE(error); 336 return (error); 337 } 338 339 /* 340 * Perform access checking for vnodes obtained from file handles that would 341 * refer to files already opened by a Unix client. You cannot just use 342 * vn_writechk() and VOP_ACCESSX() for two reasons. 343 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 344 * case. 345 * 2 - The owner is to be given access irrespective of mode bits for some 346 * operations, so that processes that chmod after opening a file don't 347 * break. 348 */ 349 int 350 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 351 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 352 u_int32_t *supportedtypep) 353 { 354 struct vattr vattr; 355 int error = 0, getret = 0; 356 357 if (vpislocked == 0) { 358 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 359 error = EPERM; 360 goto out; 361 } 362 } 363 if (accmode & VWRITE) { 364 /* Just vn_writechk() changed to check rdonly */ 365 /* 366 * Disallow write attempts on read-only file systems; 367 * unless the file is a socket or a block or character 368 * device resident on the file system. 369 */ 370 if (NFSVNO_EXRDONLY(exp) || 371 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 372 switch (vp->v_type) { 373 case VREG: 374 case VDIR: 375 case VLNK: 376 error = EROFS; 377 default: 378 break; 379 } 380 } 381 /* 382 * If there's shared text associated with 383 * the inode, try to free it up once. If 384 * we fail, we can't allow writing. 385 */ 386 if (VOP_IS_TEXT(vp) && error == 0) 387 error = ETXTBSY; 388 } 389 if (error != 0) { 390 if (vpislocked == 0) 391 NFSVOPUNLOCK(vp, 0); 392 goto out; 393 } 394 395 /* 396 * Should the override still be applied when ACLs are enabled? 397 */ 398 error = VOP_ACCESSX(vp, accmode, cred, p); 399 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 400 /* 401 * Try again with VEXPLICIT_DENY, to see if the test for 402 * deletion is supported. 403 */ 404 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 405 if (error == 0) { 406 if (vp->v_type == VDIR) { 407 accmode &= ~(VDELETE | VDELETE_CHILD); 408 accmode |= VWRITE; 409 error = VOP_ACCESSX(vp, accmode, cred, p); 410 } else if (supportedtypep != NULL) { 411 *supportedtypep &= ~NFSACCESS_DELETE; 412 } 413 } 414 } 415 416 /* 417 * Allow certain operations for the owner (reads and writes 418 * on files that are already open). 419 */ 420 if (override != NFSACCCHK_NOOVERRIDE && 421 (error == EPERM || error == EACCES)) { 422 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 423 error = 0; 424 else if (override & NFSACCCHK_ALLOWOWNER) { 425 getret = VOP_GETATTR(vp, &vattr, cred); 426 if (getret == 0 && cred->cr_uid == vattr.va_uid) 427 error = 0; 428 } 429 } 430 if (vpislocked == 0) 431 NFSVOPUNLOCK(vp, 0); 432 433 out: 434 NFSEXITCODE(error); 435 return (error); 436 } 437 438 /* 439 * Set attribute(s) vnop. 440 */ 441 int 442 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 443 struct thread *p, struct nfsexstuff *exp) 444 { 445 int error; 446 447 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 448 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 449 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 450 nvap->na_vattr.va_size != VNOVAL || 451 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 452 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 453 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 454 /* For a pNFS server, set the attributes on the DS file. */ 455 error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR, 456 NULL, NULL, NULL, nvap, NULL); 457 if (error == ENOENT) 458 error = 0; 459 } 460 NFSEXITCODE(error); 461 return (error); 462 } 463 464 /* 465 * Set up nameidata for a lookup() call and do it. 466 */ 467 int 468 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 469 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, 470 struct vnode **retdirp) 471 { 472 struct componentname *cnp = &ndp->ni_cnd; 473 int i; 474 struct iovec aiov; 475 struct uio auio; 476 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 477 int error = 0; 478 char *cp; 479 480 *retdirp = NULL; 481 cnp->cn_nameptr = cnp->cn_pnbuf; 482 ndp->ni_lcf = 0; 483 /* 484 * Extract and set starting directory. 485 */ 486 if (dp->v_type != VDIR) { 487 if (islocked) 488 vput(dp); 489 else 490 vrele(dp); 491 nfsvno_relpathbuf(ndp); 492 error = ENOTDIR; 493 goto out1; 494 } 495 if (islocked) 496 NFSVOPUNLOCK(dp, 0); 497 VREF(dp); 498 *retdirp = dp; 499 if (NFSVNO_EXRDONLY(exp)) 500 cnp->cn_flags |= RDONLY; 501 ndp->ni_segflg = UIO_SYSSPACE; 502 503 if (nd->nd_flag & ND_PUBLOOKUP) { 504 ndp->ni_loopcnt = 0; 505 if (cnp->cn_pnbuf[0] == '/') { 506 vrele(dp); 507 /* 508 * Check for degenerate pathnames here, since lookup() 509 * panics on them. 510 */ 511 for (i = 1; i < ndp->ni_pathlen; i++) 512 if (cnp->cn_pnbuf[i] != '/') 513 break; 514 if (i == ndp->ni_pathlen) { 515 error = NFSERR_ACCES; 516 goto out; 517 } 518 dp = rootvnode; 519 VREF(dp); 520 } 521 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 522 (nd->nd_flag & ND_NFSV4) == 0) { 523 /* 524 * Only cross mount points for NFSv4 when doing a 525 * mount while traversing the file system above 526 * the mount point, unless nfsrv_enable_crossmntpt is set. 527 */ 528 cnp->cn_flags |= NOCROSSMOUNT; 529 } 530 531 /* 532 * Initialize for scan, set ni_startdir and bump ref on dp again 533 * because lookup() will dereference ni_startdir. 534 */ 535 536 cnp->cn_thread = p; 537 ndp->ni_startdir = dp; 538 ndp->ni_rootdir = rootvnode; 539 ndp->ni_topdir = NULL; 540 541 if (!lockleaf) 542 cnp->cn_flags |= LOCKLEAF; 543 for (;;) { 544 cnp->cn_nameptr = cnp->cn_pnbuf; 545 /* 546 * Call lookup() to do the real work. If an error occurs, 547 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 548 * we do not have to dereference anything before returning. 549 * In either case ni_startdir will be dereferenced and NULLed 550 * out. 551 */ 552 error = lookup(ndp); 553 if (error) 554 break; 555 556 /* 557 * Check for encountering a symbolic link. Trivial 558 * termination occurs if no symlink encountered. 559 */ 560 if ((cnp->cn_flags & ISSYMLINK) == 0) { 561 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) 562 nfsvno_relpathbuf(ndp); 563 if (ndp->ni_vp && !lockleaf) 564 NFSVOPUNLOCK(ndp->ni_vp, 0); 565 break; 566 } 567 568 /* 569 * Validate symlink 570 */ 571 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 572 NFSVOPUNLOCK(ndp->ni_dvp, 0); 573 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 574 error = EINVAL; 575 goto badlink2; 576 } 577 578 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 579 error = ELOOP; 580 goto badlink2; 581 } 582 if (ndp->ni_pathlen > 1) 583 cp = uma_zalloc(namei_zone, M_WAITOK); 584 else 585 cp = cnp->cn_pnbuf; 586 aiov.iov_base = cp; 587 aiov.iov_len = MAXPATHLEN; 588 auio.uio_iov = &aiov; 589 auio.uio_iovcnt = 1; 590 auio.uio_offset = 0; 591 auio.uio_rw = UIO_READ; 592 auio.uio_segflg = UIO_SYSSPACE; 593 auio.uio_td = NULL; 594 auio.uio_resid = MAXPATHLEN; 595 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 596 if (error) { 597 badlink1: 598 if (ndp->ni_pathlen > 1) 599 uma_zfree(namei_zone, cp); 600 badlink2: 601 vrele(ndp->ni_dvp); 602 vput(ndp->ni_vp); 603 break; 604 } 605 linklen = MAXPATHLEN - auio.uio_resid; 606 if (linklen == 0) { 607 error = ENOENT; 608 goto badlink1; 609 } 610 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 611 error = ENAMETOOLONG; 612 goto badlink1; 613 } 614 615 /* 616 * Adjust or replace path 617 */ 618 if (ndp->ni_pathlen > 1) { 619 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 620 uma_zfree(namei_zone, cnp->cn_pnbuf); 621 cnp->cn_pnbuf = cp; 622 } else 623 cnp->cn_pnbuf[linklen] = '\0'; 624 ndp->ni_pathlen += linklen; 625 626 /* 627 * Cleanup refs for next loop and check if root directory 628 * should replace current directory. Normally ni_dvp 629 * becomes the new base directory and is cleaned up when 630 * we loop. Explicitly null pointers after invalidation 631 * to clarify operation. 632 */ 633 vput(ndp->ni_vp); 634 ndp->ni_vp = NULL; 635 636 if (cnp->cn_pnbuf[0] == '/') { 637 vrele(ndp->ni_dvp); 638 ndp->ni_dvp = ndp->ni_rootdir; 639 VREF(ndp->ni_dvp); 640 } 641 ndp->ni_startdir = ndp->ni_dvp; 642 ndp->ni_dvp = NULL; 643 } 644 if (!lockleaf) 645 cnp->cn_flags &= ~LOCKLEAF; 646 647 out: 648 if (error) { 649 nfsvno_relpathbuf(ndp); 650 ndp->ni_vp = NULL; 651 ndp->ni_dvp = NULL; 652 ndp->ni_startdir = NULL; 653 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 654 ndp->ni_dvp = NULL; 655 } 656 657 out1: 658 NFSEXITCODE2(error, nd); 659 return (error); 660 } 661 662 /* 663 * Set up a pathname buffer and return a pointer to it and, optionally 664 * set a hash pointer. 665 */ 666 void 667 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 668 { 669 struct componentname *cnp = &ndp->ni_cnd; 670 671 cnp->cn_flags |= (NOMACCHECK | HASBUF); 672 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 673 if (hashpp != NULL) 674 *hashpp = NULL; 675 *bufpp = cnp->cn_pnbuf; 676 } 677 678 /* 679 * Release the above path buffer, if not released by nfsvno_namei(). 680 */ 681 void 682 nfsvno_relpathbuf(struct nameidata *ndp) 683 { 684 685 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) 686 panic("nfsrelpath"); 687 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 688 ndp->ni_cnd.cn_flags &= ~HASBUF; 689 } 690 691 /* 692 * Readlink vnode op into an mbuf list. 693 */ 694 int 695 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, 696 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 697 { 698 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; 699 struct iovec *ivp = iv; 700 struct uio io, *uiop = &io; 701 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; 702 int i, len, tlen, error = 0; 703 704 len = 0; 705 i = 0; 706 while (len < NFS_MAXPATHLEN) { 707 NFSMGET(mp); 708 MCLGET(mp, M_WAITOK); 709 mp->m_len = M_SIZE(mp); 710 if (len == 0) { 711 mp3 = mp2 = mp; 712 } else { 713 mp2->m_next = mp; 714 mp2 = mp; 715 } 716 if ((len + mp->m_len) > NFS_MAXPATHLEN) { 717 mp->m_len = NFS_MAXPATHLEN - len; 718 len = NFS_MAXPATHLEN; 719 } else { 720 len += mp->m_len; 721 } 722 ivp->iov_base = mtod(mp, caddr_t); 723 ivp->iov_len = mp->m_len; 724 i++; 725 ivp++; 726 } 727 uiop->uio_iov = iv; 728 uiop->uio_iovcnt = i; 729 uiop->uio_offset = 0; 730 uiop->uio_resid = len; 731 uiop->uio_rw = UIO_READ; 732 uiop->uio_segflg = UIO_SYSSPACE; 733 uiop->uio_td = NULL; 734 error = VOP_READLINK(vp, uiop, cred); 735 if (error) { 736 m_freem(mp3); 737 *lenp = 0; 738 goto out; 739 } 740 if (uiop->uio_resid > 0) { 741 len -= uiop->uio_resid; 742 tlen = NFSM_RNDUP(len); 743 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); 744 } 745 *lenp = len; 746 *mpp = mp3; 747 *mpendp = mp; 748 749 out: 750 NFSEXITCODE(error); 751 return (error); 752 } 753 754 /* 755 * Read vnode op call into mbuf list. 756 */ 757 int 758 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 759 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) 760 { 761 struct mbuf *m; 762 int i; 763 struct iovec *iv; 764 struct iovec *iv2; 765 int error = 0, len, left, siz, tlen, ioflag = 0; 766 struct mbuf *m2 = NULL, *m3; 767 struct uio io, *uiop = &io; 768 struct nfsheur *nh; 769 770 /* 771 * Attempt to read from a DS file. A return of ENOENT implies 772 * there is no DS file to read. 773 */ 774 error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 775 NULL, mpendp, NULL, NULL); 776 if (error != ENOENT) 777 return (error); 778 779 len = left = NFSM_RNDUP(cnt); 780 m3 = NULL; 781 /* 782 * Generate the mbuf list with the uio_iov ref. to it. 783 */ 784 i = 0; 785 while (left > 0) { 786 NFSMGET(m); 787 MCLGET(m, M_WAITOK); 788 m->m_len = 0; 789 siz = min(M_TRAILINGSPACE(m), left); 790 left -= siz; 791 i++; 792 if (m3) 793 m2->m_next = m; 794 else 795 m3 = m; 796 m2 = m; 797 } 798 iv = malloc(i * sizeof (struct iovec), 799 M_TEMP, M_WAITOK); 800 uiop->uio_iov = iv2 = iv; 801 m = m3; 802 left = len; 803 i = 0; 804 while (left > 0) { 805 if (m == NULL) 806 panic("nfsvno_read iov"); 807 siz = min(M_TRAILINGSPACE(m), left); 808 if (siz > 0) { 809 iv->iov_base = mtod(m, caddr_t) + m->m_len; 810 iv->iov_len = siz; 811 m->m_len += siz; 812 left -= siz; 813 iv++; 814 i++; 815 } 816 m = m->m_next; 817 } 818 uiop->uio_iovcnt = i; 819 uiop->uio_offset = off; 820 uiop->uio_resid = len; 821 uiop->uio_rw = UIO_READ; 822 uiop->uio_segflg = UIO_SYSSPACE; 823 uiop->uio_td = NULL; 824 nh = nfsrv_sequential_heuristic(uiop, vp); 825 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 826 /* XXX KDM make this more systematic? */ 827 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 828 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 829 free(iv2, M_TEMP); 830 if (error) { 831 m_freem(m3); 832 *mpp = NULL; 833 goto out; 834 } 835 nh->nh_nextoff = uiop->uio_offset; 836 tlen = len - uiop->uio_resid; 837 cnt = cnt < tlen ? cnt : tlen; 838 tlen = NFSM_RNDUP(cnt); 839 if (tlen == 0) { 840 m_freem(m3); 841 m3 = NULL; 842 } else if (len != tlen || tlen != cnt) 843 nfsrv_adj(m3, len - tlen, tlen - cnt); 844 *mpp = m3; 845 *mpendp = m2; 846 847 out: 848 NFSEXITCODE(error); 849 return (error); 850 } 851 852 /* 853 * Write vnode op from an mbuf list. 854 */ 855 int 856 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, 857 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 858 { 859 struct iovec *ivp; 860 int i, len; 861 struct iovec *iv; 862 int ioflags, error; 863 struct uio io, *uiop = &io; 864 struct nfsheur *nh; 865 866 /* 867 * Attempt to write to a DS file. A return of ENOENT implies 868 * there is no DS file to write. 869 */ 870 error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS, 871 &mp, cp, NULL, NULL, NULL); 872 if (error != ENOENT) { 873 *stable = NFSWRITE_FILESYNC; 874 return (error); 875 } 876 877 ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 878 M_WAITOK); 879 uiop->uio_iov = iv = ivp; 880 uiop->uio_iovcnt = cnt; 881 i = mtod(mp, caddr_t) + mp->m_len - cp; 882 len = retlen; 883 while (len > 0) { 884 if (mp == NULL) 885 panic("nfsvno_write"); 886 if (i > 0) { 887 i = min(i, len); 888 ivp->iov_base = cp; 889 ivp->iov_len = i; 890 ivp++; 891 len -= i; 892 } 893 mp = mp->m_next; 894 if (mp) { 895 i = mp->m_len; 896 cp = mtod(mp, caddr_t); 897 } 898 } 899 900 if (*stable == NFSWRITE_UNSTABLE) 901 ioflags = IO_NODELOCKED; 902 else 903 ioflags = (IO_SYNC | IO_NODELOCKED); 904 uiop->uio_resid = retlen; 905 uiop->uio_rw = UIO_WRITE; 906 uiop->uio_segflg = UIO_SYSSPACE; 907 NFSUIOPROC(uiop, p); 908 uiop->uio_offset = off; 909 nh = nfsrv_sequential_heuristic(uiop, vp); 910 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 911 /* XXX KDM make this more systematic? */ 912 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 913 error = VOP_WRITE(vp, uiop, ioflags, cred); 914 if (error == 0) 915 nh->nh_nextoff = uiop->uio_offset; 916 free(iv, M_TEMP); 917 918 NFSEXITCODE(error); 919 return (error); 920 } 921 922 /* 923 * Common code for creating a regular file (plus special files for V2). 924 */ 925 int 926 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 927 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 928 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) 929 { 930 u_quad_t tempsize; 931 int error; 932 933 error = nd->nd_repstat; 934 if (!error && ndp->ni_vp == NULL) { 935 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 936 vrele(ndp->ni_startdir); 937 error = VOP_CREATE(ndp->ni_dvp, 938 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 939 /* For a pNFS server, create the data file on a DS. */ 940 if (error == 0 && nvap->na_type == VREG) { 941 /* 942 * Create a data file on a DS for a pNFS server. 943 * This function just returns if not 944 * running a pNFS DS or the creation fails. 945 */ 946 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 947 nd->nd_cred, p); 948 } 949 vput(ndp->ni_dvp); 950 nfsvno_relpathbuf(ndp); 951 if (!error) { 952 if (*exclusive_flagp) { 953 *exclusive_flagp = 0; 954 NFSVNO_ATTRINIT(nvap); 955 nvap->na_atime.tv_sec = cverf[0]; 956 nvap->na_atime.tv_nsec = cverf[1]; 957 error = VOP_SETATTR(ndp->ni_vp, 958 &nvap->na_vattr, nd->nd_cred); 959 if (error != 0) { 960 vput(ndp->ni_vp); 961 ndp->ni_vp = NULL; 962 error = NFSERR_NOTSUPP; 963 } 964 } 965 } 966 /* 967 * NFS V2 Only. nfsrvd_mknod() does this for V3. 968 * (This implies, just get out on an error.) 969 */ 970 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 971 nvap->na_type == VFIFO) { 972 if (nvap->na_type == VCHR && rdev == 0xffffffff) 973 nvap->na_type = VFIFO; 974 if (nvap->na_type != VFIFO && 975 (error = priv_check_cred(nd->nd_cred, 976 PRIV_VFS_MKNOD_DEV, 0))) { 977 vrele(ndp->ni_startdir); 978 nfsvno_relpathbuf(ndp); 979 vput(ndp->ni_dvp); 980 goto out; 981 } 982 nvap->na_rdev = rdev; 983 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 984 &ndp->ni_cnd, &nvap->na_vattr); 985 vput(ndp->ni_dvp); 986 nfsvno_relpathbuf(ndp); 987 vrele(ndp->ni_startdir); 988 if (error) 989 goto out; 990 } else { 991 vrele(ndp->ni_startdir); 992 nfsvno_relpathbuf(ndp); 993 vput(ndp->ni_dvp); 994 error = ENXIO; 995 goto out; 996 } 997 *vpp = ndp->ni_vp; 998 } else { 999 /* 1000 * Handle cases where error is already set and/or 1001 * the file exists. 1002 * 1 - clean up the lookup 1003 * 2 - iff !error and na_size set, truncate it 1004 */ 1005 vrele(ndp->ni_startdir); 1006 nfsvno_relpathbuf(ndp); 1007 *vpp = ndp->ni_vp; 1008 if (ndp->ni_dvp == *vpp) 1009 vrele(ndp->ni_dvp); 1010 else 1011 vput(ndp->ni_dvp); 1012 if (!error && nvap->na_size != VNOVAL) { 1013 error = nfsvno_accchk(*vpp, VWRITE, 1014 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1015 NFSACCCHK_VPISLOCKED, NULL); 1016 if (!error) { 1017 tempsize = nvap->na_size; 1018 NFSVNO_ATTRINIT(nvap); 1019 nvap->na_size = tempsize; 1020 error = VOP_SETATTR(*vpp, 1021 &nvap->na_vattr, nd->nd_cred); 1022 } 1023 } 1024 if (error) 1025 vput(*vpp); 1026 } 1027 1028 out: 1029 NFSEXITCODE(error); 1030 return (error); 1031 } 1032 1033 /* 1034 * Do a mknod vnode op. 1035 */ 1036 int 1037 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1038 struct thread *p) 1039 { 1040 int error = 0; 1041 enum vtype vtyp; 1042 1043 vtyp = nvap->na_type; 1044 /* 1045 * Iff doesn't exist, create it. 1046 */ 1047 if (ndp->ni_vp) { 1048 vrele(ndp->ni_startdir); 1049 nfsvno_relpathbuf(ndp); 1050 vput(ndp->ni_dvp); 1051 vrele(ndp->ni_vp); 1052 error = EEXIST; 1053 goto out; 1054 } 1055 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1056 vrele(ndp->ni_startdir); 1057 nfsvno_relpathbuf(ndp); 1058 vput(ndp->ni_dvp); 1059 error = NFSERR_BADTYPE; 1060 goto out; 1061 } 1062 if (vtyp == VSOCK) { 1063 vrele(ndp->ni_startdir); 1064 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1065 &ndp->ni_cnd, &nvap->na_vattr); 1066 vput(ndp->ni_dvp); 1067 nfsvno_relpathbuf(ndp); 1068 } else { 1069 if (nvap->na_type != VFIFO && 1070 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { 1071 vrele(ndp->ni_startdir); 1072 nfsvno_relpathbuf(ndp); 1073 vput(ndp->ni_dvp); 1074 goto out; 1075 } 1076 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1077 &ndp->ni_cnd, &nvap->na_vattr); 1078 vput(ndp->ni_dvp); 1079 nfsvno_relpathbuf(ndp); 1080 vrele(ndp->ni_startdir); 1081 /* 1082 * Since VOP_MKNOD returns the ni_vp, I can't 1083 * see any reason to do the lookup. 1084 */ 1085 } 1086 1087 out: 1088 NFSEXITCODE(error); 1089 return (error); 1090 } 1091 1092 /* 1093 * Mkdir vnode op. 1094 */ 1095 int 1096 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1097 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1098 { 1099 int error = 0; 1100 1101 if (ndp->ni_vp != NULL) { 1102 if (ndp->ni_dvp == ndp->ni_vp) 1103 vrele(ndp->ni_dvp); 1104 else 1105 vput(ndp->ni_dvp); 1106 vrele(ndp->ni_vp); 1107 nfsvno_relpathbuf(ndp); 1108 error = EEXIST; 1109 goto out; 1110 } 1111 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1112 &nvap->na_vattr); 1113 vput(ndp->ni_dvp); 1114 nfsvno_relpathbuf(ndp); 1115 1116 out: 1117 NFSEXITCODE(error); 1118 return (error); 1119 } 1120 1121 /* 1122 * symlink vnode op. 1123 */ 1124 int 1125 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1126 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1127 struct nfsexstuff *exp) 1128 { 1129 int error = 0; 1130 1131 if (ndp->ni_vp) { 1132 vrele(ndp->ni_startdir); 1133 nfsvno_relpathbuf(ndp); 1134 if (ndp->ni_dvp == ndp->ni_vp) 1135 vrele(ndp->ni_dvp); 1136 else 1137 vput(ndp->ni_dvp); 1138 vrele(ndp->ni_vp); 1139 error = EEXIST; 1140 goto out; 1141 } 1142 1143 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1144 &nvap->na_vattr, pathcp); 1145 vput(ndp->ni_dvp); 1146 vrele(ndp->ni_startdir); 1147 nfsvno_relpathbuf(ndp); 1148 /* 1149 * Although FreeBSD still had the lookup code in 1150 * it for 7/current, there doesn't seem to be any 1151 * point, since VOP_SYMLINK() returns the ni_vp. 1152 * Just vput it for v2. 1153 */ 1154 if (!not_v2 && !error) 1155 vput(ndp->ni_vp); 1156 1157 out: 1158 NFSEXITCODE(error); 1159 return (error); 1160 } 1161 1162 /* 1163 * Parse symbolic link arguments. 1164 * This function has an ugly side effect. It will malloc() an area for 1165 * the symlink and set iov_base to point to it, only if it succeeds. 1166 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1167 * be FREE'd later. 1168 */ 1169 int 1170 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1171 struct thread *p, char **pathcpp, int *lenp) 1172 { 1173 u_int32_t *tl; 1174 char *pathcp = NULL; 1175 int error = 0, len; 1176 struct nfsv2_sattr *sp; 1177 1178 *pathcpp = NULL; 1179 *lenp = 0; 1180 if ((nd->nd_flag & ND_NFSV3) && 1181 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1182 goto nfsmout; 1183 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1184 len = fxdr_unsigned(int, *tl); 1185 if (len > NFS_MAXPATHLEN || len <= 0) { 1186 error = EBADRPC; 1187 goto nfsmout; 1188 } 1189 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1190 error = nfsrv_mtostr(nd, pathcp, len); 1191 if (error) 1192 goto nfsmout; 1193 if (nd->nd_flag & ND_NFSV2) { 1194 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1195 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1196 } 1197 *pathcpp = pathcp; 1198 *lenp = len; 1199 NFSEXITCODE2(0, nd); 1200 return (0); 1201 nfsmout: 1202 if (pathcp) 1203 free(pathcp, M_TEMP); 1204 NFSEXITCODE2(error, nd); 1205 return (error); 1206 } 1207 1208 /* 1209 * Remove a non-directory object. 1210 */ 1211 int 1212 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1213 struct thread *p, struct nfsexstuff *exp) 1214 { 1215 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1216 int error = 0, mirrorcnt; 1217 char fname[PNFS_FILENAME_LEN + 1]; 1218 fhandle_t fh; 1219 1220 vp = ndp->ni_vp; 1221 dsdvp[0] = NULL; 1222 if (vp->v_type == VDIR) 1223 error = NFSERR_ISDIR; 1224 else if (is_v4) 1225 error = nfsrv_checkremove(vp, 1, p); 1226 if (error == 0) 1227 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1228 if (!error) 1229 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1230 if (error == 0 && dsdvp[0] != NULL) 1231 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1232 if (ndp->ni_dvp == vp) 1233 vrele(ndp->ni_dvp); 1234 else 1235 vput(ndp->ni_dvp); 1236 vput(vp); 1237 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1238 nfsvno_relpathbuf(ndp); 1239 NFSEXITCODE(error); 1240 return (error); 1241 } 1242 1243 /* 1244 * Remove a directory. 1245 */ 1246 int 1247 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1248 struct thread *p, struct nfsexstuff *exp) 1249 { 1250 struct vnode *vp; 1251 int error = 0; 1252 1253 vp = ndp->ni_vp; 1254 if (vp->v_type != VDIR) { 1255 error = ENOTDIR; 1256 goto out; 1257 } 1258 /* 1259 * No rmdir "." please. 1260 */ 1261 if (ndp->ni_dvp == vp) { 1262 error = EINVAL; 1263 goto out; 1264 } 1265 /* 1266 * The root of a mounted filesystem cannot be deleted. 1267 */ 1268 if (vp->v_vflag & VV_ROOT) 1269 error = EBUSY; 1270 out: 1271 if (!error) 1272 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1273 if (ndp->ni_dvp == vp) 1274 vrele(ndp->ni_dvp); 1275 else 1276 vput(ndp->ni_dvp); 1277 vput(vp); 1278 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1279 nfsvno_relpathbuf(ndp); 1280 NFSEXITCODE(error); 1281 return (error); 1282 } 1283 1284 /* 1285 * Rename vnode op. 1286 */ 1287 int 1288 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1289 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1290 { 1291 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1292 int error = 0, mirrorcnt; 1293 char fname[PNFS_FILENAME_LEN + 1]; 1294 fhandle_t fh; 1295 1296 dsdvp[0] = NULL; 1297 fvp = fromndp->ni_vp; 1298 if (ndstat) { 1299 vrele(fromndp->ni_dvp); 1300 vrele(fvp); 1301 error = ndstat; 1302 goto out1; 1303 } 1304 tdvp = tondp->ni_dvp; 1305 tvp = tondp->ni_vp; 1306 if (tvp != NULL) { 1307 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1308 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1309 goto out; 1310 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1311 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1312 goto out; 1313 } 1314 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1315 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1316 goto out; 1317 } 1318 1319 /* 1320 * A rename to '.' or '..' results in a prematurely 1321 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1322 * here. 1323 */ 1324 if ((tondp->ni_cnd.cn_namelen == 1 && 1325 tondp->ni_cnd.cn_nameptr[0] == '.') || 1326 (tondp->ni_cnd.cn_namelen == 2 && 1327 tondp->ni_cnd.cn_nameptr[0] == '.' && 1328 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1329 error = EINVAL; 1330 goto out; 1331 } 1332 } 1333 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1334 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1335 goto out; 1336 } 1337 if (fvp->v_mount != tdvp->v_mount) { 1338 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1339 goto out; 1340 } 1341 if (fvp == tdvp) { 1342 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1343 goto out; 1344 } 1345 if (fvp == tvp) { 1346 /* 1347 * If source and destination are the same, there is nothing to 1348 * do. Set error to -1 to indicate this. 1349 */ 1350 error = -1; 1351 goto out; 1352 } 1353 if (ndflag & ND_NFSV4) { 1354 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1355 error = nfsrv_checkremove(fvp, 0, p); 1356 NFSVOPUNLOCK(fvp, 0); 1357 } else 1358 error = EPERM; 1359 if (tvp && !error) 1360 error = nfsrv_checkremove(tvp, 1, p); 1361 } else { 1362 /* 1363 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1364 * that the NFSv4 client won't be confused by the rename. 1365 * Since nfsd_recalldelegation() can only be called on an 1366 * unlocked vnode at this point and fvp is the file that will 1367 * still exist after the rename, just do fvp. 1368 */ 1369 nfsd_recalldelegation(fvp, p); 1370 } 1371 if (error == 0 && tvp != NULL) { 1372 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1373 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1374 " dsdvp=%p\n", dsdvp[0]); 1375 } 1376 out: 1377 if (!error) { 1378 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1379 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1380 &tondp->ni_cnd); 1381 } else { 1382 if (tdvp == tvp) 1383 vrele(tdvp); 1384 else 1385 vput(tdvp); 1386 if (tvp) 1387 vput(tvp); 1388 vrele(fromndp->ni_dvp); 1389 vrele(fvp); 1390 if (error == -1) 1391 error = 0; 1392 } 1393 1394 /* 1395 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1396 * if the rename succeeded, the DS file for the tvp needs to be 1397 * removed. 1398 */ 1399 if (error == 0 && dsdvp[0] != NULL) { 1400 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1401 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1402 } 1403 1404 vrele(tondp->ni_startdir); 1405 nfsvno_relpathbuf(tondp); 1406 out1: 1407 vrele(fromndp->ni_startdir); 1408 nfsvno_relpathbuf(fromndp); 1409 NFSEXITCODE(error); 1410 return (error); 1411 } 1412 1413 /* 1414 * Link vnode op. 1415 */ 1416 int 1417 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1418 struct thread *p, struct nfsexstuff *exp) 1419 { 1420 struct vnode *xp; 1421 int error = 0; 1422 1423 xp = ndp->ni_vp; 1424 if (xp != NULL) { 1425 error = EEXIST; 1426 } else { 1427 xp = ndp->ni_dvp; 1428 if (vp->v_mount != xp->v_mount) 1429 error = EXDEV; 1430 } 1431 if (!error) { 1432 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1433 if ((vp->v_iflag & VI_DOOMED) == 0) 1434 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1435 else 1436 error = EPERM; 1437 if (ndp->ni_dvp == vp) 1438 vrele(ndp->ni_dvp); 1439 else 1440 vput(ndp->ni_dvp); 1441 NFSVOPUNLOCK(vp, 0); 1442 } else { 1443 if (ndp->ni_dvp == ndp->ni_vp) 1444 vrele(ndp->ni_dvp); 1445 else 1446 vput(ndp->ni_dvp); 1447 if (ndp->ni_vp) 1448 vrele(ndp->ni_vp); 1449 } 1450 nfsvno_relpathbuf(ndp); 1451 NFSEXITCODE(error); 1452 return (error); 1453 } 1454 1455 /* 1456 * Do the fsync() appropriate for the commit. 1457 */ 1458 int 1459 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1460 struct thread *td) 1461 { 1462 int error = 0; 1463 1464 /* 1465 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1466 * file is done. At this time VOP_FSYNC does not accept offset and 1467 * byte count parameters so call VOP_FSYNC the whole file for now. 1468 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1469 * File systems that do not use the buffer cache (as indicated 1470 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 1471 */ 1472 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1473 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1474 /* 1475 * Give up and do the whole thing 1476 */ 1477 if (vp->v_object && 1478 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1479 VM_OBJECT_WLOCK(vp->v_object); 1480 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1481 VM_OBJECT_WUNLOCK(vp->v_object); 1482 } 1483 error = VOP_FSYNC(vp, MNT_WAIT, td); 1484 } else { 1485 /* 1486 * Locate and synchronously write any buffers that fall 1487 * into the requested range. Note: we are assuming that 1488 * f_iosize is a power of 2. 1489 */ 1490 int iosize = vp->v_mount->mnt_stat.f_iosize; 1491 int iomask = iosize - 1; 1492 struct bufobj *bo; 1493 daddr_t lblkno; 1494 1495 /* 1496 * Align to iosize boundary, super-align to page boundary. 1497 */ 1498 if (off & iomask) { 1499 cnt += off & iomask; 1500 off &= ~(u_quad_t)iomask; 1501 } 1502 if (off & PAGE_MASK) { 1503 cnt += off & PAGE_MASK; 1504 off &= ~(u_quad_t)PAGE_MASK; 1505 } 1506 lblkno = off / iosize; 1507 1508 if (vp->v_object && 1509 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 1510 VM_OBJECT_WLOCK(vp->v_object); 1511 vm_object_page_clean(vp->v_object, off, off + cnt, 1512 OBJPC_SYNC); 1513 VM_OBJECT_WUNLOCK(vp->v_object); 1514 } 1515 1516 bo = &vp->v_bufobj; 1517 BO_LOCK(bo); 1518 while (cnt > 0) { 1519 struct buf *bp; 1520 1521 /* 1522 * If we have a buffer and it is marked B_DELWRI we 1523 * have to lock and write it. Otherwise the prior 1524 * write is assumed to have already been committed. 1525 * 1526 * gbincore() can return invalid buffers now so we 1527 * have to check that bit as well (though B_DELWRI 1528 * should not be set if B_INVAL is set there could be 1529 * a race here since we haven't locked the buffer). 1530 */ 1531 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1532 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1533 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1534 BO_LOCK(bo); 1535 continue; /* retry */ 1536 } 1537 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1538 B_DELWRI) { 1539 bremfree(bp); 1540 bp->b_flags &= ~B_ASYNC; 1541 bwrite(bp); 1542 ++nfs_commit_miss; 1543 } else 1544 BUF_UNLOCK(bp); 1545 BO_LOCK(bo); 1546 } 1547 ++nfs_commit_blks; 1548 if (cnt < iosize) 1549 break; 1550 cnt -= iosize; 1551 ++lblkno; 1552 } 1553 BO_UNLOCK(bo); 1554 } 1555 NFSEXITCODE(error); 1556 return (error); 1557 } 1558 1559 /* 1560 * Statfs vnode op. 1561 */ 1562 int 1563 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1564 { 1565 struct statfs *tsf; 1566 int error; 1567 1568 tsf = NULL; 1569 if (nfsrv_devidcnt > 0) { 1570 /* For a pNFS service, get the DS numbers. */ 1571 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1572 error = nfsrv_pnfsstatfs(tsf); 1573 if (error != 0) { 1574 free(tsf, M_TEMP); 1575 tsf = NULL; 1576 } 1577 } 1578 error = VFS_STATFS(vp->v_mount, sf); 1579 if (error == 0) { 1580 if (tsf != NULL) { 1581 sf->f_blocks = tsf->f_blocks; 1582 sf->f_bavail = tsf->f_bavail; 1583 sf->f_bfree = tsf->f_bfree; 1584 sf->f_bsize = tsf->f_bsize; 1585 } 1586 /* 1587 * Since NFS handles these values as unsigned on the 1588 * wire, there is no way to represent negative values, 1589 * so set them to 0. Without this, they will appear 1590 * to be very large positive values for clients like 1591 * Solaris10. 1592 */ 1593 if (sf->f_bavail < 0) 1594 sf->f_bavail = 0; 1595 if (sf->f_ffree < 0) 1596 sf->f_ffree = 0; 1597 } 1598 free(tsf, M_TEMP); 1599 NFSEXITCODE(error); 1600 return (error); 1601 } 1602 1603 /* 1604 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1605 * must handle nfsrv_opencheck() calls after any other access checks. 1606 */ 1607 void 1608 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1609 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1610 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1611 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, 1612 struct nfsexstuff *exp, struct vnode **vpp) 1613 { 1614 struct vnode *vp = NULL; 1615 u_quad_t tempsize; 1616 struct nfsexstuff nes; 1617 1618 if (ndp->ni_vp == NULL) 1619 nd->nd_repstat = nfsrv_opencheck(clientid, 1620 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1621 if (!nd->nd_repstat) { 1622 if (ndp->ni_vp == NULL) { 1623 vrele(ndp->ni_startdir); 1624 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1625 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1626 /* For a pNFS server, create the data file on a DS. */ 1627 if (nd->nd_repstat == 0) { 1628 /* 1629 * Create a data file on a DS for a pNFS server. 1630 * This function just returns if not 1631 * running a pNFS DS or the creation fails. 1632 */ 1633 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1634 cred, p); 1635 } 1636 vput(ndp->ni_dvp); 1637 nfsvno_relpathbuf(ndp); 1638 if (!nd->nd_repstat) { 1639 if (*exclusive_flagp) { 1640 *exclusive_flagp = 0; 1641 NFSVNO_ATTRINIT(nvap); 1642 nvap->na_atime.tv_sec = cverf[0]; 1643 nvap->na_atime.tv_nsec = cverf[1]; 1644 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1645 &nvap->na_vattr, cred); 1646 if (nd->nd_repstat != 0) { 1647 vput(ndp->ni_vp); 1648 ndp->ni_vp = NULL; 1649 nd->nd_repstat = NFSERR_NOTSUPP; 1650 } else 1651 NFSSETBIT_ATTRBIT(attrbitp, 1652 NFSATTRBIT_TIMEACCESS); 1653 } else { 1654 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1655 aclp, p, attrbitp, exp); 1656 } 1657 } 1658 vp = ndp->ni_vp; 1659 } else { 1660 if (ndp->ni_startdir) 1661 vrele(ndp->ni_startdir); 1662 nfsvno_relpathbuf(ndp); 1663 vp = ndp->ni_vp; 1664 if (create == NFSV4OPEN_CREATE) { 1665 if (ndp->ni_dvp == vp) 1666 vrele(ndp->ni_dvp); 1667 else 1668 vput(ndp->ni_dvp); 1669 } 1670 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1671 if (ndp->ni_cnd.cn_flags & RDONLY) 1672 NFSVNO_SETEXRDONLY(&nes); 1673 else 1674 NFSVNO_EXINIT(&nes); 1675 nd->nd_repstat = nfsvno_accchk(vp, 1676 VWRITE, cred, &nes, p, 1677 NFSACCCHK_NOOVERRIDE, 1678 NFSACCCHK_VPISLOCKED, NULL); 1679 nd->nd_repstat = nfsrv_opencheck(clientid, 1680 stateidp, stp, vp, nd, p, nd->nd_repstat); 1681 if (!nd->nd_repstat) { 1682 tempsize = nvap->na_size; 1683 NFSVNO_ATTRINIT(nvap); 1684 nvap->na_size = tempsize; 1685 nd->nd_repstat = VOP_SETATTR(vp, 1686 &nvap->na_vattr, cred); 1687 } 1688 } else if (vp->v_type == VREG) { 1689 nd->nd_repstat = nfsrv_opencheck(clientid, 1690 stateidp, stp, vp, nd, p, nd->nd_repstat); 1691 } 1692 } 1693 } else { 1694 if (ndp->ni_cnd.cn_flags & HASBUF) 1695 nfsvno_relpathbuf(ndp); 1696 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1697 vrele(ndp->ni_startdir); 1698 if (ndp->ni_dvp == ndp->ni_vp) 1699 vrele(ndp->ni_dvp); 1700 else 1701 vput(ndp->ni_dvp); 1702 if (ndp->ni_vp) 1703 vput(ndp->ni_vp); 1704 } 1705 } 1706 *vpp = vp; 1707 1708 NFSEXITCODE2(0, nd); 1709 } 1710 1711 /* 1712 * Updates the file rev and sets the mtime and ctime 1713 * to the current clock time, returning the va_filerev and va_Xtime 1714 * values. 1715 * Return ESTALE to indicate the vnode is VI_DOOMED. 1716 */ 1717 int 1718 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1719 struct nfsrv_descript *nd, struct thread *p) 1720 { 1721 struct vattr va; 1722 1723 VATTR_NULL(&va); 1724 vfs_timestamp(&va.va_mtime); 1725 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1726 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1727 if ((vp->v_iflag & VI_DOOMED) != 0) 1728 return (ESTALE); 1729 } 1730 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1731 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1732 return (0); 1733 } 1734 1735 /* 1736 * Glue routine to nfsv4_fillattr(). 1737 */ 1738 int 1739 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1740 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1741 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1742 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1743 { 1744 struct statfs *sf; 1745 int error; 1746 1747 sf = NULL; 1748 if (nfsrv_devidcnt > 0 && 1749 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1750 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 1751 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 1752 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 1753 error = nfsrv_pnfsstatfs(sf); 1754 if (error != 0) { 1755 free(sf, M_TEMP); 1756 sf = NULL; 1757 } 1758 } 1759 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 1760 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 1761 mounted_on_fileno, sf); 1762 free(sf, M_TEMP); 1763 NFSEXITCODE2(0, nd); 1764 return (error); 1765 } 1766 1767 /* Since the Readdir vnode ops vary, put the entire functions in here. */ 1768 /* 1769 * nfs readdir service 1770 * - mallocs what it thinks is enough to read 1771 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR 1772 * - calls VOP_READDIR() 1773 * - loops around building the reply 1774 * if the output generated exceeds count break out of loop 1775 * The NFSM_CLGET macro is used here so that the reply will be packed 1776 * tightly in mbuf clusters. 1777 * - it trims out records with d_fileno == 0 1778 * this doesn't matter for Unix clients, but they might confuse clients 1779 * for other os'. 1780 * - it trims out records with d_type == DT_WHT 1781 * these cannot be seen through NFS (unless we extend the protocol) 1782 * The alternate call nfsrvd_readdirplus() does lookups as well. 1783 * PS: The NFS protocol spec. does not clarify what the "count" byte 1784 * argument is a count of.. just name strings and file id's or the 1785 * entire reply rpc or ... 1786 * I tried just file name and id sizes and it confused the Sun client, 1787 * so I am using the full rpc size now. The "paranoia.." comment refers 1788 * to including the status longwords that are not a part of the dir. 1789 * "entry" structures, but are in the rpc. 1790 */ 1791 int 1792 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 1793 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 1794 { 1795 struct dirent *dp; 1796 u_int32_t *tl; 1797 int dirlen; 1798 char *cpos, *cend, *rbuf; 1799 struct nfsvattr at; 1800 int nlen, error = 0, getret = 1; 1801 int siz, cnt, fullsiz, eofflag, ncookies; 1802 u_int64_t off, toff, verf; 1803 u_long *cookies = NULL, *cookiep; 1804 struct uio io; 1805 struct iovec iv; 1806 int is_ufs; 1807 1808 if (nd->nd_repstat) { 1809 nfsrv_postopattr(nd, getret, &at); 1810 goto out; 1811 } 1812 if (nd->nd_flag & ND_NFSV2) { 1813 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1814 off = fxdr_unsigned(u_quad_t, *tl++); 1815 } else { 1816 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 1817 off = fxdr_hyper(tl); 1818 tl += 2; 1819 verf = fxdr_hyper(tl); 1820 tl += 2; 1821 } 1822 toff = off; 1823 cnt = fxdr_unsigned(int, *tl); 1824 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 1825 cnt = NFS_SRVMAXDATA(nd); 1826 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 1827 fullsiz = siz; 1828 if (nd->nd_flag & ND_NFSV3) { 1829 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 1830 NULL); 1831 #if 0 1832 /* 1833 * va_filerev is not sufficient as a cookie verifier, 1834 * since it is not supposed to change when entries are 1835 * removed/added unless that offset cookies returned to 1836 * the client are no longer valid. 1837 */ 1838 if (!nd->nd_repstat && toff && verf != at.na_filerev) 1839 nd->nd_repstat = NFSERR_BAD_COOKIE; 1840 #endif 1841 } 1842 if (!nd->nd_repstat && vp->v_type != VDIR) 1843 nd->nd_repstat = NFSERR_NOTDIR; 1844 if (nd->nd_repstat == 0 && cnt == 0) { 1845 if (nd->nd_flag & ND_NFSV2) 1846 /* NFSv2 does not have NFSERR_TOOSMALL */ 1847 nd->nd_repstat = EPERM; 1848 else 1849 nd->nd_repstat = NFSERR_TOOSMALL; 1850 } 1851 if (!nd->nd_repstat) 1852 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 1853 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1854 NFSACCCHK_VPISLOCKED, NULL); 1855 if (nd->nd_repstat) { 1856 vput(vp); 1857 if (nd->nd_flag & ND_NFSV3) 1858 nfsrv_postopattr(nd, getret, &at); 1859 goto out; 1860 } 1861 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 1862 rbuf = malloc(siz, M_TEMP, M_WAITOK); 1863 again: 1864 eofflag = 0; 1865 if (cookies) { 1866 free(cookies, M_TEMP); 1867 cookies = NULL; 1868 } 1869 1870 iv.iov_base = rbuf; 1871 iv.iov_len = siz; 1872 io.uio_iov = &iv; 1873 io.uio_iovcnt = 1; 1874 io.uio_offset = (off_t)off; 1875 io.uio_resid = siz; 1876 io.uio_segflg = UIO_SYSSPACE; 1877 io.uio_rw = UIO_READ; 1878 io.uio_td = NULL; 1879 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 1880 &cookies); 1881 off = (u_int64_t)io.uio_offset; 1882 if (io.uio_resid) 1883 siz -= io.uio_resid; 1884 1885 if (!cookies && !nd->nd_repstat) 1886 nd->nd_repstat = NFSERR_PERM; 1887 if (nd->nd_flag & ND_NFSV3) { 1888 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 1889 if (!nd->nd_repstat) 1890 nd->nd_repstat = getret; 1891 } 1892 1893 /* 1894 * Handles the failed cases. nd->nd_repstat == 0 past here. 1895 */ 1896 if (nd->nd_repstat) { 1897 vput(vp); 1898 free(rbuf, M_TEMP); 1899 if (cookies) 1900 free(cookies, M_TEMP); 1901 if (nd->nd_flag & ND_NFSV3) 1902 nfsrv_postopattr(nd, getret, &at); 1903 goto out; 1904 } 1905 /* 1906 * If nothing read, return eof 1907 * rpc reply 1908 */ 1909 if (siz == 0) { 1910 vput(vp); 1911 if (nd->nd_flag & ND_NFSV2) { 1912 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1913 } else { 1914 nfsrv_postopattr(nd, getret, &at); 1915 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 1916 txdr_hyper(at.na_filerev, tl); 1917 tl += 2; 1918 } 1919 *tl++ = newnfs_false; 1920 *tl = newnfs_true; 1921 free(rbuf, M_TEMP); 1922 free(cookies, M_TEMP); 1923 goto out; 1924 } 1925 1926 /* 1927 * Check for degenerate cases of nothing useful read. 1928 * If so go try again 1929 */ 1930 cpos = rbuf; 1931 cend = rbuf + siz; 1932 dp = (struct dirent *)cpos; 1933 cookiep = cookies; 1934 1935 /* 1936 * For some reason FreeBSD's ufs_readdir() chooses to back the 1937 * directory offset up to a block boundary, so it is necessary to 1938 * skip over the records that precede the requested offset. This 1939 * requires the assumption that file offset cookies monotonically 1940 * increase. 1941 */ 1942 while (cpos < cend && ncookies > 0 && 1943 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 1944 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 1945 cpos += dp->d_reclen; 1946 dp = (struct dirent *)cpos; 1947 cookiep++; 1948 ncookies--; 1949 } 1950 if (cpos >= cend || ncookies == 0) { 1951 siz = fullsiz; 1952 toff = off; 1953 goto again; 1954 } 1955 vput(vp); 1956 1957 /* 1958 * dirlen is the size of the reply, including all XDR and must 1959 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 1960 * if the XDR should be included in "count", but to be safe, we do. 1961 * (Include the two booleans at the end of the reply in dirlen now.) 1962 */ 1963 if (nd->nd_flag & ND_NFSV3) { 1964 nfsrv_postopattr(nd, getret, &at); 1965 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1966 txdr_hyper(at.na_filerev, tl); 1967 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 1968 } else { 1969 dirlen = 2 * NFSX_UNSIGNED; 1970 } 1971 1972 /* Loop through the records and build reply */ 1973 while (cpos < cend && ncookies > 0) { 1974 nlen = dp->d_namlen; 1975 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 1976 nlen <= NFS_MAXNAMLEN) { 1977 if (nd->nd_flag & ND_NFSV3) 1978 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1979 else 1980 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 1981 if (dirlen > cnt) { 1982 eofflag = 0; 1983 break; 1984 } 1985 1986 /* 1987 * Build the directory record xdr from 1988 * the dirent entry. 1989 */ 1990 if (nd->nd_flag & ND_NFSV3) { 1991 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1992 *tl++ = newnfs_true; 1993 *tl++ = 0; 1994 } else { 1995 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1996 *tl++ = newnfs_true; 1997 } 1998 *tl = txdr_unsigned(dp->d_fileno); 1999 (void) nfsm_strtom(nd, dp->d_name, nlen); 2000 if (nd->nd_flag & ND_NFSV3) { 2001 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2002 *tl++ = 0; 2003 } else 2004 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2005 *tl = txdr_unsigned(*cookiep); 2006 } 2007 cpos += dp->d_reclen; 2008 dp = (struct dirent *)cpos; 2009 cookiep++; 2010 ncookies--; 2011 } 2012 if (cpos < cend) 2013 eofflag = 0; 2014 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2015 *tl++ = newnfs_false; 2016 if (eofflag) 2017 *tl = newnfs_true; 2018 else 2019 *tl = newnfs_false; 2020 free(rbuf, M_TEMP); 2021 free(cookies, M_TEMP); 2022 2023 out: 2024 NFSEXITCODE2(0, nd); 2025 return (0); 2026 nfsmout: 2027 vput(vp); 2028 NFSEXITCODE2(error, nd); 2029 return (error); 2030 } 2031 2032 /* 2033 * Readdirplus for V3 and Readdir for V4. 2034 */ 2035 int 2036 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2037 struct vnode *vp, struct thread *p, struct nfsexstuff *exp) 2038 { 2039 struct dirent *dp; 2040 u_int32_t *tl; 2041 int dirlen; 2042 char *cpos, *cend, *rbuf; 2043 struct vnode *nvp; 2044 fhandle_t nfh; 2045 struct nfsvattr nva, at, *nvap = &nva; 2046 struct mbuf *mb0, *mb1; 2047 struct nfsreferral *refp; 2048 int nlen, r, error = 0, getret = 1, usevget = 1; 2049 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2050 caddr_t bpos0, bpos1; 2051 u_int64_t off, toff, verf; 2052 u_long *cookies = NULL, *cookiep; 2053 nfsattrbit_t attrbits, rderrbits, savbits; 2054 struct uio io; 2055 struct iovec iv; 2056 struct componentname cn; 2057 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2058 struct mount *mp, *new_mp; 2059 uint64_t mounted_on_fileno; 2060 2061 if (nd->nd_repstat) { 2062 nfsrv_postopattr(nd, getret, &at); 2063 goto out; 2064 } 2065 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2066 off = fxdr_hyper(tl); 2067 toff = off; 2068 tl += 2; 2069 verf = fxdr_hyper(tl); 2070 tl += 2; 2071 siz = fxdr_unsigned(int, *tl++); 2072 cnt = fxdr_unsigned(int, *tl); 2073 2074 /* 2075 * Use the server's maximum data transfer size as the upper bound 2076 * on reply datalen. 2077 */ 2078 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2079 cnt = NFS_SRVMAXDATA(nd); 2080 2081 /* 2082 * siz is a "hint" of how much directory information (name, fileid, 2083 * cookie) should be in the reply. At least one client "hints" 0, 2084 * so I set it to cnt for that case. I also round it up to the 2085 * next multiple of DIRBLKSIZ. 2086 */ 2087 if (siz <= 0) 2088 siz = cnt; 2089 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2090 2091 if (nd->nd_flag & ND_NFSV4) { 2092 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2093 if (error) 2094 goto nfsmout; 2095 NFSSET_ATTRBIT(&savbits, &attrbits); 2096 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); 2097 NFSZERO_ATTRBIT(&rderrbits); 2098 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2099 } else { 2100 NFSZERO_ATTRBIT(&attrbits); 2101 } 2102 fullsiz = siz; 2103 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2104 if (!nd->nd_repstat) { 2105 if (off && verf != at.na_filerev) { 2106 /* 2107 * va_filerev is not sufficient as a cookie verifier, 2108 * since it is not supposed to change when entries are 2109 * removed/added unless that offset cookies returned to 2110 * the client are no longer valid. 2111 */ 2112 #if 0 2113 if (nd->nd_flag & ND_NFSV4) { 2114 nd->nd_repstat = NFSERR_NOTSAME; 2115 } else { 2116 nd->nd_repstat = NFSERR_BAD_COOKIE; 2117 } 2118 #endif 2119 } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) { 2120 nd->nd_repstat = NFSERR_BAD_COOKIE; 2121 } 2122 } 2123 if (!nd->nd_repstat && vp->v_type != VDIR) 2124 nd->nd_repstat = NFSERR_NOTDIR; 2125 if (!nd->nd_repstat && cnt == 0) 2126 nd->nd_repstat = NFSERR_TOOSMALL; 2127 if (!nd->nd_repstat) 2128 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2129 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2130 NFSACCCHK_VPISLOCKED, NULL); 2131 if (nd->nd_repstat) { 2132 vput(vp); 2133 if (nd->nd_flag & ND_NFSV3) 2134 nfsrv_postopattr(nd, getret, &at); 2135 goto out; 2136 } 2137 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2138 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2139 2140 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2141 again: 2142 eofflag = 0; 2143 if (cookies) { 2144 free(cookies, M_TEMP); 2145 cookies = NULL; 2146 } 2147 2148 iv.iov_base = rbuf; 2149 iv.iov_len = siz; 2150 io.uio_iov = &iv; 2151 io.uio_iovcnt = 1; 2152 io.uio_offset = (off_t)off; 2153 io.uio_resid = siz; 2154 io.uio_segflg = UIO_SYSSPACE; 2155 io.uio_rw = UIO_READ; 2156 io.uio_td = NULL; 2157 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2158 &cookies); 2159 off = (u_int64_t)io.uio_offset; 2160 if (io.uio_resid) 2161 siz -= io.uio_resid; 2162 2163 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2164 2165 if (!cookies && !nd->nd_repstat) 2166 nd->nd_repstat = NFSERR_PERM; 2167 if (!nd->nd_repstat) 2168 nd->nd_repstat = getret; 2169 if (nd->nd_repstat) { 2170 vput(vp); 2171 if (cookies) 2172 free(cookies, M_TEMP); 2173 free(rbuf, M_TEMP); 2174 if (nd->nd_flag & ND_NFSV3) 2175 nfsrv_postopattr(nd, getret, &at); 2176 goto out; 2177 } 2178 /* 2179 * If nothing read, return eof 2180 * rpc reply 2181 */ 2182 if (siz == 0) { 2183 vput(vp); 2184 if (nd->nd_flag & ND_NFSV3) 2185 nfsrv_postopattr(nd, getret, &at); 2186 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2187 txdr_hyper(at.na_filerev, tl); 2188 tl += 2; 2189 *tl++ = newnfs_false; 2190 *tl = newnfs_true; 2191 free(cookies, M_TEMP); 2192 free(rbuf, M_TEMP); 2193 goto out; 2194 } 2195 2196 /* 2197 * Check for degenerate cases of nothing useful read. 2198 * If so go try again 2199 */ 2200 cpos = rbuf; 2201 cend = rbuf + siz; 2202 dp = (struct dirent *)cpos; 2203 cookiep = cookies; 2204 2205 /* 2206 * For some reason FreeBSD's ufs_readdir() chooses to back the 2207 * directory offset up to a block boundary, so it is necessary to 2208 * skip over the records that precede the requested offset. This 2209 * requires the assumption that file offset cookies monotonically 2210 * increase. 2211 */ 2212 while (cpos < cend && ncookies > 0 && 2213 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2214 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2215 ((nd->nd_flag & ND_NFSV4) && 2216 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2217 (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { 2218 cpos += dp->d_reclen; 2219 dp = (struct dirent *)cpos; 2220 cookiep++; 2221 ncookies--; 2222 } 2223 if (cpos >= cend || ncookies == 0) { 2224 siz = fullsiz; 2225 toff = off; 2226 goto again; 2227 } 2228 2229 /* 2230 * Busy the file system so that the mount point won't go away 2231 * and, as such, VFS_VGET() can be used safely. 2232 */ 2233 mp = vp->v_mount; 2234 vfs_ref(mp); 2235 NFSVOPUNLOCK(vp, 0); 2236 nd->nd_repstat = vfs_busy(mp, 0); 2237 vfs_rel(mp); 2238 if (nd->nd_repstat != 0) { 2239 vrele(vp); 2240 free(cookies, M_TEMP); 2241 free(rbuf, M_TEMP); 2242 if (nd->nd_flag & ND_NFSV3) 2243 nfsrv_postopattr(nd, getret, &at); 2244 goto out; 2245 } 2246 2247 /* 2248 * Check to see if entries in this directory can be safely acquired 2249 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2250 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2251 * automount of the snapshot directory that is required will 2252 * be done. 2253 * This needs to be done here for NFSv4, since NFSv4 never does 2254 * a VFS_VGET() for "." or "..". 2255 */ 2256 if (is_zfs == 1) { 2257 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2258 if (r == EOPNOTSUPP) { 2259 usevget = 0; 2260 cn.cn_nameiop = LOOKUP; 2261 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2262 cn.cn_cred = nd->nd_cred; 2263 cn.cn_thread = p; 2264 } else if (r == 0) 2265 vput(nvp); 2266 } 2267 2268 /* 2269 * Save this position, in case there is an error before one entry 2270 * is created. 2271 */ 2272 mb0 = nd->nd_mb; 2273 bpos0 = nd->nd_bpos; 2274 2275 /* 2276 * Fill in the first part of the reply. 2277 * dirlen is the reply length in bytes and cannot exceed cnt. 2278 * (Include the two booleans at the end of the reply in dirlen now, 2279 * so we recognize when we have exceeded cnt.) 2280 */ 2281 if (nd->nd_flag & ND_NFSV3) { 2282 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2283 nfsrv_postopattr(nd, getret, &at); 2284 } else { 2285 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2286 } 2287 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2288 txdr_hyper(at.na_filerev, tl); 2289 2290 /* 2291 * Save this position, in case there is an empty reply needed. 2292 */ 2293 mb1 = nd->nd_mb; 2294 bpos1 = nd->nd_bpos; 2295 2296 /* Loop through the records and build reply */ 2297 entrycnt = 0; 2298 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2299 nlen = dp->d_namlen; 2300 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2301 nlen <= NFS_MAXNAMLEN && 2302 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2303 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2304 || (nlen == 1 && dp->d_name[0] != '.'))) { 2305 /* 2306 * Save the current position in the reply, in case 2307 * this entry exceeds cnt. 2308 */ 2309 mb1 = nd->nd_mb; 2310 bpos1 = nd->nd_bpos; 2311 2312 /* 2313 * For readdir_and_lookup get the vnode using 2314 * the file number. 2315 */ 2316 nvp = NULL; 2317 refp = NULL; 2318 r = 0; 2319 at_root = 0; 2320 needs_unbusy = 0; 2321 new_mp = mp; 2322 mounted_on_fileno = (uint64_t)dp->d_fileno; 2323 if ((nd->nd_flag & ND_NFSV3) || 2324 NFSNONZERO_ATTRBIT(&savbits)) { 2325 if (nd->nd_flag & ND_NFSV4) 2326 refp = nfsv4root_getreferral(NULL, 2327 vp, dp->d_fileno); 2328 if (refp == NULL) { 2329 if (usevget) 2330 r = VFS_VGET(mp, dp->d_fileno, 2331 LK_SHARED, &nvp); 2332 else 2333 r = EOPNOTSUPP; 2334 if (r == EOPNOTSUPP) { 2335 if (usevget) { 2336 usevget = 0; 2337 cn.cn_nameiop = LOOKUP; 2338 cn.cn_lkflags = 2339 LK_SHARED | 2340 LK_RETRY; 2341 cn.cn_cred = 2342 nd->nd_cred; 2343 cn.cn_thread = p; 2344 } 2345 cn.cn_nameptr = dp->d_name; 2346 cn.cn_namelen = nlen; 2347 cn.cn_flags = ISLASTCN | 2348 NOFOLLOW | LOCKLEAF; 2349 if (nlen == 2 && 2350 dp->d_name[0] == '.' && 2351 dp->d_name[1] == '.') 2352 cn.cn_flags |= 2353 ISDOTDOT; 2354 if (NFSVOPLOCK(vp, LK_SHARED) 2355 != 0) { 2356 nd->nd_repstat = EPERM; 2357 break; 2358 } 2359 if ((vp->v_vflag & VV_ROOT) != 0 2360 && (cn.cn_flags & ISDOTDOT) 2361 != 0) { 2362 vref(vp); 2363 nvp = vp; 2364 r = 0; 2365 } else { 2366 r = VOP_LOOKUP(vp, &nvp, 2367 &cn); 2368 if (vp != nvp) 2369 NFSVOPUNLOCK(vp, 2370 0); 2371 } 2372 } 2373 2374 /* 2375 * For NFSv4, check to see if nvp is 2376 * a mount point and get the mount 2377 * point vnode, as required. 2378 */ 2379 if (r == 0 && 2380 nfsrv_enable_crossmntpt != 0 && 2381 (nd->nd_flag & ND_NFSV4) != 0 && 2382 nvp->v_type == VDIR && 2383 nvp->v_mountedhere != NULL) { 2384 new_mp = nvp->v_mountedhere; 2385 r = vfs_busy(new_mp, 0); 2386 vput(nvp); 2387 nvp = NULL; 2388 if (r == 0) { 2389 r = VFS_ROOT(new_mp, 2390 LK_SHARED, &nvp); 2391 needs_unbusy = 1; 2392 if (r == 0) 2393 at_root = 1; 2394 } 2395 } 2396 } 2397 if (!r) { 2398 if (refp == NULL && 2399 ((nd->nd_flag & ND_NFSV3) || 2400 NFSNONZERO_ATTRBIT(&attrbits))) { 2401 r = nfsvno_getfh(nvp, &nfh, p); 2402 if (!r) 2403 r = nfsvno_getattr(nvp, nvap, nd, p, 2404 1, &attrbits); 2405 if (r == 0 && is_zfs == 1 && 2406 nfsrv_enable_crossmntpt != 0 && 2407 (nd->nd_flag & ND_NFSV4) != 0 && 2408 nvp->v_type == VDIR && 2409 vp->v_mount != nvp->v_mount) { 2410 /* 2411 * For a ZFS snapshot, there is a 2412 * pseudo mount that does not set 2413 * v_mountedhere, so it needs to 2414 * be detected via a different 2415 * mount structure. 2416 */ 2417 at_root = 1; 2418 if (new_mp == mp) 2419 new_mp = nvp->v_mount; 2420 } 2421 } 2422 } else { 2423 nvp = NULL; 2424 } 2425 if (r) { 2426 if (!NFSISSET_ATTRBIT(&attrbits, 2427 NFSATTRBIT_RDATTRERROR)) { 2428 if (nvp != NULL) 2429 vput(nvp); 2430 if (needs_unbusy != 0) 2431 vfs_unbusy(new_mp); 2432 nd->nd_repstat = r; 2433 break; 2434 } 2435 } 2436 } 2437 2438 /* 2439 * Build the directory record xdr 2440 */ 2441 if (nd->nd_flag & ND_NFSV3) { 2442 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2443 *tl++ = newnfs_true; 2444 *tl++ = 0; 2445 *tl = txdr_unsigned(dp->d_fileno); 2446 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2447 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2448 *tl++ = 0; 2449 *tl = txdr_unsigned(*cookiep); 2450 nfsrv_postopattr(nd, 0, nvap); 2451 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2452 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2453 if (nvp != NULL) 2454 vput(nvp); 2455 } else { 2456 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2457 *tl++ = newnfs_true; 2458 *tl++ = 0; 2459 *tl = txdr_unsigned(*cookiep); 2460 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2461 if (nvp != NULL) { 2462 supports_nfsv4acls = 2463 nfs_supportsnfsv4acls(nvp); 2464 NFSVOPUNLOCK(nvp, 0); 2465 } else 2466 supports_nfsv4acls = 0; 2467 if (refp != NULL) { 2468 dirlen += nfsrv_putreferralattr(nd, 2469 &savbits, refp, 0, 2470 &nd->nd_repstat); 2471 if (nd->nd_repstat) { 2472 if (nvp != NULL) 2473 vrele(nvp); 2474 if (needs_unbusy != 0) 2475 vfs_unbusy(new_mp); 2476 break; 2477 } 2478 } else if (r) { 2479 dirlen += nfsvno_fillattr(nd, new_mp, 2480 nvp, nvap, &nfh, r, &rderrbits, 2481 nd->nd_cred, p, isdgram, 0, 2482 supports_nfsv4acls, at_root, 2483 mounted_on_fileno); 2484 } else { 2485 dirlen += nfsvno_fillattr(nd, new_mp, 2486 nvp, nvap, &nfh, r, &attrbits, 2487 nd->nd_cred, p, isdgram, 0, 2488 supports_nfsv4acls, at_root, 2489 mounted_on_fileno); 2490 } 2491 if (nvp != NULL) 2492 vrele(nvp); 2493 dirlen += (3 * NFSX_UNSIGNED); 2494 } 2495 if (needs_unbusy != 0) 2496 vfs_unbusy(new_mp); 2497 if (dirlen <= cnt) 2498 entrycnt++; 2499 } 2500 cpos += dp->d_reclen; 2501 dp = (struct dirent *)cpos; 2502 cookiep++; 2503 ncookies--; 2504 } 2505 vrele(vp); 2506 vfs_unbusy(mp); 2507 2508 /* 2509 * If dirlen > cnt, we must strip off the last entry. If that 2510 * results in an empty reply, report NFSERR_TOOSMALL. 2511 */ 2512 if (dirlen > cnt || nd->nd_repstat) { 2513 if (!nd->nd_repstat && entrycnt == 0) 2514 nd->nd_repstat = NFSERR_TOOSMALL; 2515 if (nd->nd_repstat) { 2516 newnfs_trimtrailing(nd, mb0, bpos0); 2517 if (nd->nd_flag & ND_NFSV3) 2518 nfsrv_postopattr(nd, getret, &at); 2519 } else 2520 newnfs_trimtrailing(nd, mb1, bpos1); 2521 eofflag = 0; 2522 } else if (cpos < cend) 2523 eofflag = 0; 2524 if (!nd->nd_repstat) { 2525 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2526 *tl++ = newnfs_false; 2527 if (eofflag) 2528 *tl = newnfs_true; 2529 else 2530 *tl = newnfs_false; 2531 } 2532 free(cookies, M_TEMP); 2533 free(rbuf, M_TEMP); 2534 2535 out: 2536 NFSEXITCODE2(0, nd); 2537 return (0); 2538 nfsmout: 2539 vput(vp); 2540 NFSEXITCODE2(error, nd); 2541 return (error); 2542 } 2543 2544 /* 2545 * Get the settable attributes out of the mbuf list. 2546 * (Return 0 or EBADRPC) 2547 */ 2548 int 2549 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2550 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2551 { 2552 u_int32_t *tl; 2553 struct nfsv2_sattr *sp; 2554 int error = 0, toclient = 0; 2555 2556 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2557 case ND_NFSV2: 2558 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2559 /* 2560 * Some old clients didn't fill in the high order 16bits. 2561 * --> check the low order 2 bytes for 0xffff 2562 */ 2563 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2564 nvap->na_mode = nfstov_mode(sp->sa_mode); 2565 if (sp->sa_uid != newnfs_xdrneg1) 2566 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2567 if (sp->sa_gid != newnfs_xdrneg1) 2568 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2569 if (sp->sa_size != newnfs_xdrneg1) 2570 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2571 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2572 #ifdef notyet 2573 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2574 #else 2575 nvap->na_atime.tv_sec = 2576 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2577 nvap->na_atime.tv_nsec = 0; 2578 #endif 2579 } 2580 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2581 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2582 break; 2583 case ND_NFSV3: 2584 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2585 if (*tl == newnfs_true) { 2586 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2587 nvap->na_mode = nfstov_mode(*tl); 2588 } 2589 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2590 if (*tl == newnfs_true) { 2591 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2592 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2593 } 2594 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2595 if (*tl == newnfs_true) { 2596 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2597 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2598 } 2599 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2600 if (*tl == newnfs_true) { 2601 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2602 nvap->na_size = fxdr_hyper(tl); 2603 } 2604 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2605 switch (fxdr_unsigned(int, *tl)) { 2606 case NFSV3SATTRTIME_TOCLIENT: 2607 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2608 fxdr_nfsv3time(tl, &nvap->na_atime); 2609 toclient = 1; 2610 break; 2611 case NFSV3SATTRTIME_TOSERVER: 2612 vfs_timestamp(&nvap->na_atime); 2613 nvap->na_vaflags |= VA_UTIMES_NULL; 2614 break; 2615 } 2616 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2617 switch (fxdr_unsigned(int, *tl)) { 2618 case NFSV3SATTRTIME_TOCLIENT: 2619 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2620 fxdr_nfsv3time(tl, &nvap->na_mtime); 2621 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2622 break; 2623 case NFSV3SATTRTIME_TOSERVER: 2624 vfs_timestamp(&nvap->na_mtime); 2625 if (!toclient) 2626 nvap->na_vaflags |= VA_UTIMES_NULL; 2627 break; 2628 } 2629 break; 2630 case ND_NFSV4: 2631 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2632 } 2633 nfsmout: 2634 NFSEXITCODE2(error, nd); 2635 return (error); 2636 } 2637 2638 /* 2639 * Handle the setable attributes for V4. 2640 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2641 */ 2642 int 2643 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2644 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2645 { 2646 u_int32_t *tl; 2647 int attrsum = 0; 2648 int i, j; 2649 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2650 int toclient = 0; 2651 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2652 uid_t uid; 2653 gid_t gid; 2654 2655 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2656 if (error) 2657 goto nfsmout; 2658 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2659 attrsize = fxdr_unsigned(int, *tl); 2660 2661 /* 2662 * Loop around getting the setable attributes. If an unsupported 2663 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 2664 */ 2665 if (retnotsup) { 2666 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2667 bitpos = NFSATTRBIT_MAX; 2668 } else { 2669 bitpos = 0; 2670 } 2671 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2672 if (attrsum > attrsize) { 2673 error = NFSERR_BADXDR; 2674 goto nfsmout; 2675 } 2676 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2677 switch (bitpos) { 2678 case NFSATTRBIT_SIZE: 2679 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2680 if (vp != NULL && vp->v_type != VREG) { 2681 error = (vp->v_type == VDIR) ? NFSERR_ISDIR : 2682 NFSERR_INVAL; 2683 goto nfsmout; 2684 } 2685 nvap->na_size = fxdr_hyper(tl); 2686 attrsum += NFSX_HYPER; 2687 break; 2688 case NFSATTRBIT_ACL: 2689 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2690 p); 2691 if (error) 2692 goto nfsmout; 2693 if (aceerr && !nd->nd_repstat) 2694 nd->nd_repstat = aceerr; 2695 attrsum += aclsize; 2696 break; 2697 case NFSATTRBIT_ARCHIVE: 2698 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2699 if (!nd->nd_repstat) 2700 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2701 attrsum += NFSX_UNSIGNED; 2702 break; 2703 case NFSATTRBIT_HIDDEN: 2704 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2705 if (!nd->nd_repstat) 2706 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2707 attrsum += NFSX_UNSIGNED; 2708 break; 2709 case NFSATTRBIT_MIMETYPE: 2710 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2711 i = fxdr_unsigned(int, *tl); 2712 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2713 if (error) 2714 goto nfsmout; 2715 if (!nd->nd_repstat) 2716 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2717 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2718 break; 2719 case NFSATTRBIT_MODE: 2720 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2721 nvap->na_mode = nfstov_mode(*tl); 2722 attrsum += NFSX_UNSIGNED; 2723 break; 2724 case NFSATTRBIT_OWNER: 2725 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2726 j = fxdr_unsigned(int, *tl); 2727 if (j < 0) { 2728 error = NFSERR_BADXDR; 2729 goto nfsmout; 2730 } 2731 if (j > NFSV4_SMALLSTR) 2732 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2733 else 2734 cp = namestr; 2735 error = nfsrv_mtostr(nd, cp, j); 2736 if (error) { 2737 if (j > NFSV4_SMALLSTR) 2738 free(cp, M_NFSSTRING); 2739 goto nfsmout; 2740 } 2741 if (!nd->nd_repstat) { 2742 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, 2743 p); 2744 if (!nd->nd_repstat) 2745 nvap->na_uid = uid; 2746 } 2747 if (j > NFSV4_SMALLSTR) 2748 free(cp, M_NFSSTRING); 2749 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2750 break; 2751 case NFSATTRBIT_OWNERGROUP: 2752 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2753 j = fxdr_unsigned(int, *tl); 2754 if (j < 0) { 2755 error = NFSERR_BADXDR; 2756 goto nfsmout; 2757 } 2758 if (j > NFSV4_SMALLSTR) 2759 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2760 else 2761 cp = namestr; 2762 error = nfsrv_mtostr(nd, cp, j); 2763 if (error) { 2764 if (j > NFSV4_SMALLSTR) 2765 free(cp, M_NFSSTRING); 2766 goto nfsmout; 2767 } 2768 if (!nd->nd_repstat) { 2769 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, 2770 p); 2771 if (!nd->nd_repstat) 2772 nvap->na_gid = gid; 2773 } 2774 if (j > NFSV4_SMALLSTR) 2775 free(cp, M_NFSSTRING); 2776 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2777 break; 2778 case NFSATTRBIT_SYSTEM: 2779 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2780 if (!nd->nd_repstat) 2781 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2782 attrsum += NFSX_UNSIGNED; 2783 break; 2784 case NFSATTRBIT_TIMEACCESSSET: 2785 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2786 attrsum += NFSX_UNSIGNED; 2787 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2788 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2789 fxdr_nfsv4time(tl, &nvap->na_atime); 2790 toclient = 1; 2791 attrsum += NFSX_V4TIME; 2792 } else { 2793 vfs_timestamp(&nvap->na_atime); 2794 nvap->na_vaflags |= VA_UTIMES_NULL; 2795 } 2796 break; 2797 case NFSATTRBIT_TIMEBACKUP: 2798 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2799 if (!nd->nd_repstat) 2800 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2801 attrsum += NFSX_V4TIME; 2802 break; 2803 case NFSATTRBIT_TIMECREATE: 2804 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2805 if (!nd->nd_repstat) 2806 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2807 attrsum += NFSX_V4TIME; 2808 break; 2809 case NFSATTRBIT_TIMEMODIFYSET: 2810 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2811 attrsum += NFSX_UNSIGNED; 2812 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 2813 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 2814 fxdr_nfsv4time(tl, &nvap->na_mtime); 2815 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2816 attrsum += NFSX_V4TIME; 2817 } else { 2818 vfs_timestamp(&nvap->na_mtime); 2819 if (!toclient) 2820 nvap->na_vaflags |= VA_UTIMES_NULL; 2821 } 2822 break; 2823 default: 2824 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2825 /* 2826 * set bitpos so we drop out of the loop. 2827 */ 2828 bitpos = NFSATTRBIT_MAX; 2829 break; 2830 } 2831 } 2832 2833 /* 2834 * some clients pad the attrlist, so we need to skip over the 2835 * padding. 2836 */ 2837 if (attrsum > attrsize) { 2838 error = NFSERR_BADXDR; 2839 } else { 2840 attrsize = NFSM_RNDUP(attrsize); 2841 if (attrsum < attrsize) 2842 error = nfsm_advance(nd, attrsize - attrsum, -1); 2843 } 2844 nfsmout: 2845 NFSEXITCODE2(error, nd); 2846 return (error); 2847 } 2848 2849 /* 2850 * Check/setup export credentials. 2851 */ 2852 int 2853 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 2854 struct ucred *credanon) 2855 { 2856 int error = 0; 2857 2858 /* 2859 * Check/setup credentials. 2860 */ 2861 if (nd->nd_flag & ND_GSS) 2862 exp->nes_exflag &= ~MNT_EXPORTANON; 2863 2864 /* 2865 * Check to see if the operation is allowed for this security flavor. 2866 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 2867 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 2868 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 2869 */ 2870 if (nfsvno_testexp(nd, exp) && 2871 nd->nd_procnum != NFSV4OP_SECINFO && 2872 nd->nd_procnum != NFSPROC_FSINFO) { 2873 if (nd->nd_flag & ND_NFSV4) 2874 error = NFSERR_WRONGSEC; 2875 else 2876 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 2877 goto out; 2878 } 2879 2880 /* 2881 * Check to see if the file system is exported V4 only. 2882 */ 2883 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 2884 error = NFSERR_PROGNOTV4; 2885 goto out; 2886 } 2887 2888 /* 2889 * Now, map the user credentials. 2890 * (Note that ND_AUTHNONE will only be set for an NFSv3 2891 * Fsinfo RPC. If set for anything else, this code might need 2892 * to change.) 2893 */ 2894 if (NFSVNO_EXPORTED(exp)) { 2895 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 2896 NFSVNO_EXPORTANON(exp) || 2897 (nd->nd_flag & ND_AUTHNONE) != 0) { 2898 nd->nd_cred->cr_uid = credanon->cr_uid; 2899 nd->nd_cred->cr_gid = credanon->cr_gid; 2900 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 2901 credanon->cr_groups); 2902 } else if ((nd->nd_flag & ND_GSS) == 0) { 2903 /* 2904 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 2905 * if there is a replacement credential with a group 2906 * list set up by "nfsuserd -manage-gids". 2907 * If there is no replacement, nfsrv_getgrpscred() 2908 * simply returns its argument. 2909 */ 2910 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 2911 } 2912 } 2913 2914 out: 2915 NFSEXITCODE2(error, nd); 2916 return (error); 2917 } 2918 2919 /* 2920 * Check exports. 2921 */ 2922 int 2923 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 2924 struct ucred **credp) 2925 { 2926 int i, error, *secflavors; 2927 2928 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2929 &exp->nes_numsecflavor, &secflavors); 2930 if (error) { 2931 if (nfs_rootfhset) { 2932 exp->nes_exflag = 0; 2933 exp->nes_numsecflavor = 0; 2934 error = 0; 2935 } 2936 } else { 2937 /* Copy the security flavors. */ 2938 for (i = 0; i < exp->nes_numsecflavor; i++) 2939 exp->nes_secflavors[i] = secflavors[i]; 2940 } 2941 NFSEXITCODE(error); 2942 return (error); 2943 } 2944 2945 /* 2946 * Get a vnode for a file handle and export stuff. 2947 */ 2948 int 2949 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 2950 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 2951 struct ucred **credp) 2952 { 2953 int i, error, *secflavors; 2954 2955 *credp = NULL; 2956 exp->nes_numsecflavor = 0; 2957 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 2958 if (error != 0) 2959 /* Make sure the server replies ESTALE to the client. */ 2960 error = ESTALE; 2961 if (nam && !error) { 2962 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 2963 &exp->nes_numsecflavor, &secflavors); 2964 if (error) { 2965 if (nfs_rootfhset) { 2966 exp->nes_exflag = 0; 2967 exp->nes_numsecflavor = 0; 2968 error = 0; 2969 } else { 2970 vput(*vpp); 2971 } 2972 } else { 2973 /* Copy the security flavors. */ 2974 for (i = 0; i < exp->nes_numsecflavor; i++) 2975 exp->nes_secflavors[i] = secflavors[i]; 2976 } 2977 } 2978 NFSEXITCODE(error); 2979 return (error); 2980 } 2981 2982 /* 2983 * nfsd_fhtovp() - convert a fh to a vnode ptr 2984 * - look up fsid in mount list (if not found ret error) 2985 * - get vp and export rights by calling nfsvno_fhtovp() 2986 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 2987 * for AUTH_SYS 2988 * - if mpp != NULL, return the mount point so that it can 2989 * be used for vn_finished_write() by the caller 2990 */ 2991 void 2992 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 2993 struct vnode **vpp, struct nfsexstuff *exp, 2994 struct mount **mpp, int startwrite, struct thread *p) 2995 { 2996 struct mount *mp; 2997 struct ucred *credanon; 2998 fhandle_t *fhp; 2999 3000 fhp = (fhandle_t *)nfp->nfsrvfh_data; 3001 /* 3002 * Check for the special case of the nfsv4root_fh. 3003 */ 3004 mp = vfs_busyfs(&fhp->fh_fsid); 3005 if (mpp != NULL) 3006 *mpp = mp; 3007 if (mp == NULL) { 3008 *vpp = NULL; 3009 nd->nd_repstat = ESTALE; 3010 goto out; 3011 } 3012 3013 if (startwrite) { 3014 vn_start_write(NULL, mpp, V_WAIT); 3015 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3016 lktype = LK_EXCLUSIVE; 3017 } 3018 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3019 &credanon); 3020 vfs_unbusy(mp); 3021 3022 /* 3023 * For NFSv4 without a pseudo root fs, unexported file handles 3024 * can be returned, so that Lookup works everywhere. 3025 */ 3026 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3027 !(nd->nd_flag & ND_NFSV4)) { 3028 vput(*vpp); 3029 nd->nd_repstat = EACCES; 3030 } 3031 3032 /* 3033 * Personally, I've never seen any point in requiring a 3034 * reserved port#, since only in the rare case where the 3035 * clients are all boxes with secure system privileges, 3036 * does it provide any enhanced security, but... some people 3037 * believe it to be useful and keep putting this code back in. 3038 * (There is also some "security checker" out there that 3039 * complains if the nfs server doesn't enforce this.) 3040 * However, note the following: 3041 * RFC3530 (NFSv4) specifies that a reserved port# not be 3042 * required. 3043 * RFC2623 recommends that, if a reserved port# is checked for, 3044 * that there be a way to turn that off--> ifdef'd. 3045 */ 3046 #ifdef NFS_REQRSVPORT 3047 if (!nd->nd_repstat) { 3048 struct sockaddr_in *saddr; 3049 struct sockaddr_in6 *saddr6; 3050 3051 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3052 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3053 if (!(nd->nd_flag & ND_NFSV4) && 3054 ((saddr->sin_family == AF_INET && 3055 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3056 (saddr6->sin6_family == AF_INET6 && 3057 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3058 vput(*vpp); 3059 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3060 } 3061 } 3062 #endif /* NFS_REQRSVPORT */ 3063 3064 /* 3065 * Check/setup credentials. 3066 */ 3067 if (!nd->nd_repstat) { 3068 nd->nd_saveduid = nd->nd_cred->cr_uid; 3069 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 3070 if (nd->nd_repstat) 3071 vput(*vpp); 3072 } 3073 if (credanon != NULL) 3074 crfree(credanon); 3075 if (nd->nd_repstat) { 3076 if (startwrite) 3077 vn_finished_write(mp); 3078 *vpp = NULL; 3079 if (mpp != NULL) 3080 *mpp = NULL; 3081 } 3082 3083 out: 3084 NFSEXITCODE2(0, nd); 3085 } 3086 3087 /* 3088 * glue for fp. 3089 */ 3090 static int 3091 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3092 { 3093 struct filedesc *fdp; 3094 struct file *fp; 3095 int error = 0; 3096 3097 fdp = p->td_proc->p_fd; 3098 if (fd < 0 || fd >= fdp->fd_nfiles || 3099 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3100 error = EBADF; 3101 goto out; 3102 } 3103 *fpp = fp; 3104 3105 out: 3106 NFSEXITCODE(error); 3107 return (error); 3108 } 3109 3110 /* 3111 * Called from nfssvc() to update the exports list. Just call 3112 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3113 * in the mount list. 3114 */ 3115 int 3116 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3117 { 3118 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3119 int error = 0; 3120 struct nameidata nd; 3121 fhandle_t fh; 3122 3123 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3124 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3125 nfs_rootfhset = 0; 3126 else if (error == 0) { 3127 if (nfsexargp->fspec == NULL) { 3128 error = EPERM; 3129 goto out; 3130 } 3131 /* 3132 * If fspec != NULL, this is the v4root path. 3133 */ 3134 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 3135 nfsexargp->fspec, p); 3136 if ((error = namei(&nd)) != 0) 3137 goto out; 3138 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3139 vrele(nd.ni_vp); 3140 if (!error) { 3141 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3142 NFSBCOPY((caddr_t)&fh, 3143 nfs_rootfh.nfsrvfh_data, 3144 sizeof (fhandle_t)); 3145 nfs_rootfhset = 1; 3146 } 3147 } 3148 3149 out: 3150 NFSEXITCODE(error); 3151 return (error); 3152 } 3153 3154 /* 3155 * This function needs to test to see if the system is near its limit 3156 * for memory allocation via malloc() or mget() and return True iff 3157 * either of these resources are near their limit. 3158 * XXX (For now, this is just a stub.) 3159 */ 3160 int nfsrv_testmalloclimit = 0; 3161 int 3162 nfsrv_mallocmget_limit(void) 3163 { 3164 static int printmesg = 0; 3165 static int testval = 1; 3166 3167 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3168 if ((printmesg++ % 100) == 0) 3169 printf("nfsd: malloc/mget near limit\n"); 3170 return (1); 3171 } 3172 return (0); 3173 } 3174 3175 /* 3176 * BSD specific initialization of a mount point. 3177 */ 3178 void 3179 nfsd_mntinit(void) 3180 { 3181 static int inited = 0; 3182 3183 if (inited) 3184 return; 3185 inited = 1; 3186 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3187 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 3188 TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); 3189 nfsv4root_mnt.mnt_export = NULL; 3190 TAILQ_INIT(&nfsv4root_opt); 3191 TAILQ_INIT(&nfsv4root_newopt); 3192 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 3193 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 3194 nfsv4root_mnt.mnt_nvnodelistsize = 0; 3195 nfsv4root_mnt.mnt_activevnodelistsize = 0; 3196 } 3197 3198 /* 3199 * Get a vnode for a file handle, without checking exports, etc. 3200 */ 3201 struct vnode * 3202 nfsvno_getvp(fhandle_t *fhp) 3203 { 3204 struct mount *mp; 3205 struct vnode *vp; 3206 int error; 3207 3208 mp = vfs_busyfs(&fhp->fh_fsid); 3209 if (mp == NULL) 3210 return (NULL); 3211 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3212 vfs_unbusy(mp); 3213 if (error) 3214 return (NULL); 3215 return (vp); 3216 } 3217 3218 /* 3219 * Do a local VOP_ADVLOCK(). 3220 */ 3221 int 3222 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3223 u_int64_t end, struct thread *td) 3224 { 3225 int error = 0; 3226 struct flock fl; 3227 u_int64_t tlen; 3228 3229 if (nfsrv_dolocallocks == 0) 3230 goto out; 3231 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3232 3233 fl.l_whence = SEEK_SET; 3234 fl.l_type = ftype; 3235 fl.l_start = (off_t)first; 3236 if (end == NFS64BITSSET) { 3237 fl.l_len = 0; 3238 } else { 3239 tlen = end - first; 3240 fl.l_len = (off_t)tlen; 3241 } 3242 /* 3243 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3244 * values for all calls, so that all locks will be held by the 3245 * nfsd server. (The nfsd server handles conflicts between the 3246 * various clients.) 3247 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3248 * bytes, so it can't be put in l_sysid. 3249 */ 3250 if (nfsv4_sysid == 0) 3251 nfsv4_sysid = nlm_acquire_next_sysid(); 3252 fl.l_pid = (pid_t)0; 3253 fl.l_sysid = (int)nfsv4_sysid; 3254 3255 if (ftype == F_UNLCK) 3256 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3257 (F_POSIX | F_REMOTE)); 3258 else 3259 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3260 (F_POSIX | F_REMOTE)); 3261 3262 out: 3263 NFSEXITCODE(error); 3264 return (error); 3265 } 3266 3267 /* 3268 * Check the nfsv4 root exports. 3269 */ 3270 int 3271 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3272 { 3273 struct ucred *credanon; 3274 int exflags, error = 0, numsecflavor, *secflavors, i; 3275 3276 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3277 &credanon, &numsecflavor, &secflavors); 3278 if (error) { 3279 error = NFSERR_PROGUNAVAIL; 3280 goto out; 3281 } 3282 if (credanon != NULL) 3283 crfree(credanon); 3284 for (i = 0; i < numsecflavor; i++) { 3285 if (secflavors[i] == AUTH_SYS) 3286 nd->nd_flag |= ND_EXAUTHSYS; 3287 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3288 nd->nd_flag |= ND_EXGSS; 3289 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3290 nd->nd_flag |= ND_EXGSSINTEGRITY; 3291 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3292 nd->nd_flag |= ND_EXGSSPRIVACY; 3293 } 3294 3295 out: 3296 NFSEXITCODE(error); 3297 return (error); 3298 } 3299 3300 /* 3301 * Nfs server pseudo system call for the nfsd's 3302 */ 3303 /* 3304 * MPSAFE 3305 */ 3306 static int 3307 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3308 { 3309 struct file *fp; 3310 struct nfsd_addsock_args sockarg; 3311 struct nfsd_nfsd_args nfsdarg; 3312 struct nfsd_nfsd_oargs onfsdarg; 3313 struct nfsd_pnfsd_args pnfsdarg; 3314 struct vnode *vp, *nvp, *curdvp; 3315 struct pnfsdsfile *pf; 3316 struct nfsdevice *ds, *fds; 3317 cap_rights_t rights; 3318 int buflen, error, ret; 3319 char *buf, *cp, *cp2, *cp3; 3320 char fname[PNFS_FILENAME_LEN + 1]; 3321 3322 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3323 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3324 if (error) 3325 goto out; 3326 /* 3327 * Since we don't know what rights might be required, 3328 * pretend that we need them all. It is better to be too 3329 * careful than too reckless. 3330 */ 3331 error = fget(td, sockarg.sock, 3332 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 3333 if (error != 0) 3334 goto out; 3335 if (fp->f_type != DTYPE_SOCKET) { 3336 fdrop(fp, td); 3337 error = EPERM; 3338 goto out; 3339 } 3340 error = nfsrvd_addsock(fp); 3341 fdrop(fp, td); 3342 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3343 if (uap->argp == NULL) { 3344 error = EINVAL; 3345 goto out; 3346 } 3347 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3348 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3349 if (error == 0) { 3350 nfsdarg.principal = onfsdarg.principal; 3351 nfsdarg.minthreads = onfsdarg.minthreads; 3352 nfsdarg.maxthreads = onfsdarg.maxthreads; 3353 nfsdarg.version = 1; 3354 nfsdarg.addr = NULL; 3355 nfsdarg.addrlen = 0; 3356 nfsdarg.dnshost = NULL; 3357 nfsdarg.dnshostlen = 0; 3358 nfsdarg.mirrorcnt = 1; 3359 } 3360 } else 3361 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3362 if (error) 3363 goto out; 3364 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3365 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3366 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3367 nfsdarg.mirrorcnt >= 1 && 3368 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3369 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3370 nfsdarg.dspath != NULL) { 3371 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3372 " mirrorcnt=%d\n", nfsdarg.addrlen, 3373 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3374 nfsdarg.mirrorcnt); 3375 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3376 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3377 if (error != 0) { 3378 free(cp, M_TEMP); 3379 goto out; 3380 } 3381 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ 3382 nfsdarg.addr = cp; 3383 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3384 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3385 if (error != 0) { 3386 free(nfsdarg.addr, M_TEMP); 3387 free(cp, M_TEMP); 3388 goto out; 3389 } 3390 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3391 nfsdarg.dnshost = cp; 3392 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3393 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3394 if (error != 0) { 3395 free(nfsdarg.addr, M_TEMP); 3396 free(nfsdarg.dnshost, M_TEMP); 3397 free(cp, M_TEMP); 3398 goto out; 3399 } 3400 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3401 nfsdarg.dspath = cp; 3402 } else { 3403 nfsdarg.addr = NULL; 3404 nfsdarg.addrlen = 0; 3405 nfsdarg.dnshost = NULL; 3406 nfsdarg.dnshostlen = 0; 3407 nfsdarg.dspath = NULL; 3408 nfsdarg.dspathlen = 0; 3409 nfsdarg.mirrorcnt = 1; 3410 } 3411 error = nfsrvd_nfsd(td, &nfsdarg); 3412 free(nfsdarg.addr, M_TEMP); 3413 free(nfsdarg.dnshost, M_TEMP); 3414 free(nfsdarg.dspath, M_TEMP); 3415 } else if (uap->flag & NFSSVC_PNFSDS) { 3416 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3417 if (error == 0 && pnfsdarg.op == PNFSDOP_DELDSSERVER) { 3418 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3419 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3420 NULL); 3421 if (error == 0) 3422 error = nfsrv_deldsserver(cp, td); 3423 free(cp, M_TEMP); 3424 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3425 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3426 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3427 buf = malloc(buflen, M_TEMP, M_WAITOK); 3428 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3429 NULL); 3430 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3431 if (error == 0 && pnfsdarg.dspath != NULL) { 3432 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3433 error = copyinstr(pnfsdarg.dspath, cp2, 3434 PATH_MAX + 1, NULL); 3435 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3436 error); 3437 } else 3438 cp2 = NULL; 3439 if (error == 0 && pnfsdarg.curdspath != NULL) { 3440 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3441 error = copyinstr(pnfsdarg.curdspath, cp3, 3442 PATH_MAX + 1, NULL); 3443 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3444 error); 3445 } else 3446 cp3 = NULL; 3447 curdvp = NULL; 3448 fds = NULL; 3449 if (error == 0) 3450 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3451 &buflen, fname, td, &vp, &nvp, &pf, &ds, 3452 &fds); 3453 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 3454 if (error == 0) { 3455 if (pf->dsf_dir >= nfsrv_dsdirsize) { 3456 printf("copymr: dsdir out of range\n"); 3457 pf->dsf_dir = 0; 3458 } 3459 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 3460 error = nfsrv_copymr(vp, nvp, 3461 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 3462 (struct pnfsdsfile *)buf, 3463 buflen / sizeof(*pf), td->td_ucred, td); 3464 vput(vp); 3465 vput(nvp); 3466 if (fds != NULL && error == 0) { 3467 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 3468 ret = vn_lock(curdvp, LK_EXCLUSIVE); 3469 if (ret == 0) { 3470 nfsrv_dsremove(curdvp, fname, 3471 td->td_ucred, td); 3472 NFSVOPUNLOCK(curdvp, 0); 3473 } 3474 } 3475 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 3476 } 3477 free(cp, M_TEMP); 3478 free(cp2, M_TEMP); 3479 free(cp3, M_TEMP); 3480 free(buf, M_TEMP); 3481 } 3482 } else { 3483 error = nfssvc_srvcall(td, uap, td->td_ucred); 3484 } 3485 3486 out: 3487 NFSEXITCODE(error); 3488 return (error); 3489 } 3490 3491 static int 3492 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3493 { 3494 struct nfsex_args export; 3495 struct file *fp = NULL; 3496 int stablefd, len; 3497 struct nfsd_clid adminrevoke; 3498 struct nfsd_dumplist dumplist; 3499 struct nfsd_dumpclients *dumpclients; 3500 struct nfsd_dumplocklist dumplocklist; 3501 struct nfsd_dumplocks *dumplocks; 3502 struct nameidata nd; 3503 vnode_t vp; 3504 int error = EINVAL, igotlock; 3505 struct proc *procp; 3506 static int suspend_nfsd = 0; 3507 3508 if (uap->flag & NFSSVC_PUBLICFH) { 3509 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3510 sizeof (fhandle_t)); 3511 error = copyin(uap->argp, 3512 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3513 if (!error) 3514 nfs_pubfhset = 1; 3515 } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { 3516 error = copyin(uap->argp,(caddr_t)&export, 3517 sizeof (struct nfsex_args)); 3518 if (!error) 3519 error = nfsrv_v4rootexport(&export, cred, p); 3520 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 3521 nfs_pubfhset = 0; 3522 error = 0; 3523 } else if (uap->flag & NFSSVC_STABLERESTART) { 3524 error = copyin(uap->argp, (caddr_t)&stablefd, 3525 sizeof (int)); 3526 if (!error) 3527 error = fp_getfvp(p, stablefd, &fp, &vp); 3528 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 3529 error = EBADF; 3530 if (!error && newnfs_numnfsd != 0) 3531 error = EPERM; 3532 if (!error) { 3533 nfsrv_stablefirst.nsf_fp = fp; 3534 nfsrv_setupstable(p); 3535 } 3536 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 3537 error = copyin(uap->argp, (caddr_t)&adminrevoke, 3538 sizeof (struct nfsd_clid)); 3539 if (!error) 3540 error = nfsrv_adminrevoke(&adminrevoke, p); 3541 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 3542 error = copyin(uap->argp, (caddr_t)&dumplist, 3543 sizeof (struct nfsd_dumplist)); 3544 if (!error && (dumplist.ndl_size < 1 || 3545 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 3546 error = EPERM; 3547 if (!error) { 3548 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 3549 dumpclients = (struct nfsd_dumpclients *)malloc(len, 3550 M_TEMP, M_WAITOK); 3551 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 3552 error = copyout(dumpclients, 3553 CAST_USER_ADDR_T(dumplist.ndl_list), len); 3554 free(dumpclients, M_TEMP); 3555 } 3556 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 3557 error = copyin(uap->argp, (caddr_t)&dumplocklist, 3558 sizeof (struct nfsd_dumplocklist)); 3559 if (!error && (dumplocklist.ndllck_size < 1 || 3560 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 3561 error = EPERM; 3562 if (!error) 3563 error = nfsrv_lookupfilename(&nd, 3564 dumplocklist.ndllck_fname, p); 3565 if (!error) { 3566 len = sizeof (struct nfsd_dumplocks) * 3567 dumplocklist.ndllck_size; 3568 dumplocks = (struct nfsd_dumplocks *)malloc(len, 3569 M_TEMP, M_WAITOK); 3570 nfsrv_dumplocks(nd.ni_vp, dumplocks, 3571 dumplocklist.ndllck_size, p); 3572 vput(nd.ni_vp); 3573 error = copyout(dumplocks, 3574 CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); 3575 free(dumplocks, M_TEMP); 3576 } 3577 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 3578 procp = p->td_proc; 3579 PROC_LOCK(procp); 3580 nfsd_master_pid = procp->p_pid; 3581 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 3582 nfsd_master_start = procp->p_stats->p_start; 3583 nfsd_master_proc = procp; 3584 PROC_UNLOCK(procp); 3585 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 3586 NFSLOCKV4ROOTMUTEX(); 3587 if (suspend_nfsd == 0) { 3588 /* Lock out all nfsd threads */ 3589 do { 3590 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, 3591 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 3592 } while (igotlock == 0 && suspend_nfsd == 0); 3593 suspend_nfsd = 1; 3594 } 3595 NFSUNLOCKV4ROOTMUTEX(); 3596 error = 0; 3597 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 3598 NFSLOCKV4ROOTMUTEX(); 3599 if (suspend_nfsd != 0) { 3600 nfsv4_unlock(&nfsd_suspend_lock, 0); 3601 suspend_nfsd = 0; 3602 } 3603 NFSUNLOCKV4ROOTMUTEX(); 3604 error = 0; 3605 } 3606 3607 NFSEXITCODE(error); 3608 return (error); 3609 } 3610 3611 /* 3612 * Check exports. 3613 * Returns 0 if ok, 1 otherwise. 3614 */ 3615 int 3616 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 3617 { 3618 int i; 3619 3620 /* 3621 * This seems odd, but allow the case where the security flavor 3622 * list is empty. This happens when NFSv4 is traversing non-exported 3623 * file systems. Exported file systems should always have a non-empty 3624 * security flavor list. 3625 */ 3626 if (exp->nes_numsecflavor == 0) 3627 return (0); 3628 3629 for (i = 0; i < exp->nes_numsecflavor; i++) { 3630 /* 3631 * The tests for privacy and integrity must be first, 3632 * since ND_GSS is set for everything but AUTH_SYS. 3633 */ 3634 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 3635 (nd->nd_flag & ND_GSSPRIVACY)) 3636 return (0); 3637 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 3638 (nd->nd_flag & ND_GSSINTEGRITY)) 3639 return (0); 3640 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 3641 (nd->nd_flag & ND_GSS)) 3642 return (0); 3643 if (exp->nes_secflavors[i] == AUTH_SYS && 3644 (nd->nd_flag & ND_GSS) == 0) 3645 return (0); 3646 } 3647 return (1); 3648 } 3649 3650 /* 3651 * Calculate a hash value for the fid in a file handle. 3652 */ 3653 uint32_t 3654 nfsrv_hashfh(fhandle_t *fhp) 3655 { 3656 uint32_t hashval; 3657 3658 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 3659 return (hashval); 3660 } 3661 3662 /* 3663 * Calculate a hash value for the sessionid. 3664 */ 3665 uint32_t 3666 nfsrv_hashsessionid(uint8_t *sessionid) 3667 { 3668 uint32_t hashval; 3669 3670 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 3671 return (hashval); 3672 } 3673 3674 /* 3675 * Signal the userland master nfsd to backup the stable restart file. 3676 */ 3677 void 3678 nfsrv_backupstable(void) 3679 { 3680 struct proc *procp; 3681 3682 if (nfsd_master_proc != NULL) { 3683 procp = pfind(nfsd_master_pid); 3684 /* Try to make sure it is the correct process. */ 3685 if (procp == nfsd_master_proc && 3686 procp->p_stats->p_start.tv_sec == 3687 nfsd_master_start.tv_sec && 3688 procp->p_stats->p_start.tv_usec == 3689 nfsd_master_start.tv_usec && 3690 strcmp(procp->p_comm, nfsd_master_comm) == 0) 3691 kern_psignal(procp, SIGUSR2); 3692 else 3693 nfsd_master_proc = NULL; 3694 3695 if (procp != NULL) 3696 PROC_UNLOCK(procp); 3697 } 3698 } 3699 3700 /* 3701 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 3702 * The arguments are in a structure, so that they can be passed through 3703 * taskqueue for a kernel process to execute this function. 3704 */ 3705 struct nfsrvdscreate { 3706 int done; 3707 int inprog; 3708 struct task tsk; 3709 struct ucred *tcred; 3710 struct vnode *dvp; 3711 NFSPROC_T *p; 3712 struct pnfsdsfile *pf; 3713 int err; 3714 fhandle_t fh; 3715 struct vattr va; 3716 struct vattr createva; 3717 }; 3718 3719 int 3720 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 3721 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 3722 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 3723 { 3724 struct vnode *nvp; 3725 struct nameidata named; 3726 struct vattr va; 3727 char *bufp; 3728 u_long *hashp; 3729 struct nfsnode *np; 3730 struct nfsmount *nmp; 3731 int error; 3732 3733 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 3734 LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); 3735 nfsvno_setpathbuf(&named, &bufp, &hashp); 3736 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 3737 named.ni_cnd.cn_thread = p; 3738 named.ni_cnd.cn_nameptr = bufp; 3739 if (fnamep != NULL) { 3740 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 3741 named.ni_cnd.cn_namelen = strlen(bufp); 3742 } else 3743 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 3744 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 3745 3746 /* Create the date file in the DS mount. */ 3747 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 3748 if (error == 0) { 3749 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 3750 NFSVOPUNLOCK(dvp, 0); 3751 if (error == 0) { 3752 /* Set the ownership of the file. */ 3753 error = VOP_SETATTR(nvp, nvap, tcred); 3754 NFSD_DEBUG(4, "nfsrv_dscreate:" 3755 " setattr-uid=%d\n", error); 3756 if (error != 0) 3757 vput(nvp); 3758 } 3759 if (error != 0) 3760 printf("pNFS: pnfscreate failed=%d\n", error); 3761 } else 3762 printf("pNFS: pnfscreate vnlock=%d\n", error); 3763 if (error == 0) { 3764 np = VTONFS(nvp); 3765 nmp = VFSTONFS(nvp->v_mount); 3766 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 3767 != 0 || nmp->nm_nam->sa_len > sizeof( 3768 struct sockaddr_in6) || 3769 np->n_fhp->nfh_len != NFSX_MYFH) { 3770 printf("Bad DS file: fstype=%s salen=%d" 3771 " fhlen=%d\n", 3772 nvp->v_mount->mnt_vfc->vfc_name, 3773 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 3774 error = ENOENT; 3775 } 3776 3777 /* Set extattrs for the DS on the MDS file. */ 3778 if (error == 0) { 3779 if (dsa != NULL) { 3780 error = VOP_GETATTR(nvp, &va, tcred); 3781 if (error == 0) { 3782 dsa->dsa_filerev = va.va_filerev; 3783 dsa->dsa_size = va.va_size; 3784 dsa->dsa_atime = va.va_atime; 3785 dsa->dsa_mtime = va.va_mtime; 3786 } 3787 } 3788 if (error == 0) { 3789 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 3790 NFSX_MYFH); 3791 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 3792 nmp->nm_nam->sa_len); 3793 NFSBCOPY(named.ni_cnd.cn_nameptr, 3794 pf->dsf_filename, 3795 sizeof(pf->dsf_filename)); 3796 } 3797 } else 3798 printf("pNFS: pnfscreate can't get DS" 3799 " attr=%d\n", error); 3800 if (nvpp != NULL && error == 0) 3801 *nvpp = nvp; 3802 else 3803 vput(nvp); 3804 } 3805 nfsvno_relpathbuf(&named); 3806 return (error); 3807 } 3808 3809 /* 3810 * Start up the thread that will execute nfsrv_dscreate(). 3811 */ 3812 static void 3813 start_dscreate(void *arg, int pending) 3814 { 3815 struct nfsrvdscreate *dsc; 3816 3817 dsc = (struct nfsrvdscreate *)arg; 3818 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 3819 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 3820 dsc->done = 1; 3821 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 3822 } 3823 3824 /* 3825 * Create a pNFS data file on the Data Server(s). 3826 */ 3827 static void 3828 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 3829 NFSPROC_T *p) 3830 { 3831 struct nfsrvdscreate *dsc, *tdsc; 3832 struct nfsdevice *ds, *mds; 3833 struct mount *mp; 3834 struct pnfsdsfile *pf, *tpf; 3835 struct pnfsdsattr dsattr; 3836 struct vattr va; 3837 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 3838 struct nfsmount *nmp; 3839 fhandle_t fh; 3840 uid_t vauid; 3841 gid_t vagid; 3842 u_short vamode; 3843 struct ucred *tcred; 3844 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; 3845 int failpos, timo; 3846 3847 /* Get a DS server directory in a round-robin order. */ 3848 mirrorcnt = 1; 3849 NFSDDSLOCK(); 3850 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 3851 if (ds->nfsdev_nmp != NULL) 3852 break; 3853 } 3854 if (ds == NULL) { 3855 NFSDDSUNLOCK(); 3856 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 3857 return; 3858 } 3859 i = dsdir[0] = ds->nfsdev_nextdir; 3860 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 3861 dvp[0] = ds->nfsdev_dsdir[i]; 3862 if (nfsrv_maxpnfsmirror > 1) { 3863 mds = TAILQ_NEXT(ds, nfsdev_list); 3864 TAILQ_FOREACH_FROM(mds, &nfsrv_devidhead, nfsdev_list) { 3865 if (mds->nfsdev_nmp != NULL) { 3866 dsdir[mirrorcnt] = i; 3867 dvp[mirrorcnt] = mds->nfsdev_dsdir[i]; 3868 mirrorcnt++; 3869 if (mirrorcnt >= nfsrv_maxpnfsmirror) 3870 break; 3871 } 3872 } 3873 } 3874 /* Put at end of list to implement round-robin usage. */ 3875 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 3876 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 3877 NFSDDSUNLOCK(); 3878 dsc = NULL; 3879 if (mirrorcnt > 1) 3880 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 3881 M_WAITOK | M_ZERO); 3882 tpf = pf = malloc(sizeof(*pf) * mirrorcnt, M_TEMP, M_WAITOK | M_ZERO); 3883 3884 error = nfsvno_getfh(vp, &fh, p); 3885 if (error == 0) 3886 error = VOP_GETATTR(vp, &va, cred); 3887 if (error == 0) { 3888 /* Set the attributes for "vp" to Setattr the DS vp. */ 3889 vauid = va.va_uid; 3890 vagid = va.va_gid; 3891 vamode = va.va_mode; 3892 VATTR_NULL(&va); 3893 va.va_uid = vauid; 3894 va.va_gid = vagid; 3895 va.va_mode = vamode; 3896 va.va_size = 0; 3897 } else 3898 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 3899 3900 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 3901 cred->cr_gid); 3902 /* Make data file name based on FH. */ 3903 tcred = newnfs_getcred(); 3904 3905 /* 3906 * Create the file on each DS mirror, using kernel process(es) for the 3907 * additional mirrors. 3908 */ 3909 failpos = -1; 3910 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 3911 tpf->dsf_dir = dsdir[i]; 3912 tdsc->tcred = tcred; 3913 tdsc->p = p; 3914 tdsc->pf = tpf; 3915 tdsc->createva = *vap; 3916 tdsc->fh = fh; 3917 tdsc->va = va; 3918 tdsc->dvp = dvp[i]; 3919 tdsc->done = 0; 3920 tdsc->inprog = 0; 3921 tdsc->err = 0; 3922 ret = EIO; 3923 if (nfs_pnfsiothreads != 0) { 3924 ret = nfs_pnfsio(start_dscreate, tdsc); 3925 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 3926 } 3927 if (ret != 0) { 3928 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 3929 NULL, tcred, p, NULL); 3930 if (ret != 0) { 3931 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 3932 error)); 3933 if (failpos == -1 && nfsds_failerr(ret)) 3934 failpos = i; 3935 else 3936 error = ret; 3937 } 3938 } 3939 } 3940 if (error == 0) { 3941 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 3942 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 3943 &dsattr, NULL, tcred, p, NULL); 3944 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 3945 failpos = mirrorcnt - 1; 3946 error = 0; 3947 } 3948 } 3949 timo = hz / 50; /* Wait for 20msec. */ 3950 if (timo < 1) 3951 timo = 1; 3952 /* Wait for kernel task(s) to complete. */ 3953 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 3954 while (tdsc->inprog != 0 && tdsc->done == 0) 3955 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 3956 if (tdsc->err != 0) { 3957 if (failpos == -1 && nfsds_failerr(tdsc->err)) 3958 failpos = i; 3959 else if (error == 0) 3960 error = tdsc->err; 3961 } 3962 } 3963 3964 /* 3965 * If failpos has been set, that mirror has failed, so it needs 3966 * to be disabled. 3967 */ 3968 if (failpos >= 0) { 3969 nmp = VFSTONFS(dvp[failpos]->v_mount); 3970 NFSLOCKMNT(nmp); 3971 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 3972 NFSMNTP_CANCELRPCS)) == 0) { 3973 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 3974 NFSUNLOCKMNT(nmp); 3975 ds = nfsrv_deldsnmp(nmp, p); 3976 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 3977 ds); 3978 if (ds != NULL) 3979 nfsrv_killrpcs(nmp); 3980 NFSLOCKMNT(nmp); 3981 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 3982 wakeup(nmp); 3983 } 3984 NFSUNLOCKMNT(nmp); 3985 } 3986 3987 NFSFREECRED(tcred); 3988 if (error == 0) { 3989 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 3990 error = vn_start_write(vp, &mp, V_WAIT); 3991 if (error == 0) { 3992 error = vn_extattr_set(vp, IO_NODELOCKED, 3993 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 3994 sizeof(*pf) * mirrorcnt, (char *)pf, p); 3995 if (error == 0) 3996 error = vn_extattr_set(vp, IO_NODELOCKED, 3997 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 3998 sizeof(dsattr), (char *)&dsattr, p); 3999 vn_finished_write(mp); 4000 if (error != 0) 4001 printf("pNFS: pnfscreate setextattr=%d\n", 4002 error); 4003 } else 4004 printf("pNFS: pnfscreate startwrite=%d\n", error); 4005 } else 4006 printf("pNFS: pnfscreate=%d\n", error); 4007 free(pf, M_TEMP); 4008 free(dsc, M_TEMP); 4009 } 4010 4011 /* 4012 * Get the information needed to remove the pNFS Data Server file from the 4013 * Metadata file. Upon success, ddvp is set non-NULL to the locked 4014 * DS directory vnode. The caller must unlock *ddvp when done with it. 4015 */ 4016 static void 4017 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, 4018 int *mirrorcntp, char *fname, fhandle_t *fhp) 4019 { 4020 struct vattr va; 4021 struct ucred *tcred; 4022 char *buf; 4023 int buflen, error; 4024 4025 dvpp[0] = NULL; 4026 /* If not an exported regular file or not a pNFS server, just return. */ 4027 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4028 nfsrv_devidcnt == 0) 4029 return; 4030 4031 /* Check to see if this is the last hard link. */ 4032 tcred = newnfs_getcred(); 4033 error = VOP_GETATTR(vp, &va, tcred); 4034 NFSFREECRED(tcred); 4035 if (error != 0) { 4036 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 4037 return; 4038 } 4039 if (va.va_nlink > 1) 4040 return; 4041 4042 error = nfsvno_getfh(vp, fhp, p); 4043 if (error != 0) { 4044 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 4045 return; 4046 } 4047 4048 buflen = 1024; 4049 buf = malloc(buflen, M_TEMP, M_WAITOK); 4050 /* Get the directory vnode for the DS mount and the file handle. */ 4051 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, 4052 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 4053 free(buf, M_TEMP); 4054 if (error != 0) 4055 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 4056 } 4057 4058 /* 4059 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 4060 * The arguments are in a structure, so that they can be passed through 4061 * taskqueue for a kernel process to execute this function. 4062 */ 4063 struct nfsrvdsremove { 4064 int done; 4065 int inprog; 4066 struct task tsk; 4067 struct ucred *tcred; 4068 struct vnode *dvp; 4069 NFSPROC_T *p; 4070 int err; 4071 char fname[PNFS_FILENAME_LEN + 1]; 4072 }; 4073 4074 static int 4075 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4076 NFSPROC_T *p) 4077 { 4078 struct nameidata named; 4079 struct vnode *nvp; 4080 char *bufp; 4081 u_long *hashp; 4082 int error; 4083 4084 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4085 if (error != 0) 4086 return (error); 4087 named.ni_cnd.cn_nameiop = DELETE; 4088 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4089 named.ni_cnd.cn_cred = tcred; 4090 named.ni_cnd.cn_thread = p; 4091 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 4092 nfsvno_setpathbuf(&named, &bufp, &hashp); 4093 named.ni_cnd.cn_nameptr = bufp; 4094 named.ni_cnd.cn_namelen = strlen(fname); 4095 strlcpy(bufp, fname, NAME_MAX); 4096 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4097 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4098 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4099 if (error == 0) { 4100 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4101 vput(nvp); 4102 } 4103 NFSVOPUNLOCK(dvp, 0); 4104 nfsvno_relpathbuf(&named); 4105 if (error != 0) 4106 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4107 return (error); 4108 } 4109 4110 /* 4111 * Start up the thread that will execute nfsrv_dsremove(). 4112 */ 4113 static void 4114 start_dsremove(void *arg, int pending) 4115 { 4116 struct nfsrvdsremove *dsrm; 4117 4118 dsrm = (struct nfsrvdsremove *)arg; 4119 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4120 dsrm->p); 4121 dsrm->done = 1; 4122 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4123 } 4124 4125 /* 4126 * Remove a pNFS data file from a Data Server. 4127 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4128 * removed to set up the dvp and fill in the FH. 4129 */ 4130 static void 4131 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4132 NFSPROC_T *p) 4133 { 4134 struct ucred *tcred; 4135 struct nfsrvdsremove *dsrm, *tdsrm; 4136 struct nfsdevice *ds; 4137 struct nfsmount *nmp; 4138 int failpos, i, ret, timo; 4139 4140 tcred = newnfs_getcred(); 4141 dsrm = NULL; 4142 if (mirrorcnt > 1) 4143 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4144 /* 4145 * Remove the file on each DS mirror, using kernel process(es) for the 4146 * additional mirrors. 4147 */ 4148 failpos = -1; 4149 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4150 tdsrm->tcred = tcred; 4151 tdsrm->p = p; 4152 tdsrm->dvp = dvp[i]; 4153 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4154 tdsrm->inprog = 0; 4155 tdsrm->done = 0; 4156 tdsrm->err = 0; 4157 ret = EIO; 4158 if (nfs_pnfsiothreads != 0) { 4159 ret = nfs_pnfsio(start_dsremove, tdsrm); 4160 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4161 } 4162 if (ret != 0) { 4163 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4164 if (failpos == -1 && nfsds_failerr(ret)) 4165 failpos = i; 4166 } 4167 } 4168 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4169 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4170 failpos = mirrorcnt - 1; 4171 timo = hz / 50; /* Wait for 20msec. */ 4172 if (timo < 1) 4173 timo = 1; 4174 /* Wait for kernel task(s) to complete. */ 4175 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4176 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4177 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4178 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4179 failpos = i; 4180 } 4181 4182 /* 4183 * If failpos has been set, that mirror has failed, so it needs 4184 * to be disabled. 4185 */ 4186 if (failpos >= 0) { 4187 nmp = VFSTONFS(dvp[failpos]->v_mount); 4188 NFSLOCKMNT(nmp); 4189 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4190 NFSMNTP_CANCELRPCS)) == 0) { 4191 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4192 NFSUNLOCKMNT(nmp); 4193 ds = nfsrv_deldsnmp(nmp, p); 4194 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4195 ds); 4196 if (ds != NULL) 4197 nfsrv_killrpcs(nmp); 4198 NFSLOCKMNT(nmp); 4199 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4200 wakeup(nmp); 4201 } 4202 NFSUNLOCKMNT(nmp); 4203 } 4204 4205 /* Get rid all layouts for the file. */ 4206 nfsrv_freefilelayouts(fhp); 4207 4208 NFSFREECRED(tcred); 4209 free(dsrm, M_TEMP); 4210 } 4211 4212 /* 4213 * Generate a file name based on the file handle and put it in *bufp. 4214 * Return the number of bytes generated. 4215 */ 4216 static int 4217 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4218 { 4219 int i; 4220 uint8_t *cp; 4221 const uint8_t *hexdigits = "0123456789abcdef"; 4222 4223 cp = (uint8_t *)fhp; 4224 for (i = 0; i < sizeof(*fhp); i++) { 4225 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4226 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4227 } 4228 bufp[2 * i] = '\0'; 4229 return (2 * i); 4230 } 4231 4232 /* 4233 * Update the Metadata file's attributes from the DS file when a Read/Write 4234 * layout is returned. 4235 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4236 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4237 */ 4238 int 4239 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4240 { 4241 struct ucred *tcred; 4242 int error; 4243 4244 /* Do this as root so that it won't fail with EACCES. */ 4245 tcred = newnfs_getcred(); 4246 error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4247 NULL, NULL, NULL, nap, NULL); 4248 NFSFREECRED(tcred); 4249 return (error); 4250 } 4251 4252 /* 4253 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 4254 */ 4255 static int 4256 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4257 NFSPROC_T *p) 4258 { 4259 int error; 4260 4261 error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL, 4262 NULL, NULL, NULL, NULL, aclp); 4263 return (error); 4264 } 4265 4266 static int 4267 nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt, 4268 struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, 4269 char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp) 4270 { 4271 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4272 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4273 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4274 struct nfsdevice *ds; 4275 struct pnfsdsattr dsattr; 4276 char *buf; 4277 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4278 4279 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4280 /* 4281 * If not a regular file, not exported or not a pNFS server, 4282 * just return ENOENT. 4283 */ 4284 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4285 nfsrv_devidcnt == 0) 4286 return (ENOENT); 4287 4288 buflen = 1024; 4289 buf = malloc(buflen, M_TEMP, M_WAITOK); 4290 error = 0; 4291 4292 /* 4293 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4294 * from the MetaData file's extended attribute. 4295 */ 4296 if (ioproc == NFSPROC_GETATTR) { 4297 error = vn_extattr_get(vp, IO_NODELOCKED, 4298 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4299 p); 4300 if (error == 0 && buflen != sizeof(dsattr)) 4301 error = ENXIO; 4302 if (error == 0) { 4303 NFSBCOPY(buf, &dsattr, buflen); 4304 nap->na_filerev = dsattr.dsa_filerev; 4305 nap->na_size = dsattr.dsa_size; 4306 nap->na_atime = dsattr.dsa_atime; 4307 nap->na_mtime = dsattr.dsa_mtime; 4308 4309 /* 4310 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4311 * returns 0, just return now. nfsrv_checkdsattr() 4312 * returns 0 if there is no Read/Write layout 4313 * plus either an Open/Write_access or Write 4314 * delegation issued to a client for the file. 4315 */ 4316 if (nfsrv_pnfsgetdsattr == 0 || 4317 nfsrv_checkdsattr(nd, vp, p) == 0) { 4318 free(buf, M_TEMP); 4319 return (error); 4320 } 4321 } 4322 4323 /* 4324 * Clear ENOATTR so the code below will attempt to do a 4325 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4326 * the extended attribute. 4327 */ 4328 if (error == ENOATTR) 4329 error = 0; 4330 } 4331 4332 origmircnt = -1; 4333 trycnt = 0; 4334 tryagain: 4335 if (error == 0) { 4336 buflen = 1024; 4337 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4338 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4339 NULL, NULL); 4340 if (error == 0) { 4341 for (i = 0; i < mirrorcnt; i++) 4342 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4343 } else 4344 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4345 } else 4346 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4347 if (error == 0) { 4348 failpos = -1; 4349 if (origmircnt == -1) 4350 origmircnt = mirrorcnt; 4351 /* 4352 * If failpos is set to a mirror#, then that mirror has 4353 * failed and will be disabled. For Read and Getattr, the 4354 * function only tries one mirror, so if that mirror has 4355 * failed, it will need to be retried. As such, increment 4356 * tryitagain for these cases. 4357 * For Write, Setattr and Setacl, the function tries all 4358 * mirrors and will not return an error for the case where 4359 * one mirror has failed. For these cases, the functioning 4360 * mirror(s) will have been modified, so a retry isn't 4361 * necessary. These functions will set failpos for the 4362 * failed mirror#. 4363 */ 4364 if (ioproc == NFSPROC_READDS) { 4365 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4366 mpp, mpp2); 4367 if (nfsds_failerr(error) && mirrorcnt > 1) { 4368 /* 4369 * Setting failpos will cause the mirror 4370 * to be disabled and then a retry of this 4371 * read is required. 4372 */ 4373 failpos = 0; 4374 error = 0; 4375 trycnt++; 4376 } 4377 } else if (ioproc == NFSPROC_WRITEDS) 4378 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4379 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4380 else if (ioproc == NFSPROC_SETATTR) 4381 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4382 mirrorcnt, nap, &failpos); 4383 else if (ioproc == NFSPROC_SETACL) 4384 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4385 mirrorcnt, aclp, &failpos); 4386 else { 4387 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4388 vp, nmp[mirrorcnt - 1], nap); 4389 if (nfsds_failerr(error) && mirrorcnt > 1) { 4390 /* 4391 * Setting failpos will cause the mirror 4392 * to be disabled and then a retry of this 4393 * getattr is required. 4394 */ 4395 failpos = mirrorcnt - 1; 4396 error = 0; 4397 trycnt++; 4398 } 4399 } 4400 ds = NULL; 4401 if (failpos >= 0) { 4402 failnmp = nmp[failpos]; 4403 NFSLOCKMNT(failnmp); 4404 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4405 NFSMNTP_CANCELRPCS)) == 0) { 4406 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4407 NFSUNLOCKMNT(failnmp); 4408 ds = nfsrv_deldsnmp(failnmp, p); 4409 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 4410 failpos, ds); 4411 if (ds != NULL) 4412 nfsrv_killrpcs(failnmp); 4413 NFSLOCKMNT(failnmp); 4414 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4415 wakeup(failnmp); 4416 } 4417 NFSUNLOCKMNT(failnmp); 4418 } 4419 for (i = 0; i < mirrorcnt; i++) 4420 NFSVOPUNLOCK(dvp[i], 0); 4421 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 4422 trycnt); 4423 /* Try the Read/Getattr again if a mirror was deleted. */ 4424 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 4425 goto tryagain; 4426 } else { 4427 /* Return ENOENT for any Extended Attribute error. */ 4428 error = ENOENT; 4429 } 4430 free(buf, M_TEMP); 4431 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 4432 return (error); 4433 } 4434 4435 /* 4436 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 4437 * attribute. 4438 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 4439 * to be checked. If it points to a NULL nmp, then it returns 4440 * a suitable destination. 4441 * curnmp - If non-NULL, it is the source mount for the copy. 4442 */ 4443 int 4444 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 4445 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 4446 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 4447 struct nfsmount *curnmp, int *ippos, int *dsdirp) 4448 { 4449 struct vnode *dvp, *nvp, **tdvpp; 4450 struct nfsmount *nmp, *newnmp; 4451 struct sockaddr *sad; 4452 struct sockaddr_in *sin; 4453 struct nfsdevice *ds, *fndds; 4454 struct pnfsdsfile *pf; 4455 uint32_t dsdir; 4456 int error, fhiszero, fnd, gotone, i, mirrorcnt; 4457 4458 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 4459 *mirrorcntp = 1; 4460 tdvpp = dvpp; 4461 if (nvpp != NULL) 4462 *nvpp = NULL; 4463 if (dvpp != NULL) 4464 *dvpp = NULL; 4465 if (ippos != NULL) 4466 *ippos = -1; 4467 if (newnmpp != NULL) 4468 newnmp = *newnmpp; 4469 else 4470 newnmp = NULL; 4471 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 4472 "pnfsd.dsfile", buflenp, buf, p); 4473 mirrorcnt = *buflenp / sizeof(*pf); 4474 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 4475 *buflenp != sizeof(*pf) * mirrorcnt)) 4476 error = ENOATTR; 4477 4478 pf = (struct pnfsdsfile *)buf; 4479 /* If curnmp != NULL, check for a match in the mirror list. */ 4480 if (curnmp != NULL && error == 0) { 4481 fnd = 0; 4482 for (i = 0; i < mirrorcnt; i++, pf++) { 4483 sad = (struct sockaddr *)&pf->dsf_sin; 4484 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 4485 if (ippos != NULL) 4486 *ippos = i; 4487 fnd = 1; 4488 break; 4489 } 4490 } 4491 if (fnd == 0) 4492 error = ENXIO; 4493 } 4494 4495 gotone = 0; 4496 pf = (struct pnfsdsfile *)buf; 4497 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 4498 error); 4499 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 4500 fhiszero = 0; 4501 sad = (struct sockaddr *)&pf->dsf_sin; 4502 sin = &pf->dsf_sin; 4503 dsdir = pf->dsf_dir; 4504 if (dsdir >= nfsrv_dsdirsize) { 4505 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 4506 error = ENOATTR; 4507 } else if (nvpp != NULL && newnmp != NULL && 4508 nfsaddr2_match(sad, newnmp->nm_nam)) 4509 error = EEXIST; 4510 if (error == 0) { 4511 if (ippos != NULL && curnmp == NULL && 4512 sad->sa_family == AF_INET && 4513 sin->sin_addr.s_addr == 0) 4514 *ippos = i; 4515 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 4516 fhiszero = 1; 4517 /* Use the socket address to find the mount point. */ 4518 fndds = NULL; 4519 NFSDDSLOCK(); 4520 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 4521 if (ds->nfsdev_nmp != NULL) { 4522 dvp = ds->nfsdev_dvp; 4523 nmp = VFSTONFS(dvp->v_mount); 4524 if (nmp != ds->nfsdev_nmp) 4525 printf("different2 nmp %p %p\n", 4526 nmp, ds->nfsdev_nmp); 4527 if (nfsaddr2_match(sad, nmp->nm_nam)) 4528 fndds = ds; 4529 else if (newnmpp != NULL && 4530 newnmp == NULL && 4531 (*newnmpp == NULL || fndds == NULL)) 4532 /* 4533 * Return a destination for the 4534 * copy in newnmpp. Choose the 4535 * last valid one before the 4536 * source mirror, so it isn't 4537 * always the first one. 4538 */ 4539 *newnmpp = nmp; 4540 } 4541 } 4542 NFSDDSUNLOCK(); 4543 if (fndds != NULL) { 4544 dvp = fndds->nfsdev_dsdir[dsdir]; 4545 if (lktype != 0 || fhiszero != 0 || 4546 (nvpp != NULL && *nvpp == NULL)) { 4547 if (fhiszero != 0) 4548 error = vn_lock(dvp, 4549 LK_EXCLUSIVE); 4550 else if (lktype != 0) 4551 error = vn_lock(dvp, lktype); 4552 else 4553 error = vn_lock(dvp, LK_SHARED); 4554 /* 4555 * If the file handle is all 0's, try to 4556 * do a Lookup against the DS to acquire 4557 * it. 4558 * If dvpp == NULL or the Lookup fails, 4559 * unlock dvp after the call. 4560 */ 4561 if (error == 0 && (fhiszero != 0 || 4562 (nvpp != NULL && *nvpp == NULL))) { 4563 error = nfsrv_pnfslookupds(vp, 4564 dvp, pf, &nvp, p); 4565 if (error == 0) { 4566 if (fhiszero != 0) 4567 nfsrv_pnfssetfh( 4568 vp, pf, 4569 nvp, p); 4570 if (nvpp != NULL && 4571 *nvpp == NULL) { 4572 *nvpp = nvp; 4573 *dsdirp = dsdir; 4574 } else 4575 vput(nvp); 4576 } 4577 if (error != 0 || lktype == 0) 4578 NFSVOPUNLOCK(dvp, 0); 4579 } 4580 } 4581 if (error == 0) { 4582 gotone++; 4583 NFSD_DEBUG(4, "gotone=%d\n", gotone); 4584 if (devid != NULL) { 4585 NFSBCOPY(fndds->nfsdev_deviceid, 4586 devid, NFSX_V4DEVICEID); 4587 devid += NFSX_V4DEVICEID; 4588 } 4589 if (dvpp != NULL) 4590 *tdvpp++ = dvp; 4591 if (fhp != NULL) 4592 NFSBCOPY(&pf->dsf_fh, fhp++, 4593 NFSX_MYFH); 4594 if (fnamep != NULL && gotone == 1) 4595 strlcpy(fnamep, 4596 pf->dsf_filename, 4597 sizeof(pf->dsf_filename)); 4598 } else 4599 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 4600 "err=%d\n", error); 4601 } 4602 } 4603 } 4604 if (error == 0 && gotone == 0) 4605 error = ENOENT; 4606 4607 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 4608 error); 4609 if (error == 0) 4610 *mirrorcntp = gotone; 4611 else { 4612 if (gotone > 0 && dvpp != NULL) { 4613 /* 4614 * If the error didn't occur on the first one and 4615 * dvpp != NULL, the one(s) prior to the failure will 4616 * have locked dvp's that need to be unlocked. 4617 */ 4618 for (i = 0; i < gotone; i++) { 4619 NFSVOPUNLOCK(*dvpp, 0); 4620 *dvpp++ = NULL; 4621 } 4622 } 4623 /* 4624 * If it found the vnode to be copied from before a failure, 4625 * it needs to be vput()'d. 4626 */ 4627 if (nvpp != NULL && *nvpp != NULL) { 4628 vput(*nvpp); 4629 *nvpp = NULL; 4630 } 4631 } 4632 return (error); 4633 } 4634 4635 /* 4636 * Set the extended attribute for the Change attribute. 4637 */ 4638 static int 4639 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4640 { 4641 struct pnfsdsattr dsattr; 4642 struct mount *mp; 4643 int error; 4644 4645 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); 4646 error = vn_start_write(vp, &mp, V_WAIT); 4647 if (error == 0) { 4648 dsattr.dsa_filerev = nap->na_filerev; 4649 dsattr.dsa_size = nap->na_size; 4650 dsattr.dsa_atime = nap->na_atime; 4651 dsattr.dsa_mtime = nap->na_mtime; 4652 error = vn_extattr_set(vp, IO_NODELOCKED, 4653 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4654 sizeof(dsattr), (char *)&dsattr, p); 4655 vn_finished_write(mp); 4656 } 4657 if (error != 0) 4658 printf("pNFS: setextattr=%d\n", error); 4659 return (error); 4660 } 4661 4662 static int 4663 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 4664 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) 4665 { 4666 uint32_t *tl; 4667 struct nfsrv_descript *nd; 4668 nfsv4stateid_t st; 4669 struct mbuf *m, *m2; 4670 int error = 0, retlen, tlen, trimlen; 4671 4672 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); 4673 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 4674 *mpp = NULL; 4675 /* 4676 * Use a stateid where other is an alternating 01010 pattern and 4677 * seqid is 0xffffffff. This value is not defined as special by 4678 * the RFC and is used by the FreeBSD NFS server to indicate an 4679 * MDS->DS proxy operation. 4680 */ 4681 st.other[0] = 0x55555555; 4682 st.other[1] = 0x55555555; 4683 st.other[2] = 0x55555555; 4684 st.seqid = 0xffffffff; 4685 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), 4686 NULL, NULL, 0, 0); 4687 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 4688 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); 4689 txdr_hyper(off, tl); 4690 *(tl + 2) = txdr_unsigned(len); 4691 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 4692 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 4693 if (error != 0) { 4694 free(nd, M_TEMP); 4695 return (error); 4696 } 4697 if (nd->nd_repstat == 0) { 4698 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 4699 NFSM_STRSIZ(retlen, len); 4700 if (retlen > 0) { 4701 /* Trim off the pre-data XDR from the mbuf chain. */ 4702 m = nd->nd_mrep; 4703 while (m != NULL && m != nd->nd_md) { 4704 if (m->m_next == nd->nd_md) { 4705 m->m_next = NULL; 4706 m_freem(nd->nd_mrep); 4707 nd->nd_mrep = m = nd->nd_md; 4708 } else 4709 m = m->m_next; 4710 } 4711 if (m == NULL) { 4712 printf("nfsrv_readdsrpc: busted mbuf list\n"); 4713 error = ENOENT; 4714 goto nfsmout; 4715 } 4716 4717 /* 4718 * Now, adjust first mbuf so that any XDR before the 4719 * read data is skipped over. 4720 */ 4721 trimlen = nd->nd_dpos - mtod(m, char *); 4722 if (trimlen > 0) { 4723 m->m_len -= trimlen; 4724 NFSM_DATAP(m, trimlen); 4725 } 4726 4727 /* 4728 * Truncate the mbuf chain at retlen bytes of data, 4729 * plus XDR padding that brings the length up to a 4730 * multiple of 4. 4731 */ 4732 tlen = NFSM_RNDUP(retlen); 4733 do { 4734 if (m->m_len >= tlen) { 4735 m->m_len = tlen; 4736 tlen = 0; 4737 m2 = m->m_next; 4738 m->m_next = NULL; 4739 m_freem(m2); 4740 break; 4741 } 4742 tlen -= m->m_len; 4743 m = m->m_next; 4744 } while (m != NULL); 4745 if (tlen > 0) { 4746 printf("nfsrv_readdsrpc: busted mbuf list\n"); 4747 error = ENOENT; 4748 goto nfsmout; 4749 } 4750 *mpp = nd->nd_mrep; 4751 *mpendp = m; 4752 nd->nd_mrep = NULL; 4753 } 4754 } else 4755 error = nd->nd_repstat; 4756 nfsmout: 4757 /* If nd->nd_mrep is already NULL, this is a no-op. */ 4758 m_freem(nd->nd_mrep); 4759 free(nd, M_TEMP); 4760 NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); 4761 return (error); 4762 } 4763 4764 /* 4765 * Do a write RPC on a DS data file, using this structure for the arguments, 4766 * so that this function can be executed by a separate kernel process. 4767 */ 4768 struct nfsrvwritedsdorpc { 4769 int done; 4770 int inprog; 4771 struct task tsk; 4772 fhandle_t fh; 4773 off_t off; 4774 int len; 4775 struct nfsmount *nmp; 4776 struct ucred *cred; 4777 NFSPROC_T *p; 4778 struct mbuf *m; 4779 int err; 4780 }; 4781 4782 static int 4783 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, 4784 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) 4785 { 4786 uint32_t *tl; 4787 struct nfsrv_descript *nd; 4788 nfsattrbit_t attrbits; 4789 nfsv4stateid_t st; 4790 int commit, error, retlen; 4791 4792 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 4793 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, 4794 sizeof(fhandle_t), NULL, NULL, 0, 0); 4795 4796 /* 4797 * Use a stateid where other is an alternating 01010 pattern and 4798 * seqid is 0xffffffff. This value is not defined as special by 4799 * the RFC and is used by the FreeBSD NFS server to indicate an 4800 * MDS->DS proxy operation. 4801 */ 4802 st.other[0] = 0x55555555; 4803 st.other[1] = 0x55555555; 4804 st.other[2] = 0x55555555; 4805 st.seqid = 0xffffffff; 4806 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 4807 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); 4808 txdr_hyper(off, tl); 4809 tl += 2; 4810 /* 4811 * Do all writes FileSync, since the server doesn't hold onto dirty 4812 * buffers. Since clients should be accessing the DS servers directly 4813 * using the pNFS layouts, this just needs to work correctly as a 4814 * fallback. 4815 */ 4816 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); 4817 *tl = txdr_unsigned(len); 4818 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); 4819 4820 /* Put data in mbuf chain. */ 4821 nd->nd_mb->m_next = m; 4822 4823 /* Set nd_mb and nd_bpos to end of data. */ 4824 while (m->m_next != NULL) 4825 m = m->m_next; 4826 nd->nd_mb = m; 4827 nd->nd_bpos = mtod(m, char *) + m->m_len; 4828 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); 4829 4830 /* Do a Getattr for Size, Change and Modify Time. */ 4831 NFSZERO_ATTRBIT(&attrbits); 4832 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 4833 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 4834 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 4835 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 4836 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 4837 *tl = txdr_unsigned(NFSV4OP_GETATTR); 4838 (void) nfsrv_putattrbit(nd, &attrbits); 4839 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 4840 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 4841 if (error != 0) { 4842 free(nd, M_TEMP); 4843 return (error); 4844 } 4845 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); 4846 /* Get rid of weak cache consistency data for now. */ 4847 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 4848 (ND_NFSV4 | ND_V4WCCATTR)) { 4849 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 4850 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 4851 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); 4852 if (error != 0) 4853 goto nfsmout; 4854 /* 4855 * Get rid of Op# and status for next op. 4856 */ 4857 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 4858 if (*++tl != 0) 4859 nd->nd_flag |= ND_NOMOREDATA; 4860 } 4861 if (nd->nd_repstat == 0) { 4862 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); 4863 retlen = fxdr_unsigned(int, *tl++); 4864 commit = fxdr_unsigned(int, *tl); 4865 if (commit != NFSWRITE_FILESYNC) 4866 error = NFSERR_IO; 4867 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", 4868 retlen, commit, error); 4869 } else 4870 error = nd->nd_repstat; 4871 /* We have no use for the Write Verifier since we use FileSync. */ 4872 4873 /* 4874 * Get the Change, Size, Access Time and Modify Time attributes and set 4875 * on the Metadata file, so its attributes will be what the file's 4876 * would be if it had been written. 4877 */ 4878 if (error == 0) { 4879 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 4880 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 4881 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 4882 } 4883 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); 4884 nfsmout: 4885 m_freem(nd->nd_mrep); 4886 free(nd, M_TEMP); 4887 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); 4888 return (error); 4889 } 4890 4891 /* 4892 * Start up the thread that will execute nfsrv_writedsdorpc(). 4893 */ 4894 static void 4895 start_writedsdorpc(void *arg, int pending) 4896 { 4897 struct nfsrvwritedsdorpc *drpc; 4898 4899 drpc = (struct nfsrvwritedsdorpc *)arg; 4900 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 4901 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 4902 drpc->done = 1; 4903 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 4904 } 4905 4906 static int 4907 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 4908 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 4909 struct mbuf **mpp, char *cp, int *failposp) 4910 { 4911 struct nfsrvwritedsdorpc *drpc, *tdrpc; 4912 struct nfsvattr na; 4913 struct mbuf *m; 4914 int error, i, offs, ret, timo; 4915 4916 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 4917 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 4918 drpc = NULL; 4919 if (mirrorcnt > 1) 4920 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 4921 M_WAITOK); 4922 4923 /* Calculate offset in mbuf chain that data starts. */ 4924 offs = cp - mtod(*mpp, char *); 4925 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 4926 4927 /* 4928 * Do the write RPC for every DS, using a separate kernel process 4929 * for every DS except the last one. 4930 */ 4931 error = 0; 4932 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 4933 tdrpc->done = 0; 4934 tdrpc->fh = *fhp; 4935 tdrpc->off = off; 4936 tdrpc->len = len; 4937 tdrpc->nmp = *nmpp; 4938 tdrpc->cred = cred; 4939 tdrpc->p = p; 4940 tdrpc->inprog = 0; 4941 tdrpc->err = 0; 4942 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 4943 ret = EIO; 4944 if (nfs_pnfsiothreads != 0) { 4945 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 4946 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 4947 ret); 4948 } 4949 if (ret != 0) { 4950 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 4951 tdrpc->m, cred, p); 4952 if (nfsds_failerr(ret) && *failposp == -1) 4953 *failposp = i; 4954 else if (error == 0 && ret != 0) 4955 error = ret; 4956 } 4957 nmpp++; 4958 fhp++; 4959 } 4960 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 4961 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 4962 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 4963 *failposp = mirrorcnt - 1; 4964 else if (error == 0 && ret != 0) 4965 error = ret; 4966 if (error == 0) 4967 error = nfsrv_setextattr(vp, &na, p); 4968 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 4969 tdrpc = drpc; 4970 timo = hz / 50; /* Wait for 20msec. */ 4971 if (timo < 1) 4972 timo = 1; 4973 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 4974 /* Wait for RPCs on separate threads to complete. */ 4975 while (tdrpc->inprog != 0 && tdrpc->done == 0) 4976 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 4977 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 4978 *failposp = i; 4979 else if (error == 0 && tdrpc->err != 0) 4980 error = tdrpc->err; 4981 } 4982 free(drpc, M_TEMP); 4983 return (error); 4984 } 4985 4986 static int 4987 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 4988 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 4989 struct nfsvattr *dsnap) 4990 { 4991 uint32_t *tl; 4992 struct nfsrv_descript *nd; 4993 nfsv4stateid_t st; 4994 nfsattrbit_t attrbits; 4995 int error; 4996 4997 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 4998 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 4999 /* 5000 * Use a stateid where other is an alternating 01010 pattern and 5001 * seqid is 0xffffffff. This value is not defined as special by 5002 * the RFC and is used by the FreeBSD NFS server to indicate an 5003 * MDS->DS proxy operation. 5004 */ 5005 st.other[0] = 0x55555555; 5006 st.other[1] = 0x55555555; 5007 st.other[2] = 0x55555555; 5008 st.seqid = 0xffffffff; 5009 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5010 NULL, NULL, 0, 0); 5011 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5012 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5013 5014 /* Do a Getattr for Size, Change, Access Time and Modify Time. */ 5015 NFSZERO_ATTRBIT(&attrbits); 5016 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5017 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5018 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5019 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5020 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5021 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5022 (void) nfsrv_putattrbit(nd, &attrbits); 5023 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5024 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5025 if (error != 0) { 5026 free(nd, M_TEMP); 5027 return (error); 5028 } 5029 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5030 nd->nd_repstat); 5031 /* Get rid of weak cache consistency data for now. */ 5032 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5033 (ND_NFSV4 | ND_V4WCCATTR)) { 5034 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5035 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5036 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5037 if (error != 0) 5038 goto nfsmout; 5039 /* 5040 * Get rid of Op# and status for next op. 5041 */ 5042 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5043 if (*++tl != 0) 5044 nd->nd_flag |= ND_NOMOREDATA; 5045 } 5046 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5047 if (error != 0) 5048 goto nfsmout; 5049 if (nd->nd_repstat != 0) 5050 error = nd->nd_repstat; 5051 /* 5052 * Get the Change, Size, Access Time and Modify Time attributes and set 5053 * on the Metadata file, so its attributes will be what the file's 5054 * would be if it had been written. 5055 */ 5056 if (error == 0) { 5057 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5058 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5059 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5060 } 5061 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5062 nfsmout: 5063 m_freem(nd->nd_mrep); 5064 free(nd, M_TEMP); 5065 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5066 return (error); 5067 } 5068 5069 struct nfsrvsetattrdsdorpc { 5070 int done; 5071 int inprog; 5072 struct task tsk; 5073 fhandle_t fh; 5074 struct nfsmount *nmp; 5075 struct vnode *vp; 5076 struct ucred *cred; 5077 NFSPROC_T *p; 5078 struct nfsvattr na; 5079 struct nfsvattr dsna; 5080 int err; 5081 }; 5082 5083 /* 5084 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5085 */ 5086 static void 5087 start_setattrdsdorpc(void *arg, int pending) 5088 { 5089 struct nfsrvsetattrdsdorpc *drpc; 5090 5091 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5092 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5093 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5094 drpc->done = 1; 5095 } 5096 5097 static int 5098 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5099 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5100 struct nfsvattr *nap, int *failposp) 5101 { 5102 struct nfsrvsetattrdsdorpc *drpc, *tdrpc; 5103 struct nfsvattr na; 5104 int error, i, ret, timo; 5105 5106 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5107 drpc = NULL; 5108 if (mirrorcnt > 1) 5109 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5110 M_WAITOK); 5111 5112 /* 5113 * Do the setattr RPC for every DS, using a separate kernel process 5114 * for every DS except the last one. 5115 */ 5116 error = 0; 5117 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5118 tdrpc->done = 0; 5119 tdrpc->inprog = 0; 5120 tdrpc->fh = *fhp; 5121 tdrpc->nmp = *nmpp; 5122 tdrpc->vp = vp; 5123 tdrpc->cred = cred; 5124 tdrpc->p = p; 5125 tdrpc->na = *nap; 5126 tdrpc->err = 0; 5127 ret = EIO; 5128 if (nfs_pnfsiothreads != 0) { 5129 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 5130 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 5131 ret); 5132 } 5133 if (ret != 0) { 5134 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 5135 &na); 5136 if (nfsds_failerr(ret) && *failposp == -1) 5137 *failposp = i; 5138 else if (error == 0 && ret != 0) 5139 error = ret; 5140 } 5141 nmpp++; 5142 fhp++; 5143 } 5144 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 5145 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5146 *failposp = mirrorcnt - 1; 5147 else if (error == 0 && ret != 0) 5148 error = ret; 5149 if (error == 0) 5150 error = nfsrv_setextattr(vp, &na, p); 5151 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 5152 tdrpc = drpc; 5153 timo = hz / 50; /* Wait for 20msec. */ 5154 if (timo < 1) 5155 timo = 1; 5156 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5157 /* Wait for RPCs on separate threads to complete. */ 5158 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5159 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 5160 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5161 *failposp = i; 5162 else if (error == 0 && tdrpc->err != 0) 5163 error = tdrpc->err; 5164 } 5165 free(drpc, M_TEMP); 5166 return (error); 5167 } 5168 5169 /* 5170 * Do a Setattr of an NFSv4 ACL on the DS file. 5171 */ 5172 static int 5173 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5174 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 5175 { 5176 struct nfsrv_descript *nd; 5177 nfsv4stateid_t st; 5178 nfsattrbit_t attrbits; 5179 int error; 5180 5181 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 5182 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5183 /* 5184 * Use a stateid where other is an alternating 01010 pattern and 5185 * seqid is 0xffffffff. This value is not defined as special by 5186 * the RFC and is used by the FreeBSD NFS server to indicate an 5187 * MDS->DS proxy operation. 5188 */ 5189 st.other[0] = 0x55555555; 5190 st.other[1] = 0x55555555; 5191 st.other[2] = 0x55555555; 5192 st.seqid = 0xffffffff; 5193 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5194 NULL, NULL, 0, 0); 5195 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5196 NFSZERO_ATTRBIT(&attrbits); 5197 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 5198 /* 5199 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 5200 * so passing in the metadata "vp" will be ok, since it is of 5201 * the same type (VREG). 5202 */ 5203 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 5204 NULL, 0, 0, 0, 0, 0, NULL); 5205 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5206 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5207 if (error != 0) { 5208 free(nd, M_TEMP); 5209 return (error); 5210 } 5211 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 5212 nd->nd_repstat); 5213 error = nd->nd_repstat; 5214 m_freem(nd->nd_mrep); 5215 free(nd, M_TEMP); 5216 return (error); 5217 } 5218 5219 struct nfsrvsetacldsdorpc { 5220 int done; 5221 int inprog; 5222 struct task tsk; 5223 fhandle_t fh; 5224 struct nfsmount *nmp; 5225 struct vnode *vp; 5226 struct ucred *cred; 5227 NFSPROC_T *p; 5228 struct acl *aclp; 5229 int err; 5230 }; 5231 5232 /* 5233 * Start up the thread that will execute nfsrv_setacldsdorpc(). 5234 */ 5235 static void 5236 start_setacldsdorpc(void *arg, int pending) 5237 { 5238 struct nfsrvsetacldsdorpc *drpc; 5239 5240 drpc = (struct nfsrvsetacldsdorpc *)arg; 5241 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 5242 drpc->vp, drpc->nmp, drpc->aclp); 5243 drpc->done = 1; 5244 } 5245 5246 static int 5247 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5248 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 5249 int *failposp) 5250 { 5251 struct nfsrvsetacldsdorpc *drpc, *tdrpc; 5252 int error, i, ret, timo; 5253 5254 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 5255 drpc = NULL; 5256 if (mirrorcnt > 1) 5257 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5258 M_WAITOK); 5259 5260 /* 5261 * Do the setattr RPC for every DS, using a separate kernel process 5262 * for every DS except the last one. 5263 */ 5264 error = 0; 5265 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5266 tdrpc->done = 0; 5267 tdrpc->inprog = 0; 5268 tdrpc->fh = *fhp; 5269 tdrpc->nmp = *nmpp; 5270 tdrpc->vp = vp; 5271 tdrpc->cred = cred; 5272 tdrpc->p = p; 5273 tdrpc->aclp = aclp; 5274 tdrpc->err = 0; 5275 ret = EIO; 5276 if (nfs_pnfsiothreads != 0) { 5277 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 5278 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 5279 ret); 5280 } 5281 if (ret != 0) { 5282 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 5283 aclp); 5284 if (nfsds_failerr(ret) && *failposp == -1) 5285 *failposp = i; 5286 else if (error == 0 && ret != 0) 5287 error = ret; 5288 } 5289 nmpp++; 5290 fhp++; 5291 } 5292 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 5293 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5294 *failposp = mirrorcnt - 1; 5295 else if (error == 0 && ret != 0) 5296 error = ret; 5297 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 5298 tdrpc = drpc; 5299 timo = hz / 50; /* Wait for 20msec. */ 5300 if (timo < 1) 5301 timo = 1; 5302 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5303 /* Wait for RPCs on separate threads to complete. */ 5304 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5305 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 5306 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5307 *failposp = i; 5308 else if (error == 0 && tdrpc->err != 0) 5309 error = tdrpc->err; 5310 } 5311 free(drpc, M_TEMP); 5312 return (error); 5313 } 5314 5315 /* 5316 * Getattr call to the DS for the Modify, Size and Change attributes. 5317 */ 5318 static int 5319 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5320 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 5321 { 5322 struct nfsrv_descript *nd; 5323 int error; 5324 nfsattrbit_t attrbits; 5325 5326 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 5327 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5328 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 5329 sizeof(fhandle_t), NULL, NULL, 0, 0); 5330 NFSZERO_ATTRBIT(&attrbits); 5331 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5332 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5333 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5334 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5335 (void) nfsrv_putattrbit(nd, &attrbits); 5336 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5337 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5338 if (error != 0) { 5339 free(nd, M_TEMP); 5340 return (error); 5341 } 5342 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 5343 nd->nd_repstat); 5344 if (nd->nd_repstat == 0) { 5345 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 5346 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 5347 NULL, NULL); 5348 /* 5349 * We can only save the updated values in the extended 5350 * attribute if the vp is exclusively locked. 5351 * This should happen when any of the following operations 5352 * occur on the vnode: 5353 * Close, Delegreturn, LayoutCommit, LayoutReturn 5354 * As such, the updated extended attribute should get saved 5355 * before nfsrv_checkdsattr() returns 0 and allows the cached 5356 * attributes to be returned without calling this function. 5357 */ 5358 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 5359 error = nfsrv_setextattr(vp, nap, p); 5360 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 5361 error); 5362 } 5363 } else 5364 error = nd->nd_repstat; 5365 m_freem(nd->nd_mrep); 5366 free(nd, M_TEMP); 5367 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 5368 return (error); 5369 } 5370 5371 /* 5372 * Get the device id and file handle for a DS file. 5373 */ 5374 int 5375 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 5376 fhandle_t *fhp, char *devid) 5377 { 5378 int buflen, error; 5379 char *buf; 5380 5381 buflen = 1024; 5382 buf = malloc(buflen, M_TEMP, M_WAITOK); 5383 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 5384 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 5385 free(buf, M_TEMP); 5386 return (error); 5387 } 5388 5389 /* 5390 * Do a Lookup against the DS for the filename. 5391 */ 5392 static int 5393 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 5394 struct vnode **nvpp, NFSPROC_T *p) 5395 { 5396 struct nameidata named; 5397 struct ucred *tcred; 5398 char *bufp; 5399 u_long *hashp; 5400 struct vnode *nvp; 5401 int error; 5402 5403 tcred = newnfs_getcred(); 5404 named.ni_cnd.cn_nameiop = LOOKUP; 5405 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 5406 named.ni_cnd.cn_cred = tcred; 5407 named.ni_cnd.cn_thread = p; 5408 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 5409 nfsvno_setpathbuf(&named, &bufp, &hashp); 5410 named.ni_cnd.cn_nameptr = bufp; 5411 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 5412 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 5413 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 5414 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 5415 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 5416 NFSFREECRED(tcred); 5417 nfsvno_relpathbuf(&named); 5418 if (error == 0) 5419 *nvpp = nvp; 5420 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 5421 return (error); 5422 } 5423 5424 /* 5425 * Set the file handle to the correct one. 5426 */ 5427 static void 5428 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp, 5429 NFSPROC_T *p) 5430 { 5431 struct mount *mp; 5432 struct nfsnode *np; 5433 int ret; 5434 5435 np = VTONFS(nvp); 5436 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 5437 /* 5438 * We can only do a setextattr for an exclusively 5439 * locked vp. Instead of trying to upgrade a shared 5440 * lock, just leave dsf_fh zeroed out and it will 5441 * keep doing this lookup until it is done with an 5442 * exclusively locked vp. 5443 */ 5444 if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) { 5445 ret = vn_start_write(vp, &mp, V_WAIT); 5446 NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n", 5447 ret); 5448 if (ret == 0) { 5449 ret = vn_extattr_set(vp, IO_NODELOCKED, 5450 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 5451 sizeof(*pf), (char *)pf, p); 5452 vn_finished_write(mp); 5453 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft " 5454 "vn_extattr_set=%d\n", ret); 5455 } 5456 } 5457 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 5458 } 5459 5460 /* 5461 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 5462 * when the DS has failed. 5463 */ 5464 void 5465 nfsrv_killrpcs(struct nfsmount *nmp) 5466 { 5467 5468 /* 5469 * Call newnfs_nmcancelreqs() to cause 5470 * any RPCs in progress on the mount point to 5471 * fail. 5472 * This will cause any process waiting for an 5473 * RPC to complete while holding a vnode lock 5474 * on the mounted-on vnode (such as "df" or 5475 * a non-forced "umount") to fail. 5476 * This will unlock the mounted-on vnode so 5477 * a forced dismount can succeed. 5478 * The NFSMNTP_CANCELRPCS flag should be set when this function is 5479 * called. 5480 */ 5481 newnfs_nmcancelreqs(nmp); 5482 } 5483 5484 /* 5485 * Sum up the statfs info for each of the DSs, so that the client will 5486 * receive the total for all DSs. 5487 */ 5488 static int 5489 nfsrv_pnfsstatfs(struct statfs *sf) 5490 { 5491 struct statfs *tsf; 5492 struct nfsdevice *ds; 5493 struct vnode **dvpp, **tdvpp, *dvp; 5494 uint64_t tot; 5495 int cnt, error = 0, i; 5496 5497 if (nfsrv_devidcnt <= 0) 5498 return (ENXIO); 5499 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 5500 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 5501 5502 /* Get an array of the dvps for the DSs. */ 5503 tdvpp = dvpp; 5504 i = 0; 5505 NFSDDSLOCK(); 5506 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5507 if (ds->nfsdev_nmp != NULL) { 5508 if (++i > nfsrv_devidcnt) 5509 break; 5510 *tdvpp++ = ds->nfsdev_dvp; 5511 } 5512 } 5513 NFSDDSUNLOCK(); 5514 cnt = i; 5515 5516 /* Do a VFS_STATFS() for each of the DSs and sum them up. */ 5517 tdvpp = dvpp; 5518 for (i = 0; i < cnt && error == 0; i++) { 5519 dvp = *tdvpp++; 5520 error = VFS_STATFS(dvp->v_mount, tsf); 5521 if (error == 0) { 5522 if (sf->f_bsize == 0) { 5523 if (tsf->f_bsize > 0) 5524 sf->f_bsize = tsf->f_bsize; 5525 else 5526 sf->f_bsize = 8192; 5527 } 5528 if (tsf->f_blocks > 0) { 5529 if (sf->f_bsize != tsf->f_bsize) { 5530 tot = tsf->f_blocks * tsf->f_bsize; 5531 sf->f_blocks += (tot / sf->f_bsize); 5532 } else 5533 sf->f_blocks += tsf->f_blocks; 5534 } 5535 if (tsf->f_bfree > 0) { 5536 if (sf->f_bsize != tsf->f_bsize) { 5537 tot = tsf->f_bfree * tsf->f_bsize; 5538 sf->f_bfree += (tot / sf->f_bsize); 5539 } else 5540 sf->f_bfree += tsf->f_bfree; 5541 } 5542 if (tsf->f_bavail > 0) { 5543 if (sf->f_bsize != tsf->f_bsize) { 5544 tot = tsf->f_bavail * tsf->f_bsize; 5545 sf->f_bavail += (tot / sf->f_bsize); 5546 } else 5547 sf->f_bavail += tsf->f_bavail; 5548 } 5549 } 5550 } 5551 free(tsf, M_TEMP); 5552 free(dvpp, M_TEMP); 5553 return (error); 5554 } 5555 5556 /* 5557 * Set an NFSv4 acl. 5558 */ 5559 int 5560 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 5561 { 5562 int error; 5563 5564 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 5565 error = NFSERR_ATTRNOTSUPP; 5566 goto out; 5567 } 5568 /* 5569 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 5570 * Make sure it has enough room for that - splitting every entry 5571 * into two and appending "canonical six" entries at the end. 5572 * Cribbed out of kern/vfs_acl.c - Rick M. 5573 */ 5574 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 5575 error = NFSERR_ATTRNOTSUPP; 5576 goto out; 5577 } 5578 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 5579 if (error == 0) { 5580 error = nfsrv_dssetacl(vp, aclp, cred, p); 5581 if (error == ENOENT) 5582 error = 0; 5583 } 5584 5585 out: 5586 NFSEXITCODE(error); 5587 return (error); 5588 } 5589 5590 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 5591 5592 /* 5593 * Called once to initialize data structures... 5594 */ 5595 static int 5596 nfsd_modevent(module_t mod, int type, void *data) 5597 { 5598 int error = 0, i; 5599 static int loaded = 0; 5600 5601 switch (type) { 5602 case MOD_LOAD: 5603 if (loaded) 5604 goto out; 5605 newnfs_portinit(); 5606 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5607 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, 5608 MTX_DEF); 5609 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, 5610 MTX_DEF); 5611 } 5612 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 5613 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 5614 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 5615 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 5616 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 5617 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 5618 nfsrvd_initcache(); 5619 nfsd_init(); 5620 NFSD_LOCK(); 5621 nfsrvd_init(0); 5622 NFSD_UNLOCK(); 5623 nfsd_mntinit(); 5624 #ifdef VV_DISABLEDELEG 5625 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 5626 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 5627 #endif 5628 nfsd_call_servertimer = nfsrv_servertimer; 5629 nfsd_call_nfsd = nfssvc_nfsd; 5630 loaded = 1; 5631 break; 5632 5633 case MOD_UNLOAD: 5634 if (newnfs_numnfsd != 0) { 5635 error = EBUSY; 5636 break; 5637 } 5638 5639 #ifdef VV_DISABLEDELEG 5640 vn_deleg_ops.vndeleg_recall = NULL; 5641 vn_deleg_ops.vndeleg_disable = NULL; 5642 #endif 5643 nfsd_call_servertimer = NULL; 5644 nfsd_call_nfsd = NULL; 5645 5646 /* Clean out all NFSv4 state. */ 5647 nfsrv_throwawayallstate(curthread); 5648 5649 /* Clean the NFS server reply cache */ 5650 nfsrvd_cleancache(); 5651 5652 /* Free up the krpc server pool. */ 5653 if (nfsrvd_pool != NULL) 5654 svcpool_destroy(nfsrvd_pool); 5655 5656 /* and get rid of the locks */ 5657 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 5658 mtx_destroy(&nfsrchash_table[i].mtx); 5659 mtx_destroy(&nfsrcahash_table[i].mtx); 5660 } 5661 mtx_destroy(&nfsrc_udpmtx); 5662 mtx_destroy(&nfs_v4root_mutex); 5663 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 5664 mtx_destroy(&nfsrv_dontlistlock_mtx); 5665 mtx_destroy(&nfsrv_recalllock_mtx); 5666 for (i = 0; i < nfsrv_sessionhashsize; i++) 5667 mtx_destroy(&nfssessionhash[i].mtx); 5668 if (nfslayouthash != NULL) { 5669 for (i = 0; i < nfsrv_layouthashsize; i++) 5670 mtx_destroy(&nfslayouthash[i].mtx); 5671 free(nfslayouthash, M_NFSDSESSION); 5672 } 5673 lockdestroy(&nfsv4root_mnt.mnt_explock); 5674 free(nfsclienthash, M_NFSDCLIENT); 5675 free(nfslockhash, M_NFSDLOCKFILE); 5676 free(nfssessionhash, M_NFSDSESSION); 5677 loaded = 0; 5678 break; 5679 default: 5680 error = EOPNOTSUPP; 5681 break; 5682 } 5683 5684 out: 5685 NFSEXITCODE(error); 5686 return (error); 5687 } 5688 static moduledata_t nfsd_mod = { 5689 "nfsd", 5690 nfsd_modevent, 5691 NULL, 5692 }; 5693 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 5694 5695 /* So that loader and kldload(2) can find us, wherever we are.. */ 5696 MODULE_VERSION(nfsd, 1); 5697 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 5698 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); 5699 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 5700 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 5701 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 5702 5703