/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <security/mac/mac_framework.h> 50 #include <sys/filio.h> 51 #include <sys/hash.h> 52 #include <sys/sysctl.h> 53 #include <nlm/nlm_prot.h> 54 #include <nlm/nlm.h> 55 56 FEATURE(nfsd, "NFSv4 server"); 57 58 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 59 extern int nfsrv_useacl; 60 extern int newnfs_numnfsd; 61 extern struct mount nfsv4root_mnt; 62 extern struct nfsrv_stablefirst nfsrv_stablefirst; 63 extern void (*nfsd_call_servertimer)(void); 64 extern SVCPOOL *nfsrvd_pool; 65 extern struct nfsv4lock nfsd_suspend_lock; 66 extern struct nfsclienthashhead *nfsclienthash; 67 extern struct nfslockhashhead *nfslockhash; 68 extern struct nfssessionhash *nfssessionhash; 69 extern int nfsrv_sessionhashsize; 70 extern struct nfsstatsv1 nfsstatsv1; 71 extern struct nfslayouthash *nfslayouthash; 72 extern int nfsrv_layouthashsize; 73 extern struct mtx nfsrv_dslock_mtx; 74 extern int nfs_pnfsiothreads; 75 extern struct nfsdontlisthead nfsrv_dontlisthead; 76 extern volatile int nfsrv_dontlistlen; 77 extern volatile int nfsrv_devidcnt; 78 extern int nfsrv_maxpnfsmirror; 79 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 80 NFSDLOCKMUTEX; 81 NFSSTATESPINLOCK; 82 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 83 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 84 struct mtx nfsrc_udpmtx; 85 struct mtx nfs_v4root_mutex; 86 struct mtx nfsrv_dontlistlock_mtx; 87 struct mtx nfsrv_recalllock_mtx; 88 struct nfsrvfh nfs_rootfh, nfs_pubfh; 89 int nfs_pubfhset = 0, nfs_rootfhset = 0; 90 struct proc *nfsd_master_proc = NULL; 91 int nfsd_debuglevel = 0; 92 static pid_t nfsd_master_pid = (pid_t)-1; 93 
static char nfsd_master_comm[MAXCOMLEN + 1]; 94 static struct timeval nfsd_master_start; 95 static uint32_t nfsv4_sysid = 0; 96 static fhandle_t zerofh; 97 98 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 99 struct ucred *); 100 101 int nfsrv_enable_crossmntpt = 1; 102 static int nfs_commit_blks; 103 static int nfs_commit_miss; 104 extern int nfsrv_issuedelegs; 105 extern int nfsrv_dolocallocks; 106 extern int nfsd_enable_stringtouid; 107 extern struct nfsdevicehead nfsrv_devidhead; 108 109 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, 110 struct iovec **); 111 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, 112 struct mbuf **, struct iovec **); 113 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, 114 int *); 115 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 116 NFSPROC_T *); 117 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 118 int *, char *, fhandle_t *); 119 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 120 NFSPROC_T *); 121 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, 122 struct thread *, int, struct mbuf **, char *, struct mbuf **, 123 struct nfsvattr *, struct acl *, off_t *, int, bool *); 124 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 125 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 126 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 127 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 128 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 129 char *, int *); 130 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 131 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 132 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 133 struct vnode *, struct nfsmount **, int, struct acl *, int *); 134 static int 
nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 135 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 136 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 137 struct vnode *, struct nfsmount *, struct nfsvattr *); 138 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, 139 NFSPROC_T *, struct nfsmount *); 140 static int nfsrv_putfhname(fhandle_t *, char *); 141 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 142 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 143 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, 144 struct vnode *, NFSPROC_T *); 145 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 146 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 147 NFSPROC_T *); 148 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); 149 static void nfsm_trimtrailing(struct nfsrv_descript *, struct mbuf *, 150 char *, int, int); 151 152 int nfs_pnfsio(task_fn_t *, void *); 153 154 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 155 "NFS server"); 156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 157 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 158 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 159 0, ""); 160 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 161 0, ""); 162 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 163 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 164 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 165 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 166 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 167 0, "Debug level for NFS server"); 168 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 169 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric 
owner_names"); 170 static int nfsrv_pnfsgetdsattr = 1; 171 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 172 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 173 174 /* 175 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 176 * not running. 177 * The dsN subdirectories for the increased values must have been created 178 * on all DS servers before this increase is done. 179 */ 180 u_int nfsrv_dsdirsize = 20; 181 static int 182 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 183 { 184 int error, newdsdirsize; 185 186 newdsdirsize = nfsrv_dsdirsize; 187 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 188 if (error != 0 || req->newptr == NULL) 189 return (error); 190 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 191 newnfs_numnfsd != 0) 192 return (EINVAL); 193 nfsrv_dsdirsize = newdsdirsize; 194 return (0); 195 } 196 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 197 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 198 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 199 200 #define MAX_REORDERED_RPC 16 201 #define NUM_HEURISTIC 1031 202 #define NHUSE_INIT 64 203 #define NHUSE_INC 16 204 #define NHUSE_MAX 2048 205 206 static struct nfsheur { 207 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 208 off_t nh_nextoff; /* next offset for sequential detection */ 209 int nh_use; /* use count for selection */ 210 int nh_seqcount; /* heuristic */ 211 } nfsheur[NUM_HEURISTIC]; 212 213 214 /* 215 * Heuristic to detect sequential operation. 216 */ 217 static struct nfsheur * 218 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 219 { 220 struct nfsheur *nh; 221 int hi, try; 222 223 /* Locate best candidate. 
*/ 224 try = 32; 225 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 226 nh = &nfsheur[hi]; 227 while (try--) { 228 if (nfsheur[hi].nh_vp == vp) { 229 nh = &nfsheur[hi]; 230 break; 231 } 232 if (nfsheur[hi].nh_use > 0) 233 --nfsheur[hi].nh_use; 234 hi = (hi + 1) % NUM_HEURISTIC; 235 if (nfsheur[hi].nh_use < nh->nh_use) 236 nh = &nfsheur[hi]; 237 } 238 239 /* Initialize hint if this is a new file. */ 240 if (nh->nh_vp != vp) { 241 nh->nh_vp = vp; 242 nh->nh_nextoff = uio->uio_offset; 243 nh->nh_use = NHUSE_INIT; 244 if (uio->uio_offset == 0) 245 nh->nh_seqcount = 4; 246 else 247 nh->nh_seqcount = 1; 248 } 249 250 /* Calculate heuristic. */ 251 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 252 uio->uio_offset == nh->nh_nextoff) { 253 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 254 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 255 if (nh->nh_seqcount > IO_SEQMAX) 256 nh->nh_seqcount = IO_SEQMAX; 257 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 258 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 259 /* Probably a reordered RPC, leave seqcount alone. */ 260 } else if (nh->nh_seqcount > 1) { 261 nh->nh_seqcount /= 2; 262 } else { 263 nh->nh_seqcount = 0; 264 } 265 nh->nh_use += NHUSE_INC; 266 if (nh->nh_use > NHUSE_MAX) 267 nh->nh_use = NHUSE_MAX; 268 return (nh); 269 } 270 271 /* 272 * Get attributes into nfsvattr structure. 273 */ 274 int 275 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 276 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 277 nfsattrbit_t *attrbitp) 278 { 279 int error, gotattr, lockedit = 0; 280 struct nfsvattr na; 281 282 if (vpislocked == 0) { 283 /* 284 * When vpislocked == 0, the vnode is either exclusively 285 * locked by this thread or not locked by this thread. 286 * As such, shared lock it, if not exclusively locked. 
287 */ 288 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 289 lockedit = 1; 290 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 291 } 292 } 293 294 /* 295 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 296 * attributes, as required. 297 * This needs to be done for regular files if: 298 * - non-NFSv4 RPCs or 299 * - when attrbitp == NULL or 300 * - an NFSv4 RPC with any of the above attributes in attrbitp. 301 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 302 * these attributes. nfsrv_proxyds() will return an error if the 303 * server is not a pNFS one. 304 */ 305 gotattr = 0; 306 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 307 (nd->nd_flag & ND_NFSV4) == 0 || 308 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 309 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 310 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 311 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 312 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 313 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 314 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 315 NULL); 316 if (error == 0) 317 gotattr = 1; 318 } 319 320 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 321 if (lockedit != 0) 322 NFSVOPUNLOCK(vp); 323 324 /* 325 * If we got the Change, Size and Modify Time from the DS, 326 * replace them. 327 */ 328 if (gotattr != 0) { 329 nvap->na_atime = na.na_atime; 330 nvap->na_mtime = na.na_mtime; 331 nvap->na_filerev = na.na_filerev; 332 nvap->na_size = na.na_size; 333 nvap->na_bytes = na.na_bytes; 334 } 335 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 336 error, (uintmax_t)na.na_filerev); 337 338 NFSEXITCODE(error); 339 return (error); 340 } 341 342 /* 343 * Get a file handle for a vnode. 
344 */ 345 int 346 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 347 { 348 int error; 349 350 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 351 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 352 error = VOP_VPTOFH(vp, &fhp->fh_fid); 353 354 NFSEXITCODE(error); 355 return (error); 356 } 357 358 /* 359 * Perform access checking for vnodes obtained from file handles that would 360 * refer to files already opened by a Unix client. You cannot just use 361 * vn_writechk() and VOP_ACCESSX() for two reasons. 362 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 363 * case. 364 * 2 - The owner is to be given access irrespective of mode bits for some 365 * operations, so that processes that chmod after opening a file don't 366 * break. 367 */ 368 int 369 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 370 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 371 u_int32_t *supportedtypep) 372 { 373 struct vattr vattr; 374 int error = 0, getret = 0; 375 376 if (vpislocked == 0) { 377 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 378 error = EPERM; 379 goto out; 380 } 381 } 382 if (accmode & VWRITE) { 383 /* Just vn_writechk() changed to check rdonly */ 384 /* 385 * Disallow write attempts on read-only file systems; 386 * unless the file is a socket or a block or character 387 * device resident on the file system. 388 */ 389 if (NFSVNO_EXRDONLY(exp) || 390 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 391 switch (vp->v_type) { 392 case VREG: 393 case VDIR: 394 case VLNK: 395 error = EROFS; 396 default: 397 break; 398 } 399 } 400 /* 401 * If there's shared text associated with 402 * the inode, try to free it up once. If 403 * we fail, we can't allow writing. 404 */ 405 if (VOP_IS_TEXT(vp) && error == 0) 406 error = ETXTBSY; 407 } 408 if (error != 0) { 409 if (vpislocked == 0) 410 NFSVOPUNLOCK(vp); 411 goto out; 412 } 413 414 /* 415 * Should the override still be applied when ACLs are enabled? 
416 */ 417 error = VOP_ACCESSX(vp, accmode, cred, p); 418 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 419 /* 420 * Try again with VEXPLICIT_DENY, to see if the test for 421 * deletion is supported. 422 */ 423 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 424 if (error == 0) { 425 if (vp->v_type == VDIR) { 426 accmode &= ~(VDELETE | VDELETE_CHILD); 427 accmode |= VWRITE; 428 error = VOP_ACCESSX(vp, accmode, cred, p); 429 } else if (supportedtypep != NULL) { 430 *supportedtypep &= ~NFSACCESS_DELETE; 431 } 432 } 433 } 434 435 /* 436 * Allow certain operations for the owner (reads and writes 437 * on files that are already open). 438 */ 439 if (override != NFSACCCHK_NOOVERRIDE && 440 (error == EPERM || error == EACCES)) { 441 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 442 error = 0; 443 else if (override & NFSACCCHK_ALLOWOWNER) { 444 getret = VOP_GETATTR(vp, &vattr, cred); 445 if (getret == 0 && cred->cr_uid == vattr.va_uid) 446 error = 0; 447 } 448 } 449 if (vpislocked == 0) 450 NFSVOPUNLOCK(vp); 451 452 out: 453 NFSEXITCODE(error); 454 return (error); 455 } 456 457 /* 458 * Set attribute(s) vnop. 459 */ 460 int 461 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 462 struct thread *p, struct nfsexstuff *exp) 463 { 464 u_quad_t savsize = 0; 465 int error, savedit; 466 time_t savbtime; 467 468 /* 469 * If this is an exported file system and a pNFS service is running, 470 * don't VOP_SETATTR() of size for the MDS file system. 
471 */ 472 savedit = 0; 473 error = 0; 474 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && 475 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && 476 nvap->na_vattr.va_size > 0) { 477 savsize = nvap->na_vattr.va_size; 478 nvap->na_vattr.va_size = VNOVAL; 479 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 480 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 481 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 482 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 483 nvap->na_vattr.va_mtime.tv_sec != VNOVAL) 484 savedit = 1; 485 else 486 savedit = 2; 487 } 488 if (savedit != 2) 489 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 490 if (savedit != 0) 491 nvap->na_vattr.va_size = savsize; 492 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 493 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 494 nvap->na_vattr.va_size != VNOVAL || 495 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 496 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 497 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 498 /* Never modify birthtime on a DS file. */ 499 savbtime = nvap->na_vattr.va_birthtime.tv_sec; 500 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; 501 /* For a pNFS server, set the attributes on the DS file. */ 502 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, 503 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); 504 nvap->na_vattr.va_birthtime.tv_sec = savbtime; 505 if (error == ENOENT) 506 error = 0; 507 } 508 NFSEXITCODE(error); 509 return (error); 510 } 511 512 /* 513 * Set up nameidata for a lookup() call and do it. 
514 */ 515 int 516 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 517 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, 518 struct vnode **retdirp) 519 { 520 struct componentname *cnp = &ndp->ni_cnd; 521 int i; 522 struct iovec aiov; 523 struct uio auio; 524 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 525 int error = 0; 526 char *cp; 527 528 *retdirp = NULL; 529 cnp->cn_nameptr = cnp->cn_pnbuf; 530 ndp->ni_lcf = 0; 531 /* 532 * Extract and set starting directory. 533 */ 534 if (dp->v_type != VDIR) { 535 if (islocked) 536 vput(dp); 537 else 538 vrele(dp); 539 nfsvno_relpathbuf(ndp); 540 error = ENOTDIR; 541 goto out1; 542 } 543 if (islocked) 544 NFSVOPUNLOCK(dp); 545 VREF(dp); 546 *retdirp = dp; 547 if (NFSVNO_EXRDONLY(exp)) 548 cnp->cn_flags |= RDONLY; 549 ndp->ni_segflg = UIO_SYSSPACE; 550 551 if (nd->nd_flag & ND_PUBLOOKUP) { 552 ndp->ni_loopcnt = 0; 553 if (cnp->cn_pnbuf[0] == '/') { 554 vrele(dp); 555 /* 556 * Check for degenerate pathnames here, since lookup() 557 * panics on them. 558 */ 559 for (i = 1; i < ndp->ni_pathlen; i++) 560 if (cnp->cn_pnbuf[i] != '/') 561 break; 562 if (i == ndp->ni_pathlen) { 563 error = NFSERR_ACCES; 564 goto out; 565 } 566 dp = rootvnode; 567 VREF(dp); 568 } 569 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 570 (nd->nd_flag & ND_NFSV4) == 0) { 571 /* 572 * Only cross mount points for NFSv4 when doing a 573 * mount while traversing the file system above 574 * the mount point, unless nfsrv_enable_crossmntpt is set. 575 */ 576 cnp->cn_flags |= NOCROSSMOUNT; 577 } 578 579 /* 580 * Initialize for scan, set ni_startdir and bump ref on dp again 581 * because lookup() will dereference ni_startdir. 
582 */ 583 584 cnp->cn_thread = p; 585 ndp->ni_startdir = dp; 586 ndp->ni_rootdir = rootvnode; 587 ndp->ni_topdir = NULL; 588 589 if (!lockleaf) 590 cnp->cn_flags |= LOCKLEAF; 591 for (;;) { 592 cnp->cn_nameptr = cnp->cn_pnbuf; 593 /* 594 * Call lookup() to do the real work. If an error occurs, 595 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 596 * we do not have to dereference anything before returning. 597 * In either case ni_startdir will be dereferenced and NULLed 598 * out. 599 */ 600 error = lookup(ndp); 601 if (error) 602 break; 603 604 /* 605 * Check for encountering a symbolic link. Trivial 606 * termination occurs if no symlink encountered. 607 */ 608 if ((cnp->cn_flags & ISSYMLINK) == 0) { 609 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) 610 nfsvno_relpathbuf(ndp); 611 if (ndp->ni_vp && !lockleaf) 612 NFSVOPUNLOCK(ndp->ni_vp); 613 break; 614 } 615 616 /* 617 * Validate symlink 618 */ 619 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 620 NFSVOPUNLOCK(ndp->ni_dvp); 621 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 622 error = EINVAL; 623 goto badlink2; 624 } 625 626 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 627 error = ELOOP; 628 goto badlink2; 629 } 630 if (ndp->ni_pathlen > 1) 631 cp = uma_zalloc(namei_zone, M_WAITOK); 632 else 633 cp = cnp->cn_pnbuf; 634 aiov.iov_base = cp; 635 aiov.iov_len = MAXPATHLEN; 636 auio.uio_iov = &aiov; 637 auio.uio_iovcnt = 1; 638 auio.uio_offset = 0; 639 auio.uio_rw = UIO_READ; 640 auio.uio_segflg = UIO_SYSSPACE; 641 auio.uio_td = NULL; 642 auio.uio_resid = MAXPATHLEN; 643 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 644 if (error) { 645 badlink1: 646 if (ndp->ni_pathlen > 1) 647 uma_zfree(namei_zone, cp); 648 badlink2: 649 vrele(ndp->ni_dvp); 650 vput(ndp->ni_vp); 651 break; 652 } 653 linklen = MAXPATHLEN - auio.uio_resid; 654 if (linklen == 0) { 655 error = ENOENT; 656 goto badlink1; 657 } 658 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 659 error = ENAMETOOLONG; 660 goto badlink1; 661 } 
662 663 /* 664 * Adjust or replace path 665 */ 666 if (ndp->ni_pathlen > 1) { 667 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 668 uma_zfree(namei_zone, cnp->cn_pnbuf); 669 cnp->cn_pnbuf = cp; 670 } else 671 cnp->cn_pnbuf[linklen] = '\0'; 672 ndp->ni_pathlen += linklen; 673 674 /* 675 * Cleanup refs for next loop and check if root directory 676 * should replace current directory. Normally ni_dvp 677 * becomes the new base directory and is cleaned up when 678 * we loop. Explicitly null pointers after invalidation 679 * to clarify operation. 680 */ 681 vput(ndp->ni_vp); 682 ndp->ni_vp = NULL; 683 684 if (cnp->cn_pnbuf[0] == '/') { 685 vrele(ndp->ni_dvp); 686 ndp->ni_dvp = ndp->ni_rootdir; 687 VREF(ndp->ni_dvp); 688 } 689 ndp->ni_startdir = ndp->ni_dvp; 690 ndp->ni_dvp = NULL; 691 } 692 if (!lockleaf) 693 cnp->cn_flags &= ~LOCKLEAF; 694 695 out: 696 if (error) { 697 nfsvno_relpathbuf(ndp); 698 ndp->ni_vp = NULL; 699 ndp->ni_dvp = NULL; 700 ndp->ni_startdir = NULL; 701 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 702 ndp->ni_dvp = NULL; 703 } 704 705 out1: 706 NFSEXITCODE2(error, nd); 707 return (error); 708 } 709 710 /* 711 * Set up a pathname buffer and return a pointer to it and, optionally 712 * set a hash pointer. 713 */ 714 void 715 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 716 { 717 struct componentname *cnp = &ndp->ni_cnd; 718 719 cnp->cn_flags |= (NOMACCHECK | HASBUF); 720 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 721 if (hashpp != NULL) 722 *hashpp = NULL; 723 *bufpp = cnp->cn_pnbuf; 724 } 725 726 /* 727 * Release the above path buffer, if not released by nfsvno_namei(). 728 */ 729 void 730 nfsvno_relpathbuf(struct nameidata *ndp) 731 { 732 733 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) 734 panic("nfsrelpath"); 735 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 736 ndp->ni_cnd.cn_flags &= ~HASBUF; 737 } 738 739 /* 740 * Readlink vnode op into an mbuf list. 
741 */ 742 int 743 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, 744 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 745 { 746 struct iovec *iv; 747 struct uio io, *uiop = &io; 748 struct mbuf *mp, *mp3; 749 int len, tlen, error = 0; 750 751 len = NFS_MAXPATHLEN; 752 if (maxextsiz > 0) 753 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 754 &mp3, &mp, &iv); 755 else 756 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); 757 uiop->uio_iov = iv; 758 uiop->uio_offset = 0; 759 uiop->uio_resid = len; 760 uiop->uio_rw = UIO_READ; 761 uiop->uio_segflg = UIO_SYSSPACE; 762 uiop->uio_td = NULL; 763 error = VOP_READLINK(vp, uiop, cred); 764 free(iv, M_TEMP); 765 if (error) { 766 m_freem(mp3); 767 *lenp = 0; 768 goto out; 769 } 770 if (uiop->uio_resid > 0) { 771 len -= uiop->uio_resid; 772 tlen = NFSM_RNDUP(len); 773 if (tlen == 0) { 774 m_freem(mp3); 775 mp3 = mp = NULL; 776 } else if (tlen != NFS_MAXPATHLEN || tlen != len) 777 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, 778 tlen - len); 779 } 780 *lenp = len; 781 *mpp = mp3; 782 *mpendp = mp; 783 784 out: 785 NFSEXITCODE(error); 786 return (error); 787 } 788 789 /* 790 * Create an mbuf chain and an associated iovec that can be used to Read 791 * or Getextattr of data. 792 * Upon success, return pointers to the first and last mbufs in the chain 793 * plus the malloc'd iovec and its iovlen. 794 */ 795 static int 796 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, 797 struct iovec **ivp) 798 { 799 struct mbuf *m, *m2 = NULL, *m3; 800 struct iovec *iv; 801 int i, left, siz; 802 803 left = len; 804 m3 = NULL; 805 /* 806 * Generate the mbuf list with the uio_iov ref. to it. 
807 */ 808 i = 0; 809 while (left > 0) { 810 NFSMGET(m); 811 MCLGET(m, M_WAITOK); 812 m->m_len = 0; 813 siz = min(M_TRAILINGSPACE(m), left); 814 left -= siz; 815 i++; 816 if (m3) 817 m2->m_next = m; 818 else 819 m3 = m; 820 m2 = m; 821 } 822 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 823 m = m3; 824 left = len; 825 i = 0; 826 while (left > 0) { 827 if (m == NULL) 828 panic("nfsrv_createiovec iov"); 829 siz = min(M_TRAILINGSPACE(m), left); 830 if (siz > 0) { 831 iv->iov_base = mtod(m, caddr_t) + m->m_len; 832 iv->iov_len = siz; 833 m->m_len += siz; 834 left -= siz; 835 iv++; 836 i++; 837 } 838 m = m->m_next; 839 } 840 *mpp = m3; 841 *mpendp = m2; 842 return (i); 843 } 844 845 /* 846 * Create an mbuf chain and an associated iovec that can be used to Read 847 * or Getextattr of data. 848 * Upon success, return pointers to the first and last mbufs in the chain 849 * plus the malloc'd iovec and its iovlen. 850 * Same as above, but creates ext_pgs mbuf(s). 851 */ 852 static int 853 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, 854 struct mbuf **mpendp, struct iovec **ivp) 855 { 856 struct mbuf *m, *m2 = NULL, *m3; 857 struct iovec *iv; 858 int i, left, pgno, siz; 859 860 left = len; 861 m3 = NULL; 862 /* 863 * Generate the mbuf list with the uio_iov ref. to it. 
864 */ 865 i = 0; 866 while (left > 0) { 867 siz = min(left, maxextsiz); 868 m = mb_alloc_ext_plus_pages(siz, M_WAITOK); 869 left -= siz; 870 i += m->m_epg_npgs; 871 if (m3 != NULL) 872 m2->m_next = m; 873 else 874 m3 = m; 875 m2 = m; 876 } 877 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 878 m = m3; 879 left = len; 880 i = 0; 881 pgno = 0; 882 while (left > 0) { 883 if (m == NULL) 884 panic("nfsvno_createiovec_extpgs iov"); 885 siz = min(PAGE_SIZE, left); 886 if (siz > 0) { 887 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); 888 iv->iov_len = siz; 889 m->m_len += siz; 890 if (pgno == m->m_epg_npgs - 1) 891 m->m_epg_last_len = siz; 892 left -= siz; 893 iv++; 894 i++; 895 pgno++; 896 } 897 if (pgno == m->m_epg_npgs && left > 0) { 898 m = m->m_next; 899 if (m == NULL) 900 panic("nfsvno_createiovec_extpgs iov"); 901 pgno = 0; 902 } 903 } 904 *mpp = m3; 905 *mpendp = m2; 906 return (i); 907 } 908 909 /* 910 * Read vnode op call into mbuf list. 911 */ 912 int 913 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 914 int maxextsiz, struct thread *p, struct mbuf **mpp, 915 struct mbuf **mpendp) 916 { 917 struct mbuf *m; 918 struct iovec *iv; 919 int error = 0, len, tlen, ioflag = 0; 920 struct mbuf *m3; 921 struct uio io, *uiop = &io; 922 struct nfsheur *nh; 923 924 /* 925 * Attempt to read from a DS file. A return of ENOENT implies 926 * there is no DS file to read. 
927 */ 928 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 929 NULL, mpendp, NULL, NULL, NULL, 0, NULL); 930 if (error != ENOENT) 931 return (error); 932 933 len = NFSM_RNDUP(cnt); 934 if (maxextsiz > 0) 935 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 936 &m3, &m, &iv); 937 else 938 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); 939 uiop->uio_iov = iv; 940 uiop->uio_offset = off; 941 uiop->uio_resid = len; 942 uiop->uio_rw = UIO_READ; 943 uiop->uio_segflg = UIO_SYSSPACE; 944 uiop->uio_td = NULL; 945 nh = nfsrv_sequential_heuristic(uiop, vp); 946 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 947 /* XXX KDM make this more systematic? */ 948 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 949 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 950 free(iv, M_TEMP); 951 if (error) { 952 m_freem(m3); 953 *mpp = NULL; 954 goto out; 955 } 956 nh->nh_nextoff = uiop->uio_offset; 957 tlen = len - uiop->uio_resid; 958 cnt = cnt < tlen ? cnt : tlen; 959 tlen = NFSM_RNDUP(cnt); 960 if (tlen == 0) { 961 m_freem(m3); 962 m3 = m = NULL; 963 } else if (len != tlen || tlen != cnt) 964 m = nfsrv_adj(m3, len - tlen, tlen - cnt); 965 *mpp = m3; 966 *mpendp = m; 967 968 out: 969 NFSEXITCODE(error); 970 return (error); 971 } 972 973 /* 974 * Create the iovec for the mbuf chain passed in as an argument. 975 * The "cp" argument is where the data starts within the first mbuf in 976 * the chain. It returns the iovec and the iovcnt. 977 */ 978 static int 979 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, 980 int *iovcntp) 981 { 982 struct mbuf *mp; 983 struct iovec *ivp; 984 int cnt, i, len; 985 986 /* 987 * Loop through the mbuf chain, counting how many mbufs are a 988 * part of this write operation, so the iovec size is known. 
 */
	cnt = 0;
	len = retlen;
	mp = m;
	i = mtod(mp, caddr_t) + mp->m_len - cp;
	while (len > 0) {
		if (i > 0) {
			len -= i;
			cnt++;
		}
		mp = mp->m_next;
		if (!mp) {
			if (len > 0)
				return (EBADRPC);
		} else
			i = mp->m_len;
	}

	/* Now, create the iovec. */
	mp = m;
	*ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
	    M_WAITOK);
	*iovcntp = cnt;
	i = mtod(mp, caddr_t) + mp->m_len - cp;
	len = retlen;
	while (len > 0) {
		if (mp == NULL)
			panic("nfsrv_createiovecw");
		if (i > 0) {
			i = min(i, len);
			ivp->iov_base = cp;
			ivp->iov_len = i;
			ivp++;
			len -= i;
		}
		mp = mp->m_next;
		if (mp) {
			i = mp->m_len;
			cp = mtod(mp, caddr_t);
		}
	}
	return (0);
}

/*
 * Write vnode op from an mbuf list.
 *
 *  vp     - vnode to write to
 *  off    - file offset, stored in uio_offset
 *  retlen - number of bytes to write, stored in uio_resid
 *  stable - in: requested stability level; it is overwritten with
 *           NFSWRITE_FILESYNC whenever nfsrv_proxyds() returns
 *           something other than ENOENT
 *  mp, cp - mbuf chain and position within it, handed to nfsrv_proxyds()
 *           and nfsrv_createiovecw()
 *  cred   - credentials passed to nfsrv_proxyds() and VOP_WRITE()
 *  p      - thread passed to nfsrv_proxyds() and NFSUIOPROC()
 *
 * Returns the nfsrv_proxyds() result when that is not ENOENT, otherwise
 * the nfsrv_createiovecw() error or the VOP_WRITE() result.
 */
int
nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
{
	struct iovec *iv;
	int cnt, ioflags, error;
	struct uio io, *uiop = &io;
	struct nfsheur *nh;

	/*
	 * Attempt to write to a DS file. A return of ENOENT implies
	 * there is no DS file to write.
	 */
	error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
	    &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
	if (error != ENOENT) {
		*stable = NFSWRITE_FILESYNC;
		return (error);
	}


	/* Everything except an unstable write is done with IO_SYNC. */
	if (*stable == NFSWRITE_UNSTABLE)
		ioflags = IO_NODELOCKED;
	else
		ioflags = (IO_SYNC | IO_NODELOCKED);
	error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
	if (error != 0)
		return (error);
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = cnt;
	uiop->uio_resid = retlen;
	uiop->uio_rw = UIO_WRITE;
	uiop->uio_segflg = UIO_SYSSPACE;
	NFSUIOPROC(uiop, p);
	uiop->uio_offset = off;
	/* Fold the sequential-access count into the I/O flags. */
	nh = nfsrv_sequential_heuristic(uiop, vp);
	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
	/* XXX KDM make this more systematic? */
	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
	error = VOP_WRITE(vp, uiop, ioflags, cred);
	if (error == 0)
		nh->nh_nextoff = uiop->uio_offset;
	/* The iovec array came from nfsrv_createiovecw(). */
	free(iv, M_TEMP);

	NFSEXITCODE(error);
	return (error);
}

/*
 * Common code for creating a regular file (plus special files for V2).
 *
 * The starting error is nd->nd_repstat.
 * - With no error and no existing ndp->ni_vp, the object is created with
 *   VOP_CREATE() (VREG, VSOCK) or VOP_MKNOD() (VCHR, VBLK, VFIFO); any
 *   other na_type gives ENXIO.  When *exclusive_flagp is set after a
 *   successful VOP_CREATE(), the flag is cleared and cverf[0]/cverf[1]
 *   are stored as the access time; if that VOP_SETATTR() fails the new
 *   vnode is vput(), ni_vp is set to NULL and NFSERR_NOTSUPP is returned.
 * - Otherwise the lookup is cleaned up and, if there is no error and
 *   na_size is not VNOVAL, the size of the existing file is set after a
 *   VWRITE nfsvno_accchk().
 *
 * *vpp is set to ndp->ni_vp on every path except the "goto out" exits of
 * the special-file and ENXIO branches.  Returns 0 or an error number.
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;
	struct thread *p = curthread;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    nd->nd_cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/* Record the create verifier in atime. */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
		    nvap->na_type == VFIFO) {
			/* A VCHR with rdev 0xffffffff is treated as a fifo. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
				vrele(ndp->ni_startdir);
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			vrele(ndp->ni_startdir);
			if (error)
				goto out;
		} else {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				/* Keep only the size across the attr reset. */
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = VOP_SETATTR(*vpp,
				    &nvap->na_vattr, nd->nd_cred);
			}
		}
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do a mknod vnode op.
 *
 * Returns EEXIST if ndp->ni_vp is already set and NFSERR_BADTYPE if
 * nvap->na_type is not one of VCHR, VBLK, VSOCK or VFIFO.  A VSOCK is
 * made with VOP_CREATE(); the other types use VOP_MKNOD(), preceded by a
 * priv_check_cred(PRIV_VFS_MKNOD_DEV) check for anything but a VFIFO.
 * The p argument is not referenced here.
 */
int
nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p)
{
	int error = 0;
	enum vtype vtyp;

	vtyp = nvap->na_type;
	/*
	 * Iff doesn't exist, create it.
	 */
	if (ndp->ni_vp) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		error = EEXIST;
		goto out;
	}
	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		error = NFSERR_BADTYPE;
		goto out;
	}
	if (vtyp == VSOCK) {
		vrele(ndp->ni_startdir);
		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
	} else {
		if (nvap->na_type != VFIFO &&
		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			goto out;
		}
		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
		vrele(ndp->ni_startdir);
		/*
		 * Since VOP_MKNOD returns the ni_vp, I can't
		 * see any reason to do the lookup.
		 */
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Mkdir vnode op.
 *
 * Returns EEXIST (after releasing the looked up vnodes) when ndp->ni_vp
 * is already set, otherwise the VOP_MKDIR() result.  The saved_uid, cred,
 * p and exp arguments are not referenced here.
 */
int
nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
{
	int error = 0;

	if (ndp->ni_vp != NULL) {
		/* ni_dvp and ni_vp can be the same vnode. */
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		nfsvno_relpathbuf(ndp);
		error = EEXIST;
		goto out;
	}
	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
	    &nvap->na_vattr);
	vput(ndp->ni_dvp);
	nfsvno_relpathbuf(ndp);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * symlink vnode op.
1285 */ 1286 int 1287 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1288 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1289 struct nfsexstuff *exp) 1290 { 1291 int error = 0; 1292 1293 if (ndp->ni_vp) { 1294 vrele(ndp->ni_startdir); 1295 nfsvno_relpathbuf(ndp); 1296 if (ndp->ni_dvp == ndp->ni_vp) 1297 vrele(ndp->ni_dvp); 1298 else 1299 vput(ndp->ni_dvp); 1300 vrele(ndp->ni_vp); 1301 error = EEXIST; 1302 goto out; 1303 } 1304 1305 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1306 &nvap->na_vattr, pathcp); 1307 vput(ndp->ni_dvp); 1308 vrele(ndp->ni_startdir); 1309 nfsvno_relpathbuf(ndp); 1310 /* 1311 * Although FreeBSD still had the lookup code in 1312 * it for 7/current, there doesn't seem to be any 1313 * point, since VOP_SYMLINK() returns the ni_vp. 1314 * Just vput it for v2. 1315 */ 1316 if (!not_v2 && !error) 1317 vput(ndp->ni_vp); 1318 1319 out: 1320 NFSEXITCODE(error); 1321 return (error); 1322 } 1323 1324 /* 1325 * Parse symbolic link arguments. 1326 * This function has an ugly side effect. It will malloc() an area for 1327 * the symlink and set iov_base to point to it, only if it succeeds. 1328 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1329 * be FREE'd later. 
1330 */ 1331 int 1332 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1333 struct thread *p, char **pathcpp, int *lenp) 1334 { 1335 u_int32_t *tl; 1336 char *pathcp = NULL; 1337 int error = 0, len; 1338 struct nfsv2_sattr *sp; 1339 1340 *pathcpp = NULL; 1341 *lenp = 0; 1342 if ((nd->nd_flag & ND_NFSV3) && 1343 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1344 goto nfsmout; 1345 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1346 len = fxdr_unsigned(int, *tl); 1347 if (len > NFS_MAXPATHLEN || len <= 0) { 1348 error = EBADRPC; 1349 goto nfsmout; 1350 } 1351 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1352 error = nfsrv_mtostr(nd, pathcp, len); 1353 if (error) 1354 goto nfsmout; 1355 if (nd->nd_flag & ND_NFSV2) { 1356 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1357 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1358 } 1359 *pathcpp = pathcp; 1360 *lenp = len; 1361 NFSEXITCODE2(0, nd); 1362 return (0); 1363 nfsmout: 1364 if (pathcp) 1365 free(pathcp, M_TEMP); 1366 NFSEXITCODE2(error, nd); 1367 return (error); 1368 } 1369 1370 /* 1371 * Remove a non-directory object. 
1372 */ 1373 int 1374 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1375 struct thread *p, struct nfsexstuff *exp) 1376 { 1377 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1378 int error = 0, mirrorcnt; 1379 char fname[PNFS_FILENAME_LEN + 1]; 1380 fhandle_t fh; 1381 1382 vp = ndp->ni_vp; 1383 dsdvp[0] = NULL; 1384 if (vp->v_type == VDIR) 1385 error = NFSERR_ISDIR; 1386 else if (is_v4) 1387 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), 1388 p); 1389 if (error == 0) 1390 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1391 if (!error) 1392 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1393 if (error == 0 && dsdvp[0] != NULL) 1394 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1395 if (ndp->ni_dvp == vp) 1396 vrele(ndp->ni_dvp); 1397 else 1398 vput(ndp->ni_dvp); 1399 vput(vp); 1400 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1401 nfsvno_relpathbuf(ndp); 1402 NFSEXITCODE(error); 1403 return (error); 1404 } 1405 1406 /* 1407 * Remove a directory. 1408 */ 1409 int 1410 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1411 struct thread *p, struct nfsexstuff *exp) 1412 { 1413 struct vnode *vp; 1414 int error = 0; 1415 1416 vp = ndp->ni_vp; 1417 if (vp->v_type != VDIR) { 1418 error = ENOTDIR; 1419 goto out; 1420 } 1421 /* 1422 * No rmdir "." please. 1423 */ 1424 if (ndp->ni_dvp == vp) { 1425 error = EINVAL; 1426 goto out; 1427 } 1428 /* 1429 * The root of a mounted filesystem cannot be deleted. 1430 */ 1431 if (vp->v_vflag & VV_ROOT) 1432 error = EBUSY; 1433 out: 1434 if (!error) 1435 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1436 if (ndp->ni_dvp == vp) 1437 vrele(ndp->ni_dvp); 1438 else 1439 vput(ndp->ni_dvp); 1440 vput(vp); 1441 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1442 nfsvno_relpathbuf(ndp); 1443 NFSEXITCODE(error); 1444 return (error); 1445 } 1446 1447 /* 1448 * Rename vnode op. 
 */
/*
 * nfsvno_rename() arguments, as used below:
 *  fromndp - lookup result for the source; its ni_dvp and ni_vp are
 *            vrele()d here on every path that does not reach VOP_RENAME()
 *  tondp   - lookup result for the destination (ni_dvp, optional ni_vp)
 *  ndstat  - if non-zero, it is returned as the error after releasing the
 *            source vnodes; the destination vnodes are not touched
 *  ndflag  - ND_NFSV2/ND_NFSV4 bits selecting the error codes and the
 *            delegation/remove checks
 *  cred    - not referenced in this function
 *  p       - thread handed to the nfsrv_*() and nfsd_*() helpers
 *
 * An internal error value of -1 (source and destination are the same
 * vnode) becomes a return value of 0 without calling VOP_RENAME().
 */
int
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
{
	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
	int error = 0, mirrorcnt;
	char fname[PNFS_FILENAME_LEN + 1];
	fhandle_t fh;

	dsdvp[0] = NULL;
	fvp = fromndp->ni_vp;
	if (ndstat) {
		/* tdvp/tvp are not set yet, so skip straight to out1. */
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		error = ndstat;
		goto out1;
	}
	tdvp = tondp->ni_dvp;
	tvp = tondp->ni_vp;
	if (tvp != NULL) {
		/* A directory can only replace a directory, and vice versa. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
			goto out;
		}
		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
			goto out;
		}

		/*
		 * A rename to '.' or '..' results in a prematurely
		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
		 * here.
		 */
		if ((tondp->ni_cnd.cn_namelen == 1 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
		    (tondp->ni_cnd.cn_namelen == 2 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
			error = EINVAL;
			goto out;
		}
	}
	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	if (fvp->v_mount != tdvp->v_mount) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	if (fvp == tdvp) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
		goto out;
	}
	if (fvp == tvp) {
		/*
		 * If source and destination are the same, there is nothing to
		 * do. Set error to -1 to indicate this.
		 */
		error = -1;
		goto out;
	}
	if (ndflag & ND_NFSV4) {
		/* fvp is locked only for the nfsrv_checkremove() call. */
		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
			error = nfsrv_checkremove(fvp, 0, NULL,
			    (nfsquad_t)((u_quad_t)0), p);
			NFSVOPUNLOCK(fvp);
		} else
			error = EPERM;
		if (tvp && !error)
			error = nfsrv_checkremove(tvp, 1, NULL,
			    (nfsquad_t)((u_quad_t)0), p);
	} else {
		/*
		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
		 * that the NFSv4 client won't be confused by the rename.
		 * Since nfsd_recalldelegation() can only be called on an
		 * unlocked vnode at this point and fvp is the file that will
		 * still exist after the rename, just do fvp.
		 */
		nfsd_recalldelegation(fvp, p);
	}
	if (error == 0 && tvp != NULL) {
		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
		    " dsdvp=%p\n", dsdvp[0]);
	}
out:
	if (!error) {
		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
		    &tondp->ni_cnd);
	} else {
		/* VOP_RENAME() was not called: release the vnodes here. */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		if (error == -1)
			error = 0;
	}

	/*
	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
	 * if the rename succeeded, the DS file for the tvp needs to be
	 * removed.
	 */
	if (error == 0 && dsdvp[0] != NULL) {
		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
	}

	vrele(tondp->ni_startdir);
	nfsvno_relpathbuf(tondp);
out1:
	vrele(fromndp->ni_startdir);
	nfsvno_relpathbuf(fromndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Link vnode op.
 */
/*
 * nfsvno_link(): make the name described by ndp a link to vp.
 *
 * Returns EEXIST if the name already exists (ndp->ni_vp set), EXDEV if
 * vp and the target directory are on different mounts, EPERM if vp is
 * doomed once locked, otherwise the VOP_LINK() result.  The cred, p and
 * exp arguments are not referenced here.
 */
int
nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	struct vnode *xp;
	int error = 0;

	xp = ndp->ni_vp;
	if (xp != NULL) {
		error = EEXIST;
	} else {
		xp = ndp->ni_dvp;
		if (vp->v_mount != xp->v_mount)
			error = EXDEV;
	}
	if (!error) {
		/* vp is locked here and unlocked after ni_dvp is released. */
		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		if (!VN_IS_DOOMED(vp))
			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
		else
			error = EPERM;
		if (ndp->ni_dvp == vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		NFSVOPUNLOCK(vp);
	} else {
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (ndp->ni_vp)
			vrele(ndp->ni_vp);
	}
	nfsvno_relpathbuf(ndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do the fsync() appropriate for the commit.
 *
 *  vp   - vnode to commit
 *  off  - starting byte offset of the range
 *  cnt  - byte count; 0 or a value above MAX_COMMIT_COUNT selects a
 *         flush of the whole file
 *  cred - not referenced in this function
 *  td   - thread passed to VOP_FSYNC()
 *
 * Returns the VOP_FSYNC() result for the whole-file case; the ranged
 * case leaves error at 0.
 */
int
nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
    struct thread *td)
{
	int error = 0;

	/*
	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
	 * file is done.  At this time VOP_FSYNC does not accept offset and
	 * byte count parameters so call VOP_FSYNC the whole file for now.
	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
	 * File systems that do not use the buffer cache (as indicated
	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
	 */
	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
		/*
		 * Give up and do the whole thing
		 */
		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		error = VOP_FSYNC(vp, MNT_WAIT, td);
	} else {
		/*
		 * Locate and synchronously write any buffers that fall
		 * into the requested range.  Note:  we are assuming that
		 * f_iosize is a power of 2.
		 */
		int iosize = vp->v_mount->mnt_stat.f_iosize;
		int iomask = iosize - 1;
		struct bufobj *bo;
		daddr_t lblkno;

		/*
		 * Align to iosize boundary, super-align to page boundary.
		 */
		if (off & iomask) {
			cnt += off & iomask;
			off &= ~(u_quad_t)iomask;
		}
		if (off & PAGE_MASK) {
			cnt += off & PAGE_MASK;
			off &= ~(u_quad_t)PAGE_MASK;
		}
		lblkno = off / iosize;

		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, off, off + cnt,
			    OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_LOCK(bo);
		while (cnt > 0) {
			struct buf *bp;

			/*
			 * If we have a buffer and it is marked B_DELWRI we
			 * have to lock and write it.  Otherwise the prior
			 * write is assumed to have already been committed.
			 *
			 * gbincore() can return invalid buffers now so we
			 * have to check that bit as well (though B_DELWRI
			 * should not be set if B_INVAL is set there could be
			 * a race here since we haven't locked the buffer).
			 */
			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
				/*
				 * BUF_LOCK() is given LK_INTERLOCK with the
				 * bufobj lock, and BO_LOCK() is taken again
				 * on every path below before continuing.
				 */
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
					BO_LOCK(bo);
					continue; /* retry */
				}
				if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
				    B_DELWRI) {
					bremfree(bp);
					bp->b_flags &= ~B_ASYNC;
					bwrite(bp);
					++nfs_commit_miss;
				} else
					BUF_UNLOCK(bp);
				BO_LOCK(bo);
			}
			++nfs_commit_blks;
			/* Less than one block left: this was the last one. */
			if (cnt < iosize)
				break;
			cnt -= iosize;
			++lblkno;
		}
		BO_UNLOCK(bo);
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Statfs vnode op.
 *
 * Fills *sf via VFS_STATFS() for vp's mount.  When nfsrv_devidcnt > 0 and
 * nfsrv_pnfsstatfs() succeeds, f_blocks, f_bavail, f_bfree and f_bsize
 * are replaced by the values it reported.  Returns the VFS_STATFS()
 * result.
 */
int
nfsvno_statfs(struct vnode *vp, struct statfs *sf)
{
	struct statfs *tsf;
	int error;

	tsf = NULL;
	if (nfsrv_devidcnt > 0) {
		/* For a pNFS service, get the DS numbers. */
		tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
		error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
		if (error != 0) {
			/* No DS numbers: fall back to VFS_STATFS() alone. */
			free(tsf, M_TEMP);
			tsf = NULL;
		}
	}
	error = VFS_STATFS(vp->v_mount, sf);
	if (error == 0) {
		if (tsf != NULL) {
			sf->f_blocks = tsf->f_blocks;
			sf->f_bavail = tsf->f_bavail;
			sf->f_bfree = tsf->f_bfree;
			sf->f_bsize = tsf->f_bsize;
		}
		/*
		 * Since NFS handles these values as unsigned on the
		 * wire, there is no way to represent negative values,
		 * so set them to 0. Without this, they will appear
		 * to be very large positive values for clients like
		 * Solaris10.
		 */
		if (sf->f_bavail < 0)
			sf->f_bavail = 0;
		if (sf->f_ffree < 0)
			sf->f_ffree = 0;
	}
	free(tsf, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 * must handle nfsrv_opencheck() calls after any other access checks.
 */
/*
 * nfsvno_open() reports its status through nd->nd_repstat and hands the
 * resulting vnode back in *vpp (NULL when nd->nd_repstat was already set
 * before any vnode work was done).
 * - If the lookup found nothing (ndp->ni_vp == NULL), nfsrv_opencheck()
 *   runs first and the file is then created with VOP_CREATE().  For an
 *   exclusive create, *exclusive_flagp is cleared and cverf[0]/cverf[1]
 *   are stored as the access time; otherwise nfsrv_fixattr() is called.
 * - If the file exists, the lookup is released; when a size is set in
 *   nvap and the vnode is VREG, a VWRITE access check and
 *   nfsrv_opencheck() precede the VOP_SETATTR() of the size.
 */
void
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
    struct nfsexstuff *exp, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	u_quad_t tempsize;
	struct nfsexstuff nes;
	struct thread *p = curthread;

	if (ndp->ni_vp == NULL)
		nd->nd_repstat = nfsrv_opencheck(clientid,
		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
	if (!nd->nd_repstat) {
		if (ndp->ni_vp == NULL) {
			vrele(ndp->ni_startdir);
			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (nd->nd_repstat == 0) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!nd->nd_repstat) {
				if (*exclusive_flagp) {
					/* Record the create verifier in atime. */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, cred);
					if (nd->nd_repstat != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						nd->nd_repstat = NFSERR_NOTSUPP;
					} else
						NFSSETBIT_ATTRBIT(attrbitp,
						    NFSATTRBIT_TIMEACCESS);
				} else {
					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
					    aclp, p, attrbitp, exp);
				}
			}
			vp = ndp->ni_vp;
		} else {
			if (ndp->ni_startdir)
				vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vp = ndp->ni_vp;
			/* ni_dvp is only released for NFSV4OPEN_CREATE. */
			if (create == NFSV4OPEN_CREATE) {
				if (ndp->ni_dvp == vp)
					vrele(ndp->ni_dvp);
				else
					vput(ndp->ni_dvp);
			}
			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				if (ndp->ni_cnd.cn_flags & RDONLY)
					NFSVNO_SETEXRDONLY(&nes);
				else
					NFSVNO_EXINIT(&nes);
				nd->nd_repstat = nfsvno_accchk(vp,
				    VWRITE, cred, &nes, p,
				    NFSACCCHK_NOOVERRIDE,
				    NFSACCCHK_VPISLOCKED, NULL);
				/* The access check result is passed along. */
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
				if (!nd->nd_repstat) {
					/* Keep only the size across the reset. */
					tempsize = nvap->na_size;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_size = tempsize;
					nd->nd_repstat = VOP_SETATTR(vp,
					    &nvap->na_vattr, cred);
				}
			} else if (vp->v_type == VREG) {
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
			}
		}
	} else {
		/* Already failed: release whatever the lookup left behind. */
		if (ndp->ni_cnd.cn_flags & HASBUF)
			nfsvno_relpathbuf(ndp);
		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
			vrele(ndp->ni_startdir);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			if (ndp->ni_vp)
				vput(ndp->ni_vp);
		}
	}
	*vpp = vp;

	NFSEXITCODE2(0, nd);
}

/*
 * Updates the file rev and sets the mtime and ctime
 * to the current clock time, returning the va_filerev and va_Xtime
 * values.
 * Return ESTALE to indicate the vnode is VIRF_DOOMED.
 *
 * If vp is not held LK_EXCLUSIVE, the lock is upgraded first.  The
 * results of VOP_SETATTR() and nfsvno_getattr() are discarded (note the
 * (void) casts), so 0 is returned whenever the vnode is not doomed.
 */
int
nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
    struct nfsrv_descript *nd, struct thread *p)
{
	struct vattr va;

	VATTR_NULL(&va);
	vfs_timestamp(&va.va_mtime);
	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
		if (VN_IS_DOOMED(vp))
			return (ESTALE);
	}
	(void) VOP_SETATTR(vp, &va, nd->nd_cred);
	(void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
	return (0);
}

/*
 * Glue routine to nfsv4_fillattr().
 *
 * When nfsrv_devidcnt > 0 and any of the SPACEAVAIL, SPACEFREE or
 * SPACETOTAL attribute bits is requested, a statfs structure is filled
 * by nfsrv_pnfsstatfs() and passed as the last argument; otherwise (or
 * if that call fails) NULL is passed.  Returns the nfsv4_fillattr()
 * result.
 */
int
nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
    struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
    struct ucred *cred, struct thread *p, int isdgram, int reterr,
    int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
{
	struct statfs *sf;
	int error;

	sf = NULL;
	if (nfsrv_devidcnt > 0 &&
	    (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
		sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
		error = nfsrv_pnfsstatfs(sf, mp);
		if (error != 0) {
			free(sf, M_TEMP);
			sf = NULL;
		}
	}
	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
	    mounted_on_fileno, sf);
	free(sf, M_TEMP);
	NFSEXITCODE2(0, nd);
	return (error);
}

/* Since the Readdir vnode ops vary, put the entire functions in here.
*/
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count break out of loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other os'.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of.. just name strings and file id's or the
 *	entire reply rpc or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
 *
 * Status is reported through nd->nd_repstat; the function itself returns
 * 0 except when request dissection jumps to nfsmout, where vp is vput()
 * and the dissect error is returned.  vp is vput() on all paths after
 * the initial nd->nd_repstat check.  The isdgram argument is not
 * referenced here.
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf __unused;
	u_long *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;
	int is_ufs;
	struct thread *p = curthread;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* NFSv2 carries a 32-bit cookie; later versions a hyper + verifier. */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);
	/* Round the read size up to a multiple of DIRBLKSIZ. */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
		    NULL);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (nd->nd_repstat == 0 && cnt == 0) {
		if (nd->nd_flag & ND_NFSV2)
			/* NFSv2 does not have NFSERR_TOOSMALL */
			nd->nd_repstat = EPERM;
		else
			nd->nd_repstat = NFSERR_TOOSMALL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	/* Each pass starts with a fresh cookie array from VOP_READDIR(). */
	eofflag = 0;
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vput(vp);
		free(rbuf, M_TEMP);
		if (cookies)
			free(cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* No entry follows, and eof is true. */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(rbuf, M_TEMP);
		free(cookies, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Nothing usable in this buffer: read the next one. */
		siz = fullsiz;
		toff = off;
		goto again;
	}
	vput(vp);

	/*
	 * If cnt > MCLBYTES and the reply will not be saved, use
	 * ext_pgs mbufs for TLS.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			if (dirlen > cnt) {
				/* Entry does not fit: stop, and not at eof. */
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
			}
			*tl = txdr_unsigned(dp->d_fileno);
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
			} else
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(*cookiep);
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Unprocessed records remain, so this cannot be eof. */
	if (cpos < cend)
		eofflag = 0;
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	free(rbuf, M_TEMP);
	free(cookies, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Readdirplus for V3 and Readdir for V4.
2210 */ 2211 int 2212 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2213 struct vnode *vp, struct nfsexstuff *exp) 2214 { 2215 struct dirent *dp; 2216 u_int32_t *tl; 2217 int dirlen; 2218 char *cpos, *cend, *rbuf; 2219 struct vnode *nvp; 2220 fhandle_t nfh; 2221 struct nfsvattr nva, at, *nvap = &nva; 2222 struct mbuf *mb0, *mb1; 2223 struct nfsreferral *refp; 2224 int nlen, r, error = 0, getret = 1, usevget = 1; 2225 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2226 caddr_t bpos0, bpos1; 2227 u_int64_t off, toff, verf; 2228 u_long *cookies = NULL, *cookiep; 2229 nfsattrbit_t attrbits, rderrbits, savbits; 2230 struct uio io; 2231 struct iovec iv; 2232 struct componentname cn; 2233 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2234 struct mount *mp, *new_mp; 2235 uint64_t mounted_on_fileno; 2236 struct thread *p = curthread; 2237 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; 2238 2239 if (nd->nd_repstat) { 2240 nfsrv_postopattr(nd, getret, &at); 2241 goto out; 2242 } 2243 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2244 off = fxdr_hyper(tl); 2245 toff = off; 2246 tl += 2; 2247 verf = fxdr_hyper(tl); 2248 tl += 2; 2249 siz = fxdr_unsigned(int, *tl++); 2250 cnt = fxdr_unsigned(int, *tl); 2251 2252 /* 2253 * Use the server's maximum data transfer size as the upper bound 2254 * on reply datalen. 2255 */ 2256 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2257 cnt = NFS_SRVMAXDATA(nd); 2258 2259 /* 2260 * siz is a "hint" of how much directory information (name, fileid, 2261 * cookie) should be in the reply. At least one client "hints" 0, 2262 * so I set it to cnt for that case. I also round it up to the 2263 * next multiple of DIRBLKSIZ. 2264 * Since the size of a Readdirplus directory entry reply will always 2265 * be greater than a directory entry returned by VOP_READDIR(), it 2266 * does not make sense to read more than NFS_SRVMAXDATA() via 2267 * VOP_READDIR(). 
2268 */ 2269 if (siz <= 0) 2270 siz = cnt; 2271 else if (siz > NFS_SRVMAXDATA(nd)) 2272 siz = NFS_SRVMAXDATA(nd); 2273 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2274 2275 if (nd->nd_flag & ND_NFSV4) { 2276 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2277 if (error) 2278 goto nfsmout; 2279 NFSSET_ATTRBIT(&savbits, &attrbits); 2280 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); 2281 NFSZERO_ATTRBIT(&rderrbits); 2282 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2283 } else { 2284 NFSZERO_ATTRBIT(&attrbits); 2285 } 2286 fullsiz = siz; 2287 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2288 #if 0 2289 if (!nd->nd_repstat) { 2290 if (off && verf != at.na_filerev) { 2291 /* 2292 * va_filerev is not sufficient as a cookie verifier, 2293 * since it is not supposed to change when entries are 2294 * removed/added unless that offset cookies returned to 2295 * the client are no longer valid. 2296 */ 2297 if (nd->nd_flag & ND_NFSV4) { 2298 nd->nd_repstat = NFSERR_NOTSAME; 2299 } else { 2300 nd->nd_repstat = NFSERR_BAD_COOKIE; 2301 } 2302 } 2303 } 2304 #endif 2305 if (!nd->nd_repstat && vp->v_type != VDIR) 2306 nd->nd_repstat = NFSERR_NOTDIR; 2307 if (!nd->nd_repstat && cnt == 0) 2308 nd->nd_repstat = NFSERR_TOOSMALL; 2309 if (!nd->nd_repstat) 2310 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2311 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2312 NFSACCCHK_VPISLOCKED, NULL); 2313 if (nd->nd_repstat) { 2314 vput(vp); 2315 if (nd->nd_flag & ND_NFSV3) 2316 nfsrv_postopattr(nd, getret, &at); 2317 goto out; 2318 } 2319 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2320 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2321 2322 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2323 again: 2324 eofflag = 0; 2325 if (cookies) { 2326 free(cookies, M_TEMP); 2327 cookies = NULL; 2328 } 2329 2330 iv.iov_base = rbuf; 2331 iv.iov_len = siz; 2332 io.uio_iov = &iv; 2333 io.uio_iovcnt = 1; 2334 io.uio_offset = (off_t)off; 2335 io.uio_resid = 
siz; 2336 io.uio_segflg = UIO_SYSSPACE; 2337 io.uio_rw = UIO_READ; 2338 io.uio_td = NULL; 2339 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2340 &cookies); 2341 off = (u_int64_t)io.uio_offset; 2342 if (io.uio_resid) 2343 siz -= io.uio_resid; 2344 2345 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2346 2347 if (!cookies && !nd->nd_repstat) 2348 nd->nd_repstat = NFSERR_PERM; 2349 if (!nd->nd_repstat) 2350 nd->nd_repstat = getret; 2351 if (nd->nd_repstat) { 2352 vput(vp); 2353 if (cookies) 2354 free(cookies, M_TEMP); 2355 free(rbuf, M_TEMP); 2356 if (nd->nd_flag & ND_NFSV3) 2357 nfsrv_postopattr(nd, getret, &at); 2358 goto out; 2359 } 2360 /* 2361 * If nothing read, return eof 2362 * rpc reply 2363 */ 2364 if (siz == 0) { 2365 vput(vp); 2366 if (nd->nd_flag & ND_NFSV3) 2367 nfsrv_postopattr(nd, getret, &at); 2368 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2369 txdr_hyper(at.na_filerev, tl); 2370 tl += 2; 2371 *tl++ = newnfs_false; 2372 *tl = newnfs_true; 2373 free(cookies, M_TEMP); 2374 free(rbuf, M_TEMP); 2375 goto out; 2376 } 2377 2378 /* 2379 * Check for degenerate cases of nothing useful read. 2380 * If so go try again 2381 */ 2382 cpos = rbuf; 2383 cend = rbuf + siz; 2384 dp = (struct dirent *)cpos; 2385 cookiep = cookies; 2386 2387 /* 2388 * For some reason FreeBSD's ufs_readdir() chooses to back the 2389 * directory offset up to a block boundary, so it is necessary to 2390 * skip over the records that precede the requested offset. This 2391 * requires the assumption that file offset cookies monotonically 2392 * increase. 2393 */ 2394 while (cpos < cend && ncookies > 0 && 2395 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2396 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2397 ((nd->nd_flag & ND_NFSV4) && 2398 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2399 (dp->d_namlen==2 && dp->d_name[0]=='.' 
&& dp->d_name[1]=='.'))))) { 2400 cpos += dp->d_reclen; 2401 dp = (struct dirent *)cpos; 2402 cookiep++; 2403 ncookies--; 2404 } 2405 if (cpos >= cend || ncookies == 0) { 2406 siz = fullsiz; 2407 toff = off; 2408 goto again; 2409 } 2410 2411 /* 2412 * Busy the file system so that the mount point won't go away 2413 * and, as such, VFS_VGET() can be used safely. 2414 */ 2415 mp = vp->v_mount; 2416 vfs_ref(mp); 2417 NFSVOPUNLOCK(vp); 2418 nd->nd_repstat = vfs_busy(mp, 0); 2419 vfs_rel(mp); 2420 if (nd->nd_repstat != 0) { 2421 vrele(vp); 2422 free(cookies, M_TEMP); 2423 free(rbuf, M_TEMP); 2424 if (nd->nd_flag & ND_NFSV3) 2425 nfsrv_postopattr(nd, getret, &at); 2426 goto out; 2427 } 2428 2429 /* 2430 * Check to see if entries in this directory can be safely acquired 2431 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2432 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2433 * automount of the snapshot directory that is required will 2434 * be done. 2435 * This needs to be done here for NFSv4, since NFSv4 never does 2436 * a VFS_VGET() for "." or "..". 2437 */ 2438 if (is_zfs == 1) { 2439 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2440 if (r == EOPNOTSUPP) { 2441 usevget = 0; 2442 cn.cn_nameiop = LOOKUP; 2443 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2444 cn.cn_cred = nd->nd_cred; 2445 cn.cn_thread = p; 2446 } else if (r == 0) 2447 vput(nvp); 2448 } 2449 2450 /* 2451 * If the reply is likely to exceed MCLBYTES and the reply will 2452 * not be saved, use ext_pgs mbufs for TLS. 2453 * It is difficult to predict how large each entry will be and 2454 * how many entries have been read, so just assume the directory 2455 * entries grow by a factor of 4 when attributes are included. 2456 * For NFSv4.0, we do not know for sure if the reply will 2457 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2458 */ 2459 if (cnt > MCLBYTES && siz > MCLBYTES / 4 && 2460 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2461 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2462 nd->nd_flag |= ND_EXTPG; 2463 2464 /* 2465 * Save this position, in case there is an error before one entry 2466 * is created. 2467 */ 2468 mb0 = nd->nd_mb; 2469 bpos0 = nd->nd_bpos; 2470 bextpg0 = nd->nd_bextpg; 2471 bextpgsiz0 = nd->nd_bextpgsiz; 2472 2473 /* 2474 * Fill in the first part of the reply. 2475 * dirlen is the reply length in bytes and cannot exceed cnt. 2476 * (Include the two booleans at the end of the reply in dirlen now, 2477 * so we recognize when we have exceeded cnt.) 2478 */ 2479 if (nd->nd_flag & ND_NFSV3) { 2480 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2481 nfsrv_postopattr(nd, getret, &at); 2482 } else { 2483 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2484 } 2485 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2486 txdr_hyper(at.na_filerev, tl); 2487 2488 /* 2489 * Save this position, in case there is an empty reply needed. 2490 */ 2491 mb1 = nd->nd_mb; 2492 bpos1 = nd->nd_bpos; 2493 bextpg1 = nd->nd_bextpg; 2494 bextpgsiz1 = nd->nd_bextpgsiz; 2495 2496 /* Loop through the records and build reply */ 2497 entrycnt = 0; 2498 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2499 nlen = dp->d_namlen; 2500 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2501 nlen <= NFS_MAXNAMLEN && 2502 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2503 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2504 || (nlen == 1 && dp->d_name[0] != '.'))) { 2505 /* 2506 * Save the current position in the reply, in case 2507 * this entry exceeds cnt. 2508 */ 2509 mb1 = nd->nd_mb; 2510 bpos1 = nd->nd_bpos; 2511 bextpg1 = nd->nd_bextpg; 2512 bextpgsiz1 = nd->nd_bextpgsiz; 2513 2514 /* 2515 * For readdir_and_lookup get the vnode using 2516 * the file number. 
2517 */ 2518 nvp = NULL; 2519 refp = NULL; 2520 r = 0; 2521 at_root = 0; 2522 needs_unbusy = 0; 2523 new_mp = mp; 2524 mounted_on_fileno = (uint64_t)dp->d_fileno; 2525 if ((nd->nd_flag & ND_NFSV3) || 2526 NFSNONZERO_ATTRBIT(&savbits)) { 2527 if (nd->nd_flag & ND_NFSV4) 2528 refp = nfsv4root_getreferral(NULL, 2529 vp, dp->d_fileno); 2530 if (refp == NULL) { 2531 if (usevget) 2532 r = VFS_VGET(mp, dp->d_fileno, 2533 LK_SHARED, &nvp); 2534 else 2535 r = EOPNOTSUPP; 2536 if (r == EOPNOTSUPP) { 2537 if (usevget) { 2538 usevget = 0; 2539 cn.cn_nameiop = LOOKUP; 2540 cn.cn_lkflags = 2541 LK_SHARED | 2542 LK_RETRY; 2543 cn.cn_cred = 2544 nd->nd_cred; 2545 cn.cn_thread = p; 2546 } 2547 cn.cn_nameptr = dp->d_name; 2548 cn.cn_namelen = nlen; 2549 cn.cn_flags = ISLASTCN | 2550 NOFOLLOW | LOCKLEAF; 2551 if (nlen == 2 && 2552 dp->d_name[0] == '.' && 2553 dp->d_name[1] == '.') 2554 cn.cn_flags |= 2555 ISDOTDOT; 2556 if (NFSVOPLOCK(vp, LK_SHARED) 2557 != 0) { 2558 nd->nd_repstat = EPERM; 2559 break; 2560 } 2561 if ((vp->v_vflag & VV_ROOT) != 0 2562 && (cn.cn_flags & ISDOTDOT) 2563 != 0) { 2564 vref(vp); 2565 nvp = vp; 2566 r = 0; 2567 } else { 2568 r = VOP_LOOKUP(vp, &nvp, 2569 &cn); 2570 if (vp != nvp) 2571 NFSVOPUNLOCK(vp); 2572 } 2573 } 2574 2575 /* 2576 * For NFSv4, check to see if nvp is 2577 * a mount point and get the mount 2578 * point vnode, as required. 2579 */ 2580 if (r == 0 && 2581 nfsrv_enable_crossmntpt != 0 && 2582 (nd->nd_flag & ND_NFSV4) != 0 && 2583 nvp->v_type == VDIR && 2584 nvp->v_mountedhere != NULL) { 2585 new_mp = nvp->v_mountedhere; 2586 r = vfs_busy(new_mp, 0); 2587 vput(nvp); 2588 nvp = NULL; 2589 if (r == 0) { 2590 r = VFS_ROOT(new_mp, 2591 LK_SHARED, &nvp); 2592 needs_unbusy = 1; 2593 if (r == 0) 2594 at_root = 1; 2595 } 2596 } 2597 } 2598 2599 /* 2600 * If we failed to look up the entry, then it 2601 * has become invalid, most likely removed. 
2602 */ 2603 if (r != 0) { 2604 if (needs_unbusy) 2605 vfs_unbusy(new_mp); 2606 goto invalid; 2607 } 2608 KASSERT(refp != NULL || nvp != NULL, 2609 ("%s: undetected lookup error", __func__)); 2610 2611 if (refp == NULL && 2612 ((nd->nd_flag & ND_NFSV3) || 2613 NFSNONZERO_ATTRBIT(&attrbits))) { 2614 r = nfsvno_getfh(nvp, &nfh, p); 2615 if (!r) 2616 r = nfsvno_getattr(nvp, nvap, nd, p, 2617 1, &attrbits); 2618 if (r == 0 && is_zfs == 1 && 2619 nfsrv_enable_crossmntpt != 0 && 2620 (nd->nd_flag & ND_NFSV4) != 0 && 2621 nvp->v_type == VDIR && 2622 vp->v_mount != nvp->v_mount) { 2623 /* 2624 * For a ZFS snapshot, there is a 2625 * pseudo mount that does not set 2626 * v_mountedhere, so it needs to 2627 * be detected via a different 2628 * mount structure. 2629 */ 2630 at_root = 1; 2631 if (new_mp == mp) 2632 new_mp = nvp->v_mount; 2633 } 2634 } 2635 2636 /* 2637 * If we failed to get attributes of the entry, 2638 * then just skip it for NFSv3 (the traditional 2639 * behavior in the old NFS server). 2640 * For NFSv4 the behavior is controlled by 2641 * RDATTRERROR: we either ignore the error or 2642 * fail the request. 2643 * Note that RDATTRERROR is never set for NFSv3. 
2644 */ 2645 if (r != 0) { 2646 if (!NFSISSET_ATTRBIT(&attrbits, 2647 NFSATTRBIT_RDATTRERROR)) { 2648 vput(nvp); 2649 if (needs_unbusy != 0) 2650 vfs_unbusy(new_mp); 2651 if ((nd->nd_flag & ND_NFSV3)) 2652 goto invalid; 2653 nd->nd_repstat = r; 2654 break; 2655 } 2656 } 2657 } 2658 2659 /* 2660 * Build the directory record xdr 2661 */ 2662 if (nd->nd_flag & ND_NFSV3) { 2663 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2664 *tl++ = newnfs_true; 2665 *tl++ = 0; 2666 *tl = txdr_unsigned(dp->d_fileno); 2667 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2668 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2669 *tl++ = 0; 2670 *tl = txdr_unsigned(*cookiep); 2671 nfsrv_postopattr(nd, 0, nvap); 2672 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2673 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2674 if (nvp != NULL) 2675 vput(nvp); 2676 } else { 2677 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2678 *tl++ = newnfs_true; 2679 *tl++ = 0; 2680 *tl = txdr_unsigned(*cookiep); 2681 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2682 if (nvp != NULL) { 2683 supports_nfsv4acls = 2684 nfs_supportsnfsv4acls(nvp); 2685 NFSVOPUNLOCK(nvp); 2686 } else 2687 supports_nfsv4acls = 0; 2688 if (refp != NULL) { 2689 dirlen += nfsrv_putreferralattr(nd, 2690 &savbits, refp, 0, 2691 &nd->nd_repstat); 2692 if (nd->nd_repstat) { 2693 if (nvp != NULL) 2694 vrele(nvp); 2695 if (needs_unbusy != 0) 2696 vfs_unbusy(new_mp); 2697 break; 2698 } 2699 } else if (r) { 2700 dirlen += nfsvno_fillattr(nd, new_mp, 2701 nvp, nvap, &nfh, r, &rderrbits, 2702 nd->nd_cred, p, isdgram, 0, 2703 supports_nfsv4acls, at_root, 2704 mounted_on_fileno); 2705 } else { 2706 dirlen += nfsvno_fillattr(nd, new_mp, 2707 nvp, nvap, &nfh, r, &attrbits, 2708 nd->nd_cred, p, isdgram, 0, 2709 supports_nfsv4acls, at_root, 2710 mounted_on_fileno); 2711 } 2712 if (nvp != NULL) 2713 vrele(nvp); 2714 dirlen += (3 * NFSX_UNSIGNED); 2715 } 2716 if (needs_unbusy != 0) 2717 vfs_unbusy(new_mp); 2718 if (dirlen <= cnt) 2719 entrycnt++; 2720 
} 2721 invalid: 2722 cpos += dp->d_reclen; 2723 dp = (struct dirent *)cpos; 2724 cookiep++; 2725 ncookies--; 2726 } 2727 vrele(vp); 2728 vfs_unbusy(mp); 2729 2730 /* 2731 * If dirlen > cnt, we must strip off the last entry. If that 2732 * results in an empty reply, report NFSERR_TOOSMALL. 2733 */ 2734 if (dirlen > cnt || nd->nd_repstat) { 2735 if (!nd->nd_repstat && entrycnt == 0) 2736 nd->nd_repstat = NFSERR_TOOSMALL; 2737 if (nd->nd_repstat) { 2738 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); 2739 if (nd->nd_flag & ND_NFSV3) 2740 nfsrv_postopattr(nd, getret, &at); 2741 } else 2742 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); 2743 eofflag = 0; 2744 } else if (cpos < cend) 2745 eofflag = 0; 2746 if (!nd->nd_repstat) { 2747 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2748 *tl++ = newnfs_false; 2749 if (eofflag) 2750 *tl = newnfs_true; 2751 else 2752 *tl = newnfs_false; 2753 } 2754 free(cookies, M_TEMP); 2755 free(rbuf, M_TEMP); 2756 2757 out: 2758 NFSEXITCODE2(0, nd); 2759 return (0); 2760 nfsmout: 2761 vput(vp); 2762 NFSEXITCODE2(error, nd); 2763 return (error); 2764 } 2765 2766 /* 2767 * Get the settable attributes out of the mbuf list. 2768 * (Return 0 or EBADRPC) 2769 */ 2770 int 2771 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2772 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2773 { 2774 u_int32_t *tl; 2775 struct nfsv2_sattr *sp; 2776 int error = 0, toclient = 0; 2777 2778 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2779 case ND_NFSV2: 2780 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2781 /* 2782 * Some old clients didn't fill in the high order 16bits. 
2783 * --> check the low order 2 bytes for 0xffff 2784 */ 2785 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2786 nvap->na_mode = nfstov_mode(sp->sa_mode); 2787 if (sp->sa_uid != newnfs_xdrneg1) 2788 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2789 if (sp->sa_gid != newnfs_xdrneg1) 2790 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2791 if (sp->sa_size != newnfs_xdrneg1) 2792 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2793 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2794 #ifdef notyet 2795 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2796 #else 2797 nvap->na_atime.tv_sec = 2798 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2799 nvap->na_atime.tv_nsec = 0; 2800 #endif 2801 } 2802 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2803 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2804 break; 2805 case ND_NFSV3: 2806 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2807 if (*tl == newnfs_true) { 2808 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2809 nvap->na_mode = nfstov_mode(*tl); 2810 } 2811 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2812 if (*tl == newnfs_true) { 2813 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2814 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2815 } 2816 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2817 if (*tl == newnfs_true) { 2818 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2819 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2820 } 2821 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2822 if (*tl == newnfs_true) { 2823 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2824 nvap->na_size = fxdr_hyper(tl); 2825 } 2826 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2827 switch (fxdr_unsigned(int, *tl)) { 2828 case NFSV3SATTRTIME_TOCLIENT: 2829 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2830 fxdr_nfsv3time(tl, &nvap->na_atime); 2831 toclient = 1; 2832 break; 2833 case NFSV3SATTRTIME_TOSERVER: 2834 vfs_timestamp(&nvap->na_atime); 2835 nvap->na_vaflags |= VA_UTIMES_NULL; 2836 break; 2837 } 2838 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2839 switch (fxdr_unsigned(int, *tl)) { 2840 case NFSV3SATTRTIME_TOCLIENT: 2841 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2842 fxdr_nfsv3time(tl, &nvap->na_mtime); 2843 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2844 break; 2845 case NFSV3SATTRTIME_TOSERVER: 2846 vfs_timestamp(&nvap->na_mtime); 2847 if (!toclient) 2848 nvap->na_vaflags |= VA_UTIMES_NULL; 2849 break; 2850 } 2851 break; 2852 case ND_NFSV4: 2853 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2854 } 2855 nfsmout: 2856 NFSEXITCODE2(error, nd); 2857 return (error); 2858 } 2859 2860 /* 2861 * Handle the setable attributes for V4. 2862 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2863 */ 2864 int 2865 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2866 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2867 { 2868 u_int32_t *tl; 2869 int attrsum = 0; 2870 int i, j; 2871 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2872 int moderet, toclient = 0; 2873 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2874 uid_t uid; 2875 gid_t gid; 2876 u_short mode, mask; /* Same type as va_mode. */ 2877 struct vattr va; 2878 2879 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2880 if (error) 2881 goto nfsmout; 2882 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2883 attrsize = fxdr_unsigned(int, *tl); 2884 2885 /* 2886 * Loop around getting the setable attributes. If an unsupported 2887 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 
2888 */ 2889 if (retnotsup) { 2890 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2891 bitpos = NFSATTRBIT_MAX; 2892 } else { 2893 bitpos = 0; 2894 } 2895 moderet = 0; 2896 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2897 if (attrsum > attrsize) { 2898 error = NFSERR_BADXDR; 2899 goto nfsmout; 2900 } 2901 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2902 switch (bitpos) { 2903 case NFSATTRBIT_SIZE: 2904 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2905 if (vp != NULL && vp->v_type != VREG) { 2906 error = (vp->v_type == VDIR) ? NFSERR_ISDIR : 2907 NFSERR_INVAL; 2908 goto nfsmout; 2909 } 2910 nvap->na_size = fxdr_hyper(tl); 2911 attrsum += NFSX_HYPER; 2912 break; 2913 case NFSATTRBIT_ACL: 2914 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2915 p); 2916 if (error) 2917 goto nfsmout; 2918 if (aceerr && !nd->nd_repstat) 2919 nd->nd_repstat = aceerr; 2920 attrsum += aclsize; 2921 break; 2922 case NFSATTRBIT_ARCHIVE: 2923 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2924 if (!nd->nd_repstat) 2925 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2926 attrsum += NFSX_UNSIGNED; 2927 break; 2928 case NFSATTRBIT_HIDDEN: 2929 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2930 if (!nd->nd_repstat) 2931 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2932 attrsum += NFSX_UNSIGNED; 2933 break; 2934 case NFSATTRBIT_MIMETYPE: 2935 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2936 i = fxdr_unsigned(int, *tl); 2937 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2938 if (error) 2939 goto nfsmout; 2940 if (!nd->nd_repstat) 2941 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2942 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2943 break; 2944 case NFSATTRBIT_MODE: 2945 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. 
*/ 2946 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2947 nvap->na_mode = nfstov_mode(*tl); 2948 attrsum += NFSX_UNSIGNED; 2949 break; 2950 case NFSATTRBIT_OWNER: 2951 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2952 j = fxdr_unsigned(int, *tl); 2953 if (j < 0) { 2954 error = NFSERR_BADXDR; 2955 goto nfsmout; 2956 } 2957 if (j > NFSV4_SMALLSTR) 2958 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2959 else 2960 cp = namestr; 2961 error = nfsrv_mtostr(nd, cp, j); 2962 if (error) { 2963 if (j > NFSV4_SMALLSTR) 2964 free(cp, M_NFSSTRING); 2965 goto nfsmout; 2966 } 2967 if (!nd->nd_repstat) { 2968 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 2969 &uid); 2970 if (!nd->nd_repstat) 2971 nvap->na_uid = uid; 2972 } 2973 if (j > NFSV4_SMALLSTR) 2974 free(cp, M_NFSSTRING); 2975 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2976 break; 2977 case NFSATTRBIT_OWNERGROUP: 2978 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2979 j = fxdr_unsigned(int, *tl); 2980 if (j < 0) { 2981 error = NFSERR_BADXDR; 2982 goto nfsmout; 2983 } 2984 if (j > NFSV4_SMALLSTR) 2985 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2986 else 2987 cp = namestr; 2988 error = nfsrv_mtostr(nd, cp, j); 2989 if (error) { 2990 if (j > NFSV4_SMALLSTR) 2991 free(cp, M_NFSSTRING); 2992 goto nfsmout; 2993 } 2994 if (!nd->nd_repstat) { 2995 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 2996 &gid); 2997 if (!nd->nd_repstat) 2998 nvap->na_gid = gid; 2999 } 3000 if (j > NFSV4_SMALLSTR) 3001 free(cp, M_NFSSTRING); 3002 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3003 break; 3004 case NFSATTRBIT_SYSTEM: 3005 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3006 if (!nd->nd_repstat) 3007 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3008 attrsum += NFSX_UNSIGNED; 3009 break; 3010 case NFSATTRBIT_TIMEACCESSSET: 3011 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3012 attrsum += NFSX_UNSIGNED; 3013 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3014 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3015 fxdr_nfsv4time(tl, &nvap->na_atime); 3016 
toclient = 1; 3017 attrsum += NFSX_V4TIME; 3018 } else { 3019 vfs_timestamp(&nvap->na_atime); 3020 nvap->na_vaflags |= VA_UTIMES_NULL; 3021 } 3022 break; 3023 case NFSATTRBIT_TIMEBACKUP: 3024 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3025 if (!nd->nd_repstat) 3026 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3027 attrsum += NFSX_V4TIME; 3028 break; 3029 case NFSATTRBIT_TIMECREATE: 3030 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3031 fxdr_nfsv4time(tl, &nvap->na_btime); 3032 attrsum += NFSX_V4TIME; 3033 break; 3034 case NFSATTRBIT_TIMEMODIFYSET: 3035 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3036 attrsum += NFSX_UNSIGNED; 3037 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3038 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3039 fxdr_nfsv4time(tl, &nvap->na_mtime); 3040 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3041 attrsum += NFSX_V4TIME; 3042 } else { 3043 vfs_timestamp(&nvap->na_mtime); 3044 if (!toclient) 3045 nvap->na_vaflags |= VA_UTIMES_NULL; 3046 } 3047 break; 3048 case NFSATTRBIT_MODESETMASKED: 3049 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 3050 mode = fxdr_unsigned(u_short, *tl++); 3051 mask = fxdr_unsigned(u_short, *tl); 3052 /* 3053 * vp == NULL implies an Open/Create operation. 3054 * This attribute can only be used for Setattr and 3055 * only for NFSv4.1 or higher. 3056 * If moderet != 0, a mode attribute has also been 3057 * specified and this attribute cannot be done in the 3058 * same Setattr operation. 
3059 */ 3060 if ((nd->nd_flag & ND_NFSV41) == 0) 3061 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3062 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || 3063 vp == NULL) 3064 nd->nd_repstat = NFSERR_INVAL; 3065 else if (moderet == 0) 3066 moderet = VOP_GETATTR(vp, &va, nd->nd_cred); 3067 if (moderet == 0) 3068 nvap->na_mode = (mode & mask) | 3069 (va.va_mode & ~mask); 3070 else 3071 nd->nd_repstat = moderet; 3072 attrsum += 2 * NFSX_UNSIGNED; 3073 break; 3074 default: 3075 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3076 /* 3077 * set bitpos so we drop out of the loop. 3078 */ 3079 bitpos = NFSATTRBIT_MAX; 3080 break; 3081 } 3082 } 3083 3084 /* 3085 * some clients pad the attrlist, so we need to skip over the 3086 * padding. 3087 */ 3088 if (attrsum > attrsize) { 3089 error = NFSERR_BADXDR; 3090 } else { 3091 attrsize = NFSM_RNDUP(attrsize); 3092 if (attrsum < attrsize) 3093 error = nfsm_advance(nd, attrsize - attrsum, -1); 3094 } 3095 nfsmout: 3096 NFSEXITCODE2(error, nd); 3097 return (error); 3098 } 3099 3100 /* 3101 * Check/setup export credentials. 3102 */ 3103 int 3104 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 3105 struct ucred *credanon) 3106 { 3107 int error = 0; 3108 3109 /* 3110 * Check/setup credentials. 3111 */ 3112 if (nd->nd_flag & ND_GSS) 3113 exp->nes_exflag &= ~MNT_EXPORTANON; 3114 3115 /* 3116 * Check to see if the operation is allowed for this security flavor. 3117 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 3118 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 3119 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 3120 */ 3121 if (nfsvno_testexp(nd, exp) && 3122 nd->nd_procnum != NFSV4OP_SECINFO && 3123 nd->nd_procnum != NFSPROC_FSINFO) { 3124 if (nd->nd_flag & ND_NFSV4) 3125 error = NFSERR_WRONGSEC; 3126 else 3127 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3128 goto out; 3129 } 3130 3131 /* 3132 * Check to see if the file system is exported V4 only. 
3133 */ 3134 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 3135 error = NFSERR_PROGNOTV4; 3136 goto out; 3137 } 3138 3139 /* 3140 * Now, map the user credentials. 3141 * (Note that ND_AUTHNONE will only be set for an NFSv3 3142 * Fsinfo RPC. If set for anything else, this code might need 3143 * to change.) 3144 */ 3145 if (NFSVNO_EXPORTED(exp)) { 3146 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 3147 NFSVNO_EXPORTANON(exp) || 3148 (nd->nd_flag & ND_AUTHNONE) != 0) { 3149 nd->nd_cred->cr_uid = credanon->cr_uid; 3150 nd->nd_cred->cr_gid = credanon->cr_gid; 3151 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 3152 credanon->cr_groups); 3153 } else if ((nd->nd_flag & ND_GSS) == 0) { 3154 /* 3155 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 3156 * if there is a replacement credential with a group 3157 * list set up by "nfsuserd -manage-gids". 3158 * If there is no replacement, nfsrv_getgrpscred() 3159 * simply returns its argument. 3160 */ 3161 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 3162 } 3163 } 3164 3165 out: 3166 NFSEXITCODE2(error, nd); 3167 return (error); 3168 } 3169 3170 /* 3171 * Check exports. 3172 */ 3173 int 3174 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 3175 struct ucred **credp) 3176 { 3177 int error; 3178 3179 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3180 &exp->nes_numsecflavor, exp->nes_secflavors); 3181 if (error) { 3182 if (nfs_rootfhset) { 3183 exp->nes_exflag = 0; 3184 exp->nes_numsecflavor = 0; 3185 error = 0; 3186 } 3187 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3188 MAXSECFLAVORS) { 3189 printf("nfsvno_checkexp: numsecflavors out of range\n"); 3190 exp->nes_numsecflavor = 0; 3191 error = EACCES; 3192 } 3193 NFSEXITCODE(error); 3194 return (error); 3195 } 3196 3197 /* 3198 * Get a vnode for a file handle and export stuff. 
3199 */ 3200 int 3201 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 3202 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 3203 struct ucred **credp) 3204 { 3205 int error; 3206 3207 *credp = NULL; 3208 exp->nes_numsecflavor = 0; 3209 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 3210 if (error != 0) 3211 /* Make sure the server replies ESTALE to the client. */ 3212 error = ESTALE; 3213 if (nam && !error) { 3214 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3215 &exp->nes_numsecflavor, exp->nes_secflavors); 3216 if (error) { 3217 if (nfs_rootfhset) { 3218 exp->nes_exflag = 0; 3219 exp->nes_numsecflavor = 0; 3220 error = 0; 3221 } else { 3222 vput(*vpp); 3223 } 3224 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3225 MAXSECFLAVORS) { 3226 printf("nfsvno_fhtovp: numsecflavors out of range\n"); 3227 exp->nes_numsecflavor = 0; 3228 error = EACCES; 3229 vput(*vpp); 3230 } 3231 } 3232 NFSEXITCODE(error); 3233 return (error); 3234 } 3235 3236 /* 3237 * nfsd_fhtovp() - convert a fh to a vnode ptr 3238 * - look up fsid in mount list (if not found ret error) 3239 * - get vp and export rights by calling nfsvno_fhtovp() 3240 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 3241 * for AUTH_SYS 3242 * - if mpp != NULL, return the mount point so that it can 3243 * be used for vn_finished_write() by the caller 3244 */ 3245 void 3246 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 3247 struct vnode **vpp, struct nfsexstuff *exp, 3248 struct mount **mpp, int startwrite) 3249 { 3250 struct mount *mp; 3251 struct ucred *credanon; 3252 fhandle_t *fhp; 3253 3254 fhp = (fhandle_t *)nfp->nfsrvfh_data; 3255 /* 3256 * Check for the special case of the nfsv4root_fh. 
3257 */ 3258 mp = vfs_busyfs(&fhp->fh_fsid); 3259 if (mpp != NULL) 3260 *mpp = mp; 3261 if (mp == NULL) { 3262 *vpp = NULL; 3263 nd->nd_repstat = ESTALE; 3264 goto out; 3265 } 3266 3267 if (startwrite) { 3268 vn_start_write(NULL, mpp, V_WAIT); 3269 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3270 lktype = LK_EXCLUSIVE; 3271 } 3272 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3273 &credanon); 3274 vfs_unbusy(mp); 3275 3276 /* 3277 * For NFSv4 without a pseudo root fs, unexported file handles 3278 * can be returned, so that Lookup works everywhere. 3279 */ 3280 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3281 !(nd->nd_flag & ND_NFSV4)) { 3282 vput(*vpp); 3283 nd->nd_repstat = EACCES; 3284 } 3285 3286 /* 3287 * Personally, I've never seen any point in requiring a 3288 * reserved port#, since only in the rare case where the 3289 * clients are all boxes with secure system privileges, 3290 * does it provide any enhanced security, but... some people 3291 * believe it to be useful and keep putting this code back in. 3292 * (There is also some "security checker" out there that 3293 * complains if the nfs server doesn't enforce this.) 3294 * However, note the following: 3295 * RFC3530 (NFSv4) specifies that a reserved port# not be 3296 * required. 3297 * RFC2623 recommends that, if a reserved port# is checked for, 3298 * that there be a way to turn that off--> ifdef'd. 
3299 */ 3300 #ifdef NFS_REQRSVPORT 3301 if (!nd->nd_repstat) { 3302 struct sockaddr_in *saddr; 3303 struct sockaddr_in6 *saddr6; 3304 3305 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3306 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3307 if (!(nd->nd_flag & ND_NFSV4) && 3308 ((saddr->sin_family == AF_INET && 3309 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3310 (saddr6->sin6_family == AF_INET6 && 3311 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3312 vput(*vpp); 3313 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3314 } 3315 } 3316 #endif /* NFS_REQRSVPORT */ 3317 3318 /* 3319 * Check/setup credentials. 3320 */ 3321 if (!nd->nd_repstat) { 3322 nd->nd_saveduid = nd->nd_cred->cr_uid; 3323 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 3324 if (nd->nd_repstat) 3325 vput(*vpp); 3326 } 3327 if (credanon != NULL) 3328 crfree(credanon); 3329 if (nd->nd_repstat) { 3330 if (startwrite) 3331 vn_finished_write(mp); 3332 *vpp = NULL; 3333 if (mpp != NULL) 3334 *mpp = NULL; 3335 } 3336 3337 out: 3338 NFSEXITCODE2(0, nd); 3339 } 3340 3341 /* 3342 * glue for fp. 3343 */ 3344 static int 3345 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3346 { 3347 struct filedesc *fdp; 3348 struct file *fp; 3349 int error = 0; 3350 3351 fdp = p->td_proc->p_fd; 3352 if (fd < 0 || fd >= fdp->fd_nfiles || 3353 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3354 error = EBADF; 3355 goto out; 3356 } 3357 *fpp = fp; 3358 3359 out: 3360 NFSEXITCODE(error); 3361 return (error); 3362 } 3363 3364 /* 3365 * Called from nfssvc() to update the exports list. Just call 3366 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3367 * in the mount list. 
3368 */ 3369 int 3370 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3371 { 3372 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3373 int error = 0; 3374 struct nameidata nd; 3375 fhandle_t fh; 3376 3377 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3378 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3379 nfs_rootfhset = 0; 3380 else if (error == 0) { 3381 if (nfsexargp->fspec == NULL) { 3382 error = EPERM; 3383 goto out; 3384 } 3385 /* 3386 * If fspec != NULL, this is the v4root path. 3387 */ 3388 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 3389 nfsexargp->fspec, p); 3390 if ((error = namei(&nd)) != 0) 3391 goto out; 3392 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3393 vrele(nd.ni_vp); 3394 if (!error) { 3395 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3396 NFSBCOPY((caddr_t)&fh, 3397 nfs_rootfh.nfsrvfh_data, 3398 sizeof (fhandle_t)); 3399 nfs_rootfhset = 1; 3400 } 3401 } 3402 3403 out: 3404 NFSEXITCODE(error); 3405 return (error); 3406 } 3407 3408 /* 3409 * This function needs to test to see if the system is near its limit 3410 * for memory allocation via malloc() or mget() and return True iff 3411 * either of these resources are near their limit. 3412 * XXX (For now, this is just a stub.) 3413 */ 3414 int nfsrv_testmalloclimit = 0; 3415 int 3416 nfsrv_mallocmget_limit(void) 3417 { 3418 static int printmesg = 0; 3419 static int testval = 1; 3420 3421 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3422 if ((printmesg++ % 100) == 0) 3423 printf("nfsd: malloc/mget near limit\n"); 3424 return (1); 3425 } 3426 return (0); 3427 } 3428 3429 /* 3430 * BSD specific initialization of a mount point. 
*/
/*
 * One-time setup of the fake nfsv4root_mnt mount structure: flags it
 * read-only and exported, initializes its vnode lists and option lists and
 * clears its export pointer.  Subsequent calls return immediately.
 */
void
nfsd_mntinit(void)
{
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
	TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
	nfsv4root_mnt.mnt_export = NULL;
	TAILQ_INIT(&nfsv4root_opt);
	TAILQ_INIT(&nfsv4root_newopt);
	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
	nfsv4root_mnt.mnt_nvnodelistsize = 0;
	nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
}

/*
 * Get a vnode for a file handle, without checking exports, etc.
 * Returns the vnode obtained from VFS_FHTOVP() (requested with
 * LK_EXCLUSIVE), or NULL when the file system cannot be busied or the
 * handle cannot be converted.
 */
struct vnode *
nfsvno_getvp(fhandle_t *fhp)
{
	struct mount *mp;
	struct vnode *vp;
	int error;

	mp = vfs_busyfs(&fhp->fh_fsid);
	if (mp == NULL)
		return (NULL);
	error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error)
		return (NULL);
	return (vp);
}

/*
 * Do a local VOP_ADVLOCK().
 * "ftype" is the flock type; F_UNLCK releases, anything else is issued as
 * F_SETLK.  "first" is the starting offset and "end" the end offset of the
 * range, with NFS64BITSSET meaning "to end of file" (l_len == 0).
 * Does nothing and returns 0 when nfsrv_dolocallocks is 0.
 * The vnode must be unlocked on entry.
 */
int
nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
    u_int64_t end, struct thread *td)
{
	int error = 0;
	struct flock fl;
	u_int64_t tlen;

	if (nfsrv_dolocallocks == 0)
		goto out;
	ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");

	fl.l_whence = SEEK_SET;
	fl.l_type = ftype;
	fl.l_start = (off_t)first;
	if (end == NFS64BITSSET) {
		fl.l_len = 0;
	} else {
		tlen = end - first;
		fl.l_len = (off_t)tlen;
	}
	/*
	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
	 * values for all calls, so that all locks will be held by the
	 * nfsd server. (The nfsd server handles conflicts between the
	 * various clients.)
	 * An NFSv4 lockowner is a ClientID plus an array of up to 1024
	 * bytes, so it can't be put in l_sysid.
	 */
	if (nfsv4_sysid == 0)
		nfsv4_sysid = nlm_acquire_next_sysid();
	fl.l_pid = (pid_t)0;
	fl.l_sysid = (int)nfsv4_sysid;

	if (ftype == F_UNLCK)
		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
		    (F_POSIX | F_REMOTE));
	else
		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
		    (F_POSIX | F_REMOTE));

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Check the nfsv4 root exports.
 * Looks up the requester's address (nd->nd_nam) in the export list of the
 * fake v4 root mount and sets the ND_EX* flags in nd->nd_flag for each
 * security flavor allowed.  Any lookup failure is reported as
 * NFSERR_PROGUNAVAIL.
 */
int
nfsvno_v4rootexport(struct nfsrv_descript *nd)
{
	struct ucred *credanon;
	int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
	uint64_t exflags;

	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
	    &credanon, &numsecflavor, secflavors);
	if (error) {
		error = NFSERR_PROGUNAVAIL;
		goto out;
	}
	/* The anonymous credential is not needed here; just release it. */
	if (credanon != NULL)
		crfree(credanon);
	for (i = 0; i < numsecflavor; i++) {
		if (secflavors[i] == AUTH_SYS)
			nd->nd_flag |= ND_EXAUTHSYS;
		else if (secflavors[i] == RPCSEC_GSS_KRB5)
			nd->nd_flag |= ND_EXGSS;
		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
			nd->nd_flag |= ND_EXGSSINTEGRITY;
		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
			nd->nd_flag |= ND_EXGSSPRIVACY;
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Nfs server pseudo system call for the nfsd's
 * Dispatches on uap->flag:
 *  NFSSVC_NFSDADDSOCK - copy in a socket descriptor and hand the socket to
 *	nfsrvd_addsock();
 *  NFSSVC_NFSDNFSD    - copy in the (old or new style) nfsd arguments,
 *	including the optional pNFS strings, and run nfsrvd_nfsd();
 *  NFSSVC_PNFSDS      - pNFS DS administration (delete a DS, or copy a
 *	mirror);
 *  anything else      - passed on to nfssvc_srvcall().
 * Returns 0 or an errno value.
 */
/*
 * MPSAFE
 */
static int
nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
{
	struct file *fp;
	struct nfsd_addsock_args sockarg;
	struct nfsd_nfsd_args nfsdarg;
	struct nfsd_nfsd_oargs onfsdarg;
	struct nfsd_pnfsd_args pnfsdarg;
	struct vnode *vp, *nvp, *curdvp;
	struct pnfsdsfile *pf;
	struct nfsdevice *ds, *fds;
	cap_rights_t rights;
	int buflen, error, ret;
	char *buf, *cp, *cp2, *cp3;
	char fname[PNFS_FILENAME_LEN + 1];

	if (uap->flag & NFSSVC_NFSDADDSOCK) {
		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
		if (error)
			goto out;
		/*
		 * Since we don't know what rights might be required,
		 * pretend that we need them all. It is better to be too
		 * careful than too reckless.
		 */
		error = fget(td, sockarg.sock,
		    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
		if (error != 0)
			goto out;
		if (fp->f_type != DTYPE_SOCKET) {
			fdrop(fp, td);
			error = EPERM;
			goto out;
		}
		error = nfsrvd_addsock(fp);
		fdrop(fp, td);
	} else if (uap->flag & NFSSVC_NFSDNFSD) {
		if (uap->argp == NULL) {
			error = EINVAL;
			goto out;
		}
		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
			/* Old style arguments: convert, with no pNFS fields. */
			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
			if (error == 0) {
				nfsdarg.principal = onfsdarg.principal;
				nfsdarg.minthreads = onfsdarg.minthreads;
				nfsdarg.maxthreads = onfsdarg.maxthreads;
				nfsdarg.version = 1;
				nfsdarg.addr = NULL;
				nfsdarg.addrlen = 0;
				nfsdarg.dnshost = NULL;
				nfsdarg.dnshostlen = 0;
				nfsdarg.dspath = NULL;
				nfsdarg.dspathlen = 0;
				nfsdarg.mdspath = NULL;
				nfsdarg.mdspathlen = 0;
				nfsdarg.mirrorcnt = 1;
			}
		} else
			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
		if (error)
			goto out;
		/*
		 * The pNFS strings are only used when all four are present
		 * with sane lengths and the mirror count is in range.  Each
		 * user pointer is replaced by a nul terminated kernel copy;
		 * on a copyin failure the copies made so far are freed.
		 */
		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
		    nfsdarg.mirrorcnt >= 1 &&
		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
			if (error != 0) {
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
			nfsdarg.addr = cp;
			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dnshost = cp;
			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dspath = cp;
			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(nfsdarg.dspath, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.mdspath = cp;
		} else {
			/* Not a (valid) pNFS configuration: run without it. */
			nfsdarg.addr = NULL;
			nfsdarg.addrlen = 0;
			nfsdarg.dnshost = NULL;
			nfsdarg.dnshostlen = 0;
			nfsdarg.dspath = NULL;
			nfsdarg.dspathlen = 0;
			nfsdarg.mdspath = NULL;
			nfsdarg.mdspathlen = 0;
			nfsdarg.mirrorcnt = 1;
		}
		error = nfsrvd_nfsd(td, &nfsdarg);
		free(nfsdarg.addr, M_TEMP);
		free(nfsdarg.dnshost, M_TEMP);
		free(nfsdarg.dspath, M_TEMP);
		free(nfsdarg.mdspath, M_TEMP);
	} else if (uap->flag & NFSSVC_PNFSDS) {
		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
			    NULL);
			if (error == 0)
				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
			free(cp, M_TEMP);
		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
			buf = malloc(buflen, M_TEMP, M_WAITOK);
			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
			    NULL);
			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
			/* dspath and curdspath are optional; NULL if absent. */
			if (error == 0 && pnfsdarg.dspath != NULL) {
				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.dspath, cp2,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
				    error);
			} else
				cp2 = NULL;
			if (error == 0 && pnfsdarg.curdspath != NULL) {
				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
				error = copyinstr(pnfsdarg.curdspath, cp3,
				    PATH_MAX + 1, NULL);
				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
				    error);
			} else
				cp3 = NULL;
			curdvp = NULL;
			fds = NULL;
			if (error == 0)
				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
				    &fds);
			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
			if (error == 0) {
				if (pf->dsf_dir >= nfsrv_dsdirsize) {
					printf("copymr: dsdir out of range\n");
					pf->dsf_dir = 0;
				}
				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
				error = nfsrv_copymr(vp, nvp,
				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
				    (struct pnfsdsfile *)buf,
				    buflen / sizeof(*pf), td->td_ucred, td);
				vput(vp);
				vput(nvp);
				/*
				 * After a successful copy, remove the file
				 * from the DS it was moved away from.
				 * NOTE(review): curdvp is locked here and
				 * nfsrv_dsremove() locks its dvp again;
				 * confirm that recursion is intended.
				 */
				if (fds != NULL && error == 0) {
					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
					ret = vn_lock(curdvp, LK_EXCLUSIVE);
					if (ret == 0) {
						nfsrv_dsremove(curdvp, fname,
						    td->td_ucred, td);
						NFSVOPUNLOCK(curdvp);
					}
				}
				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
			}
			free(cp, M_TEMP);
			free(cp2, M_TEMP);
			free(cp3, M_TEMP);
			free(buf, M_TEMP);
		}
	} else {
		error = nfssvc_srvcall(td, uap, td->td_ucred);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Handle the remaining nfssvc() server requests, selected by uap->flag:
 * public file handle set/clear, v4 root export (new and old argument
 * layouts), stable restart file setup, administrative revoke, client and
 * lock dumps, recording the master nfsd for backup signalling, and
 * suspending/resuming the nfsd threads.
 * Returns 0 or an errno value; EINVAL when no known flag is set.
 */
static int
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	struct nfsex_args export;
	struct nfsex_oldargs oexp;
	struct file *fp = NULL;
	int stablefd, i, len;
	struct nfsd_clid adminrevoke;
	struct nfsd_dumplist dumplist;
	struct nfsd_dumpclients *dumpclients;
	struct nfsd_dumplocklist dumplocklist;
	struct nfsd_dumplocks *dumplocks;
	struct nameidata nd;
	vnode_t vp;
	int error = EINVAL, igotlock;
	struct proc *procp;
	gid_t *grps;
	static int suspend_nfsd = 0;

	if (uap->flag & NFSSVC_PUBLICFH) {
		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
		    sizeof (fhandle_t));
		error = copyin(uap->argp,
		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
		if (!error)
			nfs_pubfhset = 1;
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
		/* New style export arguments: group list is a user pointer. */
		error = copyin(uap->argp,(caddr_t)&export,
		    sizeof (struct nfsex_args));
		if (!error) {
			grps = NULL;
			if (export.export.ex_ngroups > NGROUPS_MAX ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				grps = malloc(export.export.ex_ngroups *
				    sizeof(gid_t), M_TEMP, M_WAITOK);
				error = copyin(export.export.ex_groups, grps,
				    export.export.ex_ngroups * sizeof(gid_t));
				export.export.ex_groups = grps;
			} else
				export.export.ex_groups = NULL;
			if (!error)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(grps, M_TEMP);
		}
	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
	    NFSSVC_V4ROOTEXPORT) {
		/* Old style export arguments: convert field by field. */
		error = copyin(uap->argp,(caddr_t)&oexp,
		    sizeof (struct nfsex_oldargs));
		if (!error) {
			memset(&export.export, 0, sizeof(export.export));
			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
			export.export.ex_root = oexp.export.ex_root;
			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
			export.export.ex_ngroups =
			    oexp.export.ex_anon.cr_ngroups;
			export.export.ex_groups = NULL;
			if (export.export.ex_ngroups > XU_NGROUPS ||
			    export.export.ex_ngroups < 0)
				error = EINVAL;
			else if (export.export.ex_ngroups > 0) {
				export.export.ex_groups = malloc(
				    export.export.ex_ngroups * sizeof(gid_t),
				    M_TEMP, M_WAITOK);
				for (i = 0; i < export.export.ex_ngroups; i++)
					export.export.ex_groups[i] =
					    oexp.export.ex_anon.cr_groups[i];
			}
			export.export.ex_addr = oexp.export.ex_addr;
			export.export.ex_addrlen = oexp.export.ex_addrlen;
			export.export.ex_mask = oexp.export.ex_mask;
			export.export.ex_masklen = oexp.export.ex_masklen;
			export.export.ex_indexfile = oexp.export.ex_indexfile;
			export.export.ex_numsecflavors =
			    oexp.export.ex_numsecflavors;
			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
			    export.export.ex_numsecflavors < 0)
				error = EINVAL;
			else {
				for (i = 0; i < export.export.ex_numsecflavors;
				    i++)
					export.export.ex_secflavors[i] =
					    oexp.export.ex_secflavors[i];
			}
			export.fspec = oexp.fspec;
			if (error == 0)
				error = nfsrv_v4rootexport(&export, cred, p);
			free(export.export.ex_groups, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
		nfs_pubfhset = 0;
		error = 0;
	} else if (uap->flag & NFSSVC_STABLERESTART) {
		/*
		 * The stable restart file must be open for both reading and
		 * writing, and can only be set while no nfsd is running.
		 */
		error = copyin(uap->argp, (caddr_t)&stablefd,
		    sizeof (int));
		if (!error)
			error = fp_getfvp(p, stablefd, &fp, &vp);
		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
			error = EBADF;
		if (!error && newnfs_numnfsd != 0)
			error = EPERM;
		if (!error) {
			nfsrv_stablefirst.nsf_fp = fp;
			nfsrv_setupstable(p);
		}
	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
		error = copyin(uap->argp, (caddr_t)&adminrevoke,
		    sizeof (struct nfsd_clid));
		if (!error)
			error = nfsrv_adminrevoke(&adminrevoke, p);
	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
		error = copyin(uap->argp, (caddr_t)&dumplist,
		    sizeof (struct nfsd_dumplist));
		if (!error && (dumplist.ndl_size < 1 ||
			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error) {
		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
		    dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
		    error = copyout(dumpclients, dumplist.ndl_list, len);
		    free(dumpclients, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
		error = copyin(uap->argp, (caddr_t)&dumplocklist,
		    sizeof (struct nfsd_dumplocklist));
		if (!error && (dumplocklist.ndllck_size < 1 ||
			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error)
			error = nfsrv_lookupfilename(&nd,
				dumplocklist.ndllck_fname, p);
		if (!error) {
			len = sizeof (struct nfsd_dumplocks) *
				dumplocklist.ndllck_size;
			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
			nfsrv_dumplocks(nd.ni_vp, dumplocks,
			    dumplocklist.ndllck_size, p);
			vput(nd.ni_vp);
			error = copyout(dumplocks, dumplocklist.ndllck_list,
			    len);
			free(dumplocks, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
		/*
		 * Remember the calling process as the master nfsd, so that
		 * nfsrv_backupstable() can signal it later.
		 * NOTE(review): "error" is not assigned in this branch, so
		 * it keeps its initial value of EINVAL; confirm callers
		 * expect that.
		 */
		procp = p->td_proc;
		PROC_LOCK(procp);
		nfsd_master_pid = procp->p_pid;
		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
		nfsd_master_start = procp->p_stats->p_start;
		nfsd_master_proc = procp;
		PROC_UNLOCK(procp);
	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd == 0) {
			/* Lock out all nfsd threads */
			do {
				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
			} while (igotlock == 0 && suspend_nfsd == 0);
			suspend_nfsd = 1;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd != 0) {
			nfsv4_unlock(&nfsd_suspend_lock, 0);
			suspend_nfsd = 0;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	}

	NFSEXITCODE(error);
	return (error);
}

/*
 * Check exports.
 * Returns 0 if ok, 1 otherwise.
 * The request is ok when one of the export's security flavors matches the
 * flavor flags recorded in nd->nd_flag.
 */
int
nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
{
	int i;

	/*
	 * This seems odd, but allow the case where the security flavor
	 * list is empty. This happens when NFSv4 is traversing non-exported
	 * file systems. Exported file systems should always have a non-empty
	 * security flavor list.
	 */
	if (exp->nes_numsecflavor == 0)
		return (0);

	for (i = 0; i < exp->nes_numsecflavor; i++) {
		/*
		 * The tests for privacy and integrity must be first,
		 * since ND_GSS is set for everything but AUTH_SYS.
		 */
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
		    (nd->nd_flag & ND_GSSPRIVACY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
		    (nd->nd_flag & ND_GSSINTEGRITY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
		    (nd->nd_flag & ND_GSS))
			return (0);
		if (exp->nes_secflavors[i] == AUTH_SYS &&
		    (nd->nd_flag & ND_GSS) == 0)
			return (0);
	}
	return (1);
}

/*
 * Calculate a hash value for the fid in a file handle.
 * Only the fh_fid part is hashed; the fsid does not contribute.
 */
uint32_t
nfsrv_hashfh(fhandle_t *fhp)
{
	uint32_t hashval;

	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
	return (hashval);
}

/*
 * Calculate a hash value for the sessionid.
 * "sessionid" must point at NFSX_V4SESSIONID bytes.
 */
uint32_t
nfsrv_hashsessionid(uint8_t *sessionid)
{
	uint32_t hashval;

	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
	return (hashval);
}

/*
 * Signal the userland master nfsd to backup the stable restart file.
 * SIGUSR2 is only sent when the process found for the recorded pid still
 * matches the recorded proc pointer, start time and command name;
 * otherwise the recorded master is forgotten.
 */
void
nfsrv_backupstable(void)
{
	struct proc *procp;

	if (nfsd_master_proc != NULL) {
		procp = pfind(nfsd_master_pid);
		/* Try to make sure it is the correct process. */
		if (procp == nfsd_master_proc &&
		    procp->p_stats->p_start.tv_sec ==
		    nfsd_master_start.tv_sec &&
		    procp->p_stats->p_start.tv_usec ==
		    nfsd_master_start.tv_usec &&
		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
			kern_psignal(procp, SIGUSR2);
		else
			nfsd_master_proc = NULL;

		/* A process returned by pfind() must be unlocked here. */
		if (procp != NULL)
			PROC_UNLOCK(procp);
	}
}

/*
 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
*/
struct nfsrvdscreate {
	int			done;		/* set to 1 by start_dscreate() when finished */
	int			inprog;		/* polled by the waiter together with "done" */
	struct task		tsk;
	struct ucred		*tcred;
	struct vnode		*dvp;		/* DS directory to create the file in */
	NFSPROC_T		*p;
	struct pnfsdsfile	*pf;		/* filled in by nfsrv_dscreate() */
	int			err;		/* result of nfsrv_dscreate() */
	fhandle_t		fh;		/* MDS file handle, used to name the file */
	struct vattr		va;		/* attributes applied with VOP_SETATTR() */
	struct vattr		createva;	/* attributes passed to VOP_CREATE() */
};

/*
 * Create one data file in the DS directory "dvp".
 *  vap    - attributes handed to VOP_CREATE().
 *  nvap   - attributes applied afterwards with VOP_SETATTR() (ownership).
 *  fhp    - MDS file handle; used to generate the file name when fnamep
 *	     is NULL.
 *  pf     - filled in with the DS file handle, the DS address and the
 *	     file name on success.
 *  dsa    - when non-NULL, filled in from the new file's attributes.
 *  fnamep - explicit file name, or NULL.
 *  nvpp   - when non-NULL and the call succeeds, receives the new vnode;
 *	     in all other cases the new vnode is released with vput().
 * Returns 0 or an errno value.
 */
int
nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
    fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
    char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
{
	struct vnode *nvp;
	struct nameidata named;
	struct vattr va;
	char *bufp;
	u_long *hashp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error;

	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
	named.ni_cnd.cn_thread = p;
	named.ni_cnd.cn_nameptr = bufp;
	if (fnamep != NULL) {
		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
		named.ni_cnd.cn_namelen = strlen(bufp);
	} else
		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);

	/* Create the data file in the DS mount. */
	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
		NFSVOPUNLOCK(dvp);
		if (error == 0) {
			/* Set the ownership of the file. */
			error = VOP_SETATTR(nvp, nvap, tcred);
			NFSD_DEBUG(4, "nfsrv_dscreate:"
			    " setattr-uid=%d\n", error);
			if (error != 0)
				vput(nvp);
		}
		if (error != 0)
			printf("pNFS: pnfscreate failed=%d\n", error);
	} else
		printf("pNFS: pnfscreate vnlock=%d\n", error);
	if (error == 0) {
		np = VTONFS(nvp);
		nmp = VFSTONFS(nvp->v_mount);
		/*
		 * The new file must live on an "nfs" mount, with a server
		 * address that fits in a sockaddr_in6 and a file handle of
		 * exactly NFSX_MYFH bytes.
		 */
		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
		    != 0 || nmp->nm_nam->sa_len > sizeof(
		    struct sockaddr_in6) ||
		    np->n_fhp->nfh_len != NFSX_MYFH) {
			printf("Bad DS file: fstype=%s salen=%d"
			    " fhlen=%d\n",
			    nvp->v_mount->mnt_vfc->vfc_name,
			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
			error = ENOENT;
		}

		/* Set extattrs for the DS on the MDS file. */
		if (error == 0) {
			if (dsa != NULL) {
				error = VOP_GETATTR(nvp, &va, tcred);
				if (error == 0) {
					dsa->dsa_filerev = va.va_filerev;
					dsa->dsa_size = va.va_size;
					dsa->dsa_atime = va.va_atime;
					dsa->dsa_mtime = va.va_mtime;
					dsa->dsa_bytes = va.va_bytes;
				}
			}
			if (error == 0) {
				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
				    NFSX_MYFH);
				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
				    nmp->nm_nam->sa_len);
				NFSBCOPY(named.ni_cnd.cn_nameptr,
				    pf->dsf_filename,
				    sizeof(pf->dsf_filename));
			}
		} else
			/*
			 * NOTE(review): this branch is only reached when the
			 * "Bad DS file" check above failed, not when
			 * VOP_GETATTR() failed, so the message text does not
			 * match the condition.
			 */
			printf("pNFS: pnfscreate can't get DS"
			    " attr=%d\n", error);
		if (nvpp != NULL && error == 0)
			*nvpp = nvp;
		else
			vput(nvp);
	}
	nfsvno_relpathbuf(&named);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_dscreate().
*/
/*
 * Task entry point: "arg" is a struct nfsrvdscreate.  Runs
 * nfsrv_dscreate() with the saved arguments, stores the result in
 * dsc->err and then sets dsc->done.  "pending" is unused.
 */
static void
start_dscreate(void *arg, int pending)
{
	struct nfsrvdscreate *dsc;

	dsc = (struct nfsrvdscreate *)arg;
	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
	    dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
	dsc->done = 1;
	NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
}

/*
 * Create a pNFS data file on the Data Server(s).
 *  vp   - the MDS file's vnode (asserted to be exclusively locked before
 *	   the extended attributes are written).
 *  vap  - attributes passed to VOP_CREATE() for each DS file.
 *  cred - credential used to read the MDS file's attributes.
 * Picks a DS (and up to nfsrv_maxpnfsmirror - 1 mirrors), creates the data
 * file on each and records the result in the "pnfsd.dsfile" and
 * "pnfsd.dsattr" system extended attributes of the MDS file.  Failures
 * are logged; nothing is returned to the caller.
 */
static void
nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrvdscreate *dsc, *tdsc = NULL;
	struct nfsdevice *ds, *tds, *fds;
	struct mount *mp;
	struct pnfsdsfile *pf, *tpf;
	struct pnfsdsattr dsattr;
	struct vattr va;
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsmount *nmp;
	fhandle_t fh;
	uid_t vauid;
	gid_t vagid;
	u_short vamode;
	struct ucred *tcred;
	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
	int failpos, timo;

	/* Get a DS server directory in a round-robin order. */
	mirrorcnt = 1;
	mp = vp->v_mount;
	ds = fds = NULL;
	NFSDDSLOCK();
	/*
	 * Search for the first entry that handles this MDS fs, but use the
	 * first entry for all MDS fs's otherwise.
	 */
	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
		if (tds->nfsdev_nmp != NULL) {
			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
				ds = tds;
			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
				ds = fds = tds;
				break;
			}
		}
	}
	if (ds == NULL) {
		NFSDDSUNLOCK();
		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
		return;
	}
	i = dsdir[0] = ds->nfsdev_nextdir;
	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
	dvp[0] = ds->nfsdev_dsdir[i];
	tds = TAILQ_NEXT(ds, nfsdev_list);
	/*
	 * Collect mirrors from the entries after "ds": entries of the same
	 * kind (MDS-specific for this fs, or generic) using the same
	 * directory index.
	 */
	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
			if (tds->nfsdev_nmp != NULL &&
			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
			      fsidcmp(&mp->mnt_stat.f_fsid,
			      &tds->nfsdev_mdsfsid) == 0))) {
				dsdir[mirrorcnt] = i;
				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
				mirrorcnt++;
				if (mirrorcnt >= nfsrv_maxpnfsmirror)
					break;
			}
		}
	}
	/* Put at end of list to implement round-robin usage. */
	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
	NFSDDSUNLOCK();
	dsc = NULL;
	/* One task argument structure per additional mirror. */
	if (mirrorcnt > 1)
		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
		    M_WAITOK | M_ZERO);
	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
	    M_ZERO);

	error = nfsvno_getfh(vp, &fh, p);
	if (error == 0)
		error = VOP_GETATTR(vp, &va, cred);
	if (error == 0) {
		/* Set the attributes for "vp" to Setattr the DS vp. */
		vauid = va.va_uid;
		vagid = va.va_gid;
		vamode = va.va_mode;
		VATTR_NULL(&va);
		va.va_uid = vauid;
		va.va_gid = vagid;
		va.va_mode = vamode;
		va.va_size = 0;
	} else
		printf("pNFS: pnfscreate getfh+attr=%d\n", error);

	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
	    cred->cr_gid);
	/* Make data file name based on FH. */
	tcred = newnfs_getcred();

	/*
	 * Create the file on each DS mirror, using kernel process(es) for the
	 * additional mirrors.
	 */
	failpos = -1;
	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
		tpf->dsf_dir = dsdir[i];
		tdsc->tcred = tcred;
		tdsc->p = p;
		tdsc->pf = tpf;
		tdsc->createva = *vap;
		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
		tdsc->va = va;
		tdsc->dvp = dvp[i];
		tdsc->done = 0;
		tdsc->inprog = 0;
		tdsc->err = 0;
		ret = EIO;
		if (nfs_pnfsiothreads != 0) {
			ret = nfs_pnfsio(start_dscreate, tdsc);
			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
		}
		/* No task could be queued, so do the create in-line. */
		if (ret != 0) {
			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
			    NULL, tcred, p, NULL);
			if (ret != 0) {
				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
				    error));
				if (failpos == -1 && nfsds_failerr(ret))
					failpos = i;
				else
					error = ret;
			}
		}
	}
	/* The last mirror is always created by this thread. */
	if (error == 0) {
		tpf->dsf_dir = dsdir[mirrorcnt - 1];
		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
		    &dsattr, NULL, tcred, p, NULL);
		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
			failpos = mirrorcnt - 1;
			error = 0;
		}
	}
	timo = hz / 50;		/* Wait for 20msec. */
	if (timo < 1)
		timo = 1;
	/* Wait for kernel task(s) to complete. */
	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
		while (tdsc->inprog != 0 && tdsc->done == 0)
			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
		if (tdsc->err != 0) {
			if (failpos == -1 && nfsds_failerr(tdsc->err))
				failpos = i;
			else if (error == 0)
				error = tdsc->err;
		}
	}

	/*
	 * If failpos has been set, that mirror has failed, so it needs
	 * to be disabled.
	 */
	if (failpos >= 0) {
		nmp = VFSTONFS(dvp[failpos]->v_mount);
		NFSLOCKMNT(nmp);
		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
		     NFSMNTP_CANCELRPCS)) == 0) {
			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
			NFSUNLOCKMNT(nmp);
			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
			    ds);
			if (ds != NULL)
				nfsrv_killrpcs(nmp);
			NFSLOCKMNT(nmp);
			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
			wakeup(nmp);
		}
		NFSUNLOCKMNT(nmp);
	}

	NFSFREECRED(tcred);
	if (error == 0) {
		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");

		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
		    mirrorcnt, nfsrv_maxpnfsmirror);
		/*
		 * For all mirrors that couldn't be created, fill in the
		 * *pf structure, but with an IP address == 0.0.0.0.
		 */
		tpf = pf + mirrorcnt;
		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
			*tpf = *pf;
			tpf->dsf_sin.sin_family = AF_INET;
			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
			tpf->dsf_sin.sin_addr.s_addr = 0;
			tpf->dsf_sin.sin_port = 0;
		}

		error = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
		    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
		if (error == 0)
			error = vn_extattr_set(vp, IO_NODELOCKED,
			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
			    sizeof(dsattr), (char *)&dsattr, p);
		if (error != 0)
			printf("pNFS: pnfscreate setextattr=%d\n",
			    error);
	} else
		printf("pNFS: pnfscreate=%d\n", error);
	free(pf, M_TEMP);
	free(dsc, M_TEMP);
}

/*
 * Get the information needed to remove the pNFS Data Server file from the
 * Metadata file.  Upon success, dvpp[0] is set non-NULL to the locked
 * DS directory vnode.  The caller must unlock dvpp[0] when done with it.
 * dvpp[0] is left NULL when vp is not a regular file on an exported mount,
 * when no DS is configured, when the file has more than one hard link, or
 * when an error occurs before the DS lookup.  *mirrorcntp, fname and *fhp
 * are filled in for the later nfsrv_pnfsremove() call.
 */
static void
nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
    int *mirrorcntp, char *fname, fhandle_t *fhp)
{
	struct vattr va;
	struct ucred *tcred;
	char *buf;
	int buflen, error;

	dvpp[0] = NULL;
	/* If not an exported regular file or not a pNFS server, just return. */
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    nfsrv_devidcnt == 0)
		return;

	/* Check to see if this is the last hard link. */
	tcred = newnfs_getcred();
	error = VOP_GETATTR(vp, &va, tcred);
	NFSFREECRED(tcred);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
		return;
	}
	if (va.va_nlink > 1)
		return;

	error = nfsvno_getfh(vp, fhp, p);
	if (error != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
		return;
	}

	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	/* Get the directory vnode for the DS mount and the file handle. */
	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
	    NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
	free(buf, M_TEMP);
	if (error != 0)
		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
}

/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
4426 */ 4427 struct nfsrvdsremove { 4428 int done; 4429 int inprog; 4430 struct task tsk; 4431 struct ucred *tcred; 4432 struct vnode *dvp; 4433 NFSPROC_T *p; 4434 int err; 4435 char fname[PNFS_FILENAME_LEN + 1]; 4436 }; 4437 4438 static int 4439 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4440 NFSPROC_T *p) 4441 { 4442 struct nameidata named; 4443 struct vnode *nvp; 4444 char *bufp; 4445 u_long *hashp; 4446 int error; 4447 4448 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4449 if (error != 0) 4450 return (error); 4451 named.ni_cnd.cn_nameiop = DELETE; 4452 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4453 named.ni_cnd.cn_cred = tcred; 4454 named.ni_cnd.cn_thread = p; 4455 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 4456 nfsvno_setpathbuf(&named, &bufp, &hashp); 4457 named.ni_cnd.cn_nameptr = bufp; 4458 named.ni_cnd.cn_namelen = strlen(fname); 4459 strlcpy(bufp, fname, NAME_MAX); 4460 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4461 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4462 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4463 if (error == 0) { 4464 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4465 vput(nvp); 4466 } 4467 NFSVOPUNLOCK(dvp); 4468 nfsvno_relpathbuf(&named); 4469 if (error != 0) 4470 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4471 return (error); 4472 } 4473 4474 /* 4475 * Start up the thread that will execute nfsrv_dsremove(). 4476 */ 4477 static void 4478 start_dsremove(void *arg, int pending) 4479 { 4480 struct nfsrvdsremove *dsrm; 4481 4482 dsrm = (struct nfsrvdsremove *)arg; 4483 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4484 dsrm->p); 4485 dsrm->done = 1; 4486 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4487 } 4488 4489 /* 4490 * Remove a pNFS data file from a Data Server. 4491 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4492 * removed to set up the dvp and fill in the FH. 
4493 */ 4494 static void 4495 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4496 NFSPROC_T *p) 4497 { 4498 struct ucred *tcred; 4499 struct nfsrvdsremove *dsrm, *tdsrm; 4500 struct nfsdevice *ds; 4501 struct nfsmount *nmp; 4502 int failpos, i, ret, timo; 4503 4504 tcred = newnfs_getcred(); 4505 dsrm = NULL; 4506 if (mirrorcnt > 1) 4507 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4508 /* 4509 * Remove the file on each DS mirror, using kernel process(es) for the 4510 * additional mirrors. 4511 */ 4512 failpos = -1; 4513 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4514 tdsrm->tcred = tcred; 4515 tdsrm->p = p; 4516 tdsrm->dvp = dvp[i]; 4517 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4518 tdsrm->inprog = 0; 4519 tdsrm->done = 0; 4520 tdsrm->err = 0; 4521 ret = EIO; 4522 if (nfs_pnfsiothreads != 0) { 4523 ret = nfs_pnfsio(start_dsremove, tdsrm); 4524 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4525 } 4526 if (ret != 0) { 4527 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4528 if (failpos == -1 && nfsds_failerr(ret)) 4529 failpos = i; 4530 } 4531 } 4532 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4533 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4534 failpos = mirrorcnt - 1; 4535 timo = hz / 50; /* Wait for 20msec. */ 4536 if (timo < 1) 4537 timo = 1; 4538 /* Wait for kernel task(s) to complete. */ 4539 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4540 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4541 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4542 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4543 failpos = i; 4544 } 4545 4546 /* 4547 * If failpos has been set, that mirror has failed, so it needs 4548 * to be disabled. 
4549 */ 4550 if (failpos >= 0) { 4551 nmp = VFSTONFS(dvp[failpos]->v_mount); 4552 NFSLOCKMNT(nmp); 4553 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4554 NFSMNTP_CANCELRPCS)) == 0) { 4555 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4556 NFSUNLOCKMNT(nmp); 4557 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4558 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4559 ds); 4560 if (ds != NULL) 4561 nfsrv_killrpcs(nmp); 4562 NFSLOCKMNT(nmp); 4563 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4564 wakeup(nmp); 4565 } 4566 NFSUNLOCKMNT(nmp); 4567 } 4568 4569 /* Get rid all layouts for the file. */ 4570 nfsrv_freefilelayouts(fhp); 4571 4572 NFSFREECRED(tcred); 4573 free(dsrm, M_TEMP); 4574 } 4575 4576 /* 4577 * Generate a file name based on the file handle and put it in *bufp. 4578 * Return the number of bytes generated. 4579 */ 4580 static int 4581 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4582 { 4583 int i; 4584 uint8_t *cp; 4585 const uint8_t *hexdigits = "0123456789abcdef"; 4586 4587 cp = (uint8_t *)fhp; 4588 for (i = 0; i < sizeof(*fhp); i++) { 4589 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4590 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4591 } 4592 bufp[2 * i] = '\0'; 4593 return (2 * i); 4594 } 4595 4596 /* 4597 * Update the Metadata file's attributes from the DS file when a Read/Write 4598 * layout is returned. 4599 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4600 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4601 */ 4602 int 4603 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4604 { 4605 struct ucred *tcred; 4606 int error; 4607 4608 /* Do this as root so that it won't fail with EACCES. 
*/ 4609 tcred = newnfs_getcred(); 4610 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4611 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 4612 NFSFREECRED(tcred); 4613 return (error); 4614 } 4615 4616 /* 4617 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 4618 */ 4619 static int 4620 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4621 NFSPROC_T *p) 4622 { 4623 int error; 4624 4625 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 4626 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 4627 return (error); 4628 } 4629 4630 static int 4631 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 4632 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 4633 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 4634 off_t *offp, int content, bool *eofp) 4635 { 4636 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4637 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4638 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4639 struct nfsdevice *ds; 4640 struct pnfsdsattr dsattr; 4641 struct opnfsdsattr odsattr; 4642 char *buf; 4643 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4644 4645 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4646 /* 4647 * If not a regular file, not exported or not a pNFS server, 4648 * just return ENOENT. 4649 */ 4650 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4651 nfsrv_devidcnt == 0) 4652 return (ENOENT); 4653 4654 buflen = 1024; 4655 buf = malloc(buflen, M_TEMP, M_WAITOK); 4656 error = 0; 4657 4658 /* 4659 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4660 * from the MetaData file's extended attribute. 
4661 */ 4662 if (ioproc == NFSPROC_GETATTR) { 4663 error = vn_extattr_get(vp, IO_NODELOCKED, 4664 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4665 p); 4666 if (error == 0) { 4667 if (buflen == sizeof(odsattr)) { 4668 NFSBCOPY(buf, &odsattr, buflen); 4669 nap->na_filerev = odsattr.dsa_filerev; 4670 nap->na_size = odsattr.dsa_size; 4671 nap->na_atime = odsattr.dsa_atime; 4672 nap->na_mtime = odsattr.dsa_mtime; 4673 /* 4674 * Fake na_bytes by rounding up na_size. 4675 * Since we don't know the block size, just 4676 * use BLKDEV_IOSIZE. 4677 */ 4678 nap->na_bytes = (odsattr.dsa_size + 4679 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 4680 } else if (buflen == sizeof(dsattr)) { 4681 NFSBCOPY(buf, &dsattr, buflen); 4682 nap->na_filerev = dsattr.dsa_filerev; 4683 nap->na_size = dsattr.dsa_size; 4684 nap->na_atime = dsattr.dsa_atime; 4685 nap->na_mtime = dsattr.dsa_mtime; 4686 nap->na_bytes = dsattr.dsa_bytes; 4687 } else 4688 error = ENXIO; 4689 } 4690 if (error == 0) { 4691 /* 4692 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4693 * returns 0, just return now. nfsrv_checkdsattr() 4694 * returns 0 if there is no Read/Write layout 4695 * plus either an Open/Write_access or Write 4696 * delegation issued to a client for the file. 4697 */ 4698 if (nfsrv_pnfsgetdsattr == 0 || 4699 nfsrv_checkdsattr(vp, p) == 0) { 4700 free(buf, M_TEMP); 4701 return (error); 4702 } 4703 } 4704 4705 /* 4706 * Clear ENOATTR so the code below will attempt to do a 4707 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4708 * the extended attribute. 
4709 */ 4710 if (error == ENOATTR) 4711 error = 0; 4712 } 4713 4714 origmircnt = -1; 4715 trycnt = 0; 4716 tryagain: 4717 if (error == 0) { 4718 buflen = 1024; 4719 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 4720 LK_EXCLUSIVE) 4721 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 4722 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4723 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4724 NULL, NULL); 4725 if (error == 0) { 4726 for (i = 0; i < mirrorcnt; i++) 4727 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4728 } else 4729 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4730 } else 4731 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4732 if (error == 0) { 4733 failpos = -1; 4734 if (origmircnt == -1) 4735 origmircnt = mirrorcnt; 4736 /* 4737 * If failpos is set to a mirror#, then that mirror has 4738 * failed and will be disabled. For Read, Getattr and Seek, the 4739 * function only tries one mirror, so if that mirror has 4740 * failed, it will need to be retried. As such, increment 4741 * tryitagain for these cases. 4742 * For Write, Setattr and Setacl, the function tries all 4743 * mirrors and will not return an error for the case where 4744 * one mirror has failed. For these cases, the functioning 4745 * mirror(s) will have been modified, so a retry isn't 4746 * necessary. These functions will set failpos for the 4747 * failed mirror#. 4748 */ 4749 if (ioproc == NFSPROC_READDS) { 4750 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4751 mpp, mpp2); 4752 if (nfsds_failerr(error) && mirrorcnt > 1) { 4753 /* 4754 * Setting failpos will cause the mirror 4755 * to be disabled and then a retry of this 4756 * read is required. 
4757 */ 4758 failpos = 0; 4759 error = 0; 4760 trycnt++; 4761 } 4762 } else if (ioproc == NFSPROC_WRITEDS) 4763 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4764 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4765 else if (ioproc == NFSPROC_SETATTR) 4766 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4767 mirrorcnt, nap, &failpos); 4768 else if (ioproc == NFSPROC_SETACL) 4769 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4770 mirrorcnt, aclp, &failpos); 4771 else if (ioproc == NFSPROC_SEEKDS) { 4772 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 4773 p, nmp[0]); 4774 if (nfsds_failerr(error) && mirrorcnt > 1) { 4775 /* 4776 * Setting failpos will cause the mirror 4777 * to be disabled and then a retry of this 4778 * read is required. 4779 */ 4780 failpos = 0; 4781 error = 0; 4782 trycnt++; 4783 } 4784 } else if (ioproc == NFSPROC_ALLOCATE) 4785 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 4786 &nmp[0], mirrorcnt, &failpos); 4787 else { 4788 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4789 vp, nmp[mirrorcnt - 1], nap); 4790 if (nfsds_failerr(error) && mirrorcnt > 1) { 4791 /* 4792 * Setting failpos will cause the mirror 4793 * to be disabled and then a retry of this 4794 * getattr is required. 
4795 */ 4796 failpos = mirrorcnt - 1; 4797 error = 0; 4798 trycnt++; 4799 } 4800 } 4801 ds = NULL; 4802 if (failpos >= 0) { 4803 failnmp = nmp[failpos]; 4804 NFSLOCKMNT(failnmp); 4805 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4806 NFSMNTP_CANCELRPCS)) == 0) { 4807 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4808 NFSUNLOCKMNT(failnmp); 4809 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 4810 failnmp, p); 4811 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 4812 failpos, ds); 4813 if (ds != NULL) 4814 nfsrv_killrpcs(failnmp); 4815 NFSLOCKMNT(failnmp); 4816 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4817 wakeup(failnmp); 4818 } 4819 NFSUNLOCKMNT(failnmp); 4820 } 4821 for (i = 0; i < mirrorcnt; i++) 4822 NFSVOPUNLOCK(dvp[i]); 4823 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 4824 trycnt); 4825 /* Try the Read/Getattr again if a mirror was deleted. */ 4826 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 4827 goto tryagain; 4828 } else { 4829 /* Return ENOENT for any Extended Attribute error. */ 4830 error = ENOENT; 4831 } 4832 free(buf, M_TEMP); 4833 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 4834 return (error); 4835 } 4836 4837 /* 4838 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 4839 * attribute. 4840 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 4841 * to be checked. If it points to a NULL nmp, then it returns 4842 * a suitable destination. 4843 * curnmp - If non-NULL, it is the source mount for the copy. 
4844 */ 4845 int 4846 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 4847 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 4848 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 4849 struct nfsmount *curnmp, int *ippos, int *dsdirp) 4850 { 4851 struct vnode *dvp, *nvp = NULL, **tdvpp; 4852 struct mount *mp; 4853 struct nfsmount *nmp, *newnmp; 4854 struct sockaddr *sad; 4855 struct sockaddr_in *sin; 4856 struct nfsdevice *ds, *tds, *fndds; 4857 struct pnfsdsfile *pf; 4858 uint32_t dsdir; 4859 int error, fhiszero, fnd, gotone, i, mirrorcnt; 4860 4861 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 4862 *mirrorcntp = 1; 4863 tdvpp = dvpp; 4864 if (nvpp != NULL) 4865 *nvpp = NULL; 4866 if (dvpp != NULL) 4867 *dvpp = NULL; 4868 if (ippos != NULL) 4869 *ippos = -1; 4870 if (newnmpp != NULL) 4871 newnmp = *newnmpp; 4872 else 4873 newnmp = NULL; 4874 mp = vp->v_mount; 4875 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 4876 "pnfsd.dsfile", buflenp, buf, p); 4877 mirrorcnt = *buflenp / sizeof(*pf); 4878 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 4879 *buflenp != sizeof(*pf) * mirrorcnt)) 4880 error = ENOATTR; 4881 4882 pf = (struct pnfsdsfile *)buf; 4883 /* If curnmp != NULL, check for a match in the mirror list. 
*/ 4884 if (curnmp != NULL && error == 0) { 4885 fnd = 0; 4886 for (i = 0; i < mirrorcnt; i++, pf++) { 4887 sad = (struct sockaddr *)&pf->dsf_sin; 4888 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 4889 if (ippos != NULL) 4890 *ippos = i; 4891 fnd = 1; 4892 break; 4893 } 4894 } 4895 if (fnd == 0) 4896 error = ENXIO; 4897 } 4898 4899 gotone = 0; 4900 pf = (struct pnfsdsfile *)buf; 4901 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 4902 error); 4903 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 4904 fhiszero = 0; 4905 sad = (struct sockaddr *)&pf->dsf_sin; 4906 sin = &pf->dsf_sin; 4907 dsdir = pf->dsf_dir; 4908 if (dsdir >= nfsrv_dsdirsize) { 4909 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 4910 error = ENOATTR; 4911 } else if (nvpp != NULL && newnmp != NULL && 4912 nfsaddr2_match(sad, newnmp->nm_nam)) 4913 error = EEXIST; 4914 if (error == 0) { 4915 if (ippos != NULL && curnmp == NULL && 4916 sad->sa_family == AF_INET && 4917 sin->sin_addr.s_addr == 0) 4918 *ippos = i; 4919 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 4920 fhiszero = 1; 4921 /* Use the socket address to find the mount point. */ 4922 fndds = NULL; 4923 NFSDDSLOCK(); 4924 /* Find a match for the IP address. */ 4925 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 4926 if (ds->nfsdev_nmp != NULL) { 4927 dvp = ds->nfsdev_dvp; 4928 nmp = VFSTONFS(dvp->v_mount); 4929 if (nmp != ds->nfsdev_nmp) 4930 printf("different2 nmp %p %p\n", 4931 nmp, ds->nfsdev_nmp); 4932 if (nfsaddr2_match(sad, nmp->nm_nam)) { 4933 fndds = ds; 4934 break; 4935 } 4936 } 4937 } 4938 if (fndds != NULL && newnmpp != NULL && 4939 newnmp == NULL) { 4940 /* Search for a place to make a mirror copy. 
*/ 4941 TAILQ_FOREACH(tds, &nfsrv_devidhead, 4942 nfsdev_list) { 4943 if (tds->nfsdev_nmp != NULL && 4944 fndds != tds && 4945 ((tds->nfsdev_mdsisset == 0 && 4946 fndds->nfsdev_mdsisset == 0) || 4947 (tds->nfsdev_mdsisset != 0 && 4948 fndds->nfsdev_mdsisset != 0 && 4949 fsidcmp(&tds->nfsdev_mdsfsid, 4950 &mp->mnt_stat.f_fsid) == 0))) { 4951 *newnmpp = tds->nfsdev_nmp; 4952 break; 4953 } 4954 } 4955 if (tds != NULL) { 4956 /* 4957 * Move this entry to the end of the 4958 * list, so it won't be selected as 4959 * easily the next time. 4960 */ 4961 TAILQ_REMOVE(&nfsrv_devidhead, tds, 4962 nfsdev_list); 4963 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 4964 nfsdev_list); 4965 } 4966 } 4967 NFSDDSUNLOCK(); 4968 if (fndds != NULL) { 4969 dvp = fndds->nfsdev_dsdir[dsdir]; 4970 if (lktype != 0 || fhiszero != 0 || 4971 (nvpp != NULL && *nvpp == NULL)) { 4972 if (fhiszero != 0) 4973 error = vn_lock(dvp, 4974 LK_EXCLUSIVE); 4975 else if (lktype != 0) 4976 error = vn_lock(dvp, lktype); 4977 else 4978 error = vn_lock(dvp, LK_SHARED); 4979 /* 4980 * If the file handle is all 0's, try to 4981 * do a Lookup against the DS to acquire 4982 * it. 4983 * If dvpp == NULL or the Lookup fails, 4984 * unlock dvp after the call. 
4985 */ 4986 if (error == 0 && (fhiszero != 0 || 4987 (nvpp != NULL && *nvpp == NULL))) { 4988 error = nfsrv_pnfslookupds(vp, 4989 dvp, pf, &nvp, p); 4990 if (error == 0) { 4991 if (fhiszero != 0) 4992 nfsrv_pnfssetfh( 4993 vp, pf, 4994 devid, 4995 fnamep, 4996 nvp, p); 4997 if (nvpp != NULL && 4998 *nvpp == NULL) { 4999 *nvpp = nvp; 5000 *dsdirp = dsdir; 5001 } else 5002 vput(nvp); 5003 } 5004 if (error != 0 || lktype == 0) 5005 NFSVOPUNLOCK(dvp); 5006 } 5007 } 5008 if (error == 0) { 5009 gotone++; 5010 NFSD_DEBUG(4, "gotone=%d\n", gotone); 5011 if (devid != NULL) { 5012 NFSBCOPY(fndds->nfsdev_deviceid, 5013 devid, NFSX_V4DEVICEID); 5014 devid += NFSX_V4DEVICEID; 5015 } 5016 if (dvpp != NULL) 5017 *tdvpp++ = dvp; 5018 if (fhp != NULL) 5019 NFSBCOPY(&pf->dsf_fh, fhp++, 5020 NFSX_MYFH); 5021 if (fnamep != NULL && gotone == 1) 5022 strlcpy(fnamep, 5023 pf->dsf_filename, 5024 sizeof(pf->dsf_filename)); 5025 } else 5026 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 5027 "err=%d\n", error); 5028 } 5029 } 5030 } 5031 if (error == 0 && gotone == 0) 5032 error = ENOENT; 5033 5034 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 5035 error); 5036 if (error == 0) 5037 *mirrorcntp = gotone; 5038 else { 5039 if (gotone > 0 && dvpp != NULL) { 5040 /* 5041 * If the error didn't occur on the first one and 5042 * dvpp != NULL, the one(s) prior to the failure will 5043 * have locked dvp's that need to be unlocked. 5044 */ 5045 for (i = 0; i < gotone; i++) { 5046 NFSVOPUNLOCK(*dvpp); 5047 *dvpp++ = NULL; 5048 } 5049 } 5050 /* 5051 * If it found the vnode to be copied from before a failure, 5052 * it needs to be vput()'d. 5053 */ 5054 if (nvpp != NULL && *nvpp != NULL) { 5055 vput(*nvpp); 5056 *nvpp = NULL; 5057 } 5058 } 5059 return (error); 5060 } 5061 5062 /* 5063 * Set the extended attribute for the Change attribute. 
5064 */ 5065 static int 5066 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 5067 { 5068 struct pnfsdsattr dsattr; 5069 int error; 5070 5071 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); 5072 dsattr.dsa_filerev = nap->na_filerev; 5073 dsattr.dsa_size = nap->na_size; 5074 dsattr.dsa_atime = nap->na_atime; 5075 dsattr.dsa_mtime = nap->na_mtime; 5076 dsattr.dsa_bytes = nap->na_bytes; 5077 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 5078 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); 5079 if (error != 0) 5080 printf("pNFS: setextattr=%d\n", error); 5081 return (error); 5082 } 5083 5084 static int 5085 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5086 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) 5087 { 5088 uint32_t *tl; 5089 struct nfsrv_descript *nd; 5090 nfsv4stateid_t st; 5091 struct mbuf *m, *m2; 5092 int error = 0, retlen, tlen, trimlen; 5093 5094 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); 5095 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5096 *mpp = NULL; 5097 /* 5098 * Use a stateid where other is an alternating 01010 pattern and 5099 * seqid is 0xffffffff. This value is not defined as special by 5100 * the RFC and is used by the FreeBSD NFS server to indicate an 5101 * MDS->DS proxy operation. 
5102 */ 5103 st.other[0] = 0x55555555; 5104 st.other[1] = 0x55555555; 5105 st.other[2] = 0x55555555; 5106 st.seqid = 0xffffffff; 5107 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5108 NULL, NULL, 0, 0, false); 5109 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5110 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); 5111 txdr_hyper(off, tl); 5112 *(tl + 2) = txdr_unsigned(len); 5113 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5114 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5115 if (error != 0) { 5116 free(nd, M_TEMP); 5117 return (error); 5118 } 5119 if (nd->nd_repstat == 0) { 5120 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 5121 NFSM_STRSIZ(retlen, len); 5122 if (retlen > 0) { 5123 /* Trim off the pre-data XDR from the mbuf chain. */ 5124 m = nd->nd_mrep; 5125 while (m != NULL && m != nd->nd_md) { 5126 if (m->m_next == nd->nd_md) { 5127 m->m_next = NULL; 5128 m_freem(nd->nd_mrep); 5129 nd->nd_mrep = m = nd->nd_md; 5130 } else 5131 m = m->m_next; 5132 } 5133 if (m == NULL) { 5134 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5135 error = ENOENT; 5136 goto nfsmout; 5137 } 5138 5139 /* 5140 * Now, adjust first mbuf so that any XDR before the 5141 * read data is skipped over. 5142 */ 5143 trimlen = nd->nd_dpos - mtod(m, char *); 5144 if (trimlen > 0) { 5145 m->m_len -= trimlen; 5146 NFSM_DATAP(m, trimlen); 5147 } 5148 5149 /* 5150 * Truncate the mbuf chain at retlen bytes of data, 5151 * plus XDR padding that brings the length up to a 5152 * multiple of 4. 
5153 */ 5154 tlen = NFSM_RNDUP(retlen); 5155 do { 5156 if (m->m_len >= tlen) { 5157 m->m_len = tlen; 5158 tlen = 0; 5159 m2 = m->m_next; 5160 m->m_next = NULL; 5161 m_freem(m2); 5162 break; 5163 } 5164 tlen -= m->m_len; 5165 m = m->m_next; 5166 } while (m != NULL); 5167 if (tlen > 0) { 5168 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5169 error = ENOENT; 5170 goto nfsmout; 5171 } 5172 *mpp = nd->nd_mrep; 5173 *mpendp = m; 5174 nd->nd_mrep = NULL; 5175 } 5176 } else 5177 error = nd->nd_repstat; 5178 nfsmout: 5179 /* If nd->nd_mrep is already NULL, this is a no-op. */ 5180 m_freem(nd->nd_mrep); 5181 free(nd, M_TEMP); 5182 NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); 5183 return (error); 5184 } 5185 5186 /* 5187 * Do a write RPC on a DS data file, using this structure for the arguments, 5188 * so that this function can be executed by a separate kernel process. 5189 */ 5190 struct nfsrvwritedsdorpc { 5191 int done; 5192 int inprog; 5193 struct task tsk; 5194 fhandle_t fh; 5195 off_t off; 5196 int len; 5197 struct nfsmount *nmp; 5198 struct ucred *cred; 5199 NFSPROC_T *p; 5200 struct mbuf *m; 5201 int err; 5202 }; 5203 5204 static int 5205 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, 5206 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) 5207 { 5208 uint32_t *tl; 5209 struct nfsrv_descript *nd; 5210 nfsattrbit_t attrbits; 5211 nfsv4stateid_t st; 5212 int commit, error, retlen; 5213 5214 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5215 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, 5216 sizeof(fhandle_t), NULL, NULL, 0, 0, false); 5217 5218 /* 5219 * Use a stateid where other is an alternating 01010 pattern and 5220 * seqid is 0xffffffff. This value is not defined as special by 5221 * the RFC and is used by the FreeBSD NFS server to indicate an 5222 * MDS->DS proxy operation. 
5223 */ 5224 st.other[0] = 0x55555555; 5225 st.other[1] = 0x55555555; 5226 st.other[2] = 0x55555555; 5227 st.seqid = 0xffffffff; 5228 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5229 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); 5230 txdr_hyper(off, tl); 5231 tl += 2; 5232 /* 5233 * Do all writes FileSync, since the server doesn't hold onto dirty 5234 * buffers. Since clients should be accessing the DS servers directly 5235 * using the pNFS layouts, this just needs to work correctly as a 5236 * fallback. 5237 */ 5238 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); 5239 *tl = txdr_unsigned(len); 5240 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); 5241 5242 /* Put data in mbuf chain. */ 5243 nd->nd_mb->m_next = m; 5244 5245 /* Set nd_mb and nd_bpos to end of data. */ 5246 while (m->m_next != NULL) 5247 m = m->m_next; 5248 nd->nd_mb = m; 5249 nfsm_set(nd, m->m_len); 5250 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); 5251 5252 /* Do a Getattr for the attributes that change upon writing. */ 5253 NFSZERO_ATTRBIT(&attrbits); 5254 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5255 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5256 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5257 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5258 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5259 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5260 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5261 (void) nfsrv_putattrbit(nd, &attrbits); 5262 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5263 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5264 if (error != 0) { 5265 free(nd, M_TEMP); 5266 return (error); 5267 } 5268 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); 5269 /* Get rid of weak cache consistency data for now. 
*/ 5270 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5271 (ND_NFSV4 | ND_V4WCCATTR)) { 5272 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5273 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5274 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); 5275 if (error != 0) 5276 goto nfsmout; 5277 /* 5278 * Get rid of Op# and status for next op. 5279 */ 5280 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5281 if (*++tl != 0) 5282 nd->nd_flag |= ND_NOMOREDATA; 5283 } 5284 if (nd->nd_repstat == 0) { 5285 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); 5286 retlen = fxdr_unsigned(int, *tl++); 5287 commit = fxdr_unsigned(int, *tl); 5288 if (commit != NFSWRITE_FILESYNC) 5289 error = NFSERR_IO; 5290 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", 5291 retlen, commit, error); 5292 } else 5293 error = nd->nd_repstat; 5294 /* We have no use for the Write Verifier since we use FileSync. */ 5295 5296 /* 5297 * Get the Change, Size, Access Time and Modify Time attributes and set 5298 * on the Metadata file, so its attributes will be what the file's 5299 * would be if it had been written. 5300 */ 5301 if (error == 0) { 5302 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5303 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5304 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5305 } 5306 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); 5307 nfsmout: 5308 m_freem(nd->nd_mrep); 5309 free(nd, M_TEMP); 5310 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); 5311 return (error); 5312 } 5313 5314 /* 5315 * Start up the thread that will execute nfsrv_writedsdorpc(). 
5316 */ 5317 static void 5318 start_writedsdorpc(void *arg, int pending) 5319 { 5320 struct nfsrvwritedsdorpc *drpc; 5321 5322 drpc = (struct nfsrvwritedsdorpc *)arg; 5323 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5324 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 5325 drpc->done = 1; 5326 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 5327 } 5328 5329 static int 5330 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5331 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5332 struct mbuf **mpp, char *cp, int *failposp) 5333 { 5334 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; 5335 struct nfsvattr na; 5336 struct mbuf *m; 5337 int error, i, offs, ret, timo; 5338 5339 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5340 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5341 drpc = NULL; 5342 if (mirrorcnt > 1) 5343 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5344 M_WAITOK); 5345 5346 /* Calculate offset in mbuf chain that data starts. */ 5347 offs = cp - mtod(*mpp, char *); 5348 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5349 5350 /* 5351 * Do the write RPC for every DS, using a separate kernel process 5352 * for every DS except the last one. 
5353 */ 5354 error = 0; 5355 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5356 tdrpc->done = 0; 5357 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5358 tdrpc->off = off; 5359 tdrpc->len = len; 5360 tdrpc->nmp = *nmpp; 5361 tdrpc->cred = cred; 5362 tdrpc->p = p; 5363 tdrpc->inprog = 0; 5364 tdrpc->err = 0; 5365 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5366 ret = EIO; 5367 if (nfs_pnfsiothreads != 0) { 5368 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5369 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5370 ret); 5371 } 5372 if (ret != 0) { 5373 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5374 tdrpc->m, cred, p); 5375 if (nfsds_failerr(ret) && *failposp == -1) 5376 *failposp = i; 5377 else if (error == 0 && ret != 0) 5378 error = ret; 5379 } 5380 nmpp++; 5381 fhp++; 5382 } 5383 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5384 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5385 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5386 *failposp = mirrorcnt - 1; 5387 else if (error == 0 && ret != 0) 5388 error = ret; 5389 if (error == 0) 5390 error = nfsrv_setextattr(vp, &na, p); 5391 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5392 tdrpc = drpc; 5393 timo = hz / 50; /* Wait for 20msec. */ 5394 if (timo < 1) 5395 timo = 1; 5396 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5397 /* Wait for RPCs on separate threads to complete. */ 5398 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5399 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5400 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5401 *failposp = i; 5402 else if (error == 0 && tdrpc->err != 0) 5403 error = tdrpc->err; 5404 } 5405 free(drpc, M_TEMP); 5406 return (error); 5407 } 5408 5409 /* 5410 * Do a allocate RPC on a DS data file, using this structure for the arguments, 5411 * so that this function can be executed by a separate kernel process. 
5412 */ 5413 struct nfsrvallocatedsdorpc { 5414 int done; 5415 int inprog; 5416 struct task tsk; 5417 fhandle_t fh; 5418 off_t off; 5419 off_t len; 5420 struct nfsmount *nmp; 5421 struct ucred *cred; 5422 NFSPROC_T *p; 5423 int err; 5424 }; 5425 5426 static int 5427 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5428 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5429 { 5430 uint32_t *tl; 5431 struct nfsrv_descript *nd; 5432 nfsattrbit_t attrbits; 5433 nfsv4stateid_t st; 5434 int error; 5435 5436 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5437 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 5438 sizeof(fhandle_t), NULL, NULL, 0, 0, false); 5439 5440 /* 5441 * Use a stateid where other is an alternating 01010 pattern and 5442 * seqid is 0xffffffff. This value is not defined as special by 5443 * the RFC and is used by the FreeBSD NFS server to indicate an 5444 * MDS->DS proxy operation. 5445 */ 5446 st.other[0] = 0x55555555; 5447 st.other[1] = 0x55555555; 5448 st.other[2] = 0x55555555; 5449 st.seqid = 0xffffffff; 5450 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5451 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5452 txdr_hyper(off, tl); tl += 2; 5453 txdr_hyper(len, tl); tl += 2; 5454 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 5455 5456 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5457 NFSGETATTR_ATTRBIT(&attrbits); 5458 nfsrv_putattrbit(nd, &attrbits); 5459 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5460 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5461 if (error != 0) { 5462 free(nd, M_TEMP); 5463 return (error); 5464 } 5465 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 5466 nd->nd_repstat); 5467 if (nd->nd_repstat == 0) { 5468 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5469 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5470 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5471 } else 5472 error = 
nd->nd_repstat; 5473 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 5474 nfsmout: 5475 m_freem(nd->nd_mrep); 5476 free(nd, M_TEMP); 5477 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 5478 return (error); 5479 } 5480 5481 /* 5482 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 5483 */ 5484 static void 5485 start_allocatedsdorpc(void *arg, int pending) 5486 { 5487 struct nfsrvallocatedsdorpc *drpc; 5488 5489 drpc = (struct nfsrvallocatedsdorpc *)arg; 5490 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5491 drpc->len, NULL, drpc->cred, drpc->p); 5492 drpc->done = 1; 5493 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 5494 } 5495 5496 static int 5497 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5498 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5499 int *failposp) 5500 { 5501 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 5502 struct nfsvattr na; 5503 int error, i, ret, timo; 5504 5505 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 5506 drpc = NULL; 5507 if (mirrorcnt > 1) 5508 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5509 M_WAITOK); 5510 5511 /* 5512 * Do the allocate RPC for every DS, using a separate kernel process 5513 * for every DS except the last one. 
5514 */ 5515 error = 0; 5516 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5517 tdrpc->done = 0; 5518 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5519 tdrpc->off = off; 5520 tdrpc->len = len; 5521 tdrpc->nmp = *nmpp; 5522 tdrpc->cred = cred; 5523 tdrpc->p = p; 5524 tdrpc->inprog = 0; 5525 tdrpc->err = 0; 5526 ret = EIO; 5527 if (nfs_pnfsiothreads != 0) { 5528 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 5529 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 5530 ret); 5531 } 5532 if (ret != 0) { 5533 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 5534 cred, p); 5535 if (nfsds_failerr(ret) && *failposp == -1) 5536 *failposp = i; 5537 else if (error == 0 && ret != 0) 5538 error = ret; 5539 } 5540 nmpp++; 5541 fhp++; 5542 } 5543 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5544 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5545 *failposp = mirrorcnt - 1; 5546 else if (error == 0 && ret != 0) 5547 error = ret; 5548 if (error == 0) 5549 error = nfsrv_setextattr(vp, &na, p); 5550 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 5551 tdrpc = drpc; 5552 timo = hz / 50; /* Wait for 20msec. */ 5553 if (timo < 1) 5554 timo = 1; 5555 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5556 /* Wait for RPCs on separate threads to complete. 
*/ 5557 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5558 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5559 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5560 *failposp = i; 5561 else if (error == 0 && tdrpc->err != 0) 5562 error = tdrpc->err; 5563 } 5564 free(drpc, M_TEMP); 5565 return (error); 5566 } 5567 5568 static int 5569 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5570 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5571 struct nfsvattr *dsnap) 5572 { 5573 uint32_t *tl; 5574 struct nfsrv_descript *nd; 5575 nfsv4stateid_t st; 5576 nfsattrbit_t attrbits; 5577 int error; 5578 5579 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5580 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5581 /* 5582 * Use a stateid where other is an alternating 01010 pattern and 5583 * seqid is 0xffffffff. This value is not defined as special by 5584 * the RFC and is used by the FreeBSD NFS server to indicate an 5585 * MDS->DS proxy operation. 5586 */ 5587 st.other[0] = 0x55555555; 5588 st.other[1] = 0x55555555; 5589 st.other[2] = 0x55555555; 5590 st.seqid = 0xffffffff; 5591 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5592 NULL, NULL, 0, 0, false); 5593 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5594 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5595 5596 /* Do a Getattr for the attributes that change due to writing. 
*/ 5597 NFSZERO_ATTRBIT(&attrbits); 5598 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5599 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5600 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5601 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5602 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5603 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5604 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5605 (void) nfsrv_putattrbit(nd, &attrbits); 5606 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5607 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5608 if (error != 0) { 5609 free(nd, M_TEMP); 5610 return (error); 5611 } 5612 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5613 nd->nd_repstat); 5614 /* Get rid of weak cache consistency data for now. */ 5615 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5616 (ND_NFSV4 | ND_V4WCCATTR)) { 5617 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5618 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5619 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5620 if (error != 0) 5621 goto nfsmout; 5622 /* 5623 * Get rid of Op# and status for next op. 5624 */ 5625 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5626 if (*++tl != 0) 5627 nd->nd_flag |= ND_NOMOREDATA; 5628 } 5629 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5630 if (error != 0) 5631 goto nfsmout; 5632 if (nd->nd_repstat != 0) 5633 error = nd->nd_repstat; 5634 /* 5635 * Get the Change, Size, Access Time and Modify Time attributes and set 5636 * on the Metadata file, so its attributes will be what the file's 5637 * would be if it had been written. 
5638 */ 5639 if (error == 0) { 5640 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5641 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5642 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5643 } 5644 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5645 nfsmout: 5646 m_freem(nd->nd_mrep); 5647 free(nd, M_TEMP); 5648 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5649 return (error); 5650 } 5651 5652 struct nfsrvsetattrdsdorpc { 5653 int done; 5654 int inprog; 5655 struct task tsk; 5656 fhandle_t fh; 5657 struct nfsmount *nmp; 5658 struct vnode *vp; 5659 struct ucred *cred; 5660 NFSPROC_T *p; 5661 struct nfsvattr na; 5662 struct nfsvattr dsna; 5663 int err; 5664 }; 5665 5666 /* 5667 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5668 */ 5669 static void 5670 start_setattrdsdorpc(void *arg, int pending) 5671 { 5672 struct nfsrvsetattrdsdorpc *drpc; 5673 5674 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5675 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5676 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5677 drpc->done = 1; 5678 } 5679 5680 static int 5681 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5682 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5683 struct nfsvattr *nap, int *failposp) 5684 { 5685 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; 5686 struct nfsvattr na; 5687 int error, i, ret, timo; 5688 5689 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5690 drpc = NULL; 5691 if (mirrorcnt > 1) 5692 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5693 M_WAITOK); 5694 5695 /* 5696 * Do the setattr RPC for every DS, using a separate kernel process 5697 * for every DS except the last one. 
5698 */ 5699 error = 0; 5700 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5701 tdrpc->done = 0; 5702 tdrpc->inprog = 0; 5703 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5704 tdrpc->nmp = *nmpp; 5705 tdrpc->vp = vp; 5706 tdrpc->cred = cred; 5707 tdrpc->p = p; 5708 tdrpc->na = *nap; 5709 tdrpc->err = 0; 5710 ret = EIO; 5711 if (nfs_pnfsiothreads != 0) { 5712 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 5713 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 5714 ret); 5715 } 5716 if (ret != 0) { 5717 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 5718 &na); 5719 if (nfsds_failerr(ret) && *failposp == -1) 5720 *failposp = i; 5721 else if (error == 0 && ret != 0) 5722 error = ret; 5723 } 5724 nmpp++; 5725 fhp++; 5726 } 5727 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 5728 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5729 *failposp = mirrorcnt - 1; 5730 else if (error == 0 && ret != 0) 5731 error = ret; 5732 if (error == 0) 5733 error = nfsrv_setextattr(vp, &na, p); 5734 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 5735 tdrpc = drpc; 5736 timo = hz / 50; /* Wait for 20msec. */ 5737 if (timo < 1) 5738 timo = 1; 5739 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5740 /* Wait for RPCs on separate threads to complete. */ 5741 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5742 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 5743 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5744 *failposp = i; 5745 else if (error == 0 && tdrpc->err != 0) 5746 error = tdrpc->err; 5747 } 5748 free(drpc, M_TEMP); 5749 return (error); 5750 } 5751 5752 /* 5753 * Do a Setattr of an NFSv4 ACL on the DS file. 
5754 */ 5755 static int 5756 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5757 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 5758 { 5759 struct nfsrv_descript *nd; 5760 nfsv4stateid_t st; 5761 nfsattrbit_t attrbits; 5762 int error; 5763 5764 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 5765 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5766 /* 5767 * Use a stateid where other is an alternating 01010 pattern and 5768 * seqid is 0xffffffff. This value is not defined as special by 5769 * the RFC and is used by the FreeBSD NFS server to indicate an 5770 * MDS->DS proxy operation. 5771 */ 5772 st.other[0] = 0x55555555; 5773 st.other[1] = 0x55555555; 5774 st.other[2] = 0x55555555; 5775 st.seqid = 0xffffffff; 5776 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5777 NULL, NULL, 0, 0, false); 5778 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5779 NFSZERO_ATTRBIT(&attrbits); 5780 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 5781 /* 5782 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 5783 * so passing in the metadata "vp" will be ok, since it is of 5784 * the same type (VREG). 
5785 */ 5786 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 5787 NULL, 0, 0, 0, 0, 0, NULL); 5788 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5789 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5790 if (error != 0) { 5791 free(nd, M_TEMP); 5792 return (error); 5793 } 5794 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 5795 nd->nd_repstat); 5796 error = nd->nd_repstat; 5797 m_freem(nd->nd_mrep); 5798 free(nd, M_TEMP); 5799 return (error); 5800 } 5801 5802 struct nfsrvsetacldsdorpc { 5803 int done; 5804 int inprog; 5805 struct task tsk; 5806 fhandle_t fh; 5807 struct nfsmount *nmp; 5808 struct vnode *vp; 5809 struct ucred *cred; 5810 NFSPROC_T *p; 5811 struct acl *aclp; 5812 int err; 5813 }; 5814 5815 /* 5816 * Start up the thread that will execute nfsrv_setacldsdorpc(). 5817 */ 5818 static void 5819 start_setacldsdorpc(void *arg, int pending) 5820 { 5821 struct nfsrvsetacldsdorpc *drpc; 5822 5823 drpc = (struct nfsrvsetacldsdorpc *)arg; 5824 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 5825 drpc->vp, drpc->nmp, drpc->aclp); 5826 drpc->done = 1; 5827 } 5828 5829 static int 5830 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5831 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 5832 int *failposp) 5833 { 5834 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 5835 int error, i, ret, timo; 5836 5837 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 5838 drpc = NULL; 5839 if (mirrorcnt > 1) 5840 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5841 M_WAITOK); 5842 5843 /* 5844 * Do the setattr RPC for every DS, using a separate kernel process 5845 * for every DS except the last one. 
5846 */ 5847 error = 0; 5848 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5849 tdrpc->done = 0; 5850 tdrpc->inprog = 0; 5851 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5852 tdrpc->nmp = *nmpp; 5853 tdrpc->vp = vp; 5854 tdrpc->cred = cred; 5855 tdrpc->p = p; 5856 tdrpc->aclp = aclp; 5857 tdrpc->err = 0; 5858 ret = EIO; 5859 if (nfs_pnfsiothreads != 0) { 5860 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 5861 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 5862 ret); 5863 } 5864 if (ret != 0) { 5865 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 5866 aclp); 5867 if (nfsds_failerr(ret) && *failposp == -1) 5868 *failposp = i; 5869 else if (error == 0 && ret != 0) 5870 error = ret; 5871 } 5872 nmpp++; 5873 fhp++; 5874 } 5875 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 5876 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5877 *failposp = mirrorcnt - 1; 5878 else if (error == 0 && ret != 0) 5879 error = ret; 5880 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 5881 tdrpc = drpc; 5882 timo = hz / 50; /* Wait for 20msec. */ 5883 if (timo < 1) 5884 timo = 1; 5885 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5886 /* Wait for RPCs on separate threads to complete. */ 5887 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5888 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 5889 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5890 *failposp = i; 5891 else if (error == 0 && tdrpc->err != 0) 5892 error = tdrpc->err; 5893 } 5894 free(drpc, M_TEMP); 5895 return (error); 5896 } 5897 5898 /* 5899 * Getattr call to the DS for the attributes that change due to writing. 
5900 */ 5901 static int 5902 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5903 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 5904 { 5905 struct nfsrv_descript *nd; 5906 int error; 5907 nfsattrbit_t attrbits; 5908 5909 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 5910 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5911 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 5912 sizeof(fhandle_t), NULL, NULL, 0, 0, false); 5913 NFSZERO_ATTRBIT(&attrbits); 5914 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5915 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5916 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5917 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5918 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5919 (void) nfsrv_putattrbit(nd, &attrbits); 5920 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5921 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5922 if (error != 0) { 5923 free(nd, M_TEMP); 5924 return (error); 5925 } 5926 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 5927 nd->nd_repstat); 5928 if (nd->nd_repstat == 0) { 5929 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 5930 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 5931 NULL, NULL); 5932 /* 5933 * We can only save the updated values in the extended 5934 * attribute if the vp is exclusively locked. 5935 * This should happen when any of the following operations 5936 * occur on the vnode: 5937 * Close, Delegreturn, LayoutCommit, LayoutReturn 5938 * As such, the updated extended attribute should get saved 5939 * before nfsrv_checkdsattr() returns 0 and allows the cached 5940 * attributes to be returned without calling this function. 
5941 */ 5942 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 5943 error = nfsrv_setextattr(vp, nap, p); 5944 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 5945 error); 5946 } 5947 } else 5948 error = nd->nd_repstat; 5949 m_freem(nd->nd_mrep); 5950 free(nd, M_TEMP); 5951 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 5952 return (error); 5953 } 5954 5955 /* 5956 * Seek call to a DS. 5957 */ 5958 static int 5959 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 5960 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 5961 { 5962 uint32_t *tl; 5963 struct nfsrv_descript *nd; 5964 nfsv4stateid_t st; 5965 int error; 5966 5967 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 5968 /* 5969 * Use a stateid where other is an alternating 01010 pattern and 5970 * seqid is 0xffffffff. This value is not defined as special by 5971 * the RFC and is used by the FreeBSD NFS server to indicate an 5972 * MDS->DS proxy operation. 5973 */ 5974 st.other[0] = 0x55555555; 5975 st.other[1] = 0x55555555; 5976 st.other[2] = 0x55555555; 5977 st.seqid = 0xffffffff; 5978 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5979 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 5980 sizeof(fhandle_t), NULL, NULL, 0, 0, false); 5981 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5982 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 5983 txdr_hyper(*offp, tl); tl += 2; 5984 *tl = txdr_unsigned(content); 5985 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5986 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5987 if (error != 0) { 5988 free(nd, M_TEMP); 5989 return (error); 5990 } 5991 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 5992 if (nd->nd_repstat == 0) { 5993 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 5994 if (*tl++ == newnfs_true) 5995 *eofp = true; 5996 else 5997 *eofp = false; 5998 *offp = fxdr_hyper(tl); 5999 } else 6000 error = nd->nd_repstat; 6001 nfsmout: 6002 
m_freem(nd->nd_mrep); 6003 free(nd, M_TEMP); 6004 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 6005 return (error); 6006 } 6007 6008 /* 6009 * Get the device id and file handle for a DS file. 6010 */ 6011 int 6012 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 6013 fhandle_t *fhp, char *devid) 6014 { 6015 int buflen, error; 6016 char *buf; 6017 6018 buflen = 1024; 6019 buf = malloc(buflen, M_TEMP, M_WAITOK); 6020 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 6021 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 6022 free(buf, M_TEMP); 6023 return (error); 6024 } 6025 6026 /* 6027 * Do a Lookup against the DS for the filename. 6028 */ 6029 static int 6030 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 6031 struct vnode **nvpp, NFSPROC_T *p) 6032 { 6033 struct nameidata named; 6034 struct ucred *tcred; 6035 char *bufp; 6036 u_long *hashp; 6037 struct vnode *nvp; 6038 int error; 6039 6040 tcred = newnfs_getcred(); 6041 named.ni_cnd.cn_nameiop = LOOKUP; 6042 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 6043 named.ni_cnd.cn_cred = tcred; 6044 named.ni_cnd.cn_thread = p; 6045 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 6046 nfsvno_setpathbuf(&named, &bufp, &hashp); 6047 named.ni_cnd.cn_nameptr = bufp; 6048 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 6049 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 6050 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 6051 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 6052 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 6053 NFSFREECRED(tcred); 6054 nfsvno_relpathbuf(&named); 6055 if (error == 0) 6056 *nvpp = nvp; 6057 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 6058 return (error); 6059 } 6060 6061 /* 6062 * Set the file handle to the correct one. 
6063 */ 6064 static void 6065 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, 6066 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 6067 { 6068 struct nfsnode *np; 6069 int ret = 0; 6070 6071 np = VTONFS(nvp); 6072 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 6073 /* 6074 * We can only do a vn_set_extattr() if the vnode is exclusively 6075 * locked and vn_start_write() has been done. If devid != NULL or 6076 * fnamep != NULL or the vnode is shared locked, vn_start_write() 6077 * may not have been done. 6078 * If not done now, it will be done on a future call. 6079 */ 6080 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 6081 LK_EXCLUSIVE) 6082 ret = vn_extattr_set(vp, IO_NODELOCKED, 6083 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 6084 (char *)pf, p); 6085 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 6086 } 6087 6088 /* 6089 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 6090 * when the DS has failed. 6091 */ 6092 void 6093 nfsrv_killrpcs(struct nfsmount *nmp) 6094 { 6095 6096 /* 6097 * Call newnfs_nmcancelreqs() to cause 6098 * any RPCs in progress on the mount point to 6099 * fail. 6100 * This will cause any process waiting for an 6101 * RPC to complete while holding a vnode lock 6102 * on the mounted-on vnode (such as "df" or 6103 * a non-forced "umount") to fail. 6104 * This will unlock the mounted-on vnode so 6105 * a forced dismount can succeed. 6106 * The NFSMNTP_CANCELRPCS flag should be set when this function is 6107 * called. 6108 */ 6109 newnfs_nmcancelreqs(nmp); 6110 } 6111 6112 /* 6113 * Sum up the statfs info for each of the DSs, so that the client will 6114 * receive the total for all DSs. 
6115 */ 6116 static int 6117 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 6118 { 6119 struct statfs *tsf; 6120 struct nfsdevice *ds; 6121 struct vnode **dvpp, **tdvpp, *dvp; 6122 uint64_t tot; 6123 int cnt, error = 0, i; 6124 6125 if (nfsrv_devidcnt <= 0) 6126 return (ENXIO); 6127 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 6128 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 6129 6130 /* Get an array of the dvps for the DSs. */ 6131 tdvpp = dvpp; 6132 i = 0; 6133 NFSDDSLOCK(); 6134 /* First, search for matches for same file system. */ 6135 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6136 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 6137 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 6138 if (++i > nfsrv_devidcnt) 6139 break; 6140 *tdvpp++ = ds->nfsdev_dvp; 6141 } 6142 } 6143 /* 6144 * If no matches for same file system, total all servers not assigned 6145 * to a file system. 6146 */ 6147 if (i == 0) { 6148 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6149 if (ds->nfsdev_nmp != NULL && 6150 ds->nfsdev_mdsisset == 0) { 6151 if (++i > nfsrv_devidcnt) 6152 break; 6153 *tdvpp++ = ds->nfsdev_dvp; 6154 } 6155 } 6156 } 6157 NFSDDSUNLOCK(); 6158 cnt = i; 6159 6160 /* Do a VFS_STATFS() for each of the DSs and sum them up. 
*/ 6161 tdvpp = dvpp; 6162 for (i = 0; i < cnt && error == 0; i++) { 6163 dvp = *tdvpp++; 6164 error = VFS_STATFS(dvp->v_mount, tsf); 6165 if (error == 0) { 6166 if (sf->f_bsize == 0) { 6167 if (tsf->f_bsize > 0) 6168 sf->f_bsize = tsf->f_bsize; 6169 else 6170 sf->f_bsize = 8192; 6171 } 6172 if (tsf->f_blocks > 0) { 6173 if (sf->f_bsize != tsf->f_bsize) { 6174 tot = tsf->f_blocks * tsf->f_bsize; 6175 sf->f_blocks += (tot / sf->f_bsize); 6176 } else 6177 sf->f_blocks += tsf->f_blocks; 6178 } 6179 if (tsf->f_bfree > 0) { 6180 if (sf->f_bsize != tsf->f_bsize) { 6181 tot = tsf->f_bfree * tsf->f_bsize; 6182 sf->f_bfree += (tot / sf->f_bsize); 6183 } else 6184 sf->f_bfree += tsf->f_bfree; 6185 } 6186 if (tsf->f_bavail > 0) { 6187 if (sf->f_bsize != tsf->f_bsize) { 6188 tot = tsf->f_bavail * tsf->f_bsize; 6189 sf->f_bavail += (tot / sf->f_bsize); 6190 } else 6191 sf->f_bavail += tsf->f_bavail; 6192 } 6193 } 6194 } 6195 free(tsf, M_TEMP); 6196 free(dvpp, M_TEMP); 6197 return (error); 6198 } 6199 6200 /* 6201 * Set an NFSv4 acl. 6202 */ 6203 int 6204 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 6205 { 6206 int error; 6207 6208 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 6209 error = NFSERR_ATTRNOTSUPP; 6210 goto out; 6211 } 6212 /* 6213 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 6214 * Make sure it has enough room for that - splitting every entry 6215 * into two and appending "canonical six" entries at the end. 6216 * Cribbed out of kern/vfs_acl.c - Rick M. 6217 */ 6218 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 6219 error = NFSERR_ATTRNOTSUPP; 6220 goto out; 6221 } 6222 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 6223 if (error == 0) { 6224 error = nfsrv_dssetacl(vp, aclp, cred, p); 6225 if (error == ENOENT) 6226 error = 0; 6227 } 6228 6229 out: 6230 NFSEXITCODE(error); 6231 return (error); 6232 } 6233 6234 /* 6235 * Seek vnode op call (actually it is a VOP_IOCTL()). 
6236 * This function is called with the vnode locked, but unlocks and vrele()s 6237 * the vp before returning. 6238 */ 6239 int 6240 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 6241 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 6242 { 6243 struct nfsvattr at; 6244 int error, ret; 6245 6246 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 6247 /* 6248 * Attempt to seek on a DS file. A return of ENOENT implies 6249 * there is no DS file to seek on. 6250 */ 6251 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 6252 NULL, NULL, NULL, NULL, offp, content, eofp); 6253 if (error != ENOENT) { 6254 vput(vp); 6255 return (error); 6256 } 6257 6258 /* 6259 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 6260 * VOP_IOCTL() will return ENXIO. However, the correct reply for 6261 * NFSv4.2 is *eofp == true and error == 0 for this case. 6262 */ 6263 NFSVOPUNLOCK(vp); 6264 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 6265 *eofp = false; 6266 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 6267 /* Handle the cases where we might be at EOF. */ 6268 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 6269 if (ret == 0 && *offp == at.na_size) { 6270 *eofp = true; 6271 error = 0; 6272 } 6273 if (ret != 0 && error == 0) 6274 error = ret; 6275 } 6276 vrele(vp); 6277 NFSEXITCODE(error); 6278 return (error); 6279 } 6280 6281 /* 6282 * Allocate vnode op call. 6283 */ 6284 int 6285 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6286 NFSPROC_T *p) 6287 { 6288 int error, trycnt; 6289 6290 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 6291 /* 6292 * Attempt to allocate on a DS file. A return of ENOENT implies 6293 * there is no DS file to allocate on. 
6294 */ 6295 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 6296 NULL, NULL, NULL, NULL, &len, 0, NULL); 6297 if (error != ENOENT) 6298 return (error); 6299 error = 0; 6300 6301 /* 6302 * Do the actual VOP_ALLOCATE(), looping a reasonable number of 6303 * times to achieve completion. 6304 */ 6305 trycnt = 0; 6306 while (error == 0 && len > 0 && trycnt++ < 20) 6307 error = VOP_ALLOCATE(vp, &off, &len); 6308 if (error == 0 && len > 0) 6309 error = NFSERR_IO; 6310 NFSEXITCODE(error); 6311 return (error); 6312 } 6313 6314 /* 6315 * Get Extended Atribute vnode op into an mbuf list. 6316 */ 6317 int 6318 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, 6319 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, 6320 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 6321 { 6322 struct iovec *iv; 6323 struct uio io, *uiop = &io; 6324 struct mbuf *m, *m2; 6325 int alen, error, len, tlen; 6326 size_t siz; 6327 6328 /* First, find out the size of the extended attribute. */ 6329 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6330 &siz, cred, p); 6331 if (error != 0) 6332 return (NFSERR_NOXATTR); 6333 if (siz > maxresp - NFS_MAXXDR) 6334 return (NFSERR_XATTR2BIG); 6335 len = siz; 6336 tlen = NFSM_RNDUP(len); 6337 if (tlen > 0) { 6338 /* 6339 * If cnt > MCLBYTES and the reply will not be saved, use 6340 * ext_pgs mbufs for TLS. 6341 * For NFSv4.0, we do not know for sure if the reply will 6342 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 6343 * Always use ext_pgs mbufs if ND_EXTPG is set. 
6344 */ 6345 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && 6346 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && 6347 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) 6348 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, 6349 maxextsiz, &m, &m2, &iv); 6350 else 6351 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, 6352 &iv); 6353 uiop->uio_iov = iv; 6354 } else { 6355 uiop->uio_iovcnt = 0; 6356 uiop->uio_iov = iv = NULL; 6357 m = m2 = NULL; 6358 } 6359 uiop->uio_offset = 0; 6360 uiop->uio_resid = tlen; 6361 uiop->uio_rw = UIO_READ; 6362 uiop->uio_segflg = UIO_SYSSPACE; 6363 uiop->uio_td = p; 6364 #ifdef MAC 6365 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6366 name); 6367 if (error != 0) 6368 goto out; 6369 #endif 6370 6371 if (tlen > 0) 6372 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6373 NULL, cred, p); 6374 if (error != 0) 6375 goto out; 6376 if (uiop->uio_resid > 0) { 6377 alen = tlen; 6378 len = tlen - uiop->uio_resid; 6379 tlen = NFSM_RNDUP(len); 6380 if (alen != tlen) 6381 printf("nfsvno_getxattr: weird size read\n"); 6382 if (tlen == 0) { 6383 m_freem(m); 6384 m = m2 = NULL; 6385 } else if (alen != tlen || tlen != len) 6386 m2 = nfsrv_adj(m, alen - tlen, tlen - len); 6387 } 6388 *lenp = len; 6389 *mpp = m; 6390 *mpendp = m2; 6391 6392 out: 6393 if (error != 0) { 6394 if (m != NULL) 6395 m_freem(m); 6396 *lenp = 0; 6397 } 6398 free(iv, M_TEMP); 6399 NFSEXITCODE(error); 6400 return (error); 6401 } 6402 6403 /* 6404 * Set Extended attribute vnode op from an mbuf list. 
6405 */ 6406 int 6407 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, 6408 char *cp, struct ucred *cred, struct thread *p) 6409 { 6410 struct iovec *iv; 6411 struct uio uio, *uiop = &uio; 6412 int cnt, error; 6413 6414 error = 0; 6415 #ifdef MAC 6416 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6417 name); 6418 #endif 6419 if (error != 0) 6420 goto out; 6421 6422 uiop->uio_rw = UIO_WRITE; 6423 uiop->uio_segflg = UIO_SYSSPACE; 6424 uiop->uio_td = p; 6425 uiop->uio_offset = 0; 6426 uiop->uio_resid = len; 6427 if (len > 0) { 6428 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); 6429 uiop->uio_iov = iv; 6430 uiop->uio_iovcnt = cnt; 6431 } else { 6432 uiop->uio_iov = iv = NULL; 6433 uiop->uio_iovcnt = 0; 6434 } 6435 if (error == 0) { 6436 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6437 cred, p); 6438 free(iv, M_TEMP); 6439 } 6440 6441 out: 6442 NFSEXITCODE(error); 6443 return (error); 6444 } 6445 6446 /* 6447 * Remove Extended attribute vnode op. 6448 */ 6449 int 6450 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 6451 struct ucred *cred, struct thread *p) 6452 { 6453 int error; 6454 6455 /* 6456 * Get rid of any delegations. I am not sure why this is required, 6457 * but RFC-8276 says so. 6458 */ 6459 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 6460 if (error != 0) 6461 goto out; 6462 #ifdef MAC 6463 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6464 name); 6465 if (error != 0) 6466 goto out; 6467 #endif 6468 6469 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 6470 if (error == EOPNOTSUPP) 6471 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6472 cred, p); 6473 out: 6474 NFSEXITCODE(error); 6475 return (error); 6476 } 6477 6478 /* 6479 * List Extended Atribute vnode op into an mbuf list. 
6480 */ 6481 int 6482 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 6483 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 6484 { 6485 struct iovec iv; 6486 struct uio io; 6487 int error; 6488 size_t siz; 6489 6490 *bufp = NULL; 6491 /* First, find out the size of the extended attribute. */ 6492 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 6493 p); 6494 if (error != 0) 6495 return (NFSERR_NOXATTR); 6496 if (siz <= cookie) { 6497 *lenp = 0; 6498 *eofp = true; 6499 goto out; 6500 } 6501 if (siz > cookie + *lenp) { 6502 siz = cookie + *lenp; 6503 *eofp = false; 6504 } else 6505 *eofp = true; 6506 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ 6507 if (siz > 10 * 1024 * 1024) { 6508 error = NFSERR_XATTR2BIG; 6509 goto out; 6510 } 6511 *bufp = malloc(siz, M_TEMP, M_WAITOK); 6512 iv.iov_base = *bufp; 6513 iv.iov_len = siz; 6514 io.uio_iovcnt = 1; 6515 io.uio_iov = &iv; 6516 io.uio_offset = 0; 6517 io.uio_resid = siz; 6518 io.uio_rw = UIO_READ; 6519 io.uio_segflg = UIO_SYSSPACE; 6520 io.uio_td = p; 6521 #ifdef MAC 6522 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 6523 if (error != 0) 6524 goto out; 6525 #endif 6526 6527 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 6528 p); 6529 if (error != 0) 6530 goto out; 6531 if (io.uio_resid > 0) 6532 siz -= io.uio_resid; 6533 *lenp = siz; 6534 6535 out: 6536 if (error != 0) { 6537 free(*bufp, M_TEMP); 6538 *bufp = NULL; 6539 } 6540 NFSEXITCODE(error); 6541 return (error); 6542 } 6543 6544 /* 6545 * Trim trailing data off the mbuf list being built. 
6546 */ 6547 static void 6548 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 6549 int bextpg, int bextpgsiz) 6550 { 6551 vm_page_t pg; 6552 int fullpgsiz, i; 6553 6554 if (mb->m_next != NULL) { 6555 m_freem(mb->m_next); 6556 mb->m_next = NULL; 6557 } 6558 if ((mb->m_flags & M_EXTPG) != 0) { 6559 /* First, get rid of any pages after this position. */ 6560 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 6561 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 6562 vm_page_unwire_noq(pg); 6563 vm_page_free(pg); 6564 } 6565 mb->m_epg_npgs = bextpg + 1; 6566 if (bextpg == 0) 6567 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 6568 else 6569 fullpgsiz = PAGE_SIZE; 6570 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 6571 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 6572 for (i = 1; i < mb->m_epg_npgs; i++) 6573 mb->m_len += m_epg_pagelen(mb, i, 0); 6574 nd->nd_bextpgsiz = bextpgsiz; 6575 nd->nd_bextpg = bextpg; 6576 } else 6577 mb->m_len = bpos - mtod(mb, char *); 6578 nd->nd_mb = mb; 6579 nd->nd_bpos = bpos; 6580 } 6581 6582 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 6583 6584 /* 6585 * Called once to initialize data structures... 
6586 */ 6587 static int 6588 nfsd_modevent(module_t mod, int type, void *data) 6589 { 6590 int error = 0, i; 6591 static int loaded = 0; 6592 6593 switch (type) { 6594 case MOD_LOAD: 6595 if (loaded) 6596 goto out; 6597 newnfs_portinit(); 6598 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 6599 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, 6600 MTX_DEF); 6601 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, 6602 MTX_DEF); 6603 } 6604 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 6605 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 6606 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 6607 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 6608 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 6609 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 6610 nfsrvd_initcache(); 6611 nfsd_init(); 6612 NFSD_LOCK(); 6613 nfsrvd_init(0); 6614 NFSD_UNLOCK(); 6615 nfsd_mntinit(); 6616 #ifdef VV_DISABLEDELEG 6617 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 6618 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 6619 #endif 6620 nfsd_call_servertimer = nfsrv_servertimer; 6621 nfsd_call_nfsd = nfssvc_nfsd; 6622 loaded = 1; 6623 break; 6624 6625 case MOD_UNLOAD: 6626 if (newnfs_numnfsd != 0) { 6627 error = EBUSY; 6628 break; 6629 } 6630 6631 #ifdef VV_DISABLEDELEG 6632 vn_deleg_ops.vndeleg_recall = NULL; 6633 vn_deleg_ops.vndeleg_disable = NULL; 6634 #endif 6635 nfsd_call_servertimer = NULL; 6636 nfsd_call_nfsd = NULL; 6637 6638 /* Clean out all NFSv4 state. */ 6639 nfsrv_throwawayallstate(curthread); 6640 6641 /* Clean the NFS server reply cache */ 6642 nfsrvd_cleancache(); 6643 6644 /* Free up the krpc server pool. 
*/ 6645 if (nfsrvd_pool != NULL) 6646 svcpool_destroy(nfsrvd_pool); 6647 6648 /* and get rid of the locks */ 6649 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 6650 mtx_destroy(&nfsrchash_table[i].mtx); 6651 mtx_destroy(&nfsrcahash_table[i].mtx); 6652 } 6653 mtx_destroy(&nfsrc_udpmtx); 6654 mtx_destroy(&nfs_v4root_mutex); 6655 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 6656 mtx_destroy(&nfsrv_dontlistlock_mtx); 6657 mtx_destroy(&nfsrv_recalllock_mtx); 6658 for (i = 0; i < nfsrv_sessionhashsize; i++) 6659 mtx_destroy(&nfssessionhash[i].mtx); 6660 if (nfslayouthash != NULL) { 6661 for (i = 0; i < nfsrv_layouthashsize; i++) 6662 mtx_destroy(&nfslayouthash[i].mtx); 6663 free(nfslayouthash, M_NFSDSESSION); 6664 } 6665 lockdestroy(&nfsv4root_mnt.mnt_explock); 6666 free(nfsclienthash, M_NFSDCLIENT); 6667 free(nfslockhash, M_NFSDLOCKFILE); 6668 free(nfssessionhash, M_NFSDSESSION); 6669 loaded = 0; 6670 break; 6671 default: 6672 error = EOPNOTSUPP; 6673 break; 6674 } 6675 6676 out: 6677 NFSEXITCODE(error); 6678 return (error); 6679 } 6680 static moduledata_t nfsd_mod = { 6681 "nfsd", 6682 nfsd_modevent, 6683 NULL, 6684 }; 6685 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 6686 6687 /* So that loader and kldload(2) can find us, wherever we are.. */ 6688 MODULE_VERSION(nfsd, 1); 6689 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 6690 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 6691 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 6692 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 6693 6694