1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <security/mac/mac_framework.h> 50 #include <sys/filio.h> 51 #include <sys/hash.h> 52 #include <sys/sysctl.h> 53 #include <nlm/nlm_prot.h> 54 #include <nlm/nlm.h> 55 56 FEATURE(nfsd, "NFSv4 server"); 57 58 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 59 extern int nfsrv_useacl; 60 extern int newnfs_numnfsd; 61 extern struct mount nfsv4root_mnt; 62 extern struct nfsrv_stablefirst nfsrv_stablefirst; 63 extern void (*nfsd_call_servertimer)(void); 64 extern SVCPOOL *nfsrvd_pool; 65 extern struct nfsv4lock nfsd_suspend_lock; 66 extern struct nfsclienthashhead *nfsclienthash; 67 extern struct nfslockhashhead *nfslockhash; 68 extern struct nfssessionhash *nfssessionhash; 69 extern int nfsrv_sessionhashsize; 70 extern struct nfsstatsv1 nfsstatsv1; 71 extern struct nfslayouthash *nfslayouthash; 72 extern int nfsrv_layouthashsize; 73 extern struct mtx nfsrv_dslock_mtx; 74 extern int nfs_pnfsiothreads; 75 extern struct nfsdontlisthead nfsrv_dontlisthead; 76 extern volatile int nfsrv_dontlistlen; 77 extern volatile int nfsrv_devidcnt; 78 extern int nfsrv_maxpnfsmirror; 79 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 80 NFSDLOCKMUTEX; 81 NFSSTATESPINLOCK; 82 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 83 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 84 struct mtx nfsrc_udpmtx; 85 struct mtx nfs_v4root_mutex; 86 struct mtx nfsrv_dontlistlock_mtx; 87 struct mtx nfsrv_recalllock_mtx; 88 struct nfsrvfh nfs_rootfh, nfs_pubfh; 89 int nfs_pubfhset = 0, nfs_rootfhset = 0; 90 struct proc *nfsd_master_proc = NULL; 91 int nfsd_debuglevel = 0; 92 static pid_t nfsd_master_pid = (pid_t)-1; 93 
static char nfsd_master_comm[MAXCOMLEN + 1]; 94 static struct timeval nfsd_master_start; 95 static uint32_t nfsv4_sysid = 0; 96 static fhandle_t zerofh; 97 98 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 99 struct ucred *); 100 101 int nfsrv_enable_crossmntpt = 1; 102 static int nfs_commit_blks; 103 static int nfs_commit_miss; 104 extern int nfsrv_issuedelegs; 105 extern int nfsrv_dolocallocks; 106 extern int nfsd_enable_stringtouid; 107 extern struct nfsdevicehead nfsrv_devidhead; 108 109 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, 110 struct iovec **); 111 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, 112 struct mbuf **, struct iovec **); 113 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, 114 int *); 115 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 116 NFSPROC_T *); 117 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 118 int *, char *, fhandle_t *); 119 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 120 NFSPROC_T *); 121 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, 122 struct thread *, int, struct mbuf **, char *, struct mbuf **, 123 struct nfsvattr *, struct acl *, off_t *, int, bool *); 124 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 125 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 126 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 127 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 128 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 129 char *, int *); 130 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 131 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 132 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 133 struct vnode *, struct nfsmount **, int, struct acl *, int *); 134 static int 
nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 135 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 136 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 137 struct vnode *, struct nfsmount *, struct nfsvattr *); 138 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, 139 NFSPROC_T *, struct nfsmount *); 140 static int nfsrv_putfhname(fhandle_t *, char *); 141 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 142 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 143 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, 144 struct vnode *, NFSPROC_T *); 145 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 146 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 147 NFSPROC_T *); 148 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); 149 static void nfsm_trimtrailing(struct nfsrv_descript *, struct mbuf *, 150 char *, int, int); 151 152 int nfs_pnfsio(task_fn_t *, void *); 153 154 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 155 "NFS server"); 156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 157 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 158 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 159 0, ""); 160 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 161 0, ""); 162 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 163 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 164 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, 165 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); 166 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 167 0, "Debug level for NFS server"); 168 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 169 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric 
owner_names"); 170 static int nfsrv_pnfsgetdsattr = 1; 171 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 172 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 173 174 /* 175 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 176 * not running. 177 * The dsN subdirectories for the increased values must have been created 178 * on all DS servers before this increase is done. 179 */ 180 u_int nfsrv_dsdirsize = 20; 181 static int 182 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 183 { 184 int error, newdsdirsize; 185 186 newdsdirsize = nfsrv_dsdirsize; 187 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 188 if (error != 0 || req->newptr == NULL) 189 return (error); 190 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 191 newnfs_numnfsd != 0) 192 return (EINVAL); 193 nfsrv_dsdirsize = newdsdirsize; 194 return (0); 195 } 196 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 197 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 198 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 199 200 #define MAX_REORDERED_RPC 16 201 #define NUM_HEURISTIC 1031 202 #define NHUSE_INIT 64 203 #define NHUSE_INC 16 204 #define NHUSE_MAX 2048 205 206 static struct nfsheur { 207 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 208 off_t nh_nextoff; /* next offset for sequential detection */ 209 int nh_use; /* use count for selection */ 210 int nh_seqcount; /* heuristic */ 211 } nfsheur[NUM_HEURISTIC]; 212 213 /* 214 * Heuristic to detect sequential operation. 215 */ 216 static struct nfsheur * 217 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 218 { 219 struct nfsheur *nh; 220 int hi, try; 221 222 /* Locate best candidate. 
*/ 223 try = 32; 224 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 225 nh = &nfsheur[hi]; 226 while (try--) { 227 if (nfsheur[hi].nh_vp == vp) { 228 nh = &nfsheur[hi]; 229 break; 230 } 231 if (nfsheur[hi].nh_use > 0) 232 --nfsheur[hi].nh_use; 233 hi = (hi + 1) % NUM_HEURISTIC; 234 if (nfsheur[hi].nh_use < nh->nh_use) 235 nh = &nfsheur[hi]; 236 } 237 238 /* Initialize hint if this is a new file. */ 239 if (nh->nh_vp != vp) { 240 nh->nh_vp = vp; 241 nh->nh_nextoff = uio->uio_offset; 242 nh->nh_use = NHUSE_INIT; 243 if (uio->uio_offset == 0) 244 nh->nh_seqcount = 4; 245 else 246 nh->nh_seqcount = 1; 247 } 248 249 /* Calculate heuristic. */ 250 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 251 uio->uio_offset == nh->nh_nextoff) { 252 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 253 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 254 if (nh->nh_seqcount > IO_SEQMAX) 255 nh->nh_seqcount = IO_SEQMAX; 256 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 257 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 258 /* Probably a reordered RPC, leave seqcount alone. */ 259 } else if (nh->nh_seqcount > 1) { 260 nh->nh_seqcount /= 2; 261 } else { 262 nh->nh_seqcount = 0; 263 } 264 nh->nh_use += NHUSE_INC; 265 if (nh->nh_use > NHUSE_MAX) 266 nh->nh_use = NHUSE_MAX; 267 return (nh); 268 } 269 270 /* 271 * Get attributes into nfsvattr structure. 272 */ 273 int 274 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 275 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 276 nfsattrbit_t *attrbitp) 277 { 278 int error, gotattr, lockedit = 0; 279 struct nfsvattr na; 280 281 if (vpislocked == 0) { 282 /* 283 * When vpislocked == 0, the vnode is either exclusively 284 * locked by this thread or not locked by this thread. 285 * As such, shared lock it, if not exclusively locked. 
286 */ 287 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 288 lockedit = 1; 289 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 290 } 291 } 292 293 /* 294 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 295 * attributes, as required. 296 * This needs to be done for regular files if: 297 * - non-NFSv4 RPCs or 298 * - when attrbitp == NULL or 299 * - an NFSv4 RPC with any of the above attributes in attrbitp. 300 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 301 * these attributes. nfsrv_proxyds() will return an error if the 302 * server is not a pNFS one. 303 */ 304 gotattr = 0; 305 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 306 (nd->nd_flag & ND_NFSV4) == 0 || 307 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 308 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 309 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 310 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 311 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 312 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 313 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 314 NULL); 315 if (error == 0) 316 gotattr = 1; 317 } 318 319 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 320 if (lockedit != 0) 321 NFSVOPUNLOCK(vp); 322 323 /* 324 * If we got the Change, Size and Modify Time from the DS, 325 * replace them. 326 */ 327 if (gotattr != 0) { 328 nvap->na_atime = na.na_atime; 329 nvap->na_mtime = na.na_mtime; 330 nvap->na_filerev = na.na_filerev; 331 nvap->na_size = na.na_size; 332 nvap->na_bytes = na.na_bytes; 333 } 334 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 335 error, (uintmax_t)na.na_filerev); 336 337 NFSEXITCODE(error); 338 return (error); 339 } 340 341 /* 342 * Get a file handle for a vnode. 
343 */ 344 int 345 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 346 { 347 int error; 348 349 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 350 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 351 error = VOP_VPTOFH(vp, &fhp->fh_fid); 352 353 NFSEXITCODE(error); 354 return (error); 355 } 356 357 /* 358 * Perform access checking for vnodes obtained from file handles that would 359 * refer to files already opened by a Unix client. You cannot just use 360 * vn_writechk() and VOP_ACCESSX() for two reasons. 361 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 362 * case. 363 * 2 - The owner is to be given access irrespective of mode bits for some 364 * operations, so that processes that chmod after opening a file don't 365 * break. 366 */ 367 int 368 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 369 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 370 u_int32_t *supportedtypep) 371 { 372 struct vattr vattr; 373 int error = 0, getret = 0; 374 375 if (vpislocked == 0) { 376 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 377 error = EPERM; 378 goto out; 379 } 380 } 381 if (accmode & VWRITE) { 382 /* Just vn_writechk() changed to check rdonly */ 383 /* 384 * Disallow write attempts on read-only file systems; 385 * unless the file is a socket or a block or character 386 * device resident on the file system. 387 */ 388 if (NFSVNO_EXRDONLY(exp) || 389 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 390 switch (vp->v_type) { 391 case VREG: 392 case VDIR: 393 case VLNK: 394 error = EROFS; 395 default: 396 break; 397 } 398 } 399 /* 400 * If there's shared text associated with 401 * the inode, try to free it up once. If 402 * we fail, we can't allow writing. 403 */ 404 if (VOP_IS_TEXT(vp) && error == 0) 405 error = ETXTBSY; 406 } 407 if (error != 0) { 408 if (vpislocked == 0) 409 NFSVOPUNLOCK(vp); 410 goto out; 411 } 412 413 /* 414 * Should the override still be applied when ACLs are enabled? 
415 */ 416 error = VOP_ACCESSX(vp, accmode, cred, p); 417 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 418 /* 419 * Try again with VEXPLICIT_DENY, to see if the test for 420 * deletion is supported. 421 */ 422 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 423 if (error == 0) { 424 if (vp->v_type == VDIR) { 425 accmode &= ~(VDELETE | VDELETE_CHILD); 426 accmode |= VWRITE; 427 error = VOP_ACCESSX(vp, accmode, cred, p); 428 } else if (supportedtypep != NULL) { 429 *supportedtypep &= ~NFSACCESS_DELETE; 430 } 431 } 432 } 433 434 /* 435 * Allow certain operations for the owner (reads and writes 436 * on files that are already open). 437 */ 438 if (override != NFSACCCHK_NOOVERRIDE && 439 (error == EPERM || error == EACCES)) { 440 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 441 error = 0; 442 else if (override & NFSACCCHK_ALLOWOWNER) { 443 getret = VOP_GETATTR(vp, &vattr, cred); 444 if (getret == 0 && cred->cr_uid == vattr.va_uid) 445 error = 0; 446 } 447 } 448 if (vpislocked == 0) 449 NFSVOPUNLOCK(vp); 450 451 out: 452 NFSEXITCODE(error); 453 return (error); 454 } 455 456 /* 457 * Set attribute(s) vnop. 458 */ 459 int 460 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 461 struct thread *p, struct nfsexstuff *exp) 462 { 463 u_quad_t savsize = 0; 464 int error, savedit; 465 time_t savbtime; 466 467 /* 468 * If this is an exported file system and a pNFS service is running, 469 * don't VOP_SETATTR() of size for the MDS file system. 
470 */ 471 savedit = 0; 472 error = 0; 473 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && 474 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && 475 nvap->na_vattr.va_size > 0) { 476 savsize = nvap->na_vattr.va_size; 477 nvap->na_vattr.va_size = VNOVAL; 478 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 479 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 480 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 481 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 482 nvap->na_vattr.va_mtime.tv_sec != VNOVAL) 483 savedit = 1; 484 else 485 savedit = 2; 486 } 487 if (savedit != 2) 488 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 489 if (savedit != 0) 490 nvap->na_vattr.va_size = savsize; 491 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 492 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 493 nvap->na_vattr.va_size != VNOVAL || 494 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 495 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 496 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 497 /* Never modify birthtime on a DS file. */ 498 savbtime = nvap->na_vattr.va_birthtime.tv_sec; 499 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; 500 /* For a pNFS server, set the attributes on the DS file. */ 501 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, 502 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); 503 nvap->na_vattr.va_birthtime.tv_sec = savbtime; 504 if (error == ENOENT) 505 error = 0; 506 } 507 NFSEXITCODE(error); 508 return (error); 509 } 510 511 /* 512 * Set up nameidata for a lookup() call and do it. 
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * nd       - request descriptor; nd_flag selects public file handle lookup
 *            (ND_PUBLOOKUP) and NFSv4 handling (ND_NFSV4)
 * ndp      - nameidata whose path buffer (ni_cnd.cn_pnbuf) and ni_pathlen
 *            are already set up
 * dp       - starting vnode; must be a directory
 * islocked - non-zero when dp is locked on entry; it is unlocked here
 * exp      - export information, used for the read-only and exported checks
 * p        - thread stored in cn_thread
 * retdirp  - set to dp once it is known to be a directory, NULL before that
 *
 * Returns 0 on success, otherwise ENOTDIR (dp is not a directory),
 * NFSERR_ACCES (public lookup of a path consisting only of '/'),
 * EINVAL (symbolic link hit on a non-public lookup), ELOOP, ENOENT
 * (empty symbolic link), ENAMETOOLONG or the error returned by lookup()
 * or VOP_READLINK().  On every error return the path buffer has been
 * released via nfsvno_relpathbuf().
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	/* Remember whether the caller asked for a locked leaf. */
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		/* The path buffer is already released, so skip "out". */
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			/* Absolute path: start from rootvnode instead of dp. */
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * Always look up with LOCKLEAF set; when the caller did not ask for
	 * it the leaf is unlocked below and the flag is cleared after the loop.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp);
		/* Symbolic links are only followed for public lookups. */
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * With path components still to come, read the link into a
		 * fresh buffer so the remainder can be appended; otherwise
		 * the existing path buffer is reused.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/*
			 * Shared error exit for the symlink handling:
			 * badlink1 also frees the separately allocated link
			 * buffer, badlink2 only drops the vnodes.
			 */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		/* The caller did not ask for the parent directory. */
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}
740 */ 741 int 742 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, 743 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 744 { 745 struct iovec *iv; 746 struct uio io, *uiop = &io; 747 struct mbuf *mp, *mp3; 748 int len, tlen, error = 0; 749 750 len = NFS_MAXPATHLEN; 751 if (maxextsiz > 0) 752 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 753 &mp3, &mp, &iv); 754 else 755 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); 756 uiop->uio_iov = iv; 757 uiop->uio_offset = 0; 758 uiop->uio_resid = len; 759 uiop->uio_rw = UIO_READ; 760 uiop->uio_segflg = UIO_SYSSPACE; 761 uiop->uio_td = NULL; 762 error = VOP_READLINK(vp, uiop, cred); 763 free(iv, M_TEMP); 764 if (error) { 765 m_freem(mp3); 766 *lenp = 0; 767 goto out; 768 } 769 if (uiop->uio_resid > 0) { 770 len -= uiop->uio_resid; 771 tlen = NFSM_RNDUP(len); 772 if (tlen == 0) { 773 m_freem(mp3); 774 mp3 = mp = NULL; 775 } else if (tlen != NFS_MAXPATHLEN || tlen != len) 776 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, 777 tlen - len); 778 } 779 *lenp = len; 780 *mpp = mp3; 781 *mpendp = mp; 782 783 out: 784 NFSEXITCODE(error); 785 return (error); 786 } 787 788 /* 789 * Create an mbuf chain and an associated iovec that can be used to Read 790 * or Getextattr of data. 791 * Upon success, return pointers to the first and last mbufs in the chain 792 * plus the malloc'd iovec and its iovlen. 793 */ 794 static int 795 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, 796 struct iovec **ivp) 797 { 798 struct mbuf *m, *m2 = NULL, *m3; 799 struct iovec *iv; 800 int i, left, siz; 801 802 left = len; 803 m3 = NULL; 804 /* 805 * Generate the mbuf list with the uio_iov ref. to it. 
806 */ 807 i = 0; 808 while (left > 0) { 809 NFSMGET(m); 810 MCLGET(m, M_WAITOK); 811 m->m_len = 0; 812 siz = min(M_TRAILINGSPACE(m), left); 813 left -= siz; 814 i++; 815 if (m3) 816 m2->m_next = m; 817 else 818 m3 = m; 819 m2 = m; 820 } 821 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 822 m = m3; 823 left = len; 824 i = 0; 825 while (left > 0) { 826 if (m == NULL) 827 panic("nfsrv_createiovec iov"); 828 siz = min(M_TRAILINGSPACE(m), left); 829 if (siz > 0) { 830 iv->iov_base = mtod(m, caddr_t) + m->m_len; 831 iv->iov_len = siz; 832 m->m_len += siz; 833 left -= siz; 834 iv++; 835 i++; 836 } 837 m = m->m_next; 838 } 839 *mpp = m3; 840 *mpendp = m2; 841 return (i); 842 } 843 844 /* 845 * Create an mbuf chain and an associated iovec that can be used to Read 846 * or Getextattr of data. 847 * Upon success, return pointers to the first and last mbufs in the chain 848 * plus the malloc'd iovec and its iovlen. 849 * Same as above, but creates ext_pgs mbuf(s). 850 */ 851 static int 852 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, 853 struct mbuf **mpendp, struct iovec **ivp) 854 { 855 struct mbuf *m, *m2 = NULL, *m3; 856 struct iovec *iv; 857 int i, left, pgno, siz; 858 859 left = len; 860 m3 = NULL; 861 /* 862 * Generate the mbuf list with the uio_iov ref. to it. 
863 */ 864 i = 0; 865 while (left > 0) { 866 siz = min(left, maxextsiz); 867 m = mb_alloc_ext_plus_pages(siz, M_WAITOK); 868 left -= siz; 869 i += m->m_epg_npgs; 870 if (m3 != NULL) 871 m2->m_next = m; 872 else 873 m3 = m; 874 m2 = m; 875 } 876 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 877 m = m3; 878 left = len; 879 i = 0; 880 pgno = 0; 881 while (left > 0) { 882 if (m == NULL) 883 panic("nfsvno_createiovec_extpgs iov"); 884 siz = min(PAGE_SIZE, left); 885 if (siz > 0) { 886 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); 887 iv->iov_len = siz; 888 m->m_len += siz; 889 if (pgno == m->m_epg_npgs - 1) 890 m->m_epg_last_len = siz; 891 left -= siz; 892 iv++; 893 i++; 894 pgno++; 895 } 896 if (pgno == m->m_epg_npgs && left > 0) { 897 m = m->m_next; 898 if (m == NULL) 899 panic("nfsvno_createiovec_extpgs iov"); 900 pgno = 0; 901 } 902 } 903 *mpp = m3; 904 *mpendp = m2; 905 return (i); 906 } 907 908 /* 909 * Read vnode op call into mbuf list. 910 */ 911 int 912 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 913 int maxextsiz, struct thread *p, struct mbuf **mpp, 914 struct mbuf **mpendp) 915 { 916 struct mbuf *m; 917 struct iovec *iv; 918 int error = 0, len, tlen, ioflag = 0; 919 struct mbuf *m3; 920 struct uio io, *uiop = &io; 921 struct nfsheur *nh; 922 923 /* 924 * Attempt to read from a DS file. A return of ENOENT implies 925 * there is no DS file to read. 
926 */ 927 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 928 NULL, mpendp, NULL, NULL, NULL, 0, NULL); 929 if (error != ENOENT) 930 return (error); 931 932 len = NFSM_RNDUP(cnt); 933 if (maxextsiz > 0) 934 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 935 &m3, &m, &iv); 936 else 937 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); 938 uiop->uio_iov = iv; 939 uiop->uio_offset = off; 940 uiop->uio_resid = len; 941 uiop->uio_rw = UIO_READ; 942 uiop->uio_segflg = UIO_SYSSPACE; 943 uiop->uio_td = NULL; 944 nh = nfsrv_sequential_heuristic(uiop, vp); 945 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 946 /* XXX KDM make this more systematic? */ 947 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; 948 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 949 free(iv, M_TEMP); 950 if (error) { 951 m_freem(m3); 952 *mpp = NULL; 953 goto out; 954 } 955 nh->nh_nextoff = uiop->uio_offset; 956 tlen = len - uiop->uio_resid; 957 cnt = cnt < tlen ? cnt : tlen; 958 tlen = NFSM_RNDUP(cnt); 959 if (tlen == 0) { 960 m_freem(m3); 961 m3 = m = NULL; 962 } else if (len != tlen || tlen != cnt) 963 m = nfsrv_adj(m3, len - tlen, tlen - cnt); 964 *mpp = m3; 965 *mpendp = m; 966 967 out: 968 NFSEXITCODE(error); 969 return (error); 970 } 971 972 /* 973 * Create the iovec for the mbuf chain passed in as an argument. 974 * The "cp" argument is where the data starts within the first mbuf in 975 * the chain. It returns the iovec and the iovcnt. 976 */ 977 static int 978 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, 979 int *iovcntp) 980 { 981 struct mbuf *mp; 982 struct iovec *ivp; 983 int cnt, i, len; 984 985 /* 986 * Loop through the mbuf chain, counting how many mbufs are a 987 * part of this write operation, so the iovec size is known. 
988 */ 989 cnt = 0; 990 len = retlen; 991 mp = m; 992 i = mtod(mp, caddr_t) + mp->m_len - cp; 993 while (len > 0) { 994 if (i > 0) { 995 len -= i; 996 cnt++; 997 } 998 mp = mp->m_next; 999 if (!mp) { 1000 if (len > 0) 1001 return (EBADRPC); 1002 } else 1003 i = mp->m_len; 1004 } 1005 1006 /* Now, create the iovec. */ 1007 mp = m; 1008 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 1009 M_WAITOK); 1010 *iovcntp = cnt; 1011 i = mtod(mp, caddr_t) + mp->m_len - cp; 1012 len = retlen; 1013 while (len > 0) { 1014 if (mp == NULL) 1015 panic("nfsrv_createiovecw"); 1016 if (i > 0) { 1017 i = min(i, len); 1018 ivp->iov_base = cp; 1019 ivp->iov_len = i; 1020 ivp++; 1021 len -= i; 1022 } 1023 mp = mp->m_next; 1024 if (mp) { 1025 i = mp->m_len; 1026 cp = mtod(mp, caddr_t); 1027 } 1028 } 1029 return (0); 1030 } 1031 1032 /* 1033 * Write vnode op from an mbuf list. 1034 */ 1035 int 1036 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, 1037 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 1038 { 1039 struct iovec *iv; 1040 int cnt, ioflags, error; 1041 struct uio io, *uiop = &io; 1042 struct nfsheur *nh; 1043 1044 /* 1045 * Attempt to write to a DS file. A return of ENOENT implies 1046 * there is no DS file to write. 
/*
 * Common code for creating a regular file (plus special files for V2).
 *
 * nd              - request descriptor; nd_repstat is the incoming status
 *                   and nd_cred the credentials used
 * ndp             - nameidata from the preceding lookup; ni_vp is NULL when
 *                   the name does not exist yet
 * vpp             - set to the created or already existing vnode
 * nvap            - attributes for the new file; na_type selects what is
 *                   created
 * exclusive_flagp - when non-zero on entry an exclusive create is done: the
 *                   flag is cleared and the verifier is stored in the atime
 * cverf           - the two word create verifier
 * rdev            - device number for VCHR/VBLK
 * exp             - export information handed to nfsvno_accchk()
 *
 * In every path the reference on ni_startdir is dropped, the path buffer
 * is released and ni_dvp is released.
 * Returns 0 or an error number.
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;
	struct thread *p = curthread;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		/* No error so far and the name does not exist: create it. */
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    nd->nd_cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: store the verifier
					 * in the access time of the new file.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
		    nvap->na_type == VFIFO) {
			/* A VCHR with rdev 0xffffffff is created as a fifo. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			/* Creating a device node needs PRIV_VFS_MKNOD_DEV. */
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
				vrele(ndp->ni_startdir);
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			vrele(ndp->ni_startdir);
			if (error)
				goto out;
		} else {
			/* Any other file type cannot be created here. */
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		/* When parent and child are the same vnode, only drop the ref. */
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				/* Set the size and nothing else. */
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = VOP_SETATTR(*vpp,
				    &nvap->na_vattr, nd->nd_cred);
			}
		}
		/*
		 * NOTE(review): this branch is also taken when error was
		 * already set and ni_vp is NULL, in which case *vpp is NULL
		 * here -- confirm that callers never get here that way.
		 */
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}
1206 */ 1207 if (ndp->ni_vp) { 1208 vrele(ndp->ni_startdir); 1209 nfsvno_relpathbuf(ndp); 1210 vput(ndp->ni_dvp); 1211 vrele(ndp->ni_vp); 1212 error = EEXIST; 1213 goto out; 1214 } 1215 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1216 vrele(ndp->ni_startdir); 1217 nfsvno_relpathbuf(ndp); 1218 vput(ndp->ni_dvp); 1219 error = NFSERR_BADTYPE; 1220 goto out; 1221 } 1222 if (vtyp == VSOCK) { 1223 vrele(ndp->ni_startdir); 1224 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1225 &ndp->ni_cnd, &nvap->na_vattr); 1226 vput(ndp->ni_dvp); 1227 nfsvno_relpathbuf(ndp); 1228 } else { 1229 if (nvap->na_type != VFIFO && 1230 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { 1231 vrele(ndp->ni_startdir); 1232 nfsvno_relpathbuf(ndp); 1233 vput(ndp->ni_dvp); 1234 goto out; 1235 } 1236 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1237 &ndp->ni_cnd, &nvap->na_vattr); 1238 vput(ndp->ni_dvp); 1239 nfsvno_relpathbuf(ndp); 1240 vrele(ndp->ni_startdir); 1241 /* 1242 * Since VOP_MKNOD returns the ni_vp, I can't 1243 * see any reason to do the lookup. 1244 */ 1245 } 1246 1247 out: 1248 NFSEXITCODE(error); 1249 return (error); 1250 } 1251 1252 /* 1253 * Mkdir vnode op. 1254 */ 1255 int 1256 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1257 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1258 { 1259 int error = 0; 1260 1261 if (ndp->ni_vp != NULL) { 1262 if (ndp->ni_dvp == ndp->ni_vp) 1263 vrele(ndp->ni_dvp); 1264 else 1265 vput(ndp->ni_dvp); 1266 vrele(ndp->ni_vp); 1267 nfsvno_relpathbuf(ndp); 1268 error = EEXIST; 1269 goto out; 1270 } 1271 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1272 &nvap->na_vattr); 1273 vput(ndp->ni_dvp); 1274 nfsvno_relpathbuf(ndp); 1275 1276 out: 1277 NFSEXITCODE(error); 1278 return (error); 1279 } 1280 1281 /* 1282 * symlink vnode op. 
1283 */ 1284 int 1285 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1286 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1287 struct nfsexstuff *exp) 1288 { 1289 int error = 0; 1290 1291 if (ndp->ni_vp) { 1292 vrele(ndp->ni_startdir); 1293 nfsvno_relpathbuf(ndp); 1294 if (ndp->ni_dvp == ndp->ni_vp) 1295 vrele(ndp->ni_dvp); 1296 else 1297 vput(ndp->ni_dvp); 1298 vrele(ndp->ni_vp); 1299 error = EEXIST; 1300 goto out; 1301 } 1302 1303 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1304 &nvap->na_vattr, pathcp); 1305 vput(ndp->ni_dvp); 1306 vrele(ndp->ni_startdir); 1307 nfsvno_relpathbuf(ndp); 1308 /* 1309 * Although FreeBSD still had the lookup code in 1310 * it for 7/current, there doesn't seem to be any 1311 * point, since VOP_SYMLINK() returns the ni_vp. 1312 * Just vput it for v2. 1313 */ 1314 if (!not_v2 && !error) 1315 vput(ndp->ni_vp); 1316 1317 out: 1318 NFSEXITCODE(error); 1319 return (error); 1320 } 1321 1322 /* 1323 * Parse symbolic link arguments. 1324 * This function has an ugly side effect. It will malloc() an area for 1325 * the symlink and set iov_base to point to it, only if it succeeds. 1326 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1327 * be FREE'd later. 
1328 */ 1329 int 1330 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1331 struct thread *p, char **pathcpp, int *lenp) 1332 { 1333 u_int32_t *tl; 1334 char *pathcp = NULL; 1335 int error = 0, len; 1336 struct nfsv2_sattr *sp; 1337 1338 *pathcpp = NULL; 1339 *lenp = 0; 1340 if ((nd->nd_flag & ND_NFSV3) && 1341 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1342 goto nfsmout; 1343 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1344 len = fxdr_unsigned(int, *tl); 1345 if (len > NFS_MAXPATHLEN || len <= 0) { 1346 error = EBADRPC; 1347 goto nfsmout; 1348 } 1349 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1350 error = nfsrv_mtostr(nd, pathcp, len); 1351 if (error) 1352 goto nfsmout; 1353 if (nd->nd_flag & ND_NFSV2) { 1354 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1355 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1356 } 1357 *pathcpp = pathcp; 1358 *lenp = len; 1359 NFSEXITCODE2(0, nd); 1360 return (0); 1361 nfsmout: 1362 if (pathcp) 1363 free(pathcp, M_TEMP); 1364 NFSEXITCODE2(error, nd); 1365 return (error); 1366 } 1367 1368 /* 1369 * Remove a non-directory object. 
1370 */ 1371 int 1372 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1373 struct thread *p, struct nfsexstuff *exp) 1374 { 1375 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1376 int error = 0, mirrorcnt; 1377 char fname[PNFS_FILENAME_LEN + 1]; 1378 fhandle_t fh; 1379 1380 vp = ndp->ni_vp; 1381 dsdvp[0] = NULL; 1382 if (vp->v_type == VDIR) 1383 error = NFSERR_ISDIR; 1384 else if (is_v4) 1385 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), 1386 p); 1387 if (error == 0) 1388 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1389 if (!error) 1390 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1391 if (error == 0 && dsdvp[0] != NULL) 1392 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1393 if (ndp->ni_dvp == vp) 1394 vrele(ndp->ni_dvp); 1395 else 1396 vput(ndp->ni_dvp); 1397 vput(vp); 1398 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1399 nfsvno_relpathbuf(ndp); 1400 NFSEXITCODE(error); 1401 return (error); 1402 } 1403 1404 /* 1405 * Remove a directory. 1406 */ 1407 int 1408 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1409 struct thread *p, struct nfsexstuff *exp) 1410 { 1411 struct vnode *vp; 1412 int error = 0; 1413 1414 vp = ndp->ni_vp; 1415 if (vp->v_type != VDIR) { 1416 error = ENOTDIR; 1417 goto out; 1418 } 1419 /* 1420 * No rmdir "." please. 1421 */ 1422 if (ndp->ni_dvp == vp) { 1423 error = EINVAL; 1424 goto out; 1425 } 1426 /* 1427 * The root of a mounted filesystem cannot be deleted. 1428 */ 1429 if (vp->v_vflag & VV_ROOT) 1430 error = EBUSY; 1431 out: 1432 if (!error) 1433 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1434 if (ndp->ni_dvp == vp) 1435 vrele(ndp->ni_dvp); 1436 else 1437 vput(ndp->ni_dvp); 1438 vput(vp); 1439 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) 1440 nfsvno_relpathbuf(ndp); 1441 NFSEXITCODE(error); 1442 return (error); 1443 } 1444 1445 /* 1446 * Rename vnode op. 
1447 */ 1448 int 1449 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1450 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1451 { 1452 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1453 int error = 0, mirrorcnt; 1454 char fname[PNFS_FILENAME_LEN + 1]; 1455 fhandle_t fh; 1456 1457 dsdvp[0] = NULL; 1458 fvp = fromndp->ni_vp; 1459 if (ndstat) { 1460 vrele(fromndp->ni_dvp); 1461 vrele(fvp); 1462 error = ndstat; 1463 goto out1; 1464 } 1465 tdvp = tondp->ni_dvp; 1466 tvp = tondp->ni_vp; 1467 if (tvp != NULL) { 1468 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1469 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1470 goto out; 1471 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1472 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1473 goto out; 1474 } 1475 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1476 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1477 goto out; 1478 } 1479 1480 /* 1481 * A rename to '.' or '..' results in a prematurely 1482 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1483 * here. 1484 */ 1485 if ((tondp->ni_cnd.cn_namelen == 1 && 1486 tondp->ni_cnd.cn_nameptr[0] == '.') || 1487 (tondp->ni_cnd.cn_namelen == 2 && 1488 tondp->ni_cnd.cn_nameptr[0] == '.' && 1489 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1490 error = EINVAL; 1491 goto out; 1492 } 1493 } 1494 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1495 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1496 goto out; 1497 } 1498 if (fvp->v_mount != tdvp->v_mount) { 1499 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1500 goto out; 1501 } 1502 if (fvp == tdvp) { 1503 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1504 goto out; 1505 } 1506 if (fvp == tvp) { 1507 /* 1508 * If source and destination are the same, there is nothing to 1509 * do. Set error to -1 to indicate this. 
1510 */ 1511 error = -1; 1512 goto out; 1513 } 1514 if (ndflag & ND_NFSV4) { 1515 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1516 error = nfsrv_checkremove(fvp, 0, NULL, 1517 (nfsquad_t)((u_quad_t)0), p); 1518 NFSVOPUNLOCK(fvp); 1519 } else 1520 error = EPERM; 1521 if (tvp && !error) 1522 error = nfsrv_checkremove(tvp, 1, NULL, 1523 (nfsquad_t)((u_quad_t)0), p); 1524 } else { 1525 /* 1526 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1527 * that the NFSv4 client won't be confused by the rename. 1528 * Since nfsd_recalldelegation() can only be called on an 1529 * unlocked vnode at this point and fvp is the file that will 1530 * still exist after the rename, just do fvp. 1531 */ 1532 nfsd_recalldelegation(fvp, p); 1533 } 1534 if (error == 0 && tvp != NULL) { 1535 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1536 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1537 " dsdvp=%p\n", dsdvp[0]); 1538 } 1539 out: 1540 if (!error) { 1541 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1542 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1543 &tondp->ni_cnd); 1544 } else { 1545 if (tdvp == tvp) 1546 vrele(tdvp); 1547 else 1548 vput(tdvp); 1549 if (tvp) 1550 vput(tvp); 1551 vrele(fromndp->ni_dvp); 1552 vrele(fvp); 1553 if (error == -1) 1554 error = 0; 1555 } 1556 1557 /* 1558 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1559 * if the rename succeeded, the DS file for the tvp needs to be 1560 * removed. 1561 */ 1562 if (error == 0 && dsdvp[0] != NULL) { 1563 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1564 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1565 } 1566 1567 vrele(tondp->ni_startdir); 1568 nfsvno_relpathbuf(tondp); 1569 out1: 1570 vrele(fromndp->ni_startdir); 1571 nfsvno_relpathbuf(fromndp); 1572 NFSEXITCODE(error); 1573 return (error); 1574 } 1575 1576 /* 1577 * Link vnode op. 
1578 */ 1579 int 1580 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1581 struct thread *p, struct nfsexstuff *exp) 1582 { 1583 struct vnode *xp; 1584 int error = 0; 1585 1586 xp = ndp->ni_vp; 1587 if (xp != NULL) { 1588 error = EEXIST; 1589 } else { 1590 xp = ndp->ni_dvp; 1591 if (vp->v_mount != xp->v_mount) 1592 error = EXDEV; 1593 } 1594 if (!error) { 1595 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1596 if (!VN_IS_DOOMED(vp)) 1597 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1598 else 1599 error = EPERM; 1600 if (ndp->ni_dvp == vp) 1601 vrele(ndp->ni_dvp); 1602 else 1603 vput(ndp->ni_dvp); 1604 NFSVOPUNLOCK(vp); 1605 } else { 1606 if (ndp->ni_dvp == ndp->ni_vp) 1607 vrele(ndp->ni_dvp); 1608 else 1609 vput(ndp->ni_dvp); 1610 if (ndp->ni_vp) 1611 vrele(ndp->ni_vp); 1612 } 1613 nfsvno_relpathbuf(ndp); 1614 NFSEXITCODE(error); 1615 return (error); 1616 } 1617 1618 /* 1619 * Do the fsync() appropriate for the commit. 1620 */ 1621 int 1622 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1623 struct thread *td) 1624 { 1625 int error = 0; 1626 1627 /* 1628 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1629 * file is done. At this time VOP_FSYNC does not accept offset and 1630 * byte count parameters so call VOP_FSYNC the whole file for now. 1631 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1632 * File systems that do not use the buffer cache (as indicated 1633 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 
1634 */ 1635 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1636 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1637 /* 1638 * Give up and do the whole thing 1639 */ 1640 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1641 VM_OBJECT_WLOCK(vp->v_object); 1642 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1643 VM_OBJECT_WUNLOCK(vp->v_object); 1644 } 1645 error = VOP_FSYNC(vp, MNT_WAIT, td); 1646 } else { 1647 /* 1648 * Locate and synchronously write any buffers that fall 1649 * into the requested range. Note: we are assuming that 1650 * f_iosize is a power of 2. 1651 */ 1652 int iosize = vp->v_mount->mnt_stat.f_iosize; 1653 int iomask = iosize - 1; 1654 struct bufobj *bo; 1655 daddr_t lblkno; 1656 1657 /* 1658 * Align to iosize boundary, super-align to page boundary. 1659 */ 1660 if (off & iomask) { 1661 cnt += off & iomask; 1662 off &= ~(u_quad_t)iomask; 1663 } 1664 if (off & PAGE_MASK) { 1665 cnt += off & PAGE_MASK; 1666 off &= ~(u_quad_t)PAGE_MASK; 1667 } 1668 lblkno = off / iosize; 1669 1670 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1671 VM_OBJECT_WLOCK(vp->v_object); 1672 vm_object_page_clean(vp->v_object, off, off + cnt, 1673 OBJPC_SYNC); 1674 VM_OBJECT_WUNLOCK(vp->v_object); 1675 } 1676 1677 bo = &vp->v_bufobj; 1678 BO_LOCK(bo); 1679 while (cnt > 0) { 1680 struct buf *bp; 1681 1682 /* 1683 * If we have a buffer and it is marked B_DELWRI we 1684 * have to lock and write it. Otherwise the prior 1685 * write is assumed to have already been committed. 1686 * 1687 * gbincore() can return invalid buffers now so we 1688 * have to check that bit as well (though B_DELWRI 1689 * should not be set if B_INVAL is set there could be 1690 * a race here since we haven't locked the buffer). 
1691 */ 1692 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1693 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1694 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1695 BO_LOCK(bo); 1696 continue; /* retry */ 1697 } 1698 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1699 B_DELWRI) { 1700 bremfree(bp); 1701 bp->b_flags &= ~B_ASYNC; 1702 bwrite(bp); 1703 ++nfs_commit_miss; 1704 } else 1705 BUF_UNLOCK(bp); 1706 BO_LOCK(bo); 1707 } 1708 ++nfs_commit_blks; 1709 if (cnt < iosize) 1710 break; 1711 cnt -= iosize; 1712 ++lblkno; 1713 } 1714 BO_UNLOCK(bo); 1715 } 1716 NFSEXITCODE(error); 1717 return (error); 1718 } 1719 1720 /* 1721 * Statfs vnode op. 1722 */ 1723 int 1724 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1725 { 1726 struct statfs *tsf; 1727 int error; 1728 1729 tsf = NULL; 1730 if (nfsrv_devidcnt > 0) { 1731 /* For a pNFS service, get the DS numbers. */ 1732 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1733 error = nfsrv_pnfsstatfs(tsf, vp->v_mount); 1734 if (error != 0) { 1735 free(tsf, M_TEMP); 1736 tsf = NULL; 1737 } 1738 } 1739 error = VFS_STATFS(vp->v_mount, sf); 1740 if (error == 0) { 1741 if (tsf != NULL) { 1742 sf->f_blocks = tsf->f_blocks; 1743 sf->f_bavail = tsf->f_bavail; 1744 sf->f_bfree = tsf->f_bfree; 1745 sf->f_bsize = tsf->f_bsize; 1746 } 1747 /* 1748 * Since NFS handles these values as unsigned on the 1749 * wire, there is no way to represent negative values, 1750 * so set them to 0. Without this, they will appear 1751 * to be very large positive values for clients like 1752 * Solaris10. 1753 */ 1754 if (sf->f_bavail < 0) 1755 sf->f_bavail = 0; 1756 if (sf->f_ffree < 0) 1757 sf->f_ffree = 0; 1758 } 1759 free(tsf, M_TEMP); 1760 NFSEXITCODE(error); 1761 return (error); 1762 } 1763 1764 /* 1765 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1766 * must handle nfsrv_opencheck() calls after any other access checks. 
1767 */ 1768 void 1769 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1770 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1771 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1772 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, 1773 struct nfsexstuff *exp, struct vnode **vpp) 1774 { 1775 struct vnode *vp = NULL; 1776 u_quad_t tempsize; 1777 struct nfsexstuff nes; 1778 struct thread *p = curthread; 1779 1780 if (ndp->ni_vp == NULL) 1781 nd->nd_repstat = nfsrv_opencheck(clientid, 1782 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1783 if (!nd->nd_repstat) { 1784 if (ndp->ni_vp == NULL) { 1785 vrele(ndp->ni_startdir); 1786 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1787 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1788 /* For a pNFS server, create the data file on a DS. */ 1789 if (nd->nd_repstat == 0) { 1790 /* 1791 * Create a data file on a DS for a pNFS server. 1792 * This function just returns if not 1793 * running a pNFS DS or the creation fails. 
1794 */ 1795 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1796 cred, p); 1797 } 1798 vput(ndp->ni_dvp); 1799 nfsvno_relpathbuf(ndp); 1800 if (!nd->nd_repstat) { 1801 if (*exclusive_flagp) { 1802 *exclusive_flagp = 0; 1803 NFSVNO_ATTRINIT(nvap); 1804 nvap->na_atime.tv_sec = cverf[0]; 1805 nvap->na_atime.tv_nsec = cverf[1]; 1806 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1807 &nvap->na_vattr, cred); 1808 if (nd->nd_repstat != 0) { 1809 vput(ndp->ni_vp); 1810 ndp->ni_vp = NULL; 1811 nd->nd_repstat = NFSERR_NOTSUPP; 1812 } else 1813 NFSSETBIT_ATTRBIT(attrbitp, 1814 NFSATTRBIT_TIMEACCESS); 1815 } else { 1816 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1817 aclp, p, attrbitp, exp); 1818 } 1819 } 1820 vp = ndp->ni_vp; 1821 } else { 1822 if (ndp->ni_startdir) 1823 vrele(ndp->ni_startdir); 1824 nfsvno_relpathbuf(ndp); 1825 vp = ndp->ni_vp; 1826 if (create == NFSV4OPEN_CREATE) { 1827 if (ndp->ni_dvp == vp) 1828 vrele(ndp->ni_dvp); 1829 else 1830 vput(ndp->ni_dvp); 1831 } 1832 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1833 if (ndp->ni_cnd.cn_flags & RDONLY) 1834 NFSVNO_SETEXRDONLY(&nes); 1835 else 1836 NFSVNO_EXINIT(&nes); 1837 nd->nd_repstat = nfsvno_accchk(vp, 1838 VWRITE, cred, &nes, p, 1839 NFSACCCHK_NOOVERRIDE, 1840 NFSACCCHK_VPISLOCKED, NULL); 1841 nd->nd_repstat = nfsrv_opencheck(clientid, 1842 stateidp, stp, vp, nd, p, nd->nd_repstat); 1843 if (!nd->nd_repstat) { 1844 tempsize = nvap->na_size; 1845 NFSVNO_ATTRINIT(nvap); 1846 nvap->na_size = tempsize; 1847 nd->nd_repstat = VOP_SETATTR(vp, 1848 &nvap->na_vattr, cred); 1849 } 1850 } else if (vp->v_type == VREG) { 1851 nd->nd_repstat = nfsrv_opencheck(clientid, 1852 stateidp, stp, vp, nd, p, nd->nd_repstat); 1853 } 1854 } 1855 } else { 1856 if (ndp->ni_cnd.cn_flags & HASBUF) 1857 nfsvno_relpathbuf(ndp); 1858 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1859 vrele(ndp->ni_startdir); 1860 if (ndp->ni_dvp == ndp->ni_vp) 1861 vrele(ndp->ni_dvp); 1862 else 1863 vput(ndp->ni_dvp); 1864 if (ndp->ni_vp) 1865 
vput(ndp->ni_vp); 1866 } 1867 } 1868 *vpp = vp; 1869 1870 NFSEXITCODE2(0, nd); 1871 } 1872 1873 /* 1874 * Updates the file rev and sets the mtime and ctime 1875 * to the current clock time, returning the va_filerev and va_Xtime 1876 * values. 1877 * Return ESTALE to indicate the vnode is VIRF_DOOMED. 1878 */ 1879 int 1880 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1881 struct nfsrv_descript *nd, struct thread *p) 1882 { 1883 struct vattr va; 1884 1885 VATTR_NULL(&va); 1886 vfs_timestamp(&va.va_mtime); 1887 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1888 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1889 if (VN_IS_DOOMED(vp)) 1890 return (ESTALE); 1891 } 1892 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1893 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1894 return (0); 1895 } 1896 1897 /* 1898 * Glue routine to nfsv4_fillattr(). 1899 */ 1900 int 1901 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1902 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1903 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1904 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1905 { 1906 struct statfs *sf; 1907 int error; 1908 1909 sf = NULL; 1910 if (nfsrv_devidcnt > 0 && 1911 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1912 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 1913 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 1914 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 1915 error = nfsrv_pnfsstatfs(sf, mp); 1916 if (error != 0) { 1917 free(sf, M_TEMP); 1918 sf = NULL; 1919 } 1920 } 1921 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 1922 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 1923 mounted_on_fileno, sf); 1924 free(sf, M_TEMP); 1925 NFSEXITCODE2(0, nd); 1926 return (error); 1927 } 1928 1929 /* Since the Readdir vnode ops vary, put the entire functions in here. 
*/
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count, break out of the loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	these don't matter for Unix clients, but they might confuse clients
 *	for other OSes.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of: just name strings and file ids, or the
 *	entire reply rpc, or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
1952 */ 1953 int 1954 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 1955 struct vnode *vp, struct nfsexstuff *exp) 1956 { 1957 struct dirent *dp; 1958 u_int32_t *tl; 1959 int dirlen; 1960 char *cpos, *cend, *rbuf; 1961 struct nfsvattr at; 1962 int nlen, error = 0, getret = 1; 1963 int siz, cnt, fullsiz, eofflag, ncookies; 1964 u_int64_t off, toff, verf __unused; 1965 u_long *cookies = NULL, *cookiep; 1966 struct uio io; 1967 struct iovec iv; 1968 int is_ufs; 1969 struct thread *p = curthread; 1970 1971 if (nd->nd_repstat) { 1972 nfsrv_postopattr(nd, getret, &at); 1973 goto out; 1974 } 1975 if (nd->nd_flag & ND_NFSV2) { 1976 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1977 off = fxdr_unsigned(u_quad_t, *tl++); 1978 } else { 1979 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 1980 off = fxdr_hyper(tl); 1981 tl += 2; 1982 verf = fxdr_hyper(tl); 1983 tl += 2; 1984 } 1985 toff = off; 1986 cnt = fxdr_unsigned(int, *tl); 1987 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 1988 cnt = NFS_SRVMAXDATA(nd); 1989 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 1990 fullsiz = siz; 1991 if (nd->nd_flag & ND_NFSV3) { 1992 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 1993 NULL); 1994 #if 0 1995 /* 1996 * va_filerev is not sufficient as a cookie verifier, 1997 * since it is not supposed to change when entries are 1998 * removed/added unless that offset cookies returned to 1999 * the client are no longer valid. 
2000 */ 2001 if (!nd->nd_repstat && toff && verf != at.na_filerev) 2002 nd->nd_repstat = NFSERR_BAD_COOKIE; 2003 #endif 2004 } 2005 if (!nd->nd_repstat && vp->v_type != VDIR) 2006 nd->nd_repstat = NFSERR_NOTDIR; 2007 if (nd->nd_repstat == 0 && cnt == 0) { 2008 if (nd->nd_flag & ND_NFSV2) 2009 /* NFSv2 does not have NFSERR_TOOSMALL */ 2010 nd->nd_repstat = EPERM; 2011 else 2012 nd->nd_repstat = NFSERR_TOOSMALL; 2013 } 2014 if (!nd->nd_repstat) 2015 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2016 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2017 NFSACCCHK_VPISLOCKED, NULL); 2018 if (nd->nd_repstat) { 2019 vput(vp); 2020 if (nd->nd_flag & ND_NFSV3) 2021 nfsrv_postopattr(nd, getret, &at); 2022 goto out; 2023 } 2024 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2025 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2026 again: 2027 eofflag = 0; 2028 if (cookies) { 2029 free(cookies, M_TEMP); 2030 cookies = NULL; 2031 } 2032 2033 iv.iov_base = rbuf; 2034 iv.iov_len = siz; 2035 io.uio_iov = &iv; 2036 io.uio_iovcnt = 1; 2037 io.uio_offset = (off_t)off; 2038 io.uio_resid = siz; 2039 io.uio_segflg = UIO_SYSSPACE; 2040 io.uio_rw = UIO_READ; 2041 io.uio_td = NULL; 2042 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2043 &cookies); 2044 off = (u_int64_t)io.uio_offset; 2045 if (io.uio_resid) 2046 siz -= io.uio_resid; 2047 2048 if (!cookies && !nd->nd_repstat) 2049 nd->nd_repstat = NFSERR_PERM; 2050 if (nd->nd_flag & ND_NFSV3) { 2051 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2052 if (!nd->nd_repstat) 2053 nd->nd_repstat = getret; 2054 } 2055 2056 /* 2057 * Handles the failed cases. nd->nd_repstat == 0 past here. 
2058 */ 2059 if (nd->nd_repstat) { 2060 vput(vp); 2061 free(rbuf, M_TEMP); 2062 if (cookies) 2063 free(cookies, M_TEMP); 2064 if (nd->nd_flag & ND_NFSV3) 2065 nfsrv_postopattr(nd, getret, &at); 2066 goto out; 2067 } 2068 /* 2069 * If nothing read, return eof 2070 * rpc reply 2071 */ 2072 if (siz == 0) { 2073 vput(vp); 2074 if (nd->nd_flag & ND_NFSV2) { 2075 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2076 } else { 2077 nfsrv_postopattr(nd, getret, &at); 2078 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2079 txdr_hyper(at.na_filerev, tl); 2080 tl += 2; 2081 } 2082 *tl++ = newnfs_false; 2083 *tl = newnfs_true; 2084 free(rbuf, M_TEMP); 2085 free(cookies, M_TEMP); 2086 goto out; 2087 } 2088 2089 /* 2090 * Check for degenerate cases of nothing useful read. 2091 * If so go try again 2092 */ 2093 cpos = rbuf; 2094 cend = rbuf + siz; 2095 dp = (struct dirent *)cpos; 2096 cookiep = cookies; 2097 2098 /* 2099 * For some reason FreeBSD's ufs_readdir() chooses to back the 2100 * directory offset up to a block boundary, so it is necessary to 2101 * skip over the records that precede the requested offset. This 2102 * requires the assumption that file offset cookies monotonically 2103 * increase. 2104 */ 2105 while (cpos < cend && ncookies > 0 && 2106 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2107 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 2108 cpos += dp->d_reclen; 2109 dp = (struct dirent *)cpos; 2110 cookiep++; 2111 ncookies--; 2112 } 2113 if (cpos >= cend || ncookies == 0) { 2114 siz = fullsiz; 2115 toff = off; 2116 goto again; 2117 } 2118 vput(vp); 2119 2120 /* 2121 * If cnt > MCLBYTES and the reply will not be saved, use 2122 * ext_pgs mbufs for TLS. 2123 * For NFSv4.0, we do not know for sure if the reply will 2124 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2125 */ 2126 if (cnt > MCLBYTES && siz > MCLBYTES && 2127 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2128 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2129 nd->nd_flag |= ND_EXTPG; 2130 2131 /* 2132 * dirlen is the size of the reply, including all XDR and must 2133 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 2134 * if the XDR should be included in "count", but to be safe, we do. 2135 * (Include the two booleans at the end of the reply in dirlen now.) 2136 */ 2137 if (nd->nd_flag & ND_NFSV3) { 2138 nfsrv_postopattr(nd, getret, &at); 2139 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2140 txdr_hyper(at.na_filerev, tl); 2141 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2142 } else { 2143 dirlen = 2 * NFSX_UNSIGNED; 2144 } 2145 2146 /* Loop through the records and build reply */ 2147 while (cpos < cend && ncookies > 0) { 2148 nlen = dp->d_namlen; 2149 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2150 nlen <= NFS_MAXNAMLEN) { 2151 if (nd->nd_flag & ND_NFSV3) 2152 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2153 else 2154 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2155 if (dirlen > cnt) { 2156 eofflag = 0; 2157 break; 2158 } 2159 2160 /* 2161 * Build the directory record xdr from 2162 * the dirent entry. 
2163 */ 2164 if (nd->nd_flag & ND_NFSV3) { 2165 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2166 *tl++ = newnfs_true; 2167 *tl++ = 0; 2168 } else { 2169 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2170 *tl++ = newnfs_true; 2171 } 2172 *tl = txdr_unsigned(dp->d_fileno); 2173 (void) nfsm_strtom(nd, dp->d_name, nlen); 2174 if (nd->nd_flag & ND_NFSV3) { 2175 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2176 *tl++ = 0; 2177 } else 2178 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2179 *tl = txdr_unsigned(*cookiep); 2180 } 2181 cpos += dp->d_reclen; 2182 dp = (struct dirent *)cpos; 2183 cookiep++; 2184 ncookies--; 2185 } 2186 if (cpos < cend) 2187 eofflag = 0; 2188 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2189 *tl++ = newnfs_false; 2190 if (eofflag) 2191 *tl = newnfs_true; 2192 else 2193 *tl = newnfs_false; 2194 free(rbuf, M_TEMP); 2195 free(cookies, M_TEMP); 2196 2197 out: 2198 NFSEXITCODE2(0, nd); 2199 return (0); 2200 nfsmout: 2201 vput(vp); 2202 NFSEXITCODE2(error, nd); 2203 return (error); 2204 } 2205 2206 /* 2207 * Readdirplus for V3 and Readdir for V4. 
2208 */ 2209 int 2210 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2211 struct vnode *vp, struct nfsexstuff *exp) 2212 { 2213 struct dirent *dp; 2214 u_int32_t *tl; 2215 int dirlen; 2216 char *cpos, *cend, *rbuf; 2217 struct vnode *nvp; 2218 fhandle_t nfh; 2219 struct nfsvattr nva, at, *nvap = &nva; 2220 struct mbuf *mb0, *mb1; 2221 struct nfsreferral *refp; 2222 int nlen, r, error = 0, getret = 1, usevget = 1; 2223 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2224 caddr_t bpos0, bpos1; 2225 u_int64_t off, toff, verf; 2226 u_long *cookies = NULL, *cookiep; 2227 nfsattrbit_t attrbits, rderrbits, savbits; 2228 struct uio io; 2229 struct iovec iv; 2230 struct componentname cn; 2231 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2232 struct mount *mp, *new_mp; 2233 uint64_t mounted_on_fileno; 2234 struct thread *p = curthread; 2235 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; 2236 2237 if (nd->nd_repstat) { 2238 nfsrv_postopattr(nd, getret, &at); 2239 goto out; 2240 } 2241 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2242 off = fxdr_hyper(tl); 2243 toff = off; 2244 tl += 2; 2245 verf = fxdr_hyper(tl); 2246 tl += 2; 2247 siz = fxdr_unsigned(int, *tl++); 2248 cnt = fxdr_unsigned(int, *tl); 2249 2250 /* 2251 * Use the server's maximum data transfer size as the upper bound 2252 * on reply datalen. 2253 */ 2254 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2255 cnt = NFS_SRVMAXDATA(nd); 2256 2257 /* 2258 * siz is a "hint" of how much directory information (name, fileid, 2259 * cookie) should be in the reply. At least one client "hints" 0, 2260 * so I set it to cnt for that case. I also round it up to the 2261 * next multiple of DIRBLKSIZ. 2262 * Since the size of a Readdirplus directory entry reply will always 2263 * be greater than a directory entry returned by VOP_READDIR(), it 2264 * does not make sense to read more than NFS_SRVMAXDATA() via 2265 * VOP_READDIR(). 
2266 */ 2267 if (siz <= 0) 2268 siz = cnt; 2269 else if (siz > NFS_SRVMAXDATA(nd)) 2270 siz = NFS_SRVMAXDATA(nd); 2271 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2272 2273 if (nd->nd_flag & ND_NFSV4) { 2274 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2275 if (error) 2276 goto nfsmout; 2277 NFSSET_ATTRBIT(&savbits, &attrbits); 2278 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); 2279 NFSZERO_ATTRBIT(&rderrbits); 2280 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2281 } else { 2282 NFSZERO_ATTRBIT(&attrbits); 2283 } 2284 fullsiz = siz; 2285 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2286 #if 0 2287 if (!nd->nd_repstat) { 2288 if (off && verf != at.na_filerev) { 2289 /* 2290 * va_filerev is not sufficient as a cookie verifier, 2291 * since it is not supposed to change when entries are 2292 * removed/added unless that offset cookies returned to 2293 * the client are no longer valid. 2294 */ 2295 if (nd->nd_flag & ND_NFSV4) { 2296 nd->nd_repstat = NFSERR_NOTSAME; 2297 } else { 2298 nd->nd_repstat = NFSERR_BAD_COOKIE; 2299 } 2300 } 2301 } 2302 #endif 2303 if (!nd->nd_repstat && vp->v_type != VDIR) 2304 nd->nd_repstat = NFSERR_NOTDIR; 2305 if (!nd->nd_repstat && cnt == 0) 2306 nd->nd_repstat = NFSERR_TOOSMALL; 2307 if (!nd->nd_repstat) 2308 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2309 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2310 NFSACCCHK_VPISLOCKED, NULL); 2311 if (nd->nd_repstat) { 2312 vput(vp); 2313 if (nd->nd_flag & ND_NFSV3) 2314 nfsrv_postopattr(nd, getret, &at); 2315 goto out; 2316 } 2317 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2318 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2319 2320 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2321 again: 2322 eofflag = 0; 2323 if (cookies) { 2324 free(cookies, M_TEMP); 2325 cookies = NULL; 2326 } 2327 2328 iv.iov_base = rbuf; 2329 iv.iov_len = siz; 2330 io.uio_iov = &iv; 2331 io.uio_iovcnt = 1; 2332 io.uio_offset = (off_t)off; 2333 io.uio_resid = 
siz; 2334 io.uio_segflg = UIO_SYSSPACE; 2335 io.uio_rw = UIO_READ; 2336 io.uio_td = NULL; 2337 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2338 &cookies); 2339 off = (u_int64_t)io.uio_offset; 2340 if (io.uio_resid) 2341 siz -= io.uio_resid; 2342 2343 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2344 2345 if (!cookies && !nd->nd_repstat) 2346 nd->nd_repstat = NFSERR_PERM; 2347 if (!nd->nd_repstat) 2348 nd->nd_repstat = getret; 2349 if (nd->nd_repstat) { 2350 vput(vp); 2351 if (cookies) 2352 free(cookies, M_TEMP); 2353 free(rbuf, M_TEMP); 2354 if (nd->nd_flag & ND_NFSV3) 2355 nfsrv_postopattr(nd, getret, &at); 2356 goto out; 2357 } 2358 /* 2359 * If nothing read, return eof 2360 * rpc reply 2361 */ 2362 if (siz == 0) { 2363 vput(vp); 2364 if (nd->nd_flag & ND_NFSV3) 2365 nfsrv_postopattr(nd, getret, &at); 2366 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2367 txdr_hyper(at.na_filerev, tl); 2368 tl += 2; 2369 *tl++ = newnfs_false; 2370 *tl = newnfs_true; 2371 free(cookies, M_TEMP); 2372 free(rbuf, M_TEMP); 2373 goto out; 2374 } 2375 2376 /* 2377 * Check for degenerate cases of nothing useful read. 2378 * If so go try again 2379 */ 2380 cpos = rbuf; 2381 cend = rbuf + siz; 2382 dp = (struct dirent *)cpos; 2383 cookiep = cookies; 2384 2385 /* 2386 * For some reason FreeBSD's ufs_readdir() chooses to back the 2387 * directory offset up to a block boundary, so it is necessary to 2388 * skip over the records that precede the requested offset. This 2389 * requires the assumption that file offset cookies monotonically 2390 * increase. 2391 */ 2392 while (cpos < cend && ncookies > 0 && 2393 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2394 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2395 ((nd->nd_flag & ND_NFSV4) && 2396 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2397 (dp->d_namlen==2 && dp->d_name[0]=='.' 
&& dp->d_name[1]=='.'))))) { 2398 cpos += dp->d_reclen; 2399 dp = (struct dirent *)cpos; 2400 cookiep++; 2401 ncookies--; 2402 } 2403 if (cpos >= cend || ncookies == 0) { 2404 siz = fullsiz; 2405 toff = off; 2406 goto again; 2407 } 2408 2409 /* 2410 * Busy the file system so that the mount point won't go away 2411 * and, as such, VFS_VGET() can be used safely. 2412 */ 2413 mp = vp->v_mount; 2414 vfs_ref(mp); 2415 NFSVOPUNLOCK(vp); 2416 nd->nd_repstat = vfs_busy(mp, 0); 2417 vfs_rel(mp); 2418 if (nd->nd_repstat != 0) { 2419 vrele(vp); 2420 free(cookies, M_TEMP); 2421 free(rbuf, M_TEMP); 2422 if (nd->nd_flag & ND_NFSV3) 2423 nfsrv_postopattr(nd, getret, &at); 2424 goto out; 2425 } 2426 2427 /* 2428 * Check to see if entries in this directory can be safely acquired 2429 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2430 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2431 * automount of the snapshot directory that is required will 2432 * be done. 2433 * This needs to be done here for NFSv4, since NFSv4 never does 2434 * a VFS_VGET() for "." or "..". 2435 */ 2436 if (is_zfs == 1) { 2437 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2438 if (r == EOPNOTSUPP) { 2439 usevget = 0; 2440 cn.cn_nameiop = LOOKUP; 2441 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2442 cn.cn_cred = nd->nd_cred; 2443 cn.cn_thread = p; 2444 } else if (r == 0) 2445 vput(nvp); 2446 } 2447 2448 /* 2449 * If the reply is likely to exceed MCLBYTES and the reply will 2450 * not be saved, use ext_pgs mbufs for TLS. 2451 * It is difficult to predict how large each entry will be and 2452 * how many entries have been read, so just assume the directory 2453 * entries grow by a factor of 4 when attributes are included. 2454 * For NFSv4.0, we do not know for sure if the reply will 2455 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2456 */ 2457 if (cnt > MCLBYTES && siz > MCLBYTES / 4 && 2458 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2459 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2460 nd->nd_flag |= ND_EXTPG; 2461 2462 /* 2463 * Save this position, in case there is an error before one entry 2464 * is created. 2465 */ 2466 mb0 = nd->nd_mb; 2467 bpos0 = nd->nd_bpos; 2468 bextpg0 = nd->nd_bextpg; 2469 bextpgsiz0 = nd->nd_bextpgsiz; 2470 2471 /* 2472 * Fill in the first part of the reply. 2473 * dirlen is the reply length in bytes and cannot exceed cnt. 2474 * (Include the two booleans at the end of the reply in dirlen now, 2475 * so we recognize when we have exceeded cnt.) 2476 */ 2477 if (nd->nd_flag & ND_NFSV3) { 2478 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2479 nfsrv_postopattr(nd, getret, &at); 2480 } else { 2481 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2482 } 2483 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2484 txdr_hyper(at.na_filerev, tl); 2485 2486 /* 2487 * Save this position, in case there is an empty reply needed. 2488 */ 2489 mb1 = nd->nd_mb; 2490 bpos1 = nd->nd_bpos; 2491 bextpg1 = nd->nd_bextpg; 2492 bextpgsiz1 = nd->nd_bextpgsiz; 2493 2494 /* Loop through the records and build reply */ 2495 entrycnt = 0; 2496 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2497 nlen = dp->d_namlen; 2498 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2499 nlen <= NFS_MAXNAMLEN && 2500 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2501 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2502 || (nlen == 1 && dp->d_name[0] != '.'))) { 2503 /* 2504 * Save the current position in the reply, in case 2505 * this entry exceeds cnt. 2506 */ 2507 mb1 = nd->nd_mb; 2508 bpos1 = nd->nd_bpos; 2509 bextpg1 = nd->nd_bextpg; 2510 bextpgsiz1 = nd->nd_bextpgsiz; 2511 2512 /* 2513 * For readdir_and_lookup get the vnode using 2514 * the file number. 
2515 */ 2516 nvp = NULL; 2517 refp = NULL; 2518 r = 0; 2519 at_root = 0; 2520 needs_unbusy = 0; 2521 new_mp = mp; 2522 mounted_on_fileno = (uint64_t)dp->d_fileno; 2523 if ((nd->nd_flag & ND_NFSV3) || 2524 NFSNONZERO_ATTRBIT(&savbits)) { 2525 if (nd->nd_flag & ND_NFSV4) 2526 refp = nfsv4root_getreferral(NULL, 2527 vp, dp->d_fileno); 2528 if (refp == NULL) { 2529 if (usevget) 2530 r = VFS_VGET(mp, dp->d_fileno, 2531 LK_SHARED, &nvp); 2532 else 2533 r = EOPNOTSUPP; 2534 if (r == EOPNOTSUPP) { 2535 if (usevget) { 2536 usevget = 0; 2537 cn.cn_nameiop = LOOKUP; 2538 cn.cn_lkflags = 2539 LK_SHARED | 2540 LK_RETRY; 2541 cn.cn_cred = 2542 nd->nd_cred; 2543 cn.cn_thread = p; 2544 } 2545 cn.cn_nameptr = dp->d_name; 2546 cn.cn_namelen = nlen; 2547 cn.cn_flags = ISLASTCN | 2548 NOFOLLOW | LOCKLEAF; 2549 if (nlen == 2 && 2550 dp->d_name[0] == '.' && 2551 dp->d_name[1] == '.') 2552 cn.cn_flags |= 2553 ISDOTDOT; 2554 if (NFSVOPLOCK(vp, LK_SHARED) 2555 != 0) { 2556 nd->nd_repstat = EPERM; 2557 break; 2558 } 2559 if ((vp->v_vflag & VV_ROOT) != 0 2560 && (cn.cn_flags & ISDOTDOT) 2561 != 0) { 2562 vref(vp); 2563 nvp = vp; 2564 r = 0; 2565 } else { 2566 r = VOP_LOOKUP(vp, &nvp, 2567 &cn); 2568 if (vp != nvp) 2569 NFSVOPUNLOCK(vp); 2570 } 2571 } 2572 2573 /* 2574 * For NFSv4, check to see if nvp is 2575 * a mount point and get the mount 2576 * point vnode, as required. 2577 */ 2578 if (r == 0 && 2579 nfsrv_enable_crossmntpt != 0 && 2580 (nd->nd_flag & ND_NFSV4) != 0 && 2581 nvp->v_type == VDIR && 2582 nvp->v_mountedhere != NULL) { 2583 new_mp = nvp->v_mountedhere; 2584 r = vfs_busy(new_mp, 0); 2585 vput(nvp); 2586 nvp = NULL; 2587 if (r == 0) { 2588 r = VFS_ROOT(new_mp, 2589 LK_SHARED, &nvp); 2590 needs_unbusy = 1; 2591 if (r == 0) 2592 at_root = 1; 2593 } 2594 } 2595 } 2596 2597 /* 2598 * If we failed to look up the entry, then it 2599 * has become invalid, most likely removed. 
2600 */ 2601 if (r != 0) { 2602 if (needs_unbusy) 2603 vfs_unbusy(new_mp); 2604 goto invalid; 2605 } 2606 KASSERT(refp != NULL || nvp != NULL, 2607 ("%s: undetected lookup error", __func__)); 2608 2609 if (refp == NULL && 2610 ((nd->nd_flag & ND_NFSV3) || 2611 NFSNONZERO_ATTRBIT(&attrbits))) { 2612 r = nfsvno_getfh(nvp, &nfh, p); 2613 if (!r) 2614 r = nfsvno_getattr(nvp, nvap, nd, p, 2615 1, &attrbits); 2616 if (r == 0 && is_zfs == 1 && 2617 nfsrv_enable_crossmntpt != 0 && 2618 (nd->nd_flag & ND_NFSV4) != 0 && 2619 nvp->v_type == VDIR && 2620 vp->v_mount != nvp->v_mount) { 2621 /* 2622 * For a ZFS snapshot, there is a 2623 * pseudo mount that does not set 2624 * v_mountedhere, so it needs to 2625 * be detected via a different 2626 * mount structure. 2627 */ 2628 at_root = 1; 2629 if (new_mp == mp) 2630 new_mp = nvp->v_mount; 2631 } 2632 } 2633 2634 /* 2635 * If we failed to get attributes of the entry, 2636 * then just skip it for NFSv3 (the traditional 2637 * behavior in the old NFS server). 2638 * For NFSv4 the behavior is controlled by 2639 * RDATTRERROR: we either ignore the error or 2640 * fail the request. 2641 * Note that RDATTRERROR is never set for NFSv3. 
2642 */ 2643 if (r != 0) { 2644 if (!NFSISSET_ATTRBIT(&attrbits, 2645 NFSATTRBIT_RDATTRERROR)) { 2646 vput(nvp); 2647 if (needs_unbusy != 0) 2648 vfs_unbusy(new_mp); 2649 if ((nd->nd_flag & ND_NFSV3)) 2650 goto invalid; 2651 nd->nd_repstat = r; 2652 break; 2653 } 2654 } 2655 } 2656 2657 /* 2658 * Build the directory record xdr 2659 */ 2660 if (nd->nd_flag & ND_NFSV3) { 2661 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2662 *tl++ = newnfs_true; 2663 *tl++ = 0; 2664 *tl = txdr_unsigned(dp->d_fileno); 2665 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2666 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2667 *tl++ = 0; 2668 *tl = txdr_unsigned(*cookiep); 2669 nfsrv_postopattr(nd, 0, nvap); 2670 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2671 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2672 if (nvp != NULL) 2673 vput(nvp); 2674 } else { 2675 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2676 *tl++ = newnfs_true; 2677 *tl++ = 0; 2678 *tl = txdr_unsigned(*cookiep); 2679 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2680 if (nvp != NULL) { 2681 supports_nfsv4acls = 2682 nfs_supportsnfsv4acls(nvp); 2683 NFSVOPUNLOCK(nvp); 2684 } else 2685 supports_nfsv4acls = 0; 2686 if (refp != NULL) { 2687 dirlen += nfsrv_putreferralattr(nd, 2688 &savbits, refp, 0, 2689 &nd->nd_repstat); 2690 if (nd->nd_repstat) { 2691 if (nvp != NULL) 2692 vrele(nvp); 2693 if (needs_unbusy != 0) 2694 vfs_unbusy(new_mp); 2695 break; 2696 } 2697 } else if (r) { 2698 dirlen += nfsvno_fillattr(nd, new_mp, 2699 nvp, nvap, &nfh, r, &rderrbits, 2700 nd->nd_cred, p, isdgram, 0, 2701 supports_nfsv4acls, at_root, 2702 mounted_on_fileno); 2703 } else { 2704 dirlen += nfsvno_fillattr(nd, new_mp, 2705 nvp, nvap, &nfh, r, &attrbits, 2706 nd->nd_cred, p, isdgram, 0, 2707 supports_nfsv4acls, at_root, 2708 mounted_on_fileno); 2709 } 2710 if (nvp != NULL) 2711 vrele(nvp); 2712 dirlen += (3 * NFSX_UNSIGNED); 2713 } 2714 if (needs_unbusy != 0) 2715 vfs_unbusy(new_mp); 2716 if (dirlen <= cnt) 2717 entrycnt++; 2718 
} 2719 invalid: 2720 cpos += dp->d_reclen; 2721 dp = (struct dirent *)cpos; 2722 cookiep++; 2723 ncookies--; 2724 } 2725 vrele(vp); 2726 vfs_unbusy(mp); 2727 2728 /* 2729 * If dirlen > cnt, we must strip off the last entry. If that 2730 * results in an empty reply, report NFSERR_TOOSMALL. 2731 */ 2732 if (dirlen > cnt || nd->nd_repstat) { 2733 if (!nd->nd_repstat && entrycnt == 0) 2734 nd->nd_repstat = NFSERR_TOOSMALL; 2735 if (nd->nd_repstat) { 2736 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); 2737 if (nd->nd_flag & ND_NFSV3) 2738 nfsrv_postopattr(nd, getret, &at); 2739 } else 2740 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); 2741 eofflag = 0; 2742 } else if (cpos < cend) 2743 eofflag = 0; 2744 if (!nd->nd_repstat) { 2745 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2746 *tl++ = newnfs_false; 2747 if (eofflag) 2748 *tl = newnfs_true; 2749 else 2750 *tl = newnfs_false; 2751 } 2752 free(cookies, M_TEMP); 2753 free(rbuf, M_TEMP); 2754 2755 out: 2756 NFSEXITCODE2(0, nd); 2757 return (0); 2758 nfsmout: 2759 vput(vp); 2760 NFSEXITCODE2(error, nd); 2761 return (error); 2762 } 2763 2764 /* 2765 * Get the settable attributes out of the mbuf list. 2766 * (Return 0 or EBADRPC) 2767 */ 2768 int 2769 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2770 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2771 { 2772 u_int32_t *tl; 2773 struct nfsv2_sattr *sp; 2774 int error = 0, toclient = 0; 2775 2776 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2777 case ND_NFSV2: 2778 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2779 /* 2780 * Some old clients didn't fill in the high order 16bits. 
2781 * --> check the low order 2 bytes for 0xffff 2782 */ 2783 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2784 nvap->na_mode = nfstov_mode(sp->sa_mode); 2785 if (sp->sa_uid != newnfs_xdrneg1) 2786 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2787 if (sp->sa_gid != newnfs_xdrneg1) 2788 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2789 if (sp->sa_size != newnfs_xdrneg1) 2790 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2791 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2792 #ifdef notyet 2793 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2794 #else 2795 nvap->na_atime.tv_sec = 2796 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2797 nvap->na_atime.tv_nsec = 0; 2798 #endif 2799 } 2800 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2801 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2802 break; 2803 case ND_NFSV3: 2804 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2805 if (*tl == newnfs_true) { 2806 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2807 nvap->na_mode = nfstov_mode(*tl); 2808 } 2809 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2810 if (*tl == newnfs_true) { 2811 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2812 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2813 } 2814 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2815 if (*tl == newnfs_true) { 2816 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2817 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2818 } 2819 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2820 if (*tl == newnfs_true) { 2821 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2822 nvap->na_size = fxdr_hyper(tl); 2823 } 2824 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2825 switch (fxdr_unsigned(int, *tl)) { 2826 case NFSV3SATTRTIME_TOCLIENT: 2827 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2828 fxdr_nfsv3time(tl, &nvap->na_atime); 2829 toclient = 1; 2830 break; 2831 case NFSV3SATTRTIME_TOSERVER: 2832 vfs_timestamp(&nvap->na_atime); 2833 nvap->na_vaflags |= VA_UTIMES_NULL; 2834 break; 2835 } 2836 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2837 switch (fxdr_unsigned(int, *tl)) { 2838 case NFSV3SATTRTIME_TOCLIENT: 2839 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2840 fxdr_nfsv3time(tl, &nvap->na_mtime); 2841 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2842 break; 2843 case NFSV3SATTRTIME_TOSERVER: 2844 vfs_timestamp(&nvap->na_mtime); 2845 if (!toclient) 2846 nvap->na_vaflags |= VA_UTIMES_NULL; 2847 break; 2848 } 2849 break; 2850 case ND_NFSV4: 2851 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2852 } 2853 nfsmout: 2854 NFSEXITCODE2(error, nd); 2855 return (error); 2856 } 2857 2858 /* 2859 * Handle the setable attributes for V4. 2860 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2861 */ 2862 int 2863 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2864 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2865 { 2866 u_int32_t *tl; 2867 int attrsum = 0; 2868 int i, j; 2869 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2870 int moderet, toclient = 0; 2871 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2872 uid_t uid; 2873 gid_t gid; 2874 u_short mode, mask; /* Same type as va_mode. */ 2875 struct vattr va; 2876 2877 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2878 if (error) 2879 goto nfsmout; 2880 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2881 attrsize = fxdr_unsigned(int, *tl); 2882 2883 /* 2884 * Loop around getting the setable attributes. If an unsupported 2885 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 
2886 */ 2887 if (retnotsup) { 2888 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2889 bitpos = NFSATTRBIT_MAX; 2890 } else { 2891 bitpos = 0; 2892 } 2893 moderet = 0; 2894 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2895 if (attrsum > attrsize) { 2896 error = NFSERR_BADXDR; 2897 goto nfsmout; 2898 } 2899 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2900 switch (bitpos) { 2901 case NFSATTRBIT_SIZE: 2902 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2903 if (vp != NULL && vp->v_type != VREG) { 2904 error = (vp->v_type == VDIR) ? NFSERR_ISDIR : 2905 NFSERR_INVAL; 2906 goto nfsmout; 2907 } 2908 nvap->na_size = fxdr_hyper(tl); 2909 attrsum += NFSX_HYPER; 2910 break; 2911 case NFSATTRBIT_ACL: 2912 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, 2913 p); 2914 if (error) 2915 goto nfsmout; 2916 if (aceerr && !nd->nd_repstat) 2917 nd->nd_repstat = aceerr; 2918 attrsum += aclsize; 2919 break; 2920 case NFSATTRBIT_ARCHIVE: 2921 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2922 if (!nd->nd_repstat) 2923 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2924 attrsum += NFSX_UNSIGNED; 2925 break; 2926 case NFSATTRBIT_HIDDEN: 2927 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2928 if (!nd->nd_repstat) 2929 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2930 attrsum += NFSX_UNSIGNED; 2931 break; 2932 case NFSATTRBIT_MIMETYPE: 2933 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2934 i = fxdr_unsigned(int, *tl); 2935 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 2936 if (error) 2937 goto nfsmout; 2938 if (!nd->nd_repstat) 2939 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2940 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 2941 break; 2942 case NFSATTRBIT_MODE: 2943 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. 
*/ 2944 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2945 nvap->na_mode = nfstov_mode(*tl); 2946 attrsum += NFSX_UNSIGNED; 2947 break; 2948 case NFSATTRBIT_OWNER: 2949 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2950 j = fxdr_unsigned(int, *tl); 2951 if (j < 0) { 2952 error = NFSERR_BADXDR; 2953 goto nfsmout; 2954 } 2955 if (j > NFSV4_SMALLSTR) 2956 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2957 else 2958 cp = namestr; 2959 error = nfsrv_mtostr(nd, cp, j); 2960 if (error) { 2961 if (j > NFSV4_SMALLSTR) 2962 free(cp, M_NFSSTRING); 2963 goto nfsmout; 2964 } 2965 if (!nd->nd_repstat) { 2966 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 2967 &uid); 2968 if (!nd->nd_repstat) 2969 nvap->na_uid = uid; 2970 } 2971 if (j > NFSV4_SMALLSTR) 2972 free(cp, M_NFSSTRING); 2973 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 2974 break; 2975 case NFSATTRBIT_OWNERGROUP: 2976 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2977 j = fxdr_unsigned(int, *tl); 2978 if (j < 0) { 2979 error = NFSERR_BADXDR; 2980 goto nfsmout; 2981 } 2982 if (j > NFSV4_SMALLSTR) 2983 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 2984 else 2985 cp = namestr; 2986 error = nfsrv_mtostr(nd, cp, j); 2987 if (error) { 2988 if (j > NFSV4_SMALLSTR) 2989 free(cp, M_NFSSTRING); 2990 goto nfsmout; 2991 } 2992 if (!nd->nd_repstat) { 2993 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 2994 &gid); 2995 if (!nd->nd_repstat) 2996 nvap->na_gid = gid; 2997 } 2998 if (j > NFSV4_SMALLSTR) 2999 free(cp, M_NFSSTRING); 3000 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3001 break; 3002 case NFSATTRBIT_SYSTEM: 3003 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3004 if (!nd->nd_repstat) 3005 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3006 attrsum += NFSX_UNSIGNED; 3007 break; 3008 case NFSATTRBIT_TIMEACCESSSET: 3009 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3010 attrsum += NFSX_UNSIGNED; 3011 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3012 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3013 fxdr_nfsv4time(tl, &nvap->na_atime); 3014 
toclient = 1; 3015 attrsum += NFSX_V4TIME; 3016 } else { 3017 vfs_timestamp(&nvap->na_atime); 3018 nvap->na_vaflags |= VA_UTIMES_NULL; 3019 } 3020 break; 3021 case NFSATTRBIT_TIMEBACKUP: 3022 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3023 if (!nd->nd_repstat) 3024 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3025 attrsum += NFSX_V4TIME; 3026 break; 3027 case NFSATTRBIT_TIMECREATE: 3028 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3029 fxdr_nfsv4time(tl, &nvap->na_btime); 3030 attrsum += NFSX_V4TIME; 3031 break; 3032 case NFSATTRBIT_TIMEMODIFYSET: 3033 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3034 attrsum += NFSX_UNSIGNED; 3035 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3036 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3037 fxdr_nfsv4time(tl, &nvap->na_mtime); 3038 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3039 attrsum += NFSX_V4TIME; 3040 } else { 3041 vfs_timestamp(&nvap->na_mtime); 3042 if (!toclient) 3043 nvap->na_vaflags |= VA_UTIMES_NULL; 3044 } 3045 break; 3046 case NFSATTRBIT_MODESETMASKED: 3047 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 3048 mode = fxdr_unsigned(u_short, *tl++); 3049 mask = fxdr_unsigned(u_short, *tl); 3050 /* 3051 * vp == NULL implies an Open/Create operation. 3052 * This attribute can only be used for Setattr and 3053 * only for NFSv4.1 or higher. 3054 * If moderet != 0, a mode attribute has also been 3055 * specified and this attribute cannot be done in the 3056 * same Setattr operation. 
3057 */ 3058 if ((nd->nd_flag & ND_NFSV41) == 0) 3059 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3060 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || 3061 vp == NULL) 3062 nd->nd_repstat = NFSERR_INVAL; 3063 else if (moderet == 0) 3064 moderet = VOP_GETATTR(vp, &va, nd->nd_cred); 3065 if (moderet == 0) 3066 nvap->na_mode = (mode & mask) | 3067 (va.va_mode & ~mask); 3068 else 3069 nd->nd_repstat = moderet; 3070 attrsum += 2 * NFSX_UNSIGNED; 3071 break; 3072 default: 3073 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3074 /* 3075 * set bitpos so we drop out of the loop. 3076 */ 3077 bitpos = NFSATTRBIT_MAX; 3078 break; 3079 } 3080 } 3081 3082 /* 3083 * some clients pad the attrlist, so we need to skip over the 3084 * padding. 3085 */ 3086 if (attrsum > attrsize) { 3087 error = NFSERR_BADXDR; 3088 } else { 3089 attrsize = NFSM_RNDUP(attrsize); 3090 if (attrsum < attrsize) 3091 error = nfsm_advance(nd, attrsize - attrsum, -1); 3092 } 3093 nfsmout: 3094 NFSEXITCODE2(error, nd); 3095 return (error); 3096 } 3097 3098 /* 3099 * Check/setup export credentials. 3100 */ 3101 int 3102 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 3103 struct ucred *credanon) 3104 { 3105 int error = 0; 3106 3107 /* 3108 * Check/setup credentials. 3109 */ 3110 if (nd->nd_flag & ND_GSS) 3111 exp->nes_exflag &= ~MNT_EXPORTANON; 3112 3113 /* 3114 * Check to see if the operation is allowed for this security flavor. 3115 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to 3116 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 3117 * Also, allow Secinfo, so that it can acquire the correct flavor(s). 3118 */ 3119 if (nfsvno_testexp(nd, exp) && 3120 nd->nd_procnum != NFSV4OP_SECINFO && 3121 nd->nd_procnum != NFSPROC_FSINFO) { 3122 if (nd->nd_flag & ND_NFSV4) 3123 error = NFSERR_WRONGSEC; 3124 else 3125 error = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3126 goto out; 3127 } 3128 3129 /* 3130 * Check to see if the file system is exported V4 only. 
3131 */ 3132 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 3133 error = NFSERR_PROGNOTV4; 3134 goto out; 3135 } 3136 3137 /* 3138 * Now, map the user credentials. 3139 * (Note that ND_AUTHNONE will only be set for an NFSv3 3140 * Fsinfo RPC. If set for anything else, this code might need 3141 * to change.) 3142 */ 3143 if (NFSVNO_EXPORTED(exp)) { 3144 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 3145 NFSVNO_EXPORTANON(exp) || 3146 (nd->nd_flag & ND_AUTHNONE) != 0) { 3147 nd->nd_cred->cr_uid = credanon->cr_uid; 3148 nd->nd_cred->cr_gid = credanon->cr_gid; 3149 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 3150 credanon->cr_groups); 3151 } else if ((nd->nd_flag & ND_GSS) == 0) { 3152 /* 3153 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 3154 * if there is a replacement credential with a group 3155 * list set up by "nfsuserd -manage-gids". 3156 * If there is no replacement, nfsrv_getgrpscred() 3157 * simply returns its argument. 3158 */ 3159 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 3160 } 3161 } 3162 3163 out: 3164 NFSEXITCODE2(error, nd); 3165 return (error); 3166 } 3167 3168 /* 3169 * Check exports. 3170 */ 3171 int 3172 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 3173 struct ucred **credp) 3174 { 3175 int error; 3176 3177 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3178 &exp->nes_numsecflavor, exp->nes_secflavors); 3179 if (error) { 3180 if (nfs_rootfhset) { 3181 exp->nes_exflag = 0; 3182 exp->nes_numsecflavor = 0; 3183 error = 0; 3184 } 3185 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3186 MAXSECFLAVORS) { 3187 printf("nfsvno_checkexp: numsecflavors out of range\n"); 3188 exp->nes_numsecflavor = 0; 3189 error = EACCES; 3190 } 3191 NFSEXITCODE(error); 3192 return (error); 3193 } 3194 3195 /* 3196 * Get a vnode for a file handle and export stuff. 
3197 */ 3198 int 3199 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 3200 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 3201 struct ucred **credp) 3202 { 3203 int error; 3204 3205 *credp = NULL; 3206 exp->nes_numsecflavor = 0; 3207 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 3208 if (error != 0) 3209 /* Make sure the server replies ESTALE to the client. */ 3210 error = ESTALE; 3211 if (nam && !error) { 3212 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3213 &exp->nes_numsecflavor, exp->nes_secflavors); 3214 if (error) { 3215 if (nfs_rootfhset) { 3216 exp->nes_exflag = 0; 3217 exp->nes_numsecflavor = 0; 3218 error = 0; 3219 } else { 3220 vput(*vpp); 3221 } 3222 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3223 MAXSECFLAVORS) { 3224 printf("nfsvno_fhtovp: numsecflavors out of range\n"); 3225 exp->nes_numsecflavor = 0; 3226 error = EACCES; 3227 vput(*vpp); 3228 } 3229 } 3230 NFSEXITCODE(error); 3231 return (error); 3232 } 3233 3234 /* 3235 * nfsd_fhtovp() - convert a fh to a vnode ptr 3236 * - look up fsid in mount list (if not found ret error) 3237 * - get vp and export rights by calling nfsvno_fhtovp() 3238 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 3239 * for AUTH_SYS 3240 * - if mpp != NULL, return the mount point so that it can 3241 * be used for vn_finished_write() by the caller 3242 */ 3243 void 3244 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 3245 struct vnode **vpp, struct nfsexstuff *exp, 3246 struct mount **mpp, int startwrite) 3247 { 3248 struct mount *mp; 3249 struct ucred *credanon; 3250 fhandle_t *fhp; 3251 3252 fhp = (fhandle_t *)nfp->nfsrvfh_data; 3253 /* 3254 * Check for the special case of the nfsv4root_fh. 
3255 */ 3256 mp = vfs_busyfs(&fhp->fh_fsid); 3257 if (mpp != NULL) 3258 *mpp = mp; 3259 if (mp == NULL) { 3260 *vpp = NULL; 3261 nd->nd_repstat = ESTALE; 3262 goto out; 3263 } 3264 3265 if (startwrite) { 3266 vn_start_write(NULL, mpp, V_WAIT); 3267 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3268 lktype = LK_EXCLUSIVE; 3269 } 3270 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3271 &credanon); 3272 vfs_unbusy(mp); 3273 3274 /* 3275 * For NFSv4 without a pseudo root fs, unexported file handles 3276 * can be returned, so that Lookup works everywhere. 3277 */ 3278 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3279 !(nd->nd_flag & ND_NFSV4)) { 3280 vput(*vpp); 3281 nd->nd_repstat = EACCES; 3282 } 3283 3284 /* 3285 * If TLS is required by the export, check the flags in nd_flag. 3286 */ 3287 if (nd->nd_repstat == 0 && ((NFSVNO_EXTLS(exp) && 3288 (nd->nd_flag & ND_TLS) == 0) || 3289 (NFSVNO_EXTLSCERT(exp) && 3290 (nd->nd_flag & ND_TLSCERT) == 0) || 3291 (NFSVNO_EXTLSCERTUSER(exp) && 3292 (nd->nd_flag & ND_TLSCERTUSER) == 0))) { 3293 vput(*vpp); 3294 nd->nd_repstat = NFSERR_ACCES; 3295 } 3296 3297 /* 3298 * Personally, I've never seen any point in requiring a 3299 * reserved port#, since only in the rare case where the 3300 * clients are all boxes with secure system privileges, 3301 * does it provide any enhanced security, but... some people 3302 * believe it to be useful and keep putting this code back in. 3303 * (There is also some "security checker" out there that 3304 * complains if the nfs server doesn't enforce this.) 3305 * However, note the following: 3306 * RFC3530 (NFSv4) specifies that a reserved port# not be 3307 * required. 3308 * RFC2623 recommends that, if a reserved port# is checked for, 3309 * that there be a way to turn that off--> ifdef'd. 
3310 */ 3311 #ifdef NFS_REQRSVPORT 3312 if (!nd->nd_repstat) { 3313 struct sockaddr_in *saddr; 3314 struct sockaddr_in6 *saddr6; 3315 3316 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3317 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3318 if (!(nd->nd_flag & ND_NFSV4) && 3319 ((saddr->sin_family == AF_INET && 3320 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3321 (saddr6->sin6_family == AF_INET6 && 3322 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3323 vput(*vpp); 3324 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3325 } 3326 } 3327 #endif /* NFS_REQRSVPORT */ 3328 3329 /* 3330 * Check/setup credentials. 3331 */ 3332 if (!nd->nd_repstat) { 3333 nd->nd_saveduid = nd->nd_cred->cr_uid; 3334 nd->nd_repstat = nfsd_excred(nd, exp, credanon); 3335 if (nd->nd_repstat) 3336 vput(*vpp); 3337 } 3338 if (credanon != NULL) 3339 crfree(credanon); 3340 if (nd->nd_repstat) { 3341 if (startwrite) 3342 vn_finished_write(mp); 3343 *vpp = NULL; 3344 if (mpp != NULL) 3345 *mpp = NULL; 3346 } 3347 3348 out: 3349 NFSEXITCODE2(0, nd); 3350 } 3351 3352 /* 3353 * glue for fp. 3354 */ 3355 static int 3356 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3357 { 3358 struct filedesc *fdp; 3359 struct file *fp; 3360 int error = 0; 3361 3362 fdp = p->td_proc->p_fd; 3363 if (fd < 0 || fd >= fdp->fd_nfiles || 3364 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3365 error = EBADF; 3366 goto out; 3367 } 3368 *fpp = fp; 3369 3370 out: 3371 NFSEXITCODE(error); 3372 return (error); 3373 } 3374 3375 /* 3376 * Called from nfssvc() to update the exports list. Just call 3377 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3378 * in the mount list. 
3379 */ 3380 int 3381 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3382 { 3383 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3384 int error = 0; 3385 struct nameidata nd; 3386 fhandle_t fh; 3387 3388 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3389 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3390 nfs_rootfhset = 0; 3391 else if (error == 0) { 3392 if (nfsexargp->fspec == NULL) { 3393 error = EPERM; 3394 goto out; 3395 } 3396 /* 3397 * If fspec != NULL, this is the v4root path. 3398 */ 3399 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 3400 nfsexargp->fspec, p); 3401 if ((error = namei(&nd)) != 0) 3402 goto out; 3403 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3404 vrele(nd.ni_vp); 3405 if (!error) { 3406 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3407 NFSBCOPY((caddr_t)&fh, 3408 nfs_rootfh.nfsrvfh_data, 3409 sizeof (fhandle_t)); 3410 nfs_rootfhset = 1; 3411 } 3412 } 3413 3414 out: 3415 NFSEXITCODE(error); 3416 return (error); 3417 } 3418 3419 /* 3420 * This function needs to test to see if the system is near its limit 3421 * for memory allocation via malloc() or mget() and return True iff 3422 * either of these resources are near their limit. 3423 * XXX (For now, this is just a stub.) 3424 */ 3425 int nfsrv_testmalloclimit = 0; 3426 int 3427 nfsrv_mallocmget_limit(void) 3428 { 3429 static int printmesg = 0; 3430 static int testval = 1; 3431 3432 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3433 if ((printmesg++ % 100) == 0) 3434 printf("nfsd: malloc/mget near limit\n"); 3435 return (1); 3436 } 3437 return (0); 3438 } 3439 3440 /* 3441 * BSD specific initialization of a mount point. 
3442 */ 3443 void 3444 nfsd_mntinit(void) 3445 { 3446 static int inited = 0; 3447 3448 if (inited) 3449 return; 3450 inited = 1; 3451 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3452 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 3453 TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); 3454 nfsv4root_mnt.mnt_export = NULL; 3455 TAILQ_INIT(&nfsv4root_opt); 3456 TAILQ_INIT(&nfsv4root_newopt); 3457 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 3458 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 3459 nfsv4root_mnt.mnt_nvnodelistsize = 0; 3460 nfsv4root_mnt.mnt_lazyvnodelistsize = 0; 3461 } 3462 3463 /* 3464 * Get a vnode for a file handle, without checking exports, etc. 3465 */ 3466 struct vnode * 3467 nfsvno_getvp(fhandle_t *fhp) 3468 { 3469 struct mount *mp; 3470 struct vnode *vp; 3471 int error; 3472 3473 mp = vfs_busyfs(&fhp->fh_fsid); 3474 if (mp == NULL) 3475 return (NULL); 3476 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3477 vfs_unbusy(mp); 3478 if (error) 3479 return (NULL); 3480 return (vp); 3481 } 3482 3483 /* 3484 * Do a local VOP_ADVLOCK(). 3485 */ 3486 int 3487 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3488 u_int64_t end, struct thread *td) 3489 { 3490 int error = 0; 3491 struct flock fl; 3492 u_int64_t tlen; 3493 3494 if (nfsrv_dolocallocks == 0) 3495 goto out; 3496 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3497 3498 fl.l_whence = SEEK_SET; 3499 fl.l_type = ftype; 3500 fl.l_start = (off_t)first; 3501 if (end == NFS64BITSSET) { 3502 fl.l_len = 0; 3503 } else { 3504 tlen = end - first; 3505 fl.l_len = (off_t)tlen; 3506 } 3507 /* 3508 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3509 * values for all calls, so that all locks will be held by the 3510 * nfsd server. (The nfsd server handles conflicts between the 3511 * various clients.) 3512 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3513 * bytes, so it can't be put in l_sysid. 
3514 */ 3515 if (nfsv4_sysid == 0) 3516 nfsv4_sysid = nlm_acquire_next_sysid(); 3517 fl.l_pid = (pid_t)0; 3518 fl.l_sysid = (int)nfsv4_sysid; 3519 3520 if (ftype == F_UNLCK) 3521 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3522 (F_POSIX | F_REMOTE)); 3523 else 3524 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3525 (F_POSIX | F_REMOTE)); 3526 3527 out: 3528 NFSEXITCODE(error); 3529 return (error); 3530 } 3531 3532 /* 3533 * Check the nfsv4 root exports. 3534 */ 3535 int 3536 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3537 { 3538 struct ucred *credanon; 3539 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; 3540 uint64_t exflags; 3541 3542 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3543 &credanon, &numsecflavor, secflavors); 3544 if (error) { 3545 error = NFSERR_PROGUNAVAIL; 3546 goto out; 3547 } 3548 if (credanon != NULL) 3549 crfree(credanon); 3550 for (i = 0; i < numsecflavor; i++) { 3551 if (secflavors[i] == AUTH_SYS) 3552 nd->nd_flag |= ND_EXAUTHSYS; 3553 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3554 nd->nd_flag |= ND_EXGSS; 3555 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3556 nd->nd_flag |= ND_EXGSSINTEGRITY; 3557 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3558 nd->nd_flag |= ND_EXGSSPRIVACY; 3559 } 3560 3561 /* And set ND_EXxx flags for TLS. 
*/ 3562 if ((exflags & MNT_EXTLS) != 0) { 3563 nd->nd_flag |= ND_EXTLS; 3564 if ((exflags & MNT_EXTLSCERT) != 0) 3565 nd->nd_flag |= ND_EXTLSCERT; 3566 if ((exflags & MNT_EXTLSCERTUSER) != 0) 3567 nd->nd_flag |= ND_EXTLSCERTUSER; 3568 } 3569 3570 out: 3571 NFSEXITCODE(error); 3572 return (error); 3573 } 3574 3575 /* 3576 * Nfs server pseudo system call for the nfsd's 3577 */ 3578 /* 3579 * MPSAFE 3580 */ 3581 static int 3582 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3583 { 3584 struct file *fp; 3585 struct nfsd_addsock_args sockarg; 3586 struct nfsd_nfsd_args nfsdarg; 3587 struct nfsd_nfsd_oargs onfsdarg; 3588 struct nfsd_pnfsd_args pnfsdarg; 3589 struct vnode *vp, *nvp, *curdvp; 3590 struct pnfsdsfile *pf; 3591 struct nfsdevice *ds, *fds; 3592 cap_rights_t rights; 3593 int buflen, error, ret; 3594 char *buf, *cp, *cp2, *cp3; 3595 char fname[PNFS_FILENAME_LEN + 1]; 3596 3597 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3598 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3599 if (error) 3600 goto out; 3601 /* 3602 * Since we don't know what rights might be required, 3603 * pretend that we need them all. It is better to be too 3604 * careful than too reckless. 
3605 */ 3606 error = fget(td, sockarg.sock, 3607 cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); 3608 if (error != 0) 3609 goto out; 3610 if (fp->f_type != DTYPE_SOCKET) { 3611 fdrop(fp, td); 3612 error = EPERM; 3613 goto out; 3614 } 3615 error = nfsrvd_addsock(fp); 3616 fdrop(fp, td); 3617 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3618 if (uap->argp == NULL) { 3619 error = EINVAL; 3620 goto out; 3621 } 3622 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3623 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3624 if (error == 0) { 3625 nfsdarg.principal = onfsdarg.principal; 3626 nfsdarg.minthreads = onfsdarg.minthreads; 3627 nfsdarg.maxthreads = onfsdarg.maxthreads; 3628 nfsdarg.version = 1; 3629 nfsdarg.addr = NULL; 3630 nfsdarg.addrlen = 0; 3631 nfsdarg.dnshost = NULL; 3632 nfsdarg.dnshostlen = 0; 3633 nfsdarg.dspath = NULL; 3634 nfsdarg.dspathlen = 0; 3635 nfsdarg.mdspath = NULL; 3636 nfsdarg.mdspathlen = 0; 3637 nfsdarg.mirrorcnt = 1; 3638 } 3639 } else 3640 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3641 if (error) 3642 goto out; 3643 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3644 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3645 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3646 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 3647 nfsdarg.mirrorcnt >= 1 && 3648 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3649 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3650 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 3651 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3652 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 3653 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3654 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 3655 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3656 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3657 if (error != 0) { 3658 free(cp, M_TEMP); 3659 goto out; 3660 } 3661 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. 
*/ 3662 nfsdarg.addr = cp; 3663 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3664 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3665 if (error != 0) { 3666 free(nfsdarg.addr, M_TEMP); 3667 free(cp, M_TEMP); 3668 goto out; 3669 } 3670 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3671 nfsdarg.dnshost = cp; 3672 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3673 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3674 if (error != 0) { 3675 free(nfsdarg.addr, M_TEMP); 3676 free(nfsdarg.dnshost, M_TEMP); 3677 free(cp, M_TEMP); 3678 goto out; 3679 } 3680 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3681 nfsdarg.dspath = cp; 3682 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 3683 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 3684 if (error != 0) { 3685 free(nfsdarg.addr, M_TEMP); 3686 free(nfsdarg.dnshost, M_TEMP); 3687 free(nfsdarg.dspath, M_TEMP); 3688 free(cp, M_TEMP); 3689 goto out; 3690 } 3691 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ 3692 nfsdarg.mdspath = cp; 3693 } else { 3694 nfsdarg.addr = NULL; 3695 nfsdarg.addrlen = 0; 3696 nfsdarg.dnshost = NULL; 3697 nfsdarg.dnshostlen = 0; 3698 nfsdarg.dspath = NULL; 3699 nfsdarg.dspathlen = 0; 3700 nfsdarg.mdspath = NULL; 3701 nfsdarg.mdspathlen = 0; 3702 nfsdarg.mirrorcnt = 1; 3703 } 3704 error = nfsrvd_nfsd(td, &nfsdarg); 3705 free(nfsdarg.addr, M_TEMP); 3706 free(nfsdarg.dnshost, M_TEMP); 3707 free(nfsdarg.dspath, M_TEMP); 3708 free(nfsdarg.mdspath, M_TEMP); 3709 } else if (uap->flag & NFSSVC_PNFSDS) { 3710 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3711 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 3712 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 3713 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3714 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3715 NULL); 3716 if (error == 0) 3717 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 3718 free(cp, M_TEMP); 3719 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3720 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3721 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3722 buf = malloc(buflen, M_TEMP, M_WAITOK); 3723 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3724 NULL); 3725 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3726 if (error == 0 && pnfsdarg.dspath != NULL) { 3727 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3728 error = copyinstr(pnfsdarg.dspath, cp2, 3729 PATH_MAX + 1, NULL); 3730 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3731 error); 3732 } else 3733 cp2 = NULL; 3734 if (error == 0 && pnfsdarg.curdspath != NULL) { 3735 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3736 error = copyinstr(pnfsdarg.curdspath, cp3, 3737 PATH_MAX + 1, NULL); 3738 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3739 error); 3740 } else 3741 cp3 = NULL; 3742 curdvp = NULL; 3743 fds = NULL; 3744 if (error == 0) 3745 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3746 &buflen, fname, td, &vp, &nvp, &pf, &ds, 3747 &fds); 3748 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 3749 
if (error == 0) { 3750 if (pf->dsf_dir >= nfsrv_dsdirsize) { 3751 printf("copymr: dsdir out of range\n"); 3752 pf->dsf_dir = 0; 3753 } 3754 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 3755 error = nfsrv_copymr(vp, nvp, 3756 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 3757 (struct pnfsdsfile *)buf, 3758 buflen / sizeof(*pf), td->td_ucred, td); 3759 vput(vp); 3760 vput(nvp); 3761 if (fds != NULL && error == 0) { 3762 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 3763 ret = vn_lock(curdvp, LK_EXCLUSIVE); 3764 if (ret == 0) { 3765 nfsrv_dsremove(curdvp, fname, 3766 td->td_ucred, td); 3767 NFSVOPUNLOCK(curdvp); 3768 } 3769 } 3770 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 3771 } 3772 free(cp, M_TEMP); 3773 free(cp2, M_TEMP); 3774 free(cp3, M_TEMP); 3775 free(buf, M_TEMP); 3776 } 3777 } else { 3778 error = nfssvc_srvcall(td, uap, td->td_ucred); 3779 } 3780 3781 out: 3782 NFSEXITCODE(error); 3783 return (error); 3784 } 3785 3786 static int 3787 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3788 { 3789 struct nfsex_args export; 3790 struct nfsex_oldargs oexp; 3791 struct file *fp = NULL; 3792 int stablefd, i, len; 3793 struct nfsd_clid adminrevoke; 3794 struct nfsd_dumplist dumplist; 3795 struct nfsd_dumpclients *dumpclients; 3796 struct nfsd_dumplocklist dumplocklist; 3797 struct nfsd_dumplocks *dumplocks; 3798 struct nameidata nd; 3799 vnode_t vp; 3800 int error = EINVAL, igotlock; 3801 struct proc *procp; 3802 gid_t *grps; 3803 static int suspend_nfsd = 0; 3804 3805 if (uap->flag & NFSSVC_PUBLICFH) { 3806 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3807 sizeof (fhandle_t)); 3808 error = copyin(uap->argp, 3809 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3810 if (!error) 3811 nfs_pubfhset = 1; 3812 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3813 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { 3814 error = copyin(uap->argp,(caddr_t)&export, 3815 sizeof (struct nfsex_args)); 3816 if (!error) { 3817 grps = NULL; 3818 if 
(export.export.ex_ngroups > NGROUPS_MAX || 3819 export.export.ex_ngroups < 0) 3820 error = EINVAL; 3821 else if (export.export.ex_ngroups > 0) { 3822 grps = malloc(export.export.ex_ngroups * 3823 sizeof(gid_t), M_TEMP, M_WAITOK); 3824 error = copyin(export.export.ex_groups, grps, 3825 export.export.ex_ngroups * sizeof(gid_t)); 3826 export.export.ex_groups = grps; 3827 } else 3828 export.export.ex_groups = NULL; 3829 if (!error) 3830 error = nfsrv_v4rootexport(&export, cred, p); 3831 free(grps, M_TEMP); 3832 } 3833 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3834 NFSSVC_V4ROOTEXPORT) { 3835 error = copyin(uap->argp,(caddr_t)&oexp, 3836 sizeof (struct nfsex_oldargs)); 3837 if (!error) { 3838 memset(&export.export, 0, sizeof(export.export)); 3839 export.export.ex_flags = (uint64_t)oexp.export.ex_flags; 3840 export.export.ex_root = oexp.export.ex_root; 3841 export.export.ex_uid = oexp.export.ex_anon.cr_uid; 3842 export.export.ex_ngroups = 3843 oexp.export.ex_anon.cr_ngroups; 3844 export.export.ex_groups = NULL; 3845 if (export.export.ex_ngroups > XU_NGROUPS || 3846 export.export.ex_ngroups < 0) 3847 error = EINVAL; 3848 else if (export.export.ex_ngroups > 0) { 3849 export.export.ex_groups = malloc( 3850 export.export.ex_ngroups * sizeof(gid_t), 3851 M_TEMP, M_WAITOK); 3852 for (i = 0; i < export.export.ex_ngroups; i++) 3853 export.export.ex_groups[i] = 3854 oexp.export.ex_anon.cr_groups[i]; 3855 } 3856 export.export.ex_addr = oexp.export.ex_addr; 3857 export.export.ex_addrlen = oexp.export.ex_addrlen; 3858 export.export.ex_mask = oexp.export.ex_mask; 3859 export.export.ex_masklen = oexp.export.ex_masklen; 3860 export.export.ex_indexfile = oexp.export.ex_indexfile; 3861 export.export.ex_numsecflavors = 3862 oexp.export.ex_numsecflavors; 3863 if (export.export.ex_numsecflavors >= MAXSECFLAVORS || 3864 export.export.ex_numsecflavors < 0) 3865 error = EINVAL; 3866 else { 3867 for (i = 0; i < export.export.ex_numsecflavors; 3868 i++) 3869 
export.export.ex_secflavors[i] = 3870 oexp.export.ex_secflavors[i]; 3871 } 3872 export.fspec = oexp.fspec; 3873 if (error == 0) 3874 error = nfsrv_v4rootexport(&export, cred, p); 3875 free(export.export.ex_groups, M_TEMP); 3876 } 3877 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 3878 nfs_pubfhset = 0; 3879 error = 0; 3880 } else if (uap->flag & NFSSVC_STABLERESTART) { 3881 error = copyin(uap->argp, (caddr_t)&stablefd, 3882 sizeof (int)); 3883 if (!error) 3884 error = fp_getfvp(p, stablefd, &fp, &vp); 3885 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 3886 error = EBADF; 3887 if (!error && newnfs_numnfsd != 0) 3888 error = EPERM; 3889 if (!error) { 3890 nfsrv_stablefirst.nsf_fp = fp; 3891 nfsrv_setupstable(p); 3892 } 3893 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 3894 error = copyin(uap->argp, (caddr_t)&adminrevoke, 3895 sizeof (struct nfsd_clid)); 3896 if (!error) 3897 error = nfsrv_adminrevoke(&adminrevoke, p); 3898 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 3899 error = copyin(uap->argp, (caddr_t)&dumplist, 3900 sizeof (struct nfsd_dumplist)); 3901 if (!error && (dumplist.ndl_size < 1 || 3902 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 3903 error = EPERM; 3904 if (!error) { 3905 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 3906 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 3907 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 3908 error = copyout(dumpclients, dumplist.ndl_list, len); 3909 free(dumpclients, M_TEMP); 3910 } 3911 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 3912 error = copyin(uap->argp, (caddr_t)&dumplocklist, 3913 sizeof (struct nfsd_dumplocklist)); 3914 if (!error && (dumplocklist.ndllck_size < 1 || 3915 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 3916 error = EPERM; 3917 if (!error) 3918 error = nfsrv_lookupfilename(&nd, 3919 dumplocklist.ndllck_fname, p); 3920 if (!error) { 3921 len = sizeof (struct nfsd_dumplocks) * 3922 dumplocklist.ndllck_size; 3923 dumplocks = malloc(len, M_TEMP, M_WAITOK | 
M_ZERO); 3924 nfsrv_dumplocks(nd.ni_vp, dumplocks, 3925 dumplocklist.ndllck_size, p); 3926 vput(nd.ni_vp); 3927 error = copyout(dumplocks, dumplocklist.ndllck_list, 3928 len); 3929 free(dumplocks, M_TEMP); 3930 } 3931 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 3932 procp = p->td_proc; 3933 PROC_LOCK(procp); 3934 nfsd_master_pid = procp->p_pid; 3935 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 3936 nfsd_master_start = procp->p_stats->p_start; 3937 nfsd_master_proc = procp; 3938 PROC_UNLOCK(procp); 3939 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 3940 NFSLOCKV4ROOTMUTEX(); 3941 if (suspend_nfsd == 0) { 3942 /* Lock out all nfsd threads */ 3943 do { 3944 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, 3945 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 3946 } while (igotlock == 0 && suspend_nfsd == 0); 3947 suspend_nfsd = 1; 3948 } 3949 NFSUNLOCKV4ROOTMUTEX(); 3950 error = 0; 3951 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 3952 NFSLOCKV4ROOTMUTEX(); 3953 if (suspend_nfsd != 0) { 3954 nfsv4_unlock(&nfsd_suspend_lock, 0); 3955 suspend_nfsd = 0; 3956 } 3957 NFSUNLOCKV4ROOTMUTEX(); 3958 error = 0; 3959 } 3960 3961 NFSEXITCODE(error); 3962 return (error); 3963 } 3964 3965 /* 3966 * Check exports. 3967 * Returns 0 if ok, 1 otherwise. 3968 */ 3969 int 3970 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 3971 { 3972 int i; 3973 3974 /* 3975 * This seems odd, but allow the case where the security flavor 3976 * list is empty. This happens when NFSv4 is traversing non-exported 3977 * file systems. Exported file systems should always have a non-empty 3978 * security flavor list. 3979 */ 3980 if (exp->nes_numsecflavor == 0) 3981 return (0); 3982 3983 for (i = 0; i < exp->nes_numsecflavor; i++) { 3984 /* 3985 * The tests for privacy and integrity must be first, 3986 * since ND_GSS is set for everything but AUTH_SYS. 
3987 */ 3988 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 3989 (nd->nd_flag & ND_GSSPRIVACY)) 3990 return (0); 3991 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 3992 (nd->nd_flag & ND_GSSINTEGRITY)) 3993 return (0); 3994 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 3995 (nd->nd_flag & ND_GSS)) 3996 return (0); 3997 if (exp->nes_secflavors[i] == AUTH_SYS && 3998 (nd->nd_flag & ND_GSS) == 0) 3999 return (0); 4000 } 4001 return (1); 4002 } 4003 4004 /* 4005 * Calculate a hash value for the fid in a file handle. 4006 */ 4007 uint32_t 4008 nfsrv_hashfh(fhandle_t *fhp) 4009 { 4010 uint32_t hashval; 4011 4012 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 4013 return (hashval); 4014 } 4015 4016 /* 4017 * Calculate a hash value for the sessionid. 4018 */ 4019 uint32_t 4020 nfsrv_hashsessionid(uint8_t *sessionid) 4021 { 4022 uint32_t hashval; 4023 4024 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 4025 return (hashval); 4026 } 4027 4028 /* 4029 * Signal the userland master nfsd to backup the stable restart file. 4030 */ 4031 void 4032 nfsrv_backupstable(void) 4033 { 4034 struct proc *procp; 4035 4036 if (nfsd_master_proc != NULL) { 4037 procp = pfind(nfsd_master_pid); 4038 /* Try to make sure it is the correct process. */ 4039 if (procp == nfsd_master_proc && 4040 procp->p_stats->p_start.tv_sec == 4041 nfsd_master_start.tv_sec && 4042 procp->p_stats->p_start.tv_usec == 4043 nfsd_master_start.tv_usec && 4044 strcmp(procp->p_comm, nfsd_master_comm) == 0) 4045 kern_psignal(procp, SIGUSR2); 4046 else 4047 nfsd_master_proc = NULL; 4048 4049 if (procp != NULL) 4050 PROC_UNLOCK(procp); 4051 } 4052 } 4053 4054 /* 4055 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 4056 * The arguments are in a structure, so that they can be passed through 4057 * taskqueue for a kernel process to execute this function. 
4058 */ 4059 struct nfsrvdscreate { 4060 int done; 4061 int inprog; 4062 struct task tsk; 4063 struct ucred *tcred; 4064 struct vnode *dvp; 4065 NFSPROC_T *p; 4066 struct pnfsdsfile *pf; 4067 int err; 4068 fhandle_t fh; 4069 struct vattr va; 4070 struct vattr createva; 4071 }; 4072 4073 int 4074 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 4075 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 4076 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 4077 { 4078 struct vnode *nvp; 4079 struct nameidata named; 4080 struct vattr va; 4081 char *bufp; 4082 u_long *hashp; 4083 struct nfsnode *np; 4084 struct nfsmount *nmp; 4085 int error; 4086 4087 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 4088 LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); 4089 nfsvno_setpathbuf(&named, &bufp, &hashp); 4090 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 4091 named.ni_cnd.cn_thread = p; 4092 named.ni_cnd.cn_nameptr = bufp; 4093 if (fnamep != NULL) { 4094 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 4095 named.ni_cnd.cn_namelen = strlen(bufp); 4096 } else 4097 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 4098 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 4099 4100 /* Create the date file in the DS mount. */ 4101 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4102 if (error == 0) { 4103 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 4104 NFSVOPUNLOCK(dvp); 4105 if (error == 0) { 4106 /* Set the ownership of the file. 
*/ 4107 error = VOP_SETATTR(nvp, nvap, tcred); 4108 NFSD_DEBUG(4, "nfsrv_dscreate:" 4109 " setattr-uid=%d\n", error); 4110 if (error != 0) 4111 vput(nvp); 4112 } 4113 if (error != 0) 4114 printf("pNFS: pnfscreate failed=%d\n", error); 4115 } else 4116 printf("pNFS: pnfscreate vnlock=%d\n", error); 4117 if (error == 0) { 4118 np = VTONFS(nvp); 4119 nmp = VFSTONFS(nvp->v_mount); 4120 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 4121 != 0 || nmp->nm_nam->sa_len > sizeof( 4122 struct sockaddr_in6) || 4123 np->n_fhp->nfh_len != NFSX_MYFH) { 4124 printf("Bad DS file: fstype=%s salen=%d" 4125 " fhlen=%d\n", 4126 nvp->v_mount->mnt_vfc->vfc_name, 4127 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 4128 error = ENOENT; 4129 } 4130 4131 /* Set extattrs for the DS on the MDS file. */ 4132 if (error == 0) { 4133 if (dsa != NULL) { 4134 error = VOP_GETATTR(nvp, &va, tcred); 4135 if (error == 0) { 4136 dsa->dsa_filerev = va.va_filerev; 4137 dsa->dsa_size = va.va_size; 4138 dsa->dsa_atime = va.va_atime; 4139 dsa->dsa_mtime = va.va_mtime; 4140 dsa->dsa_bytes = va.va_bytes; 4141 } 4142 } 4143 if (error == 0) { 4144 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 4145 NFSX_MYFH); 4146 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 4147 nmp->nm_nam->sa_len); 4148 NFSBCOPY(named.ni_cnd.cn_nameptr, 4149 pf->dsf_filename, 4150 sizeof(pf->dsf_filename)); 4151 } 4152 } else 4153 printf("pNFS: pnfscreate can't get DS" 4154 " attr=%d\n", error); 4155 if (nvpp != NULL && error == 0) 4156 *nvpp = nvp; 4157 else 4158 vput(nvp); 4159 } 4160 nfsvno_relpathbuf(&named); 4161 return (error); 4162 } 4163 4164 /* 4165 * Start up the thread that will execute nfsrv_dscreate(). 
4166 */ 4167 static void 4168 start_dscreate(void *arg, int pending) 4169 { 4170 struct nfsrvdscreate *dsc; 4171 4172 dsc = (struct nfsrvdscreate *)arg; 4173 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 4174 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 4175 dsc->done = 1; 4176 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 4177 } 4178 4179 /* 4180 * Create a pNFS data file on the Data Server(s). 4181 */ 4182 static void 4183 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 4184 NFSPROC_T *p) 4185 { 4186 struct nfsrvdscreate *dsc, *tdsc = NULL; 4187 struct nfsdevice *ds, *tds, *fds; 4188 struct mount *mp; 4189 struct pnfsdsfile *pf, *tpf; 4190 struct pnfsdsattr dsattr; 4191 struct vattr va; 4192 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4193 struct nfsmount *nmp; 4194 fhandle_t fh; 4195 uid_t vauid; 4196 gid_t vagid; 4197 u_short vamode; 4198 struct ucred *tcred; 4199 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; 4200 int failpos, timo; 4201 4202 /* Get a DS server directory in a round-robin order. */ 4203 mirrorcnt = 1; 4204 mp = vp->v_mount; 4205 ds = fds = NULL; 4206 NFSDDSLOCK(); 4207 /* 4208 * Search for the first entry that handles this MDS fs, but use the 4209 * first entry for all MDS fs's otherwise. 
4210 */ 4211 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { 4212 if (tds->nfsdev_nmp != NULL) { 4213 if (tds->nfsdev_mdsisset == 0 && ds == NULL) 4214 ds = tds; 4215 else if (tds->nfsdev_mdsisset != 0 && fsidcmp( 4216 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { 4217 ds = fds = tds; 4218 break; 4219 } 4220 } 4221 } 4222 if (ds == NULL) { 4223 NFSDDSUNLOCK(); 4224 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 4225 return; 4226 } 4227 i = dsdir[0] = ds->nfsdev_nextdir; 4228 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 4229 dvp[0] = ds->nfsdev_dsdir[i]; 4230 tds = TAILQ_NEXT(ds, nfsdev_list); 4231 if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { 4232 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { 4233 if (tds->nfsdev_nmp != NULL && 4234 ((tds->nfsdev_mdsisset == 0 && fds == NULL) || 4235 (tds->nfsdev_mdsisset != 0 && fds != NULL && 4236 fsidcmp(&mp->mnt_stat.f_fsid, 4237 &tds->nfsdev_mdsfsid) == 0))) { 4238 dsdir[mirrorcnt] = i; 4239 dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; 4240 mirrorcnt++; 4241 if (mirrorcnt >= nfsrv_maxpnfsmirror) 4242 break; 4243 } 4244 } 4245 } 4246 /* Put at end of list to implement round-robin usage. */ 4247 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 4248 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 4249 NFSDDSUNLOCK(); 4250 dsc = NULL; 4251 if (mirrorcnt > 1) 4252 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 4253 M_WAITOK | M_ZERO); 4254 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | 4255 M_ZERO); 4256 4257 error = nfsvno_getfh(vp, &fh, p); 4258 if (error == 0) 4259 error = VOP_GETATTR(vp, &va, cred); 4260 if (error == 0) { 4261 /* Set the attributes for "vp" to Setattr the DS vp. 
*/ 4262 vauid = va.va_uid; 4263 vagid = va.va_gid; 4264 vamode = va.va_mode; 4265 VATTR_NULL(&va); 4266 va.va_uid = vauid; 4267 va.va_gid = vagid; 4268 va.va_mode = vamode; 4269 va.va_size = 0; 4270 } else 4271 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 4272 4273 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 4274 cred->cr_gid); 4275 /* Make data file name based on FH. */ 4276 tcred = newnfs_getcred(); 4277 4278 /* 4279 * Create the file on each DS mirror, using kernel process(es) for the 4280 * additional mirrors. 4281 */ 4282 failpos = -1; 4283 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 4284 tpf->dsf_dir = dsdir[i]; 4285 tdsc->tcred = tcred; 4286 tdsc->p = p; 4287 tdsc->pf = tpf; 4288 tdsc->createva = *vap; 4289 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); 4290 tdsc->va = va; 4291 tdsc->dvp = dvp[i]; 4292 tdsc->done = 0; 4293 tdsc->inprog = 0; 4294 tdsc->err = 0; 4295 ret = EIO; 4296 if (nfs_pnfsiothreads != 0) { 4297 ret = nfs_pnfsio(start_dscreate, tdsc); 4298 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 4299 } 4300 if (ret != 0) { 4301 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 4302 NULL, tcred, p, NULL); 4303 if (ret != 0) { 4304 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 4305 error)); 4306 if (failpos == -1 && nfsds_failerr(ret)) 4307 failpos = i; 4308 else 4309 error = ret; 4310 } 4311 } 4312 } 4313 if (error == 0) { 4314 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 4315 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 4316 &dsattr, NULL, tcred, p, NULL); 4317 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 4318 failpos = mirrorcnt - 1; 4319 error = 0; 4320 } 4321 } 4322 timo = hz / 50; /* Wait for 20msec. */ 4323 if (timo < 1) 4324 timo = 1; 4325 /* Wait for kernel task(s) to complete. 
*/ 4326 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 4327 while (tdsc->inprog != 0 && tdsc->done == 0) 4328 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 4329 if (tdsc->err != 0) { 4330 if (failpos == -1 && nfsds_failerr(tdsc->err)) 4331 failpos = i; 4332 else if (error == 0) 4333 error = tdsc->err; 4334 } 4335 } 4336 4337 /* 4338 * If failpos has been set, that mirror has failed, so it needs 4339 * to be disabled. 4340 */ 4341 if (failpos >= 0) { 4342 nmp = VFSTONFS(dvp[failpos]->v_mount); 4343 NFSLOCKMNT(nmp); 4344 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4345 NFSMNTP_CANCELRPCS)) == 0) { 4346 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4347 NFSUNLOCKMNT(nmp); 4348 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4349 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 4350 ds); 4351 if (ds != NULL) 4352 nfsrv_killrpcs(nmp); 4353 NFSLOCKMNT(nmp); 4354 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4355 wakeup(nmp); 4356 } 4357 NFSUNLOCKMNT(nmp); 4358 } 4359 4360 NFSFREECRED(tcred); 4361 if (error == 0) { 4362 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 4363 4364 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", 4365 mirrorcnt, nfsrv_maxpnfsmirror); 4366 /* 4367 * For all mirrors that couldn't be created, fill in the 4368 * *pf structure, but with an IP address == 0.0.0.0. 
4369 */ 4370 tpf = pf + mirrorcnt; 4371 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { 4372 *tpf = *pf; 4373 tpf->dsf_sin.sin_family = AF_INET; 4374 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); 4375 tpf->dsf_sin.sin_addr.s_addr = 0; 4376 tpf->dsf_sin.sin_port = 0; 4377 } 4378 4379 error = vn_extattr_set(vp, IO_NODELOCKED, 4380 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 4381 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); 4382 if (error == 0) 4383 error = vn_extattr_set(vp, IO_NODELOCKED, 4384 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4385 sizeof(dsattr), (char *)&dsattr, p); 4386 if (error != 0) 4387 printf("pNFS: pnfscreate setextattr=%d\n", 4388 error); 4389 } else 4390 printf("pNFS: pnfscreate=%d\n", error); 4391 free(pf, M_TEMP); 4392 free(dsc, M_TEMP); 4393 } 4394 4395 /* 4396 * Get the information needed to remove the pNFS Data Server file from the 4397 * Metadata file. Upon success, ddvp is set non-NULL to the locked 4398 * DS directory vnode. The caller must unlock *ddvp when done with it. 4399 */ 4400 static void 4401 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, 4402 int *mirrorcntp, char *fname, fhandle_t *fhp) 4403 { 4404 struct vattr va; 4405 struct ucred *tcred; 4406 char *buf; 4407 int buflen, error; 4408 4409 dvpp[0] = NULL; 4410 /* If not an exported regular file or not a pNFS server, just return. */ 4411 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4412 nfsrv_devidcnt == 0) 4413 return; 4414 4415 /* Check to see if this is the last hard link. 
*/ 4416 tcred = newnfs_getcred(); 4417 error = VOP_GETATTR(vp, &va, tcred); 4418 NFSFREECRED(tcred); 4419 if (error != 0) { 4420 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 4421 return; 4422 } 4423 if (va.va_nlink > 1) 4424 return; 4425 4426 error = nfsvno_getfh(vp, fhp, p); 4427 if (error != 0) { 4428 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 4429 return; 4430 } 4431 4432 buflen = 1024; 4433 buf = malloc(buflen, M_TEMP, M_WAITOK); 4434 /* Get the directory vnode for the DS mount and the file handle. */ 4435 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, 4436 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 4437 free(buf, M_TEMP); 4438 if (error != 0) 4439 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 4440 } 4441 4442 /* 4443 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 4444 * The arguments are in a structure, so that they can be passed through 4445 * taskqueue for a kernel process to execute this function. 
4446 */ 4447 struct nfsrvdsremove { 4448 int done; 4449 int inprog; 4450 struct task tsk; 4451 struct ucred *tcred; 4452 struct vnode *dvp; 4453 NFSPROC_T *p; 4454 int err; 4455 char fname[PNFS_FILENAME_LEN + 1]; 4456 }; 4457 4458 static int 4459 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4460 NFSPROC_T *p) 4461 { 4462 struct nameidata named; 4463 struct vnode *nvp; 4464 char *bufp; 4465 u_long *hashp; 4466 int error; 4467 4468 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4469 if (error != 0) 4470 return (error); 4471 named.ni_cnd.cn_nameiop = DELETE; 4472 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4473 named.ni_cnd.cn_cred = tcred; 4474 named.ni_cnd.cn_thread = p; 4475 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 4476 nfsvno_setpathbuf(&named, &bufp, &hashp); 4477 named.ni_cnd.cn_nameptr = bufp; 4478 named.ni_cnd.cn_namelen = strlen(fname); 4479 strlcpy(bufp, fname, NAME_MAX); 4480 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4481 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4482 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4483 if (error == 0) { 4484 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4485 vput(nvp); 4486 } 4487 NFSVOPUNLOCK(dvp); 4488 nfsvno_relpathbuf(&named); 4489 if (error != 0) 4490 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4491 return (error); 4492 } 4493 4494 /* 4495 * Start up the thread that will execute nfsrv_dsremove(). 4496 */ 4497 static void 4498 start_dsremove(void *arg, int pending) 4499 { 4500 struct nfsrvdsremove *dsrm; 4501 4502 dsrm = (struct nfsrvdsremove *)arg; 4503 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4504 dsrm->p); 4505 dsrm->done = 1; 4506 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4507 } 4508 4509 /* 4510 * Remove a pNFS data file from a Data Server. 4511 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4512 * removed to set up the dvp and fill in the FH. 
4513 */ 4514 static void 4515 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4516 NFSPROC_T *p) 4517 { 4518 struct ucred *tcred; 4519 struct nfsrvdsremove *dsrm, *tdsrm; 4520 struct nfsdevice *ds; 4521 struct nfsmount *nmp; 4522 int failpos, i, ret, timo; 4523 4524 tcred = newnfs_getcred(); 4525 dsrm = NULL; 4526 if (mirrorcnt > 1) 4527 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4528 /* 4529 * Remove the file on each DS mirror, using kernel process(es) for the 4530 * additional mirrors. 4531 */ 4532 failpos = -1; 4533 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4534 tdsrm->tcred = tcred; 4535 tdsrm->p = p; 4536 tdsrm->dvp = dvp[i]; 4537 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4538 tdsrm->inprog = 0; 4539 tdsrm->done = 0; 4540 tdsrm->err = 0; 4541 ret = EIO; 4542 if (nfs_pnfsiothreads != 0) { 4543 ret = nfs_pnfsio(start_dsremove, tdsrm); 4544 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4545 } 4546 if (ret != 0) { 4547 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4548 if (failpos == -1 && nfsds_failerr(ret)) 4549 failpos = i; 4550 } 4551 } 4552 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4553 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4554 failpos = mirrorcnt - 1; 4555 timo = hz / 50; /* Wait for 20msec. */ 4556 if (timo < 1) 4557 timo = 1; 4558 /* Wait for kernel task(s) to complete. */ 4559 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4560 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4561 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4562 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4563 failpos = i; 4564 } 4565 4566 /* 4567 * If failpos has been set, that mirror has failed, so it needs 4568 * to be disabled. 
4569 */ 4570 if (failpos >= 0) { 4571 nmp = VFSTONFS(dvp[failpos]->v_mount); 4572 NFSLOCKMNT(nmp); 4573 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4574 NFSMNTP_CANCELRPCS)) == 0) { 4575 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4576 NFSUNLOCKMNT(nmp); 4577 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4578 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4579 ds); 4580 if (ds != NULL) 4581 nfsrv_killrpcs(nmp); 4582 NFSLOCKMNT(nmp); 4583 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4584 wakeup(nmp); 4585 } 4586 NFSUNLOCKMNT(nmp); 4587 } 4588 4589 /* Get rid all layouts for the file. */ 4590 nfsrv_freefilelayouts(fhp); 4591 4592 NFSFREECRED(tcred); 4593 free(dsrm, M_TEMP); 4594 } 4595 4596 /* 4597 * Generate a file name based on the file handle and put it in *bufp. 4598 * Return the number of bytes generated. 4599 */ 4600 static int 4601 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4602 { 4603 int i; 4604 uint8_t *cp; 4605 const uint8_t *hexdigits = "0123456789abcdef"; 4606 4607 cp = (uint8_t *)fhp; 4608 for (i = 0; i < sizeof(*fhp); i++) { 4609 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4610 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4611 } 4612 bufp[2 * i] = '\0'; 4613 return (2 * i); 4614 } 4615 4616 /* 4617 * Update the Metadata file's attributes from the DS file when a Read/Write 4618 * layout is returned. 4619 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4620 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4621 */ 4622 int 4623 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4624 { 4625 struct ucred *tcred; 4626 int error; 4627 4628 /* Do this as root so that it won't fail with EACCES. 
*/ 4629 tcred = newnfs_getcred(); 4630 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4631 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 4632 NFSFREECRED(tcred); 4633 return (error); 4634 } 4635 4636 /* 4637 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 4638 */ 4639 static int 4640 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4641 NFSPROC_T *p) 4642 { 4643 int error; 4644 4645 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 4646 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 4647 return (error); 4648 } 4649 4650 static int 4651 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 4652 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 4653 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 4654 off_t *offp, int content, bool *eofp) 4655 { 4656 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4657 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4658 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4659 struct nfsdevice *ds; 4660 struct pnfsdsattr dsattr; 4661 struct opnfsdsattr odsattr; 4662 char *buf; 4663 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4664 4665 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4666 /* 4667 * If not a regular file, not exported or not a pNFS server, 4668 * just return ENOENT. 4669 */ 4670 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4671 nfsrv_devidcnt == 0) 4672 return (ENOENT); 4673 4674 buflen = 1024; 4675 buf = malloc(buflen, M_TEMP, M_WAITOK); 4676 error = 0; 4677 4678 /* 4679 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4680 * from the MetaData file's extended attribute. 
4681 */ 4682 if (ioproc == NFSPROC_GETATTR) { 4683 error = vn_extattr_get(vp, IO_NODELOCKED, 4684 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4685 p); 4686 if (error == 0) { 4687 if (buflen == sizeof(odsattr)) { 4688 NFSBCOPY(buf, &odsattr, buflen); 4689 nap->na_filerev = odsattr.dsa_filerev; 4690 nap->na_size = odsattr.dsa_size; 4691 nap->na_atime = odsattr.dsa_atime; 4692 nap->na_mtime = odsattr.dsa_mtime; 4693 /* 4694 * Fake na_bytes by rounding up na_size. 4695 * Since we don't know the block size, just 4696 * use BLKDEV_IOSIZE. 4697 */ 4698 nap->na_bytes = (odsattr.dsa_size + 4699 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 4700 } else if (buflen == sizeof(dsattr)) { 4701 NFSBCOPY(buf, &dsattr, buflen); 4702 nap->na_filerev = dsattr.dsa_filerev; 4703 nap->na_size = dsattr.dsa_size; 4704 nap->na_atime = dsattr.dsa_atime; 4705 nap->na_mtime = dsattr.dsa_mtime; 4706 nap->na_bytes = dsattr.dsa_bytes; 4707 } else 4708 error = ENXIO; 4709 } 4710 if (error == 0) { 4711 /* 4712 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4713 * returns 0, just return now. nfsrv_checkdsattr() 4714 * returns 0 if there is no Read/Write layout 4715 * plus either an Open/Write_access or Write 4716 * delegation issued to a client for the file. 4717 */ 4718 if (nfsrv_pnfsgetdsattr == 0 || 4719 nfsrv_checkdsattr(vp, p) == 0) { 4720 free(buf, M_TEMP); 4721 return (error); 4722 } 4723 } 4724 4725 /* 4726 * Clear ENOATTR so the code below will attempt to do a 4727 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4728 * the extended attribute. 
4729 */ 4730 if (error == ENOATTR) 4731 error = 0; 4732 } 4733 4734 origmircnt = -1; 4735 trycnt = 0; 4736 tryagain: 4737 if (error == 0) { 4738 buflen = 1024; 4739 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 4740 LK_EXCLUSIVE) 4741 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 4742 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4743 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4744 NULL, NULL); 4745 if (error == 0) { 4746 for (i = 0; i < mirrorcnt; i++) 4747 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4748 } else 4749 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4750 } else 4751 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4752 if (error == 0) { 4753 failpos = -1; 4754 if (origmircnt == -1) 4755 origmircnt = mirrorcnt; 4756 /* 4757 * If failpos is set to a mirror#, then that mirror has 4758 * failed and will be disabled. For Read, Getattr and Seek, the 4759 * function only tries one mirror, so if that mirror has 4760 * failed, it will need to be retried. As such, increment 4761 * tryitagain for these cases. 4762 * For Write, Setattr and Setacl, the function tries all 4763 * mirrors and will not return an error for the case where 4764 * one mirror has failed. For these cases, the functioning 4765 * mirror(s) will have been modified, so a retry isn't 4766 * necessary. These functions will set failpos for the 4767 * failed mirror#. 4768 */ 4769 if (ioproc == NFSPROC_READDS) { 4770 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4771 mpp, mpp2); 4772 if (nfsds_failerr(error) && mirrorcnt > 1) { 4773 /* 4774 * Setting failpos will cause the mirror 4775 * to be disabled and then a retry of this 4776 * read is required. 
4777 */ 4778 failpos = 0; 4779 error = 0; 4780 trycnt++; 4781 } 4782 } else if (ioproc == NFSPROC_WRITEDS) 4783 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4784 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4785 else if (ioproc == NFSPROC_SETATTR) 4786 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4787 mirrorcnt, nap, &failpos); 4788 else if (ioproc == NFSPROC_SETACL) 4789 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4790 mirrorcnt, aclp, &failpos); 4791 else if (ioproc == NFSPROC_SEEKDS) { 4792 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 4793 p, nmp[0]); 4794 if (nfsds_failerr(error) && mirrorcnt > 1) { 4795 /* 4796 * Setting failpos will cause the mirror 4797 * to be disabled and then a retry of this 4798 * read is required. 4799 */ 4800 failpos = 0; 4801 error = 0; 4802 trycnt++; 4803 } 4804 } else if (ioproc == NFSPROC_ALLOCATE) 4805 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 4806 &nmp[0], mirrorcnt, &failpos); 4807 else { 4808 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4809 vp, nmp[mirrorcnt - 1], nap); 4810 if (nfsds_failerr(error) && mirrorcnt > 1) { 4811 /* 4812 * Setting failpos will cause the mirror 4813 * to be disabled and then a retry of this 4814 * getattr is required. 
4815 */ 4816 failpos = mirrorcnt - 1; 4817 error = 0; 4818 trycnt++; 4819 } 4820 } 4821 ds = NULL; 4822 if (failpos >= 0) { 4823 failnmp = nmp[failpos]; 4824 NFSLOCKMNT(failnmp); 4825 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4826 NFSMNTP_CANCELRPCS)) == 0) { 4827 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4828 NFSUNLOCKMNT(failnmp); 4829 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 4830 failnmp, p); 4831 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 4832 failpos, ds); 4833 if (ds != NULL) 4834 nfsrv_killrpcs(failnmp); 4835 NFSLOCKMNT(failnmp); 4836 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4837 wakeup(failnmp); 4838 } 4839 NFSUNLOCKMNT(failnmp); 4840 } 4841 for (i = 0; i < mirrorcnt; i++) 4842 NFSVOPUNLOCK(dvp[i]); 4843 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 4844 trycnt); 4845 /* Try the Read/Getattr again if a mirror was deleted. */ 4846 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 4847 goto tryagain; 4848 } else { 4849 /* Return ENOENT for any Extended Attribute error. */ 4850 error = ENOENT; 4851 } 4852 free(buf, M_TEMP); 4853 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 4854 return (error); 4855 } 4856 4857 /* 4858 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 4859 * attribute. 4860 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 4861 * to be checked. If it points to a NULL nmp, then it returns 4862 * a suitable destination. 4863 * curnmp - If non-NULL, it is the source mount for the copy. 
4864 */ 4865 int 4866 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 4867 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 4868 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 4869 struct nfsmount *curnmp, int *ippos, int *dsdirp) 4870 { 4871 struct vnode *dvp, *nvp = NULL, **tdvpp; 4872 struct mount *mp; 4873 struct nfsmount *nmp, *newnmp; 4874 struct sockaddr *sad; 4875 struct sockaddr_in *sin; 4876 struct nfsdevice *ds, *tds, *fndds; 4877 struct pnfsdsfile *pf; 4878 uint32_t dsdir; 4879 int error, fhiszero, fnd, gotone, i, mirrorcnt; 4880 4881 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 4882 *mirrorcntp = 1; 4883 tdvpp = dvpp; 4884 if (nvpp != NULL) 4885 *nvpp = NULL; 4886 if (dvpp != NULL) 4887 *dvpp = NULL; 4888 if (ippos != NULL) 4889 *ippos = -1; 4890 if (newnmpp != NULL) 4891 newnmp = *newnmpp; 4892 else 4893 newnmp = NULL; 4894 mp = vp->v_mount; 4895 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 4896 "pnfsd.dsfile", buflenp, buf, p); 4897 mirrorcnt = *buflenp / sizeof(*pf); 4898 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 4899 *buflenp != sizeof(*pf) * mirrorcnt)) 4900 error = ENOATTR; 4901 4902 pf = (struct pnfsdsfile *)buf; 4903 /* If curnmp != NULL, check for a match in the mirror list. 
*/ 4904 if (curnmp != NULL && error == 0) { 4905 fnd = 0; 4906 for (i = 0; i < mirrorcnt; i++, pf++) { 4907 sad = (struct sockaddr *)&pf->dsf_sin; 4908 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 4909 if (ippos != NULL) 4910 *ippos = i; 4911 fnd = 1; 4912 break; 4913 } 4914 } 4915 if (fnd == 0) 4916 error = ENXIO; 4917 } 4918 4919 gotone = 0; 4920 pf = (struct pnfsdsfile *)buf; 4921 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 4922 error); 4923 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 4924 fhiszero = 0; 4925 sad = (struct sockaddr *)&pf->dsf_sin; 4926 sin = &pf->dsf_sin; 4927 dsdir = pf->dsf_dir; 4928 if (dsdir >= nfsrv_dsdirsize) { 4929 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 4930 error = ENOATTR; 4931 } else if (nvpp != NULL && newnmp != NULL && 4932 nfsaddr2_match(sad, newnmp->nm_nam)) 4933 error = EEXIST; 4934 if (error == 0) { 4935 if (ippos != NULL && curnmp == NULL && 4936 sad->sa_family == AF_INET && 4937 sin->sin_addr.s_addr == 0) 4938 *ippos = i; 4939 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 4940 fhiszero = 1; 4941 /* Use the socket address to find the mount point. */ 4942 fndds = NULL; 4943 NFSDDSLOCK(); 4944 /* Find a match for the IP address. */ 4945 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 4946 if (ds->nfsdev_nmp != NULL) { 4947 dvp = ds->nfsdev_dvp; 4948 nmp = VFSTONFS(dvp->v_mount); 4949 if (nmp != ds->nfsdev_nmp) 4950 printf("different2 nmp %p %p\n", 4951 nmp, ds->nfsdev_nmp); 4952 if (nfsaddr2_match(sad, nmp->nm_nam)) { 4953 fndds = ds; 4954 break; 4955 } 4956 } 4957 } 4958 if (fndds != NULL && newnmpp != NULL && 4959 newnmp == NULL) { 4960 /* Search for a place to make a mirror copy. 
*/ 4961 TAILQ_FOREACH(tds, &nfsrv_devidhead, 4962 nfsdev_list) { 4963 if (tds->nfsdev_nmp != NULL && 4964 fndds != tds && 4965 ((tds->nfsdev_mdsisset == 0 && 4966 fndds->nfsdev_mdsisset == 0) || 4967 (tds->nfsdev_mdsisset != 0 && 4968 fndds->nfsdev_mdsisset != 0 && 4969 fsidcmp(&tds->nfsdev_mdsfsid, 4970 &mp->mnt_stat.f_fsid) == 0))) { 4971 *newnmpp = tds->nfsdev_nmp; 4972 break; 4973 } 4974 } 4975 if (tds != NULL) { 4976 /* 4977 * Move this entry to the end of the 4978 * list, so it won't be selected as 4979 * easily the next time. 4980 */ 4981 TAILQ_REMOVE(&nfsrv_devidhead, tds, 4982 nfsdev_list); 4983 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 4984 nfsdev_list); 4985 } 4986 } 4987 NFSDDSUNLOCK(); 4988 if (fndds != NULL) { 4989 dvp = fndds->nfsdev_dsdir[dsdir]; 4990 if (lktype != 0 || fhiszero != 0 || 4991 (nvpp != NULL && *nvpp == NULL)) { 4992 if (fhiszero != 0) 4993 error = vn_lock(dvp, 4994 LK_EXCLUSIVE); 4995 else if (lktype != 0) 4996 error = vn_lock(dvp, lktype); 4997 else 4998 error = vn_lock(dvp, LK_SHARED); 4999 /* 5000 * If the file handle is all 0's, try to 5001 * do a Lookup against the DS to acquire 5002 * it. 5003 * If dvpp == NULL or the Lookup fails, 5004 * unlock dvp after the call. 
5005 */ 5006 if (error == 0 && (fhiszero != 0 || 5007 (nvpp != NULL && *nvpp == NULL))) { 5008 error = nfsrv_pnfslookupds(vp, 5009 dvp, pf, &nvp, p); 5010 if (error == 0) { 5011 if (fhiszero != 0) 5012 nfsrv_pnfssetfh( 5013 vp, pf, 5014 devid, 5015 fnamep, 5016 nvp, p); 5017 if (nvpp != NULL && 5018 *nvpp == NULL) { 5019 *nvpp = nvp; 5020 *dsdirp = dsdir; 5021 } else 5022 vput(nvp); 5023 } 5024 if (error != 0 || lktype == 0) 5025 NFSVOPUNLOCK(dvp); 5026 } 5027 } 5028 if (error == 0) { 5029 gotone++; 5030 NFSD_DEBUG(4, "gotone=%d\n", gotone); 5031 if (devid != NULL) { 5032 NFSBCOPY(fndds->nfsdev_deviceid, 5033 devid, NFSX_V4DEVICEID); 5034 devid += NFSX_V4DEVICEID; 5035 } 5036 if (dvpp != NULL) 5037 *tdvpp++ = dvp; 5038 if (fhp != NULL) 5039 NFSBCOPY(&pf->dsf_fh, fhp++, 5040 NFSX_MYFH); 5041 if (fnamep != NULL && gotone == 1) 5042 strlcpy(fnamep, 5043 pf->dsf_filename, 5044 sizeof(pf->dsf_filename)); 5045 } else 5046 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 5047 "err=%d\n", error); 5048 } 5049 } 5050 } 5051 if (error == 0 && gotone == 0) 5052 error = ENOENT; 5053 5054 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 5055 error); 5056 if (error == 0) 5057 *mirrorcntp = gotone; 5058 else { 5059 if (gotone > 0 && dvpp != NULL) { 5060 /* 5061 * If the error didn't occur on the first one and 5062 * dvpp != NULL, the one(s) prior to the failure will 5063 * have locked dvp's that need to be unlocked. 5064 */ 5065 for (i = 0; i < gotone; i++) { 5066 NFSVOPUNLOCK(*dvpp); 5067 *dvpp++ = NULL; 5068 } 5069 } 5070 /* 5071 * If it found the vnode to be copied from before a failure, 5072 * it needs to be vput()'d. 5073 */ 5074 if (nvpp != NULL && *nvpp != NULL) { 5075 vput(*nvpp); 5076 *nvpp = NULL; 5077 } 5078 } 5079 return (error); 5080 } 5081 5082 /* 5083 * Set the extended attribute for the Change attribute. 
5084 */ 5085 static int 5086 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 5087 { 5088 struct pnfsdsattr dsattr; 5089 int error; 5090 5091 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); 5092 dsattr.dsa_filerev = nap->na_filerev; 5093 dsattr.dsa_size = nap->na_size; 5094 dsattr.dsa_atime = nap->na_atime; 5095 dsattr.dsa_mtime = nap->na_mtime; 5096 dsattr.dsa_bytes = nap->na_bytes; 5097 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 5098 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); 5099 if (error != 0) 5100 printf("pNFS: setextattr=%d\n", error); 5101 return (error); 5102 } 5103 5104 static int 5105 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5106 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) 5107 { 5108 uint32_t *tl; 5109 struct nfsrv_descript *nd; 5110 nfsv4stateid_t st; 5111 struct mbuf *m, *m2; 5112 int error = 0, retlen, tlen, trimlen; 5113 5114 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); 5115 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5116 *mpp = NULL; 5117 /* 5118 * Use a stateid where other is an alternating 01010 pattern and 5119 * seqid is 0xffffffff. This value is not defined as special by 5120 * the RFC and is used by the FreeBSD NFS server to indicate an 5121 * MDS->DS proxy operation. 
5122 */ 5123 st.other[0] = 0x55555555; 5124 st.other[1] = 0x55555555; 5125 st.other[2] = 0x55555555; 5126 st.seqid = 0xffffffff; 5127 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5128 NULL, NULL, 0, 0); 5129 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5130 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); 5131 txdr_hyper(off, tl); 5132 *(tl + 2) = txdr_unsigned(len); 5133 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5134 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5135 if (error != 0) { 5136 free(nd, M_TEMP); 5137 return (error); 5138 } 5139 if (nd->nd_repstat == 0) { 5140 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 5141 NFSM_STRSIZ(retlen, len); 5142 if (retlen > 0) { 5143 /* Trim off the pre-data XDR from the mbuf chain. */ 5144 m = nd->nd_mrep; 5145 while (m != NULL && m != nd->nd_md) { 5146 if (m->m_next == nd->nd_md) { 5147 m->m_next = NULL; 5148 m_freem(nd->nd_mrep); 5149 nd->nd_mrep = m = nd->nd_md; 5150 } else 5151 m = m->m_next; 5152 } 5153 if (m == NULL) { 5154 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5155 error = ENOENT; 5156 goto nfsmout; 5157 } 5158 5159 /* 5160 * Now, adjust first mbuf so that any XDR before the 5161 * read data is skipped over. 5162 */ 5163 trimlen = nd->nd_dpos - mtod(m, char *); 5164 if (trimlen > 0) { 5165 m->m_len -= trimlen; 5166 NFSM_DATAP(m, trimlen); 5167 } 5168 5169 /* 5170 * Truncate the mbuf chain at retlen bytes of data, 5171 * plus XDR padding that brings the length up to a 5172 * multiple of 4. 
5173 */ 5174 tlen = NFSM_RNDUP(retlen); 5175 do { 5176 if (m->m_len >= tlen) { 5177 m->m_len = tlen; 5178 tlen = 0; 5179 m2 = m->m_next; 5180 m->m_next = NULL; 5181 m_freem(m2); 5182 break; 5183 } 5184 tlen -= m->m_len; 5185 m = m->m_next; 5186 } while (m != NULL); 5187 if (tlen > 0) { 5188 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5189 error = ENOENT; 5190 goto nfsmout; 5191 } 5192 *mpp = nd->nd_mrep; 5193 *mpendp = m; 5194 nd->nd_mrep = NULL; 5195 } 5196 } else 5197 error = nd->nd_repstat; 5198 nfsmout: 5199 /* If nd->nd_mrep is already NULL, this is a no-op. */ 5200 m_freem(nd->nd_mrep); 5201 free(nd, M_TEMP); 5202 NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); 5203 return (error); 5204 } 5205 5206 /* 5207 * Do a write RPC on a DS data file, using this structure for the arguments, 5208 * so that this function can be executed by a separate kernel process. 5209 */ 5210 struct nfsrvwritedsdorpc { 5211 int done; 5212 int inprog; 5213 struct task tsk; 5214 fhandle_t fh; 5215 off_t off; 5216 int len; 5217 struct nfsmount *nmp; 5218 struct ucred *cred; 5219 NFSPROC_T *p; 5220 struct mbuf *m; 5221 int err; 5222 }; 5223 5224 static int 5225 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, 5226 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) 5227 { 5228 uint32_t *tl; 5229 struct nfsrv_descript *nd; 5230 nfsattrbit_t attrbits; 5231 nfsv4stateid_t st; 5232 int commit, error, retlen; 5233 5234 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5235 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, 5236 sizeof(fhandle_t), NULL, NULL, 0, 0); 5237 5238 /* 5239 * Use a stateid where other is an alternating 01010 pattern and 5240 * seqid is 0xffffffff. This value is not defined as special by 5241 * the RFC and is used by the FreeBSD NFS server to indicate an 5242 * MDS->DS proxy operation. 
5243 */ 5244 st.other[0] = 0x55555555; 5245 st.other[1] = 0x55555555; 5246 st.other[2] = 0x55555555; 5247 st.seqid = 0xffffffff; 5248 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5249 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); 5250 txdr_hyper(off, tl); 5251 tl += 2; 5252 /* 5253 * Do all writes FileSync, since the server doesn't hold onto dirty 5254 * buffers. Since clients should be accessing the DS servers directly 5255 * using the pNFS layouts, this just needs to work correctly as a 5256 * fallback. 5257 */ 5258 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); 5259 *tl = txdr_unsigned(len); 5260 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); 5261 5262 /* Put data in mbuf chain. */ 5263 nd->nd_mb->m_next = m; 5264 5265 /* Set nd_mb and nd_bpos to end of data. */ 5266 while (m->m_next != NULL) 5267 m = m->m_next; 5268 nd->nd_mb = m; 5269 nfsm_set(nd, m->m_len); 5270 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); 5271 5272 /* Do a Getattr for the attributes that change upon writing. */ 5273 NFSZERO_ATTRBIT(&attrbits); 5274 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5275 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5276 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5277 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5278 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5279 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5280 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5281 (void) nfsrv_putattrbit(nd, &attrbits); 5282 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5283 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5284 if (error != 0) { 5285 free(nd, M_TEMP); 5286 return (error); 5287 } 5288 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); 5289 /* Get rid of weak cache consistency data for now. 
*/ 5290 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5291 (ND_NFSV4 | ND_V4WCCATTR)) { 5292 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5293 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5294 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); 5295 if (error != 0) 5296 goto nfsmout; 5297 /* 5298 * Get rid of Op# and status for next op. 5299 */ 5300 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5301 if (*++tl != 0) 5302 nd->nd_flag |= ND_NOMOREDATA; 5303 } 5304 if (nd->nd_repstat == 0) { 5305 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); 5306 retlen = fxdr_unsigned(int, *tl++); 5307 commit = fxdr_unsigned(int, *tl); 5308 if (commit != NFSWRITE_FILESYNC) 5309 error = NFSERR_IO; 5310 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", 5311 retlen, commit, error); 5312 } else 5313 error = nd->nd_repstat; 5314 /* We have no use for the Write Verifier since we use FileSync. */ 5315 5316 /* 5317 * Get the Change, Size, Access Time and Modify Time attributes and set 5318 * on the Metadata file, so its attributes will be what the file's 5319 * would be if it had been written. 5320 */ 5321 if (error == 0) { 5322 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5323 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5324 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5325 } 5326 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); 5327 nfsmout: 5328 m_freem(nd->nd_mrep); 5329 free(nd, M_TEMP); 5330 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); 5331 return (error); 5332 } 5333 5334 /* 5335 * Start up the thread that will execute nfsrv_writedsdorpc(). 
5336 */ 5337 static void 5338 start_writedsdorpc(void *arg, int pending) 5339 { 5340 struct nfsrvwritedsdorpc *drpc; 5341 5342 drpc = (struct nfsrvwritedsdorpc *)arg; 5343 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5344 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 5345 drpc->done = 1; 5346 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 5347 } 5348 5349 static int 5350 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5351 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5352 struct mbuf **mpp, char *cp, int *failposp) 5353 { 5354 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; 5355 struct nfsvattr na; 5356 struct mbuf *m; 5357 int error, i, offs, ret, timo; 5358 5359 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5360 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5361 drpc = NULL; 5362 if (mirrorcnt > 1) 5363 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5364 M_WAITOK); 5365 5366 /* Calculate offset in mbuf chain that data starts. */ 5367 offs = cp - mtod(*mpp, char *); 5368 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5369 5370 /* 5371 * Do the write RPC for every DS, using a separate kernel process 5372 * for every DS except the last one. 
5373 */ 5374 error = 0; 5375 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5376 tdrpc->done = 0; 5377 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5378 tdrpc->off = off; 5379 tdrpc->len = len; 5380 tdrpc->nmp = *nmpp; 5381 tdrpc->cred = cred; 5382 tdrpc->p = p; 5383 tdrpc->inprog = 0; 5384 tdrpc->err = 0; 5385 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5386 ret = EIO; 5387 if (nfs_pnfsiothreads != 0) { 5388 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5389 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5390 ret); 5391 } 5392 if (ret != 0) { 5393 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5394 tdrpc->m, cred, p); 5395 if (nfsds_failerr(ret) && *failposp == -1) 5396 *failposp = i; 5397 else if (error == 0 && ret != 0) 5398 error = ret; 5399 } 5400 nmpp++; 5401 fhp++; 5402 } 5403 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5404 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5405 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5406 *failposp = mirrorcnt - 1; 5407 else if (error == 0 && ret != 0) 5408 error = ret; 5409 if (error == 0) 5410 error = nfsrv_setextattr(vp, &na, p); 5411 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5412 tdrpc = drpc; 5413 timo = hz / 50; /* Wait for 20msec. */ 5414 if (timo < 1) 5415 timo = 1; 5416 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5417 /* Wait for RPCs on separate threads to complete. */ 5418 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5419 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5420 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5421 *failposp = i; 5422 else if (error == 0 && tdrpc->err != 0) 5423 error = tdrpc->err; 5424 } 5425 free(drpc, M_TEMP); 5426 return (error); 5427 } 5428 5429 /* 5430 * Do a allocate RPC on a DS data file, using this structure for the arguments, 5431 * so that this function can be executed by a separate kernel process. 
5432 */ 5433 struct nfsrvallocatedsdorpc { 5434 int done; 5435 int inprog; 5436 struct task tsk; 5437 fhandle_t fh; 5438 off_t off; 5439 off_t len; 5440 struct nfsmount *nmp; 5441 struct ucred *cred; 5442 NFSPROC_T *p; 5443 int err; 5444 }; 5445 5446 static int 5447 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5448 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5449 { 5450 uint32_t *tl; 5451 struct nfsrv_descript *nd; 5452 nfsattrbit_t attrbits; 5453 nfsv4stateid_t st; 5454 int error; 5455 5456 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5457 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 5458 sizeof(fhandle_t), NULL, NULL, 0, 0); 5459 5460 /* 5461 * Use a stateid where other is an alternating 01010 pattern and 5462 * seqid is 0xffffffff. This value is not defined as special by 5463 * the RFC and is used by the FreeBSD NFS server to indicate an 5464 * MDS->DS proxy operation. 5465 */ 5466 st.other[0] = 0x55555555; 5467 st.other[1] = 0x55555555; 5468 st.other[2] = 0x55555555; 5469 st.seqid = 0xffffffff; 5470 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5471 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5472 txdr_hyper(off, tl); tl += 2; 5473 txdr_hyper(len, tl); tl += 2; 5474 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 5475 5476 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5477 NFSGETATTR_ATTRBIT(&attrbits); 5478 nfsrv_putattrbit(nd, &attrbits); 5479 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5480 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5481 if (error != 0) { 5482 free(nd, M_TEMP); 5483 return (error); 5484 } 5485 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 5486 nd->nd_repstat); 5487 if (nd->nd_repstat == 0) { 5488 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5489 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5490 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5491 } else 5492 error = 
nd->nd_repstat; 5493 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 5494 nfsmout: 5495 m_freem(nd->nd_mrep); 5496 free(nd, M_TEMP); 5497 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 5498 return (error); 5499 } 5500 5501 /* 5502 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 5503 */ 5504 static void 5505 start_allocatedsdorpc(void *arg, int pending) 5506 { 5507 struct nfsrvallocatedsdorpc *drpc; 5508 5509 drpc = (struct nfsrvallocatedsdorpc *)arg; 5510 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5511 drpc->len, NULL, drpc->cred, drpc->p); 5512 drpc->done = 1; 5513 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 5514 } 5515 5516 static int 5517 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5518 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5519 int *failposp) 5520 { 5521 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 5522 struct nfsvattr na; 5523 int error, i, ret, timo; 5524 5525 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 5526 drpc = NULL; 5527 if (mirrorcnt > 1) 5528 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5529 M_WAITOK); 5530 5531 /* 5532 * Do the allocate RPC for every DS, using a separate kernel process 5533 * for every DS except the last one. 
5534 */ 5535 error = 0; 5536 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5537 tdrpc->done = 0; 5538 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5539 tdrpc->off = off; 5540 tdrpc->len = len; 5541 tdrpc->nmp = *nmpp; 5542 tdrpc->cred = cred; 5543 tdrpc->p = p; 5544 tdrpc->inprog = 0; 5545 tdrpc->err = 0; 5546 ret = EIO; 5547 if (nfs_pnfsiothreads != 0) { 5548 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 5549 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 5550 ret); 5551 } 5552 if (ret != 0) { 5553 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 5554 cred, p); 5555 if (nfsds_failerr(ret) && *failposp == -1) 5556 *failposp = i; 5557 else if (error == 0 && ret != 0) 5558 error = ret; 5559 } 5560 nmpp++; 5561 fhp++; 5562 } 5563 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5564 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5565 *failposp = mirrorcnt - 1; 5566 else if (error == 0 && ret != 0) 5567 error = ret; 5568 if (error == 0) 5569 error = nfsrv_setextattr(vp, &na, p); 5570 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 5571 tdrpc = drpc; 5572 timo = hz / 50; /* Wait for 20msec. */ 5573 if (timo < 1) 5574 timo = 1; 5575 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5576 /* Wait for RPCs on separate threads to complete. 
*/ 5577 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5578 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5579 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5580 *failposp = i; 5581 else if (error == 0 && tdrpc->err != 0) 5582 error = tdrpc->err; 5583 } 5584 free(drpc, M_TEMP); 5585 return (error); 5586 } 5587 5588 static int 5589 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5590 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5591 struct nfsvattr *dsnap) 5592 { 5593 uint32_t *tl; 5594 struct nfsrv_descript *nd; 5595 nfsv4stateid_t st; 5596 nfsattrbit_t attrbits; 5597 int error; 5598 5599 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5600 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5601 /* 5602 * Use a stateid where other is an alternating 01010 pattern and 5603 * seqid is 0xffffffff. This value is not defined as special by 5604 * the RFC and is used by the FreeBSD NFS server to indicate an 5605 * MDS->DS proxy operation. 5606 */ 5607 st.other[0] = 0x55555555; 5608 st.other[1] = 0x55555555; 5609 st.other[2] = 0x55555555; 5610 st.seqid = 0xffffffff; 5611 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5612 NULL, NULL, 0, 0); 5613 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5614 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5615 5616 /* Do a Getattr for the attributes that change due to writing. 
*/ 5617 NFSZERO_ATTRBIT(&attrbits); 5618 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5619 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5620 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5621 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5622 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5623 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5624 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5625 (void) nfsrv_putattrbit(nd, &attrbits); 5626 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5627 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5628 if (error != 0) { 5629 free(nd, M_TEMP); 5630 return (error); 5631 } 5632 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5633 nd->nd_repstat); 5634 /* Get rid of weak cache consistency data for now. */ 5635 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5636 (ND_NFSV4 | ND_V4WCCATTR)) { 5637 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5638 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5639 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5640 if (error != 0) 5641 goto nfsmout; 5642 /* 5643 * Get rid of Op# and status for next op. 5644 */ 5645 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5646 if (*++tl != 0) 5647 nd->nd_flag |= ND_NOMOREDATA; 5648 } 5649 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5650 if (error != 0) 5651 goto nfsmout; 5652 if (nd->nd_repstat != 0) 5653 error = nd->nd_repstat; 5654 /* 5655 * Get the Change, Size, Access Time and Modify Time attributes and set 5656 * on the Metadata file, so its attributes will be what the file's 5657 * would be if it had been written. 
5658 */ 5659 if (error == 0) { 5660 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5661 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5662 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5663 } 5664 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5665 nfsmout: 5666 m_freem(nd->nd_mrep); 5667 free(nd, M_TEMP); 5668 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5669 return (error); 5670 } 5671 5672 struct nfsrvsetattrdsdorpc { 5673 int done; 5674 int inprog; 5675 struct task tsk; 5676 fhandle_t fh; 5677 struct nfsmount *nmp; 5678 struct vnode *vp; 5679 struct ucred *cred; 5680 NFSPROC_T *p; 5681 struct nfsvattr na; 5682 struct nfsvattr dsna; 5683 int err; 5684 }; 5685 5686 /* 5687 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5688 */ 5689 static void 5690 start_setattrdsdorpc(void *arg, int pending) 5691 { 5692 struct nfsrvsetattrdsdorpc *drpc; 5693 5694 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5695 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5696 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5697 drpc->done = 1; 5698 } 5699 5700 static int 5701 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5702 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5703 struct nfsvattr *nap, int *failposp) 5704 { 5705 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; 5706 struct nfsvattr na; 5707 int error, i, ret, timo; 5708 5709 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5710 drpc = NULL; 5711 if (mirrorcnt > 1) 5712 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5713 M_WAITOK); 5714 5715 /* 5716 * Do the setattr RPC for every DS, using a separate kernel process 5717 * for every DS except the last one. 
5718 */ 5719 error = 0; 5720 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5721 tdrpc->done = 0; 5722 tdrpc->inprog = 0; 5723 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5724 tdrpc->nmp = *nmpp; 5725 tdrpc->vp = vp; 5726 tdrpc->cred = cred; 5727 tdrpc->p = p; 5728 tdrpc->na = *nap; 5729 tdrpc->err = 0; 5730 ret = EIO; 5731 if (nfs_pnfsiothreads != 0) { 5732 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 5733 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 5734 ret); 5735 } 5736 if (ret != 0) { 5737 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 5738 &na); 5739 if (nfsds_failerr(ret) && *failposp == -1) 5740 *failposp = i; 5741 else if (error == 0 && ret != 0) 5742 error = ret; 5743 } 5744 nmpp++; 5745 fhp++; 5746 } 5747 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 5748 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5749 *failposp = mirrorcnt - 1; 5750 else if (error == 0 && ret != 0) 5751 error = ret; 5752 if (error == 0) 5753 error = nfsrv_setextattr(vp, &na, p); 5754 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 5755 tdrpc = drpc; 5756 timo = hz / 50; /* Wait for 20msec. */ 5757 if (timo < 1) 5758 timo = 1; 5759 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5760 /* Wait for RPCs on separate threads to complete. */ 5761 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5762 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 5763 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5764 *failposp = i; 5765 else if (error == 0 && tdrpc->err != 0) 5766 error = tdrpc->err; 5767 } 5768 free(drpc, M_TEMP); 5769 return (error); 5770 } 5771 5772 /* 5773 * Do a Setattr of an NFSv4 ACL on the DS file. 
5774 */ 5775 static int 5776 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5777 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 5778 { 5779 struct nfsrv_descript *nd; 5780 nfsv4stateid_t st; 5781 nfsattrbit_t attrbits; 5782 int error; 5783 5784 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 5785 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5786 /* 5787 * Use a stateid where other is an alternating 01010 pattern and 5788 * seqid is 0xffffffff. This value is not defined as special by 5789 * the RFC and is used by the FreeBSD NFS server to indicate an 5790 * MDS->DS proxy operation. 5791 */ 5792 st.other[0] = 0x55555555; 5793 st.other[1] = 0x55555555; 5794 st.other[2] = 0x55555555; 5795 st.seqid = 0xffffffff; 5796 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5797 NULL, NULL, 0, 0); 5798 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5799 NFSZERO_ATTRBIT(&attrbits); 5800 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 5801 /* 5802 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 5803 * so passing in the metadata "vp" will be ok, since it is of 5804 * the same type (VREG). 
5805 */ 5806 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 5807 NULL, 0, 0, 0, 0, 0, NULL); 5808 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5809 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5810 if (error != 0) { 5811 free(nd, M_TEMP); 5812 return (error); 5813 } 5814 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 5815 nd->nd_repstat); 5816 error = nd->nd_repstat; 5817 m_freem(nd->nd_mrep); 5818 free(nd, M_TEMP); 5819 return (error); 5820 } 5821 5822 struct nfsrvsetacldsdorpc { 5823 int done; 5824 int inprog; 5825 struct task tsk; 5826 fhandle_t fh; 5827 struct nfsmount *nmp; 5828 struct vnode *vp; 5829 struct ucred *cred; 5830 NFSPROC_T *p; 5831 struct acl *aclp; 5832 int err; 5833 }; 5834 5835 /* 5836 * Start up the thread that will execute nfsrv_setacldsdorpc(). 5837 */ 5838 static void 5839 start_setacldsdorpc(void *arg, int pending) 5840 { 5841 struct nfsrvsetacldsdorpc *drpc; 5842 5843 drpc = (struct nfsrvsetacldsdorpc *)arg; 5844 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 5845 drpc->vp, drpc->nmp, drpc->aclp); 5846 drpc->done = 1; 5847 } 5848 5849 static int 5850 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5851 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 5852 int *failposp) 5853 { 5854 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 5855 int error, i, ret, timo; 5856 5857 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 5858 drpc = NULL; 5859 if (mirrorcnt > 1) 5860 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5861 M_WAITOK); 5862 5863 /* 5864 * Do the setattr RPC for every DS, using a separate kernel process 5865 * for every DS except the last one. 
5866 */ 5867 error = 0; 5868 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5869 tdrpc->done = 0; 5870 tdrpc->inprog = 0; 5871 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5872 tdrpc->nmp = *nmpp; 5873 tdrpc->vp = vp; 5874 tdrpc->cred = cred; 5875 tdrpc->p = p; 5876 tdrpc->aclp = aclp; 5877 tdrpc->err = 0; 5878 ret = EIO; 5879 if (nfs_pnfsiothreads != 0) { 5880 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 5881 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 5882 ret); 5883 } 5884 if (ret != 0) { 5885 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 5886 aclp); 5887 if (nfsds_failerr(ret) && *failposp == -1) 5888 *failposp = i; 5889 else if (error == 0 && ret != 0) 5890 error = ret; 5891 } 5892 nmpp++; 5893 fhp++; 5894 } 5895 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 5896 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5897 *failposp = mirrorcnt - 1; 5898 else if (error == 0 && ret != 0) 5899 error = ret; 5900 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 5901 tdrpc = drpc; 5902 timo = hz / 50; /* Wait for 20msec. */ 5903 if (timo < 1) 5904 timo = 1; 5905 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5906 /* Wait for RPCs on separate threads to complete. */ 5907 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5908 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 5909 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5910 *failposp = i; 5911 else if (error == 0 && tdrpc->err != 0) 5912 error = tdrpc->err; 5913 } 5914 free(drpc, M_TEMP); 5915 return (error); 5916 } 5917 5918 /* 5919 * Getattr call to the DS for the attributes that change due to writing. 
5920 */ 5921 static int 5922 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5923 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 5924 { 5925 struct nfsrv_descript *nd; 5926 int error; 5927 nfsattrbit_t attrbits; 5928 5929 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 5930 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5931 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 5932 sizeof(fhandle_t), NULL, NULL, 0, 0); 5933 NFSZERO_ATTRBIT(&attrbits); 5934 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5935 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5936 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5937 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5938 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5939 (void) nfsrv_putattrbit(nd, &attrbits); 5940 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5941 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5942 if (error != 0) { 5943 free(nd, M_TEMP); 5944 return (error); 5945 } 5946 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 5947 nd->nd_repstat); 5948 if (nd->nd_repstat == 0) { 5949 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 5950 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 5951 NULL, NULL); 5952 /* 5953 * We can only save the updated values in the extended 5954 * attribute if the vp is exclusively locked. 5955 * This should happen when any of the following operations 5956 * occur on the vnode: 5957 * Close, Delegreturn, LayoutCommit, LayoutReturn 5958 * As such, the updated extended attribute should get saved 5959 * before nfsrv_checkdsattr() returns 0 and allows the cached 5960 * attributes to be returned without calling this function. 
5961 */ 5962 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 5963 error = nfsrv_setextattr(vp, nap, p); 5964 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 5965 error); 5966 } 5967 } else 5968 error = nd->nd_repstat; 5969 m_freem(nd->nd_mrep); 5970 free(nd, M_TEMP); 5971 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 5972 return (error); 5973 } 5974 5975 /* 5976 * Seek call to a DS. 5977 */ 5978 static int 5979 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 5980 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 5981 { 5982 uint32_t *tl; 5983 struct nfsrv_descript *nd; 5984 nfsv4stateid_t st; 5985 int error; 5986 5987 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 5988 /* 5989 * Use a stateid where other is an alternating 01010 pattern and 5990 * seqid is 0xffffffff. This value is not defined as special by 5991 * the RFC and is used by the FreeBSD NFS server to indicate an 5992 * MDS->DS proxy operation. 5993 */ 5994 st.other[0] = 0x55555555; 5995 st.other[1] = 0x55555555; 5996 st.other[2] = 0x55555555; 5997 st.seqid = 0xffffffff; 5998 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5999 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 6000 sizeof(fhandle_t), NULL, NULL, 0, 0); 6001 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6002 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 6003 txdr_hyper(*offp, tl); tl += 2; 6004 *tl = txdr_unsigned(content); 6005 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6006 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6007 if (error != 0) { 6008 free(nd, M_TEMP); 6009 return (error); 6010 } 6011 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 6012 if (nd->nd_repstat == 0) { 6013 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 6014 if (*tl++ == newnfs_true) 6015 *eofp = true; 6016 else 6017 *eofp = false; 6018 *offp = fxdr_hyper(tl); 6019 } else 6020 error = nd->nd_repstat; 6021 nfsmout: 6022 m_freem(nd->nd_mrep); 6023 
free(nd, M_TEMP); 6024 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 6025 return (error); 6026 } 6027 6028 /* 6029 * Get the device id and file handle for a DS file. 6030 */ 6031 int 6032 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 6033 fhandle_t *fhp, char *devid) 6034 { 6035 int buflen, error; 6036 char *buf; 6037 6038 buflen = 1024; 6039 buf = malloc(buflen, M_TEMP, M_WAITOK); 6040 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 6041 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 6042 free(buf, M_TEMP); 6043 return (error); 6044 } 6045 6046 /* 6047 * Do a Lookup against the DS for the filename. 6048 */ 6049 static int 6050 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 6051 struct vnode **nvpp, NFSPROC_T *p) 6052 { 6053 struct nameidata named; 6054 struct ucred *tcred; 6055 char *bufp; 6056 u_long *hashp; 6057 struct vnode *nvp; 6058 int error; 6059 6060 tcred = newnfs_getcred(); 6061 named.ni_cnd.cn_nameiop = LOOKUP; 6062 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 6063 named.ni_cnd.cn_cred = tcred; 6064 named.ni_cnd.cn_thread = p; 6065 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; 6066 nfsvno_setpathbuf(&named, &bufp, &hashp); 6067 named.ni_cnd.cn_nameptr = bufp; 6068 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 6069 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 6070 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 6071 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 6072 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 6073 NFSFREECRED(tcred); 6074 nfsvno_relpathbuf(&named); 6075 if (error == 0) 6076 *nvpp = nvp; 6077 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 6078 return (error); 6079 } 6080 6081 /* 6082 * Set the file handle to the correct one. 
6083 */ 6084 static void 6085 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, 6086 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 6087 { 6088 struct nfsnode *np; 6089 int ret = 0; 6090 6091 np = VTONFS(nvp); 6092 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 6093 /* 6094 * We can only do a vn_set_extattr() if the vnode is exclusively 6095 * locked and vn_start_write() has been done. If devid != NULL or 6096 * fnamep != NULL or the vnode is shared locked, vn_start_write() 6097 * may not have been done. 6098 * If not done now, it will be done on a future call. 6099 */ 6100 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 6101 LK_EXCLUSIVE) 6102 ret = vn_extattr_set(vp, IO_NODELOCKED, 6103 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 6104 (char *)pf, p); 6105 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 6106 } 6107 6108 /* 6109 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 6110 * when the DS has failed. 6111 */ 6112 void 6113 nfsrv_killrpcs(struct nfsmount *nmp) 6114 { 6115 6116 /* 6117 * Call newnfs_nmcancelreqs() to cause 6118 * any RPCs in progress on the mount point to 6119 * fail. 6120 * This will cause any process waiting for an 6121 * RPC to complete while holding a vnode lock 6122 * on the mounted-on vnode (such as "df" or 6123 * a non-forced "umount") to fail. 6124 * This will unlock the mounted-on vnode so 6125 * a forced dismount can succeed. 6126 * The NFSMNTP_CANCELRPCS flag should be set when this function is 6127 * called. 6128 */ 6129 newnfs_nmcancelreqs(nmp); 6130 } 6131 6132 /* 6133 * Sum up the statfs info for each of the DSs, so that the client will 6134 * receive the total for all DSs. 
6135 */ 6136 static int 6137 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 6138 { 6139 struct statfs *tsf; 6140 struct nfsdevice *ds; 6141 struct vnode **dvpp, **tdvpp, *dvp; 6142 uint64_t tot; 6143 int cnt, error = 0, i; 6144 6145 if (nfsrv_devidcnt <= 0) 6146 return (ENXIO); 6147 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 6148 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 6149 6150 /* Get an array of the dvps for the DSs. */ 6151 tdvpp = dvpp; 6152 i = 0; 6153 NFSDDSLOCK(); 6154 /* First, search for matches for same file system. */ 6155 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6156 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 6157 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 6158 if (++i > nfsrv_devidcnt) 6159 break; 6160 *tdvpp++ = ds->nfsdev_dvp; 6161 } 6162 } 6163 /* 6164 * If no matches for same file system, total all servers not assigned 6165 * to a file system. 6166 */ 6167 if (i == 0) { 6168 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6169 if (ds->nfsdev_nmp != NULL && 6170 ds->nfsdev_mdsisset == 0) { 6171 if (++i > nfsrv_devidcnt) 6172 break; 6173 *tdvpp++ = ds->nfsdev_dvp; 6174 } 6175 } 6176 } 6177 NFSDDSUNLOCK(); 6178 cnt = i; 6179 6180 /* Do a VFS_STATFS() for each of the DSs and sum them up. 
*/ 6181 tdvpp = dvpp; 6182 for (i = 0; i < cnt && error == 0; i++) { 6183 dvp = *tdvpp++; 6184 error = VFS_STATFS(dvp->v_mount, tsf); 6185 if (error == 0) { 6186 if (sf->f_bsize == 0) { 6187 if (tsf->f_bsize > 0) 6188 sf->f_bsize = tsf->f_bsize; 6189 else 6190 sf->f_bsize = 8192; 6191 } 6192 if (tsf->f_blocks > 0) { 6193 if (sf->f_bsize != tsf->f_bsize) { 6194 tot = tsf->f_blocks * tsf->f_bsize; 6195 sf->f_blocks += (tot / sf->f_bsize); 6196 } else 6197 sf->f_blocks += tsf->f_blocks; 6198 } 6199 if (tsf->f_bfree > 0) { 6200 if (sf->f_bsize != tsf->f_bsize) { 6201 tot = tsf->f_bfree * tsf->f_bsize; 6202 sf->f_bfree += (tot / sf->f_bsize); 6203 } else 6204 sf->f_bfree += tsf->f_bfree; 6205 } 6206 if (tsf->f_bavail > 0) { 6207 if (sf->f_bsize != tsf->f_bsize) { 6208 tot = tsf->f_bavail * tsf->f_bsize; 6209 sf->f_bavail += (tot / sf->f_bsize); 6210 } else 6211 sf->f_bavail += tsf->f_bavail; 6212 } 6213 } 6214 } 6215 free(tsf, M_TEMP); 6216 free(dvpp, M_TEMP); 6217 return (error); 6218 } 6219 6220 /* 6221 * Set an NFSv4 acl. 6222 */ 6223 int 6224 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 6225 { 6226 int error; 6227 6228 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 6229 error = NFSERR_ATTRNOTSUPP; 6230 goto out; 6231 } 6232 /* 6233 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 6234 * Make sure it has enough room for that - splitting every entry 6235 * into two and appending "canonical six" entries at the end. 6236 * Cribbed out of kern/vfs_acl.c - Rick M. 6237 */ 6238 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 6239 error = NFSERR_ATTRNOTSUPP; 6240 goto out; 6241 } 6242 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 6243 if (error == 0) { 6244 error = nfsrv_dssetacl(vp, aclp, cred, p); 6245 if (error == ENOENT) 6246 error = 0; 6247 } 6248 6249 out: 6250 NFSEXITCODE(error); 6251 return (error); 6252 } 6253 6254 /* 6255 * Seek vnode op call (actually it is a VOP_IOCTL()). 
6256 * This function is called with the vnode locked, but unlocks and vrele()s 6257 * the vp before returning. 6258 */ 6259 int 6260 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 6261 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 6262 { 6263 struct nfsvattr at; 6264 int error, ret; 6265 6266 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 6267 /* 6268 * Attempt to seek on a DS file. A return of ENOENT implies 6269 * there is no DS file to seek on. 6270 */ 6271 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 6272 NULL, NULL, NULL, NULL, offp, content, eofp); 6273 if (error != ENOENT) { 6274 vput(vp); 6275 return (error); 6276 } 6277 6278 /* 6279 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 6280 * VOP_IOCTL() will return ENXIO. However, the correct reply for 6281 * NFSv4.2 is *eofp == true and error == 0 for this case. 6282 */ 6283 NFSVOPUNLOCK(vp); 6284 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 6285 *eofp = false; 6286 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 6287 /* Handle the cases where we might be at EOF. */ 6288 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 6289 if (ret == 0 && *offp == at.na_size) { 6290 *eofp = true; 6291 error = 0; 6292 } 6293 if (ret != 0 && error == 0) 6294 error = ret; 6295 } 6296 vrele(vp); 6297 NFSEXITCODE(error); 6298 return (error); 6299 } 6300 6301 /* 6302 * Allocate vnode op call. 6303 */ 6304 int 6305 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6306 NFSPROC_T *p) 6307 { 6308 int error, trycnt; 6309 6310 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 6311 /* 6312 * Attempt to allocate on a DS file. A return of ENOENT implies 6313 * there is no DS file to allocate on. 
6314 */ 6315 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 6316 NULL, NULL, NULL, NULL, &len, 0, NULL); 6317 if (error != ENOENT) 6318 return (error); 6319 error = 0; 6320 6321 /* 6322 * Do the actual VOP_ALLOCATE(), looping a reasonable number of 6323 * times to achieve completion. 6324 */ 6325 trycnt = 0; 6326 while (error == 0 && len > 0 && trycnt++ < 20) 6327 error = VOP_ALLOCATE(vp, &off, &len); 6328 if (error == 0 && len > 0) 6329 error = NFSERR_IO; 6330 NFSEXITCODE(error); 6331 return (error); 6332 } 6333 6334 /* 6335 * Get Extended Atribute vnode op into an mbuf list. 6336 */ 6337 int 6338 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, 6339 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, 6340 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 6341 { 6342 struct iovec *iv; 6343 struct uio io, *uiop = &io; 6344 struct mbuf *m, *m2; 6345 int alen, error, len, tlen; 6346 size_t siz; 6347 6348 /* First, find out the size of the extended attribute. */ 6349 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6350 &siz, cred, p); 6351 if (error != 0) 6352 return (NFSERR_NOXATTR); 6353 if (siz > maxresp - NFS_MAXXDR) 6354 return (NFSERR_XATTR2BIG); 6355 len = siz; 6356 tlen = NFSM_RNDUP(len); 6357 if (tlen > 0) { 6358 /* 6359 * If cnt > MCLBYTES and the reply will not be saved, use 6360 * ext_pgs mbufs for TLS. 6361 * For NFSv4.0, we do not know for sure if the reply will 6362 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 6363 * Always use ext_pgs mbufs if ND_EXTPG is set. 
6364 */ 6365 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && 6366 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && 6367 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) 6368 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, 6369 maxextsiz, &m, &m2, &iv); 6370 else 6371 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, 6372 &iv); 6373 uiop->uio_iov = iv; 6374 } else { 6375 uiop->uio_iovcnt = 0; 6376 uiop->uio_iov = iv = NULL; 6377 m = m2 = NULL; 6378 } 6379 uiop->uio_offset = 0; 6380 uiop->uio_resid = tlen; 6381 uiop->uio_rw = UIO_READ; 6382 uiop->uio_segflg = UIO_SYSSPACE; 6383 uiop->uio_td = p; 6384 #ifdef MAC 6385 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6386 name); 6387 if (error != 0) 6388 goto out; 6389 #endif 6390 6391 if (tlen > 0) 6392 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6393 NULL, cred, p); 6394 if (error != 0) 6395 goto out; 6396 if (uiop->uio_resid > 0) { 6397 alen = tlen; 6398 len = tlen - uiop->uio_resid; 6399 tlen = NFSM_RNDUP(len); 6400 if (alen != tlen) 6401 printf("nfsvno_getxattr: weird size read\n"); 6402 if (tlen == 0) { 6403 m_freem(m); 6404 m = m2 = NULL; 6405 } else if (alen != tlen || tlen != len) 6406 m2 = nfsrv_adj(m, alen - tlen, tlen - len); 6407 } 6408 *lenp = len; 6409 *mpp = m; 6410 *mpendp = m2; 6411 6412 out: 6413 if (error != 0) { 6414 if (m != NULL) 6415 m_freem(m); 6416 *lenp = 0; 6417 } 6418 free(iv, M_TEMP); 6419 NFSEXITCODE(error); 6420 return (error); 6421 } 6422 6423 /* 6424 * Set Extended attribute vnode op from an mbuf list. 
6425 */ 6426 int 6427 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, 6428 char *cp, struct ucred *cred, struct thread *p) 6429 { 6430 struct iovec *iv; 6431 struct uio uio, *uiop = &uio; 6432 int cnt, error; 6433 6434 error = 0; 6435 #ifdef MAC 6436 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6437 name); 6438 #endif 6439 if (error != 0) 6440 goto out; 6441 6442 uiop->uio_rw = UIO_WRITE; 6443 uiop->uio_segflg = UIO_SYSSPACE; 6444 uiop->uio_td = p; 6445 uiop->uio_offset = 0; 6446 uiop->uio_resid = len; 6447 if (len > 0) { 6448 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); 6449 uiop->uio_iov = iv; 6450 uiop->uio_iovcnt = cnt; 6451 } else { 6452 uiop->uio_iov = iv = NULL; 6453 uiop->uio_iovcnt = 0; 6454 } 6455 if (error == 0) { 6456 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6457 cred, p); 6458 free(iv, M_TEMP); 6459 } 6460 6461 out: 6462 NFSEXITCODE(error); 6463 return (error); 6464 } 6465 6466 /* 6467 * Remove Extended attribute vnode op. 6468 */ 6469 int 6470 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 6471 struct ucred *cred, struct thread *p) 6472 { 6473 int error; 6474 6475 /* 6476 * Get rid of any delegations. I am not sure why this is required, 6477 * but RFC-8276 says so. 6478 */ 6479 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 6480 if (error != 0) 6481 goto out; 6482 #ifdef MAC 6483 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6484 name); 6485 if (error != 0) 6486 goto out; 6487 #endif 6488 6489 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 6490 if (error == EOPNOTSUPP) 6491 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6492 cred, p); 6493 out: 6494 NFSEXITCODE(error); 6495 return (error); 6496 } 6497 6498 /* 6499 * List Extended Atribute vnode op into an mbuf list. 
6500 */ 6501 int 6502 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 6503 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 6504 { 6505 struct iovec iv; 6506 struct uio io; 6507 int error; 6508 size_t siz; 6509 6510 *bufp = NULL; 6511 /* First, find out the size of the extended attribute. */ 6512 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 6513 p); 6514 if (error != 0) 6515 return (NFSERR_NOXATTR); 6516 if (siz <= cookie) { 6517 *lenp = 0; 6518 *eofp = true; 6519 goto out; 6520 } 6521 if (siz > cookie + *lenp) { 6522 siz = cookie + *lenp; 6523 *eofp = false; 6524 } else 6525 *eofp = true; 6526 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ 6527 if (siz > 10 * 1024 * 1024) { 6528 error = NFSERR_XATTR2BIG; 6529 goto out; 6530 } 6531 *bufp = malloc(siz, M_TEMP, M_WAITOK); 6532 iv.iov_base = *bufp; 6533 iv.iov_len = siz; 6534 io.uio_iovcnt = 1; 6535 io.uio_iov = &iv; 6536 io.uio_offset = 0; 6537 io.uio_resid = siz; 6538 io.uio_rw = UIO_READ; 6539 io.uio_segflg = UIO_SYSSPACE; 6540 io.uio_td = p; 6541 #ifdef MAC 6542 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 6543 if (error != 0) 6544 goto out; 6545 #endif 6546 6547 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 6548 p); 6549 if (error != 0) 6550 goto out; 6551 if (io.uio_resid > 0) 6552 siz -= io.uio_resid; 6553 *lenp = siz; 6554 6555 out: 6556 if (error != 0) { 6557 free(*bufp, M_TEMP); 6558 *bufp = NULL; 6559 } 6560 NFSEXITCODE(error); 6561 return (error); 6562 } 6563 6564 /* 6565 * Trim trailing data off the mbuf list being built. 
6566 */ 6567 static void 6568 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 6569 int bextpg, int bextpgsiz) 6570 { 6571 vm_page_t pg; 6572 int fullpgsiz, i; 6573 6574 if (mb->m_next != NULL) { 6575 m_freem(mb->m_next); 6576 mb->m_next = NULL; 6577 } 6578 if ((mb->m_flags & M_EXTPG) != 0) { 6579 /* First, get rid of any pages after this position. */ 6580 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 6581 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 6582 vm_page_unwire_noq(pg); 6583 vm_page_free(pg); 6584 } 6585 mb->m_epg_npgs = bextpg + 1; 6586 if (bextpg == 0) 6587 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 6588 else 6589 fullpgsiz = PAGE_SIZE; 6590 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 6591 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 6592 for (i = 1; i < mb->m_epg_npgs; i++) 6593 mb->m_len += m_epg_pagelen(mb, i, 0); 6594 nd->nd_bextpgsiz = bextpgsiz; 6595 nd->nd_bextpg = bextpg; 6596 } else 6597 mb->m_len = bpos - mtod(mb, char *); 6598 nd->nd_mb = mb; 6599 nd->nd_bpos = bpos; 6600 } 6601 6602 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 6603 6604 /* 6605 * Called once to initialize data structures... 
6606 */ 6607 static int 6608 nfsd_modevent(module_t mod, int type, void *data) 6609 { 6610 int error = 0, i; 6611 static int loaded = 0; 6612 6613 switch (type) { 6614 case MOD_LOAD: 6615 if (loaded) 6616 goto out; 6617 newnfs_portinit(); 6618 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 6619 mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, 6620 MTX_DEF); 6621 mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, 6622 MTX_DEF); 6623 } 6624 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 6625 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 6626 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 6627 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 6628 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 6629 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); 6630 nfsrvd_initcache(); 6631 nfsd_init(); 6632 NFSD_LOCK(); 6633 nfsrvd_init(0); 6634 NFSD_UNLOCK(); 6635 nfsd_mntinit(); 6636 #ifdef VV_DISABLEDELEG 6637 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 6638 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 6639 #endif 6640 nfsd_call_servertimer = nfsrv_servertimer; 6641 nfsd_call_nfsd = nfssvc_nfsd; 6642 loaded = 1; 6643 break; 6644 6645 case MOD_UNLOAD: 6646 if (newnfs_numnfsd != 0) { 6647 error = EBUSY; 6648 break; 6649 } 6650 6651 #ifdef VV_DISABLEDELEG 6652 vn_deleg_ops.vndeleg_recall = NULL; 6653 vn_deleg_ops.vndeleg_disable = NULL; 6654 #endif 6655 nfsd_call_servertimer = NULL; 6656 nfsd_call_nfsd = NULL; 6657 6658 /* Clean out all NFSv4 state. */ 6659 nfsrv_throwawayallstate(curthread); 6660 6661 /* Clean the NFS server reply cache */ 6662 nfsrvd_cleancache(); 6663 6664 /* Free up the krpc server pool. 
*/ 6665 if (nfsrvd_pool != NULL) 6666 svcpool_destroy(nfsrvd_pool); 6667 6668 /* and get rid of the locks */ 6669 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 6670 mtx_destroy(&nfsrchash_table[i].mtx); 6671 mtx_destroy(&nfsrcahash_table[i].mtx); 6672 } 6673 mtx_destroy(&nfsrc_udpmtx); 6674 mtx_destroy(&nfs_v4root_mutex); 6675 mtx_destroy(&nfsv4root_mnt.mnt_mtx); 6676 mtx_destroy(&nfsrv_dontlistlock_mtx); 6677 mtx_destroy(&nfsrv_recalllock_mtx); 6678 for (i = 0; i < nfsrv_sessionhashsize; i++) 6679 mtx_destroy(&nfssessionhash[i].mtx); 6680 if (nfslayouthash != NULL) { 6681 for (i = 0; i < nfsrv_layouthashsize; i++) 6682 mtx_destroy(&nfslayouthash[i].mtx); 6683 free(nfslayouthash, M_NFSDSESSION); 6684 } 6685 lockdestroy(&nfsv4root_mnt.mnt_explock); 6686 free(nfsclienthash, M_NFSDCLIENT); 6687 free(nfslockhash, M_NFSDLOCKFILE); 6688 free(nfssessionhash, M_NFSDSESSION); 6689 loaded = 0; 6690 break; 6691 default: 6692 error = EOPNOTSUPP; 6693 break; 6694 } 6695 6696 out: 6697 NFSEXITCODE(error); 6698 return (error); 6699 } 6700 static moduledata_t nfsd_mod = { 6701 "nfsd", 6702 nfsd_modevent, 6703 NULL, 6704 }; 6705 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 6706 6707 /* So that loader and kldload(2) can find us, wherever we are.. */ 6708 MODULE_VERSION(nfsd, 1); 6709 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 6710 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 6711 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 6712 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 6713