1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 36 #include <sys/capsicum.h> 37 #include <sys/extattr.h> 38 39 /* 40 * Functions that perform the vfs operations required by the routines in 41 * nfsd_serv.c. 
It is hoped that this change will make the server more 42 * portable. 43 */ 44 45 #include <fs/nfs/nfsport.h> 46 #include <security/mac/mac_framework.h> 47 #include <sys/callout.h> 48 #include <sys/filio.h> 49 #include <sys/hash.h> 50 #include <sys/osd.h> 51 #include <sys/sysctl.h> 52 #include <nlm/nlm_prot.h> 53 #include <nlm/nlm.h> 54 #include <vm/vm_param.h> 55 #include <vm/vnode_pager.h> 56 57 FEATURE(nfsd, "NFSv4 server"); 58 59 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 60 extern int nfsrv_useacl; 61 extern int newnfs_numnfsd; 62 extern int nfsrv_sessionhashsize; 63 extern struct nfslayouthash *nfslayouthash; 64 extern int nfsrv_layouthashsize; 65 extern struct mtx nfsrv_dslock_mtx; 66 extern int nfs_pnfsiothreads; 67 extern volatile int nfsrv_devidcnt; 68 extern int nfsrv_maxpnfsmirror; 69 extern uint32_t nfs_srvmaxio; 70 extern int nfs_bufpackets; 71 extern u_long sb_max_adj; 72 extern struct nfsv4lock nfsv4rootfs_lock; 73 74 NFSD_VNET_DECLARE(int, nfsrv_numnfsd); 75 NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); 76 NFSD_VNET_DECLARE(SVCPOOL *, nfsrvd_pool); 77 NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); 78 NFSD_VNET_DECLARE(struct nfslockhashhead *, nfslockhash); 79 NFSD_VNET_DECLARE(struct nfssessionhash *, nfssessionhash); 80 NFSD_VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock); 81 NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); 82 83 NFSDLOCKMUTEX; 84 NFSSTATESPINLOCK; 85 struct mtx nfsrc_udpmtx; 86 struct mtx nfs_v4root_mutex; 87 struct mtx nfsrv_dontlistlock_mtx; 88 struct mtx nfsrv_recalllock_mtx; 89 struct nfsrvfh nfs_pubfh; 90 int nfs_pubfhset = 0; 91 int nfsd_debuglevel = 0; 92 static pid_t nfsd_master_pid = (pid_t)-1; 93 static char nfsd_master_comm[MAXCOMLEN + 1]; 94 static struct timeval nfsd_master_start; 95 static uint32_t nfsv4_sysid = 0; 96 static fhandle_t zerofh; 97 98 NFSD_VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL; 99 NFSD_VNET_DEFINE(struct nfsrvhashhead *, 
nfsrvudphashtbl); 100 NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table); 101 NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table); 102 NFSD_VNET_DEFINE(struct nfsrvfh, nfs_rootfh); 103 NFSD_VNET_DEFINE(int, nfs_rootfhset) = 0; 104 NFSD_VNET_DEFINE(struct callout, nfsd_callout); 105 NFSD_VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt); 106 NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt); 107 NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt); 108 NFSD_VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false; 109 NFSD_VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false; 110 111 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 112 struct ucred *); 113 static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *); 114 115 int nfsrv_enable_crossmntpt = 1; 116 static int nfs_commit_blks; 117 static int nfs_commit_miss; 118 extern int nfsrv_issuedelegs; 119 extern int nfsrv_dolocallocks; 120 extern struct nfsdevicehead nfsrv_devidhead; 121 122 /* Map d_type to vnode type. 
*/ 123 static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR, 124 VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON }; 125 126 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, 127 struct iovec **); 128 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, 129 struct mbuf **, struct iovec **); 130 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, 131 int *); 132 static void nfs_dtypetovtype(struct nfsvattr *, struct vnode *, uint8_t); 133 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 134 NFSPROC_T *); 135 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, 136 int *, char *, fhandle_t *); 137 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 138 NFSPROC_T *); 139 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, 140 struct thread *, int, struct mbuf **, char *, struct mbuf **, 141 struct nfsvattr *, struct acl *, off_t *, int, bool *); 142 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 143 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 144 NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); 145 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 146 NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, 147 char *, int *); 148 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 149 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 150 static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 151 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 152 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 153 struct vnode *, struct nfsmount **, int, struct acl *, int *); 154 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 155 struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); 
156 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 157 struct vnode *, struct nfsmount *, struct nfsvattr *); 158 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, 159 NFSPROC_T *, struct nfsmount *); 160 static int nfsrv_putfhname(fhandle_t *, char *); 161 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 162 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 163 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, 164 struct vnode *, NFSPROC_T *); 165 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 166 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 167 NFSPROC_T *); 168 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); 169 170 int nfs_pnfsio(task_fn_t *, void *); 171 172 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 173 "NFS server"); 174 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 175 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 176 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 177 0, ""); 178 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 179 0, ""); 180 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 181 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 182 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 183 0, "Debug level for NFS server"); 184 NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid); 185 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, 186 CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_enable_stringtouid), 187 0, "Enable nfsd to accept numeric owner_names"); 188 static int nfsrv_pnfsgetdsattr = 1; 189 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 190 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 191 static bool nfsrv_recalldeleg = false; 192 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, 
recalldeleg, CTLFLAG_RW, 193 &nfsrv_recalldeleg, 0, 194 "When set remove/rename recalls delegations for same client"); 195 196 /* 197 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 198 * not running. 199 * The dsN subdirectories for the increased values must have been created 200 * on all DS servers before this increase is done. 201 */ 202 u_int nfsrv_dsdirsize = 20; 203 static int 204 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 205 { 206 int error, newdsdirsize; 207 208 newdsdirsize = nfsrv_dsdirsize; 209 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 210 if (error != 0 || req->newptr == NULL) 211 return (error); 212 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 213 newnfs_numnfsd != 0) 214 return (EINVAL); 215 nfsrv_dsdirsize = newdsdirsize; 216 return (0); 217 } 218 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 219 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 220 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 221 222 /* 223 * nfs_srvmaxio can only be increased and only when the nfsd threads are 224 * not running. The setting must be a power of 2, with the current limit of 225 * 1Mbyte. 
226 */ 227 static int 228 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS) 229 { 230 int error; 231 u_int newsrvmaxio; 232 uint64_t tval; 233 234 newsrvmaxio = nfs_srvmaxio; 235 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req); 236 if (error != 0 || req->newptr == NULL) 237 return (error); 238 if (newsrvmaxio == nfs_srvmaxio) 239 return (0); 240 if (newsrvmaxio < nfs_srvmaxio) { 241 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n"); 242 return (EINVAL); 243 } 244 if (newsrvmaxio > 1048576) { 245 printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n"); 246 return (EINVAL); 247 } 248 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) { 249 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n"); 250 return (EINVAL); 251 } 252 253 /* 254 * Check that kern.ipc.maxsockbuf is large enough for 255 * newsrviomax, given the setting of vfs.nfs.bufpackets. 256 */ 257 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets > 258 sb_max_adj) { 259 /* 260 * Suggest vfs.nfs.bufpackets * maximum RPC message for 261 * sb_max_adj. 262 */ 263 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets; 264 265 /* 266 * Convert suggested sb_max_adj value to a suggested 267 * sb_max value, which is what is set via kern.ipc.maxsockbuf. 268 * Perform the inverse calculation of (from uipc_sockbuf.c): 269 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES / 270 * (MSIZE + MCLBYTES); 271 * XXX If the calculation of sb_max_adj from sb_max changes, 272 * this calculation must be changed as well. 273 */ 274 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */ 275 tval += MCLBYTES - 1; /* Round up divide. 
*/ 276 tval /= MCLBYTES; 277 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of " 278 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval, 279 newsrvmaxio); 280 return (EINVAL); 281 } 282 283 NFSD_LOCK(); 284 if (newnfs_numnfsd != 0) { 285 NFSD_UNLOCK(); 286 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd " 287 "threads are running\n"); 288 return (EINVAL); 289 } 290 291 292 nfs_srvmaxio = newsrvmaxio; 293 NFSD_UNLOCK(); 294 return (0); 295 } 296 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio, 297 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 298 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes"); 299 300 static int 301 sysctl_dolocallocks(SYSCTL_HANDLER_ARGS) 302 { 303 int error, igotlock, newdolocallocks; 304 305 newdolocallocks = nfsrv_dolocallocks; 306 error = sysctl_handle_int(oidp, &newdolocallocks, 0, req); 307 if (error != 0 || req->newptr == NULL) 308 return (error); 309 if (newdolocallocks == nfsrv_dolocallocks) 310 return (0); 311 if (jailed(curthread->td_ucred)) 312 return (EINVAL); 313 314 NFSLOCKV4ROOTMUTEX(); 315 do { 316 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, 317 NFSV4ROOTLOCKMUTEXPTR, NULL); 318 } while (!igotlock); 319 NFSUNLOCKV4ROOTMUTEX(); 320 321 nfsrv_dolocallocks = newdolocallocks; 322 323 NFSLOCKV4ROOTMUTEX(); 324 nfsv4_unlock(&nfsv4rootfs_lock, 0); 325 NFSUNLOCKV4ROOTMUTEX(); 326 return (0); 327 } 328 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks, 329 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 330 sysctl_dolocallocks, "IU", "Enable nfsd to acquire local locks on files"); 331 332 #define MAX_REORDERED_RPC 16 333 #define NUM_HEURISTIC 1031 334 #define NHUSE_INIT 64 335 #define NHUSE_INC 16 336 #define NHUSE_MAX 2048 337 338 static struct nfsheur { 339 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 340 off_t nh_nextoff; /* next offset for sequential detection */ 341 int nh_use; /* use count for selection */ 342 int nh_seqcount; /* heuristic */ 343 } nfsheur[NUM_HEURISTIC]; 344 345 /* 346 * 
Heuristic to detect sequential operation.
 *
 * Selects the nfsheur[] entry to use for vp and updates its sequential
 * access estimate (nh_seqcount) and use count (nh_use) for the I/O
 * described by uio.  Returns a pointer to the selected entry.
 */
static struct nfsheur *
nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
{
	struct nfsheur *best;
	int probes, slot;

	/*
	 * Locate the best candidate: probe 32 slots starting at the one
	 * derived from the vnode address.  An entry already tracking vp wins
	 * outright.  Otherwise the use count of the current slot is decayed
	 * and the lowest use count seen among the following slots is kept.
	 */
	slot = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
	best = &nfsheur[slot];
	for (probes = 32; probes != 0; probes--) {
		if (nfsheur[slot].nh_vp == vp) {
			best = &nfsheur[slot];
			break;
		}
		if (nfsheur[slot].nh_use > 0)
			nfsheur[slot].nh_use--;
		slot = (slot + 1) % NUM_HEURISTIC;
		if (nfsheur[slot].nh_use < best->nh_use)
			best = &nfsheur[slot];
	}

	/* A recycled entry is (re)initialized for this file. */
	if (best->nh_vp != vp) {
		best->nh_vp = vp;
		best->nh_nextoff = uio->uio_offset;
		best->nh_use = NHUSE_INIT;
		best->nh_seqcount = (uio->uio_offset == 0) ? 4 : 1;
	}

	/* Calculate heuristic. */
	if ((uio->uio_offset == 0 && best->nh_seqcount > 0) ||
	    uio->uio_offset == best->nh_nextoff) {
		/* See comments in vfs_vnops.c:sequential_heuristic(). */
		best->nh_seqcount += howmany(uio->uio_resid, 16384);
		if (best->nh_seqcount > IO_SEQMAX)
			best->nh_seqcount = IO_SEQMAX;
	} else if (qabs(uio->uio_offset - best->nh_nextoff) >
	    MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize,
	    uio->uio_resid)) {
		/*
		 * Too far from the expected offset to be a reordered RPC:
		 * halve the estimate, dropping it to zero once it reaches 1.
		 * (A nearby offset leaves nh_seqcount alone.)
		 */
		best->nh_seqcount = (best->nh_seqcount > 1) ?
		    best->nh_seqcount / 2 : 0;
	}

	/* Credit the entry for this use, capped at NHUSE_MAX. */
	best->nh_use += NHUSE_INC;
	if (best->nh_use > NHUSE_MAX)
		best->nh_use = NHUSE_MAX;
	return (best);
}

/*
 * Get attributes into nfsvattr structure.
404 */ 405 int 406 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 407 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 408 nfsattrbit_t *attrbitp) 409 { 410 int error, gotattr, lockedit = 0; 411 struct nfsvattr na; 412 413 if (vpislocked == 0) { 414 /* 415 * When vpislocked == 0, the vnode is either exclusively 416 * locked by this thread or not locked by this thread. 417 * As such, shared lock it, if not exclusively locked. 418 */ 419 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 420 lockedit = 1; 421 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 422 } 423 } 424 425 /* 426 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 427 * attributes, as required. 428 * This needs to be done for regular files if: 429 * - non-NFSv4 RPCs or 430 * - when attrbitp == NULL or 431 * - an NFSv4 RPC with any of the above attributes in attrbitp. 432 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 433 * these attributes. nfsrv_proxyds() will return an error if the 434 * server is not a pNFS one. 435 */ 436 gotattr = 0; 437 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 438 (nd->nd_flag & ND_NFSV4) == 0 || 439 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 440 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 441 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 442 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 443 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 444 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 445 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 446 NULL); 447 if (error == 0) 448 gotattr = 1; 449 } 450 451 nvap->na_bsdflags = 0; 452 nvap->na_flags = 0; 453 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 454 if (lockedit != 0) 455 NFSVOPUNLOCK(vp); 456 457 /* 458 * If we got the Change, Size and Modify Time from the DS, 459 * replace them. 
460 */ 461 if (gotattr != 0) { 462 nvap->na_atime = na.na_atime; 463 nvap->na_mtime = na.na_mtime; 464 nvap->na_filerev = na.na_filerev; 465 nvap->na_size = na.na_size; 466 nvap->na_bytes = na.na_bytes; 467 } 468 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 469 error, (uintmax_t)na.na_filerev); 470 471 NFSEXITCODE(error); 472 return (error); 473 } 474 475 /* 476 * Get a file handle for a vnode. 477 */ 478 int 479 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 480 { 481 int error; 482 483 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 484 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 485 error = VOP_VPTOFH(vp, &fhp->fh_fid); 486 487 NFSEXITCODE(error); 488 return (error); 489 } 490 491 /* 492 * Perform access checking for vnodes obtained from file handles that would 493 * refer to files already opened by a Unix client. You cannot just use 494 * vn_writechk() and VOP_ACCESSX() for two reasons. 495 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 496 * case. 497 * 2 - The owner is to be given access irrespective of mode bits for some 498 * operations, so that processes that chmod after opening a file don't 499 * break. 500 */ 501 int 502 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 503 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 504 u_int32_t *supportedtypep) 505 { 506 struct vattr vattr; 507 int error = 0, getret = 0; 508 509 if (vpislocked == 0) { 510 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 511 error = EPERM; 512 goto out; 513 } 514 } 515 if (accmode & VWRITE) { 516 /* Just vn_writechk() changed to check rdonly */ 517 /* 518 * Disallow write attempts on read-only file systems; 519 * unless the file is a socket or a block or character 520 * device resident on the file system. 
521 */ 522 if (NFSVNO_EXRDONLY(exp) || 523 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 524 switch (vp->v_type) { 525 case VREG: 526 case VDIR: 527 case VLNK: 528 error = EROFS; 529 default: 530 break; 531 } 532 } 533 /* 534 * If there's shared text associated with 535 * the inode, try to free it up once. If 536 * we fail, we can't allow writing. 537 */ 538 if (VOP_IS_TEXT(vp) && error == 0) 539 error = ETXTBSY; 540 } 541 if (error != 0) { 542 if (vpislocked == 0) 543 NFSVOPUNLOCK(vp); 544 goto out; 545 } 546 547 /* 548 * Should the override still be applied when ACLs are enabled? 549 */ 550 error = VOP_ACCESSX(vp, accmode, cred, p); 551 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 552 /* 553 * Try again with VEXPLICIT_DENY, to see if the test for 554 * deletion is supported. 555 */ 556 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 557 if (error == 0) { 558 if (vp->v_type == VDIR) { 559 accmode &= ~(VDELETE | VDELETE_CHILD); 560 accmode |= VWRITE; 561 error = VOP_ACCESSX(vp, accmode, cred, p); 562 } else if (supportedtypep != NULL) { 563 *supportedtypep &= ~NFSACCESS_DELETE; 564 } 565 } 566 } 567 568 /* 569 * Allow certain operations for the owner (reads and writes 570 * on files that are already open). 571 */ 572 if (override != NFSACCCHK_NOOVERRIDE && 573 (error == EPERM || error == EACCES)) { 574 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 575 error = 0; 576 else if (override & NFSACCCHK_ALLOWOWNER) { 577 getret = VOP_GETATTR(vp, &vattr, cred); 578 if (getret == 0 && cred->cr_uid == vattr.va_uid) 579 error = 0; 580 } 581 } 582 if (vpislocked == 0) 583 NFSVOPUNLOCK(vp); 584 585 out: 586 NFSEXITCODE(error); 587 return (error); 588 } 589 590 /* 591 * Set attribute(s) vnop. 
592 */ 593 int 594 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 595 struct thread *p, struct nfsexstuff *exp) 596 { 597 u_quad_t savsize = 0; 598 int error, savedit; 599 time_t savbtime; 600 601 /* 602 * If this is an exported file system and a pNFS service is running, 603 * don't VOP_SETATTR() of size for the MDS file system. 604 */ 605 savedit = 0; 606 error = 0; 607 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && 608 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && 609 nvap->na_vattr.va_size > 0) { 610 savsize = nvap->na_vattr.va_size; 611 nvap->na_vattr.va_size = VNOVAL; 612 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 613 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 614 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 615 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 616 nvap->na_vattr.va_mtime.tv_sec != VNOVAL) 617 savedit = 1; 618 else 619 savedit = 2; 620 } 621 if (savedit != 2) 622 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 623 if (savedit != 0) 624 nvap->na_vattr.va_size = savsize; 625 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 626 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 627 nvap->na_vattr.va_size != VNOVAL || 628 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 629 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 630 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 631 /* Never modify birthtime on a DS file. */ 632 savbtime = nvap->na_vattr.va_birthtime.tv_sec; 633 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; 634 /* For a pNFS server, set the attributes on the DS file. */ 635 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, 636 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); 637 nvap->na_vattr.va_birthtime.tv_sec = savbtime; 638 if (error == ENOENT) 639 error = 0; 640 } 641 NFSEXITCODE(error); 642 return (error); 643 } 644 645 /* 646 * Set up nameidata for a lookup() call and do it. 
647 */ 648 int 649 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 650 struct vnode *dp, int islocked, struct nfsexstuff *exp, 651 struct vnode **retdirp) 652 { 653 struct componentname *cnp = &ndp->ni_cnd; 654 int i; 655 struct iovec aiov; 656 struct uio auio; 657 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 658 int error = 0; 659 char *cp; 660 661 *retdirp = NULL; 662 cnp->cn_nameptr = cnp->cn_pnbuf; 663 ndp->ni_lcf = 0; 664 /* 665 * Extract and set starting directory. 666 */ 667 if (dp->v_type != VDIR) { 668 if (islocked) 669 vput(dp); 670 else 671 vrele(dp); 672 nfsvno_relpathbuf(ndp); 673 error = ENOTDIR; 674 goto out1; 675 } 676 if (islocked) 677 NFSVOPUNLOCK(dp); 678 VREF(dp); 679 *retdirp = dp; 680 if (NFSVNO_EXRDONLY(exp)) 681 cnp->cn_flags |= RDONLY; 682 ndp->ni_segflg = UIO_SYSSPACE; 683 684 if (nd->nd_flag & ND_PUBLOOKUP) { 685 ndp->ni_loopcnt = 0; 686 if (cnp->cn_pnbuf[0] == '/') { 687 vrele(dp); 688 /* 689 * Check for degenerate pathnames here, since lookup() 690 * panics on them. 691 */ 692 for (i = 1; i < ndp->ni_pathlen; i++) 693 if (cnp->cn_pnbuf[i] != '/') 694 break; 695 if (i == ndp->ni_pathlen) { 696 error = NFSERR_ACCES; 697 goto out; 698 } 699 dp = rootvnode; 700 VREF(dp); 701 } 702 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 703 (nd->nd_flag & ND_NFSV4) == 0) { 704 /* 705 * Only cross mount points for NFSv4 when doing a 706 * mount while traversing the file system above 707 * the mount point, unless nfsrv_enable_crossmntpt is set. 708 */ 709 cnp->cn_flags |= NOCROSSMOUNT; 710 } 711 712 /* 713 * Initialize for scan, set ni_startdir and bump ref on dp again 714 * because lookup() will dereference ni_startdir. 715 */ 716 717 ndp->ni_startdir = dp; 718 ndp->ni_rootdir = rootvnode; 719 ndp->ni_topdir = NULL; 720 721 if (!lockleaf) 722 cnp->cn_flags |= LOCKLEAF; 723 for (;;) { 724 cnp->cn_nameptr = cnp->cn_pnbuf; 725 /* 726 * Call lookup() to do the real work. 
If an error occurs, 727 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 728 * we do not have to dereference anything before returning. 729 * In either case ni_startdir will be dereferenced and NULLed 730 * out. 731 */ 732 error = vfs_lookup(ndp); 733 if (error) 734 break; 735 736 /* 737 * Check for encountering a symbolic link. Trivial 738 * termination occurs if no symlink encountered. 739 */ 740 if ((cnp->cn_flags & ISSYMLINK) == 0) { 741 if (ndp->ni_vp && !lockleaf) 742 NFSVOPUNLOCK(ndp->ni_vp); 743 break; 744 } 745 746 /* 747 * Validate symlink 748 */ 749 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 750 NFSVOPUNLOCK(ndp->ni_dvp); 751 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 752 error = EINVAL; 753 goto badlink2; 754 } 755 756 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 757 error = ELOOP; 758 goto badlink2; 759 } 760 if (ndp->ni_pathlen > 1) 761 cp = uma_zalloc(namei_zone, M_WAITOK); 762 else 763 cp = cnp->cn_pnbuf; 764 aiov.iov_base = cp; 765 aiov.iov_len = MAXPATHLEN; 766 auio.uio_iov = &aiov; 767 auio.uio_iovcnt = 1; 768 auio.uio_offset = 0; 769 auio.uio_rw = UIO_READ; 770 auio.uio_segflg = UIO_SYSSPACE; 771 auio.uio_td = NULL; 772 auio.uio_resid = MAXPATHLEN; 773 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 774 if (error) { 775 badlink1: 776 if (ndp->ni_pathlen > 1) 777 uma_zfree(namei_zone, cp); 778 badlink2: 779 vrele(ndp->ni_dvp); 780 vput(ndp->ni_vp); 781 break; 782 } 783 linklen = MAXPATHLEN - auio.uio_resid; 784 if (linklen == 0) { 785 error = ENOENT; 786 goto badlink1; 787 } 788 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 789 error = ENAMETOOLONG; 790 goto badlink1; 791 } 792 793 /* 794 * Adjust or replace path 795 */ 796 if (ndp->ni_pathlen > 1) { 797 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 798 uma_zfree(namei_zone, cnp->cn_pnbuf); 799 cnp->cn_pnbuf = cp; 800 } else 801 cnp->cn_pnbuf[linklen] = '\0'; 802 ndp->ni_pathlen += linklen; 803 804 /* 805 * Cleanup refs for next loop and check if root directory 806 
* should replace current directory. Normally ni_dvp 807 * becomes the new base directory and is cleaned up when 808 * we loop. Explicitly null pointers after invalidation 809 * to clarify operation. 810 */ 811 vput(ndp->ni_vp); 812 ndp->ni_vp = NULL; 813 814 if (cnp->cn_pnbuf[0] == '/') { 815 vrele(ndp->ni_dvp); 816 ndp->ni_dvp = ndp->ni_rootdir; 817 VREF(ndp->ni_dvp); 818 } 819 ndp->ni_startdir = ndp->ni_dvp; 820 ndp->ni_dvp = NULL; 821 } 822 if (!lockleaf) 823 cnp->cn_flags &= ~LOCKLEAF; 824 825 out: 826 if (error) { 827 nfsvno_relpathbuf(ndp); 828 ndp->ni_vp = NULL; 829 ndp->ni_dvp = NULL; 830 ndp->ni_startdir = NULL; 831 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 832 ndp->ni_dvp = NULL; 833 } 834 835 out1: 836 NFSEXITCODE2(error, nd); 837 return (error); 838 } 839 840 /* 841 * Set up a pathname buffer and return a pointer to it and, optionally 842 * set a hash pointer. 843 */ 844 void 845 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 846 { 847 struct componentname *cnp = &ndp->ni_cnd; 848 849 cnp->cn_flags |= (NOMACCHECK); 850 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 851 if (hashpp != NULL) 852 *hashpp = NULL; 853 *bufpp = cnp->cn_pnbuf; 854 } 855 856 /* 857 * Release the above path buffer, if not released by nfsvno_namei(). 858 */ 859 void 860 nfsvno_relpathbuf(struct nameidata *ndp) 861 { 862 863 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 864 ndp->ni_cnd.cn_pnbuf = NULL; 865 } 866 867 /* 868 * Readlink vnode op into an mbuf list. 
869 */ 870 int 871 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, 872 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 873 { 874 struct iovec *iv; 875 struct uio io, *uiop = &io; 876 struct mbuf *mp, *mp3; 877 int len, tlen, error = 0; 878 879 len = NFS_MAXPATHLEN; 880 if (maxextsiz > 0) 881 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 882 &mp3, &mp, &iv); 883 else 884 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); 885 uiop->uio_iov = iv; 886 uiop->uio_offset = 0; 887 uiop->uio_resid = len; 888 uiop->uio_rw = UIO_READ; 889 uiop->uio_segflg = UIO_SYSSPACE; 890 uiop->uio_td = NULL; 891 error = VOP_READLINK(vp, uiop, cred); 892 free(iv, M_TEMP); 893 if (error) { 894 m_freem(mp3); 895 *lenp = 0; 896 goto out; 897 } 898 if (uiop->uio_resid > 0) { 899 len -= uiop->uio_resid; 900 tlen = NFSM_RNDUP(len); 901 if (tlen == 0) { 902 m_freem(mp3); 903 mp3 = mp = NULL; 904 } else if (tlen != NFS_MAXPATHLEN || tlen != len) 905 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, 906 tlen - len); 907 } 908 *lenp = len; 909 *mpp = mp3; 910 *mpendp = mp; 911 912 out: 913 NFSEXITCODE(error); 914 return (error); 915 } 916 917 /* 918 * Create an mbuf chain and an associated iovec that can be used to Read 919 * or Getextattr of data. 920 * Upon success, return pointers to the first and last mbufs in the chain 921 * plus the malloc'd iovec and its iovlen. 922 */ 923 static int 924 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, 925 struct iovec **ivp) 926 { 927 struct mbuf *m, *m2 = NULL, *m3; 928 struct iovec *iv; 929 int i, left, siz; 930 931 left = len; 932 m3 = NULL; 933 /* 934 * Generate the mbuf list with the uio_iov ref. to it. 
935 */ 936 i = 0; 937 while (left > 0) { 938 NFSMGET(m); 939 MCLGET(m, M_WAITOK); 940 m->m_len = 0; 941 siz = min(M_TRAILINGSPACE(m), left); 942 left -= siz; 943 i++; 944 if (m3) 945 m2->m_next = m; 946 else 947 m3 = m; 948 m2 = m; 949 } 950 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 951 m = m3; 952 left = len; 953 i = 0; 954 while (left > 0) { 955 if (m == NULL) 956 panic("nfsrv_createiovec iov"); 957 siz = min(M_TRAILINGSPACE(m), left); 958 if (siz > 0) { 959 iv->iov_base = mtod(m, caddr_t) + m->m_len; 960 iv->iov_len = siz; 961 m->m_len += siz; 962 left -= siz; 963 iv++; 964 i++; 965 } 966 m = m->m_next; 967 } 968 *mpp = m3; 969 *mpendp = m2; 970 return (i); 971 } 972 973 /* 974 * Create an mbuf chain and an associated iovec that can be used to Read 975 * or Getextattr of data. 976 * Upon success, return pointers to the first and last mbufs in the chain 977 * plus the malloc'd iovec and its iovlen. 978 * Same as above, but creates ext_pgs mbuf(s). 979 */ 980 static int 981 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, 982 struct mbuf **mpendp, struct iovec **ivp) 983 { 984 struct mbuf *m, *m2 = NULL, *m3; 985 struct iovec *iv; 986 int i, left, pgno, siz; 987 988 left = len; 989 m3 = NULL; 990 /* 991 * Generate the mbuf list with the uio_iov ref. to it. 
992 */ 993 i = 0; 994 while (left > 0) { 995 siz = min(left, maxextsiz); 996 m = mb_alloc_ext_plus_pages(siz, M_WAITOK); 997 left -= siz; 998 i += m->m_epg_npgs; 999 if (m3 != NULL) 1000 m2->m_next = m; 1001 else 1002 m3 = m; 1003 m2 = m; 1004 } 1005 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 1006 m = m3; 1007 left = len; 1008 i = 0; 1009 pgno = 0; 1010 while (left > 0) { 1011 if (m == NULL) 1012 panic("nfsvno_createiovec_extpgs iov"); 1013 siz = min(PAGE_SIZE, left); 1014 if (siz > 0) { 1015 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); 1016 iv->iov_len = siz; 1017 m->m_len += siz; 1018 if (pgno == m->m_epg_npgs - 1) 1019 m->m_epg_last_len = siz; 1020 left -= siz; 1021 iv++; 1022 i++; 1023 pgno++; 1024 } 1025 if (pgno == m->m_epg_npgs && left > 0) { 1026 m = m->m_next; 1027 if (m == NULL) 1028 panic("nfsvno_createiovec_extpgs iov"); 1029 pgno = 0; 1030 } 1031 } 1032 *mpp = m3; 1033 *mpendp = m2; 1034 return (i); 1035 } 1036 1037 /* 1038 * Read vnode op call into mbuf list. 1039 */ 1040 int 1041 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 1042 int maxextsiz, struct thread *p, struct mbuf **mpp, 1043 struct mbuf **mpendp) 1044 { 1045 struct mbuf *m; 1046 struct iovec *iv; 1047 int error = 0, len, tlen, ioflag = 0; 1048 struct mbuf *m3; 1049 struct uio io, *uiop = &io; 1050 struct nfsheur *nh; 1051 1052 /* 1053 * Attempt to read from a DS file. A return of ENOENT implies 1054 * there is no DS file to read. 
1055 */ 1056 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 1057 NULL, mpendp, NULL, NULL, NULL, 0, NULL); 1058 if (error != ENOENT) 1059 return (error); 1060 1061 len = NFSM_RNDUP(cnt); 1062 if (maxextsiz > 0) 1063 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 1064 &m3, &m, &iv); 1065 else 1066 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); 1067 uiop->uio_iov = iv; 1068 uiop->uio_offset = off; 1069 uiop->uio_resid = len; 1070 uiop->uio_rw = UIO_READ; 1071 uiop->uio_segflg = UIO_SYSSPACE; 1072 uiop->uio_td = NULL; 1073 nh = nfsrv_sequential_heuristic(uiop, vp); 1074 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 1075 /* XXX KDM make this more systematic? */ 1076 NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid; 1077 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 1078 free(iv, M_TEMP); 1079 if (error) { 1080 m_freem(m3); 1081 *mpp = NULL; 1082 goto out; 1083 } 1084 nh->nh_nextoff = uiop->uio_offset; 1085 tlen = len - uiop->uio_resid; 1086 cnt = cnt < tlen ? cnt : tlen; 1087 tlen = NFSM_RNDUP(cnt); 1088 if (tlen == 0) { 1089 m_freem(m3); 1090 m3 = m = NULL; 1091 } else if (len != tlen || tlen != cnt) 1092 m = nfsrv_adj(m3, len - tlen, tlen - cnt); 1093 *mpp = m3; 1094 *mpendp = m; 1095 1096 out: 1097 NFSEXITCODE(error); 1098 return (error); 1099 } 1100 1101 /* 1102 * Create the iovec for the mbuf chain passed in as an argument. 1103 * The "cp" argument is where the data starts within the first mbuf in 1104 * the chain. It returns the iovec and the iovcnt. 1105 */ 1106 static int 1107 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, 1108 int *iovcntp) 1109 { 1110 struct mbuf *mp; 1111 struct iovec *ivp; 1112 int cnt, i, len; 1113 1114 /* 1115 * Loop through the mbuf chain, counting how many mbufs are a 1116 * part of this write operation, so the iovec size is known. 
1117 */ 1118 cnt = 0; 1119 len = retlen; 1120 mp = m; 1121 i = mtod(mp, caddr_t) + mp->m_len - cp; 1122 while (len > 0) { 1123 if (i > 0) { 1124 len -= i; 1125 cnt++; 1126 } 1127 mp = mp->m_next; 1128 if (!mp) { 1129 if (len > 0) 1130 return (EBADRPC); 1131 } else 1132 i = mp->m_len; 1133 } 1134 1135 /* Now, create the iovec. */ 1136 mp = m; 1137 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 1138 M_WAITOK); 1139 *iovcntp = cnt; 1140 i = mtod(mp, caddr_t) + mp->m_len - cp; 1141 len = retlen; 1142 while (len > 0) { 1143 if (mp == NULL) 1144 panic("nfsrv_createiovecw"); 1145 if (i > 0) { 1146 i = min(i, len); 1147 ivp->iov_base = cp; 1148 ivp->iov_len = i; 1149 ivp++; 1150 len -= i; 1151 } 1152 mp = mp->m_next; 1153 if (mp) { 1154 i = mp->m_len; 1155 cp = mtod(mp, caddr_t); 1156 } 1157 } 1158 return (0); 1159 } 1160 1161 /* 1162 * Write vnode op from an mbuf list. 1163 */ 1164 int 1165 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, 1166 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 1167 { 1168 struct iovec *iv; 1169 int cnt, ioflags, error; 1170 struct uio io, *uiop = &io; 1171 struct nfsheur *nh; 1172 1173 /* 1174 * Attempt to write to a DS file. A return of ENOENT implies 1175 * there is no DS file to write. 
1176 */ 1177 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, 1178 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); 1179 if (error != ENOENT) { 1180 *stable = NFSWRITE_FILESYNC; 1181 return (error); 1182 } 1183 1184 if (*stable == NFSWRITE_UNSTABLE) 1185 ioflags = IO_NODELOCKED; 1186 else 1187 ioflags = (IO_SYNC | IO_NODELOCKED); 1188 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); 1189 if (error != 0) 1190 return (error); 1191 uiop->uio_iov = iv; 1192 uiop->uio_iovcnt = cnt; 1193 uiop->uio_resid = retlen; 1194 uiop->uio_rw = UIO_WRITE; 1195 uiop->uio_segflg = UIO_SYSSPACE; 1196 NFSUIOPROC(uiop, p); 1197 uiop->uio_offset = off; 1198 nh = nfsrv_sequential_heuristic(uiop, vp); 1199 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 1200 /* XXX KDM make this more systematic? */ 1201 NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 1202 error = VOP_WRITE(vp, uiop, ioflags, cred); 1203 if (error == 0) 1204 nh->nh_nextoff = uiop->uio_offset; 1205 free(iv, M_TEMP); 1206 1207 NFSEXITCODE(error); 1208 return (error); 1209 } 1210 1211 /* 1212 * Common code for creating a regular file (plus special files for V2). 1213 */ 1214 int 1215 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 1216 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 1217 int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) 1218 { 1219 u_quad_t tempsize; 1220 int error; 1221 struct thread *p = curthread; 1222 1223 error = nd->nd_repstat; 1224 if (!error && ndp->ni_vp == NULL) { 1225 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 1226 error = VOP_CREATE(ndp->ni_dvp, 1227 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1228 /* For a pNFS server, create the data file on a DS. */ 1229 if (error == 0 && nvap->na_type == VREG) { 1230 /* 1231 * Create a data file on a DS for a pNFS server. 1232 * This function just returns if not 1233 * running a pNFS DS or the creation fails. 
1234 */ 1235 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1236 nd->nd_cred, p); 1237 } 1238 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1239 NULL, false); 1240 nfsvno_relpathbuf(ndp); 1241 if (!error) { 1242 if (*exclusive_flagp) { 1243 *exclusive_flagp = 0; 1244 NFSVNO_ATTRINIT(nvap); 1245 nvap->na_atime.tv_sec = cverf[0]; 1246 nvap->na_atime.tv_nsec = cverf[1]; 1247 error = VOP_SETATTR(ndp->ni_vp, 1248 &nvap->na_vattr, nd->nd_cred); 1249 if (error != 0) { 1250 vput(ndp->ni_vp); 1251 ndp->ni_vp = NULL; 1252 error = NFSERR_NOTSUPP; 1253 } 1254 } 1255 } 1256 /* 1257 * NFS V2 Only. nfsrvd_mknod() does this for V3. 1258 * (This implies, just get out on an error.) 1259 */ 1260 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 1261 nvap->na_type == VFIFO) { 1262 if (nvap->na_type == VCHR && rdev == 0xffffffff) 1263 nvap->na_type = VFIFO; 1264 if (nvap->na_type != VFIFO && 1265 (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { 1266 nfsvno_relpathbuf(ndp); 1267 vput(ndp->ni_dvp); 1268 goto out; 1269 } 1270 nvap->na_rdev = rdev; 1271 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1272 &ndp->ni_cnd, &nvap->na_vattr); 1273 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1274 NULL, false); 1275 nfsvno_relpathbuf(ndp); 1276 if (error) 1277 goto out; 1278 } else { 1279 nfsvno_relpathbuf(ndp); 1280 vput(ndp->ni_dvp); 1281 error = ENXIO; 1282 goto out; 1283 } 1284 *vpp = ndp->ni_vp; 1285 } else { 1286 /* 1287 * Handle cases where error is already set and/or 1288 * the file exists. 
1289 * 1 - clean up the lookup 1290 * 2 - iff !error and na_size set, truncate it 1291 */ 1292 nfsvno_relpathbuf(ndp); 1293 *vpp = ndp->ni_vp; 1294 if (ndp->ni_dvp == *vpp) 1295 vrele(ndp->ni_dvp); 1296 else 1297 vput(ndp->ni_dvp); 1298 if (!error && nvap->na_size != VNOVAL) { 1299 error = nfsvno_accchk(*vpp, VWRITE, 1300 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1301 NFSACCCHK_VPISLOCKED, NULL); 1302 if (!error) { 1303 tempsize = nvap->na_size; 1304 NFSVNO_ATTRINIT(nvap); 1305 nvap->na_size = tempsize; 1306 error = nfsvno_setattr(*vpp, nvap, 1307 nd->nd_cred, p, exp); 1308 } 1309 } 1310 if (error) 1311 vput(*vpp); 1312 } 1313 1314 out: 1315 NFSEXITCODE(error); 1316 return (error); 1317 } 1318 1319 /* 1320 * Do a mknod vnode op. 1321 */ 1322 int 1323 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1324 struct thread *p) 1325 { 1326 int error = 0; 1327 __enum_uint8(vtype) vtyp; 1328 1329 vtyp = nvap->na_type; 1330 /* 1331 * Iff doesn't exist, create it. 1332 */ 1333 if (ndp->ni_vp) { 1334 nfsvno_relpathbuf(ndp); 1335 vput(ndp->ni_dvp); 1336 vrele(ndp->ni_vp); 1337 error = EEXIST; 1338 goto out; 1339 } 1340 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1341 nfsvno_relpathbuf(ndp); 1342 vput(ndp->ni_dvp); 1343 error = NFSERR_BADTYPE; 1344 goto out; 1345 } 1346 if (vtyp == VSOCK) { 1347 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1348 &ndp->ni_cnd, &nvap->na_vattr); 1349 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, 1350 false); 1351 nfsvno_relpathbuf(ndp); 1352 } else { 1353 if (nvap->na_type != VFIFO && 1354 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { 1355 nfsvno_relpathbuf(ndp); 1356 vput(ndp->ni_dvp); 1357 goto out; 1358 } 1359 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1360 &ndp->ni_cnd, &nvap->na_vattr); 1361 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? 
&ndp->ni_vp : NULL, 1362 false); 1363 nfsvno_relpathbuf(ndp); 1364 /* 1365 * Since VOP_MKNOD returns the ni_vp, I can't 1366 * see any reason to do the lookup. 1367 */ 1368 } 1369 1370 out: 1371 NFSEXITCODE(error); 1372 return (error); 1373 } 1374 1375 /* 1376 * Mkdir vnode op. 1377 */ 1378 int 1379 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1380 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1381 { 1382 int error = 0; 1383 1384 if (ndp->ni_vp != NULL) { 1385 if (ndp->ni_dvp == ndp->ni_vp) 1386 vrele(ndp->ni_dvp); 1387 else 1388 vput(ndp->ni_dvp); 1389 vrele(ndp->ni_vp); 1390 nfsvno_relpathbuf(ndp); 1391 error = EEXIST; 1392 goto out; 1393 } 1394 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1395 &nvap->na_vattr); 1396 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); 1397 nfsvno_relpathbuf(ndp); 1398 1399 out: 1400 NFSEXITCODE(error); 1401 return (error); 1402 } 1403 1404 /* 1405 * symlink vnode op. 1406 */ 1407 int 1408 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1409 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1410 struct nfsexstuff *exp) 1411 { 1412 int error = 0; 1413 1414 if (ndp->ni_vp) { 1415 nfsvno_relpathbuf(ndp); 1416 if (ndp->ni_dvp == ndp->ni_vp) 1417 vrele(ndp->ni_dvp); 1418 else 1419 vput(ndp->ni_dvp); 1420 vrele(ndp->ni_vp); 1421 error = EEXIST; 1422 goto out; 1423 } 1424 1425 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1426 &nvap->na_vattr, pathcp); 1427 /* 1428 * Although FreeBSD still had the lookup code in 1429 * it for 7/current, there doesn't seem to be any 1430 * point, since VOP_SYMLINK() returns the ni_vp. 1431 * Just vput it for v2. 1432 */ 1433 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0); 1434 nfsvno_relpathbuf(ndp); 1435 1436 out: 1437 NFSEXITCODE(error); 1438 return (error); 1439 } 1440 1441 /* 1442 * Parse symbolic link arguments. 
1443 * This function has an ugly side effect. It will malloc() an area for 1444 * the symlink and set iov_base to point to it, only if it succeeds. 1445 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1446 * be FREE'd later. 1447 */ 1448 int 1449 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1450 struct thread *p, char **pathcpp, int *lenp) 1451 { 1452 u_int32_t *tl; 1453 char *pathcp = NULL; 1454 int error = 0, len; 1455 struct nfsv2_sattr *sp; 1456 1457 *pathcpp = NULL; 1458 *lenp = 0; 1459 if ((nd->nd_flag & ND_NFSV3) && 1460 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1461 goto nfsmout; 1462 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1463 len = fxdr_unsigned(int, *tl); 1464 if (len > NFS_MAXPATHLEN || len <= 0) { 1465 error = EBADRPC; 1466 goto nfsmout; 1467 } 1468 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1469 error = nfsrv_mtostr(nd, pathcp, len); 1470 if (error) 1471 goto nfsmout; 1472 if (nd->nd_flag & ND_NFSV2) { 1473 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1474 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1475 } 1476 *pathcpp = pathcp; 1477 *lenp = len; 1478 NFSEXITCODE2(0, nd); 1479 return (0); 1480 nfsmout: 1481 if (pathcp) 1482 free(pathcp, M_TEMP); 1483 NFSEXITCODE2(error, nd); 1484 return (error); 1485 } 1486 1487 /* 1488 * Remove a non-directory object. 
1489 */ 1490 int 1491 nfsvno_removesub(struct nameidata *ndp, bool is_v4, struct nfsrv_descript *nd, 1492 struct thread *p, struct nfsexstuff *exp) 1493 { 1494 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp; 1495 struct mount *mp; 1496 int error = 0, mirrorcnt, ret; 1497 char fname[PNFS_FILENAME_LEN + 1]; 1498 fhandle_t fh; 1499 1500 vp = ndp->ni_vp; 1501 dsdvp[0] = NULL; 1502 if (vp->v_type == VDIR) { 1503 error = NFSERR_ISDIR; 1504 } else if (is_v4) { 1505 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1506 error = nfsrv_checkremove(vp, 1, NULL, 1507 (nfsquad_t)((u_quad_t)0), p); 1508 else 1509 error = nfsrv_checkremove(vp, 1, NULL, nd->nd_clientid, 1510 p); 1511 } 1512 if (error == 0) 1513 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1514 if (!error) 1515 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1516 if (error == 0 && dsdvp[0] != NULL) 1517 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1518 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) 1519 error = nfsvno_getfh(vp, &fh, p); 1520 if (ndp->ni_dvp == vp) 1521 vrele(ndp->ni_dvp); 1522 else 1523 vput(ndp->ni_dvp); 1524 vput(vp); 1525 1526 /* Use ret to determine if the file still exists. */ 1527 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) { 1528 mp = vfs_busyfs(&fh.fh_fsid); 1529 if (mp != NULL) { 1530 /* Find out if the file still exists. */ 1531 ret = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &newvp); 1532 if (ret == 0) 1533 vput(newvp); 1534 else 1535 ret = ESTALE; 1536 vfs_unbusy(mp); 1537 } else { 1538 ret = ESTALE; 1539 } 1540 if (ret == ESTALE) { 1541 /* Get rid of any delegation. */ 1542 nfsrv_removedeleg(&fh, nd, p); 1543 } 1544 } 1545 1546 nfsvno_relpathbuf(ndp); 1547 NFSEXITCODE(error); 1548 return (error); 1549 } 1550 1551 /* 1552 * Remove a directory. 
1553 */ 1554 int 1555 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1556 struct thread *p, struct nfsexstuff *exp) 1557 { 1558 struct vnode *vp; 1559 int error = 0; 1560 1561 vp = ndp->ni_vp; 1562 if (vp->v_type != VDIR) { 1563 error = ENOTDIR; 1564 goto out; 1565 } 1566 /* 1567 * No rmdir "." please. 1568 */ 1569 if (ndp->ni_dvp == vp) { 1570 error = EINVAL; 1571 goto out; 1572 } 1573 /* 1574 * The root of a mounted filesystem cannot be deleted. 1575 */ 1576 if (vp->v_vflag & VV_ROOT) 1577 error = EBUSY; 1578 out: 1579 if (!error) 1580 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1581 if (ndp->ni_dvp == vp) 1582 vrele(ndp->ni_dvp); 1583 else 1584 vput(ndp->ni_dvp); 1585 vput(vp); 1586 nfsvno_relpathbuf(ndp); 1587 NFSEXITCODE(error); 1588 return (error); 1589 } 1590 1591 /* 1592 * Rename vnode op. 1593 */ 1594 int 1595 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1596 struct nfsrv_descript *nd, struct thread *p) 1597 { 1598 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS], *newvp; 1599 struct mount *mp; 1600 int error = 0, mirrorcnt, ret; 1601 char fname[PNFS_FILENAME_LEN + 1]; 1602 fhandle_t fh, fh2; 1603 1604 dsdvp[0] = NULL; 1605 fvp = fromndp->ni_vp; 1606 if (nd->nd_repstat != 0) { 1607 vrele(fromndp->ni_dvp); 1608 vrele(fvp); 1609 error = nd->nd_repstat; 1610 goto out1; 1611 } 1612 tdvp = tondp->ni_dvp; 1613 tvp = tondp->ni_vp; 1614 if (tvp != NULL) { 1615 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1616 error = (nd->nd_flag & ND_NFSV2) ? EISDIR : EEXIST; 1617 goto out; 1618 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1619 error = (nd->nd_flag & ND_NFSV2) ? ENOTDIR : EEXIST; 1620 goto out; 1621 } 1622 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1623 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1624 goto out; 1625 } 1626 1627 /* 1628 * A rename to '.' or '..' results in a prematurely 1629 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1630 * here. 
1631 */ 1632 if ((tondp->ni_cnd.cn_namelen == 1 && 1633 tondp->ni_cnd.cn_nameptr[0] == '.') || 1634 (tondp->ni_cnd.cn_namelen == 2 && 1635 tondp->ni_cnd.cn_nameptr[0] == '.' && 1636 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1637 error = EINVAL; 1638 goto out; 1639 } 1640 } 1641 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1642 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1643 goto out; 1644 } 1645 if (fvp->v_mount != tdvp->v_mount) { 1646 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1647 goto out; 1648 } 1649 if (fvp == tdvp) { 1650 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1651 goto out; 1652 } 1653 if (fvp == tvp) { 1654 /* 1655 * If source and destination are the same, there is 1656 * nothing to do. Set error to EJUSTRETURN to indicate 1657 * this. 1658 */ 1659 error = EJUSTRETURN; 1660 goto out; 1661 } 1662 if (nd->nd_flag & ND_NFSV4) { 1663 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1664 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1665 error = nfsrv_checkremove(fvp, 0, NULL, 1666 (nfsquad_t)((u_quad_t)0), p); 1667 else 1668 error = nfsrv_checkremove(fvp, 0, NULL, 1669 nd->nd_clientid, p); 1670 NFSVOPUNLOCK(fvp); 1671 } else 1672 error = EPERM; 1673 if (tvp && !error) { 1674 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1675 error = nfsrv_checkremove(tvp, 1, NULL, 1676 (nfsquad_t)((u_quad_t)0), p); 1677 else 1678 error = nfsrv_checkremove(tvp, 1, NULL, 1679 nd->nd_clientid, p); 1680 } 1681 } else { 1682 /* 1683 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1684 * that the NFSv4 client won't be confused by the rename. 1685 * Since nfsd_recalldelegation() can only be called on an 1686 * unlocked vnode at this point and fvp is the file that will 1687 * still exist after the rename, just do fvp. 
1688 */ 1689 nfsd_recalldelegation(fvp, p); 1690 } 1691 if (error == 0 && tvp != NULL) { 1692 if ((nd->nd_flag & ND_NFSV41) != 0) 1693 error = nfsvno_getfh(tvp, &fh2, p); 1694 if (error == 0) 1695 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, 1696 &fh); 1697 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1698 " dsdvp=%p\n", dsdvp[0]); 1699 } 1700 out: 1701 mp = NULL; 1702 if (error == 0) { 1703 error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp); 1704 if (error == 0) { 1705 if (mp == NULL) { 1706 error = ENOENT; 1707 } else { 1708 error = lockmgr(&mp->mnt_renamelock, 1709 LK_EXCLUSIVE | LK_NOWAIT, NULL); 1710 if (error != 0) 1711 error = ERELOOKUP; 1712 } 1713 } 1714 } 1715 if (error == 0) { 1716 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1717 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1718 &tondp->ni_cnd); 1719 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); 1720 vfs_rel(mp); 1721 } else { 1722 if (tdvp == tvp) 1723 vrele(tdvp); 1724 else 1725 vput(tdvp); 1726 if (tvp) 1727 vput(tvp); 1728 vrele(fromndp->ni_dvp); 1729 vrele(fvp); 1730 if (error == EJUSTRETURN) { 1731 error = 0; 1732 } else if (error == ERELOOKUP && mp != NULL) { 1733 lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); 1734 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); 1735 vfs_rel(mp); 1736 } 1737 } 1738 1739 /* 1740 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1741 * if the rename succeeded, the DS file for the tvp needs to be 1742 * removed. 1743 */ 1744 if (error == 0 && dsdvp[0] != NULL) { 1745 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1746 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1747 } 1748 1749 /* Use ret to determine if the file still exists. */ 1750 if ((nd->nd_flag & ND_NFSV41) != 0 && error == 0) { 1751 mp = vfs_busyfs(&fh2.fh_fsid); 1752 if (mp != NULL) { 1753 /* Find out if the file still exists. 
*/ 1754 ret = VFS_FHTOVP(mp, &fh2.fh_fid, LK_SHARED, &newvp); 1755 if (ret == 0) 1756 vput(newvp); 1757 else 1758 ret = ESTALE; 1759 vfs_unbusy(mp); 1760 } else { 1761 ret = ESTALE; 1762 } 1763 if (ret == ESTALE) { 1764 /* Get rid of any delegation. */ 1765 nfsrv_removedeleg(&fh2, nd, p); 1766 } 1767 } 1768 1769 nfsvno_relpathbuf(tondp); 1770 out1: 1771 nfsvno_relpathbuf(fromndp); 1772 NFSEXITCODE(error); 1773 return (error); 1774 } 1775 1776 /* 1777 * Link vnode op. 1778 */ 1779 int 1780 nfsvno_link(struct nameidata *ndp, struct vnode *vp, nfsquad_t clientid, 1781 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1782 { 1783 struct vnode *xp; 1784 int error = 0; 1785 1786 xp = ndp->ni_vp; 1787 if (xp != NULL) { 1788 error = EEXIST; 1789 } else { 1790 xp = ndp->ni_dvp; 1791 if (vp->v_mount != xp->v_mount) 1792 error = EXDEV; 1793 } 1794 if (!error) { 1795 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1796 if (!VN_IS_DOOMED(vp)) { 1797 error = nfsrv_checkremove(vp, 0, NULL, clientid, p); 1798 if (error == 0) 1799 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1800 } else 1801 error = EPERM; 1802 if (ndp->ni_dvp == vp) { 1803 vrele(ndp->ni_dvp); 1804 NFSVOPUNLOCK(vp); 1805 } else { 1806 vref(vp); 1807 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true); 1808 } 1809 } else { 1810 if (ndp->ni_dvp == ndp->ni_vp) 1811 vrele(ndp->ni_dvp); 1812 else 1813 vput(ndp->ni_dvp); 1814 if (ndp->ni_vp) 1815 vrele(ndp->ni_vp); 1816 } 1817 nfsvno_relpathbuf(ndp); 1818 NFSEXITCODE(error); 1819 return (error); 1820 } 1821 1822 /* 1823 * Do the fsync() appropriate for the commit. 1824 */ 1825 int 1826 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1827 struct thread *td) 1828 { 1829 int error = 0; 1830 1831 /* 1832 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1833 * file is done. At this time VOP_FSYNC does not accept offset and 1834 * byte count parameters so call VOP_FSYNC the whole file for now. 
1835 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1836 * File systems that do not use the buffer cache (as indicated 1837 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 1838 */ 1839 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1840 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1841 /* 1842 * Give up and do the whole thing 1843 */ 1844 vnode_pager_clean_sync(vp); 1845 error = VOP_FSYNC(vp, MNT_WAIT, td); 1846 } else { 1847 /* 1848 * Locate and synchronously write any buffers that fall 1849 * into the requested range. Note: we are assuming that 1850 * f_iosize is a power of 2. 1851 */ 1852 int iosize = vp->v_mount->mnt_stat.f_iosize; 1853 int iomask = iosize - 1; 1854 struct bufobj *bo; 1855 daddr_t lblkno; 1856 1857 /* 1858 * Align to iosize boundary, super-align to page boundary. 1859 */ 1860 if (off & iomask) { 1861 cnt += off & iomask; 1862 off &= ~(u_quad_t)iomask; 1863 } 1864 if (off & PAGE_MASK) { 1865 cnt += off & PAGE_MASK; 1866 off &= ~(u_quad_t)PAGE_MASK; 1867 } 1868 lblkno = off / iosize; 1869 1870 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1871 VM_OBJECT_WLOCK(vp->v_object); 1872 vm_object_page_clean(vp->v_object, off, off + cnt, 1873 OBJPC_SYNC); 1874 VM_OBJECT_WUNLOCK(vp->v_object); 1875 } 1876 1877 bo = &vp->v_bufobj; 1878 BO_LOCK(bo); 1879 while (cnt > 0) { 1880 struct buf *bp; 1881 1882 /* 1883 * If we have a buffer and it is marked B_DELWRI we 1884 * have to lock and write it. Otherwise the prior 1885 * write is assumed to have already been committed. 1886 * 1887 * gbincore() can return invalid buffers now so we 1888 * have to check that bit as well (though B_DELWRI 1889 * should not be set if B_INVAL is set there could be 1890 * a race here since we haven't locked the buffer). 
1891 */ 1892 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1893 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1894 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1895 BO_LOCK(bo); 1896 continue; /* retry */ 1897 } 1898 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1899 B_DELWRI) { 1900 bremfree(bp); 1901 bp->b_flags &= ~B_ASYNC; 1902 bwrite(bp); 1903 ++nfs_commit_miss; 1904 } else 1905 BUF_UNLOCK(bp); 1906 BO_LOCK(bo); 1907 } 1908 ++nfs_commit_blks; 1909 if (cnt < iosize) 1910 break; 1911 cnt -= iosize; 1912 ++lblkno; 1913 } 1914 BO_UNLOCK(bo); 1915 } 1916 NFSEXITCODE(error); 1917 return (error); 1918 } 1919 1920 /* 1921 * Statfs vnode op. 1922 */ 1923 int 1924 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1925 { 1926 struct statfs *tsf; 1927 int error; 1928 1929 tsf = NULL; 1930 if (nfsrv_devidcnt > 0) { 1931 /* For a pNFS service, get the DS numbers. */ 1932 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1933 error = nfsrv_pnfsstatfs(tsf, vp->v_mount); 1934 if (error != 0) { 1935 free(tsf, M_TEMP); 1936 tsf = NULL; 1937 } 1938 } 1939 error = VFS_STATFS(vp->v_mount, sf); 1940 if (error == 0) { 1941 if (tsf != NULL) { 1942 sf->f_blocks = tsf->f_blocks; 1943 sf->f_bavail = tsf->f_bavail; 1944 sf->f_bfree = tsf->f_bfree; 1945 sf->f_bsize = tsf->f_bsize; 1946 } 1947 /* 1948 * Since NFS handles these values as unsigned on the 1949 * wire, there is no way to represent negative values, 1950 * so set them to 0. Without this, they will appear 1951 * to be very large positive values for clients like 1952 * Solaris10. 1953 */ 1954 if (sf->f_bavail < 0) 1955 sf->f_bavail = 0; 1956 if (sf->f_ffree < 0) 1957 sf->f_ffree = 0; 1958 } 1959 free(tsf, M_TEMP); 1960 NFSEXITCODE(error); 1961 return (error); 1962 } 1963 1964 /* 1965 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1966 * must handle nfsrv_opencheck() calls after any other access checks. 
1967 */ 1968 void 1969 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1970 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1971 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1972 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, bool done_namei, 1973 struct nfsexstuff *exp, struct vnode **vpp) 1974 { 1975 struct vnode *vp = NULL; 1976 u_quad_t tempsize; 1977 struct nfsexstuff nes; 1978 struct thread *p = curthread; 1979 uint32_t oldrepstat; 1980 1981 if (ndp->ni_vp == NULL) { 1982 /* 1983 * If nfsrv_opencheck() sets nd_repstat, done_namei needs to be 1984 * set true, since cleanup after nfsvno_namei() is needed. 1985 */ 1986 oldrepstat = nd->nd_repstat; 1987 nd->nd_repstat = nfsrv_opencheck(clientid, 1988 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1989 if (nd->nd_repstat != 0 && oldrepstat == 0) 1990 done_namei = true; 1991 } 1992 if (!nd->nd_repstat) { 1993 if (ndp->ni_vp == NULL) { 1994 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1995 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1996 /* For a pNFS server, create the data file on a DS. */ 1997 if (nd->nd_repstat == 0) { 1998 /* 1999 * Create a data file on a DS for a pNFS server. 2000 * This function just returns if not 2001 * running a pNFS DS or the creation fails. 2002 */ 2003 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 2004 cred, p); 2005 } 2006 VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ? 
2007 &ndp->ni_vp : NULL, false); 2008 nfsvno_relpathbuf(ndp); 2009 if (!nd->nd_repstat) { 2010 if (*exclusive_flagp) { 2011 *exclusive_flagp = 0; 2012 NFSVNO_ATTRINIT(nvap); 2013 nvap->na_atime.tv_sec = cverf[0]; 2014 nvap->na_atime.tv_nsec = cverf[1]; 2015 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 2016 &nvap->na_vattr, cred); 2017 if (nd->nd_repstat != 0) { 2018 vput(ndp->ni_vp); 2019 ndp->ni_vp = NULL; 2020 nd->nd_repstat = NFSERR_NOTSUPP; 2021 } else 2022 NFSSETBIT_ATTRBIT(attrbitp, 2023 NFSATTRBIT_TIMEACCESS); 2024 } else { 2025 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 2026 aclp, p, attrbitp, exp); 2027 } 2028 } 2029 vp = ndp->ni_vp; 2030 } else { 2031 nfsvno_relpathbuf(ndp); 2032 vp = ndp->ni_vp; 2033 if (create == NFSV4OPEN_CREATE) { 2034 if (ndp->ni_dvp == vp) 2035 vrele(ndp->ni_dvp); 2036 else 2037 vput(ndp->ni_dvp); 2038 } 2039 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 2040 if (ndp->ni_cnd.cn_flags & RDONLY) 2041 NFSVNO_SETEXRDONLY(&nes); 2042 else 2043 NFSVNO_EXINIT(&nes); 2044 nd->nd_repstat = nfsvno_accchk(vp, 2045 VWRITE, cred, &nes, p, 2046 NFSACCCHK_NOOVERRIDE, 2047 NFSACCCHK_VPISLOCKED, NULL); 2048 nd->nd_repstat = nfsrv_opencheck(clientid, 2049 stateidp, stp, vp, nd, p, nd->nd_repstat); 2050 if (!nd->nd_repstat) { 2051 tempsize = nvap->na_size; 2052 NFSVNO_ATTRINIT(nvap); 2053 nvap->na_size = tempsize; 2054 nd->nd_repstat = nfsvno_setattr(vp, 2055 nvap, cred, p, exp); 2056 } 2057 } else if (vp->v_type == VREG) { 2058 nd->nd_repstat = nfsrv_opencheck(clientid, 2059 stateidp, stp, vp, nd, p, nd->nd_repstat); 2060 } 2061 } 2062 } else if (done_namei) { 2063 KASSERT(create == NFSV4OPEN_CREATE, 2064 ("nfsvno_open: not create")); 2065 /* 2066 * done_namei is set when nfsvno_namei() has completed 2067 * successfully, but a subsequent error was set in 2068 * nd_repstat. As such, cleanup of the nfsvno_namei() 2069 * results is required. 
2070 */ 2071 nfsvno_relpathbuf(ndp); 2072 if (ndp->ni_dvp == ndp->ni_vp) 2073 vrele(ndp->ni_dvp); 2074 else 2075 vput(ndp->ni_dvp); 2076 if (ndp->ni_vp) 2077 vput(ndp->ni_vp); 2078 } 2079 *vpp = vp; 2080 2081 NFSEXITCODE2(0, nd); 2082 } 2083 2084 /* 2085 * Updates the file rev and sets the mtime and ctime 2086 * to the current clock time, returning the va_filerev and va_Xtime 2087 * values. 2088 * Return ESTALE to indicate the vnode is VIRF_DOOMED. 2089 */ 2090 int 2091 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 2092 struct nfsrv_descript *nd, struct thread *p) 2093 { 2094 struct vattr va; 2095 2096 VATTR_NULL(&va); 2097 vfs_timestamp(&va.va_mtime); 2098 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 2099 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 2100 if (VN_IS_DOOMED(vp)) 2101 return (ESTALE); 2102 } 2103 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 2104 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 2105 return (0); 2106 } 2107 2108 /* 2109 * Glue routine to nfsv4_fillattr(). 2110 */ 2111 int 2112 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 2113 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 2114 struct ucred *cred, struct thread *p, int isdgram, int reterr, 2115 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, 2116 bool xattrsupp, bool has_hiddensystem, bool has_namedattr, 2117 uint32_t clone_blksize) 2118 { 2119 struct statfs *sf; 2120 int error; 2121 2122 sf = NULL; 2123 if (nfsrv_devidcnt > 0 && 2124 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 2125 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 2126 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 2127 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 2128 error = nfsrv_pnfsstatfs(sf, mp); 2129 if (error != 0) { 2130 free(sf, M_TEMP); 2131 sf = NULL; 2132 } 2133 } 2134 2135 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 2136 attrbitp, cred, p, isdgram, reterr, 
supports_nfsv4acls, at_root, 2137 mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr, 2138 clone_blksize); 2139 free(sf, M_TEMP); 2140 NFSEXITCODE2(0, nd); 2141 return (error); 2142 } 2143 2144 /* 2145 * Convert a dirent d_type to a vnode type. 2146 */ 2147 static void nfs_dtypetovtype(struct nfsvattr *nvap, struct vnode *vp, 2148 uint8_t dtype) 2149 { 2150 2151 if ((vn_irflag_read(vp) & VIRF_NAMEDDIR) != 0) { 2152 nvap->na_type = VREG; 2153 nvap->na_bsdflags |= SFBSD_NAMEDATTR; 2154 } else if (dtype <= DT_WHT) { 2155 nvap->na_type = dtype_to_vnode[dtype]; 2156 } else { 2157 nvap->na_type = VNON; 2158 } 2159 } 2160 2161 /* Since the Readdir vnode ops vary, put the entire functions in here. */ 2162 /* 2163 * nfs readdir service 2164 * - mallocs what it thinks is enough to read 2165 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR 2166 * - calls VOP_READDIR() 2167 * - loops around building the reply 2168 * if the output generated exceeds count break out of loop 2169 * The NFSM_CLGET macro is used here so that the reply will be packed 2170 * tightly in mbuf clusters. 2171 * - it trims out records with d_fileno == 0 2172 * this doesn't matter for Unix clients, but they might confuse clients 2173 * for other os'. 2174 * - it trims out records with d_type == DT_WHT 2175 * these cannot be seen through NFS (unless we extend the protocol) 2176 * The alternate call nfsrvd_readdirplus() does lookups as well. 2177 * PS: The NFS protocol spec. does not clarify what the "count" byte 2178 * argument is a count of.. just name strings and file id's or the 2179 * entire reply rpc or ... 2180 * I tried just file name and id sizes and it confused the Sun client, 2181 * so I am using the full rpc size now. The "paranoia.." comment refers 2182 * to including the status longwords that are not a part of the dir. 2183 * "entry" structures, but are in the rpc. 
*/
/*
 * Arguments, as used below:
 *  nd      - request/reply descriptor; arguments are dissected from it and
 *            the reply is built into it.  nd_repstat carries the NFS status.
 *  isdgram - not referenced in this function.
 *  vp      - the directory vnode.  It is vput() on every path once
 *            nd_repstat was 0 on entry, and is passed to nfsvno_accchk()
 *            with NFSACCCHK_VPISLOCKED, so it is expected to arrive locked
 *            and referenced.
 *  exp     - export information handed to nfsvno_accchk().
 * Returns 0 when a reply (success or error status) has been built, or the
 * error left in "error" when the nfsmout label is reached.
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf __unused;
	uint64_t *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;
	int is_ufs;
	struct thread *p = curthread;

	/* An error was already recorded before this routine was called. */
	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * NFSv2 carries a 32bit offset followed by the count; the other
	 * versions carry a 64bit offset, a 64bit verifier and the count.
	 * In both cases tl is left pointing at the count word.
	 */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	/* Clamp a negative or oversized count to the server maximum. */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);
	/* Read size: cnt rounded up to a multiple of DIRBLKSIZ. */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
		    NULL);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (nd->nd_repstat == 0 && cnt == 0) {
		if (nd->nd_flag & ND_NFSV2)
			/* NFSv2 does not have NFSERR_TOOSMALL */
			nd->nd_repstat = EPERM;
		else
			nd->nd_repstat = NFSERR_TOOSMALL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	/*
	 * Re-entered from below when a whole buffer held nothing usable.
	 * The cookie array of the previous pass is released before reading
	 * the next chunk.
	 */
	eofflag = 0;
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually placed in rbuf. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	/* A successful read that returned no cookie array is rejected. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vput(vp);
		free(rbuf, M_TEMP);
		if (cookies)
			free(cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* "no entry follows" and then "eof". */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(rbuf, M_TEMP);
		free(cookies, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Everything was skipped: read the next chunk at full size. */
		siz = fullsiz;
		toff = off;
		goto again;
	}
	/* The directory vnode is not used past this point. */
	vput(vp);

	/*
	 * If cnt > MCLBYTES and the reply will not be saved, use
	 * ext_pgs mbufs for TLS.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			/*
			 * Account for this entry before emitting it, so
			 * that an entry which would overflow cnt is never
			 * added to the reply.
			 */
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			if (dirlen > cnt) {
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(dp->d_fileno, tl);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl = txdr_unsigned(dp->d_fileno);
			}
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				txdr_hyper(*cookiep, tl);
			} else {
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(*cookiep);
			}
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Unprocessed records remain in the buffer, so this is not eof. */
	if (cpos < cend)
		eofflag = 0;
	/* Trailer: "no entry follows" and then the eof flag. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	free(rbuf, M_TEMP);
	free(cookies, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	/*
	 * NOTE(review): no "goto nfsmout" appears in this function; the label
	 * is presumably the failure target of NFSM_DISSECT() - confirm against
	 * the macro definition.
	 */
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Readdirplus for V3 and Readdir for V4.
*/
/*
 * Arguments, as used below:
 *  nd      - request/reply descriptor; nd_repstat carries the NFS status.
 *  isdgram - passed through to nfsvno_fillattr().
 *  vp      - the directory vnode.  It is vput() on the early error paths
 *            and passed to nfsvno_accchk() with NFSACCCHK_VPISLOCKED, so it
 *            is expected to arrive locked and referenced.  It is unlocked
 *            before the mount point is busied and vrele()d at the end.
 *  exp     - export information handed to nfsvno_accchk().
 * Returns 0 when a reply (success or error status) has been built, or the
 * error left in "error" when the nfsmout label is reached.
 */
int
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	uint32_t clone_blksize, *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct vnode *nvp;
	fhandle_t nfh;
	struct nfsvattr nva, at, *nvap = &nva;
	struct mbuf *mb0, *mb1;
	struct nfsreferral *refp;
	int nlen, r, error = 0, getret = 1, ret, usevget = 1;
	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
	caddr_t bpos0, bpos1;
	u_int64_t off, toff, verf __unused;
	uint64_t *cookies = NULL, *cookiep;
	nfsattrbit_t attrbits, rderrbits, savbits, refbits;
	struct uio io;
	struct iovec iv;
	struct componentname cn;
	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
	struct mount *mp, *new_mp;
	uint64_t mounted_on_fileno;
	struct thread *p = curthread;
	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
	size_t atsiz;
	long pathval;
	bool has_hiddensystem, has_namedattr, xattrsupp;

	/* An error was already recorded before this routine was called. */
	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* 64bit offset, 64bit verifier, then the siz hint and cnt. */
	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
	off = fxdr_hyper(tl);
	toff = off;
	tl += 2;
	verf = fxdr_hyper(tl);
	tl += 2;
	siz = fxdr_unsigned(int, *tl++);
	cnt = fxdr_unsigned(int, *tl);

	/*
	 * Use the server's maximum data transfer size as the upper bound
	 * on reply datalen.
	 */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);

	/*
	 * siz is a "hint" of how much directory information (name, fileid,
	 * cookie) should be in the reply. At least one client "hints" 0,
	 * so I set it to cnt for that case. I also round it up to the
	 * next multiple of DIRBLKSIZ.
	 * Since the size of a Readdirplus directory entry reply will always
	 * be greater than a directory entry returned by VOP_READDIR(), it
	 * does not make sense to read more than NFS_SRVMAXDATA() via
	 * VOP_READDIR().
	 */
	if (siz <= 0)
		siz = cnt;
	else if (siz > NFS_SRVMAXDATA(nd))
		siz = NFS_SRVMAXDATA(nd);
	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));

	/*
	 * For NFSv4 three copies of the requested attribute bits are kept:
	 * attrbits (reduced by NFSCLRNOTFILLABLE_ATTRBIT()), refbits (the
	 * unmodified request, used for referrals) and savbits (the request
	 * minus the four bits cleared below).  rderrbits holds only
	 * RDATTRERROR.  For NFSv3 only attrbits is initialized (to zero);
	 * savbits is only tested after the ND_NFSV3 check short-circuits.
	 */
	if (nd->nd_flag & ND_NFSV4) {
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSSET_ATTRBIT(&savbits, &attrbits);
		NFSSET_ATTRBIT(&refbits, &attrbits);
		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
		NFSZERO_ATTRBIT(&rderrbits);
		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
		/*
		 * If these 4 bits are the only attributes requested by the
		 * client, they can be satisfied without acquiring the vnode
		 * for the file object unless it is a directory.
		 * This will be indicated by savbits being all 0s.
		 */
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_TYPE);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_FILEID);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_MOUNTEDONFILEID);
		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_RDATTRERROR);
	} else {
		NFSZERO_ATTRBIT(&attrbits);
	}
	fullsiz = siz;
	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
#if 0
	if (!nd->nd_repstat) {
	    if (off && verf != at.na_filerev) {
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (nd->nd_flag & ND_NFSV4) {
			nd->nd_repstat = NFSERR_NOTSAME;
		} else {
			nd->nd_repstat = NFSERR_BAD_COOKIE;
		}
	    }
	}
#endif
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (!nd->nd_repstat && cnt == 0)
		nd->nd_repstat = NFSERR_TOOSMALL;
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;

	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	/*
	 * Re-entered from below when a whole buffer held nothing usable.
	 * The cookie array of the previous pass is released before reading
	 * the next chunk.
	 */
	eofflag = 0;
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually placed in rbuf. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);

	/* A successful read that returned no cookie array is rejected. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (!nd->nd_repstat)
		nd->nd_repstat = getret;
	if (nd->nd_repstat) {
		vput(vp);
		if (cookies)
			free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		tl += 2;
		/* "no entry follows" and then "eof". */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
	   ((nd->nd_flag & ND_NFSV4) &&
	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Everything was skipped: read the next chunk at full size. */
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * Busy the file system so that the mount point won't go away
	 * and, as such, VFS_VGET() can be used safely.
	 */
	mp = vp->v_mount;
	vfs_ref(mp);
	NFSVOPUNLOCK(vp);
	nd->nd_repstat = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (nd->nd_repstat != 0) {
		/* vp was unlocked above, so only the reference is dropped. */
		vrele(vp);
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * Check to see if entries in this directory can be safely acquired
	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
	 * automount of the snapshot directory that is required will
	 * be done.
	 * This needs to be done here for NFSv4, since NFSv4 never does
	 * a VFS_VGET() for "." or "..".
	 */
	if (is_zfs == 1) {
		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
		if (r == EOPNOTSUPP) {
			usevget = 0;
			cn.cn_nameiop = LOOKUP;
			cn.cn_lkflags = LK_SHARED | LK_RETRY;
			cn.cn_cred = nd->nd_cred;
		} else if (r == 0)
			vput(nvp);
	}

	/*
	 * If the reply is likely to exceed MCLBYTES and the reply will
	 * not be saved, use ext_pgs mbufs for TLS.
	 * It is difficult to predict how large each entry will be and
	 * how many entries have been read, so just assume the directory
	 * entries grow by a factor of 4 when attributes are included.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * Save this position, in case there is an error before one entry
	 * is created.
	 */
	mb0 = nd->nd_mb;
	bpos0 = nd->nd_bpos;
	bextpg0 = nd->nd_bextpg;
	bextpgsiz0 = nd->nd_bextpgsiz;

	/*
	 * Fill in the first part of the reply.
	 * dirlen is the reply length in bytes and cannot exceed cnt.
	 * (Include the two booleans at the end of the reply in dirlen now,
	 *  so we recognize when we have exceeded cnt.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
		nfsrv_postopattr(nd, getret, &at);
	} else {
		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
	txdr_hyper(at.na_filerev, tl);

	/*
	 * Save this position, in case there is an empty reply needed.
	 */
	mb1 = nd->nd_mb;
	bpos1 = nd->nd_bpos;
	bextpg1 = nd->nd_bextpg;
	bextpgsiz1 = nd->nd_bextpgsiz;

	/* Loop through the records and build reply */
	entrycnt = 0;
	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
		nlen = dp->d_namlen;
		/*
		 * Skip empty slots, whiteouts and over-long names.  For
		 * anything other than NFSv3, "." and ".." are skipped too.
		 */
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
		    nlen <= NFS_MAXNAMLEN &&
		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
		      || (nlen == 1 && dp->d_name[0] != '.'))) {
			/*
			 * Save the current position in the reply, in case
			 * this entry exceeds cnt.
			 */
			mb1 = nd->nd_mb;
			bpos1 = nd->nd_bpos;
			bextpg1 = nd->nd_bextpg;
			bextpgsiz1 = nd->nd_bextpgsiz;

			/*
			 * For readdir_and_lookup get the vnode using
			 * the file number.
			 */
			nvp = NULL;
			refp = NULL;
			r = 0;
			at_root = 0;
			needs_unbusy = 0;
			new_mp = mp;
			mounted_on_fileno = (uint64_t)dp->d_fileno;
			/*
			 * The entry's vnode is needed for NFSv3, when
			 * attributes beyond the four cleared from savbits
			 * were requested, when the dirent type is unknown,
			 * or for a directory when mount point crossing is
			 * enabled.
			 */
			if ((nd->nd_flag & ND_NFSV3) ||
			    NFSNONZERO_ATTRBIT(&savbits) ||
			    dp->d_type == DT_UNKNOWN ||
			    (dp->d_type == DT_DIR &&
			     nfsrv_enable_crossmntpt != 0)) {
				if (nd->nd_flag & ND_NFSV4)
					refp = nfsv4root_getreferral(NULL,
					    vp, dp->d_fileno);
				if (refp == NULL) {
					if (usevget)
						r = VFS_VGET(mp, dp->d_fileno,
						    LK_SHARED, &nvp);
					else
						r = EOPNOTSUPP;
					if (r == 0 && (vn_irflag_read(vp) &
					    VIRF_NAMEDDIR) != 0)
						vn_irflag_set_cond(nvp,
						    VIRF_NAMEDATTR);
					if (r == EOPNOTSUPP) {
						/*
						 * VFS_VGET() is unusable:
						 * switch to VOP_LOOKUP() for
						 * this and all later entries.
						 */
						if (usevget) {
							usevget = 0;
							cn.cn_nameiop = LOOKUP;
							cn.cn_lkflags =
							    LK_SHARED |
							    LK_RETRY;
							cn.cn_cred =
							    nd->nd_cred;
						}
						cn.cn_nameptr = dp->d_name;
						cn.cn_namelen = nlen;
						cn.cn_flags = ISLASTCN |
						    NOFOLLOW | LOCKLEAF;
						if ((vn_irflag_read(vp) &
						    VIRF_NAMEDDIR) != 0)
							cn.cn_flags |=
							    OPENNAMED;
						if (nlen == 2 &&
						    dp->d_name[0] == '.' &&
						    dp->d_name[1] == '.')
							cn.cn_flags |=
							    ISDOTDOT;
						/*
						 * vp was unlocked before the
						 * mount was busied; relock it
						 * for the lookup.
						 */
						if (NFSVOPLOCK(vp, LK_SHARED)
						    != 0) {
							nd->nd_repstat = EPERM;
							break;
						}
						if ((vp->v_vflag & VV_ROOT) != 0
						    && (cn.cn_flags & ISDOTDOT)
						    != 0) {
							/*
							 * ".." at the root of
							 * the file system is
							 * answered with vp
							 * itself, which is left
							 * locked as nvp.
							 */
							vref(vp);
							nvp = vp;
							r = 0;
						} else {
							r = VOP_LOOKUP(vp, &nvp,
							    &cn);
							if (vp != nvp)
								NFSVOPUNLOCK(vp);
						}
					}

					/*
					 * For NFSv4, check to see if nvp is
					 * a mount point and get the mount
					 * point vnode, as required.
					 */
					if (r == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    nvp->v_mountedhere != NULL) {
						new_mp = nvp->v_mountedhere;
						r = vfs_busy(new_mp, 0);
						vput(nvp);
						nvp = NULL;
						if (r == 0) {
							r = VFS_ROOT(new_mp,
							    LK_SHARED, &nvp);
							needs_unbusy = 1;
							if (r == 0)
								at_root = 1;
						}
					}
				}

				/*
				 * If we failed to look up the entry, then it
				 * has become invalid, most likely removed.
				 */
				if (r != 0) {
					if (needs_unbusy)
						vfs_unbusy(new_mp);
					goto invalid;
				}
				KASSERT(refp != NULL || nvp != NULL,
				    ("%s: undetected lookup error", __func__));

				if (refp == NULL &&
				    ((nd->nd_flag & ND_NFSV3) ||
				     NFSNONZERO_ATTRBIT(&attrbits))) {
					r = nfsvno_getfh(nvp, &nfh, p);
					if (!r)
					    r = nfsvno_getattr(nvp, nvap, nd, p,
						1, &attrbits);
					if (r == 0 && is_zfs == 1 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    vp->v_mount != nvp->v_mount) {
					    /*
					     * For a ZFS snapshot, there is a
					     * pseudo mount that does not set
					     * v_mountedhere, so it needs to
					     * be detected via a different
					     * mount structure.
					     */
					    at_root = 1;
					    if (new_mp == mp)
						new_mp = nvp->v_mount;
					}
				}

				/*
				 * If we failed to get attributes of the entry,
				 * then just skip it for NFSv3 (the traditional
				 * behavior in the old NFS server).
				 * For NFSv4 the behavior is controlled by
				 * RDATTRERROR: we either ignore the error or
				 * fail the request.
				 * The exception is EOPNOTSUPP, which can be
				 * returned by nfsvno_getfh() for certain
				 * file systems, such as devfs.  This indicates
				 * that the file system cannot be exported,
				 * so just skip over the entry.
				 * Note that RDATTRERROR is never set for NFSv3.
				 */
				if (r != 0) {
					if (!NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_RDATTRERROR) ||
					    r == EOPNOTSUPP) {
						vput(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						if ((nd->nd_flag & ND_NFSV3) ||
						    r == EOPNOTSUPP)
							goto invalid;
						nd->nd_repstat = r;
						break;
					}
				}
			} else if (NFSNONZERO_ATTRBIT(&attrbits)) {
				/* Only need Type and/or Fileid. */
				VATTR_NULL(&nvap->na_vattr);
				nvap->na_fileid = dp->d_fileno;
				nfs_dtypetovtype(nvap, vp, dp->d_type);
			}

			/*
			 * Build the directory record xdr
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(dp->d_fileno, tl);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				txdr_hyper(*cookiep, tl);
				nfsrv_postopattr(nd, 0, nvap);
				dirlen += nfsm_fhtom(NULL, nd, (u_int8_t *)&nfh,
				    0, 1);
				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
				if (nvp != NULL)
					vput(nvp);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(*cookiep, tl);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				xattrsupp = false;
				has_hiddensystem = false;
				has_namedattr = false;
				clone_blksize = 0;
				/*
				 * Collect the per-vnode capabilities that
				 * nfsvno_fillattr() takes as arguments while
				 * nvp is still locked, then unlock it.  The
				 * reference is kept until after the
				 * attributes have been filled in.
				 */
				if (nvp != NULL) {
					supports_nfsv4acls =
					    nfs_supportsnfsv4acls(nvp);
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_XATTRSUPPORT)) {
						ret = VOP_GETEXTATTR(nvp,
						    EXTATTR_NAMESPACE_USER,
						    "xxx", NULL, &atsiz,
						    nd->nd_cred, p);
						xattrsupp = ret != EOPNOTSUPP;
					}
					if (VOP_PATHCONF(nvp,
					    _PC_HAS_HIDDENSYSTEM, &pathval) !=
					    0)
						pathval = 0;
					has_hiddensystem = pathval > 0;
					pathval = 0;
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_NAMEDATTR) &&
					    VOP_PATHCONF(nvp, _PC_HAS_NAMEDATTR,
					    &pathval) != 0)
						pathval = 0;
					has_namedattr = pathval > 0;
					pathval = 0;
					if (VOP_PATHCONF(nvp, _PC_CLONE_BLKSIZE,
					    &pathval) != 0)
						pathval = 0;
					clone_blksize = pathval;
					NFSVOPUNLOCK(nvp);
				} else
					supports_nfsv4acls = 0;
				if (refp != NULL) {
					dirlen += nfsrv_putreferralattr(nd,
					    &refbits, refp, 0,
					    &nd->nd_repstat);
					if (nd->nd_repstat) {
						if (nvp != NULL)
							vrele(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						break;
					}
				} else if (r) {
					/*
					 * Getting the file handle or the
					 * attributes failed but RDATTRERROR
					 * was requested: reply with only that
					 * attribute, passing r as rderror.
					 */
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &rderrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno, xattrsupp,
					    has_hiddensystem, has_namedattr,
					    clone_blksize);
				} else {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &attrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno, xattrsupp,
					    has_hiddensystem, has_namedattr,
					    clone_blksize);
				}
				if (nvp != NULL)
					vrele(nvp);
				dirlen += (3 * NFSX_UNSIGNED);
			}
			if (needs_unbusy != 0)
				vfs_unbusy(new_mp);
			/* Only entries that fit within cnt are counted. */
			if (dirlen <= cnt)
				entrycnt++;
		}
invalid:
		/* Advance to the next dirent and its matching cookie. */
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vfs_unbusy(mp);

	/*
	 * If dirlen > cnt, we must strip off the last entry. If that
	 * results in an empty reply, report NFSERR_TOOSMALL.
	 */
	if (dirlen > cnt || nd->nd_repstat) {
		if (!nd->nd_repstat && entrycnt == 0)
			nd->nd_repstat = NFSERR_TOOSMALL;
		if (nd->nd_repstat) {
			/* Discard the whole reply built so far. */
			nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
			if (nd->nd_flag & ND_NFSV3)
				nfsrv_postopattr(nd, getret, &at);
		} else
			/* Discard only the last, overflowing entry. */
			nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
		eofflag = 0;
	} else if (cpos < cend)
		eofflag = 0;
	if (!nd->nd_repstat) {
		/* Trailer: "no entry follows" and then the eof flag. */
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		if (eofflag)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
	}
	free(cookies, M_TEMP);
	free(rbuf, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	/*
	 * Reached when nfsrv_getattrbits() fails above.
	 * NOTE(review): presumably also the failure target of NFSM_DISSECT() -
	 * confirm against the macro definition.
	 */
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Get the settable attributes out of the mbuf list.
3065 * (Return 0 or EBADRPC) 3066 */ 3067 int 3068 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 3069 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 3070 { 3071 u_int32_t *tl; 3072 struct nfsv2_sattr *sp; 3073 int error = 0, toclient = 0; 3074 3075 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 3076 case ND_NFSV2: 3077 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 3078 /* 3079 * Some old clients didn't fill in the high order 16bits. 3080 * --> check the low order 2 bytes for 0xffff 3081 */ 3082 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 3083 nvap->na_mode = nfstov_mode(sp->sa_mode); 3084 if (sp->sa_uid != newnfs_xdrneg1) 3085 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 3086 if (sp->sa_gid != newnfs_xdrneg1) 3087 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 3088 if (sp->sa_size != newnfs_xdrneg1) 3089 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 3090 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 3091 #ifdef notyet 3092 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 3093 #else 3094 nvap->na_atime.tv_sec = 3095 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 3096 nvap->na_atime.tv_nsec = 0; 3097 #endif 3098 } 3099 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 3100 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 3101 break; 3102 case ND_NFSV3: 3103 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3104 if (*tl == newnfs_true) { 3105 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3106 nvap->na_mode = nfstov_mode(*tl); 3107 } 3108 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3109 if (*tl == newnfs_true) { 3110 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3111 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 3112 } 3113 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3114 if (*tl == newnfs_true) { 3115 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3116 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 3117 } 3118 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3119 if (*tl == newnfs_true) { 3120 
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3121 nvap->na_size = fxdr_hyper(tl); 3122 } 3123 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3124 switch (fxdr_unsigned(int, *tl)) { 3125 case NFSV3SATTRTIME_TOCLIENT: 3126 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3127 fxdr_nfsv3time(tl, &nvap->na_atime); 3128 toclient = 1; 3129 break; 3130 case NFSV3SATTRTIME_TOSERVER: 3131 vfs_timestamp(&nvap->na_atime); 3132 nvap->na_vaflags |= VA_UTIMES_NULL; 3133 break; 3134 } 3135 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3136 switch (fxdr_unsigned(int, *tl)) { 3137 case NFSV3SATTRTIME_TOCLIENT: 3138 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3139 fxdr_nfsv3time(tl, &nvap->na_mtime); 3140 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3141 break; 3142 case NFSV3SATTRTIME_TOSERVER: 3143 vfs_timestamp(&nvap->na_mtime); 3144 if (!toclient) 3145 nvap->na_vaflags |= VA_UTIMES_NULL; 3146 break; 3147 } 3148 break; 3149 case ND_NFSV4: 3150 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 3151 } 3152 nfsmout: 3153 NFSEXITCODE2(error, nd); 3154 return (error); 3155 } 3156 3157 /* 3158 * Handle the setable attributes for V4. 3159 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 3160 */ 3161 int 3162 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 3163 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 3164 { 3165 u_int32_t *tl; 3166 int attrsum = 0; 3167 int i, j; 3168 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 3169 int moderet, toclient = 0; 3170 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 3171 uid_t uid; 3172 gid_t gid; 3173 u_short mode, mask; /* Same type as va_mode. */ 3174 struct vattr va; 3175 3176 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 3177 if (error) 3178 goto nfsmout; 3179 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3180 attrsize = fxdr_unsigned(int, *tl); 3181 3182 /* 3183 * Loop around getting the setable attributes. 
If an unsupported 3184 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 3185 * Once nd_repstat != 0, do not set the attribute value, but keep 3186 * parsing the attribute(s). 3187 */ 3188 if (retnotsup) { 3189 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3190 bitpos = NFSATTRBIT_MAX; 3191 } else { 3192 bitpos = 0; 3193 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) || 3194 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SYSTEM)) 3195 nvap->na_flags = 0; 3196 } 3197 moderet = 0; 3198 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 3199 if (attrsum > attrsize) { 3200 error = NFSERR_BADXDR; 3201 goto nfsmout; 3202 } 3203 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 3204 switch (bitpos) { 3205 case NFSATTRBIT_SIZE: 3206 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 3207 if (!nd->nd_repstat) { 3208 if (vp != NULL && vp->v_type != VREG) 3209 nd->nd_repstat = (vp->v_type == VDIR) ? 3210 NFSERR_ISDIR : NFSERR_INVAL; 3211 else 3212 nvap->na_size = fxdr_hyper(tl); 3213 } 3214 attrsum += NFSX_HYPER; 3215 break; 3216 case NFSATTRBIT_ACL: 3217 error = nfsrv_dissectacl(nd, aclp, true, &aceerr, 3218 &aclsize, p); 3219 if (error) 3220 goto nfsmout; 3221 if (aceerr && !nd->nd_repstat) 3222 nd->nd_repstat = aceerr; 3223 attrsum += aclsize; 3224 break; 3225 case NFSATTRBIT_ARCHIVE: 3226 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3227 if (!nd->nd_repstat) 3228 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3229 attrsum += NFSX_UNSIGNED; 3230 break; 3231 case NFSATTRBIT_HIDDEN: 3232 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); 3233 if (nd->nd_repstat == 0) { 3234 if (*tl == newnfs_true) 3235 nvap->na_flags |= UF_HIDDEN; 3236 } 3237 attrsum += NFSX_UNSIGNED; 3238 break; 3239 case NFSATTRBIT_MIMETYPE: 3240 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3241 i = fxdr_unsigned(int, *tl); 3242 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 3243 if (error) 3244 goto nfsmout; 3245 if (!nd->nd_repstat) 3246 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3247 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 3248 break; 3249 case 
NFSATTRBIT_MODE: 3250 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */ 3251 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3252 if (!nd->nd_repstat) 3253 nvap->na_mode = nfstov_mode(*tl); 3254 attrsum += NFSX_UNSIGNED; 3255 break; 3256 case NFSATTRBIT_OWNER: 3257 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3258 j = fxdr_unsigned(int, *tl); 3259 if (j < 0) { 3260 error = NFSERR_BADXDR; 3261 goto nfsmout; 3262 } 3263 if (j > NFSV4_SMALLSTR) 3264 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3265 else 3266 cp = namestr; 3267 error = nfsrv_mtostr(nd, cp, j); 3268 if (error) { 3269 if (j > NFSV4_SMALLSTR) 3270 free(cp, M_NFSSTRING); 3271 goto nfsmout; 3272 } 3273 if (!nd->nd_repstat) { 3274 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 3275 &uid); 3276 if (!nd->nd_repstat) 3277 nvap->na_uid = uid; 3278 } 3279 if (j > NFSV4_SMALLSTR) 3280 free(cp, M_NFSSTRING); 3281 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3282 break; 3283 case NFSATTRBIT_OWNERGROUP: 3284 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3285 j = fxdr_unsigned(int, *tl); 3286 if (j < 0) { 3287 error = NFSERR_BADXDR; 3288 goto nfsmout; 3289 } 3290 if (j > NFSV4_SMALLSTR) 3291 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3292 else 3293 cp = namestr; 3294 error = nfsrv_mtostr(nd, cp, j); 3295 if (error) { 3296 if (j > NFSV4_SMALLSTR) 3297 free(cp, M_NFSSTRING); 3298 goto nfsmout; 3299 } 3300 if (!nd->nd_repstat) { 3301 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 3302 &gid); 3303 if (!nd->nd_repstat) 3304 nvap->na_gid = gid; 3305 } 3306 if (j > NFSV4_SMALLSTR) 3307 free(cp, M_NFSSTRING); 3308 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3309 break; 3310 case NFSATTRBIT_SYSTEM: 3311 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); 3312 if (nd->nd_repstat == 0) { 3313 if (*tl == newnfs_true) 3314 nvap->na_flags |= UF_SYSTEM; 3315 } 3316 attrsum += NFSX_UNSIGNED; 3317 break; 3318 case NFSATTRBIT_TIMEACCESSSET: 3319 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3320 attrsum += NFSX_UNSIGNED; 3321 if (fxdr_unsigned(int, 
*tl)==NFSV4SATTRTIME_TOCLIENT) { 3322 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3323 if (!nd->nd_repstat) 3324 fxdr_nfsv4time(tl, &nvap->na_atime); 3325 toclient = 1; 3326 attrsum += NFSX_V4TIME; 3327 } else if (!nd->nd_repstat) { 3328 vfs_timestamp(&nvap->na_atime); 3329 nvap->na_vaflags |= VA_UTIMES_NULL; 3330 } 3331 break; 3332 case NFSATTRBIT_TIMEBACKUP: 3333 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3334 if (!nd->nd_repstat) 3335 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3336 attrsum += NFSX_V4TIME; 3337 break; 3338 case NFSATTRBIT_TIMECREATE: 3339 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3340 if (!nd->nd_repstat) 3341 fxdr_nfsv4time(tl, &nvap->na_btime); 3342 attrsum += NFSX_V4TIME; 3343 break; 3344 case NFSATTRBIT_TIMEMODIFYSET: 3345 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3346 attrsum += NFSX_UNSIGNED; 3347 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3348 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3349 if (!nd->nd_repstat) 3350 fxdr_nfsv4time(tl, &nvap->na_mtime); 3351 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3352 attrsum += NFSX_V4TIME; 3353 } else if (!nd->nd_repstat) { 3354 vfs_timestamp(&nvap->na_mtime); 3355 if (!toclient) 3356 nvap->na_vaflags |= VA_UTIMES_NULL; 3357 } 3358 break; 3359 case NFSATTRBIT_MODESETMASKED: 3360 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 3361 mode = fxdr_unsigned(u_short, *tl++); 3362 mask = fxdr_unsigned(u_short, *tl); 3363 /* 3364 * vp == NULL implies an Open/Create operation. 3365 * This attribute can only be used for Setattr and 3366 * only for NFSv4.1 or higher. 3367 * If moderet != 0, a mode attribute has also been 3368 * specified and this attribute cannot be done in the 3369 * same Setattr operation. 
			 */
			if (!nd->nd_repstat) {
				if ((nd->nd_flag & ND_NFSV41) == 0)
					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
				else if ((mode & ~07777) != 0 ||
				    (mask & ~07777) != 0 || vp == NULL)
					nd->nd_repstat = NFSERR_INVAL;
				else if (moderet == 0)
					moderet = VOP_GETATTR(vp, &va,
					    nd->nd_cred);
				if (moderet == 0)
					nvap->na_mode = (mode & mask) |
					    (va.va_mode & ~mask);
				else
					nd->nd_repstat = moderet;
			}
			attrsum += 2 * NFSX_UNSIGNED;
			break;
		case NFSATTRBIT_MODEUMASK:
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			mode = fxdr_unsigned(u_short, *tl++);
			mask = fxdr_unsigned(u_short, *tl);
			/*
			 * If moderet != 0, mode has already been done.
			 * If vp != NULL, this is not a file object creation.
			 */
			if (!nd->nd_repstat) {
				if ((nd->nd_flag & ND_NFSV42) == 0)
					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
				else if ((mask & ~0777) != 0 || vp != NULL ||
				    moderet != 0)
					nd->nd_repstat = NFSERR_INVAL;
				else
					nvap->na_mode = (mode & ~mask);
			}
			attrsum += 2 * NFSX_UNSIGNED;
			break;
		default:
			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			/*
			 * set bitpos so we drop out of the loop.
			 */
			bitpos = NFSATTRBIT_MAX;
			break;
		}
	}

	/*
	 * some clients pad the attrlist, so we need to skip over the
	 * padding. This also skips over unparsed non-supported attributes.
	 */
	if (attrsum > attrsize) {
		error = NFSERR_BADXDR;
	} else {
		attrsize = NFSM_RNDUP(attrsize);
		if (attrsum < attrsize)
			error = nfsm_advance(nd, attrsize - attrsum, -1);
	}
nfsmout:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check/setup export credentials.
 *
 * nd       - request descriptor; nd_flag is read and nd_cred may be
 *            modified in place or replaced.
 * exp      - export information for the file system.  MNT_EXPORTANON is
 *            cleared from nes_exflag when ND_GSS is set on the request.
 * credanon - credentials whose uid, gid and group list are copied into
 *            nd_cred when the request is mapped to the anonymous user.
 * testsec  - when true, nfsvno_testexp() is called to check the request
 *            against the export's security requirements.
 *
 * Returns 0 on success, the error returned by nfsvno_testexp(), or
 * NFSERR_PROGNOTV4 when a non-NFSv4 request is made to a file system
 * for which NFSVNO_EXV4ONLY() is true.
 */
int
nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
    struct ucred *credanon, bool testsec)
{
	int error;

	/*
	 * Check/setup credentials.
	 */
	if (nd->nd_flag & ND_GSS)
		exp->nes_exflag &= ~MNT_EXPORTANON;

	/*
	 * Check to see if the operation is allowed for this security flavor.
	 */
	error = 0;
	if (testsec) {
		error = nfsvno_testexp(nd, exp);
		if (error != 0)
			goto out;
	}

	/*
	 * Check to see if the file system is exported V4 only.
	 */
	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
		error = NFSERR_PROGNOTV4;
		goto out;
	}

	/*
	 * Now, map the user credentials.
	 * (Note that ND_AUTHNONE will only be set for an NFSv3
	 * Fsinfo RPC. If set for anything else, this code might need
	 * to change.)
	 */
	if (NFSVNO_EXPORTED(exp)) {
		/*
		 * Map to the anonymous credentials when a non-GSS request
		 * arrives as uid 0, when the export maps everyone to the
		 * anonymous user, or when the request used ND_AUTHNONE.
		 */
		if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
		    NFSVNO_EXPORTANON(exp) ||
		    (nd->nd_flag & ND_AUTHNONE) != 0) {
			nd->nd_cred->cr_uid = credanon->cr_uid;
			nd->nd_cred->cr_gid = credanon->cr_gid;
			/*
			 * 'credanon' is already a 'struct ucred' that was built
			 * internally with calls to crsetgroups_and_egid(), so
			 * we don't need a fallback here.
			 */
			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
			    credanon->cr_groups);
		} else if ((nd->nd_flag & ND_GSS) == 0) {
			/*
			 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
			 * if there is a replacement credential with a group
			 * list set up by "nfsuserd -manage-gids".
			 * If there is no replacement, nfsrv_getgrpscred()
			 * simply returns its argument.
			 */
			nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
		}
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check exports.
 *
 * Fails with EACCES unless mp->mnt_exjail is set and refers to the same
 * prison as the calling thread's credentials.  Otherwise VFS_CHECKEXP()
 * fills in the export flags, security flavors and *credp for address nam.
 * When VFS_CHECKEXP() (or the prison check) fails but an NFSv4 root file
 * handle has been set, the error is cleared and exp is marked as "not
 * exported" (nes_exflag == 0, no security flavors).
 * A successful VFS_CHECKEXP() that reports a security flavor count
 * outside 1..MAXSECFLAVORS is turned into EACCES.
 *
 * Returns 0 or an errno value; *credp is NULL unless VFS_CHECKEXP() set it.
 */
int
nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
    struct ucred **credp)
{
	int error;

	error = 0;
	*credp = NULL;
	MNT_ILOCK(mp);
	if (mp->mnt_exjail == NULL ||
	    mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
		error = EACCES;
	MNT_IUNLOCK(mp);
	if (error == 0)
		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
		    &exp->nes_numsecflavor, exp->nes_secflavors);
	if (error) {
		if (NFSD_VNET(nfs_rootfhset)) {
			exp->nes_exflag = 0;
			exp->nes_numsecflavor = 0;
			error = 0;
		}
	} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
	    MAXSECFLAVORS) {
		printf("nfsvno_checkexp: numsecflavors out of range\n");
		exp->nes_numsecflavor = 0;
		error = EACCES;
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Get a vnode for a file handle and export stuff.
 *
 * The vnode is obtained with VFS_FHTOVP() using lock type lktype; any
 * failure there is reported as ESTALE.  When nam is non-NULL the same
 * export checks as nfsvno_checkexp() are applied.  On every error return
 * taken after the vnode was obtained, the vnode is released with vput(),
 * so *vpp must only be used by the caller when 0 is returned.
 */
int
nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
    int lktype, struct vnode **vpp, struct nfsexstuff *exp,
    struct ucred **credp)
{
	int error;

	*credp = NULL;
	exp->nes_numsecflavor = 0;
	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
	if (error != 0)
		/* Make sure the server replies ESTALE to the client. */
		error = ESTALE;
	if (nam && !error) {
		MNT_ILOCK(mp);
		if (mp->mnt_exjail == NULL ||
		    mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
			error = EACCES;
		MNT_IUNLOCK(mp);
		if (error == 0)
			error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
			    &exp->nes_numsecflavor, exp->nes_secflavors);
		if (error) {
			if (NFSD_VNET(nfs_rootfhset)) {
				/* Treat as "not exported" instead of failing. */
				exp->nes_exflag = 0;
				exp->nes_numsecflavor = 0;
				error = 0;
			} else {
				vput(*vpp);
			}
		} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
		    MAXSECFLAVORS) {
			printf("nfsvno_fhtovp: numsecflavors out of range\n");
			exp->nes_numsecflavor = 0;
			error = EACCES;
			vput(*vpp);
		}
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * nfsd_fhtovp() - convert a fh to a vnode ptr
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp and export rights by calling nfsvno_fhtovp()
 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 *	  for AUTH_SYS
 *	- if mpp != NULL, return the mount point so that it can
 *	  be used for vn_finished_write() by the caller
 *
 * The result is reported through nd->nd_repstat (0 on success).  On
 * failure *vpp is NULL and any write started here has been finished.
 */
void
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
    struct vnode **vpp, struct nfsexstuff *exp,
    struct mount **mpp, int startwrite, int nextop)
{
	struct mount *mp, *mpw;
	struct ucred *credanon;
	fhandle_t *fhp;
	int error;

	if (mpp != NULL)
		*mpp = NULL;
	*vpp = NULL;
	fhp = (fhandle_t *)nfp->nfsrvfh_data;
	mp = vfs_busyfs(&fhp->fh_fsid);
	if (mp == NULL) {
		nd->nd_repstat = ESTALE;
		goto out;
	}

	if (startwrite) {
		mpw = mp;
		error = vn_start_write(NULL, &mpw, V_WAIT);
		if (error != 0) {
			mpw = NULL;
			vfs_unbusy(mp);
			nd->nd_repstat = ESTALE;
			goto out;
		}
		/* Writers need an exclusive lock unless the fs allows shared. */
		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
			lktype = LK_EXCLUSIVE;
	} else
		mpw = NULL;

	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
	    &credanon);
	vfs_unbusy(mp);

	/*
	 * The file handle length distinguishes named attribute directory
	 * and named attribute handles; tag the vnode accordingly.
	 */
	if (nd->nd_repstat == 0 &&
	    nfp->nfsrvfh_len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH &&
	    nfp->nfsrvfh_len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) {
		if (nfp->nfsrvfh_len == NFSX_MYFH + NFSX_V4NAMEDDIRFH)
			vn_irflag_set_cond(*vpp, VIRF_NAMEDDIR);
		else
			vn_irflag_set_cond(*vpp, VIRF_NAMEDATTR);
	}

	/*
	 * For NFSv4 without a pseudo root fs, unexported file handles
	 * can be returned, so that Lookup works everywhere.
	 */
	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
	    !(nd->nd_flag & ND_NFSV4)) {
		vput(*vpp);
		*vpp = NULL;
		nd->nd_repstat = EACCES;
	}

	/*
	 * Personally, I've never seen any point in requiring a
	 * reserved port#, since only in the rare case where the
	 * clients are all boxes with secure system privileges,
	 * does it provide any enhanced security, but... some people
	 * believe it to be useful and keep putting this code back in.
	 * (There is also some "security checker" out there that
	 * complains if the nfs server doesn't enforce this.)
	 * However, note the following:
	 * RFC3530 (NFSv4) specifies that a reserved port# not be
	 * required.
	 * RFC2623 recommends that, if a reserved port# is checked for,
	 * that there be a way to turn that off--> ifdef'd.
	 */
#ifdef NFS_REQRSVPORT
	if (!nd->nd_repstat) {
		struct sockaddr_in *saddr;
		struct sockaddr_in6 *saddr6;

		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
		if (!(nd->nd_flag & ND_NFSV4) &&
		    ((saddr->sin_family == AF_INET &&
		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
		     (saddr6->sin6_family == AF_INET6 &&
		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
			vput(*vpp);
			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
		}
	}
#endif	/* NFS_REQRSVPORT */

	/*
	 * Check/setup credentials.
	 */
	if (!nd->nd_repstat) {
		/* Remember the uid before nfsd_excred() may remap it. */
		nd->nd_saveduid = nd->nd_cred->cr_uid;
		nd->nd_repstat = nfsd_excred(nd, exp, credanon,
		    nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
		if (nd->nd_repstat)
			vput(*vpp);
	}
	if (credanon != NULL)
		crfree(credanon);
	if (nd->nd_repstat) {
		vn_finished_write(mpw);
		*vpp = NULL;
	} else if (mpp != NULL) {
		*mpp = mpw;
	}

out:
	NFSEXITCODE2(0, nd);
}

/*
 * glue for fp.
 *
 * Look up descriptor fd in the file descriptor table of p's process and
 * return the struct file through *fpp.  Returns EBADF when fd is out of
 * range or the slot is empty, 0 otherwise.
 * Note that vpp is never written and that no additional reference is
 * taken on the file within this function.
 */
static int
fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
{
	struct filedesc *fdp;
	struct file *fp;
	int error = 0;

	fdp = p->td_proc->p_fd;
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
		error = EBADF;
		goto out;
	}
	*fpp = fp;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Called from nfssvc() to update the exports list. Just call
 * vfs_export(). This has to be done, since the v4 root fake fs isn't
 * in the mount list.
3731 */ 3732 int 3733 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3734 { 3735 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3736 int error = 0; 3737 struct nameidata nd; 3738 fhandle_t fh; 3739 3740 error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false); 3741 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3742 NFSD_VNET(nfs_rootfhset) = 0; 3743 else if (error == 0) { 3744 if (nfsexargp->fspec == NULL) { 3745 error = EPERM; 3746 goto out; 3747 } 3748 /* 3749 * If fspec != NULL, this is the v4root path. 3750 */ 3751 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec); 3752 if ((error = namei(&nd)) != 0) 3753 goto out; 3754 NDFREE_PNBUF(&nd); 3755 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3756 vrele(nd.ni_vp); 3757 if (!error) { 3758 NFSD_VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; 3759 NFSBCOPY((caddr_t)&fh, 3760 NFSD_VNET(nfs_rootfh).nfsrvfh_data, 3761 sizeof (fhandle_t)); 3762 NFSD_VNET(nfs_rootfhset) = 1; 3763 } 3764 } 3765 3766 out: 3767 NFSEXITCODE(error); 3768 return (error); 3769 } 3770 3771 /* 3772 * This function needs to test to see if the system is near its limit 3773 * for memory allocation via malloc() or mget() and return True iff 3774 * either of these resources are near their limit. 3775 * XXX (For now, this is just a stub.) 3776 */ 3777 int nfsrv_testmalloclimit = 0; 3778 int 3779 nfsrv_mallocmget_limit(void) 3780 { 3781 static int printmesg = 0; 3782 static int testval = 1; 3783 3784 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3785 if ((printmesg++ % 100) == 0) 3786 printf("nfsd: malloc/mget near limit\n"); 3787 return (1); 3788 } 3789 return (0); 3790 } 3791 3792 /* 3793 * BSD specific initialization of a mount point. 
3794 */ 3795 void 3796 nfsd_mntinit(void) 3797 { 3798 3799 NFSD_LOCK(); 3800 if (NFSD_VNET(nfsrv_mntinited)) { 3801 NFSD_UNLOCK(); 3802 return; 3803 } 3804 NFSD_VNET(nfsrv_mntinited) = true; 3805 nfsrvd_init(0); 3806 NFSD_UNLOCK(); 3807 3808 NFSD_VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP, 3809 M_WAITOK | M_ZERO); 3810 NFSD_VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3811 mtx_init(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 3812 lockinit(&NFSD_VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0); 3813 TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelist); 3814 TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelist); 3815 NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; 3816 TAILQ_INIT(&NFSD_VNET(nfsv4root_opt)); 3817 TAILQ_INIT(&NFSD_VNET(nfsv4root_newopt)); 3818 NFSD_VNET(nfsv4root_mnt)->mnt_opt = &NFSD_VNET(nfsv4root_opt); 3819 NFSD_VNET(nfsv4root_mnt)->mnt_optnew = &NFSD_VNET(nfsv4root_newopt); 3820 NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0; 3821 NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0; 3822 callout_init(&NFSD_VNET(nfsd_callout), 1); 3823 3824 nfsrvd_initcache(); 3825 nfsd_init(); 3826 } 3827 3828 static void 3829 nfsd_timer(void *arg) 3830 { 3831 struct vnet *vnetp; 3832 3833 vnetp = (struct vnet *)arg; 3834 NFSD_CURVNET_SET_QUIET(vnetp); 3835 nfsrv_servertimer(vnetp); 3836 callout_reset_sbt(&NFSD_VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, 3837 arg, 0); 3838 NFSD_CURVNET_RESTORE(); 3839 } 3840 3841 /* 3842 * Get a vnode for a file handle, without checking exports, etc. 3843 */ 3844 struct vnode * 3845 nfsvno_getvp(fhandle_t *fhp) 3846 { 3847 struct mount *mp; 3848 struct vnode *vp; 3849 int error; 3850 3851 mp = vfs_busyfs(&fhp->fh_fsid); 3852 if (mp == NULL) 3853 return (NULL); 3854 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3855 vfs_unbusy(mp); 3856 if (error) 3857 return (NULL); 3858 return (vp); 3859 } 3860 3861 /* 3862 * Do a local VOP_ADVLOCK(). 
3863 */ 3864 int 3865 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3866 u_int64_t end, struct thread *td) 3867 { 3868 int error = 0; 3869 struct flock fl; 3870 u_int64_t tlen; 3871 3872 if (nfsrv_dolocallocks == 0) 3873 goto out; 3874 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3875 3876 fl.l_whence = SEEK_SET; 3877 fl.l_type = ftype; 3878 fl.l_start = (off_t)first; 3879 if (end == NFS64BITSSET) { 3880 fl.l_len = 0; 3881 } else { 3882 tlen = end - first; 3883 fl.l_len = (off_t)tlen; 3884 } 3885 /* 3886 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3887 * values for all calls, so that all locks will be held by the 3888 * nfsd server. (The nfsd server handles conflicts between the 3889 * various clients.) 3890 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3891 * bytes, so it can't be put in l_sysid. 3892 */ 3893 if (nfsv4_sysid == 0) 3894 nfsv4_sysid = nlm_acquire_next_sysid(); 3895 fl.l_pid = (pid_t)0; 3896 fl.l_sysid = (int)nfsv4_sysid; 3897 3898 if (ftype == F_UNLCK) 3899 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3900 (F_POSIX | F_REMOTE)); 3901 else 3902 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3903 (F_POSIX | F_REMOTE)); 3904 3905 out: 3906 NFSEXITCODE(error); 3907 return (error); 3908 } 3909 3910 /* 3911 * Check the nfsv4 root exports. 
3912 */ 3913 int 3914 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3915 { 3916 struct ucred *credanon; 3917 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; 3918 uint64_t exflags; 3919 3920 error = vfs_stdcheckexp(NFSD_VNET(nfsv4root_mnt), nd->nd_nam, &exflags, 3921 &credanon, &numsecflavor, secflavors); 3922 if (error) { 3923 error = NFSERR_PROGUNAVAIL; 3924 goto out; 3925 } 3926 if (credanon != NULL) 3927 crfree(credanon); 3928 for (i = 0; i < numsecflavor; i++) { 3929 if (secflavors[i] == AUTH_SYS) 3930 nd->nd_flag |= ND_EXAUTHSYS; 3931 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3932 nd->nd_flag |= ND_EXGSS; 3933 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3934 nd->nd_flag |= ND_EXGSSINTEGRITY; 3935 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3936 nd->nd_flag |= ND_EXGSSPRIVACY; 3937 } 3938 3939 /* And set ND_EXxx flags for TLS. */ 3940 if ((exflags & MNT_EXTLS) != 0) { 3941 nd->nd_flag |= ND_EXTLS; 3942 if ((exflags & MNT_EXTLSCERT) != 0) 3943 nd->nd_flag |= ND_EXTLSCERT; 3944 if ((exflags & MNT_EXTLSCERTUSER) != 0) 3945 nd->nd_flag |= ND_EXTLSCERTUSER; 3946 } 3947 3948 out: 3949 NFSEXITCODE(error); 3950 return (error); 3951 } 3952 3953 /* 3954 * Nfs server pseudo system call for the nfsd's 3955 */ 3956 /* 3957 * MPSAFE 3958 */ 3959 static int 3960 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3961 { 3962 struct file *fp; 3963 struct nfsd_addsock_args sockarg; 3964 struct nfsd_nfsd_args nfsdarg; 3965 struct nfsd_nfsd_oargs onfsdarg; 3966 struct nfsd_pnfsd_args pnfsdarg; 3967 struct vnode *vp, *nvp, *curdvp; 3968 struct pnfsdsfile *pf; 3969 struct nfsdevice *ds, *fds; 3970 cap_rights_t rights; 3971 int buflen, error, ret; 3972 char *buf, *cp, *cp2, *cp3; 3973 char fname[PNFS_FILENAME_LEN + 1]; 3974 3975 NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td)); 3976 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3977 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3978 if (error) 3979 goto out; 3980 /* 3981 * Since we don't know what rights might be 
required, 3982 * pretend that we need them all. It is better to be too 3983 * careful than too reckless. 3984 */ 3985 error = fget(td, sockarg.sock, 3986 cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp); 3987 if (error != 0) 3988 goto out; 3989 if (fp->f_type != DTYPE_SOCKET) { 3990 fdrop(fp, td); 3991 error = EPERM; 3992 goto out; 3993 } 3994 error = nfsrvd_addsock(fp); 3995 fdrop(fp, td); 3996 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3997 if (uap->argp == NULL) { 3998 error = EINVAL; 3999 goto out; 4000 } 4001 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 4002 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 4003 if (error == 0) { 4004 nfsdarg.principal = onfsdarg.principal; 4005 nfsdarg.minthreads = onfsdarg.minthreads; 4006 nfsdarg.maxthreads = onfsdarg.maxthreads; 4007 nfsdarg.version = 1; 4008 nfsdarg.addr = NULL; 4009 nfsdarg.addrlen = 0; 4010 nfsdarg.dnshost = NULL; 4011 nfsdarg.dnshostlen = 0; 4012 nfsdarg.dspath = NULL; 4013 nfsdarg.dspathlen = 0; 4014 nfsdarg.mdspath = NULL; 4015 nfsdarg.mdspathlen = 0; 4016 nfsdarg.mirrorcnt = 1; 4017 } 4018 } else 4019 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 4020 if (error) 4021 goto out; 4022 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 4023 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 4024 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 4025 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 4026 nfsdarg.mirrorcnt >= 1 && 4027 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 4028 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 4029 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 4030 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 4031 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 4032 nfsdarg.dspathlen, nfsdarg.dnshostlen, 4033 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 4034 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 4035 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 4036 if (error != 0) { 4037 free(cp, M_TEMP); 4038 goto out; 4039 } 4040 
cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ 4041 nfsdarg.addr = cp; 4042 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 4043 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 4044 if (error != 0) { 4045 free(nfsdarg.addr, M_TEMP); 4046 free(cp, M_TEMP); 4047 goto out; 4048 } 4049 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 4050 nfsdarg.dnshost = cp; 4051 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 4052 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 4053 if (error != 0) { 4054 free(nfsdarg.addr, M_TEMP); 4055 free(nfsdarg.dnshost, M_TEMP); 4056 free(cp, M_TEMP); 4057 goto out; 4058 } 4059 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 4060 nfsdarg.dspath = cp; 4061 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 4062 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 4063 if (error != 0) { 4064 free(nfsdarg.addr, M_TEMP); 4065 free(nfsdarg.dnshost, M_TEMP); 4066 free(nfsdarg.dspath, M_TEMP); 4067 free(cp, M_TEMP); 4068 goto out; 4069 } 4070 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ 4071 nfsdarg.mdspath = cp; 4072 } else { 4073 nfsdarg.addr = NULL; 4074 nfsdarg.addrlen = 0; 4075 nfsdarg.dnshost = NULL; 4076 nfsdarg.dnshostlen = 0; 4077 nfsdarg.dspath = NULL; 4078 nfsdarg.dspathlen = 0; 4079 nfsdarg.mdspath = NULL; 4080 nfsdarg.mdspathlen = 0; 4081 nfsdarg.mirrorcnt = 1; 4082 } 4083 nfsd_timer(NFSD_TD_TO_VNET(td)); 4084 error = nfsrvd_nfsd(td, &nfsdarg); 4085 callout_drain(&NFSD_VNET(nfsd_callout)); 4086 free(nfsdarg.addr, M_TEMP); 4087 free(nfsdarg.dnshost, M_TEMP); 4088 free(nfsdarg.dspath, M_TEMP); 4089 free(nfsdarg.mdspath, M_TEMP); 4090 } else if (uap->flag & NFSSVC_PNFSDS) { 4091 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 4092 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 4093 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 4094 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4095 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 4096 NULL); 4097 if (error == 0) 4098 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 4099 free(cp, M_TEMP); 4100 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 4101 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4102 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 4103 buf = malloc(buflen, M_TEMP, M_WAITOK); 4104 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 4105 NULL); 4106 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 4107 if (error == 0 && pnfsdarg.dspath != NULL) { 4108 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4109 error = copyinstr(pnfsdarg.dspath, cp2, 4110 PATH_MAX + 1, NULL); 4111 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 4112 error); 4113 } else 4114 cp2 = NULL; 4115 if (error == 0 && pnfsdarg.curdspath != NULL) { 4116 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4117 error = copyinstr(pnfsdarg.curdspath, cp3, 4118 PATH_MAX + 1, NULL); 4119 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 4120 error); 4121 } else 4122 cp3 = NULL; 4123 curdvp = NULL; 4124 fds = NULL; 4125 if (error == 0) 4126 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 4127 &buflen, fname, td, &vp, 
&nvp, &pf, &ds, 4128 &fds); 4129 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 4130 if (error == 0) { 4131 if (pf->dsf_dir >= nfsrv_dsdirsize) { 4132 printf("copymr: dsdir out of range\n"); 4133 pf->dsf_dir = 0; 4134 } 4135 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 4136 error = nfsrv_copymr(vp, nvp, 4137 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 4138 (struct pnfsdsfile *)buf, 4139 buflen / sizeof(*pf), td->td_ucred, td); 4140 vput(vp); 4141 vput(nvp); 4142 if (fds != NULL && error == 0) { 4143 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 4144 ret = vn_lock(curdvp, LK_EXCLUSIVE); 4145 if (ret == 0) { 4146 nfsrv_dsremove(curdvp, fname, 4147 td->td_ucred, td); 4148 NFSVOPUNLOCK(curdvp); 4149 } 4150 } 4151 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 4152 } 4153 free(cp, M_TEMP); 4154 free(cp2, M_TEMP); 4155 free(cp3, M_TEMP); 4156 free(buf, M_TEMP); 4157 } 4158 } else { 4159 error = nfssvc_srvcall(td, uap, td->td_ucred); 4160 } 4161 4162 out: 4163 NFSD_CURVNET_RESTORE(); 4164 NFSEXITCODE(error); 4165 return (error); 4166 } 4167 4168 static int 4169 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 4170 { 4171 struct nfsex_args export; 4172 struct nfsex_oldargs oexp; 4173 struct file *fp = NULL; 4174 int stablefd, i, len; 4175 struct nfsd_clid adminrevoke; 4176 struct nfsd_dumplist dumplist; 4177 struct nfsd_dumpclients *dumpclients; 4178 struct nfsd_dumplocklist dumplocklist; 4179 struct nfsd_dumplocks *dumplocks; 4180 struct nameidata nd; 4181 vnode_t vp; 4182 int error = EINVAL, igotlock; 4183 struct proc *procp; 4184 gid_t *grps; 4185 4186 if (uap->flag & NFSSVC_PUBLICFH) { 4187 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 4188 sizeof (fhandle_t)); 4189 error = copyin(uap->argp, 4190 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 4191 if (!error) 4192 nfs_pubfhset = 1; 4193 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 4194 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { 4195 error = copyin(uap->argp,(caddr_t)&export, 4196 
sizeof (struct nfsex_args)); 4197 if (!error) { 4198 grps = NULL; 4199 if (export.export.ex_ngroups > NGROUPS_MAX || 4200 export.export.ex_ngroups < 0) 4201 error = EINVAL; 4202 else if (export.export.ex_ngroups > 0) { 4203 grps = malloc(export.export.ex_ngroups * 4204 sizeof(gid_t), M_TEMP, M_WAITOK); 4205 error = copyin(export.export.ex_groups, grps, 4206 export.export.ex_ngroups * sizeof(gid_t)); 4207 export.export.ex_groups = grps; 4208 } else 4209 export.export.ex_groups = NULL; 4210 if (!error) 4211 error = nfsrv_v4rootexport(&export, cred, p); 4212 free(grps, M_TEMP); 4213 } 4214 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 4215 NFSSVC_V4ROOTEXPORT) { 4216 error = copyin(uap->argp,(caddr_t)&oexp, 4217 sizeof (struct nfsex_oldargs)); 4218 if (!error) { 4219 memset(&export.export, 0, sizeof(export.export)); 4220 export.export.ex_flags = (uint64_t)oexp.export.ex_flags; 4221 export.export.ex_root = oexp.export.ex_root; 4222 export.export.ex_uid = oexp.export.ex_anon.cr_uid; 4223 export.export.ex_ngroups = 4224 oexp.export.ex_anon.cr_ngroups; 4225 export.export.ex_groups = NULL; 4226 if (export.export.ex_ngroups > XU_NGROUPS || 4227 export.export.ex_ngroups < 0) 4228 error = EINVAL; 4229 else if (export.export.ex_ngroups > 0) { 4230 export.export.ex_groups = malloc( 4231 export.export.ex_ngroups * sizeof(gid_t), 4232 M_TEMP, M_WAITOK); 4233 for (i = 0; i < export.export.ex_ngroups; i++) 4234 export.export.ex_groups[i] = 4235 oexp.export.ex_anon.cr_groups[i]; 4236 } 4237 export.export.ex_addr = oexp.export.ex_addr; 4238 export.export.ex_addrlen = oexp.export.ex_addrlen; 4239 export.export.ex_mask = oexp.export.ex_mask; 4240 export.export.ex_masklen = oexp.export.ex_masklen; 4241 export.export.ex_indexfile = oexp.export.ex_indexfile; 4242 export.export.ex_numsecflavors = 4243 oexp.export.ex_numsecflavors; 4244 if (export.export.ex_numsecflavors >= MAXSECFLAVORS || 4245 export.export.ex_numsecflavors < 0) 4246 error = EINVAL; 4247 else { 4248 
for (i = 0; i < export.export.ex_numsecflavors; 4249 i++) 4250 export.export.ex_secflavors[i] = 4251 oexp.export.ex_secflavors[i]; 4252 } 4253 export.fspec = oexp.fspec; 4254 if (error == 0) 4255 error = nfsrv_v4rootexport(&export, cred, p); 4256 free(export.export.ex_groups, M_TEMP); 4257 } 4258 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 4259 nfs_pubfhset = 0; 4260 error = 0; 4261 } else if (uap->flag & NFSSVC_STABLERESTART) { 4262 error = copyin(uap->argp, (caddr_t)&stablefd, 4263 sizeof (int)); 4264 if (!error) 4265 error = fp_getfvp(p, stablefd, &fp, &vp); 4266 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 4267 error = EBADF; 4268 if (!error && NFSD_VNET(nfsrv_numnfsd) != 0) 4269 error = ENXIO; 4270 if (!error) { 4271 NFSD_VNET(nfsrv_stablefirst).nsf_fp = fp; 4272 nfsrv_setupstable(p); 4273 } 4274 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 4275 error = copyin(uap->argp, (caddr_t)&adminrevoke, 4276 sizeof (struct nfsd_clid)); 4277 if (!error) 4278 error = nfsrv_adminrevoke(&adminrevoke, p); 4279 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 4280 error = copyin(uap->argp, (caddr_t)&dumplist, 4281 sizeof (struct nfsd_dumplist)); 4282 if (!error && (dumplist.ndl_size < 1 || 4283 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 4284 error = EPERM; 4285 if (!error) { 4286 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 4287 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 4288 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 4289 error = copyout(dumpclients, dumplist.ndl_list, len); 4290 free(dumpclients, M_TEMP); 4291 } 4292 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 4293 error = copyin(uap->argp, (caddr_t)&dumplocklist, 4294 sizeof (struct nfsd_dumplocklist)); 4295 if (!error && (dumplocklist.ndllck_size < 1 || 4296 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 4297 error = EPERM; 4298 if (!error) 4299 error = nfsrv_lookupfilename(&nd, 4300 dumplocklist.ndllck_fname, p); 4301 if (!error) { 4302 len = sizeof (struct 
nfsd_dumplocks) * 4303 dumplocklist.ndllck_size; 4304 dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 4305 nfsrv_dumplocks(nd.ni_vp, dumplocks, 4306 dumplocklist.ndllck_size, p); 4307 vput(nd.ni_vp); 4308 error = copyout(dumplocks, dumplocklist.ndllck_list, 4309 len); 4310 free(dumplocks, M_TEMP); 4311 } 4312 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 4313 procp = p->td_proc; 4314 PROC_LOCK(procp); 4315 nfsd_master_pid = procp->p_pid; 4316 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 4317 nfsd_master_start = procp->p_stats->p_start; 4318 NFSD_VNET(nfsd_master_proc) = procp; 4319 PROC_UNLOCK(procp); 4320 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 4321 NFSLOCKV4ROOTMUTEX(); 4322 if (!NFSD_VNET(nfsrv_suspend_nfsd)) { 4323 /* Lock out all nfsd threads */ 4324 do { 4325 igotlock = nfsv4_lock( 4326 &NFSD_VNET(nfsd_suspend_lock), 1, NULL, 4327 NFSV4ROOTLOCKMUTEXPTR, NULL); 4328 } while (igotlock == 0 && 4329 !NFSD_VNET(nfsrv_suspend_nfsd)); 4330 NFSD_VNET(nfsrv_suspend_nfsd) = true; 4331 } 4332 NFSUNLOCKV4ROOTMUTEX(); 4333 error = 0; 4334 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 4335 NFSLOCKV4ROOTMUTEX(); 4336 if (NFSD_VNET(nfsrv_suspend_nfsd)) { 4337 nfsv4_unlock(&NFSD_VNET(nfsd_suspend_lock), 0); 4338 NFSD_VNET(nfsrv_suspend_nfsd) = false; 4339 } 4340 NFSUNLOCKV4ROOTMUTEX(); 4341 error = 0; 4342 } 4343 4344 NFSEXITCODE(error); 4345 return (error); 4346 } 4347 4348 /* 4349 * Check exports. 4350 * Returns 0 if ok, 1 otherwise. 4351 */ 4352 int 4353 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 4354 { 4355 int i; 4356 4357 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || 4358 (NFSVNO_EXTLSCERT(exp) && 4359 (nd->nd_flag & ND_TLSCERT) == 0) || 4360 (NFSVNO_EXTLSCERTUSER(exp) && 4361 (nd->nd_flag & ND_TLSCERTUSER) == 0)) { 4362 if ((nd->nd_flag & ND_NFSV4) != 0) 4363 return (NFSERR_WRONGSEC); 4364 #ifdef notnow 4365 /* There is currently no auth_stat for this. 
*/ 4366 else if ((nd->nd_flag & ND_TLS) == 0) 4367 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS); 4368 else 4369 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST); 4370 #endif 4371 else 4372 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4373 } 4374 4375 /* 4376 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use 4377 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 4378 */ 4379 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO) 4380 return (0); 4381 4382 /* 4383 * This seems odd, but allow the case where the security flavor 4384 * list is empty. This happens when NFSv4 is traversing non-exported 4385 * file systems. Exported file systems should always have a non-empty 4386 * security flavor list. 4387 */ 4388 if (exp->nes_numsecflavor == 0) 4389 return (0); 4390 4391 for (i = 0; i < exp->nes_numsecflavor; i++) { 4392 /* 4393 * The tests for privacy and integrity must be first, 4394 * since ND_GSS is set for everything but AUTH_SYS. 4395 */ 4396 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 4397 (nd->nd_flag & ND_GSSPRIVACY)) 4398 return (0); 4399 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 4400 (nd->nd_flag & ND_GSSINTEGRITY)) 4401 return (0); 4402 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 4403 (nd->nd_flag & ND_GSS)) 4404 return (0); 4405 if (exp->nes_secflavors[i] == AUTH_SYS && 4406 (nd->nd_flag & ND_GSS) == 0) 4407 return (0); 4408 } 4409 if ((nd->nd_flag & ND_NFSV4) != 0) 4410 return (NFSERR_WRONGSEC); 4411 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4412 } 4413 4414 /* 4415 * Calculate a hash value for the fid in a file handle. 4416 */ 4417 uint32_t 4418 nfsrv_hashfh(fhandle_t *fhp) 4419 { 4420 uint32_t hashval; 4421 4422 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 4423 return (hashval); 4424 } 4425 4426 /* 4427 * Calculate a hash value for the sessionid. 
4428 */ 4429 uint32_t 4430 nfsrv_hashsessionid(uint8_t *sessionid) 4431 { 4432 uint32_t hashval; 4433 4434 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 4435 return (hashval); 4436 } 4437 4438 /* 4439 * Signal the userland master nfsd to backup the stable restart file. 4440 */ 4441 void 4442 nfsrv_backupstable(void) 4443 { 4444 struct proc *procp; 4445 4446 if (NFSD_VNET(nfsd_master_proc) != NULL) { 4447 procp = pfind(nfsd_master_pid); 4448 /* Try to make sure it is the correct process. */ 4449 if (procp == NFSD_VNET(nfsd_master_proc) && 4450 procp->p_stats->p_start.tv_sec == 4451 nfsd_master_start.tv_sec && 4452 procp->p_stats->p_start.tv_usec == 4453 nfsd_master_start.tv_usec && 4454 strcmp(procp->p_comm, nfsd_master_comm) == 0) 4455 kern_psignal(procp, SIGUSR2); 4456 else 4457 NFSD_VNET(nfsd_master_proc) = NULL; 4458 4459 if (procp != NULL) 4460 PROC_UNLOCK(procp); 4461 } 4462 } 4463 4464 /* 4465 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 4466 * The arguments are in a structure, so that they can be passed through 4467 * taskqueue for a kernel process to execute this function. 
4468 */ 4469 struct nfsrvdscreate { 4470 int done; 4471 int inprog; 4472 struct task tsk; 4473 struct ucred *tcred; 4474 struct vnode *dvp; 4475 NFSPROC_T *p; 4476 struct pnfsdsfile *pf; 4477 int err; 4478 fhandle_t fh; 4479 struct vattr va; 4480 struct vattr createva; 4481 }; 4482 4483 int 4484 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 4485 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 4486 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 4487 { 4488 struct vnode *nvp; 4489 struct nameidata named; 4490 struct vattr va; 4491 char *bufp; 4492 u_long *hashp; 4493 struct nfsnode *np; 4494 struct nfsmount *nmp; 4495 int error; 4496 4497 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 4498 LOCKPARENT | LOCKLEAF | NOCACHE); 4499 nfsvno_setpathbuf(&named, &bufp, &hashp); 4500 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 4501 named.ni_cnd.cn_nameptr = bufp; 4502 if (fnamep != NULL) { 4503 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 4504 named.ni_cnd.cn_namelen = strlen(bufp); 4505 } else 4506 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 4507 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 4508 4509 /* Create the date file in the DS mount. */ 4510 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4511 if (error == 0) { 4512 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 4513 vref(dvp); 4514 VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false); 4515 if (error == 0) { 4516 /* Set the ownership of the file. 
*/ 4517 error = VOP_SETATTR(nvp, nvap, tcred); 4518 NFSD_DEBUG(4, "nfsrv_dscreate:" 4519 " setattr-uid=%d\n", error); 4520 if (error != 0) 4521 vput(nvp); 4522 } 4523 if (error != 0) 4524 printf("pNFS: pnfscreate failed=%d\n", error); 4525 } else 4526 printf("pNFS: pnfscreate vnlock=%d\n", error); 4527 if (error == 0) { 4528 np = VTONFS(nvp); 4529 nmp = VFSTONFS(nvp->v_mount); 4530 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 4531 != 0 || nmp->nm_nam->sa_len > sizeof( 4532 struct sockaddr_in6) || 4533 np->n_fhp->nfh_len != NFSX_MYFH) { 4534 printf("Bad DS file: fstype=%s salen=%d" 4535 " fhlen=%d\n", 4536 nvp->v_mount->mnt_vfc->vfc_name, 4537 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 4538 error = ENOENT; 4539 } 4540 4541 /* Set extattrs for the DS on the MDS file. */ 4542 if (error == 0) { 4543 if (dsa != NULL) { 4544 error = VOP_GETATTR(nvp, &va, tcred); 4545 if (error == 0) { 4546 dsa->dsa_filerev = va.va_filerev; 4547 dsa->dsa_size = va.va_size; 4548 dsa->dsa_atime = va.va_atime; 4549 dsa->dsa_mtime = va.va_mtime; 4550 dsa->dsa_bytes = va.va_bytes; 4551 } 4552 } 4553 if (error == 0) { 4554 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 4555 NFSX_MYFH); 4556 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 4557 nmp->nm_nam->sa_len); 4558 NFSBCOPY(named.ni_cnd.cn_nameptr, 4559 pf->dsf_filename, 4560 sizeof(pf->dsf_filename)); 4561 } 4562 } else 4563 printf("pNFS: pnfscreate can't get DS" 4564 " attr=%d\n", error); 4565 if (nvpp != NULL && error == 0) 4566 *nvpp = nvp; 4567 else 4568 vput(nvp); 4569 } 4570 nfsvno_relpathbuf(&named); 4571 return (error); 4572 } 4573 4574 /* 4575 * Start up the thread that will execute nfsrv_dscreate(). 
4576 */ 4577 static void 4578 start_dscreate(void *arg, int pending) 4579 { 4580 struct nfsrvdscreate *dsc; 4581 4582 dsc = (struct nfsrvdscreate *)arg; 4583 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 4584 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 4585 dsc->done = 1; 4586 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 4587 } 4588 4589 /* 4590 * Create a pNFS data file on the Data Server(s). 4591 */ 4592 static void 4593 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 4594 NFSPROC_T *p) 4595 { 4596 struct nfsrvdscreate *dsc, *tdsc = NULL; 4597 struct nfsdevice *ds, *tds, *fds; 4598 struct mount *mp; 4599 struct pnfsdsfile *pf, *tpf; 4600 struct pnfsdsattr dsattr; 4601 struct vattr va; 4602 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4603 struct nfsmount *nmp; 4604 fhandle_t fh; 4605 uid_t vauid; 4606 gid_t vagid; 4607 u_short vamode; 4608 struct ucred *tcred; 4609 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; 4610 int failpos, timo; 4611 4612 /* Get a DS server directory in a round-robin order. */ 4613 mirrorcnt = 1; 4614 mp = vp->v_mount; 4615 ds = fds = NULL; 4616 NFSDDSLOCK(); 4617 /* 4618 * Search for the first entry that handles this MDS fs, but use the 4619 * first entry for all MDS fs's otherwise. 
4620 */ 4621 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { 4622 if (tds->nfsdev_nmp != NULL) { 4623 if (tds->nfsdev_mdsisset == 0 && ds == NULL) 4624 ds = tds; 4625 else if (tds->nfsdev_mdsisset != 0 && fsidcmp( 4626 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { 4627 ds = fds = tds; 4628 break; 4629 } 4630 } 4631 } 4632 if (ds == NULL) { 4633 NFSDDSUNLOCK(); 4634 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 4635 return; 4636 } 4637 i = dsdir[0] = ds->nfsdev_nextdir; 4638 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 4639 dvp[0] = ds->nfsdev_dsdir[i]; 4640 tds = TAILQ_NEXT(ds, nfsdev_list); 4641 if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { 4642 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { 4643 if (tds->nfsdev_nmp != NULL && 4644 ((tds->nfsdev_mdsisset == 0 && fds == NULL) || 4645 (tds->nfsdev_mdsisset != 0 && fds != NULL && 4646 fsidcmp(&mp->mnt_stat.f_fsid, 4647 &tds->nfsdev_mdsfsid) == 0))) { 4648 dsdir[mirrorcnt] = i; 4649 dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; 4650 mirrorcnt++; 4651 if (mirrorcnt >= nfsrv_maxpnfsmirror) 4652 break; 4653 } 4654 } 4655 } 4656 /* Put at end of list to implement round-robin usage. */ 4657 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 4658 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 4659 NFSDDSUNLOCK(); 4660 dsc = NULL; 4661 if (mirrorcnt > 1) 4662 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 4663 M_WAITOK | M_ZERO); 4664 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | 4665 M_ZERO); 4666 4667 error = nfsvno_getfh(vp, &fh, p); 4668 if (error == 0) 4669 error = VOP_GETATTR(vp, &va, cred); 4670 if (error == 0) { 4671 /* Set the attributes for "vp" to Setattr the DS vp. 
*/ 4672 vauid = va.va_uid; 4673 vagid = va.va_gid; 4674 vamode = va.va_mode; 4675 VATTR_NULL(&va); 4676 va.va_uid = vauid; 4677 va.va_gid = vagid; 4678 va.va_mode = vamode; 4679 va.va_size = 0; 4680 } else 4681 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 4682 4683 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 4684 cred->cr_gid); 4685 /* Make data file name based on FH. */ 4686 tcred = newnfs_getcred(); 4687 4688 /* 4689 * Create the file on each DS mirror, using kernel process(es) for the 4690 * additional mirrors. 4691 */ 4692 failpos = -1; 4693 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 4694 tpf->dsf_dir = dsdir[i]; 4695 tdsc->tcred = tcred; 4696 tdsc->p = p; 4697 tdsc->pf = tpf; 4698 tdsc->createva = *vap; 4699 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); 4700 tdsc->va = va; 4701 tdsc->dvp = dvp[i]; 4702 tdsc->done = 0; 4703 tdsc->inprog = 0; 4704 tdsc->err = 0; 4705 ret = EIO; 4706 if (nfs_pnfsiothreads != 0) { 4707 ret = nfs_pnfsio(start_dscreate, tdsc); 4708 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 4709 } 4710 if (ret != 0) { 4711 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 4712 NULL, tcred, p, NULL); 4713 if (ret != 0) { 4714 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 4715 error)); 4716 if (failpos == -1 && nfsds_failerr(ret)) 4717 failpos = i; 4718 else 4719 error = ret; 4720 } 4721 } 4722 } 4723 if (error == 0) { 4724 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 4725 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 4726 &dsattr, NULL, tcred, p, NULL); 4727 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 4728 failpos = mirrorcnt - 1; 4729 error = 0; 4730 } 4731 } 4732 timo = hz / 50; /* Wait for 20msec. */ 4733 if (timo < 1) 4734 timo = 1; 4735 /* Wait for kernel task(s) to complete. 
*/ 4736 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 4737 while (tdsc->inprog != 0 && tdsc->done == 0) 4738 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 4739 if (tdsc->err != 0) { 4740 if (failpos == -1 && nfsds_failerr(tdsc->err)) 4741 failpos = i; 4742 else if (error == 0) 4743 error = tdsc->err; 4744 } 4745 } 4746 4747 /* 4748 * If failpos has been set, that mirror has failed, so it needs 4749 * to be disabled. 4750 */ 4751 if (failpos >= 0) { 4752 nmp = VFSTONFS(dvp[failpos]->v_mount); 4753 NFSLOCKMNT(nmp); 4754 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4755 NFSMNTP_CANCELRPCS)) == 0) { 4756 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4757 NFSUNLOCKMNT(nmp); 4758 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4759 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 4760 ds); 4761 if (ds != NULL) 4762 nfsrv_killrpcs(nmp); 4763 NFSLOCKMNT(nmp); 4764 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4765 wakeup(nmp); 4766 } 4767 NFSUNLOCKMNT(nmp); 4768 } 4769 4770 NFSFREECRED(tcred); 4771 if (error == 0) { 4772 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 4773 4774 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", 4775 mirrorcnt, nfsrv_maxpnfsmirror); 4776 /* 4777 * For all mirrors that couldn't be created, fill in the 4778 * *pf structure, but with an IP address == 0.0.0.0. 
4779 */ 4780 tpf = pf + mirrorcnt; 4781 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { 4782 *tpf = *pf; 4783 tpf->dsf_sin.sin_family = AF_INET; 4784 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); 4785 tpf->dsf_sin.sin_addr.s_addr = 0; 4786 tpf->dsf_sin.sin_port = 0; 4787 } 4788 4789 error = vn_extattr_set(vp, IO_NODELOCKED, 4790 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 4791 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); 4792 if (error == 0) 4793 error = vn_extattr_set(vp, IO_NODELOCKED, 4794 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4795 sizeof(dsattr), (char *)&dsattr, p); 4796 if (error != 0) 4797 printf("pNFS: pnfscreate setextattr=%d\n", 4798 error); 4799 } else 4800 printf("pNFS: pnfscreate=%d\n", error); 4801 free(pf, M_TEMP); 4802 free(dsc, M_TEMP); 4803 } 4804 4805 /* 4806 * Get the information needed to remove the pNFS Data Server file from the 4807 * Metadata file. Upon success, ddvp is set non-NULL to the locked 4808 * DS directory vnode. The caller must unlock *ddvp when done with it. 4809 */ 4810 static void 4811 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, 4812 int *mirrorcntp, char *fname, fhandle_t *fhp) 4813 { 4814 struct vattr va; 4815 struct ucred *tcred; 4816 char *buf; 4817 int buflen, error; 4818 4819 dvpp[0] = NULL; 4820 /* If not an exported regular file or not a pNFS server, just return. */ 4821 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4822 nfsrv_devidcnt == 0) 4823 return; 4824 4825 /* Check to see if this is the last hard link. 
*/ 4826 tcred = newnfs_getcred(); 4827 error = VOP_GETATTR(vp, &va, tcred); 4828 NFSFREECRED(tcred); 4829 if (error != 0) { 4830 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 4831 return; 4832 } 4833 if (va.va_nlink > 1) 4834 return; 4835 4836 error = nfsvno_getfh(vp, fhp, p); 4837 if (error != 0) { 4838 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 4839 return; 4840 } 4841 4842 buflen = 1024; 4843 buf = malloc(buflen, M_TEMP, M_WAITOK); 4844 /* Get the directory vnode for the DS mount and the file handle. */ 4845 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, 4846 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 4847 free(buf, M_TEMP); 4848 if (error != 0) 4849 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 4850 } 4851 4852 /* 4853 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 4854 * The arguments are in a structure, so that they can be passed through 4855 * taskqueue for a kernel process to execute this function. 
4856 */ 4857 struct nfsrvdsremove { 4858 int done; 4859 int inprog; 4860 struct task tsk; 4861 struct ucred *tcred; 4862 struct vnode *dvp; 4863 NFSPROC_T *p; 4864 int err; 4865 char fname[PNFS_FILENAME_LEN + 1]; 4866 }; 4867 4868 static int 4869 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4870 NFSPROC_T *p) 4871 { 4872 struct nameidata named; 4873 struct vnode *nvp; 4874 char *bufp; 4875 u_long *hashp; 4876 int error; 4877 4878 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4879 if (error != 0) 4880 return (error); 4881 named.ni_cnd.cn_nameiop = DELETE; 4882 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4883 named.ni_cnd.cn_cred = tcred; 4884 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 4885 nfsvno_setpathbuf(&named, &bufp, &hashp); 4886 named.ni_cnd.cn_nameptr = bufp; 4887 named.ni_cnd.cn_namelen = strlen(fname); 4888 strlcpy(bufp, fname, NAME_MAX); 4889 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4890 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4891 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4892 if (error == 0) { 4893 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4894 vput(nvp); 4895 } 4896 NFSVOPUNLOCK(dvp); 4897 nfsvno_relpathbuf(&named); 4898 if (error != 0) 4899 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4900 return (error); 4901 } 4902 4903 /* 4904 * Start up the thread that will execute nfsrv_dsremove(). 4905 */ 4906 static void 4907 start_dsremove(void *arg, int pending) 4908 { 4909 struct nfsrvdsremove *dsrm; 4910 4911 dsrm = (struct nfsrvdsremove *)arg; 4912 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4913 dsrm->p); 4914 dsrm->done = 1; 4915 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4916 } 4917 4918 /* 4919 * Remove a pNFS data file from a Data Server. 4920 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4921 * removed to set up the dvp and fill in the FH. 
4922 */ 4923 static void 4924 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4925 NFSPROC_T *p) 4926 { 4927 struct ucred *tcred; 4928 struct nfsrvdsremove *dsrm, *tdsrm; 4929 struct nfsdevice *ds; 4930 struct nfsmount *nmp; 4931 int failpos, i, ret, timo; 4932 4933 tcred = newnfs_getcred(); 4934 dsrm = NULL; 4935 if (mirrorcnt > 1) 4936 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4937 /* 4938 * Remove the file on each DS mirror, using kernel process(es) for the 4939 * additional mirrors. 4940 */ 4941 failpos = -1; 4942 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4943 tdsrm->tcred = tcred; 4944 tdsrm->p = p; 4945 tdsrm->dvp = dvp[i]; 4946 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4947 tdsrm->inprog = 0; 4948 tdsrm->done = 0; 4949 tdsrm->err = 0; 4950 ret = EIO; 4951 if (nfs_pnfsiothreads != 0) { 4952 ret = nfs_pnfsio(start_dsremove, tdsrm); 4953 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4954 } 4955 if (ret != 0) { 4956 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4957 if (failpos == -1 && nfsds_failerr(ret)) 4958 failpos = i; 4959 } 4960 } 4961 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4962 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4963 failpos = mirrorcnt - 1; 4964 timo = hz / 50; /* Wait for 20msec. */ 4965 if (timo < 1) 4966 timo = 1; 4967 /* Wait for kernel task(s) to complete. */ 4968 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4969 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4970 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4971 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4972 failpos = i; 4973 } 4974 4975 /* 4976 * If failpos has been set, that mirror has failed, so it needs 4977 * to be disabled. 
4978 */ 4979 if (failpos >= 0) { 4980 nmp = VFSTONFS(dvp[failpos]->v_mount); 4981 NFSLOCKMNT(nmp); 4982 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4983 NFSMNTP_CANCELRPCS)) == 0) { 4984 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4985 NFSUNLOCKMNT(nmp); 4986 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4987 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4988 ds); 4989 if (ds != NULL) 4990 nfsrv_killrpcs(nmp); 4991 NFSLOCKMNT(nmp); 4992 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4993 wakeup(nmp); 4994 } 4995 NFSUNLOCKMNT(nmp); 4996 } 4997 4998 /* Get rid all layouts for the file. */ 4999 nfsrv_freefilelayouts(fhp); 5000 5001 NFSFREECRED(tcred); 5002 free(dsrm, M_TEMP); 5003 } 5004 5005 /* 5006 * Generate a file name based on the file handle and put it in *bufp. 5007 * Return the number of bytes generated. 5008 */ 5009 static int 5010 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 5011 { 5012 int i; 5013 uint8_t *cp; 5014 const uint8_t *hexdigits = "0123456789abcdef"; 5015 5016 cp = (uint8_t *)fhp; 5017 for (i = 0; i < sizeof(*fhp); i++) { 5018 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 5019 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 5020 } 5021 bufp[2 * i] = '\0'; 5022 return (2 * i); 5023 } 5024 5025 /* 5026 * Update the Metadata file's attributes from the DS file when a Read/Write 5027 * layout is returned. 5028 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 5029 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 5030 */ 5031 int 5032 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 5033 { 5034 struct ucred *tcred; 5035 int error; 5036 5037 /* Do this as root so that it won't fail with EACCES. 
*/ 5038 tcred = newnfs_getcred(); 5039 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 5040 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 5041 NFSFREECRED(tcred); 5042 return (error); 5043 } 5044 5045 /* 5046 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 5047 */ 5048 static int 5049 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 5050 NFSPROC_T *p) 5051 { 5052 int error; 5053 5054 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 5055 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 5056 return (error); 5057 } 5058 5059 static int 5060 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 5061 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 5062 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 5063 off_t *offp, int content, bool *eofp) 5064 { 5065 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 5066 fhandle_t fh[NFSDEV_MAXMIRRORS]; 5067 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 5068 struct nfsdevice *ds; 5069 struct pnfsdsattr dsattr; 5070 struct opnfsdsattr odsattr; 5071 char *buf; 5072 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 5073 5074 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 5075 /* 5076 * If not a regular file, not exported or not a pNFS server, 5077 * just return ENOENT. 5078 */ 5079 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 5080 nfsrv_devidcnt == 0) 5081 return (ENOENT); 5082 5083 buflen = 1024; 5084 buf = malloc(buflen, M_TEMP, M_WAITOK); 5085 error = 0; 5086 5087 /* 5088 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 5089 * from the MetaData file's extended attribute. 
5090 */ 5091 if (ioproc == NFSPROC_GETATTR) { 5092 error = vn_extattr_get(vp, IO_NODELOCKED, 5093 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 5094 p); 5095 if (error == 0) { 5096 if (buflen == sizeof(odsattr)) { 5097 NFSBCOPY(buf, &odsattr, buflen); 5098 nap->na_filerev = odsattr.dsa_filerev; 5099 nap->na_size = odsattr.dsa_size; 5100 nap->na_atime = odsattr.dsa_atime; 5101 nap->na_mtime = odsattr.dsa_mtime; 5102 /* 5103 * Fake na_bytes by rounding up na_size. 5104 * Since we don't know the block size, just 5105 * use BLKDEV_IOSIZE. 5106 */ 5107 nap->na_bytes = (odsattr.dsa_size + 5108 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 5109 } else if (buflen == sizeof(dsattr)) { 5110 NFSBCOPY(buf, &dsattr, buflen); 5111 nap->na_filerev = dsattr.dsa_filerev; 5112 nap->na_size = dsattr.dsa_size; 5113 nap->na_atime = dsattr.dsa_atime; 5114 nap->na_mtime = dsattr.dsa_mtime; 5115 nap->na_bytes = dsattr.dsa_bytes; 5116 } else 5117 error = ENXIO; 5118 } 5119 if (error == 0) { 5120 /* 5121 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 5122 * returns 0, just return now. nfsrv_checkdsattr() 5123 * returns 0 if there is no Read/Write layout 5124 * plus either an Open/Write_access or Write 5125 * delegation issued to a client for the file. 5126 */ 5127 if (nfsrv_pnfsgetdsattr == 0 || 5128 nfsrv_checkdsattr(vp, p) == 0) { 5129 free(buf, M_TEMP); 5130 return (error); 5131 } 5132 } 5133 5134 /* 5135 * Clear ENOATTR so the code below will attempt to do a 5136 * nfsrv_getattrdsrpc() to get the attributes and (re)create 5137 * the extended attribute. 
5138 */ 5139 if (error == ENOATTR) 5140 error = 0; 5141 } 5142 5143 origmircnt = -1; 5144 trycnt = 0; 5145 tryagain: 5146 if (error == 0) { 5147 buflen = 1024; 5148 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 5149 LK_EXCLUSIVE) 5150 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 5151 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 5152 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 5153 NULL, NULL); 5154 if (error == 0) { 5155 for (i = 0; i < mirrorcnt; i++) 5156 nmp[i] = VFSTONFS(dvp[i]->v_mount); 5157 } else 5158 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 5159 } else 5160 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 5161 if (error == 0) { 5162 failpos = -1; 5163 if (origmircnt == -1) 5164 origmircnt = mirrorcnt; 5165 /* 5166 * If failpos is set to a mirror#, then that mirror has 5167 * failed and will be disabled. For Read, Getattr and Seek, the 5168 * function only tries one mirror, so if that mirror has 5169 * failed, it will need to be retried. As such, increment 5170 * tryitagain for these cases. 5171 * For Write, Setattr and Setacl, the function tries all 5172 * mirrors and will not return an error for the case where 5173 * one mirror has failed. For these cases, the functioning 5174 * mirror(s) will have been modified, so a retry isn't 5175 * necessary. These functions will set failpos for the 5176 * failed mirror#. 5177 */ 5178 if (ioproc == NFSPROC_READDS) { 5179 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 5180 mpp, mpp2); 5181 if (nfsds_failerr(error) && mirrorcnt > 1) { 5182 /* 5183 * Setting failpos will cause the mirror 5184 * to be disabled and then a retry of this 5185 * read is required. 
5186 */ 5187 failpos = 0; 5188 error = 0; 5189 trycnt++; 5190 } 5191 } else if (ioproc == NFSPROC_WRITEDS) 5192 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 5193 &nmp[0], mirrorcnt, mpp, cp, &failpos); 5194 else if (ioproc == NFSPROC_SETATTR) 5195 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 5196 mirrorcnt, nap, &failpos); 5197 else if (ioproc == NFSPROC_SETACL) 5198 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 5199 mirrorcnt, aclp, &failpos); 5200 else if (ioproc == NFSPROC_SEEKDS) { 5201 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 5202 p, nmp[0]); 5203 if (nfsds_failerr(error) && mirrorcnt > 1) { 5204 /* 5205 * Setting failpos will cause the mirror 5206 * to be disabled and then a retry of this 5207 * read is required. 5208 */ 5209 failpos = 0; 5210 error = 0; 5211 trycnt++; 5212 } 5213 } else if (ioproc == NFSPROC_ALLOCATE) 5214 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 5215 &nmp[0], mirrorcnt, &failpos); 5216 else if (ioproc == NFSPROC_DEALLOCATE) 5217 error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p, 5218 vp, &nmp[0], mirrorcnt, &failpos); 5219 else { 5220 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 5221 vp, nmp[mirrorcnt - 1], nap); 5222 if (nfsds_failerr(error) && mirrorcnt > 1) { 5223 /* 5224 * Setting failpos will cause the mirror 5225 * to be disabled and then a retry of this 5226 * getattr is required. 
5227 */ 5228 failpos = mirrorcnt - 1; 5229 error = 0; 5230 trycnt++; 5231 } 5232 } 5233 ds = NULL; 5234 if (failpos >= 0) { 5235 failnmp = nmp[failpos]; 5236 NFSLOCKMNT(failnmp); 5237 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 5238 NFSMNTP_CANCELRPCS)) == 0) { 5239 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 5240 NFSUNLOCKMNT(failnmp); 5241 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 5242 failnmp, p); 5243 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 5244 failpos, ds); 5245 if (ds != NULL) 5246 nfsrv_killrpcs(failnmp); 5247 NFSLOCKMNT(failnmp); 5248 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 5249 wakeup(failnmp); 5250 } 5251 NFSUNLOCKMNT(failnmp); 5252 } 5253 for (i = 0; i < mirrorcnt; i++) 5254 NFSVOPUNLOCK(dvp[i]); 5255 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 5256 trycnt); 5257 /* Try the Read/Getattr again if a mirror was deleted. */ 5258 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 5259 goto tryagain; 5260 } else { 5261 /* Return ENOENT for any Extended Attribute error. */ 5262 error = ENOENT; 5263 } 5264 free(buf, M_TEMP); 5265 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 5266 return (error); 5267 } 5268 5269 /* 5270 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 5271 * attribute. 5272 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 5273 * to be checked. If it points to a NULL nmp, then it returns 5274 * a suitable destination. 5275 * curnmp - If non-NULL, it is the source mount for the copy. 
5276 */ 5277 int 5278 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 5279 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 5280 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 5281 struct nfsmount *curnmp, int *ippos, int *dsdirp) 5282 { 5283 struct vnode *dvp, *nvp = NULL, **tdvpp; 5284 struct mount *mp; 5285 struct nfsmount *nmp, *newnmp; 5286 struct sockaddr *sad; 5287 struct sockaddr_in *sin; 5288 struct nfsdevice *ds, *tds, *fndds; 5289 struct pnfsdsfile *pf; 5290 uint32_t dsdir; 5291 int error, fhiszero, fnd, gotone, i, mirrorcnt; 5292 5293 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 5294 *mirrorcntp = 1; 5295 tdvpp = dvpp; 5296 if (nvpp != NULL) 5297 *nvpp = NULL; 5298 if (dvpp != NULL) 5299 *dvpp = NULL; 5300 if (ippos != NULL) 5301 *ippos = -1; 5302 if (newnmpp != NULL) 5303 newnmp = *newnmpp; 5304 else 5305 newnmp = NULL; 5306 mp = vp->v_mount; 5307 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 5308 "pnfsd.dsfile", buflenp, buf, p); 5309 mirrorcnt = *buflenp / sizeof(*pf); 5310 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 5311 *buflenp != sizeof(*pf) * mirrorcnt)) 5312 error = ENOATTR; 5313 5314 pf = (struct pnfsdsfile *)buf; 5315 /* If curnmp != NULL, check for a match in the mirror list. 
*/ 5316 if (curnmp != NULL && error == 0) { 5317 fnd = 0; 5318 for (i = 0; i < mirrorcnt; i++, pf++) { 5319 sad = (struct sockaddr *)&pf->dsf_sin; 5320 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 5321 if (ippos != NULL) 5322 *ippos = i; 5323 fnd = 1; 5324 break; 5325 } 5326 } 5327 if (fnd == 0) 5328 error = ENXIO; 5329 } 5330 5331 gotone = 0; 5332 pf = (struct pnfsdsfile *)buf; 5333 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 5334 error); 5335 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 5336 fhiszero = 0; 5337 sad = (struct sockaddr *)&pf->dsf_sin; 5338 sin = &pf->dsf_sin; 5339 dsdir = pf->dsf_dir; 5340 if (dsdir >= nfsrv_dsdirsize) { 5341 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 5342 error = ENOATTR; 5343 } else if (nvpp != NULL && newnmp != NULL && 5344 nfsaddr2_match(sad, newnmp->nm_nam)) 5345 error = EEXIST; 5346 if (error == 0) { 5347 if (ippos != NULL && curnmp == NULL && 5348 sad->sa_family == AF_INET && 5349 sin->sin_addr.s_addr == 0) 5350 *ippos = i; 5351 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 5352 fhiszero = 1; 5353 /* Use the socket address to find the mount point. */ 5354 fndds = NULL; 5355 NFSDDSLOCK(); 5356 /* Find a match for the IP address. */ 5357 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5358 if (ds->nfsdev_nmp != NULL) { 5359 dvp = ds->nfsdev_dvp; 5360 nmp = VFSTONFS(dvp->v_mount); 5361 if (nmp != ds->nfsdev_nmp) 5362 printf("different2 nmp %p %p\n", 5363 nmp, ds->nfsdev_nmp); 5364 if (nfsaddr2_match(sad, nmp->nm_nam)) { 5365 fndds = ds; 5366 break; 5367 } 5368 } 5369 } 5370 if (fndds != NULL && newnmpp != NULL && 5371 newnmp == NULL) { 5372 /* Search for a place to make a mirror copy. 
*/ 5373 TAILQ_FOREACH(tds, &nfsrv_devidhead, 5374 nfsdev_list) { 5375 if (tds->nfsdev_nmp != NULL && 5376 fndds != tds && 5377 ((tds->nfsdev_mdsisset == 0 && 5378 fndds->nfsdev_mdsisset == 0) || 5379 (tds->nfsdev_mdsisset != 0 && 5380 fndds->nfsdev_mdsisset != 0 && 5381 fsidcmp(&tds->nfsdev_mdsfsid, 5382 &mp->mnt_stat.f_fsid) == 0))) { 5383 *newnmpp = tds->nfsdev_nmp; 5384 break; 5385 } 5386 } 5387 if (tds != NULL) { 5388 /* 5389 * Move this entry to the end of the 5390 * list, so it won't be selected as 5391 * easily the next time. 5392 */ 5393 TAILQ_REMOVE(&nfsrv_devidhead, tds, 5394 nfsdev_list); 5395 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 5396 nfsdev_list); 5397 } 5398 } 5399 NFSDDSUNLOCK(); 5400 if (fndds != NULL) { 5401 dvp = fndds->nfsdev_dsdir[dsdir]; 5402 if (lktype != 0 || fhiszero != 0 || 5403 (nvpp != NULL && *nvpp == NULL)) { 5404 if (fhiszero != 0) 5405 error = vn_lock(dvp, 5406 LK_EXCLUSIVE); 5407 else if (lktype != 0) 5408 error = vn_lock(dvp, lktype); 5409 else 5410 error = vn_lock(dvp, LK_SHARED); 5411 /* 5412 * If the file handle is all 0's, try to 5413 * do a Lookup against the DS to acquire 5414 * it. 5415 * If dvpp == NULL or the Lookup fails, 5416 * unlock dvp after the call. 
5417 */ 5418 if (error == 0 && (fhiszero != 0 || 5419 (nvpp != NULL && *nvpp == NULL))) { 5420 error = nfsrv_pnfslookupds(vp, 5421 dvp, pf, &nvp, p); 5422 if (error == 0) { 5423 if (fhiszero != 0) 5424 nfsrv_pnfssetfh( 5425 vp, pf, 5426 devid, 5427 fnamep, 5428 nvp, p); 5429 if (nvpp != NULL && 5430 *nvpp == NULL) { 5431 *nvpp = nvp; 5432 *dsdirp = dsdir; 5433 } else 5434 vput(nvp); 5435 } 5436 if (error != 0 || lktype == 0) 5437 NFSVOPUNLOCK(dvp); 5438 } 5439 } 5440 if (error == 0) { 5441 gotone++; 5442 NFSD_DEBUG(4, "gotone=%d\n", gotone); 5443 if (devid != NULL) { 5444 NFSBCOPY(fndds->nfsdev_deviceid, 5445 devid, NFSX_V4DEVICEID); 5446 devid += NFSX_V4DEVICEID; 5447 } 5448 if (dvpp != NULL) 5449 *tdvpp++ = dvp; 5450 if (fhp != NULL) 5451 NFSBCOPY(&pf->dsf_fh, fhp++, 5452 NFSX_MYFH); 5453 if (fnamep != NULL && gotone == 1) 5454 strlcpy(fnamep, 5455 pf->dsf_filename, 5456 sizeof(pf->dsf_filename)); 5457 } else 5458 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 5459 "err=%d\n", error); 5460 } 5461 } 5462 } 5463 if (error == 0 && gotone == 0) 5464 error = ENOENT; 5465 5466 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 5467 error); 5468 if (error == 0) 5469 *mirrorcntp = gotone; 5470 else { 5471 if (gotone > 0 && dvpp != NULL) { 5472 /* 5473 * If the error didn't occur on the first one and 5474 * dvpp != NULL, the one(s) prior to the failure will 5475 * have locked dvp's that need to be unlocked. 5476 */ 5477 for (i = 0; i < gotone; i++) { 5478 NFSVOPUNLOCK(*dvpp); 5479 *dvpp++ = NULL; 5480 } 5481 } 5482 /* 5483 * If it found the vnode to be copied from before a failure, 5484 * it needs to be vput()'d. 5485 */ 5486 if (nvpp != NULL && *nvpp != NULL) { 5487 vput(*nvpp); 5488 *nvpp = NULL; 5489 } 5490 } 5491 return (error); 5492 } 5493 5494 /* 5495 * Set the extended attribute for the Change attribute. 
5496 */ 5497 static int 5498 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 5499 { 5500 struct pnfsdsattr dsattr; 5501 int error; 5502 5503 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); 5504 dsattr.dsa_filerev = nap->na_filerev; 5505 dsattr.dsa_size = nap->na_size; 5506 dsattr.dsa_atime = nap->na_atime; 5507 dsattr.dsa_mtime = nap->na_mtime; 5508 dsattr.dsa_bytes = nap->na_bytes; 5509 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 5510 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); 5511 if (error != 0) 5512 printf("pNFS: setextattr=%d\n", error); 5513 return (error); 5514 } 5515 5516 static int 5517 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5518 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) 5519 { 5520 uint32_t *tl; 5521 struct nfsrv_descript *nd; 5522 nfsv4stateid_t st; 5523 struct mbuf *m, *m2; 5524 int error = 0, retlen, tlen, trimlen; 5525 5526 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); 5527 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5528 *mpp = NULL; 5529 /* 5530 * Use a stateid where other is an alternating 01010 pattern and 5531 * seqid is 0xffffffff. This value is not defined as special by 5532 * the RFC and is used by the FreeBSD NFS server to indicate an 5533 * MDS->DS proxy operation. 
5534 */ 5535 st.other[0] = 0x55555555; 5536 st.other[1] = 0x55555555; 5537 st.other[2] = 0x55555555; 5538 st.seqid = 0xffffffff; 5539 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5540 NULL, NULL, 0, 0, cred); 5541 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5542 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); 5543 txdr_hyper(off, tl); 5544 *(tl + 2) = txdr_unsigned(len); 5545 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5546 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5547 if (error != 0) { 5548 free(nd, M_TEMP); 5549 return (error); 5550 } 5551 if (nd->nd_repstat == 0) { 5552 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 5553 NFSM_STRSIZ(retlen, len); 5554 if (retlen > 0) { 5555 /* Trim off the pre-data XDR from the mbuf chain. */ 5556 m = nd->nd_mrep; 5557 while (m != NULL && m != nd->nd_md) { 5558 if (m->m_next == nd->nd_md) { 5559 m->m_next = NULL; 5560 m_freem(nd->nd_mrep); 5561 nd->nd_mrep = m = nd->nd_md; 5562 } else 5563 m = m->m_next; 5564 } 5565 if (m == NULL) { 5566 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5567 error = ENOENT; 5568 goto nfsmout; 5569 } 5570 5571 /* 5572 * Now, adjust first mbuf so that any XDR before the 5573 * read data is skipped over. 5574 */ 5575 trimlen = nd->nd_dpos - mtod(m, char *); 5576 if (trimlen > 0) { 5577 m->m_len -= trimlen; 5578 NFSM_DATAP(m, trimlen); 5579 } 5580 5581 /* 5582 * Truncate the mbuf chain at retlen bytes of data, 5583 * plus XDR padding that brings the length up to a 5584 * multiple of 4. 
5585 */ 5586 tlen = NFSM_RNDUP(retlen); 5587 do { 5588 if (m->m_len >= tlen) { 5589 m->m_len = tlen; 5590 tlen = 0; 5591 m2 = m->m_next; 5592 m->m_next = NULL; 5593 m_freem(m2); 5594 break; 5595 } 5596 tlen -= m->m_len; 5597 m = m->m_next; 5598 } while (m != NULL); 5599 if (tlen > 0) { 5600 printf("nfsrv_readdsrpc: busted mbuf list\n"); 5601 error = ENOENT; 5602 goto nfsmout; 5603 } 5604 *mpp = nd->nd_mrep; 5605 *mpendp = m; 5606 nd->nd_mrep = NULL; 5607 } 5608 } else 5609 error = nd->nd_repstat; 5610 nfsmout: 5611 /* If nd->nd_mrep is already NULL, this is a no-op. */ 5612 m_freem(nd->nd_mrep); 5613 free(nd, M_TEMP); 5614 NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); 5615 return (error); 5616 } 5617 5618 /* 5619 * Do a write RPC on a DS data file, using this structure for the arguments, 5620 * so that this function can be executed by a separate kernel process. 5621 */ 5622 struct nfsrvwritedsdorpc { 5623 int done; 5624 int inprog; 5625 struct task tsk; 5626 fhandle_t fh; 5627 off_t off; 5628 int len; 5629 struct nfsmount *nmp; 5630 struct ucred *cred; 5631 NFSPROC_T *p; 5632 struct mbuf *m; 5633 int err; 5634 }; 5635 5636 static int 5637 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, 5638 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) 5639 { 5640 uint32_t *tl; 5641 struct nfsrv_descript *nd; 5642 nfsattrbit_t attrbits; 5643 nfsv4stateid_t st; 5644 int commit, error, retlen; 5645 5646 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5647 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, 5648 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 5649 5650 /* 5651 * Use a stateid where other is an alternating 01010 pattern and 5652 * seqid is 0xffffffff. This value is not defined as special by 5653 * the RFC and is used by the FreeBSD NFS server to indicate an 5654 * MDS->DS proxy operation. 
5655 */ 5656 st.other[0] = 0x55555555; 5657 st.other[1] = 0x55555555; 5658 st.other[2] = 0x55555555; 5659 st.seqid = 0xffffffff; 5660 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5661 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); 5662 txdr_hyper(off, tl); 5663 tl += 2; 5664 /* 5665 * Do all writes FileSync, since the server doesn't hold onto dirty 5666 * buffers. Since clients should be accessing the DS servers directly 5667 * using the pNFS layouts, this just needs to work correctly as a 5668 * fallback. 5669 */ 5670 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); 5671 *tl = txdr_unsigned(len); 5672 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); 5673 5674 /* Put data in mbuf chain. */ 5675 nd->nd_mb->m_next = m; 5676 5677 /* Set nd_mb and nd_bpos to end of data. */ 5678 while (m->m_next != NULL) 5679 m = m->m_next; 5680 nd->nd_mb = m; 5681 nfsm_set(nd, m->m_len); 5682 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); 5683 5684 /* Do a Getattr for the attributes that change upon writing. */ 5685 NFSZERO_ATTRBIT(&attrbits); 5686 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5687 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5688 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5689 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5690 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5691 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5692 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5693 (void) nfsrv_putattrbit(nd, &attrbits); 5694 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5695 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5696 if (error != 0) { 5697 free(nd, M_TEMP); 5698 return (error); 5699 } 5700 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); 5701 /* Get rid of weak cache consistency data for now. 
*/ 5702 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5703 (ND_NFSV4 | ND_V4WCCATTR)) { 5704 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5705 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 5706 NULL); 5707 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); 5708 if (error != 0) 5709 goto nfsmout; 5710 /* 5711 * Get rid of Op# and status for next op. 5712 */ 5713 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5714 if (*++tl != 0) 5715 nd->nd_flag |= ND_NOMOREDATA; 5716 } 5717 if (nd->nd_repstat == 0) { 5718 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); 5719 retlen = fxdr_unsigned(int, *tl++); 5720 commit = fxdr_unsigned(int, *tl); 5721 if (commit != NFSWRITE_FILESYNC) 5722 error = NFSERR_IO; 5723 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", 5724 retlen, commit, error); 5725 } else 5726 error = nd->nd_repstat; 5727 /* We have no use for the Write Verifier since we use FileSync. */ 5728 5729 /* 5730 * Get the Change, Size, Access Time and Modify Time attributes and set 5731 * on the Metadata file, so its attributes will be what the file's 5732 * would be if it had been written. 5733 */ 5734 if (error == 0) { 5735 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5736 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5737 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 5738 NULL); 5739 } 5740 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); 5741 nfsmout: 5742 m_freem(nd->nd_mrep); 5743 free(nd, M_TEMP); 5744 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); 5745 return (error); 5746 } 5747 5748 /* 5749 * Start up the thread that will execute nfsrv_writedsdorpc(). 
5750 */ 5751 static void 5752 start_writedsdorpc(void *arg, int pending) 5753 { 5754 struct nfsrvwritedsdorpc *drpc; 5755 5756 drpc = (struct nfsrvwritedsdorpc *)arg; 5757 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5758 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 5759 drpc->done = 1; 5760 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 5761 } 5762 5763 static int 5764 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5765 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5766 struct mbuf **mpp, char *cp, int *failposp) 5767 { 5768 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; 5769 struct nfsvattr na; 5770 struct mbuf *m; 5771 int error, i, offs, ret, timo; 5772 5773 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5774 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5775 drpc = NULL; 5776 if (mirrorcnt > 1) 5777 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5778 M_WAITOK); 5779 5780 /* Calculate offset in mbuf chain that data starts. */ 5781 offs = cp - mtod(*mpp, char *); 5782 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5783 5784 /* 5785 * Do the write RPC for every DS, using a separate kernel process 5786 * for every DS except the last one. 
5787 */ 5788 error = 0; 5789 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5790 tdrpc->done = 0; 5791 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5792 tdrpc->off = off; 5793 tdrpc->len = len; 5794 tdrpc->nmp = *nmpp; 5795 tdrpc->cred = cred; 5796 tdrpc->p = p; 5797 tdrpc->inprog = 0; 5798 tdrpc->err = 0; 5799 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5800 ret = EIO; 5801 if (nfs_pnfsiothreads != 0) { 5802 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5803 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5804 ret); 5805 } 5806 if (ret != 0) { 5807 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5808 tdrpc->m, cred, p); 5809 if (nfsds_failerr(ret) && *failposp == -1) 5810 *failposp = i; 5811 else if (error == 0 && ret != 0) 5812 error = ret; 5813 } 5814 nmpp++; 5815 fhp++; 5816 } 5817 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5818 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5819 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5820 *failposp = mirrorcnt - 1; 5821 else if (error == 0 && ret != 0) 5822 error = ret; 5823 if (error == 0) 5824 error = nfsrv_setextattr(vp, &na, p); 5825 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5826 tdrpc = drpc; 5827 timo = hz / 50; /* Wait for 20msec. */ 5828 if (timo < 1) 5829 timo = 1; 5830 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5831 /* Wait for RPCs on separate threads to complete. */ 5832 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5833 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5834 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5835 *failposp = i; 5836 else if (error == 0 && tdrpc->err != 0) 5837 error = tdrpc->err; 5838 } 5839 free(drpc, M_TEMP); 5840 return (error); 5841 } 5842 5843 /* 5844 * Do a allocate RPC on a DS data file, using this structure for the arguments, 5845 * so that this function can be executed by a separate kernel process. 
5846 */ 5847 struct nfsrvallocatedsdorpc { 5848 int done; 5849 int inprog; 5850 struct task tsk; 5851 fhandle_t fh; 5852 off_t off; 5853 off_t len; 5854 struct nfsmount *nmp; 5855 struct ucred *cred; 5856 NFSPROC_T *p; 5857 int err; 5858 }; 5859 5860 static int 5861 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5862 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5863 { 5864 uint32_t *tl; 5865 struct nfsrv_descript *nd; 5866 nfsattrbit_t attrbits; 5867 nfsv4stateid_t st; 5868 int error; 5869 5870 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5871 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 5872 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 5873 5874 /* 5875 * Use a stateid where other is an alternating 01010 pattern and 5876 * seqid is 0xffffffff. This value is not defined as special by 5877 * the RFC and is used by the FreeBSD NFS server to indicate an 5878 * MDS->DS proxy operation. 5879 */ 5880 st.other[0] = 0x55555555; 5881 st.other[1] = 0x55555555; 5882 st.other[2] = 0x55555555; 5883 st.seqid = 0xffffffff; 5884 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5885 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5886 txdr_hyper(off, tl); tl += 2; 5887 txdr_hyper(len, tl); tl += 2; 5888 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 5889 5890 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5891 NFSGETATTR_ATTRBIT(&attrbits); 5892 nfsrv_putattrbit(nd, &attrbits); 5893 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5894 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5895 if (error != 0) { 5896 free(nd, M_TEMP); 5897 return (error); 5898 } 5899 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 5900 nd->nd_repstat); 5901 if (nd->nd_repstat == 0) { 5902 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5903 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5904 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 5905 NULL); 5906 } else 
5907 error = nd->nd_repstat; 5908 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 5909 nfsmout: 5910 m_freem(nd->nd_mrep); 5911 free(nd, M_TEMP); 5912 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 5913 return (error); 5914 } 5915 5916 /* 5917 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 5918 */ 5919 static void 5920 start_allocatedsdorpc(void *arg, int pending) 5921 { 5922 struct nfsrvallocatedsdorpc *drpc; 5923 5924 drpc = (struct nfsrvallocatedsdorpc *)arg; 5925 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5926 drpc->len, NULL, drpc->cred, drpc->p); 5927 drpc->done = 1; 5928 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 5929 } 5930 5931 static int 5932 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5933 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5934 int *failposp) 5935 { 5936 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 5937 struct nfsvattr na; 5938 int error, i, ret, timo; 5939 5940 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 5941 drpc = NULL; 5942 if (mirrorcnt > 1) 5943 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5944 M_WAITOK); 5945 5946 /* 5947 * Do the allocate RPC for every DS, using a separate kernel process 5948 * for every DS except the last one. 
5949 */ 5950 error = 0; 5951 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5952 tdrpc->done = 0; 5953 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5954 tdrpc->off = off; 5955 tdrpc->len = len; 5956 tdrpc->nmp = *nmpp; 5957 tdrpc->cred = cred; 5958 tdrpc->p = p; 5959 tdrpc->inprog = 0; 5960 tdrpc->err = 0; 5961 ret = EIO; 5962 if (nfs_pnfsiothreads != 0) { 5963 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 5964 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 5965 ret); 5966 } 5967 if (ret != 0) { 5968 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 5969 cred, p); 5970 if (nfsds_failerr(ret) && *failposp == -1) 5971 *failposp = i; 5972 else if (error == 0 && ret != 0) 5973 error = ret; 5974 } 5975 nmpp++; 5976 fhp++; 5977 } 5978 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5979 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5980 *failposp = mirrorcnt - 1; 5981 else if (error == 0 && ret != 0) 5982 error = ret; 5983 if (error == 0) 5984 error = nfsrv_setextattr(vp, &na, p); 5985 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 5986 tdrpc = drpc; 5987 timo = hz / 50; /* Wait for 20msec. */ 5988 if (timo < 1) 5989 timo = 1; 5990 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5991 /* Wait for RPCs on separate threads to complete. */ 5992 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5993 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5994 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5995 *failposp = i; 5996 else if (error == 0 && tdrpc->err != 0) 5997 error = tdrpc->err; 5998 } 5999 free(drpc, M_TEMP); 6000 return (error); 6001 } 6002 6003 /* 6004 * Do a deallocate RPC on a DS data file, using this structure for the 6005 * arguments, so that this function can be executed by a separate kernel 6006 * process. 
6007 */ 6008 struct nfsrvdeallocatedsdorpc { 6009 int done; 6010 int inprog; 6011 struct task tsk; 6012 fhandle_t fh; 6013 off_t off; 6014 off_t len; 6015 struct nfsmount *nmp; 6016 struct ucred *cred; 6017 NFSPROC_T *p; 6018 int err; 6019 }; 6020 6021 static int 6022 nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 6023 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 6024 { 6025 uint32_t *tl; 6026 struct nfsrv_descript *nd; 6027 nfsattrbit_t attrbits; 6028 nfsv4stateid_t st; 6029 int error; 6030 6031 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6032 nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp, 6033 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6034 6035 /* 6036 * Use a stateid where other is an alternating 01010 pattern and 6037 * seqid is 0xffffffff. This value is not defined as special by 6038 * the RFC and is used by the FreeBSD NFS server to indicate an 6039 * MDS->DS proxy operation. 6040 */ 6041 st.other[0] = 0x55555555; 6042 st.other[1] = 0x55555555; 6043 st.other[2] = 0x55555555; 6044 st.seqid = 0xffffffff; 6045 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6046 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 6047 txdr_hyper(off, tl); tl += 2; 6048 txdr_hyper(len, tl); tl += 2; 6049 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len); 6050 6051 /* Do a Getattr for the attributes that change upon writing. 
*/ 6052 NFSZERO_ATTRBIT(&attrbits); 6053 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6054 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6055 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6056 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6057 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6058 *tl = txdr_unsigned(NFSV4OP_GETATTR); 6059 nfsrv_putattrbit(nd, &attrbits); 6060 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 6061 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6062 if (error != 0) { 6063 free(nd, M_TEMP); 6064 return (error); 6065 } 6066 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n", 6067 nd->nd_repstat); 6068 /* Get rid of weak cache consistency data for now. */ 6069 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 6070 (ND_NFSV4 | ND_V4WCCATTR)) { 6071 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 6072 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 6073 NULL); 6074 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error); 6075 if (error != 0) 6076 goto nfsmout; 6077 /* 6078 * Get rid of Op# and status for next op. 6079 */ 6080 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6081 if (*++tl != 0) 6082 nd->nd_flag |= ND_NOMOREDATA; 6083 } 6084 if (nd->nd_repstat == 0) { 6085 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6086 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 6087 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 6088 NULL); 6089 } else 6090 error = nd->nd_repstat; 6091 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error); 6092 nfsmout: 6093 m_freem(nd->nd_mrep); 6094 free(nd, M_TEMP); 6095 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error); 6096 return (error); 6097 } 6098 6099 /* 6100 * Start up the thread that will execute nfsrv_deallocatedsdorpc(). 
6101 */ 6102 static void 6103 start_deallocatedsdorpc(void *arg, int pending) 6104 { 6105 struct nfsrvdeallocatedsdorpc *drpc; 6106 6107 drpc = (struct nfsrvdeallocatedsdorpc *)arg; 6108 drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 6109 drpc->len, NULL, drpc->cred, drpc->p); 6110 drpc->done = 1; 6111 NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err); 6112 } 6113 6114 static int 6115 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 6116 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 6117 int *failposp) 6118 { 6119 struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL; 6120 struct nfsvattr na; 6121 int error, i, ret, timo; 6122 6123 NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n"); 6124 drpc = NULL; 6125 if (mirrorcnt > 1) 6126 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6127 M_WAITOK); 6128 6129 /* 6130 * Do the deallocate RPC for every DS, using a separate kernel process 6131 * for every DS except the last one. 
6132 */ 6133 error = 0; 6134 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6135 tdrpc->done = 0; 6136 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6137 tdrpc->off = off; 6138 tdrpc->len = len; 6139 tdrpc->nmp = *nmpp; 6140 tdrpc->cred = cred; 6141 tdrpc->p = p; 6142 tdrpc->inprog = 0; 6143 tdrpc->err = 0; 6144 ret = EIO; 6145 if (nfs_pnfsiothreads != 0) { 6146 ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc); 6147 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n", 6148 ret); 6149 } 6150 if (ret != 0) { 6151 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, 6152 NULL, cred, p); 6153 if (nfsds_failerr(ret) && *failposp == -1) 6154 *failposp = i; 6155 else if (error == 0 && ret != 0) 6156 error = ret; 6157 } 6158 nmpp++; 6159 fhp++; 6160 } 6161 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 6162 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6163 *failposp = mirrorcnt - 1; 6164 else if (error == 0 && ret != 0) 6165 error = ret; 6166 if (error == 0) 6167 error = nfsrv_setextattr(vp, &na, p); 6168 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error); 6169 tdrpc = drpc; 6170 timo = hz / 50; /* Wait for 20msec. */ 6171 if (timo < 1) 6172 timo = 1; 6173 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6174 /* Wait for RPCs on separate threads to complete. 
*/ 6175 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6176 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 6177 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6178 *failposp = i; 6179 else if (error == 0 && tdrpc->err != 0) 6180 error = tdrpc->err; 6181 } 6182 free(drpc, M_TEMP); 6183 return (error); 6184 } 6185 6186 static int 6187 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6188 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 6189 struct nfsvattr *dsnap) 6190 { 6191 uint32_t *tl; 6192 struct nfsrv_descript *nd; 6193 nfsv4stateid_t st; 6194 nfsattrbit_t attrbits; 6195 int error; 6196 6197 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 6198 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6199 /* 6200 * Use a stateid where other is an alternating 01010 pattern and 6201 * seqid is 0xffffffff. This value is not defined as special by 6202 * the RFC and is used by the FreeBSD NFS server to indicate an 6203 * MDS->DS proxy operation. 6204 */ 6205 st.other[0] = 0x55555555; 6206 st.other[1] = 0x55555555; 6207 st.other[2] = 0x55555555; 6208 st.seqid = 0xffffffff; 6209 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 6210 NULL, NULL, 0, 0, cred); 6211 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6212 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 6213 6214 /* Do a Getattr for the attributes that change due to writing. 
*/ 6215 NFSZERO_ATTRBIT(&attrbits); 6216 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6217 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6218 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6219 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6220 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6221 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 6222 *tl = txdr_unsigned(NFSV4OP_GETATTR); 6223 (void) nfsrv_putattrbit(nd, &attrbits); 6224 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6225 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6226 if (error != 0) { 6227 free(nd, M_TEMP); 6228 return (error); 6229 } 6230 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 6231 nd->nd_repstat); 6232 /* Get rid of weak cache consistency data for now. */ 6233 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 6234 (ND_NFSV4 | ND_V4WCCATTR)) { 6235 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 6236 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 6237 NULL, NULL); 6238 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 6239 if (error != 0) 6240 goto nfsmout; 6241 /* 6242 * Get rid of Op# and status for next op. 6243 */ 6244 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6245 if (*++tl != 0) 6246 nd->nd_flag |= ND_NOMOREDATA; 6247 } 6248 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 6249 if (error != 0) 6250 goto nfsmout; 6251 if (nd->nd_repstat != 0) 6252 error = nd->nd_repstat; 6253 /* 6254 * Get the Change, Size, Access Time and Modify Time attributes and set 6255 * on the Metadata file, so its attributes will be what the file's 6256 * would be if it had been written. 
6257 */ 6258 if (error == 0) { 6259 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6260 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 6261 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 6262 NULL, NULL); 6263 } 6264 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 6265 nfsmout: 6266 m_freem(nd->nd_mrep); 6267 free(nd, M_TEMP); 6268 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 6269 return (error); 6270 } 6271 6272 struct nfsrvsetattrdsdorpc { 6273 int done; 6274 int inprog; 6275 struct task tsk; 6276 fhandle_t fh; 6277 struct nfsmount *nmp; 6278 struct vnode *vp; 6279 struct ucred *cred; 6280 NFSPROC_T *p; 6281 struct nfsvattr na; 6282 struct nfsvattr dsna; 6283 int err; 6284 }; 6285 6286 /* 6287 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 6288 */ 6289 static void 6290 start_setattrdsdorpc(void *arg, int pending) 6291 { 6292 struct nfsrvsetattrdsdorpc *drpc; 6293 6294 drpc = (struct nfsrvsetattrdsdorpc *)arg; 6295 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 6296 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 6297 drpc->done = 1; 6298 } 6299 6300 static int 6301 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6302 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 6303 struct nfsvattr *nap, int *failposp) 6304 { 6305 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; 6306 struct nfsvattr na; 6307 int error, i, ret, timo; 6308 6309 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 6310 drpc = NULL; 6311 if (mirrorcnt > 1) 6312 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6313 M_WAITOK); 6314 6315 /* 6316 * Do the setattr RPC for every DS, using a separate kernel process 6317 * for every DS except the last one. 
6318 */ 6319 error = 0; 6320 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6321 tdrpc->done = 0; 6322 tdrpc->inprog = 0; 6323 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6324 tdrpc->nmp = *nmpp; 6325 tdrpc->vp = vp; 6326 tdrpc->cred = cred; 6327 tdrpc->p = p; 6328 tdrpc->na = *nap; 6329 tdrpc->err = 0; 6330 ret = EIO; 6331 if (nfs_pnfsiothreads != 0) { 6332 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 6333 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 6334 ret); 6335 } 6336 if (ret != 0) { 6337 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 6338 &na); 6339 if (nfsds_failerr(ret) && *failposp == -1) 6340 *failposp = i; 6341 else if (error == 0 && ret != 0) 6342 error = ret; 6343 } 6344 nmpp++; 6345 fhp++; 6346 } 6347 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 6348 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6349 *failposp = mirrorcnt - 1; 6350 else if (error == 0 && ret != 0) 6351 error = ret; 6352 if (error == 0) 6353 error = nfsrv_setextattr(vp, &na, p); 6354 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 6355 tdrpc = drpc; 6356 timo = hz / 50; /* Wait for 20msec. */ 6357 if (timo < 1) 6358 timo = 1; 6359 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6360 /* Wait for RPCs on separate threads to complete. */ 6361 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6362 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 6363 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6364 *failposp = i; 6365 else if (error == 0 && tdrpc->err != 0) 6366 error = tdrpc->err; 6367 } 6368 free(drpc, M_TEMP); 6369 return (error); 6370 } 6371 6372 /* 6373 * Do a Setattr of an NFSv4 ACL on the DS file. 
6374 */ 6375 static int 6376 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6377 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 6378 { 6379 struct nfsrv_descript *nd; 6380 nfsv4stateid_t st; 6381 nfsattrbit_t attrbits; 6382 int error; 6383 6384 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 6385 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6386 /* 6387 * Use a stateid where other is an alternating 01010 pattern and 6388 * seqid is 0xffffffff. This value is not defined as special by 6389 * the RFC and is used by the FreeBSD NFS server to indicate an 6390 * MDS->DS proxy operation. 6391 */ 6392 st.other[0] = 0x55555555; 6393 st.other[1] = 0x55555555; 6394 st.other[2] = 0x55555555; 6395 st.seqid = 0xffffffff; 6396 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 6397 NULL, NULL, 0, 0, cred); 6398 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6399 NFSZERO_ATTRBIT(&attrbits); 6400 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 6401 /* 6402 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 6403 * so passing in the metadata "vp" will be ok, since it is of 6404 * the same type (VREG). 
6405 */ 6406 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 6407 NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0); 6408 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6409 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6410 if (error != 0) { 6411 free(nd, M_TEMP); 6412 return (error); 6413 } 6414 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 6415 nd->nd_repstat); 6416 error = nd->nd_repstat; 6417 m_freem(nd->nd_mrep); 6418 free(nd, M_TEMP); 6419 return (error); 6420 } 6421 6422 struct nfsrvsetacldsdorpc { 6423 int done; 6424 int inprog; 6425 struct task tsk; 6426 fhandle_t fh; 6427 struct nfsmount *nmp; 6428 struct vnode *vp; 6429 struct ucred *cred; 6430 NFSPROC_T *p; 6431 struct acl *aclp; 6432 int err; 6433 }; 6434 6435 /* 6436 * Start up the thread that will execute nfsrv_setacldsdorpc(). 6437 */ 6438 static void 6439 start_setacldsdorpc(void *arg, int pending) 6440 { 6441 struct nfsrvsetacldsdorpc *drpc; 6442 6443 drpc = (struct nfsrvsetacldsdorpc *)arg; 6444 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 6445 drpc->vp, drpc->nmp, drpc->aclp); 6446 drpc->done = 1; 6447 } 6448 6449 static int 6450 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6451 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 6452 int *failposp) 6453 { 6454 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 6455 int error, i, ret, timo; 6456 6457 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 6458 drpc = NULL; 6459 if (mirrorcnt > 1) 6460 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6461 M_WAITOK); 6462 6463 /* 6464 * Do the setattr RPC for every DS, using a separate kernel process 6465 * for every DS except the last one. 
6466 */ 6467 error = 0; 6468 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6469 tdrpc->done = 0; 6470 tdrpc->inprog = 0; 6471 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6472 tdrpc->nmp = *nmpp; 6473 tdrpc->vp = vp; 6474 tdrpc->cred = cred; 6475 tdrpc->p = p; 6476 tdrpc->aclp = aclp; 6477 tdrpc->err = 0; 6478 ret = EIO; 6479 if (nfs_pnfsiothreads != 0) { 6480 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 6481 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 6482 ret); 6483 } 6484 if (ret != 0) { 6485 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 6486 aclp); 6487 if (nfsds_failerr(ret) && *failposp == -1) 6488 *failposp = i; 6489 else if (error == 0 && ret != 0) 6490 error = ret; 6491 } 6492 nmpp++; 6493 fhp++; 6494 } 6495 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 6496 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6497 *failposp = mirrorcnt - 1; 6498 else if (error == 0 && ret != 0) 6499 error = ret; 6500 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 6501 tdrpc = drpc; 6502 timo = hz / 50; /* Wait for 20msec. */ 6503 if (timo < 1) 6504 timo = 1; 6505 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6506 /* Wait for RPCs on separate threads to complete. */ 6507 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6508 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 6509 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6510 *failposp = i; 6511 else if (error == 0 && tdrpc->err != 0) 6512 error = tdrpc->err; 6513 } 6514 free(drpc, M_TEMP); 6515 return (error); 6516 } 6517 6518 /* 6519 * Getattr call to the DS for the attributes that change due to writing. 
6520 */ 6521 static int 6522 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6523 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 6524 { 6525 struct nfsrv_descript *nd; 6526 int error; 6527 nfsattrbit_t attrbits; 6528 6529 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 6530 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6531 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 6532 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6533 NFSZERO_ATTRBIT(&attrbits); 6534 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6535 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6536 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6537 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6538 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6539 (void) nfsrv_putattrbit(nd, &attrbits); 6540 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6541 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6542 if (error != 0) { 6543 free(nd, M_TEMP); 6544 return (error); 6545 } 6546 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 6547 nd->nd_repstat); 6548 if (nd->nd_repstat == 0) { 6549 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 6550 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 6551 NULL, NULL, NULL, NULL); 6552 /* 6553 * We can only save the updated values in the extended 6554 * attribute if the vp is exclusively locked. 6555 * This should happen when any of the following operations 6556 * occur on the vnode: 6557 * Close, Delegreturn, LayoutCommit, LayoutReturn 6558 * As such, the updated extended attribute should get saved 6559 * before nfsrv_checkdsattr() returns 0 and allows the cached 6560 * attributes to be returned without calling this function. 
6561 */ 6562 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 6563 error = nfsrv_setextattr(vp, nap, p); 6564 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 6565 error); 6566 } 6567 } else 6568 error = nd->nd_repstat; 6569 m_freem(nd->nd_mrep); 6570 free(nd, M_TEMP); 6571 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 6572 return (error); 6573 } 6574 6575 /* 6576 * Seek call to a DS. 6577 */ 6578 static int 6579 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 6580 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 6581 { 6582 uint32_t *tl; 6583 struct nfsrv_descript *nd; 6584 nfsv4stateid_t st; 6585 int error; 6586 6587 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 6588 /* 6589 * Use a stateid where other is an alternating 01010 pattern and 6590 * seqid is 0xffffffff. This value is not defined as special by 6591 * the RFC and is used by the FreeBSD NFS server to indicate an 6592 * MDS->DS proxy operation. 6593 */ 6594 st.other[0] = 0x55555555; 6595 st.other[1] = 0x55555555; 6596 st.other[2] = 0x55555555; 6597 st.seqid = 0xffffffff; 6598 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6599 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 6600 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6601 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6602 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 6603 txdr_hyper(*offp, tl); tl += 2; 6604 *tl = txdr_unsigned(content); 6605 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6606 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6607 if (error != 0) { 6608 free(nd, M_TEMP); 6609 return (error); 6610 } 6611 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 6612 if (nd->nd_repstat == 0) { 6613 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 6614 if (*tl++ == newnfs_true) 6615 *eofp = true; 6616 else 6617 *eofp = false; 6618 *offp = fxdr_hyper(tl); 6619 } else 6620 error = nd->nd_repstat; 6621 nfsmout: 6622 
m_freem(nd->nd_mrep); 6623 free(nd, M_TEMP); 6624 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 6625 return (error); 6626 } 6627 6628 /* 6629 * Get the device id and file handle for a DS file. 6630 */ 6631 int 6632 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 6633 fhandle_t *fhp, char *devid) 6634 { 6635 int buflen, error; 6636 char *buf; 6637 6638 buflen = 1024; 6639 buf = malloc(buflen, M_TEMP, M_WAITOK); 6640 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 6641 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 6642 free(buf, M_TEMP); 6643 return (error); 6644 } 6645 6646 /* 6647 * Do a Lookup against the DS for the filename. 6648 */ 6649 static int 6650 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 6651 struct vnode **nvpp, NFSPROC_T *p) 6652 { 6653 struct nameidata named; 6654 struct ucred *tcred; 6655 char *bufp; 6656 u_long *hashp; 6657 struct vnode *nvp; 6658 int error; 6659 6660 tcred = newnfs_getcred(); 6661 named.ni_cnd.cn_nameiop = LOOKUP; 6662 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 6663 named.ni_cnd.cn_cred = tcred; 6664 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 6665 nfsvno_setpathbuf(&named, &bufp, &hashp); 6666 named.ni_cnd.cn_nameptr = bufp; 6667 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 6668 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 6669 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 6670 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 6671 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 6672 NFSFREECRED(tcred); 6673 nfsvno_relpathbuf(&named); 6674 if (error == 0) 6675 *nvpp = nvp; 6676 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 6677 return (error); 6678 } 6679 6680 /* 6681 * Set the file handle to the correct one. 
6682 */ 6683 static void 6684 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, 6685 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 6686 { 6687 struct nfsnode *np; 6688 int ret = 0; 6689 6690 np = VTONFS(nvp); 6691 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 6692 /* 6693 * We can only do a vn_set_extattr() if the vnode is exclusively 6694 * locked and vn_start_write() has been done. If devid != NULL or 6695 * fnamep != NULL or the vnode is shared locked, vn_start_write() 6696 * may not have been done. 6697 * If not done now, it will be done on a future call. 6698 */ 6699 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 6700 LK_EXCLUSIVE) 6701 ret = vn_extattr_set(vp, IO_NODELOCKED, 6702 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 6703 (char *)pf, p); 6704 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 6705 } 6706 6707 /* 6708 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 6709 * when the DS has failed. 6710 */ 6711 void 6712 nfsrv_killrpcs(struct nfsmount *nmp) 6713 { 6714 6715 /* 6716 * Call newnfs_nmcancelreqs() to cause 6717 * any RPCs in progress on the mount point to 6718 * fail. 6719 * This will cause any process waiting for an 6720 * RPC to complete while holding a vnode lock 6721 * on the mounted-on vnode (such as "df" or 6722 * a non-forced "umount") to fail. 6723 * This will unlock the mounted-on vnode so 6724 * a forced dismount can succeed. 6725 * The NFSMNTP_CANCELRPCS flag should be set when this function is 6726 * called. 6727 */ 6728 newnfs_nmcancelreqs(nmp); 6729 } 6730 6731 /* 6732 * Sum up the statfs info for each of the DSs, so that the client will 6733 * receive the total for all DSs. 
6734 */ 6735 static int 6736 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 6737 { 6738 struct statfs *tsf; 6739 struct nfsdevice *ds; 6740 struct vnode **dvpp, **tdvpp, *dvp; 6741 uint64_t tot; 6742 int cnt, error = 0, i; 6743 6744 if (nfsrv_devidcnt <= 0) 6745 return (ENXIO); 6746 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 6747 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 6748 6749 /* Get an array of the dvps for the DSs. */ 6750 tdvpp = dvpp; 6751 i = 0; 6752 NFSDDSLOCK(); 6753 /* First, search for matches for same file system. */ 6754 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6755 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 6756 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 6757 if (++i > nfsrv_devidcnt) 6758 break; 6759 *tdvpp++ = ds->nfsdev_dvp; 6760 } 6761 } 6762 /* 6763 * If no matches for same file system, total all servers not assigned 6764 * to a file system. 6765 */ 6766 if (i == 0) { 6767 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6768 if (ds->nfsdev_nmp != NULL && 6769 ds->nfsdev_mdsisset == 0) { 6770 if (++i > nfsrv_devidcnt) 6771 break; 6772 *tdvpp++ = ds->nfsdev_dvp; 6773 } 6774 } 6775 } 6776 NFSDDSUNLOCK(); 6777 cnt = i; 6778 6779 /* Do a VFS_STATFS() for each of the DSs and sum them up. 
*/ 6780 tdvpp = dvpp; 6781 for (i = 0; i < cnt && error == 0; i++) { 6782 dvp = *tdvpp++; 6783 error = VFS_STATFS(dvp->v_mount, tsf); 6784 if (error == 0) { 6785 if (sf->f_bsize == 0) { 6786 if (tsf->f_bsize > 0) 6787 sf->f_bsize = tsf->f_bsize; 6788 else 6789 sf->f_bsize = 8192; 6790 } 6791 if (tsf->f_blocks > 0) { 6792 if (sf->f_bsize != tsf->f_bsize) { 6793 tot = tsf->f_blocks * tsf->f_bsize; 6794 sf->f_blocks += (tot / sf->f_bsize); 6795 } else 6796 sf->f_blocks += tsf->f_blocks; 6797 } 6798 if (tsf->f_bfree > 0) { 6799 if (sf->f_bsize != tsf->f_bsize) { 6800 tot = tsf->f_bfree * tsf->f_bsize; 6801 sf->f_bfree += (tot / sf->f_bsize); 6802 } else 6803 sf->f_bfree += tsf->f_bfree; 6804 } 6805 if (tsf->f_bavail > 0) { 6806 if (sf->f_bsize != tsf->f_bsize) { 6807 tot = tsf->f_bavail * tsf->f_bsize; 6808 sf->f_bavail += (tot / sf->f_bsize); 6809 } else 6810 sf->f_bavail += tsf->f_bavail; 6811 } 6812 } 6813 } 6814 free(tsf, M_TEMP); 6815 free(dvpp, M_TEMP); 6816 return (error); 6817 } 6818 6819 /* 6820 * Set an NFSv4 acl. 6821 */ 6822 int 6823 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 6824 { 6825 int error; 6826 6827 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 6828 error = NFSERR_ATTRNOTSUPP; 6829 goto out; 6830 } 6831 /* 6832 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 6833 * Make sure it has enough room for that - splitting every entry 6834 * into two and appending "canonical six" entries at the end. 6835 * Cribbed out of kern/vfs_acl.c - Rick M. 6836 */ 6837 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 6838 error = NFSERR_ATTRNOTSUPP; 6839 goto out; 6840 } 6841 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 6842 if (error == 0) { 6843 error = nfsrv_dssetacl(vp, aclp, cred, p); 6844 if (error == ENOENT) 6845 error = 0; 6846 } 6847 6848 out: 6849 NFSEXITCODE(error); 6850 return (error); 6851 } 6852 6853 /* 6854 * Seek vnode op call (actually it is a VOP_IOCTL()). 
6855 * This function is called with the vnode locked, but unlocks and vrele()s 6856 * the vp before returning. 6857 */ 6858 int 6859 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 6860 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 6861 { 6862 struct nfsvattr at; 6863 int error, ret; 6864 6865 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 6866 /* 6867 * Attempt to seek on a DS file. A return of ENOENT implies 6868 * there is no DS file to seek on. 6869 */ 6870 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 6871 NULL, NULL, NULL, NULL, offp, content, eofp); 6872 if (error != ENOENT) { 6873 vput(vp); 6874 return (error); 6875 } 6876 6877 /* 6878 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 6879 * VOP_IOCTL() will return ENXIO. However, the correct reply for 6880 * NFSv4.2 is *eofp == true and error == 0 for this case. 6881 */ 6882 NFSVOPUNLOCK(vp); 6883 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 6884 *eofp = false; 6885 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 6886 /* Handle the cases where we might be at EOF. */ 6887 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 6888 if (ret == 0 && *offp == at.na_size) { 6889 *eofp = true; 6890 error = 0; 6891 } 6892 if (ret != 0 && error == 0) 6893 error = ret; 6894 } 6895 vrele(vp); 6896 NFSEXITCODE(error); 6897 return (error); 6898 } 6899 6900 /* 6901 * Allocate vnode op call. 6902 */ 6903 int 6904 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6905 NFSPROC_T *p) 6906 { 6907 int error; 6908 off_t olen; 6909 6910 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 6911 /* 6912 * Attempt to allocate on a DS file. A return of ENOENT implies 6913 * there is no DS file to allocate on. 
6914 */ 6915 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 6916 NULL, NULL, NULL, NULL, &len, 0, NULL); 6917 if (error != ENOENT) 6918 return (error); 6919 6920 /* 6921 * Do the actual VOP_ALLOCATE(), looping so long as 6922 * progress is being made, to achieve completion. 6923 */ 6924 do { 6925 olen = len; 6926 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred); 6927 if (error == 0 && len > 0 && olen > len) 6928 maybe_yield(); 6929 } while (error == 0 && len > 0 && olen > len); 6930 if (error == 0 && len > 0) 6931 error = NFSERR_IO; 6932 NFSEXITCODE(error); 6933 return (error); 6934 } 6935 6936 /* 6937 * Deallocate vnode op call. 6938 */ 6939 int 6940 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6941 NFSPROC_T *p) 6942 { 6943 int error; 6944 off_t olen; 6945 6946 ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp"); 6947 /* 6948 * Attempt to deallocate on a DS file. A return of ENOENT implies 6949 * there is no DS file to deallocate on. 6950 */ 6951 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL, 6952 NULL, NULL, NULL, NULL, &len, 0, NULL); 6953 if (error != ENOENT) 6954 return (error); 6955 6956 /* 6957 * Do the actual VOP_DEALLOCATE(), looping so long as 6958 * progress is being made, to achieve completion. 6959 */ 6960 do { 6961 olen = len; 6962 error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); 6963 if (error == 0 && len > 0 && olen > len) 6964 maybe_yield(); 6965 } while (error == 0 && len > 0 && olen > len); 6966 if (error == 0 && len > 0) 6967 error = NFSERR_IO; 6968 NFSEXITCODE(error); 6969 return (error); 6970 } 6971 6972 /* 6973 * Get Extended Atribute vnode op into an mbuf list. 
6974 */ 6975 int 6976 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, 6977 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, 6978 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 6979 { 6980 struct iovec *iv; 6981 struct uio io, *uiop = &io; 6982 struct mbuf *m, *m2; 6983 int alen, error, len, tlen; 6984 size_t siz; 6985 6986 /* First, find out the size of the extended attribute. */ 6987 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6988 &siz, cred, p); 6989 if (error != 0) 6990 return (NFSERR_NOXATTR); 6991 if (siz > maxresp - NFS_MAXXDR) 6992 return (NFSERR_XATTR2BIG); 6993 len = siz; 6994 tlen = NFSM_RNDUP(len); 6995 if (tlen > 0) { 6996 /* 6997 * If cnt > MCLBYTES and the reply will not be saved, use 6998 * ext_pgs mbufs for TLS. 6999 * For NFSv4.0, we do not know for sure if the reply will 7000 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 7001 * Always use ext_pgs mbufs if ND_EXTPG is set. 7002 */ 7003 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && 7004 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && 7005 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) 7006 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, 7007 maxextsiz, &m, &m2, &iv); 7008 else 7009 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, 7010 &iv); 7011 uiop->uio_iov = iv; 7012 } else { 7013 uiop->uio_iovcnt = 0; 7014 uiop->uio_iov = iv = NULL; 7015 m = m2 = NULL; 7016 } 7017 uiop->uio_offset = 0; 7018 uiop->uio_resid = tlen; 7019 uiop->uio_rw = UIO_READ; 7020 uiop->uio_segflg = UIO_SYSSPACE; 7021 uiop->uio_td = p; 7022 #ifdef MAC 7023 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, 7024 name); 7025 if (error != 0) 7026 goto out; 7027 #endif 7028 7029 if (tlen > 0) 7030 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 7031 NULL, cred, p); 7032 if (error != 0) 7033 goto out; 7034 if (uiop->uio_resid > 0) { 7035 alen = tlen; 7036 len = tlen - uiop->uio_resid; 7037 tlen = NFSM_RNDUP(len); 7038 if (alen 
!= tlen) 7039 printf("nfsvno_getxattr: weird size read\n"); 7040 if (tlen == 0) { 7041 m_freem(m); 7042 m = m2 = NULL; 7043 } else if (alen != tlen || tlen != len) 7044 m2 = nfsrv_adj(m, alen - tlen, tlen - len); 7045 } 7046 *lenp = len; 7047 *mpp = m; 7048 *mpendp = m2; 7049 7050 out: 7051 if (error != 0) { 7052 if (m != NULL) 7053 m_freem(m); 7054 *lenp = 0; 7055 } 7056 free(iv, M_TEMP); 7057 NFSEXITCODE(error); 7058 return (error); 7059 } 7060 7061 /* 7062 * Set Extended attribute vnode op from an mbuf list. 7063 */ 7064 int 7065 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, 7066 char *cp, struct ucred *cred, struct thread *p) 7067 { 7068 struct iovec *iv; 7069 struct uio uio, *uiop = &uio; 7070 int cnt, error; 7071 7072 error = 0; 7073 #ifdef MAC 7074 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, 7075 name); 7076 #endif 7077 if (error != 0) 7078 goto out; 7079 7080 uiop->uio_rw = UIO_WRITE; 7081 uiop->uio_segflg = UIO_SYSSPACE; 7082 uiop->uio_td = p; 7083 uiop->uio_offset = 0; 7084 uiop->uio_resid = len; 7085 if (len > 0) { 7086 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); 7087 uiop->uio_iov = iv; 7088 uiop->uio_iovcnt = cnt; 7089 } else { 7090 uiop->uio_iov = iv = NULL; 7091 uiop->uio_iovcnt = 0; 7092 } 7093 if (error == 0) { 7094 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 7095 cred, p); 7096 if (error == 0) { 7097 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 7098 nfsvno_updateds(vp, cred, p); 7099 error = VOP_FSYNC(vp, MNT_WAIT, p); 7100 } 7101 free(iv, M_TEMP); 7102 } 7103 7104 out: 7105 NFSEXITCODE(error); 7106 return (error); 7107 } 7108 7109 /* 7110 * For a pNFS server, the DS file's ctime and 7111 * va_filerev (TimeMetadata and Change) needs to 7112 * be updated. This is a hack, but works by 7113 * flipping the S_ISGID bit in va_mode and then 7114 * flipping it back. 
7115 * It does result in two MDS->DS RPCs, but creating 7116 * a custom RPC just to do this seems overkill, since 7117 * Setxattr/Rmxattr will not be done that frequently. 7118 * If it fails part way through, that is not too 7119 * serious, since the DS file is never executed. 7120 */ 7121 static void 7122 nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p) 7123 { 7124 struct nfsvattr nva; 7125 int ret; 7126 u_short tmode; 7127 7128 ret = VOP_GETATTR(vp, &nva.na_vattr, cred); 7129 if (ret == 0) { 7130 tmode = nva.na_mode; 7131 NFSVNO_ATTRINIT(&nva); 7132 tmode ^= S_ISGID; 7133 NFSVNO_SETATTRVAL(&nva, mode, tmode); 7134 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 7135 NFSPROC_SETATTR, NULL, NULL, NULL, &nva, 7136 NULL, NULL, 0, NULL); 7137 if (ret == 0) { 7138 tmode ^= S_ISGID; 7139 NFSVNO_SETATTRVAL(&nva, mode, tmode); 7140 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 7141 NFSPROC_SETATTR, NULL, NULL, NULL, 7142 &nva, NULL, NULL, 0, NULL); 7143 } 7144 } 7145 } 7146 7147 /* 7148 * Remove Extended attribute vnode op. 7149 */ 7150 int 7151 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 7152 struct ucred *cred, struct thread *p) 7153 { 7154 int error; 7155 7156 /* 7157 * Get rid of any delegations. I am not sure why this is required, 7158 * but RFC-8276 says so. 
7159 */ 7160 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 7161 if (error != 0) 7162 goto out; 7163 #ifdef MAC 7164 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 7165 name); 7166 if (error != 0) 7167 goto out; 7168 #endif 7169 7170 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 7171 if (error == EOPNOTSUPP) 7172 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 7173 cred, p); 7174 if (error == 0) { 7175 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 7176 nfsvno_updateds(vp, cred, p); 7177 error = VOP_FSYNC(vp, MNT_WAIT, p); 7178 } 7179 out: 7180 NFSEXITCODE(error); 7181 return (error); 7182 } 7183 7184 /* 7185 * List Extended Atribute vnode op into an mbuf list. 7186 */ 7187 int 7188 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 7189 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 7190 { 7191 struct iovec iv; 7192 struct uio io; 7193 int error; 7194 size_t siz; 7195 7196 *bufp = NULL; 7197 /* First, find out the size of the extended attribute. */ 7198 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 7199 p); 7200 if (error != 0) 7201 return (NFSERR_NOXATTR); 7202 if (siz <= cookie) { 7203 *lenp = 0; 7204 *eofp = true; 7205 goto out; 7206 } 7207 if (siz > cookie + *lenp) { 7208 siz = cookie + *lenp; 7209 *eofp = false; 7210 } else 7211 *eofp = true; 7212 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). 
*/ 7213 if (siz > 10 * 1024 * 1024) { 7214 error = NFSERR_XATTR2BIG; 7215 goto out; 7216 } 7217 *bufp = malloc(siz, M_TEMP, M_WAITOK); 7218 iv.iov_base = *bufp; 7219 iv.iov_len = siz; 7220 io.uio_iovcnt = 1; 7221 io.uio_iov = &iv; 7222 io.uio_offset = 0; 7223 io.uio_resid = siz; 7224 io.uio_rw = UIO_READ; 7225 io.uio_segflg = UIO_SYSSPACE; 7226 io.uio_td = p; 7227 #ifdef MAC 7228 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 7229 if (error != 0) 7230 goto out; 7231 #endif 7232 7233 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 7234 p); 7235 if (error != 0) 7236 goto out; 7237 if (io.uio_resid > 0) 7238 siz -= io.uio_resid; 7239 *lenp = siz; 7240 7241 out: 7242 if (error != 0) { 7243 free(*bufp, M_TEMP); 7244 *bufp = NULL; 7245 } 7246 NFSEXITCODE(error); 7247 return (error); 7248 } 7249 7250 /* 7251 * Trim trailing data off the mbuf list being built. 7252 */ 7253 void 7254 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 7255 int bextpg, int bextpgsiz) 7256 { 7257 vm_page_t pg; 7258 int fullpgsiz, i; 7259 7260 if (mb->m_next != NULL) { 7261 m_freem(mb->m_next); 7262 mb->m_next = NULL; 7263 } 7264 if ((mb->m_flags & M_EXTPG) != 0) { 7265 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs, 7266 ("nfsm_trimtrailing: bextpg out of range")); 7267 KASSERT(bpos == (char *)(void *) 7268 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz, 7269 ("nfsm_trimtrailing: bextpgsiz bad!")); 7270 7271 /* First, get rid of any pages after this position. 
*/ 7272 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 7273 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 7274 vm_page_unwire_noq(pg); 7275 vm_page_free(pg); 7276 } 7277 mb->m_epg_npgs = bextpg + 1; 7278 if (bextpg == 0) 7279 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 7280 else 7281 fullpgsiz = PAGE_SIZE; 7282 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 7283 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 7284 for (i = 1; i < mb->m_epg_npgs; i++) 7285 mb->m_len += m_epg_pagelen(mb, i, 0); 7286 nd->nd_bextpgsiz = bextpgsiz; 7287 nd->nd_bextpg = bextpg; 7288 } else 7289 mb->m_len = bpos - mtod(mb, char *); 7290 nd->nd_mb = mb; 7291 nd->nd_bpos = bpos; 7292 } 7293 7294 7295 /* 7296 * Check to see if a put file handle operation should test for 7297 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR. 7298 * When Open is the next operation, NFSERR_WRONGSEC cannot be 7299 * replied for the Open cases that use a component. This can 7300 * be identified by the fact that the file handle's type is VDIR. 7301 */ 7302 bool 7303 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, __enum_uint8(vtype) vtyp) 7304 { 7305 7306 if ((nd->nd_flag & ND_NFSV4) == 0) 7307 return (true); 7308 7309 if ((nd->nd_flag & ND_LASTOP) != 0) 7310 return (false); 7311 7312 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH || 7313 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH || 7314 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP || 7315 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME) 7316 return (false); 7317 if (nextop == NFSV4OP_OPEN && vtyp == VDIR) 7318 return (false); 7319 return (true); 7320 } 7321 7322 /* 7323 * Check DSs marked no space. 
7324 */ 7325 void 7326 nfsrv_checknospc(void) 7327 { 7328 struct statfs *tsf; 7329 struct nfsdevice *ds; 7330 struct vnode **dvpp, **tdvpp, *dvp; 7331 char *devid, *tdevid; 7332 int cnt, error = 0, i; 7333 7334 if (nfsrv_devidcnt <= 0) 7335 return; 7336 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 7337 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK); 7338 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 7339 7340 /* Get an array of the dvps for the DSs. */ 7341 tdvpp = dvpp; 7342 tdevid = devid; 7343 i = 0; 7344 NFSDDSLOCK(); 7345 /* First, search for matches for same file system. */ 7346 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 7347 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) { 7348 if (++i > nfsrv_devidcnt) 7349 break; 7350 *tdvpp++ = ds->nfsdev_dvp; 7351 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID); 7352 tdevid += NFSX_V4DEVICEID; 7353 } 7354 } 7355 NFSDDSUNLOCK(); 7356 7357 /* Do a VFS_STATFS() for each of the DSs and clear no space. */ 7358 cnt = i; 7359 tdvpp = dvpp; 7360 tdevid = devid; 7361 for (i = 0; i < cnt && error == 0; i++) { 7362 dvp = *tdvpp++; 7363 error = VFS_STATFS(dvp->v_mount, tsf); 7364 if (error == 0 && tsf->f_bavail > 0) { 7365 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n"); 7366 nfsrv_marknospc(tdevid, false); 7367 } 7368 tdevid += NFSX_V4DEVICEID; 7369 } 7370 free(tsf, M_TEMP); 7371 free(dvpp, M_TEMP); 7372 free(devid, M_TEMP); 7373 } 7374 7375 /* 7376 * Initialize everything that needs to be initialized for a vnet. 7377 */ 7378 static void 7379 nfsrv_vnetinit(const void *unused __unused) 7380 { 7381 7382 nfsd_mntinit(); 7383 } 7384 VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY, 7385 nfsrv_vnetinit, NULL); 7386 7387 /* 7388 * Clean up everything that is in a vnet and needs to be 7389 * done when the jail is destroyed or the module unloaded. 
7390 */ 7391 static void 7392 nfsrv_cleanup(const void *unused __unused) 7393 { 7394 int i; 7395 7396 NFSD_LOCK(); 7397 if (!NFSD_VNET(nfsrv_mntinited)) { 7398 NFSD_UNLOCK(); 7399 return; 7400 } 7401 NFSD_VNET(nfsrv_mntinited) = false; 7402 NFSD_UNLOCK(); 7403 7404 /* Clean out all NFSv4 state. */ 7405 nfsrv_throwawayallstate(curthread); 7406 7407 /* Clean the NFS server reply cache */ 7408 nfsrvd_cleancache(); 7409 7410 /* Clean out v4root exports. */ 7411 if (NFSD_VNET(nfsv4root_mnt)->mnt_export != NULL) { 7412 vfs_free_addrlist(NFSD_VNET(nfsv4root_mnt)->mnt_export); 7413 free(NFSD_VNET(nfsv4root_mnt)->mnt_export, M_MOUNT); 7414 NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; 7415 } 7416 7417 /* Free up the krpc server pool. */ 7418 if (NFSD_VNET(nfsrvd_pool) != NULL) 7419 svcpool_destroy(NFSD_VNET(nfsrvd_pool)); 7420 7421 /* and get rid of the locks */ 7422 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 7423 mtx_destroy(&NFSD_VNET(nfsrchash_table)[i].mtx); 7424 mtx_destroy(&NFSD_VNET(nfsrcahash_table)[i].mtx); 7425 } 7426 mtx_destroy(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx); 7427 for (i = 0; i < nfsrv_sessionhashsize; i++) 7428 mtx_destroy(&NFSD_VNET(nfssessionhash)[i].mtx); 7429 lockdestroy(&NFSD_VNET(nfsv4root_mnt)->mnt_explock); 7430 free(NFSD_VNET(nfsrvudphashtbl), M_NFSRVCACHE); 7431 free(NFSD_VNET(nfsrchash_table), M_NFSRVCACHE); 7432 free(NFSD_VNET(nfsrcahash_table), M_NFSRVCACHE); 7433 free(NFSD_VNET(nfsclienthash), M_NFSDCLIENT); 7434 free(NFSD_VNET(nfslockhash), M_NFSDLOCKFILE); 7435 free(NFSD_VNET(nfssessionhash), M_NFSDSESSION); 7436 free(NFSD_VNET(nfsv4root_mnt), M_TEMP); 7437 NFSD_VNET(nfsv4root_mnt) = NULL; 7438 } 7439 VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY, 7440 nfsrv_cleanup, NULL); 7441 7442 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 7443 7444 /* 7445 * Called once to initialize data structures... 
7446 */ 7447 static int 7448 nfsd_modevent(module_t mod, int type, void *data) 7449 { 7450 int error = 0, i; 7451 static int loaded = 0; 7452 7453 switch (type) { 7454 case MOD_LOAD: 7455 if (loaded) 7456 goto out; 7457 newnfs_portinit(); 7458 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 7459 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 7460 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 7461 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 7462 #ifdef VV_DISABLEDELEG 7463 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 7464 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 7465 #endif 7466 nfsd_call_nfsd = nfssvc_nfsd; 7467 loaded = 1; 7468 break; 7469 7470 case MOD_UNLOAD: 7471 if (newnfs_numnfsd != 0) { 7472 error = EBUSY; 7473 break; 7474 } 7475 7476 #ifdef VV_DISABLEDELEG 7477 vn_deleg_ops.vndeleg_recall = NULL; 7478 vn_deleg_ops.vndeleg_disable = NULL; 7479 #endif 7480 nfsd_call_nfsd = NULL; 7481 mtx_destroy(&nfsrc_udpmtx); 7482 mtx_destroy(&nfs_v4root_mutex); 7483 mtx_destroy(&nfsrv_dontlistlock_mtx); 7484 mtx_destroy(&nfsrv_recalllock_mtx); 7485 if (nfslayouthash != NULL) { 7486 for (i = 0; i < nfsrv_layouthashsize; i++) 7487 mtx_destroy(&nfslayouthash[i].mtx); 7488 free(nfslayouthash, M_NFSDSESSION); 7489 } 7490 loaded = 0; 7491 break; 7492 default: 7493 error = EOPNOTSUPP; 7494 break; 7495 } 7496 7497 out: 7498 NFSEXITCODE(error); 7499 return (error); 7500 } 7501 static moduledata_t nfsd_mod = { 7502 "nfsd", 7503 nfsd_modevent, 7504 NULL, 7505 }; 7506 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 7507 7508 /* So that loader and kldload(2) can find us, wherever we are.. */ 7509 MODULE_VERSION(nfsd, 1); 7510 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 7511 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 7512 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 7513 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 7514