1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 36 #include <sys/capsicum.h> 37 #include <sys/extattr.h> 38 39 /* 40 * Functions that perform the vfs operations required by the routines in 41 * nfsd_serv.c. It is hoped that this change will make the server more 42 * portable. 43 */ 44 45 #include <fs/nfs/nfsport.h> 46 #include <security/mac/mac_framework.h> 47 #include <sys/callout.h> 48 #include <sys/filio.h> 49 #include <sys/hash.h> 50 #include <sys/netexport.h> 51 #include <sys/osd.h> 52 #include <sys/sysctl.h> 53 #include <nlm/nlm_prot.h> 54 #include <nlm/nlm.h> 55 #include <vm/vm_param.h> 56 #include <vm/vnode_pager.h> 57 58 FEATURE(nfsd, "NFSv4 server"); 59 60 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 61 extern int nfsrv_useacl; 62 extern int newnfs_numnfsd; 63 extern int nfsrv_sessionhashsize; 64 extern struct nfslayouthash *nfslayouthash; 65 extern int nfsrv_layouthashsize; 66 extern struct mtx nfsrv_dslock_mtx; 67 extern int nfs_pnfsiothreads; 68 extern volatile int nfsrv_devidcnt; 69 extern int nfsrv_maxpnfsmirror; 70 extern uint32_t nfs_srvmaxio; 71 extern int nfs_bufpackets; 72 extern u_long sb_max_adj; 73 extern struct nfsv4lock nfsv4rootfs_lock; 74 75 uint64_t nfsrv_stripesiz = 0; 76 static int nfsrv_maxstripecnt = 1; 77 78 VNET_DECLARE(int, nfsrv_numnfsd); 79 VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); 80 VNET_DECLARE(SVCPOOL *, nfsrvd_pool); 81 VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); 82 VNET_DECLARE(struct nfslockhashhead *, nfslockhash); 83 VNET_DECLARE(struct nfssessionhash *, nfssessionhash); 84 VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock); 85 VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); 86 87 NFSDLOCKMUTEX; 88 NFSSTATESPINLOCK; 89 struct mtx nfsrc_udpmtx; 90 struct mtx nfs_v4root_mutex; 91 struct mtx nfsrv_dontlistlock_mtx; 92 struct mtx nfsrv_recalllock_mtx; 93 struct nfsrvfh nfs_pubfh; 94 int nfs_pubfhset = 0; 95 int nfsd_debuglevel = 0; 96 static pid_t nfsd_master_pid = (pid_t)-1; 97 static char nfsd_master_comm[MAXCOMLEN + 1]; 98 static struct timeval nfsd_master_start; 99 static uint32_t nfsv4_sysid = 0; 100 static fhandle_t zerofh; 101 102 VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL; 103 VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl); 104 VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table); 105 VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table); 106 VNET_DEFINE(struct nfsrvfh, nfs_rootfh); 107 VNET_DEFINE(int, nfs_rootfhset) = 0; 108 VNET_DEFINE(struct callout, nfsd_callout); 109 VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt); 110 VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt); 111 VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt); 112 VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false; 113 VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false; 114 115 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, 116 struct ucred *); 117 static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *); 118 static void nfsvno_pnfsreplenish(void *); 119 static int nfsvno_pnfsusenumfile(struct nameidata *, struct vattr *); 120 121 int nfsrv_enable_crossmntpt = 1; 122 static int nfs_commit_blks; 123 static int nfs_commit_miss; 124 extern int nfsrv_issuedelegs; 125 extern int nfsrv_dolocallocks; 126 extern struct nfsdevicehead nfsrv_devidhead; 127 128 /* Map d_type to vnode type. */ 129 static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR, 130 VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON }; 131 132 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, 133 struct iovec **); 134 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, 135 struct mbuf **, struct iovec **); 136 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, 137 int *); 138 static void nfs_dtypetovtype(struct nfsvattr *, struct vnode *, uint8_t); 139 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, 140 NFSPROC_T *); 141 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode ***, 142 int *, char *, fhandle_t *); 143 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, 144 NFSPROC_T *); 145 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, 146 struct thread *, int, struct mbuf **, char *, struct mbuf **, 147 struct nfsvattr *, struct acl *, off_t *, int, bool *); 148 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); 149 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, 150 NFSPROC_T *, struct nfsmount **, int, int, uint64_t, struct mbuf **, 151 struct mbuf **, int *); 152 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, 153 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int, uint64_t, 154 struct mbuf **, char *, int *); 155 #ifdef notnow 156 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 157 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 158 static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, 159 NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); 160 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, 161 struct vnode *, struct nfsmount **, int, struct acl *, int *); 162 #endif 163 static int nfsrv_setattrdsrpc(fhandle_t *, struct vnode *, struct ucred *, 164 NFSPROC_T *, struct nfsmount **, int, int, struct nfsvattr *, int *); 165 static int nfsrv_setattrdsdorpc(fhandle_t *, struct vnode *, struct ucred *, 166 NFSPROC_T *, struct nfsmount *, struct nfsvattr *, struct nfsvattr *); 167 static int nfsrv_getattrdsrpc(fhandle_t *, struct vnode *, struct ucred *, 168 NFSPROC_T *, struct nfsmount **, int, struct nfsvattr *, int *); 169 static int nfsrv_getattrdsdorpc(fhandle_t *, struct vnode *, struct ucred *, 170 NFSPROC_T *, struct nfsmount *, struct nfsvattr *); 171 #ifdef notnow 172 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, 173 NFSPROC_T *, struct nfsmount *); 174 #endif 175 static int nfsrv_putfhname(fhandle_t *, char *); 176 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, 177 struct pnfsdsfile *, struct vnode **, NFSPROC_T *); 178 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char **, 179 char *, struct vnode *, NFSPROC_T *); 180 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); 181 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, 182 NFSPROC_T *); 183 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); 184 185 int nfs_pnfsio(task_fn_t *, void *); 186 187 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 188 "NFS server"); 189 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, 190 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); 191 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 192 0, ""); 193 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 194 0, ""); 195 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, 196 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); 197 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 198 0, "Debug level for NFS server"); 199 VNET_DECLARE(int, nfsd_enable_stringtouid); 200 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, 201 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfsd_enable_stringtouid), 202 0, "Enable nfsd to accept numeric owner_names"); 203 /* 204 * vfs.nfsd.pnfsswitchforw and vfs.nfsd.pnfsnumfilemiss are writable so that 205 * statistics can be reset. 206 */ 207 static uint64_t nfsrv_pnfsswitchforw = 0; 208 SYSCTL_U64(_vfs_nfsd, OID_AUTO, pnfsswitchforw, CTLFLAG_RW, 209 &nfsrv_pnfsswitchforw, 0, "Number of times replenish switches to forward"); 210 static uint64_t nfsrv_pnfsnumfilemiss = 0; 211 SYSCTL_U64(_vfs_nfsd, OID_AUTO, pnfsnumfilemiss, CTLFLAG_RW, 212 &nfsrv_pnfsnumfilemiss, 0, "Number of numfile misses"); 213 static u_int nfsrv_pnfsforwcnt = 5; 214 SYSCTL_UINT(_vfs_nfsd, OID_AUTO, pnfsreplenishforwcnt, CTLFLAG_RW, 215 &nfsrv_pnfsforwcnt, 0, "Forward replenish cnt before switch to back"); 216 static int nfsrv_pnfsgetdsattr = 1; 217 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 218 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 219 static bool nfsrv_recalldeleg = false; 220 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, recalldeleg, CTLFLAG_RW, 221 &nfsrv_recalldeleg, 0, 222 "When set remove/rename recalls delegations for same client"); 223 224 /* 225 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 226 * not running. 227 * The dsN subdirectories for the increased values must have been created 228 * on all DS servers before this increase is done. 229 */ 230 u_int nfsrv_dsdirsize = 20; 231 static int 232 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 233 { 234 int error, newdsdirsize; 235 236 newdsdirsize = nfsrv_dsdirsize; 237 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 238 if (error != 0 || req->newptr == NULL) 239 return (error); 240 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 241 newnfs_numnfsd != 0) 242 return (EINVAL); 243 nfsrv_dsdirsize = newdsdirsize; 244 return (0); 245 } 246 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 247 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 248 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 249 250 /* 251 * nfsrv_pnfsmaxnumfiles can only be decreased when the nfsd is not 252 * running. It can be increased when the nfsd is running, but the 253 * additional numfiles should have been precreated in .pnfshide/numfiles 254 * for all file systems before it is increased. 255 */ 256 static u_int nfsrv_pnfsmaxnumfiles = 1000; 257 static int 258 sysctl_pnfsmaxnumfiles(SYSCTL_HANDLER_ARGS) 259 { 260 int error, new_maxnumfiles; 261 262 new_maxnumfiles = nfsrv_pnfsmaxnumfiles; 263 error = sysctl_handle_int(oidp, &new_maxnumfiles, 0, req); 264 if (error != 0 || req->newptr == NULL) 265 return (error); 266 if (new_maxnumfiles < nfsrv_pnfsmaxnumfiles && newnfs_numnfsd != 0) 267 return (EBUSY); 268 if (new_maxnumfiles > 10000 || new_maxnumfiles < 100) 269 return (EINVAL); 270 nfsrv_pnfsmaxnumfiles = new_maxnumfiles; 271 return (0); 272 } 273 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsmaxnumfiles, 274 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 275 sizeof(nfsrv_pnfsmaxnumfiles), sysctl_pnfsmaxnumfiles, 276 "IU", "Maximum number of entries in .pnfshide/numfiles"); 277 278 /* 279 * nfs_srvmaxio can only be increased and only when the nfsd threads are 280 * not running. The setting must be a power of 2, with the current limit of 281 * 1Mbyte. 282 */ 283 static int 284 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS) 285 { 286 int error; 287 u_int newsrvmaxio; 288 uint64_t tval; 289 290 newsrvmaxio = nfs_srvmaxio; 291 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req); 292 if (error != 0 || req->newptr == NULL) 293 return (error); 294 if (newsrvmaxio == nfs_srvmaxio) 295 return (0); 296 if (newsrvmaxio < nfs_srvmaxio) { 297 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n"); 298 return (EINVAL); 299 } 300 if (newsrvmaxio > NFS_SRVLIMITIO) { 301 printf("nfsd: vfs.nfsd.srvmaxio cannot be > %d\n", 302 NFS_SRVLIMITIO); 303 return (EINVAL); 304 } 305 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) { 306 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n"); 307 return (EINVAL); 308 } 309 310 /* 311 * Check that kern.ipc.maxsockbuf is large enough for 312 * newsrviomax, given the setting of vfs.nfs.bufpackets. 313 */ 314 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets > 315 sb_max_adj) { 316 /* 317 * Suggest vfs.nfs.bufpackets * maximum RPC message for 318 * sb_max_adj. 319 */ 320 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets; 321 322 /* 323 * Convert suggested sb_max_adj value to a suggested 324 * sb_max value, which is what is set via kern.ipc.maxsockbuf. 325 * Perform the inverse calculation of (from uipc_sockbuf.c): 326 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES / 327 * (MSIZE + MCLBYTES); 328 * XXX If the calculation of sb_max_adj from sb_max changes, 329 * this calculation must be changed as well. 330 */ 331 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */ 332 tval += MCLBYTES - 1; /* Round up divide. */ 333 tval /= MCLBYTES; 334 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of " 335 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval, 336 newsrvmaxio); 337 return (EINVAL); 338 } 339 340 NFSD_LOCK(); 341 if (newnfs_numnfsd != 0) { 342 NFSD_UNLOCK(); 343 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd " 344 "threads are running\n"); 345 return (EINVAL); 346 } 347 348 349 nfs_srvmaxio = newsrvmaxio; 350 NFSD_UNLOCK(); 351 return (0); 352 } 353 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio, 354 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 355 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes"); 356 357 static int 358 sysctl_dolocallocks(SYSCTL_HANDLER_ARGS) 359 { 360 int error, igotlock, newdolocallocks; 361 362 newdolocallocks = nfsrv_dolocallocks; 363 error = sysctl_handle_int(oidp, &newdolocallocks, 0, req); 364 if (error != 0 || req->newptr == NULL) 365 return (error); 366 if (newdolocallocks == nfsrv_dolocallocks) 367 return (0); 368 if (jailed(curthread->td_ucred)) 369 return (EINVAL); 370 371 NFSLOCKV4ROOTMUTEX(); 372 do { 373 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, 374 NFSV4ROOTLOCKMUTEXPTR, NULL); 375 } while (!igotlock); 376 NFSUNLOCKV4ROOTMUTEX(); 377 378 nfsrv_dolocallocks = newdolocallocks; 379 380 NFSLOCKV4ROOTMUTEX(); 381 nfsv4_unlock(&nfsv4rootfs_lock, 0); 382 NFSUNLOCKV4ROOTMUTEX(); 383 return (0); 384 } 385 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks, 386 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 387 sysctl_dolocallocks, "IU", "Enable nfsd to acquire local locks on files"); 388 389 static int 390 sysctl_stripecnt(SYSCTL_HANDLER_ARGS) 391 { 392 int error, newmaxstripecnt; 393 394 newmaxstripecnt = nfsrv_maxstripecnt; 395 error = sysctl_handle_int(oidp, &newmaxstripecnt, 0, req); 396 if (error != 0 || req->newptr == NULL) 397 return (error); 398 if (newmaxstripecnt == nfsrv_maxstripecnt) 399 return (0); 400 if (newnfs_numnfsd > 0) 401 return (EPERM); 402 if (jailed(curthread->td_ucred)) 403 return (EINVAL); 404 if (newmaxstripecnt <= 0 || newmaxstripecnt > NFSDEV_MAXSTRIPE) 405 return (EINVAL); 406 nfsrv_maxstripecnt = newmaxstripecnt; 407 return (0); 408 } 409 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsstripecnt, 410 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 411 sysctl_stripecnt, "IU", "Set the #stripes for a pNFS server"); 412 413 static int 414 sysctl_stripeunit(SYSCTL_HANDLER_ARGS) 415 { 416 uint64_t newstripesiz; 417 int error; 418 419 newstripesiz = nfsrv_stripesiz; 420 error = sysctl_handle_64(oidp, &newstripesiz, 0, req); 421 if (error != 0 || req->newptr == NULL) 422 return (error); 423 if (newstripesiz == nfsrv_stripesiz) 424 return (0); 425 if (newnfs_numnfsd > 0) 426 return (EPERM); 427 if (jailed(curthread->td_ucred)) 428 return (EINVAL); 429 nfsrv_stripesiz = newstripesiz; 430 if (newstripesiz == 0) 431 nfsrv_maxstripecnt = 1; 432 return (0); 433 } 434 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, pnfsstripeunit, 435 CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 436 sysctl_stripeunit, "QU", "Set the stripe unit length for a pNFS server"); 437 438 #define MAX_REORDERED_RPC 16 439 #define NUM_HEURISTIC 1031 440 #define NHUSE_INIT 64 441 #define NHUSE_INC 16 442 #define NHUSE_MAX 2048 443 444 static struct nfsheur { 445 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 446 off_t nh_nextoff; /* next offset for sequential detection */ 447 int nh_use; /* use count for selection */ 448 int nh_seqcount; /* heuristic */ 449 } nfsheur[NUM_HEURISTIC]; 450 451 /* 452 * Heuristic to detect sequential operation. 453 */ 454 static struct nfsheur * 455 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 456 { 457 struct nfsheur *nh; 458 int hi, try; 459 460 /* Locate best candidate. */ 461 try = 32; 462 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 463 nh = &nfsheur[hi]; 464 while (try--) { 465 if (nfsheur[hi].nh_vp == vp) { 466 nh = &nfsheur[hi]; 467 break; 468 } 469 if (nfsheur[hi].nh_use > 0) 470 --nfsheur[hi].nh_use; 471 hi = (hi + 1) % NUM_HEURISTIC; 472 if (nfsheur[hi].nh_use < nh->nh_use) 473 nh = &nfsheur[hi]; 474 } 475 476 /* Initialize hint if this is a new file. */ 477 if (nh->nh_vp != vp) { 478 nh->nh_vp = vp; 479 nh->nh_nextoff = uio->uio_offset; 480 nh->nh_use = NHUSE_INIT; 481 if (uio->uio_offset == 0) 482 nh->nh_seqcount = 4; 483 else 484 nh->nh_seqcount = 1; 485 } 486 487 /* Calculate heuristic. */ 488 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 489 uio->uio_offset == nh->nh_nextoff) { 490 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 491 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 492 if (nh->nh_seqcount > IO_SEQMAX) 493 nh->nh_seqcount = IO_SEQMAX; 494 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 495 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 496 /* Probably a reordered RPC, leave seqcount alone. */ 497 } else if (nh->nh_seqcount > 1) { 498 nh->nh_seqcount /= 2; 499 } else { 500 nh->nh_seqcount = 0; 501 } 502 nh->nh_use += NHUSE_INC; 503 if (nh->nh_use > NHUSE_MAX) 504 nh->nh_use = NHUSE_MAX; 505 return (nh); 506 } 507 508 /* 509 * Get attributes into nfsvattr structure. 510 */ 511 int 512 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 513 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 514 nfsattrbit_t *attrbitp) 515 { 516 int error, gotattr, lockedit = 0; 517 struct nfsvattr na; 518 519 if (vpislocked == 0) { 520 /* 521 * When vpislocked == 0, the vnode is either exclusively 522 * locked by this thread or not locked by this thread. 523 * As such, shared lock it, if not exclusively locked. 524 */ 525 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 526 lockedit = 1; 527 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 528 } 529 } 530 531 /* 532 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 533 * attributes, as required. 534 * This needs to be done for regular files if: 535 * - non-NFSv4 RPCs or 536 * - when attrbitp == NULL or 537 * - an NFSv4 RPC with any of the above attributes in attrbitp. 538 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 539 * these attributes. nfsrv_proxyds() will return an error if the 540 * server is not a pNFS one. 541 */ 542 gotattr = 0; 543 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 544 (nd->nd_flag & ND_NFSV4) == 0 || 545 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 546 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 547 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 548 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 549 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 550 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 551 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 552 NULL); 553 if (error == 0) 554 gotattr = 1; 555 } 556 557 nvap->na_bsdflags = 0; 558 nvap->na_flags = 0; 559 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 560 if (lockedit != 0) 561 NFSVOPUNLOCK(vp); 562 563 /* 564 * If we got the Change, Size and Modify Time from the DS, 565 * replace them. 566 */ 567 if (gotattr != 0) { 568 nvap->na_atime = na.na_atime; 569 nvap->na_mtime = na.na_mtime; 570 nvap->na_filerev = na.na_filerev; 571 nvap->na_size = na.na_size; 572 nvap->na_bytes = na.na_bytes; 573 } 574 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 575 error, (uintmax_t)na.na_filerev); 576 577 NFSEXITCODE(error); 578 return (error); 579 } 580 581 /* 582 * Get a file handle for a vnode. 583 */ 584 int 585 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 586 { 587 int error; 588 589 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 590 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 591 error = VOP_VPTOFH(vp, &fhp->fh_fid); 592 593 NFSEXITCODE(error); 594 return (error); 595 } 596 597 /* 598 * Perform access checking for vnodes obtained from file handles that would 599 * refer to files already opened by a Unix client. You cannot just use 600 * vn_writechk() and VOP_ACCESSX() for two reasons. 601 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 602 * case. 603 * 2 - The owner is to be given access irrespective of mode bits for some 604 * operations, so that processes that chmod after opening a file don't 605 * break. 606 */ 607 int 608 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 609 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 610 u_int32_t *supportedtypep) 611 { 612 struct vattr vattr; 613 int error = 0, getret = 0; 614 615 if (vpislocked == 0) { 616 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 617 error = EPERM; 618 goto out; 619 } 620 } 621 if (accmode & VWRITE) { 622 /* Just vn_writechk() changed to check rdonly */ 623 /* 624 * Disallow write attempts on read-only file systems; 625 * unless the file is a socket or a block or character 626 * device resident on the file system. 627 */ 628 if (NFSVNO_EXRDONLY(exp) || 629 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 630 switch (vp->v_type) { 631 case VREG: 632 case VDIR: 633 case VLNK: 634 error = EROFS; 635 default: 636 break; 637 } 638 } 639 /* 640 * If there's shared text associated with 641 * the inode, try to free it up once. If 642 * we fail, we can't allow writing. 643 */ 644 if (VOP_IS_TEXT(vp) && error == 0) 645 error = ETXTBSY; 646 } 647 if (error != 0) { 648 if (vpislocked == 0) 649 NFSVOPUNLOCK(vp); 650 goto out; 651 } 652 653 /* 654 * Should the override still be applied when ACLs are enabled? 655 */ 656 error = VOP_ACCESSX(vp, accmode, cred, p); 657 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 658 /* 659 * Try again with VEXPLICIT_DENY, to see if the test for 660 * deletion is supported. 661 */ 662 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 663 if (error == 0) { 664 if (vp->v_type == VDIR) { 665 accmode &= ~(VDELETE | VDELETE_CHILD); 666 accmode |= VWRITE; 667 error = VOP_ACCESSX(vp, accmode, cred, p); 668 } else if (supportedtypep != NULL) { 669 *supportedtypep &= ~NFSACCESS_DELETE; 670 } 671 } 672 } 673 674 /* 675 * Allow certain operations for the owner (reads and writes 676 * on files that are already open). 677 */ 678 if (override != NFSACCCHK_NOOVERRIDE && 679 (error == EPERM || error == EACCES)) { 680 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 681 error = 0; 682 else if (override & NFSACCCHK_ALLOWOWNER) { 683 getret = VOP_GETATTR(vp, &vattr, cred); 684 if (getret == 0 && cred->cr_uid == vattr.va_uid) 685 error = 0; 686 } 687 } 688 if (vpislocked == 0) 689 NFSVOPUNLOCK(vp); 690 691 out: 692 NFSEXITCODE(error); 693 return (error); 694 } 695 696 /* 697 * Set attribute(s) vnop. 698 */ 699 int 700 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 701 struct thread *p, struct nfsexstuff *exp) 702 { 703 u_quad_t savsize = 0; 704 int error, savedit; 705 time_t savbtime; 706 707 /* 708 * If this is an exported file system and a pNFS service is running, 709 * don't VOP_SETATTR() of size for the MDS file system. 710 */ 711 savedit = 0; 712 error = 0; 713 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && 714 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && 715 nvap->na_vattr.va_size > 0) { 716 savsize = nvap->na_vattr.va_size; 717 nvap->na_vattr.va_size = VNOVAL; 718 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 719 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 720 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 721 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 722 nvap->na_vattr.va_mtime.tv_sec != VNOVAL) 723 savedit = 1; 724 else 725 savedit = 2; 726 } 727 if (savedit != 2) 728 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 729 if (savedit != 0) 730 nvap->na_vattr.va_size = savsize; 731 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 732 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 733 nvap->na_vattr.va_size != VNOVAL || 734 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 735 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 736 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 737 /* Never modify birthtime on a DS file. */ 738 savbtime = nvap->na_vattr.va_birthtime.tv_sec; 739 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; 740 /* For a pNFS server, set the attributes on the DS file. */ 741 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, 742 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); 743 nvap->na_vattr.va_birthtime.tv_sec = savbtime; 744 if (error == ENOENT) 745 error = 0; 746 } 747 NFSEXITCODE(error); 748 return (error); 749 } 750 751 /* 752 * Set up nameidata for a lookup() call and do it. 753 */ 754 int 755 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 756 struct vnode *dp, int islocked, struct nfsexstuff *exp, 757 struct vnode **retdirp) 758 { 759 struct componentname *cnp = &ndp->ni_cnd; 760 int i; 761 struct iovec aiov; 762 struct uio auio; 763 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 764 int error = 0; 765 char *cp; 766 767 *retdirp = NULL; 768 cnp->cn_nameptr = cnp->cn_pnbuf; 769 ndp->ni_lcf = 0; 770 /* 771 * Extract and set starting directory. 772 */ 773 if (dp->v_type != VDIR) { 774 if (islocked) 775 vput(dp); 776 else 777 vrele(dp); 778 nfsvno_relpathbuf(ndp); 779 error = ENOTDIR; 780 goto out1; 781 } 782 if (islocked) 783 NFSVOPUNLOCK(dp); 784 vref(dp); 785 *retdirp = dp; 786 if (NFSVNO_EXRDONLY(exp)) 787 cnp->cn_flags |= RDONLY; 788 ndp->ni_segflg = UIO_SYSSPACE; 789 790 if (nd->nd_flag & ND_PUBLOOKUP) { 791 ndp->ni_loopcnt = 0; 792 if (cnp->cn_pnbuf[0] == '/') { 793 vrele(dp); 794 /* 795 * Check for degenerate pathnames here, since lookup() 796 * panics on them. 797 */ 798 for (i = 1; i < ndp->ni_pathlen; i++) 799 if (cnp->cn_pnbuf[i] != '/') 800 break; 801 if (i == ndp->ni_pathlen) { 802 error = NFSERR_ACCES; 803 goto out; 804 } 805 dp = rootvnode; 806 vref(dp); 807 } 808 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 809 (nd->nd_flag & ND_NFSV4) == 0) { 810 /* 811 * Only cross mount points for NFSv4 when doing a 812 * mount while traversing the file system above 813 * the mount point, unless nfsrv_enable_crossmntpt is set. 814 */ 815 cnp->cn_flags |= NOCROSSMOUNT; 816 } 817 818 /* 819 * Initialize for scan, set ni_startdir and bump ref on dp again 820 * because lookup() will dereference ni_startdir. 821 */ 822 823 ndp->ni_startdir = dp; 824 ndp->ni_rootdir = rootvnode; 825 ndp->ni_topdir = NULL; 826 827 if (!lockleaf) 828 cnp->cn_flags |= LOCKLEAF; 829 for (;;) { 830 cnp->cn_nameptr = cnp->cn_pnbuf; 831 /* 832 * Call lookup() to do the real work. If an error occurs, 833 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 834 * we do not have to dereference anything before returning. 835 * In either case ni_startdir will be dereferenced and NULLed 836 * out. 837 */ 838 error = vfs_lookup(ndp); 839 if (error) 840 break; 841 842 /* 843 * Check for encountering a symbolic link. Trivial 844 * termination occurs if no symlink encountered. 845 */ 846 if ((cnp->cn_flags & ISSYMLINK) == 0) { 847 if (ndp->ni_vp && !lockleaf) 848 NFSVOPUNLOCK(ndp->ni_vp); 849 break; 850 } 851 852 /* 853 * Validate symlink 854 */ 855 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 856 NFSVOPUNLOCK(ndp->ni_dvp); 857 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 858 error = EINVAL; 859 goto badlink2; 860 } 861 862 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 863 error = ELOOP; 864 goto badlink2; 865 } 866 if (ndp->ni_pathlen > 1) 867 cp = uma_zalloc(namei_zone, M_WAITOK); 868 else 869 cp = cnp->cn_pnbuf; 870 aiov.iov_base = cp; 871 aiov.iov_len = MAXPATHLEN; 872 auio.uio_iov = &aiov; 873 auio.uio_iovcnt = 1; 874 auio.uio_offset = 0; 875 auio.uio_rw = UIO_READ; 876 auio.uio_segflg = UIO_SYSSPACE; 877 auio.uio_td = NULL; 878 auio.uio_resid = MAXPATHLEN; 879 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 880 if (error) { 881 badlink1: 882 if (ndp->ni_pathlen > 1) 883 uma_zfree(namei_zone, cp); 884 badlink2: 885 vrele(ndp->ni_dvp); 886 vput(ndp->ni_vp); 887 break; 888 } 889 linklen = MAXPATHLEN - auio.uio_resid; 890 if (linklen == 0) { 891 error = ENOENT; 892 goto badlink1; 893 } 894 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 895 error = ENAMETOOLONG; 896 goto badlink1; 897 } 898 899 /* 900 * Adjust or replace path 901 */ 902 if (ndp->ni_pathlen > 1) { 903 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 904 uma_zfree(namei_zone, cnp->cn_pnbuf); 905 cnp->cn_pnbuf = cp; 906 } else 907 cnp->cn_pnbuf[linklen] = '\0'; 908 ndp->ni_pathlen += linklen; 909 910 /* 911 * Cleanup refs for next loop and check if root directory 912 * should replace current directory. Normally ni_dvp 913 * becomes the new base directory and is cleaned up when 914 * we loop. Explicitly null pointers after invalidation 915 * to clarify operation. 916 */ 917 vput(ndp->ni_vp); 918 ndp->ni_vp = NULL; 919 920 if (cnp->cn_pnbuf[0] == '/') { 921 vrele(ndp->ni_dvp); 922 ndp->ni_dvp = ndp->ni_rootdir; 923 vref(ndp->ni_dvp); 924 } 925 ndp->ni_startdir = ndp->ni_dvp; 926 ndp->ni_dvp = NULL; 927 } 928 if (!lockleaf) 929 cnp->cn_flags &= ~LOCKLEAF; 930 931 out: 932 if (error) { 933 nfsvno_relpathbuf(ndp); 934 ndp->ni_vp = NULL; 935 ndp->ni_dvp = NULL; 936 ndp->ni_startdir = NULL; 937 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 938 ndp->ni_dvp = NULL; 939 } 940 941 out1: 942 NFSEXITCODE2(error, nd); 943 return (error); 944 } 945 946 /* 947 * Set up a pathname buffer and return a pointer to it and, optionally 948 * set a hash pointer. 949 */ 950 void 951 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 952 { 953 struct componentname *cnp = &ndp->ni_cnd; 954 955 cnp->cn_flags |= (NOMACCHECK); 956 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 957 if (hashpp != NULL) 958 *hashpp = NULL; 959 *bufpp = cnp->cn_pnbuf; 960 } 961 962 /* 963 * Release the above path buffer, if not released by nfsvno_namei(). 964 */ 965 void 966 nfsvno_relpathbuf(struct nameidata *ndp) 967 { 968 969 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 970 ndp->ni_cnd.cn_pnbuf = NULL; 971 } 972 973 /* 974 * Readlink vnode op into an mbuf list. 975 */ 976 int 977 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, 978 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 979 { 980 struct iovec *iv; 981 struct uio io, *uiop = &io; 982 struct mbuf *mp, *mp3; 983 int len, tlen, error = 0; 984 985 len = NFS_MAXPATHLEN; 986 if (maxextsiz > 0) 987 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 988 &mp3, &mp, &iv); 989 else 990 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); 991 uiop->uio_iov = iv; 992 uiop->uio_offset = 0; 993 uiop->uio_resid = len; 994 uiop->uio_rw = UIO_READ; 995 uiop->uio_segflg = UIO_SYSSPACE; 996 uiop->uio_td = NULL; 997 error = VOP_READLINK(vp, uiop, cred); 998 free(iv, M_TEMP); 999 if (error) { 1000 m_freem(mp3); 1001 *lenp = 0; 1002 goto out; 1003 } 1004 if (uiop->uio_resid > 0) { 1005 len -= uiop->uio_resid; 1006 tlen = NFSM_RNDUP(len); 1007 if (tlen == 0) { 1008 m_freem(mp3); 1009 mp3 = mp = NULL; 1010 } else if (tlen != NFS_MAXPATHLEN || tlen != len) 1011 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, 1012 tlen - len); 1013 } 1014 *lenp = len; 1015 *mpp = mp3; 1016 *mpendp = mp; 1017 1018 out: 1019 NFSEXITCODE(error); 1020 return (error); 1021 } 1022 1023 /* 1024 * Create an mbuf chain and an associated iovec that can be used to Read 1025 * or Getextattr of data. 1026 * Upon success, return pointers to the first and last mbufs in the chain 1027 * plus the malloc'd iovec and its iovlen. 1028 */ 1029 static int 1030 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, 1031 struct iovec **ivp) 1032 { 1033 struct mbuf *m, *m2 = NULL, *m3; 1034 struct iovec *iv; 1035 int i, left, siz; 1036 1037 left = len; 1038 m3 = NULL; 1039 /* 1040 * Generate the mbuf list with the uio_iov ref. to it. 1041 */ 1042 i = 0; 1043 while (left > 0) { 1044 NFSMGET(m); 1045 MCLGET(m, M_WAITOK); 1046 m->m_len = 0; 1047 siz = min(M_TRAILINGSPACE(m), left); 1048 left -= siz; 1049 i++; 1050 if (m3) 1051 m2->m_next = m; 1052 else 1053 m3 = m; 1054 m2 = m; 1055 } 1056 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 1057 m = m3; 1058 left = len; 1059 i = 0; 1060 while (left > 0) { 1061 if (m == NULL) 1062 panic("nfsrv_createiovec iov"); 1063 siz = min(M_TRAILINGSPACE(m), left); 1064 if (siz > 0) { 1065 iv->iov_base = mtod(m, caddr_t) + m->m_len; 1066 iv->iov_len = siz; 1067 m->m_len += siz; 1068 left -= siz; 1069 iv++; 1070 i++; 1071 } 1072 m = m->m_next; 1073 } 1074 *mpp = m3; 1075 *mpendp = m2; 1076 return (i); 1077 } 1078 1079 /* 1080 * Create an mbuf chain and an associated iovec that can be used to Read 1081 * or Getextattr of data. 1082 * Upon success, return pointers to the first and last mbufs in the chain 1083 * plus the malloc'd iovec and its iovlen. 1084 * Same as above, but creates ext_pgs mbuf(s). 1085 */ 1086 static int 1087 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, 1088 struct mbuf **mpendp, struct iovec **ivp) 1089 { 1090 struct mbuf *m, *m2 = NULL, *m3; 1091 struct iovec *iv; 1092 int i, left, pgno, siz; 1093 1094 left = len; 1095 m3 = NULL; 1096 /* 1097 * Generate the mbuf list with the uio_iov ref. to it. 1098 */ 1099 i = 0; 1100 while (left > 0) { 1101 siz = min(left, maxextsiz); 1102 m = mb_alloc_ext_plus_pages(siz, M_WAITOK); 1103 left -= siz; 1104 i += m->m_epg_npgs; 1105 if (m3 != NULL) 1106 m2->m_next = m; 1107 else 1108 m3 = m; 1109 m2 = m; 1110 } 1111 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 1112 m = m3; 1113 left = len; 1114 i = 0; 1115 pgno = 0; 1116 while (left > 0) { 1117 if (m == NULL) 1118 panic("nfsvno_createiovec_extpgs iov"); 1119 siz = min(PAGE_SIZE, left); 1120 if (siz > 0) { 1121 iv->iov_base = PHYS_TO_DMAP(m->m_epg_pa[pgno]); 1122 iv->iov_len = siz; 1123 m->m_len += siz; 1124 if (pgno == m->m_epg_npgs - 1) 1125 m->m_epg_last_len = siz; 1126 left -= siz; 1127 iv++; 1128 i++; 1129 pgno++; 1130 } 1131 if (pgno == m->m_epg_npgs && left > 0) { 1132 m = m->m_next; 1133 if (m == NULL) 1134 panic("nfsvno_createiovec_extpgs iov"); 1135 pgno = 0; 1136 } 1137 } 1138 *mpp = m3; 1139 *mpendp = m2; 1140 return (i); 1141 } 1142 1143 /* 1144 * Read vnode op call into mbuf list. 1145 */ 1146 int 1147 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 1148 int maxextsiz, struct thread *p, struct mbuf **mpp, 1149 struct mbuf **mpendp) 1150 { 1151 struct mbuf *m; 1152 struct iovec *iv; 1153 int error = 0, len, tlen, ioflag = 0; 1154 struct mbuf *m3; 1155 struct uio io, *uiop = &io; 1156 struct nfsheur *nh; 1157 1158 /* 1159 * Attempt to read from a DS file. A return of ENOENT implies 1160 * there is no DS file to read. 1161 */ 1162 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 1163 NULL, mpendp, NULL, NULL, NULL, 0, NULL); 1164 if (error != ENOENT) 1165 return (error); 1166 1167 len = NFSM_RNDUP(cnt); 1168 if (maxextsiz > 0) 1169 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 1170 &m3, &m, &iv); 1171 else 1172 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); 1173 uiop->uio_iov = iv; 1174 uiop->uio_offset = off; 1175 uiop->uio_resid = len; 1176 uiop->uio_rw = UIO_READ; 1177 uiop->uio_segflg = UIO_SYSSPACE; 1178 uiop->uio_td = NULL; 1179 nh = nfsrv_sequential_heuristic(uiop, vp); 1180 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 1181 /* XXX KDM make this more systematic? */ 1182 VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid; 1183 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 1184 free(iv, M_TEMP); 1185 if (error) { 1186 m_freem(m3); 1187 *mpp = NULL; 1188 goto out; 1189 } 1190 nh->nh_nextoff = uiop->uio_offset; 1191 tlen = len - uiop->uio_resid; 1192 cnt = cnt < tlen ? cnt : tlen; 1193 tlen = NFSM_RNDUP(cnt); 1194 if (tlen == 0) { 1195 m_freem(m3); 1196 m3 = m = NULL; 1197 } else if (len != tlen || tlen != cnt) 1198 m = nfsrv_adj(m3, len - tlen, tlen - cnt); 1199 *mpp = m3; 1200 *mpendp = m; 1201 1202 out: 1203 NFSEXITCODE(error); 1204 return (error); 1205 } 1206 1207 /* 1208 * Create the iovec for the mbuf chain passed in as an argument. 1209 * The "cp" argument is where the data starts within the first mbuf in 1210 * the chain. It returns the iovec and the iovcnt. 1211 */ 1212 static int 1213 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, 1214 int *iovcntp) 1215 { 1216 struct mbuf *mp; 1217 struct iovec *ivp; 1218 int cnt, i, len; 1219 1220 /* 1221 * Loop through the mbuf chain, counting how many mbufs are a 1222 * part of this write operation, so the iovec size is known. 1223 */ 1224 cnt = 0; 1225 len = retlen; 1226 mp = m; 1227 i = mtod(mp, caddr_t) + mp->m_len - cp; 1228 while (len > 0) { 1229 if (i > 0) { 1230 len -= i; 1231 cnt++; 1232 } 1233 mp = mp->m_next; 1234 if (!mp) { 1235 if (len > 0) 1236 return (EBADRPC); 1237 } else 1238 i = mp->m_len; 1239 } 1240 1241 /* Now, create the iovec. */ 1242 mp = m; 1243 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 1244 M_WAITOK); 1245 *iovcntp = cnt; 1246 i = mtod(mp, caddr_t) + mp->m_len - cp; 1247 len = retlen; 1248 while (len > 0) { 1249 if (mp == NULL) 1250 panic("nfsrv_createiovecw"); 1251 if (i > 0) { 1252 i = min(i, len); 1253 ivp->iov_base = cp; 1254 ivp->iov_len = i; 1255 ivp++; 1256 len -= i; 1257 } 1258 mp = mp->m_next; 1259 if (mp) { 1260 i = mp->m_len; 1261 cp = mtod(mp, caddr_t); 1262 } 1263 } 1264 return (0); 1265 } 1266 1267 /* 1268 * Write vnode op from an mbuf list. 1269 */ 1270 int 1271 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, 1272 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 1273 { 1274 struct iovec *iv; 1275 int cnt, ioflags, error; 1276 struct uio io, *uiop = &io; 1277 struct nfsheur *nh; 1278 1279 /* 1280 * Attempt to write to a DS file. A return of ENOENT implies 1281 * there is no DS file to write. 1282 */ 1283 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, 1284 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); 1285 if (error != ENOENT) { 1286 *stable = NFSWRITE_FILESYNC; 1287 return (error); 1288 } 1289 1290 if (*stable == NFSWRITE_UNSTABLE) 1291 ioflags = IO_NODELOCKED; 1292 else 1293 ioflags = (IO_SYNC | IO_NODELOCKED); 1294 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); 1295 if (error != 0) 1296 return (error); 1297 uiop->uio_iov = iv; 1298 uiop->uio_iovcnt = cnt; 1299 uiop->uio_resid = retlen; 1300 uiop->uio_rw = UIO_WRITE; 1301 uiop->uio_segflg = UIO_SYSSPACE; 1302 NFSUIOPROC(uiop, p); 1303 uiop->uio_offset = off; 1304 nh = nfsrv_sequential_heuristic(uiop, vp); 1305 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 1306 /* XXX KDM make this more systematic? */ 1307 VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 1308 error = VOP_WRITE(vp, uiop, ioflags, cred); 1309 if (error == 0) 1310 nh->nh_nextoff = uiop->uio_offset; 1311 free(iv, M_TEMP); 1312 1313 NFSEXITCODE(error); 1314 return (error); 1315 } 1316 1317 /* 1318 * Common code for creating a regular file (plus special files for V2). 1319 */ 1320 int 1321 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 1322 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 1323 int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) 1324 { 1325 u_quad_t tempsize; 1326 int error; 1327 struct thread *p = curthread; 1328 1329 error = nd->nd_repstat; 1330 if (!error && ndp->ni_vp == NULL) { 1331 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 1332 error = ENOENT; 1333 if (nvap->na_type == VREG && 1334 !TAILQ_EMPTY(&nfsrv_devidhead)) 1335 error = nfsvno_pnfsusenumfile(ndp, 1336 &nvap->na_vattr); 1337 if (error == ENOENT) { 1338 error = VOP_CREATE(ndp->ni_dvp, 1339 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1340 /* 1341 * For a pNFS server, create the data file 1342 * on a DS. 1343 */ 1344 if (error == 0 && nvap->na_type == VREG) { 1345 /* 1346 * Create a data file on a DS for a 1347 * pNFS server. 1348 * This function just returns if not 1349 * running a pNFS DS or the creation 1350 * fails. 1351 */ 1352 nfsrv_pnfscreate(ndp->ni_vp, 1353 &nvap->na_vattr, nd->nd_cred, p); 1354 } 1355 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? 1356 &ndp->ni_vp : NULL, false); 1357 } 1358 nfsvno_relpathbuf(ndp); 1359 if (!error) { 1360 if (*exclusive_flagp) { 1361 *exclusive_flagp = 0; 1362 NFSVNO_ATTRINIT(nvap); 1363 nvap->na_atime.tv_sec = cverf[0]; 1364 nvap->na_atime.tv_nsec = cverf[1]; 1365 error = VOP_SETATTR(ndp->ni_vp, 1366 &nvap->na_vattr, nd->nd_cred); 1367 if (error != 0) { 1368 vput(ndp->ni_vp); 1369 ndp->ni_vp = NULL; 1370 error = NFSERR_NOTSUPP; 1371 } 1372 } 1373 } 1374 /* 1375 * NFS V2 Only. nfsrvd_mknod() does this for V3. 1376 * (This implies, just get out on an error.) 1377 */ 1378 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 1379 nvap->na_type == VFIFO) { 1380 if (nvap->na_type == VCHR && rdev == 0xffffffff) 1381 nvap->na_type = VFIFO; 1382 if (nvap->na_type != VFIFO && 1383 (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { 1384 nfsvno_relpathbuf(ndp); 1385 vput(ndp->ni_dvp); 1386 goto out; 1387 } 1388 nvap->na_rdev = rdev; 1389 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1390 &ndp->ni_cnd, &nvap->na_vattr); 1391 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1392 NULL, false); 1393 nfsvno_relpathbuf(ndp); 1394 if (error) 1395 goto out; 1396 } else { 1397 nfsvno_relpathbuf(ndp); 1398 vput(ndp->ni_dvp); 1399 error = ENXIO; 1400 goto out; 1401 } 1402 *vpp = ndp->ni_vp; 1403 } else { 1404 /* 1405 * Handle cases where error is already set and/or 1406 * the file exists. 1407 * 1 - clean up the lookup 1408 * 2 - iff !error and na_size set, truncate it 1409 */ 1410 nfsvno_relpathbuf(ndp); 1411 *vpp = ndp->ni_vp; 1412 if (ndp->ni_dvp == *vpp) 1413 vrele(ndp->ni_dvp); 1414 else 1415 vput(ndp->ni_dvp); 1416 if (!error && nvap->na_size != VNOVAL) { 1417 error = nfsvno_accchk(*vpp, VWRITE, 1418 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1419 NFSACCCHK_VPISLOCKED, NULL); 1420 if (!error) { 1421 tempsize = nvap->na_size; 1422 NFSVNO_ATTRINIT(nvap); 1423 nvap->na_size = tempsize; 1424 error = nfsvno_setattr(*vpp, nvap, 1425 nd->nd_cred, p, exp); 1426 } 1427 } 1428 if (error) 1429 vput(*vpp); 1430 } 1431 1432 out: 1433 NFSEXITCODE(error); 1434 return (error); 1435 } 1436 1437 /* 1438 * Do a mknod vnode op. 1439 */ 1440 int 1441 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1442 struct thread *p) 1443 { 1444 int error = 0; 1445 __enum_uint8(vtype) vtyp; 1446 1447 vtyp = nvap->na_type; 1448 /* 1449 * Iff doesn't exist, create it. 1450 */ 1451 if (ndp->ni_vp) { 1452 nfsvno_relpathbuf(ndp); 1453 vput(ndp->ni_dvp); 1454 vrele(ndp->ni_vp); 1455 error = EEXIST; 1456 goto out; 1457 } 1458 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1459 nfsvno_relpathbuf(ndp); 1460 vput(ndp->ni_dvp); 1461 error = NFSERR_BADTYPE; 1462 goto out; 1463 } 1464 if (vtyp == VSOCK) { 1465 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1466 &ndp->ni_cnd, &nvap->na_vattr); 1467 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, 1468 false); 1469 nfsvno_relpathbuf(ndp); 1470 } else { 1471 if (nvap->na_type != VFIFO && 1472 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { 1473 nfsvno_relpathbuf(ndp); 1474 vput(ndp->ni_dvp); 1475 goto out; 1476 } 1477 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1478 &ndp->ni_cnd, &nvap->na_vattr); 1479 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, 1480 false); 1481 nfsvno_relpathbuf(ndp); 1482 /* 1483 * Since VOP_MKNOD returns the ni_vp, I can't 1484 * see any reason to do the lookup. 1485 */ 1486 } 1487 1488 out: 1489 NFSEXITCODE(error); 1490 return (error); 1491 } 1492 1493 /* 1494 * Mkdir vnode op. 1495 */ 1496 int 1497 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1498 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1499 { 1500 int error = 0; 1501 1502 if (ndp->ni_vp != NULL) { 1503 if (ndp->ni_dvp == ndp->ni_vp) 1504 vrele(ndp->ni_dvp); 1505 else 1506 vput(ndp->ni_dvp); 1507 vrele(ndp->ni_vp); 1508 nfsvno_relpathbuf(ndp); 1509 error = EEXIST; 1510 goto out; 1511 } 1512 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1513 &nvap->na_vattr); 1514 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); 1515 nfsvno_relpathbuf(ndp); 1516 1517 out: 1518 NFSEXITCODE(error); 1519 return (error); 1520 } 1521 1522 /* 1523 * symlink vnode op. 1524 */ 1525 int 1526 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1527 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1528 struct nfsexstuff *exp) 1529 { 1530 int error = 0; 1531 1532 if (ndp->ni_vp) { 1533 nfsvno_relpathbuf(ndp); 1534 if (ndp->ni_dvp == ndp->ni_vp) 1535 vrele(ndp->ni_dvp); 1536 else 1537 vput(ndp->ni_dvp); 1538 vrele(ndp->ni_vp); 1539 error = EEXIST; 1540 goto out; 1541 } 1542 1543 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1544 &nvap->na_vattr, pathcp); 1545 /* 1546 * Although FreeBSD still had the lookup code in 1547 * it for 7/current, there doesn't seem to be any 1548 * point, since VOP_SYMLINK() returns the ni_vp. 1549 * Just vput it for v2. 1550 */ 1551 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0); 1552 nfsvno_relpathbuf(ndp); 1553 1554 out: 1555 NFSEXITCODE(error); 1556 return (error); 1557 } 1558 1559 /* 1560 * Parse symbolic link arguments. 1561 * This function has an ugly side effect. It will malloc() an area for 1562 * the symlink and set iov_base to point to it, only if it succeeds. 1563 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1564 * be FREE'd later. 1565 */ 1566 int 1567 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1568 struct thread *p, char **pathcpp, int *lenp) 1569 { 1570 u_int32_t *tl; 1571 char *pathcp = NULL; 1572 int error = 0, len; 1573 struct nfsv2_sattr *sp; 1574 1575 *pathcpp = NULL; 1576 *lenp = 0; 1577 if ((nd->nd_flag & ND_NFSV3) && 1578 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, NULL, p))) 1579 goto nfsmout; 1580 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1581 len = fxdr_unsigned(int, *tl); 1582 if (len > NFS_MAXPATHLEN || len <= 0) { 1583 error = EBADRPC; 1584 goto nfsmout; 1585 } 1586 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1587 error = nfsrv_mtostr(nd, pathcp, len); 1588 if (error) 1589 goto nfsmout; 1590 if (nd->nd_flag & ND_NFSV2) { 1591 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1592 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1593 } 1594 *pathcpp = pathcp; 1595 *lenp = len; 1596 NFSEXITCODE2(0, nd); 1597 return (0); 1598 nfsmout: 1599 if (pathcp) 1600 free(pathcp, M_TEMP); 1601 NFSEXITCODE2(error, nd); 1602 return (error); 1603 } 1604 1605 /* 1606 * Remove a non-directory object. 1607 */ 1608 int 1609 nfsvno_removesub(struct nameidata *ndp, bool is_v4, struct nfsrv_descript *nd, 1610 struct thread *p, struct nfsexstuff *exp) 1611 { 1612 struct vnode *vp, **dsdvpp, *newvp; 1613 struct mount *mp; 1614 int error = 0, dsfilecnt, ret; 1615 char fname[PNFS_FILENAME_LEN + 1]; 1616 fhandle_t fh; 1617 1618 vp = ndp->ni_vp; 1619 dsdvpp = NULL; 1620 if (vp->v_type == VDIR) { 1621 error = NFSERR_ISDIR; 1622 } else if (is_v4) { 1623 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1624 error = nfsrv_checkremove(vp, 1, NULL, 1625 (nfsquad_t)((u_quad_t)0), p); 1626 else 1627 error = nfsrv_checkremove(vp, 1, NULL, nd->nd_clientid, 1628 p); 1629 } 1630 if (error == 0) { 1631 nfsrv_pnfsremovesetup(vp, p, &dsdvpp, &dsfilecnt, fname, &fh); 1632 NFSD_DEBUG(4, "nfsrv_pnfsremovesetup err=%d dsfilecnt=%d\n", 1633 error, dsfilecnt); 1634 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1635 } 1636 if (error == 0 && dsdvpp != NULL) { 1637 nfsrv_pnfsremove(dsdvpp, dsfilecnt, fname, &fh, p); 1638 NFSD_DEBUG(4, "aft nfsrv_pnfsremove dsfilecnt=%d fname=%s\n", 1639 dsfilecnt, fname); 1640 } 1641 free(dsdvpp, M_TEMP); 1642 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) 1643 error = nfsvno_getfh(vp, &fh, p); 1644 if (ndp->ni_dvp == vp) 1645 vrele(ndp->ni_dvp); 1646 else 1647 vput(ndp->ni_dvp); 1648 vput(vp); 1649 1650 /* Use ret to determine if the file still exists. */ 1651 if (is_v4 && (nd->nd_flag & ND_NFSV41) != 0 && error == 0) { 1652 mp = vfs_busyfs(&fh.fh_fsid); 1653 if (mp != NULL) { 1654 /* Find out if the file still exists. */ 1655 ret = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &newvp); 1656 if (ret == 0) 1657 vput(newvp); 1658 else 1659 ret = ESTALE; 1660 vfs_unbusy(mp); 1661 } else { 1662 ret = ESTALE; 1663 } 1664 if (ret == ESTALE) { 1665 /* Get rid of any delegation. */ 1666 nfsrv_removedeleg(&fh, nd, p); 1667 } 1668 } 1669 1670 nfsvno_relpathbuf(ndp); 1671 NFSEXITCODE(error); 1672 return (error); 1673 } 1674 1675 /* 1676 * Remove a directory. 1677 */ 1678 int 1679 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1680 struct thread *p, struct nfsexstuff *exp) 1681 { 1682 struct vnode *vp; 1683 int error = 0; 1684 1685 vp = ndp->ni_vp; 1686 if (vp->v_type != VDIR) { 1687 error = ENOTDIR; 1688 goto out; 1689 } 1690 /* 1691 * No rmdir "." please. 1692 */ 1693 if (ndp->ni_dvp == vp) { 1694 error = EINVAL; 1695 goto out; 1696 } 1697 /* 1698 * The root of a mounted filesystem cannot be deleted. 1699 */ 1700 if (vp->v_vflag & VV_ROOT) 1701 error = EBUSY; 1702 out: 1703 if (!error) 1704 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1705 if (ndp->ni_dvp == vp) 1706 vrele(ndp->ni_dvp); 1707 else 1708 vput(ndp->ni_dvp); 1709 vput(vp); 1710 nfsvno_relpathbuf(ndp); 1711 NFSEXITCODE(error); 1712 return (error); 1713 } 1714 1715 /* 1716 * Rename vnode op. 1717 */ 1718 int 1719 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1720 struct nfsrv_descript *nd, struct thread *p) 1721 { 1722 struct vnode *fvp, *tvp, *tdvp, **dsdvpp, *newvp; 1723 struct mount *mp; 1724 int error = 0, dsfilecnt, ret; 1725 char fname[PNFS_FILENAME_LEN + 1]; 1726 fhandle_t fh, fh2; 1727 1728 dsdvpp = NULL; 1729 fvp = fromndp->ni_vp; 1730 if (nd->nd_repstat != 0) { 1731 vrele(fromndp->ni_dvp); 1732 vrele(fvp); 1733 error = nd->nd_repstat; 1734 goto out1; 1735 } 1736 tdvp = tondp->ni_dvp; 1737 tvp = tondp->ni_vp; 1738 if (tvp != NULL) { 1739 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1740 error = (nd->nd_flag & ND_NFSV2) ? EISDIR : EEXIST; 1741 goto out; 1742 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1743 error = (nd->nd_flag & ND_NFSV2) ? ENOTDIR : EEXIST; 1744 goto out; 1745 } 1746 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1747 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1748 goto out; 1749 } 1750 1751 /* 1752 * A rename to '.' or '..' results in a prematurely 1753 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1754 * here. 1755 */ 1756 if ((tondp->ni_cnd.cn_namelen == 1 && 1757 tondp->ni_cnd.cn_nameptr[0] == '.') || 1758 (tondp->ni_cnd.cn_namelen == 2 && 1759 tondp->ni_cnd.cn_nameptr[0] == '.' && 1760 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1761 error = EINVAL; 1762 goto out; 1763 } 1764 } 1765 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1766 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1767 goto out; 1768 } 1769 if (fvp->v_mount != tdvp->v_mount) { 1770 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1771 goto out; 1772 } 1773 if (fvp == tdvp) { 1774 error = (nd->nd_flag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1775 goto out; 1776 } 1777 if (fvp == tvp) { 1778 /* 1779 * If source and destination are the same, there is 1780 * nothing to do. Set error to EJUSTRETURN to indicate 1781 * this. 1782 */ 1783 error = EJUSTRETURN; 1784 goto out; 1785 } 1786 if (nd->nd_flag & ND_NFSV4) { 1787 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1788 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1789 error = nfsrv_checkremove(fvp, 0, NULL, 1790 (nfsquad_t)((u_quad_t)0), p); 1791 else 1792 error = nfsrv_checkremove(fvp, 0, NULL, 1793 nd->nd_clientid, p); 1794 NFSVOPUNLOCK(fvp); 1795 } else 1796 error = EPERM; 1797 if (tvp && !error) { 1798 if (nfsrv_recalldeleg || (nd->nd_flag & ND_NFSV41) == 0) 1799 error = nfsrv_checkremove(tvp, 1, NULL, 1800 (nfsquad_t)((u_quad_t)0), p); 1801 else 1802 error = nfsrv_checkremove(tvp, 1, NULL, 1803 nd->nd_clientid, p); 1804 } 1805 } else { 1806 /* 1807 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1808 * that the NFSv4 client won't be confused by the rename. 1809 * Since nfsd_recalldelegation() can only be called on an 1810 * unlocked vnode at this point and fvp is the file that will 1811 * still exist after the rename, just do fvp. 1812 */ 1813 nfsd_recalldelegation(fvp, p); 1814 } 1815 if (error == 0 && tvp != NULL) { 1816 if ((nd->nd_flag & ND_NFSV41) != 0) 1817 error = nfsvno_getfh(tvp, &fh2, p); 1818 if (error == 0) 1819 nfsrv_pnfsremovesetup(tvp, p, &dsdvpp, &dsfilecnt, 1820 fname, &fh); 1821 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1822 " dsdvpp=%p\n", dsdvpp); 1823 } 1824 out: 1825 mp = NULL; 1826 if (error == 0) { 1827 error = VOP_GETWRITEMOUNT(tondp->ni_dvp, &mp); 1828 if (error == 0) { 1829 if (mp == NULL) { 1830 error = ENOENT; 1831 } else { 1832 error = lockmgr(&mp->mnt_renamelock, 1833 LK_EXCLUSIVE | LK_NOWAIT, NULL); 1834 if (error != 0) 1835 error = ERELOOKUP; 1836 } 1837 } 1838 } 1839 if (error == 0) { 1840 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1841 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1842 &tondp->ni_cnd, 0); 1843 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); 1844 vfs_rel(mp); 1845 } else { 1846 if (tdvp == tvp) 1847 vrele(tdvp); 1848 else 1849 vput(tdvp); 1850 if (tvp) 1851 vput(tvp); 1852 vrele(fromndp->ni_dvp); 1853 vrele(fvp); 1854 if (error == EJUSTRETURN) { 1855 error = 0; 1856 } else if (error == ERELOOKUP && mp != NULL) { 1857 lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); 1858 lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); 1859 vfs_rel(mp); 1860 } 1861 } 1862 1863 /* 1864 * If dsdvpp != NULL, it was set up by nfsrv_pnfsremovesetup() and 1865 * if the rename succeeded, the DS file for the tvp needs to be 1866 * removed. 1867 */ 1868 if (error == 0 && dsdvpp != NULL) { 1869 nfsrv_pnfsremove(dsdvpp, dsfilecnt, fname, &fh, p); 1870 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1871 free(dsdvpp, M_TEMP); 1872 } 1873 1874 /* Use ret to determine if the file still exists. */ 1875 if ((nd->nd_flag & ND_NFSV41) != 0 && error == 0) { 1876 mp = vfs_busyfs(&fh2.fh_fsid); 1877 if (mp != NULL) { 1878 /* Find out if the file still exists. */ 1879 ret = VFS_FHTOVP(mp, &fh2.fh_fid, LK_SHARED, &newvp); 1880 if (ret == 0) 1881 vput(newvp); 1882 else 1883 ret = ESTALE; 1884 vfs_unbusy(mp); 1885 } else { 1886 ret = ESTALE; 1887 } 1888 if (ret == ESTALE) { 1889 /* Get rid of any delegation. */ 1890 nfsrv_removedeleg(&fh2, nd, p); 1891 } 1892 } 1893 1894 nfsvno_relpathbuf(tondp); 1895 out1: 1896 nfsvno_relpathbuf(fromndp); 1897 NFSEXITCODE(error); 1898 return (error); 1899 } 1900 1901 /* 1902 * Link vnode op. 1903 */ 1904 int 1905 nfsvno_link(struct nameidata *ndp, struct vnode *vp, nfsquad_t clientid, 1906 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1907 { 1908 struct vnode *xp; 1909 int error = 0; 1910 1911 xp = ndp->ni_vp; 1912 if (xp != NULL) { 1913 error = EEXIST; 1914 } else { 1915 xp = ndp->ni_dvp; 1916 if (vp->v_mount != xp->v_mount) 1917 error = EXDEV; 1918 } 1919 if (!error) { 1920 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1921 if (!VN_IS_DOOMED(vp)) { 1922 error = nfsrv_checkremove(vp, 0, NULL, clientid, p); 1923 if (error == 0) 1924 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1925 } else 1926 error = EPERM; 1927 if (ndp->ni_dvp == vp) { 1928 vrele(ndp->ni_dvp); 1929 NFSVOPUNLOCK(vp); 1930 } else { 1931 vref(vp); 1932 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true); 1933 } 1934 } else { 1935 if (ndp->ni_dvp == ndp->ni_vp) 1936 vrele(ndp->ni_dvp); 1937 else 1938 vput(ndp->ni_dvp); 1939 if (ndp->ni_vp) 1940 vrele(ndp->ni_vp); 1941 } 1942 nfsvno_relpathbuf(ndp); 1943 NFSEXITCODE(error); 1944 return (error); 1945 } 1946 1947 /* 1948 * Do the fsync() appropriate for the commit. 1949 */ 1950 int 1951 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1952 struct thread *td) 1953 { 1954 int error = 0; 1955 1956 /* 1957 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1958 * file is done. At this time VOP_FSYNC does not accept offset and 1959 * byte count parameters so call VOP_FSYNC the whole file for now. 1960 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1961 * File systems that do not use the buffer cache (as indicated 1962 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 1963 */ 1964 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1965 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1966 /* 1967 * Give up and do the whole thing 1968 */ 1969 vnode_pager_clean_sync(vp); 1970 error = VOP_FSYNC(vp, MNT_WAIT, td); 1971 } else { 1972 /* 1973 * Locate and synchronously write any buffers that fall 1974 * into the requested range. Note: we are assuming that 1975 * f_iosize is a power of 2. 1976 */ 1977 int iosize = vp->v_mount->mnt_stat.f_iosize; 1978 int iomask = iosize - 1; 1979 struct bufobj *bo; 1980 daddr_t lblkno; 1981 1982 /* 1983 * Align to iosize boundary, super-align to page boundary. 1984 */ 1985 if (off & iomask) { 1986 cnt += off & iomask; 1987 off &= ~(u_quad_t)iomask; 1988 } 1989 if (off & PAGE_MASK) { 1990 cnt += off & PAGE_MASK; 1991 off &= ~(u_quad_t)PAGE_MASK; 1992 } 1993 lblkno = off / iosize; 1994 1995 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1996 VM_OBJECT_WLOCK(vp->v_object); 1997 vm_object_page_clean(vp->v_object, off, off + cnt, 1998 OBJPC_SYNC); 1999 VM_OBJECT_WUNLOCK(vp->v_object); 2000 } 2001 2002 bo = &vp->v_bufobj; 2003 BO_LOCK(bo); 2004 while (cnt > 0) { 2005 struct buf *bp; 2006 2007 /* 2008 * If we have a buffer and it is marked B_DELWRI we 2009 * have to lock and write it. Otherwise the prior 2010 * write is assumed to have already been committed. 2011 * 2012 * gbincore() can return invalid buffers now so we 2013 * have to check that bit as well (though B_DELWRI 2014 * should not be set if B_INVAL is set there could be 2015 * a race here since we haven't locked the buffer). 2016 */ 2017 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 2018 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 2019 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 2020 BO_LOCK(bo); 2021 continue; /* retry */ 2022 } 2023 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 2024 B_DELWRI) { 2025 bremfree(bp); 2026 bp->b_flags &= ~B_ASYNC; 2027 bwrite(bp); 2028 ++nfs_commit_miss; 2029 } else 2030 BUF_UNLOCK(bp); 2031 BO_LOCK(bo); 2032 } 2033 ++nfs_commit_blks; 2034 if (cnt < iosize) 2035 break; 2036 cnt -= iosize; 2037 ++lblkno; 2038 } 2039 BO_UNLOCK(bo); 2040 } 2041 NFSEXITCODE(error); 2042 return (error); 2043 } 2044 2045 /* 2046 * Statfs vnode op. 2047 */ 2048 int 2049 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 2050 { 2051 struct statfs *tsf; 2052 int error; 2053 2054 tsf = NULL; 2055 if (nfsrv_devidcnt > 0) { 2056 /* For a pNFS service, get the DS numbers. */ 2057 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 2058 error = nfsrv_pnfsstatfs(tsf, vp->v_mount); 2059 if (error != 0) { 2060 free(tsf, M_TEMP); 2061 tsf = NULL; 2062 } 2063 } 2064 error = VFS_STATFS(vp->v_mount, sf); 2065 if (error == 0) { 2066 if (tsf != NULL) { 2067 sf->f_blocks = tsf->f_blocks; 2068 sf->f_bavail = tsf->f_bavail; 2069 sf->f_bfree = tsf->f_bfree; 2070 sf->f_bsize = tsf->f_bsize; 2071 } 2072 /* 2073 * Since NFS handles these values as unsigned on the 2074 * wire, there is no way to represent negative values, 2075 * so set them to 0. Without this, they will appear 2076 * to be very large positive values for clients like 2077 * Solaris10. 2078 */ 2079 if (sf->f_bavail < 0) 2080 sf->f_bavail = 0; 2081 if (sf->f_ffree < 0) 2082 sf->f_ffree = 0; 2083 } 2084 free(tsf, M_TEMP); 2085 NFSEXITCODE(error); 2086 return (error); 2087 } 2088 2089 /* 2090 * Replenish the numfiles in .pnfshide/numfiles directory. 2091 * These files are used for the pNFS server when an Open/Create needs a 2092 * new regular file. By creating them here asynchronously, we can avoid 2093 * the delay of doing do for Open/Create, since creation requires RPCs to 2094 * the DSs be done. 2095 * (A) - When the sleep times out, work backwards creating 2096 * one new numfile for each cycle. 2097 * (Use a timeout of 10msec for now.) 2098 * (B) - When the sleep returns 0, this indicates that a 2099 * nfsd thread didn't find a numfiles. For this case 2100 * be more agressive and create numfiles going forward. 2101 * (Use a timeout of 1msec for now.) 2102 * Runs as a kernel process. 2103 */ 2104 static char pnfshide_name[] = ".pnfshide"; 2105 static char numfiles_name[] = "numfiles"; 2106 2107 static void 2108 nfsvno_pnfsreplenish(void *arg) 2109 { 2110 struct componentname cn; 2111 struct vattr va; 2112 char name[11]; 2113 struct timespec ts; 2114 struct mount *mp = (struct mount *)arg, *temp_mp; 2115 struct ucred *cred; 2116 struct vnode *numfiledvp, *vp; 2117 struct netexport *nep; 2118 uint64_t prevcnt; 2119 time_t prevsec; 2120 u_int cnt, last_back, next_back, next_forw, prevrate[4]; 2121 int averate, error, i, timo; 2122 bool back, use_same_num; 2123 2124 cred = curthread->td_ucred; 2125 if (cred->cr_uid != 0) 2126 printf("nfsvno_pnfsreplenish: not root\n"); 2127 2128 /* 2129 * Do a lookup for ".pnfshide" in the root dir 2130 * of the file system. 2131 */ 2132 cn.cn_nameiop = LOOKUP; 2133 cn.cn_lkflags = LK_SHARED; 2134 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | NOCROSSMOUNT; 2135 cn.cn_cred = cred; 2136 cn.cn_nameptr = pnfshide_name; 2137 cn.cn_namelen = sizeof(pnfshide_name) - 1; 2138 vp = NULL; 2139 numfiledvp = NULL; 2140 error = vn_lock(mp->mnt_rootvnode, LK_SHARED); 2141 if (error == 0) { 2142 error = VOP_LOOKUP(mp->mnt_rootvnode, &vp, &cn); 2143 VOP_UNLOCK(mp->mnt_rootvnode); 2144 } 2145 2146 /* 2147 * Do a lookup for "numfiles" in the ".pnfshide" dir 2148 * of the file system. 2149 */ 2150 if (error == 0) { 2151 cn.cn_nameiop = LOOKUP; 2152 cn.cn_lkflags = LK_SHARED; 2153 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | NOCROSSMOUNT; 2154 cn.cn_cred = cred; 2155 cn.cn_nameptr = numfiles_name; 2156 cn.cn_namelen = sizeof(numfiles_name) - 1; 2157 error = VOP_LOOKUP(vp, &numfiledvp, &cn); 2158 vput(vp); 2159 if (error == 0) 2160 VOP_UNLOCK(numfiledvp); 2161 } 2162 lockmgr(&mp->mnt_explock, LK_SHARED, NULL); 2163 nep = mp->mnt_export; 2164 if (nep != NULL) 2165 (void)vfs_netexport_acquire(nep); 2166 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); 2167 2168 /* 2169 * The states for ne_pnfsnumfile are as follows: 2170 * NULL - Initial state for normal operation. 2171 * PNFSD_START - Transition state when the 2172 * replenisher kernel process is starting up. 2173 * non-NULL valid pointer - Points to the directory vnode for the 2174 * "numfiles" directory. 2175 * PNFSD_STOP - Transition state when the 2176 * replenisher kernel process is shutting down. 2177 * PNFSD_STOPPED - Replenisher kernel process has stopped and 2178 * vrele()'d the "numfiles" directory vnode. 2179 */ 2180 if (error != 0 || nep == NULL) { 2181 if (error == 0) 2182 error = ENOENT; 2183 goto out; 2184 } 2185 MNTEXP_LOCK(nep); 2186 KASSERT(nep->ne_pnfsnumfile == PNFSD_START || 2187 nep->ne_pnfsnumfile == PNFSD_STOP, 2188 ("nfsvno_pnfsreplenish: ne_pnfsnumfile not PNFSD_START/STOP")); 2189 if (nep->ne_pnfsnumfile == PNFSD_START) { 2190 nep->ne_pnfsnumfile = numfiledvp; 2191 wakeup(&nep->ne_pnfsnumfile); 2192 } 2193 MNTEXP_UNLOCK(nep); 2194 2195 VATTR_NULL(&va); 2196 va.va_mode = 0644; 2197 va.va_type = VREG; 2198 timo = hz / 1000; 2199 if (timo == 0) 2200 timo = 1; 2201 cnt = 5; 2202 back = false; 2203 use_same_num = false; 2204 prevcnt = 0; 2205 for (i = 0; i < 4; i++) 2206 prevrate[i] = 1; 2207 prevsec = 0; 2208 averate = 100; 2209 last_back = next_back = next_forw = UINT_MAX; 2210 2211 /* Loop around sleeping and then doing (A) or (B) */ 2212 for (;;) { 2213 /* Sample replenish rate once/sec. */ 2214 getnanouptime(&ts); 2215 if (ts.tv_sec != prevsec) { 2216 /* Calculate a moving ave. of creates/sec. */ 2217 prevsec = ts.tv_sec; 2218 for (i = 0; i < 3; i++) 2219 prevrate[i + 1] = prevrate[i]; 2220 prevrate[0] = atomic_load_int(&nep->ne_pnfsnumcnt) - 2221 prevcnt; 2222 if (prevrate[0] < 1) 2223 prevrate[0] = 1; 2224 prevcnt = atomic_load_int(&nep->ne_pnfsnumcnt); 2225 averate = prevrate[0] * 4 / 10 + prevrate[1] * 3 / 10 + 2226 prevrate[2] * 2 / 10 + prevrate[3] / 10; 2227 if (averate < 1) 2228 averate = 1; 2229 averate *= 2; 2230 } 2231 2232 if (cnt == 0) { 2233 error = tsleep(&mp->mnt_export, PVFS, "pnfsrpl", timo); 2234 if (error == ETIMEDOUT || error == EAGAIN) { 2235 if (!back) 2236 next_back = last_back; 2237 back = true; 2238 timo = hz / averate; 2239 if (timo == 0) 2240 timo = 1; 2241 } else { 2242 if (back) { 2243 next_forw = UINT_MAX; 2244 last_back = next_back; 2245 } 2246 back = false; 2247 cnt = nfsrv_pnfsforwcnt; 2248 timo = hz / 1000; 2249 if (timo == 0) 2250 timo = 1; 2251 nfsrv_pnfsswitchforw++; 2252 } 2253 } 2254 2255 /* Check for exports having gone away. */ 2256 if (mp->mnt_export == NULL) 2257 break; 2258 /* And check for replenisher being stopped. */ 2259 MNTEXP_LOCK(nep); 2260 if (nep->ne_pnfsnumfile != PNFSD_START && 2261 nep->ne_pnfsnumfile != PNFSD_STOP) { 2262 KASSERT(numfiledvp == nep->ne_pnfsnumfile, 2263 ("nfsvno_pnfsreplenish: numfiledvp changed")); 2264 MNTEXP_UNLOCK(nep); 2265 } else { 2266 MNTEXP_UNLOCK(nep); 2267 break; 2268 } 2269 2270 if (back) { 2271 /* This is (A) in this function's comment above. */ 2272 error = vn_start_write(numfiledvp, &temp_mp, V_NOWAIT); 2273 if (error == 0) 2274 error = vn_lock(numfiledvp, LK_EXCLUSIVE | 2275 LK_NOWAIT); 2276 if (error != 0 && temp_mp != NULL) 2277 vn_finished_write(temp_mp); 2278 if (error == EBUSY || error == EWOULDBLOCK) 2279 continue; 2280 if (error != 0) 2281 break; 2282 if (next_back == UINT_MAX) { 2283 if (nep->ne_pnfsnextfile == 0) 2284 next_back = nfsrv_pnfsmaxnumfiles - 1; 2285 else 2286 next_back = nep->ne_pnfsnextfile - 1; 2287 } else if (!use_same_num) { 2288 if (next_back == 0) 2289 next_back = nfsrv_pnfsmaxnumfiles - 1; 2290 else 2291 next_back--; 2292 } 2293 snprintf(name, sizeof(name), "%d", next_back); 2294 } else { 2295 /* This is (B) in this function's comment, above. */ 2296 vn_start_write(numfiledvp, &temp_mp, V_WAIT); 2297 error = vn_lock(numfiledvp, LK_EXCLUSIVE); 2298 if (error != 0 && temp_mp != NULL) 2299 vn_finished_write(temp_mp); 2300 if (error != 0) 2301 break; 2302 if (next_forw == UINT_MAX) 2303 next_forw = nep->ne_pnfsnextfile; 2304 else if (!use_same_num) 2305 next_forw = (next_forw + 1) % 2306 nfsrv_pnfsmaxnumfiles; 2307 snprintf(name, sizeof(name), "%d", next_forw); 2308 } 2309 use_same_num = false; 2310 2311 /* Do a lookup for the file. */ 2312 cn.cn_nameiop = CREATE; 2313 cn.cn_lkflags = LK_EXCLUSIVE; 2314 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | LOCKPARENT | 2315 NOCROSSMOUNT | MAKEENTRY; 2316 cn.cn_cred = cred; 2317 cn.cn_nameptr = name; 2318 cn.cn_namelen = strlen(name); 2319 vref(numfiledvp); 2320 error = VOP_LOOKUP(numfiledvp, &vp, &cn); 2321 if (error == 0) { 2322 VOP_VPUT_PAIR(numfiledvp, &vp, true); 2323 if (temp_mp != NULL) 2324 vn_finished_write(temp_mp); 2325 if (back) { 2326 last_back = next_back = UINT_MAX; 2327 timo = hz / 10; 2328 if (timo == 0) 2329 timo = 1; 2330 } else { 2331 timo = hz / 100; 2332 if (timo == 0) 2333 timo = 1; 2334 } 2335 cnt = 0; 2336 continue; 2337 } else if (error != ENOENT && error != EJUSTRETURN) { 2338 VOP_VPUT_PAIR(numfiledvp, NULL, true); 2339 if (temp_mp != NULL) 2340 vn_finished_write(temp_mp); 2341 if (error == ERELOOKUP) { 2342 use_same_num = true; 2343 continue; 2344 } 2345 printf("nfsvno_pnfsreplenish: lookup failed %d\n", 2346 error); 2347 break; 2348 } 2349 2350 /* Create the numfile and its DS file(s). */ 2351 error = VOP_CREATE(numfiledvp, &vp, &cn, &va); 2352 if (error == 0) { 2353 /* 2354 * Create a data file on a DS for a pNFS 2355 * server. This function just returns if 2356 * not running a pNFS DS or the creation 2357 * fails. 2358 */ 2359 nfsrv_pnfscreate(vp, &va, cred, curthread); 2360 } else 2361 printf("nfsvno_pnfsreplenish: vop_create failed %d\n", 2362 error); 2363 VOP_VPUT_PAIR(numfiledvp, error == 0 ? &vp : NULL, true); 2364 if (temp_mp != NULL) 2365 vn_finished_write(temp_mp); 2366 cnt = cnt > 0 ? cnt - 1 : 0; 2367 } 2368 out: 2369 if (numfiledvp != NULL) 2370 vrele(numfiledvp); 2371 if (nep != NULL) { 2372 MNTEXP_LOCK(nep); 2373 nep->ne_pnfsnumfile = PNFSD_STOPPED; 2374 wakeup(&mp->mnt_explock); 2375 MNTEXP_UNLOCK(nep); 2376 vfs_netexport_release(nep); 2377 } else 2378 wakeup(&mp->mnt_explock); 2379 kproc_exit(0); 2380 } 2381 2382 /* 2383 * Do a lookup of a file in the .numfiles directory. 2384 * If successful, use VOP_SETATTR() to set the uid/gid/mode and 2385 * then VOP_LINK()/VOP_REMOVE() the num file. 2386 * Return ENOENT to indicate that nfsvno_open() should fall back to 2387 * doing VOP_CREATE(), other errors for failure. 2388 * XXX This code probably is not correct for a stacked file 2389 * system, but should never be used for that case. 2390 */ 2391 static int 2392 nfsvno_pnfsusenumfile(struct nameidata *ndp, struct vattr *vap) 2393 { 2394 struct componentname cn; 2395 struct vattr va; 2396 char name[11]; 2397 gid_t gid; 2398 struct ucred *cred, *savcred; 2399 struct vnode *numfiledvp; 2400 struct mount *mp; 2401 struct netexport *nep; 2402 u_int nextf; 2403 int error; 2404 2405 cred = newnfs_getcred(); 2406 /* 2407 * Not sure if this is necessary. If all VOP calls use 2408 * cn_cred, it is not. 2409 */ 2410 savcred = curthread->td_ucred; 2411 curthread->td_ucred = cred; 2412 2413 /* 2414 * If the replenish kernel process is not yet running, 2415 * start it up now. 2416 */ 2417 numfiledvp = NULL; 2418 ndp->ni_vp = NULL; 2419 mp = ndp->ni_dvp->v_mount; 2420 lockmgr(&mp->mnt_explock, LK_SHARED, NULL); 2421 nep = mp->mnt_export; 2422 if (nep == NULL) { 2423 error = ENOENT; 2424 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); 2425 goto out; 2426 } 2427 (void)vfs_netexport_acquire(nep); 2428 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL); 2429 MNTEXP_LOCK(nep); 2430 if (nep->ne_pnfsnumfile == NULL) { 2431 /* Mark kernel process startup in-progress. */ 2432 nep->ne_pnfsnumfile = PNFSD_START; 2433 MNTEXP_UNLOCK(nep); 2434 2435 /* Create the replenish kernel process. */ 2436 error = kproc_create(nfsvno_pnfsreplenish, mp, NULL, RFHIGHPID, 2437 0, "pnfsreplenish"); 2438 if (error != 0) { 2439 printf("nfsvno_pnfsusenumfile: replenish won't start" 2440 " %d\n", error); 2441 error = ENOENT; 2442 goto out; 2443 } 2444 2445 /* And wait for it to set up ne_pnfsnumfile. */ 2446 MNTEXP_LOCK(nep); 2447 (void)msleep(&nep->ne_pnfsnumfile, MNTEXP_MTX(nep), PVFS, 2448 "pnfsnumf", hz); 2449 } 2450 2451 if (nep->ne_pnfsnumfile == NULL || 2452 nep->ne_pnfsnumfile == PNFSD_START || 2453 nep->ne_pnfsnumfile == PNFSD_STOP) { 2454 MNTEXP_UNLOCK(nep); 2455 error = ENOENT; 2456 goto out; 2457 } else { 2458 numfiledvp = nep->ne_pnfsnumfile; 2459 MNTEXP_UNLOCK(nep); 2460 /* 2461 * Check to ensure the new file is not in ".pnfshide/numfiles". 2462 */ 2463 if (numfiledvp == ndp->ni_dvp) { 2464 error = ENOENT; 2465 numfiledvp = NULL; 2466 goto out; 2467 } 2468 } 2469 2470 error = vn_lock(numfiledvp, LK_EXCLUSIVE); 2471 if (error != 0) { 2472 error = ENOENT; 2473 numfiledvp = NULL; 2474 goto out; 2475 } 2476 vref(numfiledvp); 2477 2478 /* Get the next filenum. */ 2479 nextf = nep->ne_pnfsnextfile; 2480 snprintf(name, sizeof(name), "%d", nextf); 2481 2482 /* Now, look up the numbered file. */ 2483 cn.cn_nameiop = DELETE; 2484 cn.cn_lkflags = LK_EXCLUSIVE; 2485 cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF | LOCKPARENT | 2486 NOCROSSMOUNT; 2487 cn.cn_cred = cred; 2488 cn.cn_nameptr = name; 2489 cn.cn_namelen = strlen(name); 2490 error = VOP_LOOKUP(numfiledvp, &ndp->ni_vp, &cn); 2491 if (error != 0) { 2492 nfsrv_pnfsnumfilemiss++; 2493 VOP_UNLOCK(numfiledvp); 2494 ndp->ni_vp = NULL; 2495 if (error == ENOENT || error == EJUSTRETURN) 2496 wakeup(&mp->mnt_export); 2497 else 2498 VOP_VPUT_PAIR(ndp->ni_dvp, NULL, true); 2499 goto out; 2500 } 2501 2502 /* 2503 * Set the new file's attributes to what VOP_CREATE() would 2504 * have set them to. 2505 */ 2506 gid = GID_NOGROUP; 2507 if (vap->va_gid == VNOVAL && 2508 VOP_GETATTR(ndp->ni_dvp, &va, cred) == 0) 2509 gid = va.va_gid; 2510 VATTR_NULL(&va); 2511 va.va_gid = gid; 2512 va.va_uid = ndp->ni_cnd.cn_cred->cr_uid; 2513 va.va_mode = vap->va_mode; 2514 error = VOP_SETATTR(ndp->ni_vp, &va, cred); 2515 if (error != 0) { 2516 VOP_UNLOCK(numfiledvp); 2517 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, true); 2518 ndp->ni_vp = NULL; 2519 printf("nfsvno_pnfsusenumfile: setattr failed %d\n", 2520 error); 2521 if (error == ENOENT) 2522 error = ENXIO; 2523 goto out; 2524 } 2525 2526 /* 2527 * Link the numbered file to the name VOP_CREATE() would have 2528 * created in the correct directory and then VOP_REMOVE() the 2529 * numbered file. 2530 * Use VOP_LINK()/VOP_REMOVE() so that the numbered file 2531 * directory can remain locked. 2532 */ 2533 error = VOP_LINK(ndp->ni_dvp, ndp->ni_vp, &ndp->ni_cnd); 2534 /* Remove the file in .numfiles. */ 2535 if (error == 0) { 2536 nep->ne_pnfsnextfile = (nextf + 1) % 2537 nfsrv_pnfsmaxnumfiles; 2538 error = VOP_REMOVE(numfiledvp, ndp->ni_vp, &cn); 2539 if (error != 0) { 2540 /* Shut down the numfiles stuff. */ 2541 MNTEXP_LOCK(nep); 2542 nep->ne_pnfsnumfile = PNFSD_STOP; 2543 MNTEXP_UNLOCK(nep); 2544 printf("nfsvno_pnfsusenumfile: remove failed " 2545 "%d %s\n", error, name); 2546 } 2547 } 2548 VOP_UNLOCK(numfiledvp); 2549 if (error != 0) { 2550 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, true); 2551 ndp->ni_vp = NULL; 2552 } else { 2553 atomic_add_int(&nep->ne_pnfsnumcnt, 1); 2554 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, false); 2555 } 2556 if (error == ENOENT) 2557 error = ENXIO; 2558 2559 out: 2560 if (numfiledvp != NULL) 2561 vrele(numfiledvp); 2562 if (nep != NULL) 2563 vfs_netexport_release(nep); 2564 curthread->td_ucred = savcred; /* Reset the thread's cred. */ 2565 NFSFREECRED(cred); 2566 return (error); 2567 } 2568 2569 /* 2570 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 2571 * must handle nfsrv_opencheck() calls after any other access checks. 2572 */ 2573 void 2574 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 2575 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 2576 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 2577 NFSACL_T *aclp, NFSACL_T *daclp, nfsattrbit_t *attrbitp, struct ucred *cred, 2578 bool done_namei, struct nfsexstuff *exp, struct vnode **vpp) 2579 { 2580 struct vattr va; 2581 struct vnode *vp = NULL; 2582 u_quad_t tempsize; 2583 struct nfsexstuff nes; 2584 struct thread *p = curthread; 2585 uint32_t oldrepstat; 2586 u_long savflags; 2587 int error; 2588 2589 if (ndp->ni_vp == NULL) { 2590 /* 2591 * If nfsrv_opencheck() sets nd_repstat, done_namei needs to be 2592 * set true, since cleanup after nfsvno_namei() is needed. 2593 */ 2594 oldrepstat = nd->nd_repstat; 2595 nd->nd_repstat = nfsrv_opencheck(clientid, 2596 stateidp, stp, NULL, nd, p, nd->nd_repstat); 2597 if (nd->nd_repstat != 0 && oldrepstat == 0) 2598 done_namei = true; 2599 } 2600 if (!nd->nd_repstat) { 2601 if (ndp->ni_vp == NULL) { 2602 struct sockaddr_in *sin; 2603 struct sockaddr_in6 *sin6; 2604 bool try_pnfs; 2605 2606 sin = (struct sockaddr_in *)nd->nd_nam; 2607 sin6 = (struct sockaddr_in6 *)nd->nd_nam; 2608 error = ENOENT; 2609 try_pnfs = !TAILQ_EMPTY(&nfsrv_devidhead); 2610 2611 if (try_pnfs && !(sin->sin_family == AF_INET && 2612 IN_LOOPBACK(ntohl(sin->sin_addr.s_addr))) && 2613 !(sin6->sin6_family == AF_INET6 && 2614 IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) { 2615 error = nfsvno_pnfsusenumfile(ndp, 2616 &nvap->na_vattr); 2617 if (error != ENOENT) 2618 nd->nd_repstat = error; 2619 } 2620 if (error == ENOENT) { 2621 /* 2622 * Most file systems ignore va_flags for 2623 * VOP_CREATE(), however setting va_flags 2624 * for VOP_CREATE() causes problems for ZFS. 2625 * So disable them and let nfsrv_fixattr() 2626 * do them, as required. 2627 */ 2628 savflags = nvap->na_flags; 2629 nvap->na_flags = VNOVAL; 2630 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 2631 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 2632 if (try_pnfs && nd->nd_repstat == 0) { 2633 /* 2634 * Create a data file on a DS for a pNFS 2635 * server. This function just returns if 2636 * not running a pNFS DS or the creation 2637 * fails. 2638 */ 2639 nfsrv_pnfscreate(ndp->ni_vp, 2640 &nvap->na_vattr, cred, p); 2641 } 2642 VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ? 2643 &ndp->ni_vp : NULL, false); 2644 nvap->na_flags = savflags; 2645 } 2646 nfsvno_relpathbuf(ndp); 2647 if (!nd->nd_repstat) { 2648 if (*exclusive_flagp != NFSV4_EXCLUSIVE_NONE) { 2649 VATTR_NULL(&va); 2650 va.va_atime.tv_sec = cverf[0]; 2651 va.va_atime.tv_nsec = cverf[1]; 2652 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 2653 &va, cred); 2654 if (nd->nd_repstat != 0) { 2655 vput(ndp->ni_vp); 2656 ndp->ni_vp = NULL; 2657 nd->nd_repstat = NFSERR_NOTSUPP; 2658 } else { 2659 /* 2660 * Few clients set these 2661 * attributes in Open/Create 2662 * Exclusive_41. If this 2663 * changes, this should include 2664 * setting atime, instead of 2665 * the above. 2666 */ 2667 if (*exclusive_flagp == 2668 NFSV4_EXCLUSIVE_41 && 2669 (NFSISSET_ATTRBIT(attrbitp, 2670 NFSATTRBIT_OWNER) || 2671 NFSISSET_ATTRBIT(attrbitp, 2672 NFSATTRBIT_OWNERGROUP) || 2673 NFSISSET_ATTRBIT(attrbitp, 2674 NFSATTRBIT_TIMEMODIFYSET)|| 2675 NFSISSET_ATTRBIT(attrbitp, 2676 NFSATTRBIT_ARCHIVE) || 2677 NFSISSET_ATTRBIT(attrbitp, 2678 NFSATTRBIT_HIDDEN) || 2679 NFSISSET_ATTRBIT(attrbitp, 2680 NFSATTRBIT_SYSTEM) || 2681 aclp != NULL || 2682 daclp != NULL)) 2683 nfsrv_fixattr(nd, 2684 ndp->ni_vp, nvap, 2685 aclp, daclp, p, 2686 attrbitp, true); 2687 NFSSETBIT_ATTRBIT(attrbitp, 2688 NFSATTRBIT_TIMEACCESS); 2689 } 2690 *exclusive_flagp = NFSV4_EXCLUSIVE_NONE; 2691 } else { 2692 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 2693 aclp, daclp, p, attrbitp, false); 2694 } 2695 } 2696 vp = ndp->ni_vp; 2697 } else { 2698 nfsvno_relpathbuf(ndp); 2699 vp = ndp->ni_vp; 2700 if (create == NFSV4OPEN_CREATE) { 2701 if (ndp->ni_dvp == vp) 2702 vrele(ndp->ni_dvp); 2703 else 2704 vput(ndp->ni_dvp); 2705 } 2706 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 2707 if (ndp->ni_cnd.cn_flags & RDONLY) 2708 NFSVNO_SETEXRDONLY(&nes); 2709 else 2710 NFSVNO_EXINIT(&nes); 2711 nd->nd_repstat = nfsvno_accchk(vp, 2712 VWRITE, cred, &nes, p, 2713 NFSACCCHK_NOOVERRIDE, 2714 NFSACCCHK_VPISLOCKED, NULL); 2715 nd->nd_repstat = nfsrv_opencheck(clientid, 2716 stateidp, stp, vp, nd, p, nd->nd_repstat); 2717 if (!nd->nd_repstat) { 2718 tempsize = nvap->na_size; 2719 NFSVNO_ATTRINIT(nvap); 2720 nvap->na_size = tempsize; 2721 nd->nd_repstat = nfsvno_setattr(vp, 2722 nvap, cred, p, exp); 2723 } 2724 } else if (vp->v_type == VREG) { 2725 nd->nd_repstat = nfsrv_opencheck(clientid, 2726 stateidp, stp, vp, nd, p, nd->nd_repstat); 2727 } 2728 } 2729 } else if (done_namei) { 2730 KASSERT(create == NFSV4OPEN_CREATE, 2731 ("nfsvno_open: not create")); 2732 /* 2733 * done_namei is set when nfsvno_namei() has completed 2734 * successfully, but a subsequent error was set in 2735 * nd_repstat. As such, cleanup of the nfsvno_namei() 2736 * results is required. 2737 */ 2738 nfsvno_relpathbuf(ndp); 2739 if (ndp->ni_dvp == ndp->ni_vp) 2740 vrele(ndp->ni_dvp); 2741 else 2742 vput(ndp->ni_dvp); 2743 if (ndp->ni_vp) 2744 vput(ndp->ni_vp); 2745 } 2746 *vpp = vp; 2747 2748 NFSEXITCODE2(0, nd); 2749 } 2750 2751 /* 2752 * Updates the file rev and sets the mtime and ctime 2753 * to the current clock time, returning the va_filerev and va_Xtime 2754 * values. 2755 * Return ESTALE to indicate the vnode is VIRF_DOOMED. 2756 */ 2757 int 2758 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 2759 struct nfsrv_descript *nd, struct thread *p) 2760 { 2761 struct vattr va; 2762 2763 VATTR_NULL(&va); 2764 vfs_timestamp(&va.va_mtime); 2765 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 2766 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 2767 if (VN_IS_DOOMED(vp)) 2768 return (ESTALE); 2769 } 2770 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 2771 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 2772 return (0); 2773 } 2774 2775 /* 2776 * Glue routine to nfsv4_fillattr(). 2777 */ 2778 int 2779 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 2780 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 2781 struct ucred *cred, struct thread *p, int isdgram, int reterr, 2782 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, 2783 bool xattrsupp, bool has_hiddensystem, bool has_namedattr, 2784 uint32_t clone_blksize, bool has_caseinsensitive) 2785 { 2786 struct statfs *sf; 2787 int error; 2788 2789 sf = NULL; 2790 if (nfsrv_devidcnt > 0 && 2791 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 2792 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 2793 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 2794 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 2795 error = nfsrv_pnfsstatfs(sf, mp); 2796 if (error != 0) { 2797 free(sf, M_TEMP); 2798 sf = NULL; 2799 } 2800 } 2801 2802 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 2803 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 2804 mounted_on_fileno, sf, xattrsupp, has_hiddensystem, has_namedattr, 2805 clone_blksize, NULL, has_caseinsensitive); 2806 free(sf, M_TEMP); 2807 NFSEXITCODE2(0, nd); 2808 return (error); 2809 } 2810 2811 /* 2812 * Convert a dirent d_type to a vnode type. 2813 */ 2814 static void nfs_dtypetovtype(struct nfsvattr *nvap, struct vnode *vp, 2815 uint8_t dtype) 2816 { 2817 2818 if ((vn_irflag_read(vp) & VIRF_NAMEDDIR) != 0) { 2819 nvap->na_type = VREG; 2820 nvap->na_bsdflags |= SFBSD_NAMEDATTR; 2821 } else if (dtype <= DT_WHT) { 2822 nvap->na_type = dtype_to_vnode[dtype]; 2823 } else { 2824 nvap->na_type = VNON; 2825 } 2826 } 2827 2828 /* Since the Readdir vnode ops vary, put the entire functions in here. */ 2829 /* 2830 * nfs readdir service 2831 * - mallocs what it thinks is enough to read 2832 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR 2833 * - calls VOP_READDIR() 2834 * - loops around building the reply 2835 * if the output generated exceeds count break out of loop 2836 * The NFSM_CLGET macro is used here so that the reply will be packed 2837 * tightly in mbuf clusters. 2838 * - it trims out records with d_fileno == 0 2839 * this doesn't matter for Unix clients, but they might confuse clients 2840 * for other os'. 2841 * - it trims out records with d_type == DT_WHT 2842 * these cannot be seen through NFS (unless we extend the protocol) 2843 * The alternate call nfsrvd_readdirplus() does lookups as well. 2844 * PS: The NFS protocol spec. does not clarify what the "count" byte 2845 * argument is a count of.. just name strings and file id's or the 2846 * entire reply rpc or ... 2847 * I tried just file name and id sizes and it confused the Sun client, 2848 * so I am using the full rpc size now. The "paranoia.." comment refers 2849 * to including the status longwords that are not a part of the dir. 2850 * "entry" structures, but are in the rpc. 2851 */ 2852 int 2853 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 2854 struct vnode *vp, struct nfsexstuff *exp) 2855 { 2856 struct dirent *dp; 2857 u_int32_t *tl; 2858 int dirlen; 2859 char *cpos, *cend, *rbuf; 2860 struct nfsvattr at; 2861 int nlen, error = 0, getret = 1; 2862 int siz, cnt, fullsiz, eofflag, ncookies; 2863 u_int64_t off, toff, verf __unused; 2864 uint64_t *cookies = NULL, *cookiep; 2865 struct uio io; 2866 struct iovec iv; 2867 int is_ufs; 2868 struct thread *p = curthread; 2869 2870 if (nd->nd_repstat) { 2871 nfsrv_postopattr(nd, getret, &at); 2872 goto out; 2873 } 2874 if (nd->nd_flag & ND_NFSV2) { 2875 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2876 off = fxdr_unsigned(u_quad_t, *tl++); 2877 } else { 2878 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 2879 off = fxdr_hyper(tl); 2880 tl += 2; 2881 verf = fxdr_hyper(tl); 2882 tl += 2; 2883 } 2884 toff = off; 2885 cnt = fxdr_unsigned(int, *tl); 2886 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2887 cnt = NFS_SRVMAXDATA(nd); 2888 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2889 fullsiz = siz; 2890 if (nd->nd_flag & ND_NFSV3) { 2891 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 2892 NULL); 2893 #if 0 2894 /* 2895 * va_filerev is not sufficient as a cookie verifier, 2896 * since it is not supposed to change when entries are 2897 * removed/added unless that offset cookies returned to 2898 * the client are no longer valid. 2899 */ 2900 if (!nd->nd_repstat && toff && verf != at.na_filerev) 2901 nd->nd_repstat = NFSERR_BAD_COOKIE; 2902 #endif 2903 } 2904 if (!nd->nd_repstat && vp->v_type != VDIR) 2905 nd->nd_repstat = NFSERR_NOTDIR; 2906 if (nd->nd_repstat == 0 && cnt == 0) { 2907 if (nd->nd_flag & ND_NFSV2) 2908 /* NFSv2 does not have NFSERR_TOOSMALL */ 2909 nd->nd_repstat = EPERM; 2910 else 2911 nd->nd_repstat = NFSERR_TOOSMALL; 2912 } 2913 if (!nd->nd_repstat) 2914 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2915 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2916 NFSACCCHK_VPISLOCKED, NULL); 2917 if (nd->nd_repstat) { 2918 vput(vp); 2919 if (nd->nd_flag & ND_NFSV3) 2920 nfsrv_postopattr(nd, getret, &at); 2921 goto out; 2922 } 2923 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2924 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2925 again: 2926 eofflag = 0; 2927 if (cookies) { 2928 free(cookies, M_TEMP); 2929 cookies = NULL; 2930 } 2931 2932 iv.iov_base = rbuf; 2933 iv.iov_len = siz; 2934 io.uio_iov = &iv; 2935 io.uio_iovcnt = 1; 2936 io.uio_offset = (off_t)off; 2937 io.uio_resid = siz; 2938 io.uio_segflg = UIO_SYSSPACE; 2939 io.uio_rw = UIO_READ; 2940 io.uio_td = NULL; 2941 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2942 &cookies); 2943 off = (u_int64_t)io.uio_offset; 2944 if (io.uio_resid) 2945 siz -= io.uio_resid; 2946 2947 if (!cookies && !nd->nd_repstat) 2948 nd->nd_repstat = NFSERR_PERM; 2949 if (nd->nd_flag & ND_NFSV3) { 2950 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2951 if (!nd->nd_repstat) 2952 nd->nd_repstat = getret; 2953 } 2954 2955 /* 2956 * Handles the failed cases. nd->nd_repstat == 0 past here. 2957 */ 2958 if (nd->nd_repstat) { 2959 vput(vp); 2960 free(rbuf, M_TEMP); 2961 if (cookies) 2962 free(cookies, M_TEMP); 2963 if (nd->nd_flag & ND_NFSV3) 2964 nfsrv_postopattr(nd, getret, &at); 2965 goto out; 2966 } 2967 /* 2968 * If nothing read, return eof 2969 * rpc reply 2970 */ 2971 if (siz == 0) { 2972 vput(vp); 2973 if (nd->nd_flag & ND_NFSV2) { 2974 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2975 } else { 2976 nfsrv_postopattr(nd, getret, &at); 2977 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2978 txdr_hyper(at.na_filerev, tl); 2979 tl += 2; 2980 } 2981 *tl++ = newnfs_false; 2982 *tl = newnfs_true; 2983 free(rbuf, M_TEMP); 2984 free(cookies, M_TEMP); 2985 goto out; 2986 } 2987 2988 /* 2989 * Check for degenerate cases of nothing useful read. 2990 * If so go try again 2991 */ 2992 cpos = rbuf; 2993 cend = rbuf + siz; 2994 dp = (struct dirent *)cpos; 2995 cookiep = cookies; 2996 2997 /* 2998 * For some reason FreeBSD's ufs_readdir() chooses to back the 2999 * directory offset up to a block boundary, so it is necessary to 3000 * skip over the records that precede the requested offset. This 3001 * requires the assumption that file offset cookies monotonically 3002 * increase. 3003 */ 3004 while (cpos < cend && ncookies > 0 && 3005 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 3006 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 3007 cpos += dp->d_reclen; 3008 dp = (struct dirent *)cpos; 3009 cookiep++; 3010 ncookies--; 3011 } 3012 if (cpos >= cend || ncookies == 0) { 3013 siz = fullsiz; 3014 toff = off; 3015 goto again; 3016 } 3017 vput(vp); 3018 3019 /* 3020 * If cnt > MCLBYTES and the reply will not be saved, use 3021 * ext_pgs mbufs for TLS. 3022 * For NFSv4.0, we do not know for sure if the reply will 3023 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 3024 */ 3025 if (cnt > MCLBYTES && siz > MCLBYTES && 3026 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 3027 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 3028 nd->nd_flag |= ND_EXTPG; 3029 3030 /* 3031 * dirlen is the size of the reply, including all XDR and must 3032 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 3033 * if the XDR should be included in "count", but to be safe, we do. 3034 * (Include the two booleans at the end of the reply in dirlen now.) 3035 */ 3036 if (nd->nd_flag & ND_NFSV3) { 3037 nfsrv_postopattr(nd, getret, &at); 3038 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3039 txdr_hyper(at.na_filerev, tl); 3040 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 3041 } else { 3042 dirlen = 2 * NFSX_UNSIGNED; 3043 } 3044 3045 /* Loop through the records and build reply */ 3046 while (cpos < cend && ncookies > 0) { 3047 nlen = dp->d_namlen; 3048 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 3049 nlen <= NFS_MAXNAMLEN) { 3050 if (nd->nd_flag & ND_NFSV3) 3051 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 3052 else 3053 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 3054 if (dirlen > cnt) { 3055 eofflag = 0; 3056 break; 3057 } 3058 3059 /* 3060 * Build the directory record xdr from 3061 * the dirent entry. 3062 */ 3063 if (nd->nd_flag & ND_NFSV3) { 3064 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 3065 *tl++ = newnfs_true; 3066 txdr_hyper(dp->d_fileno, tl); 3067 } else { 3068 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3069 *tl++ = newnfs_true; 3070 *tl = txdr_unsigned(dp->d_fileno); 3071 } 3072 (void) nfsm_strtom(nd, dp->d_name, nlen); 3073 if (nd->nd_flag & ND_NFSV3) { 3074 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3075 txdr_hyper(*cookiep, tl); 3076 } else { 3077 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 3078 *tl = txdr_unsigned(*cookiep); 3079 } 3080 } 3081 cpos += dp->d_reclen; 3082 dp = (struct dirent *)cpos; 3083 cookiep++; 3084 ncookies--; 3085 } 3086 if (cpos < cend) 3087 eofflag = 0; 3088 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3089 *tl++ = newnfs_false; 3090 if (eofflag) 3091 *tl = newnfs_true; 3092 else 3093 *tl = newnfs_false; 3094 free(rbuf, M_TEMP); 3095 free(cookies, M_TEMP); 3096 3097 out: 3098 NFSEXITCODE2(0, nd); 3099 return (0); 3100 nfsmout: 3101 vput(vp); 3102 NFSEXITCODE2(error, nd); 3103 return (error); 3104 } 3105 3106 /* 3107 * Readdirplus for V3 and Readdir for V4. 3108 */ 3109 int 3110 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 3111 struct vnode *vp, struct nfsexstuff *exp) 3112 { 3113 struct dirent *dp; 3114 uint32_t clone_blksize, *tl; 3115 int dirlen; 3116 char *cpos, *cend, *rbuf; 3117 struct vnode *nvp; 3118 fhandle_t nfh; 3119 struct nfsvattr nva, at, *nvap = &nva; 3120 struct mbuf *mb0, *mb1; 3121 struct nfsreferral *refp; 3122 int nlen, r, error = 0, getret = 1, ret, usevget = 1; 3123 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 3124 caddr_t bpos0, bpos1; 3125 u_int64_t off, toff, verf __unused; 3126 uint64_t *cookies = NULL, *cookiep; 3127 nfsattrbit_t attrbits, rderrbits, savbits, refbits; 3128 struct uio io; 3129 struct iovec iv; 3130 struct componentname cn; 3131 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 3132 struct mount *mp, *new_mp; 3133 uint64_t mounted_on_fileno; 3134 struct thread *p = curthread; 3135 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; 3136 size_t atsiz; 3137 long pathval; 3138 bool has_caseinsensitive, has_hiddensystem, has_namedattr, xattrsupp; 3139 3140 NFSZERO_ATTRBIT(&savbits); /* Shut up gcc. */ 3141 if (nd->nd_repstat) { 3142 nfsrv_postopattr(nd, getret, &at); 3143 goto out; 3144 } 3145 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 3146 off = fxdr_hyper(tl); 3147 toff = off; 3148 tl += 2; 3149 verf = fxdr_hyper(tl); 3150 tl += 2; 3151 siz = fxdr_unsigned(int, *tl++); 3152 cnt = fxdr_unsigned(int, *tl); 3153 3154 /* 3155 * Use the server's maximum data transfer size as the upper bound 3156 * on reply datalen. 3157 */ 3158 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 3159 cnt = NFS_SRVMAXDATA(nd); 3160 3161 /* 3162 * siz is a "hint" of how much directory information (name, fileid, 3163 * cookie) should be in the reply. At least one client "hints" 0, 3164 * so I set it to cnt for that case. I also round it up to the 3165 * next multiple of DIRBLKSIZ. 3166 * Since the size of a Readdirplus directory entry reply will always 3167 * be greater than a directory entry returned by VOP_READDIR(), it 3168 * does not make sense to read more than NFS_SRVMAXDATA() via 3169 * VOP_READDIR(). 3170 */ 3171 if (siz <= 0) 3172 siz = cnt; 3173 else if (siz > NFS_SRVMAXDATA(nd)) 3174 siz = NFS_SRVMAXDATA(nd); 3175 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 3176 3177 if (nd->nd_flag & ND_NFSV4) { 3178 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 3179 if (error) 3180 goto nfsmout; 3181 NFSSET_ATTRBIT(&savbits, &attrbits); 3182 NFSSET_ATTRBIT(&refbits, &attrbits); 3183 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); 3184 NFSZERO_ATTRBIT(&rderrbits); 3185 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 3186 /* 3187 * If these 4 bits are the only attributes requested by the 3188 * client, they can be satisfied without acquiring the vnode 3189 * for the file object unless it is a directory. 3190 * This will be indicated by savbits being all 0s. 3191 */ 3192 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_TYPE); 3193 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_FILEID); 3194 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_MOUNTEDONFILEID); 3195 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_RDATTRERROR); 3196 } else { 3197 NFSZERO_ATTRBIT(&attrbits); 3198 } 3199 fullsiz = siz; 3200 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 3201 #if 0 3202 if (!nd->nd_repstat) { 3203 if (off && verf != at.na_filerev) { 3204 /* 3205 * va_filerev is not sufficient as a cookie verifier, 3206 * since it is not supposed to change when entries are 3207 * removed/added unless that offset cookies returned to 3208 * the client are no longer valid. 3209 */ 3210 if (nd->nd_flag & ND_NFSV4) { 3211 nd->nd_repstat = NFSERR_NOTSAME; 3212 } else { 3213 nd->nd_repstat = NFSERR_BAD_COOKIE; 3214 } 3215 } 3216 } 3217 #endif 3218 if (!nd->nd_repstat && vp->v_type != VDIR) 3219 nd->nd_repstat = NFSERR_NOTDIR; 3220 if (!nd->nd_repstat && cnt == 0) 3221 nd->nd_repstat = NFSERR_TOOSMALL; 3222 if (!nd->nd_repstat) 3223 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 3224 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 3225 NFSACCCHK_VPISLOCKED, NULL); 3226 if (nd->nd_repstat) { 3227 vput(vp); 3228 if (nd->nd_flag & ND_NFSV3) 3229 nfsrv_postopattr(nd, getret, &at); 3230 goto out; 3231 } 3232 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 3233 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 3234 3235 rbuf = malloc(siz, M_TEMP, M_WAITOK); 3236 again: 3237 eofflag = 0; 3238 if (cookies) { 3239 free(cookies, M_TEMP); 3240 cookies = NULL; 3241 } 3242 3243 iv.iov_base = rbuf; 3244 iv.iov_len = siz; 3245 io.uio_iov = &iv; 3246 io.uio_iovcnt = 1; 3247 io.uio_offset = (off_t)off; 3248 io.uio_resid = siz; 3249 io.uio_segflg = UIO_SYSSPACE; 3250 io.uio_rw = UIO_READ; 3251 io.uio_td = NULL; 3252 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 3253 &cookies); 3254 off = (u_int64_t)io.uio_offset; 3255 if (io.uio_resid) 3256 siz -= io.uio_resid; 3257 3258 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 3259 3260 if (!cookies && !nd->nd_repstat) 3261 nd->nd_repstat = NFSERR_PERM; 3262 if (!nd->nd_repstat) 3263 nd->nd_repstat = getret; 3264 if (nd->nd_repstat) { 3265 vput(vp); 3266 if (cookies) 3267 free(cookies, M_TEMP); 3268 free(rbuf, M_TEMP); 3269 if (nd->nd_flag & ND_NFSV3) 3270 nfsrv_postopattr(nd, getret, &at); 3271 goto out; 3272 } 3273 /* 3274 * If nothing read, return eof 3275 * rpc reply 3276 */ 3277 if (siz == 0) { 3278 ateof: 3279 vput(vp); 3280 if (nd->nd_flag & ND_NFSV3) 3281 nfsrv_postopattr(nd, getret, &at); 3282 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 3283 txdr_hyper(at.na_filerev, tl); 3284 tl += 2; 3285 *tl++ = newnfs_false; 3286 *tl = newnfs_true; 3287 free(cookies, M_TEMP); 3288 free(rbuf, M_TEMP); 3289 goto out; 3290 } 3291 3292 /* 3293 * Check for degenerate cases of nothing useful read. 3294 * If so go try again 3295 */ 3296 cpos = rbuf; 3297 cend = rbuf + siz; 3298 dp = (struct dirent *)cpos; 3299 cookiep = cookies; 3300 3301 /* 3302 * For some reason FreeBSD's ufs_readdir() chooses to back the 3303 * directory offset up to a block boundary, so it is necessary to 3304 * skip over the records that precede the requested offset. This 3305 * requires the assumption that file offset cookies monotonically 3306 * increase. 3307 */ 3308 while (cpos < cend && ncookies > 0 && 3309 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 3310 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 3311 ((nd->nd_flag & ND_NFSV4) && 3312 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 3313 (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { 3314 cpos += dp->d_reclen; 3315 dp = (struct dirent *)cpos; 3316 cookiep++; 3317 ncookies--; 3318 } 3319 if (cpos >= cend || ncookies == 0) { 3320 if (eofflag != 0) 3321 goto ateof; 3322 siz = fullsiz; 3323 toff = off; 3324 goto again; 3325 } 3326 3327 /* 3328 * Busy the file system so that the mount point won't go away 3329 * and, as such, VFS_VGET() can be used safely. 3330 */ 3331 mp = vp->v_mount; 3332 vfs_ref(mp); 3333 NFSVOPUNLOCK(vp); 3334 nd->nd_repstat = vfs_busy(mp, 0); 3335 vfs_rel(mp); 3336 if (nd->nd_repstat != 0) { 3337 vrele(vp); 3338 free(cookies, M_TEMP); 3339 free(rbuf, M_TEMP); 3340 if (nd->nd_flag & ND_NFSV3) 3341 nfsrv_postopattr(nd, getret, &at); 3342 goto out; 3343 } 3344 3345 /* 3346 * Check to see if entries in this directory can be safely acquired 3347 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 3348 * ZFS snapshot directories need VOP_LOOKUP(), so that any 3349 * automount of the snapshot directory that is required will 3350 * be done. 3351 * This needs to be done here for NFSv4, since NFSv4 never does 3352 * a VFS_VGET() for "." or "..". 3353 */ 3354 if (is_zfs == 1) { 3355 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 3356 if (r == EOPNOTSUPP) { 3357 usevget = 0; 3358 cn.cn_nameiop = LOOKUP; 3359 cn.cn_lkflags = LK_SHARED | LK_RETRY; 3360 cn.cn_cred = nd->nd_cred; 3361 } else if (r == 0) 3362 vput(nvp); 3363 } 3364 3365 /* 3366 * If the reply is likely to exceed MCLBYTES and the reply will 3367 * not be saved, use ext_pgs mbufs for TLS. 3368 * It is difficult to predict how large each entry will be and 3369 * how many entries have been read, so just assume the directory 3370 * entries grow by a factor of 4 when attributes are included. 3371 * For NFSv4.0, we do not know for sure if the reply will 3372 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 3373 */ 3374 if (cnt > MCLBYTES && siz > MCLBYTES / 4 && 3375 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 3376 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 3377 nd->nd_flag |= ND_EXTPG; 3378 3379 /* 3380 * Save this position, in case there is an error before one entry 3381 * is created. 3382 */ 3383 mb0 = nd->nd_mb; 3384 bpos0 = nd->nd_bpos; 3385 bextpg0 = nd->nd_bextpg; 3386 bextpgsiz0 = nd->nd_bextpgsiz; 3387 3388 /* 3389 * Fill in the first part of the reply. 3390 * dirlen is the reply length in bytes and cannot exceed cnt. 3391 * (Include the two booleans at the end of the reply in dirlen now, 3392 * so we recognize when we have exceeded cnt.) 3393 */ 3394 if (nd->nd_flag & ND_NFSV3) { 3395 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 3396 nfsrv_postopattr(nd, getret, &at); 3397 } else { 3398 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 3399 } 3400 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 3401 txdr_hyper(at.na_filerev, tl); 3402 3403 /* 3404 * Save this position, in case there is an empty reply needed. 3405 */ 3406 mb1 = nd->nd_mb; 3407 bpos1 = nd->nd_bpos; 3408 bextpg1 = nd->nd_bextpg; 3409 bextpgsiz1 = nd->nd_bextpgsiz; 3410 3411 /* Loop through the records and build reply */ 3412 entrycnt = 0; 3413 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 3414 nlen = dp->d_namlen; 3415 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 3416 nlen <= NFS_MAXNAMLEN && 3417 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 3418 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 3419 || (nlen == 1 && dp->d_name[0] != '.'))) { 3420 /* 3421 * Save the current position in the reply, in case 3422 * this entry exceeds cnt. 3423 */ 3424 mb1 = nd->nd_mb; 3425 bpos1 = nd->nd_bpos; 3426 bextpg1 = nd->nd_bextpg; 3427 bextpgsiz1 = nd->nd_bextpgsiz; 3428 3429 /* 3430 * For readdir_and_lookup get the vnode using 3431 * the file number. 3432 */ 3433 nvp = NULL; 3434 refp = NULL; 3435 r = 0; 3436 at_root = 0; 3437 needs_unbusy = 0; 3438 new_mp = mp; 3439 mounted_on_fileno = (uint64_t)dp->d_fileno; 3440 if ((nd->nd_flag & ND_NFSV3) || 3441 NFSNONZERO_ATTRBIT(&savbits) || 3442 dp->d_type == DT_UNKNOWN || 3443 (dp->d_type == DT_DIR && 3444 nfsrv_enable_crossmntpt != 0)) { 3445 if (nd->nd_flag & ND_NFSV4) 3446 refp = nfsv4root_getreferral(NULL, 3447 vp, dp->d_fileno); 3448 if (refp == NULL) { 3449 if (usevget) 3450 r = VFS_VGET(mp, dp->d_fileno, 3451 LK_SHARED, &nvp); 3452 else 3453 r = EOPNOTSUPP; 3454 if (r == 0 && (vn_irflag_read(vp) & 3455 VIRF_NAMEDDIR) != 0) 3456 vn_irflag_set_cond(nvp, 3457 VIRF_NAMEDATTR); 3458 if (r == EOPNOTSUPP) { 3459 if (usevget) { 3460 usevget = 0; 3461 cn.cn_nameiop = LOOKUP; 3462 cn.cn_lkflags = 3463 LK_SHARED | 3464 LK_RETRY; 3465 cn.cn_cred = 3466 nd->nd_cred; 3467 } 3468 cn.cn_nameptr = dp->d_name; 3469 cn.cn_namelen = nlen; 3470 cn.cn_flags = ISLASTCN | 3471 NOFOLLOW | LOCKLEAF; 3472 if ((vn_irflag_read(vp) & 3473 VIRF_NAMEDDIR) != 0) 3474 cn.cn_flags |= 3475 OPENNAMED; 3476 if (nlen == 2 && 3477 dp->d_name[0] == '.' && 3478 dp->d_name[1] == '.') 3479 cn.cn_flags |= 3480 ISDOTDOT; 3481 if (NFSVOPLOCK(vp, LK_SHARED) 3482 != 0) { 3483 nd->nd_repstat = EPERM; 3484 break; 3485 } 3486 if ((vp->v_vflag & VV_ROOT) != 0 3487 && (cn.cn_flags & ISDOTDOT) 3488 != 0) { 3489 vref(vp); 3490 nvp = vp; 3491 r = 0; 3492 } else { 3493 r = VOP_LOOKUP(vp, &nvp, 3494 &cn); 3495 if (vp != nvp) 3496 NFSVOPUNLOCK(vp); 3497 } 3498 } 3499 3500 /* 3501 * For NFSv4, check to see if nvp is 3502 * a mount point and get the mount 3503 * point vnode, as required. 3504 */ 3505 if (r == 0 && 3506 nfsrv_enable_crossmntpt != 0 && 3507 (nd->nd_flag & ND_NFSV4) != 0 && 3508 nvp->v_type == VDIR && 3509 nvp->v_mountedhere != NULL) { 3510 new_mp = nvp->v_mountedhere; 3511 r = vfs_busy(new_mp, 0); 3512 vput(nvp); 3513 nvp = NULL; 3514 if (r == 0) { 3515 r = VFS_ROOT(new_mp, 3516 LK_SHARED, &nvp); 3517 needs_unbusy = 1; 3518 if (r == 0) 3519 at_root = 1; 3520 } 3521 } 3522 } 3523 3524 /* 3525 * If we failed to look up the entry, then it 3526 * has become invalid, most likely removed. 3527 */ 3528 if (r != 0) { 3529 if (needs_unbusy) 3530 vfs_unbusy(new_mp); 3531 goto invalid; 3532 } 3533 KASSERT(refp != NULL || nvp != NULL, 3534 ("%s: undetected lookup error", __func__)); 3535 3536 if (refp == NULL && 3537 ((nd->nd_flag & ND_NFSV3) || 3538 NFSNONZERO_ATTRBIT(&attrbits))) { 3539 r = nfsvno_getfh(nvp, &nfh, p); 3540 if (!r) 3541 r = nfsvno_getattr(nvp, nvap, nd, p, 3542 1, &attrbits); 3543 if (r == 0 && is_zfs == 1 && 3544 nfsrv_enable_crossmntpt != 0 && 3545 (nd->nd_flag & ND_NFSV4) != 0 && 3546 nvp->v_type == VDIR && 3547 vp->v_mount != nvp->v_mount) { 3548 /* 3549 * For a ZFS snapshot, there is a 3550 * pseudo mount that does not set 3551 * v_mountedhere, so it needs to 3552 * be detected via a different 3553 * mount structure. 3554 */ 3555 at_root = 1; 3556 if (new_mp == mp) 3557 new_mp = nvp->v_mount; 3558 } 3559 } 3560 3561 /* 3562 * If we failed to get attributes of the entry, 3563 * then just skip it for NFSv3 (the traditional 3564 * behavior in the old NFS server). 3565 * For NFSv4 the behavior is controlled by 3566 * RDATTRERROR: we either ignore the error or 3567 * fail the request. 3568 * The exception is EOPNOTSUPP, which can be 3569 * returned by nfsvno_getfh() for certain 3570 * file systems, such as devfs. This indicates 3571 * that the file system cannot be exported, 3572 * so just skip over the entry. 3573 * Note that RDATTRERROR is never set for NFSv3. 3574 */ 3575 if (r != 0) { 3576 if (!NFSISSET_ATTRBIT(&attrbits, 3577 NFSATTRBIT_RDATTRERROR) || 3578 r == EOPNOTSUPP) { 3579 vput(nvp); 3580 if (needs_unbusy != 0) 3581 vfs_unbusy(new_mp); 3582 if ((nd->nd_flag & ND_NFSV3) || 3583 r == EOPNOTSUPP) 3584 goto invalid; 3585 nd->nd_repstat = r; 3586 break; 3587 } 3588 } 3589 } else if (NFSNONZERO_ATTRBIT(&attrbits)) { 3590 /* Only need Type and/or Fileid. */ 3591 VATTR_NULL(&nvap->na_vattr); 3592 nvap->na_fileid = dp->d_fileno; 3593 nfs_dtypetovtype(nvap, vp, dp->d_type); 3594 } 3595 3596 /* 3597 * Build the directory record xdr 3598 */ 3599 if (nd->nd_flag & ND_NFSV3) { 3600 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 3601 *tl++ = newnfs_true; 3602 txdr_hyper(dp->d_fileno, tl); 3603 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 3604 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3605 txdr_hyper(*cookiep, tl); 3606 nfsrv_postopattr(nd, 0, nvap); 3607 dirlen += nfsm_fhtom(NULL, nd, (u_int8_t *)&nfh, 3608 0, 1); 3609 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 3610 if (nvp != NULL) 3611 vput(nvp); 3612 } else { 3613 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 3614 *tl++ = newnfs_true; 3615 txdr_hyper(*cookiep, tl); 3616 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 3617 xattrsupp = false; 3618 has_hiddensystem = false; 3619 has_namedattr = false; 3620 has_caseinsensitive = false; 3621 clone_blksize = 0; 3622 if (nvp != NULL) { 3623 supports_nfsv4acls = 3624 nfs_supportsacls(nvp); 3625 if (NFSISSET_ATTRBIT(&attrbits, 3626 NFSATTRBIT_XATTRSUPPORT)) { 3627 ret = VOP_GETEXTATTR(nvp, 3628 EXTATTR_NAMESPACE_USER, 3629 "xxx", NULL, &atsiz, 3630 nd->nd_cred, p); 3631 xattrsupp = ret != EOPNOTSUPP; 3632 } 3633 if (VOP_PATHCONF(nvp, 3634 _PC_HAS_HIDDENSYSTEM, &pathval) != 3635 0) 3636 pathval = 0; 3637 has_hiddensystem = pathval > 0; 3638 pathval = 0; 3639 if (NFSISSET_ATTRBIT(&attrbits, 3640 NFSATTRBIT_NAMEDATTR) && 3641 VOP_PATHCONF(nvp, _PC_HAS_NAMEDATTR, 3642 &pathval) != 0) 3643 pathval = 0; 3644 has_namedattr = pathval > 0; 3645 pathval = 0; 3646 if (VOP_PATHCONF(nvp, _PC_CLONE_BLKSIZE, 3647 &pathval) != 0) 3648 pathval = 0; 3649 clone_blksize = pathval; 3650 if (VOP_PATHCONF(nvp, 3651 _PC_CASE_INSENSITIVE, 3652 &pathval) != 0) 3653 pathval = 0; 3654 has_caseinsensitive = pathval > 0; 3655 NFSVOPUNLOCK(nvp); 3656 } else 3657 supports_nfsv4acls = 0; 3658 if (refp != NULL) { 3659 dirlen += nfsrv_putreferralattr(nd, 3660 &refbits, refp, 0, 3661 &nd->nd_repstat); 3662 if (nd->nd_repstat) { 3663 if (nvp != NULL) 3664 vrele(nvp); 3665 if (needs_unbusy != 0) 3666 vfs_unbusy(new_mp); 3667 break; 3668 } 3669 } else if (r) { 3670 dirlen += nfsvno_fillattr(nd, new_mp, 3671 nvp, nvap, &nfh, r, &rderrbits, 3672 nd->nd_cred, p, isdgram, 0, 3673 supports_nfsv4acls, at_root, 3674 mounted_on_fileno, xattrsupp, 3675 has_hiddensystem, has_namedattr, 3676 clone_blksize, has_caseinsensitive); 3677 } else { 3678 dirlen += nfsvno_fillattr(nd, new_mp, 3679 nvp, nvap, &nfh, r, &attrbits, 3680 nd->nd_cred, p, isdgram, 0, 3681 supports_nfsv4acls, at_root, 3682 mounted_on_fileno, xattrsupp, 3683 has_hiddensystem, has_namedattr, 3684 clone_blksize, has_caseinsensitive); 3685 } 3686 if (nvp != NULL) 3687 vrele(nvp); 3688 dirlen += (3 * NFSX_UNSIGNED); 3689 } 3690 if (needs_unbusy != 0) 3691 vfs_unbusy(new_mp); 3692 if (dirlen <= cnt) 3693 entrycnt++; 3694 } 3695 invalid: 3696 cpos += dp->d_reclen; 3697 dp = (struct dirent *)cpos; 3698 cookiep++; 3699 ncookies--; 3700 } 3701 vrele(vp); 3702 vfs_unbusy(mp); 3703 3704 /* 3705 * If dirlen > cnt, we must strip off the last entry. If that 3706 * results in an empty reply, report NFSERR_TOOSMALL. 3707 */ 3708 if (dirlen > cnt || nd->nd_repstat) { 3709 if (!nd->nd_repstat && entrycnt == 0) 3710 nd->nd_repstat = NFSERR_TOOSMALL; 3711 if (nd->nd_repstat) { 3712 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); 3713 if (nd->nd_flag & ND_NFSV3) 3714 nfsrv_postopattr(nd, getret, &at); 3715 } else 3716 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); 3717 eofflag = 0; 3718 } else if (cpos < cend) 3719 eofflag = 0; 3720 if (!nd->nd_repstat) { 3721 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3722 *tl++ = newnfs_false; 3723 if (eofflag) 3724 *tl = newnfs_true; 3725 else 3726 *tl = newnfs_false; 3727 } 3728 free(cookies, M_TEMP); 3729 free(rbuf, M_TEMP); 3730 3731 out: 3732 NFSEXITCODE2(0, nd); 3733 return (0); 3734 nfsmout: 3735 vput(vp); 3736 NFSEXITCODE2(error, nd); 3737 return (error); 3738 } 3739 3740 /* 3741 * Get the settable attributes out of the mbuf list. 3742 * (Return 0 or EBADRPC) 3743 */ 3744 int 3745 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 3746 nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSACL_T *daclp, struct thread *p) 3747 { 3748 u_int32_t *tl; 3749 struct nfsv2_sattr *sp; 3750 int error = 0, toclient = 0; 3751 3752 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 3753 case ND_NFSV2: 3754 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 3755 /* 3756 * Some old clients didn't fill in the high order 16bits. 3757 * --> check the low order 2 bytes for 0xffff 3758 */ 3759 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 3760 nvap->na_mode = nfstov_mode(sp->sa_mode); 3761 if (sp->sa_uid != newnfs_xdrneg1) 3762 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 3763 if (sp->sa_gid != newnfs_xdrneg1) 3764 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 3765 if (sp->sa_size != newnfs_xdrneg1) 3766 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 3767 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 3768 #ifdef notyet 3769 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 3770 #else 3771 nvap->na_atime.tv_sec = 3772 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 3773 nvap->na_atime.tv_nsec = 0; 3774 #endif 3775 } 3776 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 3777 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 3778 break; 3779 case ND_NFSV3: 3780 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3781 if (*tl == newnfs_true) { 3782 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3783 nvap->na_mode = nfstov_mode(*tl); 3784 } 3785 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3786 if (*tl == newnfs_true) { 3787 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3788 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 3789 } 3790 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3791 if (*tl == newnfs_true) { 3792 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3793 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 3794 } 3795 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3796 if (*tl == newnfs_true) { 3797 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3798 nvap->na_size = fxdr_hyper(tl); 3799 } 3800 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3801 switch (fxdr_unsigned(int, *tl)) { 3802 case NFSV3SATTRTIME_TOCLIENT: 3803 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3804 fxdr_nfsv3time(tl, &nvap->na_atime); 3805 toclient = 1; 3806 break; 3807 case NFSV3SATTRTIME_TOSERVER: 3808 vfs_timestamp(&nvap->na_atime); 3809 nvap->na_vaflags |= VA_UTIMES_NULL; 3810 break; 3811 } 3812 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3813 switch (fxdr_unsigned(int, *tl)) { 3814 case NFSV3SATTRTIME_TOCLIENT: 3815 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 3816 fxdr_nfsv3time(tl, &nvap->na_mtime); 3817 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3818 break; 3819 case NFSV3SATTRTIME_TOSERVER: 3820 vfs_timestamp(&nvap->na_mtime); 3821 if (!toclient) 3822 nvap->na_vaflags |= VA_UTIMES_NULL; 3823 break; 3824 } 3825 break; 3826 case ND_NFSV4: 3827 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, daclp, p); 3828 } 3829 nfsmout: 3830 NFSEXITCODE2(error, nd); 3831 return (error); 3832 } 3833 3834 /* 3835 * Handle the setable attributes for V4. 3836 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 3837 */ 3838 int 3839 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 3840 nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSACL_T *daclp, struct thread *p) 3841 { 3842 u_int32_t *tl; 3843 int attrsum = 0; 3844 int i, j; 3845 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 3846 int moderet, toclient = 0; 3847 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 3848 uid_t uid; 3849 gid_t gid; 3850 u_short mode, mask; /* Same type as va_mode. */ 3851 struct vattr va; 3852 3853 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 3854 if (error) 3855 goto nfsmout; 3856 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3857 attrsize = fxdr_unsigned(int, *tl); 3858 3859 /* 3860 * Loop around getting the setable attributes. If an unsupported 3861 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 3862 * Once nd_repstat != 0, do not set the attribute value, but keep 3863 * parsing the attribute(s). 3864 */ 3865 if (retnotsup) { 3866 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3867 bitpos = NFSATTRBIT_MAX; 3868 } else { 3869 bitpos = 0; 3870 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ARCHIVE) || 3871 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) || 3872 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SYSTEM)) 3873 nvap->na_flags = 0; 3874 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL) && 3875 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_POSIXDEFAULTACL) || 3876 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_POSIXACCESSACL))) 3877 nd->nd_repstat = NFSERR_INVAL; 3878 } 3879 moderet = 0; 3880 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 3881 if (attrsum > attrsize) { 3882 error = NFSERR_BADXDR; 3883 goto nfsmout; 3884 } 3885 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 3886 switch (bitpos) { 3887 case NFSATTRBIT_SIZE: 3888 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 3889 if (!nd->nd_repstat) { 3890 if (vp != NULL && vp->v_type != VREG) 3891 nd->nd_repstat = (vp->v_type == VDIR) ? 3892 NFSERR_ISDIR : NFSERR_INVAL; 3893 else 3894 nvap->na_size = fxdr_hyper(tl); 3895 } 3896 attrsum += NFSX_HYPER; 3897 break; 3898 case NFSATTRBIT_ACL: 3899 error = nfsrv_dissectacl(nd, aclp, true, false, &aceerr, 3900 &aclsize); 3901 if (error) 3902 goto nfsmout; 3903 if (aceerr && !nd->nd_repstat) 3904 nd->nd_repstat = aceerr; 3905 attrsum += aclsize; 3906 break; 3907 case NFSATTRBIT_ARCHIVE: 3908 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); 3909 if (nd->nd_repstat == 0) { 3910 if (*tl == newnfs_true) 3911 nvap->na_flags |= UF_ARCHIVE; 3912 } 3913 attrsum += NFSX_UNSIGNED; 3914 break; 3915 case NFSATTRBIT_HIDDEN: 3916 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); 3917 if (nd->nd_repstat == 0) { 3918 if (*tl == newnfs_true) 3919 nvap->na_flags |= UF_HIDDEN; 3920 } 3921 attrsum += NFSX_UNSIGNED; 3922 break; 3923 case NFSATTRBIT_MIMETYPE: 3924 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3925 i = fxdr_unsigned(int, *tl); 3926 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 3927 if (error) 3928 goto nfsmout; 3929 if (!nd->nd_repstat) 3930 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3931 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 3932 break; 3933 case NFSATTRBIT_MODE: 3934 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */ 3935 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3936 if (!nd->nd_repstat) 3937 nvap->na_mode = nfstov_mode(*tl); 3938 attrsum += NFSX_UNSIGNED; 3939 break; 3940 case NFSATTRBIT_OWNER: 3941 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3942 j = fxdr_unsigned(int, *tl); 3943 if (j < 0) { 3944 error = NFSERR_BADXDR; 3945 goto nfsmout; 3946 } 3947 if (j > NFSV4_SMALLSTR) 3948 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3949 else 3950 cp = namestr; 3951 error = nfsrv_mtostr(nd, cp, j); 3952 if (error) { 3953 if (j > NFSV4_SMALLSTR) 3954 free(cp, M_NFSSTRING); 3955 goto nfsmout; 3956 } 3957 if (!nd->nd_repstat) { 3958 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 3959 &uid); 3960 if (!nd->nd_repstat) 3961 nvap->na_uid = uid; 3962 } 3963 if (j > NFSV4_SMALLSTR) 3964 free(cp, M_NFSSTRING); 3965 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3966 break; 3967 case NFSATTRBIT_OWNERGROUP: 3968 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3969 j = fxdr_unsigned(int, *tl); 3970 if (j < 0) { 3971 error = NFSERR_BADXDR; 3972 goto nfsmout; 3973 } 3974 if (j > NFSV4_SMALLSTR) 3975 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3976 else 3977 cp = namestr; 3978 error = nfsrv_mtostr(nd, cp, j); 3979 if (error) { 3980 if (j > NFSV4_SMALLSTR) 3981 free(cp, M_NFSSTRING); 3982 goto nfsmout; 3983 } 3984 if (!nd->nd_repstat) { 3985 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 3986 &gid); 3987 if (!nd->nd_repstat) 3988 nvap->na_gid = gid; 3989 } 3990 if (j > NFSV4_SMALLSTR) 3991 free(cp, M_NFSSTRING); 3992 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3993 break; 3994 case NFSATTRBIT_SYSTEM: 3995 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); 3996 if (nd->nd_repstat == 0) { 3997 if (*tl == newnfs_true) 3998 nvap->na_flags |= UF_SYSTEM; 3999 } 4000 attrsum += NFSX_UNSIGNED; 4001 break; 4002 case NFSATTRBIT_TIMEACCESSSET: 4003 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 4004 attrsum += NFSX_UNSIGNED; 4005 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 4006 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 4007 if (!nd->nd_repstat) 4008 fxdr_nfsv4time(tl, &nvap->na_atime); 4009 toclient = 1; 4010 attrsum += NFSX_V4TIME; 4011 } else if (!nd->nd_repstat) { 4012 vfs_timestamp(&nvap->na_atime); 4013 nvap->na_vaflags |= VA_UTIMES_NULL; 4014 } 4015 break; 4016 case NFSATTRBIT_TIMEBACKUP: 4017 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 4018 if (!nd->nd_repstat) 4019 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4020 attrsum += NFSX_V4TIME; 4021 break; 4022 case NFSATTRBIT_TIMECREATE: 4023 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 4024 if (!nd->nd_repstat) 4025 fxdr_nfsv4time(tl, &nvap->na_btime); 4026 attrsum += NFSX_V4TIME; 4027 break; 4028 case NFSATTRBIT_TIMEMODIFYSET: 4029 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 4030 attrsum += NFSX_UNSIGNED; 4031 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 4032 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 4033 if (!nd->nd_repstat) 4034 fxdr_nfsv4time(tl, &nvap->na_mtime); 4035 nvap->na_vaflags &= ~VA_UTIMES_NULL; 4036 attrsum += NFSX_V4TIME; 4037 } else if (!nd->nd_repstat) { 4038 vfs_timestamp(&nvap->na_mtime); 4039 if (!toclient) 4040 nvap->na_vaflags |= VA_UTIMES_NULL; 4041 } 4042 break; 4043 case NFSATTRBIT_MODESETMASKED: 4044 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 4045 mode = fxdr_unsigned(u_short, *tl++); 4046 mask = fxdr_unsigned(u_short, *tl); 4047 /* 4048 * vp == NULL implies an Open/Create operation. 4049 * This attribute can only be used for Setattr and 4050 * only for NFSv4.1 or higher. 4051 * If moderet != 0, a mode attribute has also been 4052 * specified and this attribute cannot be done in the 4053 * same Setattr operation. 4054 */ 4055 if (!nd->nd_repstat) { 4056 if ((nd->nd_flag & ND_NFSV41) == 0) 4057 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4058 else if ((mode & ~07777) != 0 || 4059 (mask & ~07777) != 0 || vp == NULL) 4060 nd->nd_repstat = NFSERR_INVAL; 4061 else if (moderet == 0) 4062 moderet = VOP_GETATTR(vp, &va, 4063 nd->nd_cred); 4064 if (moderet == 0) 4065 nvap->na_mode = (mode & mask) | 4066 (va.va_mode & ~mask); 4067 else 4068 nd->nd_repstat = moderet; 4069 } 4070 attrsum += 2 * NFSX_UNSIGNED; 4071 break; 4072 case NFSATTRBIT_MODEUMASK: 4073 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 4074 mode = fxdr_unsigned(u_short, *tl++); 4075 mask = fxdr_unsigned(u_short, *tl); 4076 /* 4077 * If moderet != 0, mode has already been done. 4078 * If vp != NULL, this is not a file object creation. 4079 */ 4080 if (!nd->nd_repstat) { 4081 if ((nd->nd_flag & ND_NFSV42) == 0) 4082 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4083 else if ((mask & ~0777) != 0 || vp != NULL || 4084 moderet != 0) 4085 nd->nd_repstat = NFSERR_INVAL; 4086 else 4087 nvap->na_mode = (mode & ~mask); 4088 } 4089 attrsum += 2 * NFSX_UNSIGNED; 4090 break; 4091 case NFSATTRBIT_POSIXACCESSACL: 4092 error = nfsrv_dissectacl(nd, aclp, true, true, &aceerr, 4093 &aclsize); 4094 if (error != 0) 4095 goto nfsmout; 4096 if (!nd->nd_repstat) { 4097 if ((nd->nd_flag & ND_NFSV42) == 0) 4098 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4099 else if (aclp != NULL && aclp->acl_cnt == 0) 4100 nd->nd_repstat = NFSERR_INVAL; 4101 else if (aceerr != 0) 4102 nd->nd_repstat = aceerr; 4103 } 4104 attrsum += aclsize; 4105 break; 4106 case NFSATTRBIT_POSIXDEFAULTACL: 4107 error = nfsrv_dissectacl(nd, daclp, true, true, &aceerr, 4108 &aclsize); 4109 if (error != 0) 4110 goto nfsmout; 4111 if (!nd->nd_repstat) { 4112 if ((nd->nd_flag & ND_NFSV42) == 0) 4113 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4114 else if (aclp != NULL && aclp->acl_cnt == 0) 4115 nd->nd_repstat = NFSERR_INVAL; 4116 else if (aceerr != 0) 4117 nd->nd_repstat = aceerr; 4118 else if (vp != NULL && vp->v_type != VDIR) 4119 nd->nd_repstat = NFSERR_INVAL; 4120 } 4121 attrsum += aclsize; 4122 break; 4123 default: 4124 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 4125 /* 4126 * set bitpos so we drop out of the loop. 4127 */ 4128 bitpos = NFSATTRBIT_MAX; 4129 break; 4130 } 4131 } 4132 4133 /* 4134 * some clients pad the attrlist, so we need to skip over the 4135 * padding. This also skips over unparsed non-supported attributes. 4136 */ 4137 if (attrsum > attrsize) { 4138 error = NFSERR_BADXDR; 4139 } else { 4140 attrsize = NFSM_RNDUP(attrsize); 4141 if (attrsum < attrsize) 4142 error = nfsm_advance(nd, attrsize - attrsum, -1); 4143 } 4144 nfsmout: 4145 NFSEXITCODE2(error, nd); 4146 return (error); 4147 } 4148 4149 /* 4150 * Check/setup export credentials. 4151 */ 4152 int 4153 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 4154 struct ucred *credanon, bool testsec) 4155 { 4156 int error; 4157 4158 /* 4159 * Check/setup credentials. 4160 */ 4161 if (nd->nd_flag & ND_GSS) 4162 exp->nes_exflag &= ~MNT_EXPORTANON; 4163 4164 /* 4165 * Check to see if the operation is allowed for this security flavor. 4166 */ 4167 error = 0; 4168 if (testsec) { 4169 error = nfsvno_testexp(nd, exp); 4170 if (error != 0) 4171 goto out; 4172 } 4173 4174 /* 4175 * Check to see if the file system is exported V4 only. 4176 */ 4177 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 4178 error = NFSERR_PROGNOTV4; 4179 goto out; 4180 } 4181 4182 /* 4183 * Now, map the user credentials. 4184 * (Note that ND_AUTHNONE will only be set for an NFSv3 4185 * Fsinfo RPC. If set for anything else, this code might need 4186 * to change.) 4187 */ 4188 if (NFSVNO_EXPORTED(exp)) { 4189 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 4190 NFSVNO_EXPORTANON(exp) || 4191 (nd->nd_flag & ND_AUTHNONE) != 0) { 4192 nd->nd_cred->cr_uid = credanon->cr_uid; 4193 nd->nd_cred->cr_gid = credanon->cr_gid; 4194 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 4195 credanon->cr_groups); 4196 } else if ((nd->nd_flag & ND_GSS) == 0) { 4197 /* 4198 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 4199 * if there is a replacement credential with a group 4200 * list set up by "nfsuserd -manage-gids". 4201 * If there is no replacement, nfsrv_getgrpscred() 4202 * simply returns its argument. 4203 */ 4204 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 4205 } 4206 } 4207 4208 out: 4209 NFSEXITCODE2(error, nd); 4210 return (error); 4211 } 4212 4213 /* 4214 * Check exports. 4215 */ 4216 int 4217 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 4218 struct ucred **credp) 4219 { 4220 int error; 4221 4222 error = 0; 4223 *credp = NULL; 4224 MNT_ILOCK(mp); 4225 if (mp->mnt_exjail == NULL || 4226 mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison) 4227 error = EACCES; 4228 MNT_IUNLOCK(mp); 4229 if (error == 0) 4230 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 4231 &exp->nes_numsecflavor, exp->nes_secflavors); 4232 if (error) { 4233 if (VNET(nfs_rootfhset)) { 4234 exp->nes_exflag = 0; 4235 exp->nes_numsecflavor = 0; 4236 error = 0; 4237 } 4238 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 4239 MAXSECFLAVORS) { 4240 printf("nfsvno_checkexp: numsecflavors out of range\n"); 4241 exp->nes_numsecflavor = 0; 4242 error = EACCES; 4243 } 4244 NFSEXITCODE(error); 4245 return (error); 4246 } 4247 4248 /* 4249 * Get a vnode for a file handle and export stuff. 4250 */ 4251 int 4252 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 4253 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 4254 struct ucred **credp) 4255 { 4256 int error; 4257 4258 *credp = NULL; 4259 exp->nes_numsecflavor = 0; 4260 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 4261 if (error != 0) 4262 /* Make sure the server replies ESTALE to the client. */ 4263 error = ESTALE; 4264 if (nam && !error) { 4265 MNT_ILOCK(mp); 4266 if (mp->mnt_exjail == NULL || 4267 mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison) 4268 error = EACCES; 4269 MNT_IUNLOCK(mp); 4270 if (error == 0) 4271 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 4272 &exp->nes_numsecflavor, exp->nes_secflavors); 4273 if (error) { 4274 if (VNET(nfs_rootfhset)) { 4275 exp->nes_exflag = 0; 4276 exp->nes_numsecflavor = 0; 4277 error = 0; 4278 } else { 4279 vput(*vpp); 4280 } 4281 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 4282 MAXSECFLAVORS) { 4283 printf("nfsvno_fhtovp: numsecflavors out of range\n"); 4284 exp->nes_numsecflavor = 0; 4285 error = EACCES; 4286 vput(*vpp); 4287 } 4288 } 4289 NFSEXITCODE(error); 4290 return (error); 4291 } 4292 4293 /* 4294 * nfsd_fhtovp() - convert a fh to a vnode ptr 4295 * - look up fsid in mount list (if not found ret error) 4296 * - get vp and export rights by calling nfsvno_fhtovp() 4297 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 4298 * for AUTH_SYS 4299 * - if mpp != NULL, return the mount point so that it can 4300 * be used for vn_finished_write() by the caller 4301 */ 4302 void 4303 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 4304 struct vnode **vpp, struct nfsexstuff *exp, 4305 struct mount **mpp, int startwrite, int nextop) 4306 { 4307 struct mount *mp, *mpw; 4308 struct ucred *credanon; 4309 fhandle_t *fhp; 4310 int error; 4311 4312 if (mpp != NULL) 4313 *mpp = NULL; 4314 *vpp = NULL; 4315 fhp = (fhandle_t *)nfp->nfsrvfh_data; 4316 mp = vfs_busyfs(&fhp->fh_fsid); 4317 if (mp == NULL) { 4318 nd->nd_repstat = ESTALE; 4319 goto out; 4320 } 4321 4322 if (startwrite) { 4323 mpw = mp; 4324 error = vn_start_write(NULL, &mpw, V_WAIT); 4325 if (error != 0) { 4326 mpw = NULL; 4327 vfs_unbusy(mp); 4328 nd->nd_repstat = ESTALE; 4329 goto out; 4330 } 4331 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 4332 lktype = LK_EXCLUSIVE; 4333 } else 4334 mpw = NULL; 4335 4336 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 4337 &credanon); 4338 vfs_unbusy(mp); 4339 4340 if (nd->nd_repstat == 0 && 4341 nfp->nfsrvfh_len >= NFSX_MYFH + NFSX_V4NAMEDDIRFH && 4342 nfp->nfsrvfh_len <= NFSX_MYFH + NFSX_V4NAMEDATTRFH) { 4343 if (nfp->nfsrvfh_len == NFSX_MYFH + NFSX_V4NAMEDDIRFH) 4344 vn_irflag_set_cond(*vpp, VIRF_NAMEDDIR); 4345 else 4346 vn_irflag_set_cond(*vpp, VIRF_NAMEDATTR); 4347 } 4348 4349 /* 4350 * For NFSv4 without a pseudo root fs, unexported file handles 4351 * can be returned, so that Lookup works everywhere. 4352 */ 4353 if (!nd->nd_repstat && exp->nes_exflag == 0 && 4354 !(nd->nd_flag & ND_NFSV4)) { 4355 vput(*vpp); 4356 *vpp = NULL; 4357 nd->nd_repstat = EACCES; 4358 } 4359 4360 /* 4361 * Personally, I've never seen any point in requiring a 4362 * reserved port#, since only in the rare case where the 4363 * clients are all boxes with secure system privileges, 4364 * does it provide any enhanced security, but... some people 4365 * believe it to be useful and keep putting this code back in. 4366 * (There is also some "security checker" out there that 4367 * complains if the nfs server doesn't enforce this.) 4368 * However, note the following: 4369 * RFC3530 (NFSv4) specifies that a reserved port# not be 4370 * required. 4371 * RFC2623 recommends that, if a reserved port# is checked for, 4372 * that there be a way to turn that off--> ifdef'd. 4373 */ 4374 #ifdef NFS_REQRSVPORT 4375 if (!nd->nd_repstat) { 4376 struct sockaddr_in *saddr; 4377 struct sockaddr_in6 *saddr6; 4378 4379 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 4380 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 4381 if (!(nd->nd_flag & ND_NFSV4) && 4382 ((saddr->sin_family == AF_INET && 4383 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 4384 (saddr6->sin6_family == AF_INET6 && 4385 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 4386 vput(*vpp); 4387 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 4388 } 4389 } 4390 #endif /* NFS_REQRSVPORT */ 4391 4392 /* 4393 * Check/setup credentials. 4394 */ 4395 if (!nd->nd_repstat) { 4396 nd->nd_saveduid = nd->nd_cred->cr_uid; 4397 nd->nd_repstat = nfsd_excred(nd, exp, credanon, 4398 nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type)); 4399 if (nd->nd_repstat) 4400 vput(*vpp); 4401 } 4402 if (credanon != NULL) 4403 crfree(credanon); 4404 if (nd->nd_repstat) { 4405 vn_finished_write(mpw); 4406 *vpp = NULL; 4407 } else if (mpp != NULL) { 4408 *mpp = mpw; 4409 } 4410 4411 out: 4412 NFSEXITCODE2(0, nd); 4413 } 4414 4415 /* 4416 * glue for fp. 4417 */ 4418 static int 4419 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 4420 { 4421 struct filedesc *fdp; 4422 struct file *fp; 4423 int error = 0; 4424 4425 fdp = p->td_proc->p_fd; 4426 if (fd < 0 || fd >= fdp->fd_nfiles || 4427 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 4428 error = EBADF; 4429 goto out; 4430 } 4431 *fpp = fp; 4432 4433 out: 4434 NFSEXITCODE(error); 4435 return (error); 4436 } 4437 4438 /* 4439 * Called from nfssvc() to update the exports list. Just call 4440 * vfs_export(). This has to be done, since the v4 root fake fs isn't 4441 * in the mount list. 4442 */ 4443 int 4444 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 4445 { 4446 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 4447 int error = 0; 4448 struct nameidata nd; 4449 fhandle_t fh; 4450 4451 error = vfs_export(VNET(nfsv4root_mnt), &nfsexargp->export, false); 4452 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 4453 VNET(nfs_rootfhset) = 0; 4454 else if (error == 0) { 4455 if (nfsexargp->fspec == NULL) { 4456 error = EPERM; 4457 goto out; 4458 } 4459 /* 4460 * If fspec != NULL, this is the v4root path. 4461 */ 4462 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec); 4463 if ((error = namei(&nd)) != 0) 4464 goto out; 4465 NDFREE_PNBUF(&nd); 4466 error = nfsvno_getfh(nd.ni_vp, &fh, p); 4467 vrele(nd.ni_vp); 4468 if (!error) { 4469 VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; 4470 NFSBCOPY((caddr_t)&fh, 4471 VNET(nfs_rootfh).nfsrvfh_data, 4472 sizeof (fhandle_t)); 4473 VNET(nfs_rootfhset) = 1; 4474 } 4475 } 4476 4477 out: 4478 NFSEXITCODE(error); 4479 return (error); 4480 } 4481 4482 /* 4483 * This function needs to test to see if the system is near its limit 4484 * for memory allocation via malloc() or mget() and return True iff 4485 * either of these resources are near their limit. 4486 * XXX (For now, this is just a stub.) 4487 */ 4488 int nfsrv_testmalloclimit = 0; 4489 int 4490 nfsrv_mallocmget_limit(void) 4491 { 4492 static int printmesg = 0; 4493 static int testval = 1; 4494 4495 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 4496 if ((printmesg++ % 100) == 0) 4497 printf("nfsd: malloc/mget near limit\n"); 4498 return (1); 4499 } 4500 return (0); 4501 } 4502 4503 /* 4504 * BSD specific initialization of a mount point. 4505 */ 4506 void 4507 nfsd_mntinit(void) 4508 { 4509 4510 NFSD_LOCK(); 4511 if (VNET(nfsrv_mntinited)) { 4512 NFSD_UNLOCK(); 4513 return; 4514 } 4515 VNET(nfsrv_mntinited) = true; 4516 nfsrvd_init(0); 4517 NFSD_UNLOCK(); 4518 4519 VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP, 4520 M_WAITOK | M_ZERO); 4521 VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 4522 mtx_init(&VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 4523 lockinit(&VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0); 4524 TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_nvnodelist); 4525 TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_lazyvnodelist); 4526 VNET(nfsv4root_mnt)->mnt_export = NULL; 4527 TAILQ_INIT(&VNET(nfsv4root_opt)); 4528 TAILQ_INIT(&VNET(nfsv4root_newopt)); 4529 VNET(nfsv4root_mnt)->mnt_opt = &VNET(nfsv4root_opt); 4530 VNET(nfsv4root_mnt)->mnt_optnew = &VNET(nfsv4root_newopt); 4531 VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0; 4532 VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0; 4533 callout_init(&VNET(nfsd_callout), 1); 4534 4535 nfsrvd_initcache(); 4536 nfsd_init(); 4537 } 4538 4539 static void 4540 nfsd_timer(void *arg) 4541 { 4542 struct vnet *vnetp; 4543 4544 vnetp = (struct vnet *)arg; 4545 CURVNET_SET_QUIET(vnetp); 4546 nfsrv_servertimer(vnetp); 4547 callout_reset_sbt(&VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, 4548 arg, 0); 4549 CURVNET_RESTORE(); 4550 } 4551 4552 /* 4553 * Get a vnode for a file handle, without checking exports, etc. 4554 */ 4555 struct vnode * 4556 nfsvno_getvp(fhandle_t *fhp) 4557 { 4558 struct mount *mp; 4559 struct vnode *vp; 4560 int error; 4561 4562 mp = vfs_busyfs(&fhp->fh_fsid); 4563 if (mp == NULL) 4564 return (NULL); 4565 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 4566 vfs_unbusy(mp); 4567 if (error) 4568 return (NULL); 4569 return (vp); 4570 } 4571 4572 /* 4573 * Do a local VOP_ADVLOCK(). 4574 */ 4575 int 4576 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 4577 u_int64_t end, struct thread *td) 4578 { 4579 int error = 0; 4580 struct flock fl; 4581 u_int64_t tlen; 4582 4583 if (nfsrv_dolocallocks == 0) 4584 goto out; 4585 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 4586 4587 fl.l_whence = SEEK_SET; 4588 fl.l_type = ftype; 4589 fl.l_start = (off_t)first; 4590 if (end == NFS64BITSSET) { 4591 fl.l_len = 0; 4592 } else { 4593 tlen = end - first; 4594 fl.l_len = (off_t)tlen; 4595 } 4596 /* 4597 * For FreeBSD8, the l_pid and l_sysid must be set to the same 4598 * values for all calls, so that all locks will be held by the 4599 * nfsd server. (The nfsd server handles conflicts between the 4600 * various clients.) 4601 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 4602 * bytes, so it can't be put in l_sysid. 4603 */ 4604 if (nfsv4_sysid == 0) 4605 nfsv4_sysid = nlm_acquire_next_sysid(); 4606 fl.l_pid = (pid_t)0; 4607 fl.l_sysid = (int)nfsv4_sysid; 4608 4609 if (ftype == F_UNLCK) 4610 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 4611 (F_POSIX | F_REMOTE)); 4612 else 4613 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 4614 (F_POSIX | F_REMOTE)); 4615 4616 out: 4617 NFSEXITCODE(error); 4618 return (error); 4619 } 4620 4621 /* 4622 * Check the nfsv4 root exports. 4623 */ 4624 int 4625 nfsvno_v4rootexport(struct nfsrv_descript *nd) 4626 { 4627 struct ucred *credanon; 4628 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; 4629 uint64_t exflags; 4630 4631 error = vfs_stdcheckexp(VNET(nfsv4root_mnt), nd->nd_nam, &exflags, 4632 &credanon, &numsecflavor, secflavors); 4633 if (error) { 4634 error = NFSERR_PROGUNAVAIL; 4635 goto out; 4636 } 4637 if (credanon != NULL) 4638 crfree(credanon); 4639 for (i = 0; i < numsecflavor; i++) { 4640 if (secflavors[i] == AUTH_SYS) 4641 nd->nd_flag |= ND_EXAUTHSYS; 4642 else if (secflavors[i] == RPCSEC_GSS_KRB5) 4643 nd->nd_flag |= ND_EXGSS; 4644 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 4645 nd->nd_flag |= ND_EXGSSINTEGRITY; 4646 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 4647 nd->nd_flag |= ND_EXGSSPRIVACY; 4648 } 4649 4650 /* And set ND_EXxx flags for TLS. */ 4651 if ((exflags & MNT_EXTLS) != 0) { 4652 nd->nd_flag |= ND_EXTLS; 4653 if ((exflags & MNT_EXTLSCERT) != 0) 4654 nd->nd_flag |= ND_EXTLSCERT; 4655 if ((exflags & MNT_EXTLSCERTUSER) != 0) 4656 nd->nd_flag |= ND_EXTLSCERTUSER; 4657 } 4658 4659 out: 4660 NFSEXITCODE(error); 4661 return (error); 4662 } 4663 4664 /* 4665 * Nfs server pseudo system call for the nfsd's 4666 */ 4667 /* 4668 * MPSAFE 4669 */ 4670 static int 4671 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 4672 { 4673 struct file *fp; 4674 struct nfsd_addsock_args sockarg; 4675 struct nfsd_nfsd_args nfsdarg; 4676 struct nfsd_nfsd_oargs onfsdarg; 4677 struct nfsd_pnfsd_args pnfsdarg; 4678 struct vnode *vp, *nvp, *curdvp; 4679 struct pnfsdsfile *pf; 4680 struct nfsdevice *ds, *fds; 4681 cap_rights_t rights; 4682 int buflen, error, ret; 4683 char *buf, *cp, *cp2, *cp3; 4684 char fname[PNFS_FILENAME_LEN + 1]; 4685 4686 CURVNET_SET(TD_TO_VNET(td)); 4687 if (uap->flag & NFSSVC_NFSDADDSOCK) { 4688 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 4689 if (error) 4690 goto out; 4691 /* 4692 * Since we don't know what rights might be required, 4693 * pretend that we need them all. It is better to be too 4694 * careful than too reckless. 4695 */ 4696 error = fget(td, sockarg.sock, 4697 cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp); 4698 if (error != 0) 4699 goto out; 4700 if (fp->f_type != DTYPE_SOCKET) { 4701 fdrop(fp, td); 4702 error = EPERM; 4703 goto out; 4704 } 4705 error = nfsrvd_addsock(fp); 4706 fdrop(fp, td); 4707 } else if (uap->flag & NFSSVC_NFSDNFSD) { 4708 if (uap->argp == NULL) { 4709 error = EINVAL; 4710 goto out; 4711 } 4712 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 4713 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 4714 if (error == 0) { 4715 nfsdarg.principal = onfsdarg.principal; 4716 nfsdarg.minthreads = onfsdarg.minthreads; 4717 nfsdarg.maxthreads = onfsdarg.maxthreads; 4718 nfsdarg.version = 1; 4719 nfsdarg.addr = NULL; 4720 nfsdarg.addrlen = 0; 4721 nfsdarg.dnshost = NULL; 4722 nfsdarg.dnshostlen = 0; 4723 nfsdarg.dspath = NULL; 4724 nfsdarg.dspathlen = 0; 4725 nfsdarg.mdspath = NULL; 4726 nfsdarg.mdspathlen = 0; 4727 nfsdarg.mirrorcnt = 1; 4728 } 4729 } else 4730 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 4731 if (error) 4732 goto out; 4733 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 4734 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 4735 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 4736 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 4737 nfsdarg.mirrorcnt >= 1 && 4738 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 4739 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 4740 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 4741 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 4742 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 4743 nfsdarg.dspathlen, nfsdarg.dnshostlen, 4744 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 4745 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 4746 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 4747 if (error != 0) { 4748 free(cp, M_TEMP); 4749 goto out; 4750 } 4751 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ 4752 nfsdarg.addr = cp; 4753 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 4754 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 4755 if (error != 0) { 4756 free(nfsdarg.addr, M_TEMP); 4757 free(cp, M_TEMP); 4758 goto out; 4759 } 4760 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 4761 nfsdarg.dnshost = cp; 4762 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 4763 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 4764 if (error != 0) { 4765 free(nfsdarg.addr, M_TEMP); 4766 free(nfsdarg.dnshost, M_TEMP); 4767 free(cp, M_TEMP); 4768 goto out; 4769 } 4770 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 4771 nfsdarg.dspath = cp; 4772 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 4773 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 4774 if (error != 0) { 4775 free(nfsdarg.addr, M_TEMP); 4776 free(nfsdarg.dnshost, M_TEMP); 4777 free(nfsdarg.dspath, M_TEMP); 4778 free(cp, M_TEMP); 4779 goto out; 4780 } 4781 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */ 4782 nfsdarg.mdspath = cp; 4783 } else { 4784 nfsdarg.addr = NULL; 4785 nfsdarg.addrlen = 0; 4786 nfsdarg.dnshost = NULL; 4787 nfsdarg.dnshostlen = 0; 4788 nfsdarg.dspath = NULL; 4789 nfsdarg.dspathlen = 0; 4790 nfsdarg.mdspath = NULL; 4791 nfsdarg.mdspathlen = 0; 4792 nfsdarg.mirrorcnt = 1; 4793 } 4794 nfsd_timer(TD_TO_VNET(td)); 4795 error = nfsrvd_nfsd(td, &nfsdarg); 4796 callout_drain(&VNET(nfsd_callout)); 4797 free(nfsdarg.addr, M_TEMP); 4798 free(nfsdarg.dnshost, M_TEMP); 4799 free(nfsdarg.dspath, M_TEMP); 4800 free(nfsdarg.mdspath, M_TEMP); 4801 } else if (uap->flag & NFSSVC_PNFSDS) { 4802 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 4803 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 4804 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 4805 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4806 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 4807 NULL); 4808 if (error == 0) 4809 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 4810 free(cp, M_TEMP); 4811 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 4812 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4813 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 4814 buf = malloc(buflen, M_TEMP, M_WAITOK); 4815 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 4816 NULL); 4817 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 4818 if (error == 0 && pnfsdarg.dspath != NULL) { 4819 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4820 error = copyinstr(pnfsdarg.dspath, cp2, 4821 PATH_MAX + 1, NULL); 4822 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 4823 error); 4824 } else 4825 cp2 = NULL; 4826 if (error == 0 && pnfsdarg.curdspath != NULL) { 4827 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 4828 error = copyinstr(pnfsdarg.curdspath, cp3, 4829 PATH_MAX + 1, NULL); 4830 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 4831 error); 4832 } else 4833 cp3 = NULL; 4834 curdvp = NULL; 4835 fds = NULL; 4836 if (error == 0) 4837 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 4838 &buflen, fname, td, &vp, &nvp, &pf, &ds, 4839 &fds); 4840 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 4841 if (error == 0) { 4842 if (pf->dsf_dir >= nfsrv_dsdirsize) { 4843 printf("copymr: dsdir out of range\n"); 4844 pf->dsf_dir = 0; 4845 } 4846 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 4847 error = nfsrv_copymr(vp, nvp, 4848 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 4849 (struct pnfsdsfile *)buf, 4850 buflen / sizeof(*pf), td->td_ucred, td); 4851 vput(vp); 4852 vput(nvp); 4853 if (fds != NULL && error == 0) { 4854 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 4855 ret = vn_lock(curdvp, LK_EXCLUSIVE); 4856 if (ret == 0) { 4857 nfsrv_dsremove(curdvp, fname, 4858 td->td_ucred, td); 4859 NFSVOPUNLOCK(curdvp); 4860 } 4861 } 4862 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 4863 } 4864 free(cp, M_TEMP); 4865 free(cp2, M_TEMP); 4866 free(cp3, M_TEMP); 4867 free(buf, M_TEMP); 4868 } 4869 } else { 4870 error = nfssvc_srvcall(td, uap, td->td_ucred); 4871 } 4872 4873 out: 4874 CURVNET_RESTORE(); 4875 NFSEXITCODE(error); 4876 return (error); 4877 } 4878 4879 static int 4880 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 4881 { 4882 struct nfsex_args export; 4883 struct nfsex_oldargs oexp; 4884 struct file *fp = NULL; 4885 int stablefd, i, len; 4886 struct nfsd_clid adminrevoke; 4887 struct nfsd_dumplist dumplist; 4888 struct nfsd_dumpclients *dumpclients; 4889 struct nfsd_dumplocklist dumplocklist; 4890 struct nfsd_dumplocks *dumplocks; 4891 struct nameidata nd; 4892 vnode_t vp; 4893 int error = EINVAL, igotlock; 4894 struct proc *procp; 4895 gid_t *grps; 4896 4897 if (uap->flag & NFSSVC_PUBLICFH) { 4898 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 4899 sizeof (fhandle_t)); 4900 error = copyin(uap->argp, 4901 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 4902 if (!error) 4903 nfs_pubfhset = 1; 4904 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 4905 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { 4906 error = copyin(uap->argp,(caddr_t)&export, 4907 sizeof (struct nfsex_args)); 4908 if (!error) { 4909 grps = NULL; 4910 if (export.export.ex_ngroups > NGROUPS_MAX || 4911 export.export.ex_ngroups < 0) 4912 error = EINVAL; 4913 else if (export.export.ex_ngroups > 0) { 4914 grps = malloc(export.export.ex_ngroups * 4915 sizeof(gid_t), M_TEMP, M_WAITOK); 4916 error = copyin(export.export.ex_groups, grps, 4917 export.export.ex_ngroups * sizeof(gid_t)); 4918 export.export.ex_groups = grps; 4919 } else 4920 export.export.ex_groups = NULL; 4921 if (!error) 4922 error = nfsrv_v4rootexport(&export, cred, p); 4923 free(grps, M_TEMP); 4924 } 4925 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 4926 NFSSVC_V4ROOTEXPORT) { 4927 error = copyin(uap->argp,(caddr_t)&oexp, 4928 sizeof (struct nfsex_oldargs)); 4929 if (!error) { 4930 memset(&export.export, 0, sizeof(export.export)); 4931 export.export.ex_flags = (uint64_t)oexp.export.ex_flags; 4932 export.export.ex_root = oexp.export.ex_root; 4933 export.export.ex_uid = oexp.export.ex_anon.cr_uid; 4934 export.export.ex_ngroups = 4935 oexp.export.ex_anon.cr_ngroups; 4936 export.export.ex_groups = NULL; 4937 if (export.export.ex_ngroups > XU_NGROUPS || 4938 export.export.ex_ngroups < 0) 4939 error = EINVAL; 4940 else if (export.export.ex_ngroups > 0) { 4941 export.export.ex_groups = malloc( 4942 export.export.ex_ngroups * sizeof(gid_t), 4943 M_TEMP, M_WAITOK); 4944 for (i = 0; i < export.export.ex_ngroups; i++) 4945 export.export.ex_groups[i] = 4946 oexp.export.ex_anon.cr_groups[i]; 4947 } 4948 export.export.ex_addr = oexp.export.ex_addr; 4949 export.export.ex_addrlen = oexp.export.ex_addrlen; 4950 export.export.ex_mask = oexp.export.ex_mask; 4951 export.export.ex_masklen = oexp.export.ex_masklen; 4952 export.export.ex_indexfile = oexp.export.ex_indexfile; 4953 export.export.ex_numsecflavors = 4954 oexp.export.ex_numsecflavors; 4955 if (export.export.ex_numsecflavors >= MAXSECFLAVORS || 4956 export.export.ex_numsecflavors < 0) 4957 error = EINVAL; 4958 else { 4959 for (i = 0; i < export.export.ex_numsecflavors; 4960 i++) 4961 export.export.ex_secflavors[i] = 4962 oexp.export.ex_secflavors[i]; 4963 } 4964 export.fspec = oexp.fspec; 4965 if (error == 0) 4966 error = nfsrv_v4rootexport(&export, cred, p); 4967 free(export.export.ex_groups, M_TEMP); 4968 } 4969 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 4970 nfs_pubfhset = 0; 4971 error = 0; 4972 } else if (uap->flag & NFSSVC_STABLERESTART) { 4973 error = copyin(uap->argp, (caddr_t)&stablefd, 4974 sizeof (int)); 4975 if (!error) 4976 error = fp_getfvp(p, stablefd, &fp, &vp); 4977 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 4978 error = EBADF; 4979 if (!error && VNET(nfsrv_numnfsd) != 0) 4980 error = ENXIO; 4981 if (!error) { 4982 VNET(nfsrv_stablefirst).nsf_fp = fp; 4983 nfsrv_setupstable(p); 4984 } 4985 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 4986 error = copyin(uap->argp, (caddr_t)&adminrevoke, 4987 sizeof (struct nfsd_clid)); 4988 if (!error) 4989 error = nfsrv_adminrevoke(&adminrevoke, p); 4990 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 4991 error = copyin(uap->argp, (caddr_t)&dumplist, 4992 sizeof (struct nfsd_dumplist)); 4993 if (!error && (dumplist.ndl_size < 1 || 4994 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 4995 error = EPERM; 4996 if (!error) { 4997 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 4998 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 4999 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 5000 error = copyout(dumpclients, dumplist.ndl_list, len); 5001 free(dumpclients, M_TEMP); 5002 } 5003 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 5004 error = copyin(uap->argp, (caddr_t)&dumplocklist, 5005 sizeof (struct nfsd_dumplocklist)); 5006 if (!error && (dumplocklist.ndllck_size < 1 || 5007 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 5008 error = EPERM; 5009 if (!error) 5010 error = nfsrv_lookupfilename(&nd, 5011 dumplocklist.ndllck_fname, p); 5012 if (!error) { 5013 len = sizeof (struct nfsd_dumplocks) * 5014 dumplocklist.ndllck_size; 5015 dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 5016 nfsrv_dumplocks(nd.ni_vp, dumplocks, 5017 dumplocklist.ndllck_size, p); 5018 vput(nd.ni_vp); 5019 error = copyout(dumplocks, dumplocklist.ndllck_list, 5020 len); 5021 free(dumplocks, M_TEMP); 5022 } 5023 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 5024 procp = p->td_proc; 5025 PROC_LOCK(procp); 5026 nfsd_master_pid = procp->p_pid; 5027 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 5028 nfsd_master_start = procp->p_stats->p_start; 5029 VNET(nfsd_master_proc) = procp; 5030 PROC_UNLOCK(procp); 5031 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 5032 NFSLOCKV4ROOTMUTEX(); 5033 if (!VNET(nfsrv_suspend_nfsd)) { 5034 /* Lock out all nfsd threads */ 5035 do { 5036 igotlock = nfsv4_lock( 5037 &VNET(nfsd_suspend_lock), 1, NULL, 5038 NFSV4ROOTLOCKMUTEXPTR, NULL); 5039 } while (igotlock == 0 && 5040 !VNET(nfsrv_suspend_nfsd)); 5041 VNET(nfsrv_suspend_nfsd) = true; 5042 } 5043 NFSUNLOCKV4ROOTMUTEX(); 5044 error = 0; 5045 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 5046 NFSLOCKV4ROOTMUTEX(); 5047 if (VNET(nfsrv_suspend_nfsd)) { 5048 nfsv4_unlock(&VNET(nfsd_suspend_lock), 0); 5049 VNET(nfsrv_suspend_nfsd) = false; 5050 } 5051 NFSUNLOCKV4ROOTMUTEX(); 5052 error = 0; 5053 } 5054 5055 NFSEXITCODE(error); 5056 return (error); 5057 } 5058 5059 /* 5060 * Check exports. 5061 * Returns 0 if ok, 1 otherwise. 5062 */ 5063 int 5064 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 5065 { 5066 int i; 5067 5068 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || 5069 (NFSVNO_EXTLSCERT(exp) && 5070 (nd->nd_flag & ND_TLSCERT) == 0) || 5071 (NFSVNO_EXTLSCERTUSER(exp) && 5072 (nd->nd_flag & ND_TLSCERTUSER) == 0)) { 5073 if ((nd->nd_flag & ND_NFSV4) != 0) 5074 return (NFSERR_WRONGSEC); 5075 #ifdef notnow 5076 /* There is currently no auth_stat for this. */ 5077 else if ((nd->nd_flag & ND_TLS) == 0) 5078 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS); 5079 else 5080 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST); 5081 #endif 5082 else 5083 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 5084 } 5085 5086 /* 5087 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use 5088 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 5089 */ 5090 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO) 5091 return (0); 5092 5093 /* 5094 * This seems odd, but allow the case where the security flavor 5095 * list is empty. This happens when NFSv4 is traversing non-exported 5096 * file systems. Exported file systems should always have a non-empty 5097 * security flavor list. 5098 */ 5099 if (exp->nes_numsecflavor == 0) 5100 return (0); 5101 5102 for (i = 0; i < exp->nes_numsecflavor; i++) { 5103 /* 5104 * The tests for privacy and integrity must be first, 5105 * since ND_GSS is set for everything but AUTH_SYS. 5106 */ 5107 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 5108 (nd->nd_flag & ND_GSSPRIVACY)) 5109 return (0); 5110 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 5111 (nd->nd_flag & ND_GSSINTEGRITY)) 5112 return (0); 5113 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 5114 (nd->nd_flag & ND_GSS)) 5115 return (0); 5116 if (exp->nes_secflavors[i] == AUTH_SYS && 5117 (nd->nd_flag & ND_GSS) == 0) 5118 return (0); 5119 } 5120 if ((nd->nd_flag & ND_NFSV4) != 0) 5121 return (NFSERR_WRONGSEC); 5122 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 5123 } 5124 5125 /* 5126 * Calculate a hash value for the fid in a file handle. 5127 */ 5128 uint32_t 5129 nfsrv_hashfh(fhandle_t *fhp) 5130 { 5131 uint32_t hashval; 5132 5133 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 5134 return (hashval); 5135 } 5136 5137 /* 5138 * Calculate a hash value for the sessionid. 5139 */ 5140 uint32_t 5141 nfsrv_hashsessionid(uint8_t *sessionid) 5142 { 5143 uint32_t hashval; 5144 5145 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 5146 return (hashval); 5147 } 5148 5149 /* 5150 * Signal the userland master nfsd to backup the stable restart file. 5151 */ 5152 void 5153 nfsrv_backupstable(void) 5154 { 5155 struct proc *procp; 5156 5157 if (VNET(nfsd_master_proc) != NULL) { 5158 procp = pfind(nfsd_master_pid); 5159 /* Try to make sure it is the correct process. */ 5160 if (procp == VNET(nfsd_master_proc) && 5161 procp->p_stats->p_start.tv_sec == 5162 nfsd_master_start.tv_sec && 5163 procp->p_stats->p_start.tv_usec == 5164 nfsd_master_start.tv_usec && 5165 strcmp(procp->p_comm, nfsd_master_comm) == 0) 5166 kern_psignal(procp, SIGUSR2); 5167 else 5168 VNET(nfsd_master_proc) = NULL; 5169 5170 if (procp != NULL) 5171 PROC_UNLOCK(procp); 5172 } 5173 } 5174 5175 /* 5176 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 5177 * The arguments are in a structure, so that they can be passed through 5178 * taskqueue for a kernel process to execute this function. 5179 */ 5180 struct nfsrvdscreate { 5181 int done; 5182 int inprog; 5183 struct task tsk; 5184 struct ucred *tcred; 5185 struct vnode *dvp; 5186 NFSPROC_T *p; 5187 struct pnfsdsfile *pf; 5188 int err; 5189 fhandle_t fh; 5190 struct vattr va; 5191 struct vattr createva; 5192 }; 5193 5194 int 5195 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 5196 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 5197 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 5198 { 5199 struct vnode *nvp; 5200 struct nameidata named; 5201 struct vattr va; 5202 char *bufp; 5203 u_long *hashp; 5204 struct nfsnode *np; 5205 struct nfsmount *nmp; 5206 int error; 5207 5208 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 5209 LOCKPARENT | LOCKLEAF | NOCACHE); 5210 nfsvno_setpathbuf(&named, &bufp, &hashp); 5211 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 5212 named.ni_cnd.cn_nameptr = bufp; 5213 if (fnamep != NULL) { 5214 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 5215 named.ni_cnd.cn_namelen = strlen(bufp); 5216 } else 5217 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 5218 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 5219 5220 /* Create the date file in the DS mount. */ 5221 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 5222 if (error == 0) { 5223 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 5224 vref(dvp); 5225 VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false); 5226 if (error == 0) { 5227 /* Set the ownership of the file. */ 5228 error = VOP_SETATTR(nvp, nvap, tcred); 5229 NFSD_DEBUG(4, "nfsrv_dscreate:" 5230 " setattr-uid=%d\n", error); 5231 if (error != 0) 5232 vput(nvp); 5233 } 5234 if (error != 0) 5235 printf("pNFS: pnfscreate failed=%d\n", error); 5236 } else 5237 printf("pNFS: pnfscreate vnlock=%d\n", error); 5238 if (error == 0) { 5239 np = VTONFS(nvp); 5240 nmp = VFSTONFS(nvp->v_mount); 5241 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 5242 != 0 || nmp->nm_nam->sa_len > sizeof( 5243 struct sockaddr_in6) || 5244 np->n_fhp->nfh_len != NFSX_MYFH) { 5245 printf("Bad DS file: fstype=%s salen=%d" 5246 " fhlen=%d\n", 5247 nvp->v_mount->mnt_vfc->vfc_name, 5248 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 5249 error = ENOENT; 5250 } 5251 5252 /* Set extattrs for the DS on the MDS file. */ 5253 if (error == 0) { 5254 if (dsa != NULL) { 5255 error = VOP_GETATTR(nvp, &va, tcred); 5256 if (error == 0) { 5257 dsa->dsa_filerev = va.va_filerev; 5258 dsa->dsa_size = va.va_size; 5259 dsa->dsa_atime = va.va_atime; 5260 dsa->dsa_mtime = va.va_mtime; 5261 dsa->dsa_bytes = va.va_bytes; 5262 } 5263 } 5264 if (error == 0) { 5265 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 5266 NFSX_MYFH); 5267 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 5268 nmp->nm_nam->sa_len); 5269 NFSBCOPY(named.ni_cnd.cn_nameptr, 5270 pf->dsf_filename, 5271 sizeof(pf->dsf_filename)); 5272 } 5273 } else 5274 printf("pNFS: pnfscreate can't get DS" 5275 " attr=%d\n", error); 5276 if (nvpp != NULL && error == 0) 5277 *nvpp = nvp; 5278 else 5279 vput(nvp); 5280 } 5281 nfsvno_relpathbuf(&named); 5282 return (error); 5283 } 5284 5285 /* 5286 * Start up the thread that will execute nfsrv_dscreate(). 5287 */ 5288 static void 5289 start_dscreate(void *arg, int pending) 5290 { 5291 struct nfsrvdscreate *dsc; 5292 5293 dsc = (struct nfsrvdscreate *)arg; 5294 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 5295 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 5296 dsc->done = 1; 5297 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 5298 } 5299 5300 /* 5301 * Create a pNFS data file on the Data Server(s). 5302 */ 5303 static void 5304 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 5305 NFSPROC_T *p) 5306 { 5307 struct nfsrvdscreate *dsc, *tdsc = NULL; /* Shut up gcc. */ 5308 struct nfsdevice *ds, *tds, *fds; 5309 struct mount *mp; 5310 struct pnfsdsfile *pf, *tpf; 5311 struct pnfsdsattr dsattr; 5312 struct vattr va; 5313 struct vnode **dvp; 5314 struct nfsmount *nmp; 5315 fhandle_t fh; 5316 uid_t vauid; 5317 gid_t vagid; 5318 u_short vamode; 5319 struct ucred *tcred; 5320 int *dsdir, error, i, j, mirrorcnt, ret, stripecnt; 5321 int failpos, timo; 5322 uint64_t stripesiz; 5323 5324 /* Get a DS server directory in a round-robin order. */ 5325 mirrorcnt = 1; 5326 mp = vp->v_mount; 5327 ds = fds = NULL; 5328 i = j = 0; 5329 dvp = malloc(sizeof(*dvp) * nfsrv_maxpnfsmirror * nfsrv_maxstripecnt, 5330 M_TEMP, M_WAITOK); 5331 dsdir = malloc(sizeof(*dsdir) * nfsrv_maxpnfsmirror * 5332 nfsrv_maxstripecnt, M_TEMP, M_WAITOK); 5333 stripesiz = nfsrv_stripesiz; 5334 NFSDDSLOCK(); 5335 /* 5336 * Search for the first entry that handles this MDS fs, but use the 5337 * first entry for all MDS fs's otherwise. 5338 */ 5339 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { 5340 if (tds->nfsdev_nmp != NULL) { 5341 i++; 5342 if (tds->nfsdev_mdsisset == 0 && ds == NULL) 5343 ds = tds; 5344 else if (tds->nfsdev_mdsisset != 0 && fsidcmp( 5345 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { 5346 if (j == 0) { 5347 ds = fds = tds; 5348 if (nfsrv_maxstripecnt > 1) 5349 stripesiz = 5350 tds->nfsdev_mdsstripesiz; 5351 } 5352 if (stripesiz == 0) 5353 break; 5354 j++; 5355 } 5356 } 5357 } 5358 if (ds == NULL) { 5359 NFSDDSUNLOCK(); 5360 free(dvp, M_TEMP); 5361 free(dsdir, M_TEMP); 5362 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 5363 return; 5364 } 5365 5366 /* 5367 * i or j is the count of devices. The stripecnt is that number 5368 * of devices devided by the number of mirrors. 5369 */ 5370 stripecnt = 0; 5371 if (stripesiz > 0) { 5372 if (j > 0) 5373 stripecnt = j / nfsrv_maxpnfsmirror; 5374 else 5375 stripecnt = i / nfsrv_maxpnfsmirror; 5376 if (stripecnt > nfsrv_maxstripecnt) 5377 stripecnt = nfsrv_maxstripecnt; 5378 } 5379 if (stripecnt == 0) 5380 stripecnt = 1; 5381 5382 /* Set the first device as found above. */ 5383 i = dsdir[0] = ds->nfsdev_nextdir; 5384 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 5385 dvp[0] = ds->nfsdev_dsdir[i]; 5386 tds = TAILQ_NEXT(ds, nfsdev_list); 5387 if ((nfsrv_maxpnfsmirror > 1 || stripecnt > 1) && tds != NULL) { 5388 j = 1; /* Stripe number */ 5389 mirrorcnt = 0; 5390 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { 5391 if (tds->nfsdev_nmp != NULL && 5392 ((tds->nfsdev_mdsisset == 0 && fds == NULL) || 5393 (tds->nfsdev_mdsisset != 0 && fds != NULL && 5394 fsidcmp(&mp->mnt_stat.f_fsid, 5395 &tds->nfsdev_mdsfsid) == 0))) { 5396 dsdir[mirrorcnt * stripecnt + j] = i; 5397 dvp[mirrorcnt * stripecnt + j] = 5398 tds->nfsdev_dsdir[i]; 5399 j++; 5400 if (j >= stripecnt) { 5401 mirrorcnt++; 5402 if (mirrorcnt >= nfsrv_maxpnfsmirror) 5403 break; 5404 j = 0; 5405 } 5406 } 5407 } 5408 if (mirrorcnt == 0) { 5409 mirrorcnt = 1; 5410 stripecnt = j; 5411 } 5412 } else 5413 stripecnt = 1; 5414 /* Put at end of list to implement round-robin usage. */ 5415 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 5416 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 5417 NFSDDSUNLOCK(); 5418 dsc = NULL; 5419 j = mirrorcnt * stripecnt; 5420 if (j > 1) 5421 tdsc = dsc = malloc(sizeof(*dsc) * (j - 1), M_TEMP, 5422 M_WAITOK | M_ZERO); 5423 tpf = pf = malloc(sizeof(*pf) * j, M_TEMP, M_WAITOK | M_ZERO); 5424 5425 error = nfsvno_getfh(vp, &fh, p); 5426 if (error == 0) 5427 error = VOP_GETATTR(vp, &va, cred); 5428 if (error == 0) { 5429 /* Set the attributes for "vp" to Setattr the DS vp. */ 5430 vauid = va.va_uid; 5431 vagid = va.va_gid; 5432 vamode = va.va_mode; 5433 VATTR_NULL(&va); 5434 va.va_uid = vauid; 5435 va.va_gid = vagid; 5436 va.va_mode = vamode; 5437 va.va_size = 0; 5438 } else 5439 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 5440 5441 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 5442 cred->cr_gid); 5443 /* Make data file name based on FH. */ 5444 tcred = newnfs_getcred(); 5445 5446 /* 5447 * Create the file on each DS mirror, using kernel process(es) for the 5448 * additional mirrors. 5449 */ 5450 failpos = -1; 5451 for (i = 0; i < j - 1 && error == 0; i++, tpf++, tdsc++) { 5452 tpf->dsf_stripecnt = stripecnt; 5453 tpf->dsf_stripesiz = stripesiz; 5454 tpf->dsf_dir = dsdir[i]; 5455 tdsc->tcred = tcred; 5456 tdsc->p = p; 5457 tdsc->pf = tpf; 5458 tdsc->createva = *vap; 5459 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); 5460 tdsc->va = va; 5461 tdsc->dvp = dvp[i]; 5462 tdsc->done = 0; 5463 tdsc->inprog = 0; 5464 tdsc->err = 0; 5465 ret = EIO; 5466 if (nfs_pnfsiothreads != 0) { 5467 ret = nfs_pnfsio(start_dscreate, tdsc); 5468 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 5469 } 5470 if (ret != 0) { 5471 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 5472 NULL, tcred, p, NULL); 5473 if (ret != 0) { 5474 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 5475 error)); 5476 if (failpos == -1 && nfsds_failerr(ret)) 5477 failpos = i; 5478 else 5479 error = ret; 5480 } 5481 } 5482 } 5483 if (error == 0) { 5484 tpf->dsf_stripecnt = stripecnt; 5485 tpf->dsf_stripesiz = stripesiz; 5486 tpf->dsf_dir = dsdir[j - 1]; 5487 error = nfsrv_dscreate(dvp[j - 1], vap, &va, &fh, tpf, 5488 &dsattr, NULL, tcred, p, NULL); 5489 if (failpos == -1 && j > 1 && nfsds_failerr(error)) { 5490 failpos = j - 1; 5491 error = 0; 5492 } 5493 } 5494 timo = hz / 50; /* Wait for 20msec. */ 5495 if (timo < 1) 5496 timo = 1; 5497 /* Wait for kernel task(s) to complete. */ 5498 for (tdsc = dsc, i = 0; i < j - 1; i++, tdsc++) { 5499 while (tdsc->inprog != 0 && tdsc->done == 0) 5500 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 5501 if (tdsc->err != 0) { 5502 if (failpos == -1 && nfsds_failerr(tdsc->err)) 5503 failpos = i; 5504 else if (error == 0) 5505 error = tdsc->err; 5506 } 5507 } 5508 5509 /* 5510 * If failpos has been set, that DS has failed, so it needs 5511 * to be disabled. 5512 */ 5513 if (failpos >= 0) { 5514 nmp = VFSTONFS(dvp[failpos]->v_mount); 5515 NFSLOCKMNT(nmp); 5516 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 5517 NFSMNTP_CANCELRPCS)) == 0) { 5518 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 5519 NFSUNLOCKMNT(nmp); 5520 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 5521 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 5522 ds); 5523 if (ds != NULL) 5524 nfsrv_killrpcs(nmp); 5525 NFSLOCKMNT(nmp); 5526 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 5527 wakeup(nmp); 5528 } 5529 NFSUNLOCKMNT(nmp); 5530 } 5531 5532 NFSFREECRED(tcred); 5533 if (error == 0) { 5534 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 5535 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d stripecnt=%d\n", 5536 mirrorcnt, stripecnt); 5537 5538 error = vn_extattr_set(vp, IO_NODELOCKED, 5539 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 5540 sizeof(*pf) * mirrorcnt * stripecnt, (char *)pf, p); 5541 if (error == 0) 5542 error = vn_extattr_set(vp, IO_NODELOCKED, 5543 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 5544 sizeof(dsattr), (char *)&dsattr, p); 5545 if (error != 0) 5546 printf("pNFS: pnfscreate setextattr=%d\n", 5547 error); 5548 } else 5549 printf("pNFS: pnfscreate=%d\n", error); 5550 free(dvp, M_TEMP); 5551 free(dsdir, M_TEMP); 5552 free(pf, M_TEMP); 5553 free(dsc, M_TEMP); 5554 } 5555 5556 /* 5557 * Get the information needed to remove the pNFS Data Server file from the 5558 * Metadata file. Upon success, *dvppp is set to an array of locked 5559 * DS directory vnode(s). The caller must unlock this array of *dvp when done 5560 * with it. 5561 */ 5562 static void 5563 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode ***dvppp, 5564 int *dsfilecntp, char *fname, fhandle_t *fhp) 5565 { 5566 struct vattr va; 5567 struct ucred *tcred; 5568 char *buf; 5569 int buflen, error; 5570 5571 *dvppp = NULL; 5572 /* If not an exported regular file or not a pNFS server, just return. */ 5573 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 5574 nfsrv_devidcnt == 0) 5575 return; 5576 5577 /* Check to see if this is the last hard link. */ 5578 tcred = newnfs_getcred(); 5579 error = VOP_GETATTR(vp, &va, tcred); 5580 NFSFREECRED(tcred); 5581 if (error != 0) { 5582 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 5583 return; 5584 } 5585 if (va.va_nlink > 1) 5586 return; 5587 5588 error = nfsvno_getfh(vp, fhp, p); 5589 if (error != 0) { 5590 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 5591 return; 5592 } 5593 5594 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS * 5595 NFSDEV_MAXSTRIPE; 5596 buf = malloc(buflen, M_TEMP, M_WAITOK); 5597 /* Get the directory vnode for the DS mount and the file handle. */ 5598 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, dsfilecntp, NULL, NULL, 5599 p, dvppp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 5600 free(buf, M_TEMP); 5601 if (error != 0) 5602 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 5603 } 5604 5605 /* 5606 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 5607 * The arguments are in a structure, so that they can be passed through 5608 * taskqueue for a kernel process to execute this function. 5609 */ 5610 struct nfsrvdsremove { 5611 int done; 5612 int inprog; 5613 struct task tsk; 5614 struct ucred *tcred; 5615 struct vnode *dvp; 5616 NFSPROC_T *p; 5617 int err; 5618 char fname[PNFS_FILENAME_LEN + 1]; 5619 }; 5620 5621 static int 5622 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 5623 NFSPROC_T *p) 5624 { 5625 struct nameidata named; 5626 struct vnode *nvp; 5627 char *bufp; 5628 u_long *hashp; 5629 int error; 5630 5631 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 5632 if (error != 0) 5633 return (error); 5634 named.ni_cnd.cn_nameiop = DELETE; 5635 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 5636 named.ni_cnd.cn_cred = tcred; 5637 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 5638 nfsvno_setpathbuf(&named, &bufp, &hashp); 5639 named.ni_cnd.cn_nameptr = bufp; 5640 named.ni_cnd.cn_namelen = strlen(fname); 5641 strlcpy(bufp, fname, NAME_MAX); 5642 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 5643 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 5644 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 5645 if (error == 0) { 5646 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 5647 vput(nvp); 5648 } 5649 NFSVOPUNLOCK(dvp); 5650 nfsvno_relpathbuf(&named); 5651 if (error != 0) 5652 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 5653 return (error); 5654 } 5655 5656 /* 5657 * Start up the thread that will execute nfsrv_dsremove(). 5658 */ 5659 static void 5660 start_dsremove(void *arg, int pending) 5661 { 5662 struct nfsrvdsremove *dsrm; 5663 5664 dsrm = (struct nfsrvdsremove *)arg; 5665 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 5666 dsrm->p); 5667 dsrm->done = 1; 5668 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 5669 } 5670 5671 /* 5672 * Remove a pNFS data file from a Data Server. 5673 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 5674 * removed to set up the dvp and fill in the FH. 5675 */ 5676 static void 5677 nfsrv_pnfsremove(struct vnode **dvpp, int dsfilecnt, char *fname, 5678 fhandle_t *fhp, NFSPROC_T *p) 5679 { 5680 struct ucred *tcred; 5681 struct nfsrvdsremove *dsrm, *tdsrm = NULL; /* Shut up gcc. */ 5682 struct nfsdevice *ds; 5683 struct nfsmount *nmp; 5684 struct vnode **tdvpp; 5685 int failpos, i, ret, timo; 5686 5687 tcred = newnfs_getcred(); 5688 dsrm = NULL; 5689 if (dsfilecnt > 1) 5690 tdsrm = dsrm = malloc(sizeof(*dsrm) * dsfilecnt - 1, M_TEMP, 5691 M_WAITOK); 5692 /* 5693 * Remove the file on each DS mirror, using kernel process(es) for the 5694 * additional mirrors. 5695 */ 5696 failpos = -1; 5697 tdvpp = dvpp; 5698 for (i = 0; i < dsfilecnt - 1; i++, tdsrm++, tdvpp++) { 5699 tdsrm->tcred = tcred; 5700 tdsrm->p = p; 5701 tdsrm->dvp = *tdvpp; 5702 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 5703 tdsrm->inprog = 0; 5704 tdsrm->done = 0; 5705 tdsrm->err = 0; 5706 ret = EIO; 5707 if (nfs_pnfsiothreads != 0) { 5708 ret = nfs_pnfsio(start_dsremove, tdsrm); 5709 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 5710 } 5711 if (ret != 0) { 5712 ret = nfsrv_dsremove(tdsrm->dvp, fname, tcred, p); 5713 if (failpos == -1 && nfsds_failerr(ret)) 5714 failpos = i; 5715 } 5716 } 5717 ret = nfsrv_dsremove(*tdvpp, fname, tcred, p); 5718 if (failpos == -1 && dsfilecnt > 1 && nfsds_failerr(ret)) 5719 failpos = dsfilecnt - 1; 5720 timo = hz / 50; /* Wait for 20msec. */ 5721 if (timo < 1) 5722 timo = 1; 5723 /* Wait for kernel task(s) to complete. */ 5724 for (tdsrm = dsrm, i = 0; i < dsfilecnt - 1; i++, tdsrm++) { 5725 while (tdsrm->inprog != 0 && tdsrm->done == 0) 5726 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 5727 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 5728 failpos = i; 5729 } 5730 5731 /* 5732 * If failpos has been set, that mirror has failed, so it needs 5733 * to be disabled. 5734 */ 5735 if (failpos >= 0) { 5736 tdvpp = dvpp + failpos; 5737 nmp = VFSTONFS((*tdvpp)->v_mount); 5738 NFSLOCKMNT(nmp); 5739 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 5740 NFSMNTP_CANCELRPCS)) == 0) { 5741 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 5742 NFSUNLOCKMNT(nmp); 5743 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 5744 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 5745 ds); 5746 if (ds != NULL) 5747 nfsrv_killrpcs(nmp); 5748 NFSLOCKMNT(nmp); 5749 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 5750 wakeup(nmp); 5751 } 5752 NFSUNLOCKMNT(nmp); 5753 } 5754 5755 /* Get rid all layouts for the file. */ 5756 nfsrv_freefilelayouts(fhp); 5757 5758 NFSFREECRED(tcred); 5759 free(dsrm, M_TEMP); 5760 } 5761 5762 /* 5763 * Generate a file name based on the file handle and put it in *bufp. 5764 * Return the number of bytes generated. 5765 */ 5766 static int 5767 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 5768 { 5769 int i; 5770 uint8_t *cp; 5771 const uint8_t *hexdigits = "0123456789abcdef"; 5772 5773 cp = (uint8_t *)fhp; 5774 for (i = 0; i < sizeof(*fhp); i++) { 5775 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 5776 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 5777 } 5778 bufp[2 * i] = '\0'; 5779 return (2 * i); 5780 } 5781 5782 /* 5783 * Update the Metadata file's attributes from the DS file when a Read/Write 5784 * layout is returned. 5785 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 5786 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 5787 */ 5788 int 5789 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 5790 { 5791 struct ucred *tcred; 5792 int error; 5793 5794 /* Do this as root so that it won't fail with EACCES. */ 5795 tcred = newnfs_getcred(); 5796 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 5797 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 5798 NFSFREECRED(tcred); 5799 return (error); 5800 } 5801 5802 /* 5803 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 5804 */ 5805 static int 5806 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 5807 NFSPROC_T *p) 5808 { 5809 int error; 5810 5811 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 5812 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 5813 return (error); 5814 } 5815 5816 static int 5817 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 5818 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 5819 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 5820 off_t *offp, int content, bool *eofp) 5821 { 5822 struct nfsmount **nmp, *failnmp; 5823 fhandle_t *fhp; 5824 struct vnode **dvp; 5825 struct nfsdevice *ds; 5826 struct pnfsdsattr dsattr; 5827 struct opnfsdsattr odsattr; 5828 char *buf; 5829 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 5830 int stripecnt; 5831 uint64_t stripesiz; 5832 5833 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 5834 /* 5835 * If not a regular file, not exported or not a pNFS server, 5836 * just return ENOENT. 5837 */ 5838 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 5839 nfsrv_devidcnt == 0) 5840 return (ENOENT); 5841 5842 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS * 5843 NFSDEV_MAXSTRIPE; 5844 buf = malloc(buflen, M_TEMP, M_WAITOK); 5845 error = 0; 5846 5847 /* 5848 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 5849 * from the MetaData file's extended attribute. 5850 */ 5851 if (ioproc == NFSPROC_GETATTR) { 5852 error = vn_extattr_get(vp, IO_NODELOCKED, 5853 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 5854 p); 5855 if (error == 0) { 5856 if (buflen == sizeof(odsattr)) { 5857 NFSBCOPY(buf, &odsattr, buflen); 5858 nap->na_filerev = odsattr.dsa_filerev; 5859 nap->na_size = odsattr.dsa_size; 5860 nap->na_atime = odsattr.dsa_atime; 5861 nap->na_mtime = odsattr.dsa_mtime; 5862 /* 5863 * Fake na_bytes by rounding up na_size. 5864 * Since we don't know the block size, just 5865 * use BLKDEV_IOSIZE. 5866 */ 5867 nap->na_bytes = (odsattr.dsa_size + 5868 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 5869 } else if (buflen == sizeof(dsattr)) { 5870 NFSBCOPY(buf, &dsattr, buflen); 5871 nap->na_filerev = dsattr.dsa_filerev; 5872 nap->na_size = dsattr.dsa_size; 5873 nap->na_atime = dsattr.dsa_atime; 5874 nap->na_mtime = dsattr.dsa_mtime; 5875 nap->na_bytes = dsattr.dsa_bytes; 5876 } else 5877 error = ENXIO; 5878 } 5879 if (error == 0) { 5880 /* 5881 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 5882 * returns 0, just return now. nfsrv_checkdsattr() 5883 * returns 0 if there is no Read/Write layout 5884 * plus either an Open/Write_access or Write 5885 * delegation issued to a client for the file. 5886 */ 5887 if (nfsrv_pnfsgetdsattr == 0 || 5888 nfsrv_checkdsattr(vp, p) == 0) { 5889 free(buf, M_TEMP); 5890 return (error); 5891 } 5892 } 5893 5894 /* 5895 * Clear ENOATTR so the code below will attempt to do a 5896 * nfsrv_getattrdsrpc() to get the attributes and (re)create 5897 * the extended attribute. 5898 */ 5899 if (error == ENOATTR) 5900 error = 0; 5901 } 5902 5903 origmircnt = -1; 5904 trycnt = 0; 5905 tryagain: 5906 nmp = NULL; 5907 dvp = NULL; 5908 fhp = NULL; 5909 if (error == 0) { 5910 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS * 5911 NFSDEV_MAXSTRIPE; 5912 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 5913 LK_EXCLUSIVE) 5914 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 5915 if (ioproc == NFSPROC_WRITEDS && NFSVOPISLOCKED(vp) == 5916 LK_SHARED) 5917 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 5918 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 5919 &mirrorcnt, &stripecnt, &stripesiz, p, &dvp, &fhp, NULL, 5920 NULL, NULL, NULL, NULL, NULL, NULL); 5921 if (error == 0) { 5922 nmp = malloc(sizeof(*nmp) * mirrorcnt * stripecnt, 5923 M_TEMP, M_WAITOK); 5924 for (i = 0; i < mirrorcnt * stripecnt; i++) 5925 if (dvp[i] != NULL) 5926 nmp[i] = VFSTONFS(dvp[i]->v_mount); 5927 else 5928 nmp[i] = NULL; 5929 } else 5930 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 5931 } else 5932 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 5933 if (error == 0) { 5934 if (origmircnt == -1) 5935 origmircnt = mirrorcnt; 5936 /* 5937 * If failpos is set to a mirror#, then that mirror has 5938 * failed and will be disabled. For Read, Getattr and Seek, the 5939 * function only tries one mirror, so if that mirror has 5940 * failed, it will need to be retried. As such, increment 5941 * tryitagain for these cases. 5942 * For Write, Setattr and Setacl, the function tries all 5943 * mirrors and will not return an error for the case where 5944 * one mirror has failed. For these cases, the functioning 5945 * mirror(s) will have been modified, so a retry isn't 5946 * necessary. These functions will set failpos for the 5947 * failed mirror#. 5948 */ 5949 if (ioproc == NFSPROC_READDS) { 5950 error = nfsrv_readdsrpc(fhp, off, cnt, cred, p, nmp, 5951 mirrorcnt, stripecnt, stripesiz, mpp, mpp2, 5952 &failpos); 5953 if (failpos >= 0 && mirrorcnt > 1) { 5954 /* 5955 * Setting failpos will cause the mirror 5956 * to be disabled and then a retry of this 5957 * read is required. 5958 */ 5959 error = 0; 5960 trycnt++; 5961 } 5962 } else if (ioproc == NFSPROC_WRITEDS) 5963 error = nfsrv_writedsrpc(fhp, off, cnt, cred, p, vp, 5964 nmp, mirrorcnt, stripecnt, stripesiz, mpp, cp, 5965 &failpos); 5966 else if (ioproc == NFSPROC_SETATTR) 5967 error = nfsrv_setattrdsrpc(fhp, vp, cred, p, nmp, 5968 mirrorcnt, stripecnt, nap, &failpos); 5969 #ifdef notnow 5970 else if (ioproc == NFSPROC_SETACL) 5971 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 5972 mirrorcnt, aclp, &failpos); 5973 else if (ioproc == NFSPROC_SEEKDS) { 5974 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 5975 p, nmp[0]); 5976 if (nfsds_failerr(error) && mirrorcnt > 1) { 5977 /* 5978 * Setting failpos will cause the mirror 5979 * to be disabled and then a retry of this 5980 * read is required. 5981 */ 5982 failpos = 0; 5983 error = 0; 5984 trycnt++; 5985 } 5986 } else if (ioproc == NFSPROC_ALLOCATE) 5987 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 5988 &nmp[0], mirrorcnt, &failpos); 5989 else if (ioproc == NFSPROC_DEALLOCATE) 5990 error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p, 5991 vp, &nmp[0], mirrorcnt, &failpos); 5992 #endif 5993 else { 5994 error = nfsrv_getattrdsrpc(fhp, vp, cred, p, 5995 nmp, stripecnt, nap, &failpos); 5996 if (failpos >= 0 && mirrorcnt > 1) { 5997 /* 5998 * Setting failpos will cause the mirror 5999 * to be disabled and then a retry of this 6000 * getattr is required. 6001 */ 6002 error = 0; 6003 trycnt++; 6004 } 6005 } 6006 ds = NULL; 6007 if (failpos >= 0) { 6008 failnmp = nmp[failpos]; 6009 NFSLOCKMNT(failnmp); 6010 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 6011 NFSMNTP_CANCELRPCS)) == 0) { 6012 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 6013 NFSUNLOCKMNT(failnmp); 6014 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 6015 failnmp, p); 6016 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 6017 failpos, ds); 6018 if (ds != NULL) 6019 nfsrv_killrpcs(failnmp); 6020 NFSLOCKMNT(failnmp); 6021 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 6022 wakeup(failnmp); 6023 } 6024 NFSUNLOCKMNT(failnmp); 6025 } 6026 for (i = 0; i < mirrorcnt * stripecnt; i++) 6027 if (dvp[i] != NULL) 6028 NFSVOPUNLOCK(dvp[i]); 6029 free(dvp, M_TEMP); 6030 free(nmp, M_TEMP); 6031 free(fhp, M_TEMP); 6032 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 6033 trycnt); 6034 /* Try the Read/Getattr again if a mirror was deleted. */ 6035 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 6036 goto tryagain; 6037 } else { 6038 /* Return ENOENT for any Extended Attribute error. */ 6039 error = ENOENT; 6040 } 6041 free(buf, M_TEMP); 6042 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 6043 return (error); 6044 } 6045 6046 /* 6047 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 6048 * attribute. 6049 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 6050 * to be checked. If it points to a NULL nmp, then it returns 6051 * a suitable destination. 6052 * curnmp - If non-NULL, it is the source mount for the copy. 6053 */ 6054 int 6055 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 6056 int *mirrorcntp, int *stripecntp, uint64_t *stripesizp, NFSPROC_T *p, 6057 struct vnode ***dvppp, fhandle_t **fhpp, 6058 char **devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 6059 struct nfsmount *curnmp, int *ippos, int *dsdirp) 6060 { 6061 struct vnode *dvp, *nvp = NULL, **tdvpp = NULL; /* Shut up gcc. */ 6062 struct mount *mp; 6063 struct nfsmount *nmp, *newnmp; 6064 fhandle_t *tfhp = NULL; /* Shut up gcc. */ 6065 struct sockaddr *sad; 6066 struct sockaddr_in *sin; 6067 struct nfsdevice *ds, *tds, *fndds; 6068 struct pnfsdsfile *pf; 6069 struct opnfsdsfile *opf; 6070 uint32_t dsdir; 6071 int error, fhiszero, fnd, gotmirror, gotone, i, j, k, l, m, mirrorcnt; 6072 char *tdevid; 6073 bool dvplocked; 6074 6075 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 6076 dvplocked = false; 6077 if (mirrorcntp != NULL) 6078 *mirrorcntp = 1; 6079 if (stripecntp != NULL) 6080 *stripecntp = 1; 6081 if (stripesizp != NULL) 6082 *stripesizp = 0; 6083 if (nvpp != NULL) 6084 *nvpp = NULL; 6085 if (dvppp != NULL) 6086 *dvppp = NULL; 6087 if (fhpp != NULL) 6088 *fhpp = NULL; 6089 if (devid != NULL) 6090 *devid = NULL; 6091 tdevid = NULL; 6092 if (ippos != NULL) 6093 *ippos = -1; 6094 if (newnmpp != NULL) 6095 newnmp = *newnmpp; 6096 else 6097 newnmp = NULL; 6098 if (fnamep != NULL) 6099 fnamep[0] = '\0'; 6100 mp = vp->v_mount; 6101 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 6102 "pnfsd.dsfile", buflenp, buf, p); 6103 if (error == 0 && *buflenp > 0) { 6104 j = *buflenp / sizeof(*pf); 6105 if (*buflenp != sizeof(*pf) * j) { 6106 /* Try opnfsdsfile. */ 6107 j = *buflenp / sizeof(*opf); 6108 if (j >= 1 && *buflenp == sizeof(*opf) * j) { 6109 char *tbuf; 6110 6111 tbuf = malloc(*buflenp, M_TEMP, M_WAITOK); 6112 memcpy(tbuf, buf, *buflenp); 6113 pf = (struct pnfsdsfile *)buf; 6114 opf = (struct opnfsdsfile *)tbuf; 6115 for (k = 0; k < j; k++, pf++, opf++) { 6116 memcpy(&pf->dsf_fh, opf, sizeof(*opf)); 6117 pf->dsf_stripecnt = 1; 6118 pf->dsf_stripesiz = 0; 6119 } 6120 free(tbuf, M_TEMP); 6121 } else 6122 error = ENOATTR; 6123 } else if (j < 1) 6124 error = ENOATTR; 6125 } else if (error == 0) 6126 error = ENOATTR; 6127 if (error != 0) 6128 return (error); 6129 6130 pf = (struct pnfsdsfile *)buf; 6131 if (pf->dsf_stripesiz > 0) { 6132 mirrorcnt = j / pf->dsf_stripecnt; 6133 k = pf->dsf_stripecnt; 6134 } else { 6135 mirrorcnt = j; 6136 k = 1; 6137 } 6138 if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 6139 k < 1 || k > NFSDEV_MAXSTRIPE || j != mirrorcnt * k) 6140 return (ENOATTR); 6141 if (stripecntp != NULL) 6142 *stripecntp = k; 6143 if (stripesizp != NULL) 6144 *stripesizp = pf->dsf_stripesiz; 6145 6146 /* Allocate a large enough array for dvppp, if required. */ 6147 if (dvppp != NULL) 6148 tdvpp = *dvppp = malloc(sizeof(*tdvpp) * mirrorcnt * k, M_TEMP, 6149 M_WAITOK | M_ZERO); 6150 if (fhpp != NULL) 6151 tfhp = *fhpp = malloc(sizeof(*tfhp) * mirrorcnt * k, M_TEMP, 6152 M_WAITOK); 6153 if (devid != NULL) 6154 tdevid = *devid = malloc(NFSX_V4DEVICEID * mirrorcnt * k, 6155 M_TEMP, M_WAITOK); 6156 6157 /* If curnmp != NULL, check for a match in the mirror list. */ 6158 if (curnmp != NULL) { 6159 fnd = 0; 6160 for (i = 0; i < j; i += k, pf += k) { 6161 sad = (struct sockaddr *)&pf->dsf_sin; 6162 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 6163 if (ippos != NULL) 6164 *ippos = i; 6165 fnd = 1; 6166 break; 6167 } 6168 } 6169 if (fnd == 0) 6170 error = ENXIO; 6171 } 6172 6173 gotmirror = gotone = 0; 6174 l = 0; /* Index for tdvpp and tfhp. */ 6175 pf = (struct pnfsdsfile *)buf; 6176 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d stripecnt=%d err=%d\n", 6177 mirrorcnt, k, error); 6178 for (i = 0; i < j && error == 0; i++, pf++) { 6179 fhiszero = 0; 6180 sad = (struct sockaddr *)&pf->dsf_sin; 6181 sin = &pf->dsf_sin; 6182 dsdir = pf->dsf_dir; 6183 if (dsdir >= nfsrv_dsdirsize) { 6184 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 6185 error = ENOATTR; 6186 } else if (nvpp != NULL && newnmp != NULL && 6187 nfsaddr2_match(sad, newnmp->nm_nam)) 6188 error = EEXIST; 6189 if (error == 0) { 6190 if (ippos != NULL && curnmp == NULL && 6191 sad->sa_family == AF_INET && 6192 sin->sin_addr.s_addr == 0) 6193 *ippos = i; 6194 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 6195 fhiszero = 1; 6196 /* Use the socket address to find the mount point. */ 6197 fndds = NULL; 6198 NFSDDSLOCK(); 6199 /* Find a match for the IP address. */ 6200 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6201 if (ds->nfsdev_nmp != NULL) { 6202 dvp = ds->nfsdev_dvp; 6203 nmp = VFSTONFS(dvp->v_mount); 6204 if (nmp != ds->nfsdev_nmp) 6205 printf("different2 nmp %p %p\n", 6206 nmp, ds->nfsdev_nmp); 6207 if (nfsaddr2_match(sad, nmp->nm_nam)) { 6208 fndds = ds; 6209 break; 6210 } 6211 } 6212 } 6213 if (fndds != NULL && newnmpp != NULL && 6214 newnmp == NULL) { 6215 /* Search for a place to make a mirror copy. */ 6216 TAILQ_FOREACH(tds, &nfsrv_devidhead, 6217 nfsdev_list) { 6218 if (tds->nfsdev_nmp != NULL && 6219 fndds != tds && 6220 ((tds->nfsdev_mdsisset == 0 && 6221 fndds->nfsdev_mdsisset == 0) || 6222 (tds->nfsdev_mdsisset != 0 && 6223 fndds->nfsdev_mdsisset != 0 && 6224 fsidcmp(&tds->nfsdev_mdsfsid, 6225 &mp->mnt_stat.f_fsid) == 0))) { 6226 *newnmpp = tds->nfsdev_nmp; 6227 break; 6228 } 6229 } 6230 if (tds != NULL) { 6231 /* 6232 * Move this entry to the end of the 6233 * list, so it won't be selected as 6234 * easily the next time. 6235 */ 6236 TAILQ_REMOVE(&nfsrv_devidhead, tds, 6237 nfsdev_list); 6238 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 6239 nfsdev_list); 6240 } 6241 } 6242 NFSDDSUNLOCK(); 6243 if (fndds != NULL) { 6244 dvp = fndds->nfsdev_dsdir[dsdir]; 6245 if (lktype != 0 || fhiszero != 0 || 6246 (nvpp != NULL && *nvpp == NULL)) { 6247 dvplocked = true; 6248 if (fhiszero != 0) 6249 error = vn_lock(dvp, 6250 LK_EXCLUSIVE); 6251 else if (lktype != 0) 6252 error = vn_lock(dvp, lktype); 6253 else 6254 error = vn_lock(dvp, LK_SHARED); 6255 /* 6256 * If the file handle is all 0's, try to 6257 * do a Lookup against the DS to acquire 6258 * it. 6259 * If dvpp == NULL or the Lookup fails, 6260 * unlock dvp after the call. 6261 */ 6262 if (error == 0 && (fhiszero != 0 || 6263 (nvpp != NULL && *nvpp == NULL))) { 6264 error = nfsrv_pnfslookupds(vp, 6265 dvp, pf, &nvp, p); 6266 if (error == 0) { 6267 if (fhiszero != 0) 6268 nfsrv_pnfssetfh( 6269 vp, pf, 6270 devid, 6271 fnamep, 6272 nvp, p); 6273 if (nvpp != NULL && 6274 *nvpp == NULL) { 6275 *nvpp = nvp; 6276 *dsdirp = dsdir; 6277 } else 6278 vput(nvp); 6279 } 6280 if (error != 0 || lktype == 0) 6281 NFSVOPUNLOCK(dvp); 6282 } 6283 } 6284 if (error == 0) { 6285 gotone++; 6286 NFSD_DEBUG(4, "gotone=%d\n", gotone); 6287 if (tdevid != NULL) { 6288 NFSBCOPY(fndds->nfsdev_deviceid, 6289 tdevid, NFSX_V4DEVICEID); 6290 tdevid += NFSX_V4DEVICEID; 6291 } 6292 if (dvppp != NULL) 6293 tdvpp[l] = dvp; 6294 if (fhpp != NULL) 6295 NFSBCOPY(&pf->dsf_fh, &tfhp[l], 6296 NFSX_MYFH); 6297 if (dvppp != NULL || fhpp != NULL) { 6298 l++; 6299 if (l % k == 0) 6300 gotmirror++; 6301 } 6302 if (fnamep != NULL && gotone == 1) 6303 strlcpy(fnamep, 6304 pf->dsf_filename, 6305 sizeof(pf->dsf_filename)); 6306 } else 6307 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 6308 "err=%d\n", error); 6309 } else if (fnamep == NULL) { 6310 /* 6311 * fnamep is NULL for ReadDS, WriteDS, 6312 * SetattrDS and GetattrDS. For these cases, 6313 * do not use a partial stripe set as a 6314 * mirror. 6315 */ 6316 for (m = l / k * k; m < l; m++) { 6317 if (dvplocked) 6318 NFSVOPUNLOCK(tdvpp[m]); 6319 tdvpp[m] = NULL; 6320 } 6321 l = l / k * k; 6322 } 6323 } 6324 } 6325 if (fnamep != NULL) { 6326 /* 6327 * If fnamep != NULL, a list of all DSs is wanted. 6328 * For this, cheat and return the total cound of DSs in 6329 * mirrorcnt. 6330 */ 6331 gotmirror = gotone; 6332 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: fname=%s, gotmirror=%d\n", 6333 fnamep, gotmirror); 6334 } 6335 if (error == 0 && gotmirror == 0) 6336 error = ENOENT; 6337 6338 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 6339 error); 6340 if (error == 0) { 6341 if (mirrorcntp != NULL) 6342 *mirrorcntp = gotmirror; 6343 } else { 6344 if (dvppp != NULL) { 6345 if (l > 0 && dvplocked) { 6346 /* 6347 * If the error didn't occur on the first one 6348 * and dvppp != NULL, the one(s) prior to the 6349 * failure will have locked dvp's that need to 6350 * be unlocked. 6351 */ 6352 tdvpp = *dvppp; 6353 for (i = 0; i < l; i++) 6354 NFSVOPUNLOCK(*tdvpp++); 6355 } 6356 free(*dvppp, M_TEMP); 6357 *dvppp = NULL; 6358 } 6359 if (fhpp != NULL) { 6360 free(*fhpp, M_TEMP); 6361 *fhpp = NULL; 6362 } 6363 if (devid != NULL) { 6364 free(*devid, M_TEMP); 6365 *devid = NULL; 6366 } 6367 /* 6368 * If it found the vnode to be copied from before a failure, 6369 * it needs to be vput()'d. 6370 */ 6371 if (nvpp != NULL && *nvpp != NULL) { 6372 vput(*nvpp); 6373 *nvpp = NULL; 6374 } 6375 } 6376 return (error); 6377 } 6378 6379 /* 6380 * Set the extended attribute for the Change attribute. 6381 */ 6382 static int 6383 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 6384 { 6385 struct pnfsdsattr dsattr; 6386 int error; 6387 6388 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); 6389 dsattr.dsa_filerev = nap->na_filerev; 6390 dsattr.dsa_size = nap->na_size; 6391 dsattr.dsa_atime = nap->na_atime; 6392 dsattr.dsa_mtime = nap->na_mtime; 6393 dsattr.dsa_bytes = nap->na_bytes; 6394 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 6395 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); 6396 if (error != 0) 6397 printf("pNFS: setextattr=%d\n", error); 6398 return (error); 6399 } 6400 6401 /* 6402 * Do a read RPC on a DS data file, using this structure for the arguments, 6403 * so that this function can be executed by a separate kernel process. 6404 */ 6405 struct nfsrvreaddsdorpc { 6406 int done; 6407 int inprog; 6408 struct task tsk; 6409 fhandle_t fh; 6410 off_t off; 6411 int len; 6412 struct nfsmount *nmp; 6413 struct ucred *cred; 6414 NFSPROC_T *p; 6415 struct mbuf *m; 6416 struct mbuf *mend; 6417 int err; 6418 }; 6419 6420 static int 6421 nfsrv_readdsdorpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 6422 NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) 6423 { 6424 uint32_t *tl; 6425 struct nfsrv_descript *nd; 6426 nfsv4stateid_t st; 6427 struct mbuf *m, *m2; 6428 int error = 0, retlen, tlen, trimlen; 6429 6430 NFSD_DEBUG(4, "in nfsrv_readdsdorpc\n"); 6431 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6432 *mpp = NULL; 6433 /* 6434 * Use a stateid where other is an alternating 01010 pattern and 6435 * seqid is 0xffffffff. This value is not defined as special by 6436 * the RFC and is used by the FreeBSD NFS server to indicate an 6437 * MDS->DS proxy operation. 6438 */ 6439 st.other[0] = 0x55555555; 6440 st.other[1] = 0x55555555; 6441 st.other[2] = 0x55555555; 6442 st.seqid = 0xffffffff; 6443 nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), 6444 NULL, NULL, 0, 0, cred); 6445 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6446 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); 6447 txdr_hyper(off, tl); 6448 *(tl + 2) = txdr_unsigned(len); 6449 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6450 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6451 if (error != 0) { 6452 free(nd, M_TEMP); 6453 return (error); 6454 } 6455 if (nd->nd_repstat == 0) { 6456 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 6457 NFSM_STRSIZ(retlen, len); 6458 if (retlen > 0) { 6459 /* Trim off the pre-data XDR from the mbuf chain. */ 6460 m = nd->nd_mrep; 6461 while (m != NULL && m != nd->nd_md) { 6462 if (m->m_next == nd->nd_md) { 6463 m->m_next = NULL; 6464 m_freem(nd->nd_mrep); 6465 nd->nd_mrep = m = nd->nd_md; 6466 } else 6467 m = m->m_next; 6468 } 6469 if (m == NULL) { 6470 printf("nfsrv_readdsdorpc: busted mbuf list\n"); 6471 error = ENOENT; 6472 goto nfsmout; 6473 } 6474 6475 /* 6476 * Now, adjust first mbuf so that any XDR before the 6477 * read data is skipped over. 6478 */ 6479 trimlen = nd->nd_dpos - mtod(m, char *); 6480 if (trimlen > 0) { 6481 m->m_len -= trimlen; 6482 NFSM_DATAP(m, trimlen); 6483 } 6484 6485 /* 6486 * Truncate the mbuf chain at retlen bytes of data, 6487 * plus XDR padding that brings the length up to a 6488 * multiple of 4. 6489 */ 6490 tlen = NFSM_RNDUP(retlen); 6491 do { 6492 if (m->m_len >= tlen) { 6493 m->m_len = tlen; 6494 tlen = 0; 6495 m2 = m->m_next; 6496 m->m_next = NULL; 6497 m_freem(m2); 6498 break; 6499 } 6500 tlen -= m->m_len; 6501 m = m->m_next; 6502 } while (m != NULL); 6503 if (tlen > 0) { 6504 printf("nfsrv_readdsdorpc: busted mbuf list\n"); 6505 error = ENOENT; 6506 goto nfsmout; 6507 } 6508 *mpp = nd->nd_mrep; 6509 *mpendp = m; 6510 nd->nd_mrep = NULL; 6511 } 6512 } else 6513 error = nd->nd_repstat; 6514 nfsmout: 6515 /* If nd->nd_mrep is already NULL, this is a no-op. */ 6516 m_freem(nd->nd_mrep); 6517 free(nd, M_TEMP); 6518 NFSD_DEBUG(4, "nfsrv_readdsdorpc error=%d\n", error); 6519 return (error); 6520 } 6521 6522 /* 6523 * Start up the thread that will execute nfsrv_readdsdorpc(). 6524 */ 6525 static void 6526 start_readdsdorpc(void *arg, int pending) 6527 { 6528 struct nfsrvreaddsdorpc *drpc; 6529 6530 drpc = (struct nfsrvreaddsdorpc *)arg; 6531 drpc->err = nfsrv_readdsdorpc(&drpc->fh, drpc->off, drpc->len, 6532 drpc->cred, drpc->p, drpc->nmp, &drpc->m, &drpc->mend); 6533 drpc->done = 1; 6534 NFSD_DEBUG(4, "start_readdsdorpc: err=%d\n", drpc->err); 6535 } 6536 6537 static int 6538 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 6539 NFSPROC_T *p, struct nfsmount **nmp, int mirrorcnt, int stripecnt, 6540 uint64_t stripesiz, struct mbuf **mpp, struct mbuf **mendp, int *failposp) 6541 { 6542 struct nfsrvreaddsdorpc *drpc, *tdrpc; 6543 struct mbuf *m, *m2, *mend; 6544 fhandle_t *tfhp; 6545 struct nfsmount **tnmp; 6546 uint64_t scnt; 6547 int error, i, j, k, l, n, ret, timo; 6548 6549 NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); 6550 drpc = NULL; 6551 m2 = *mpp = NULL; 6552 *failposp = -1; 6553 if (stripecnt > 1) { 6554 /* Set j to the upper bound of the # of stripes to read. */ 6555 j = (len + len - 1) / stripesiz + 1; 6556 tdrpc = drpc = malloc(sizeof(*drpc) * j, M_TEMP, M_WAITOK); 6557 } 6558 6559 /* For each stripe except last one, read the stripe. */ 6560 for (j = 0; ; j++, tdrpc++) { 6561 if (stripecnt > 1) { 6562 k = (off / stripesiz) % (uint64_t)stripecnt; 6563 scnt = stripesiz - (off % stripesiz); 6564 l = ((uint64_t)len < scnt) ? len : (int)scnt; 6565 } else { 6566 k = 0; 6567 l = len; 6568 } 6569 if (j == 0) 6570 n = k; /* Save first stripe# for later. */ 6571 tfhp = fhp + k; 6572 tnmp = nmp + k; 6573 NFSD_DEBUG(4, "nfsrv_readdsrpc: mcopy k=%d l=%d\n", k, l); 6574 6575 /* Break out of the loop for the last stripe. */ 6576 if (l == len) 6577 break; 6578 6579 /* 6580 * Do the read RPC for every DS, using a separate kernel 6581 * process for every DS, except the last one. 6582 */ 6583 error = 0; 6584 tdrpc->done = 0; 6585 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp)); 6586 tdrpc->off = off; 6587 tdrpc->len = l; 6588 tdrpc->nmp = *tnmp; 6589 tdrpc->cred = cred; 6590 tdrpc->p = p; 6591 tdrpc->inprog = 0; 6592 tdrpc->err = 0; 6593 ret = EIO; 6594 if (nfs_pnfsiothreads != 0) { 6595 ret = nfs_pnfsio(start_readdsdorpc, tdrpc); 6596 NFSD_DEBUG(4, "nfsrv_readdsrpc: " 6597 "nfs_pnfsio=%d\n", ret); 6598 } 6599 if (ret != 0) { 6600 ret = nfsrv_readdsdorpc(tfhp, off, l, cred, p, 6601 *tnmp, &tdrpc->m, &tdrpc->mend); 6602 if (nfsds_failerr(ret) && *failposp == -1) 6603 *failposp = k; 6604 else if (error == 0 && ret != 0) 6605 tdrpc->err = ret; 6606 tdrpc->inprog = 0; 6607 tdrpc->done = 1; 6608 } 6609 off += l; 6610 len -= l; 6611 } 6612 ret = nfsrv_readdsdorpc(tfhp, off, l, cred, p, *tnmp, &m, &mend); 6613 if (nfsds_failerr(ret) && *failposp == -1) 6614 *failposp = k; 6615 if (error == 0 && ret != 0) 6616 error = ret; 6617 NFSD_DEBUG(4, "nfsrv_readdsrpc: aft stripes=%d\n", error); 6618 tdrpc = drpc; 6619 timo = hz / 50; /* Wait for 20msec. */ 6620 if (timo < 1) 6621 timo = 1; 6622 k = n; /* Keep track of stripe#. */ 6623 for (i = 0; i < j - 1; i++, tdrpc++) { 6624 /* Wait for RPCs on separate threads to complete. */ 6625 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6626 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 6627 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6628 *failposp = k; 6629 else if (error == 0 && tdrpc->err != 0) 6630 error = tdrpc->err; 6631 else { 6632 if (m2 != NULL) 6633 m2->m_next = tdrpc->m; 6634 else 6635 *mpp = tdrpc->m; 6636 m2 = tdrpc->mend; 6637 } 6638 k = (k + 1) % stripecnt; 6639 } 6640 if (m2 != NULL) 6641 m2->m_next = m; 6642 else 6643 *mpp = m; 6644 *mendp = mend; 6645 6646 free(drpc, M_TEMP); 6647 return (error); 6648 } 6649 6650 /* 6651 * Do a write RPC on a DS data file, using this structure for the arguments, 6652 * so that this function can be executed by a separate kernel process. 6653 */ 6654 struct nfsrvwritedsdorpc { 6655 int done; 6656 int inprog; 6657 struct task tsk; 6658 fhandle_t fh; 6659 off_t off; 6660 int len; 6661 struct nfsmount *nmp; 6662 struct ucred *cred; 6663 NFSPROC_T *p; 6664 struct mbuf *m; 6665 int err; 6666 }; 6667 6668 static int 6669 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, 6670 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) 6671 { 6672 uint32_t *tl; 6673 struct nfsrv_descript *nd; 6674 nfsattrbit_t attrbits; 6675 nfsv4stateid_t st; 6676 int commit, error, retlen; 6677 6678 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6679 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, 6680 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6681 6682 /* 6683 * Use a stateid where other is an alternating 01010 pattern and 6684 * seqid is 0xffffffff. This value is not defined as special by 6685 * the RFC and is used by the FreeBSD NFS server to indicate an 6686 * MDS->DS proxy operation. 6687 */ 6688 st.other[0] = 0x55555555; 6689 st.other[1] = 0x55555555; 6690 st.other[2] = 0x55555555; 6691 st.seqid = 0xffffffff; 6692 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6693 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); 6694 txdr_hyper(off, tl); 6695 tl += 2; 6696 /* 6697 * Do all writes FileSync, since the server doesn't hold onto dirty 6698 * buffers. Since clients should be accessing the DS servers directly 6699 * using the pNFS layouts, this just needs to work correctly as a 6700 * fallback. 6701 */ 6702 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); 6703 *tl = txdr_unsigned(len); 6704 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); 6705 6706 /* Put data in mbuf chain. */ 6707 nd->nd_mb->m_next = m; 6708 6709 /* Set nd_mb and nd_bpos to end of data. */ 6710 while (m->m_next != NULL) 6711 m = m->m_next; 6712 nd->nd_mb = m; 6713 nfsm_set(nd, m->m_len); 6714 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); 6715 6716 /* Do a Getattr for the attributes that change upon writing. */ 6717 NFSZERO_ATTRBIT(&attrbits); 6718 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6719 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6720 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6721 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6722 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6723 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 6724 *tl = txdr_unsigned(NFSV4OP_GETATTR); 6725 (void) nfsrv_putattrbit(nd, &attrbits); 6726 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 6727 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6728 if (error != 0) { 6729 free(nd, M_TEMP); 6730 return (error); 6731 } 6732 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); 6733 /* Get rid of weak cache consistency data for now. */ 6734 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 6735 (ND_NFSV4 | ND_V4WCCATTR)) { 6736 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 6737 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 6738 NULL, NULL); 6739 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); 6740 if (error != 0) 6741 goto nfsmout; 6742 /* 6743 * Get rid of Op# and status for next op. 6744 */ 6745 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6746 if (*++tl != 0) 6747 nd->nd_flag |= ND_NOMOREDATA; 6748 } 6749 if (nd->nd_repstat == 0) { 6750 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); 6751 retlen = fxdr_unsigned(int, *tl++); 6752 commit = fxdr_unsigned(int, *tl); 6753 if (commit != NFSWRITE_FILESYNC) 6754 error = NFSERR_IO; 6755 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", 6756 retlen, commit, error); 6757 } else 6758 error = nd->nd_repstat; 6759 /* We have no use for the Write Verifier since we use FileSync. */ 6760 6761 /* 6762 * Get the Change, Size, Access Time and Modify Time attributes and set 6763 * on the Metadata file, so its attributes will be what the file's 6764 * would be if it had been written. 6765 */ 6766 if (error == 0) { 6767 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6768 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 6769 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 6770 NULL, NULL); 6771 } 6772 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); 6773 nfsmout: 6774 m_freem(nd->nd_mrep); 6775 free(nd, M_TEMP); 6776 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); 6777 return (error); 6778 } 6779 6780 /* 6781 * Start up the thread that will execute nfsrv_writedsdorpc(). 6782 */ 6783 static void 6784 start_writedsdorpc(void *arg, int pending) 6785 { 6786 struct nfsrvwritedsdorpc *drpc; 6787 6788 drpc = (struct nfsrvwritedsdorpc *)arg; 6789 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 6790 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 6791 drpc->done = 1; 6792 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 6793 } 6794 6795 static int 6796 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 6797 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmp, int mirrorcnt, 6798 int stripecnt, uint64_t stripesiz, struct mbuf **mpp, char *cp, 6799 int *failposp) 6800 { 6801 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */ 6802 struct nfsvattr na; 6803 struct mbuf *m; 6804 fhandle_t *tfhp; 6805 struct nfsmount **tnmp; 6806 uint64_t scnt; 6807 int error, i, j, k, l, n, o, offs, ret, timo; 6808 6809 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 6810 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 6811 drpc = NULL; 6812 *failposp = -1; 6813 if (mirrorcnt > 1 || stripecnt > 1) { 6814 /* Set j to the upper bound of the # of DSs to read. */ 6815 if (stripecnt > 1) 6816 j = (len + len - 1) / stripesiz + 1; 6817 else 6818 j = 1; 6819 j *= mirrorcnt; 6820 tdrpc = drpc = malloc(sizeof(*drpc) * j, M_TEMP, M_WAITOK); 6821 } 6822 6823 /* Calculate offset in mbuf chain that data starts. */ 6824 offs = cp - mtod(*mpp, char *); 6825 /* For each stripe, write to all the mirrors. */ 6826 for (j = 0; ; j++) { 6827 if (stripecnt > 1) { 6828 k = (off / stripesiz) % (uint64_t)stripecnt; 6829 scnt = stripesiz - (off % stripesiz); 6830 l = ((uint64_t)len < scnt) ? len : (int)scnt; 6831 } else { 6832 k = 0; 6833 l = len; 6834 } 6835 if (j == 0) 6836 o = k; /* Save first stripe# for later. */ 6837 tfhp = fhp + k; 6838 tnmp = nmp + k; 6839 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d k=%d " 6840 "l=%d\n", offs, k, l); 6841 6842 /* 6843 * Do the write RPC for every DS, using a separate kernel 6844 * process for every DS, except the last one. 6845 */ 6846 error = 0; 6847 n = mirrorcnt; 6848 if (l == len) 6849 n--; 6850 for (i = 0; i < n; i++, tdrpc++) { 6851 tdrpc->done = 0; 6852 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp)); 6853 tdrpc->off = off; 6854 tdrpc->len = l; 6855 tdrpc->nmp = *tnmp; 6856 tdrpc->cred = cred; 6857 tdrpc->p = p; 6858 tdrpc->inprog = 0; 6859 tdrpc->err = 0; 6860 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(l), 6861 M_WAITOK); 6862 ret = EIO; 6863 if (nfs_pnfsiothreads != 0) { 6864 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 6865 NFSD_DEBUG(4, "nfsrv_writedsrpc: " 6866 "nfs_pnfsio=%d\n", ret); 6867 } 6868 if (ret != 0) { 6869 ret = nfsrv_writedsdorpc(*tnmp, tfhp, off, l, 6870 NULL, tdrpc->m, cred, p); 6871 if (nfsds_failerr(ret) && *failposp == -1) 6872 *failposp = k; 6873 else if (error == 0 && ret != 0) 6874 tdrpc->err = ret; 6875 tdrpc->inprog = 0; 6876 tdrpc->done = 1; 6877 } 6878 tnmp += stripecnt; 6879 tfhp += stripecnt; 6880 } 6881 if (l == len) 6882 break; 6883 offs += l; 6884 off += l; 6885 len -= l; 6886 } 6887 m = m_copym(*mpp, offs, NFSM_RNDUP(l), M_WAITOK); 6888 ret = nfsrv_writedsdorpc(*tnmp, tfhp, off, l, &na, m, cred, p); 6889 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6890 *failposp = k; 6891 else if (error == 0 && ret != 0) 6892 error = ret; 6893 if (error == 0) 6894 error = nfsrv_setextattr(vp, &na, p); 6895 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 6896 tdrpc = drpc; 6897 timo = hz / 50; /* Wait for 20msec. */ 6898 if (timo < 1) 6899 timo = 1; 6900 k = o; 6901 for (i = 0; i < j * mirrorcnt - 1; i++, tdrpc++) { 6902 /* Wait for RPCs on separate threads to complete. */ 6903 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6904 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 6905 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6906 *failposp = k; 6907 else if (error == 0 && tdrpc->err != 0) 6908 error = tdrpc->err; 6909 k = (k + 1) % stripecnt; 6910 } 6911 free(drpc, M_TEMP); 6912 return (error); 6913 } 6914 6915 #ifdef notnow 6916 /* 6917 * Do a allocate RPC on a DS data file, using this structure for the arguments, 6918 * so that this function can be executed by a separate kernel process. 6919 */ 6920 struct nfsrvallocatedsdorpc { 6921 int done; 6922 int inprog; 6923 struct task tsk; 6924 fhandle_t fh; 6925 off_t off; 6926 off_t len; 6927 struct nfsmount *nmp; 6928 struct ucred *cred; 6929 NFSPROC_T *p; 6930 int err; 6931 }; 6932 6933 static int 6934 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 6935 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 6936 { 6937 uint32_t *tl; 6938 struct nfsrv_descript *nd; 6939 nfsattrbit_t attrbits; 6940 nfsv4stateid_t st; 6941 int error; 6942 6943 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6944 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 6945 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6946 6947 /* 6948 * Use a stateid where other is an alternating 01010 pattern and 6949 * seqid is 0xffffffff. This value is not defined as special by 6950 * the RFC and is used by the FreeBSD NFS server to indicate an 6951 * MDS->DS proxy operation. 6952 */ 6953 st.other[0] = 0x55555555; 6954 st.other[1] = 0x55555555; 6955 st.other[2] = 0x55555555; 6956 st.seqid = 0xffffffff; 6957 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6958 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 6959 txdr_hyper(off, tl); tl += 2; 6960 txdr_hyper(len, tl); tl += 2; 6961 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 6962 6963 *tl = txdr_unsigned(NFSV4OP_GETATTR); 6964 NFSGETATTR_ATTRBIT(&attrbits); 6965 nfsrv_putattrbit(nd, &attrbits); 6966 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 6967 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6968 if (error != 0) { 6969 free(nd, M_TEMP); 6970 return (error); 6971 } 6972 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 6973 nd->nd_repstat); 6974 if (nd->nd_repstat == 0) { 6975 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6976 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 6977 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 6978 NULL, NULL); 6979 } else 6980 error = nd->nd_repstat; 6981 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 6982 nfsmout: 6983 m_freem(nd->nd_mrep); 6984 free(nd, M_TEMP); 6985 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 6986 return (error); 6987 } 6988 6989 /* 6990 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 6991 */ 6992 static void 6993 start_allocatedsdorpc(void *arg, int pending) 6994 { 6995 struct nfsrvallocatedsdorpc *drpc; 6996 6997 drpc = (struct nfsrvallocatedsdorpc *)arg; 6998 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 6999 drpc->len, NULL, drpc->cred, drpc->p); 7000 drpc->done = 1; 7001 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 7002 } 7003 7004 static int 7005 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 7006 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 7007 int *failposp) 7008 { 7009 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 7010 struct nfsvattr na; 7011 int error, i, ret, timo; 7012 7013 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 7014 drpc = NULL; 7015 if (mirrorcnt > 1) 7016 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 7017 M_WAITOK); 7018 7019 /* 7020 * Do the allocate RPC for every DS, using a separate kernel process 7021 * for every DS except the last one. 7022 */ 7023 error = 0; 7024 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7025 tdrpc->done = 0; 7026 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 7027 tdrpc->off = off; 7028 tdrpc->len = len; 7029 tdrpc->nmp = *nmpp; 7030 tdrpc->cred = cred; 7031 tdrpc->p = p; 7032 tdrpc->inprog = 0; 7033 tdrpc->err = 0; 7034 ret = EIO; 7035 if (nfs_pnfsiothreads != 0) { 7036 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 7037 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 7038 ret); 7039 } 7040 if (ret != 0) { 7041 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 7042 cred, p); 7043 if (nfsds_failerr(ret) && *failposp == -1) 7044 *failposp = i; 7045 else if (error == 0 && ret != 0) 7046 error = ret; 7047 } 7048 nmpp++; 7049 fhp++; 7050 } 7051 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 7052 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 7053 *failposp = mirrorcnt - 1; 7054 else if (error == 0 && ret != 0) 7055 error = ret; 7056 if (error == 0) 7057 error = nfsrv_setextattr(vp, &na, p); 7058 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 7059 tdrpc = drpc; 7060 timo = hz / 50; /* Wait for 20msec. */ 7061 if (timo < 1) 7062 timo = 1; 7063 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7064 /* Wait for RPCs on separate threads to complete. */ 7065 while (tdrpc->inprog != 0 && tdrpc->done == 0) 7066 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 7067 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 7068 *failposp = i; 7069 else if (error == 0 && tdrpc->err != 0) 7070 error = tdrpc->err; 7071 } 7072 free(drpc, M_TEMP); 7073 return (error); 7074 } 7075 7076 /* 7077 * Do a deallocate RPC on a DS data file, using this structure for the 7078 * arguments, so that this function can be executed by a separate kernel 7079 * process. 7080 */ 7081 struct nfsrvdeallocatedsdorpc { 7082 int done; 7083 int inprog; 7084 struct task tsk; 7085 fhandle_t fh; 7086 off_t off; 7087 off_t len; 7088 struct nfsmount *nmp; 7089 struct ucred *cred; 7090 NFSPROC_T *p; 7091 int err; 7092 }; 7093 7094 static int 7095 nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 7096 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 7097 { 7098 uint32_t *tl; 7099 struct nfsrv_descript *nd; 7100 nfsattrbit_t attrbits; 7101 nfsv4stateid_t st; 7102 int error; 7103 7104 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 7105 nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp, 7106 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 7107 7108 /* 7109 * Use a stateid where other is an alternating 01010 pattern and 7110 * seqid is 0xffffffff. This value is not defined as special by 7111 * the RFC and is used by the FreeBSD NFS server to indicate an 7112 * MDS->DS proxy operation. 7113 */ 7114 st.other[0] = 0x55555555; 7115 st.other[1] = 0x55555555; 7116 st.other[2] = 0x55555555; 7117 st.seqid = 0xffffffff; 7118 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 7119 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 7120 txdr_hyper(off, tl); tl += 2; 7121 txdr_hyper(len, tl); tl += 2; 7122 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len); 7123 7124 /* Do a Getattr for the attributes that change upon writing. */ 7125 NFSZERO_ATTRBIT(&attrbits); 7126 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 7127 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 7128 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 7129 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 7130 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 7131 *tl = txdr_unsigned(NFSV4OP_GETATTR); 7132 nfsrv_putattrbit(nd, &attrbits); 7133 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 7134 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 7135 if (error != 0) { 7136 free(nd, M_TEMP); 7137 return (error); 7138 } 7139 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n", 7140 nd->nd_repstat); 7141 /* Get rid of weak cache consistency data for now. */ 7142 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 7143 (ND_NFSV4 | ND_V4WCCATTR)) { 7144 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 7145 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 7146 NULL, NULL); 7147 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error); 7148 if (error != 0) 7149 goto nfsmout; 7150 /* 7151 * Get rid of Op# and status for next op. 7152 */ 7153 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 7154 if (*++tl != 0) 7155 nd->nd_flag |= ND_NOMOREDATA; 7156 } 7157 if (nd->nd_repstat == 0) { 7158 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 7159 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 7160 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, 7161 NULL, NULL); 7162 } else 7163 error = nd->nd_repstat; 7164 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error); 7165 nfsmout: 7166 m_freem(nd->nd_mrep); 7167 free(nd, M_TEMP); 7168 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error); 7169 return (error); 7170 } 7171 7172 /* 7173 * Start up the thread that will execute nfsrv_deallocatedsdorpc(). 7174 */ 7175 static void 7176 start_deallocatedsdorpc(void *arg, int pending) 7177 { 7178 struct nfsrvdeallocatedsdorpc *drpc; 7179 7180 drpc = (struct nfsrvdeallocatedsdorpc *)arg; 7181 drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 7182 drpc->len, NULL, drpc->cred, drpc->p); 7183 drpc->done = 1; 7184 NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err); 7185 } 7186 7187 static int 7188 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 7189 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 7190 int *failposp) 7191 { 7192 struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL; 7193 struct nfsvattr na; 7194 int error, i, ret, timo; 7195 7196 NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n"); 7197 drpc = NULL; 7198 if (mirrorcnt > 1) 7199 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 7200 M_WAITOK); 7201 7202 /* 7203 * Do the deallocate RPC for every DS, using a separate kernel process 7204 * for every DS except the last one. 7205 */ 7206 error = 0; 7207 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7208 tdrpc->done = 0; 7209 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 7210 tdrpc->off = off; 7211 tdrpc->len = len; 7212 tdrpc->nmp = *nmpp; 7213 tdrpc->cred = cred; 7214 tdrpc->p = p; 7215 tdrpc->inprog = 0; 7216 tdrpc->err = 0; 7217 ret = EIO; 7218 if (nfs_pnfsiothreads != 0) { 7219 ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc); 7220 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n", 7221 ret); 7222 } 7223 if (ret != 0) { 7224 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, 7225 NULL, cred, p); 7226 if (nfsds_failerr(ret) && *failposp == -1) 7227 *failposp = i; 7228 else if (error == 0 && ret != 0) 7229 error = ret; 7230 } 7231 nmpp++; 7232 fhp++; 7233 } 7234 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 7235 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 7236 *failposp = mirrorcnt - 1; 7237 else if (error == 0 && ret != 0) 7238 error = ret; 7239 if (error == 0) 7240 error = nfsrv_setextattr(vp, &na, p); 7241 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error); 7242 tdrpc = drpc; 7243 timo = hz / 50; /* Wait for 20msec. */ 7244 if (timo < 1) 7245 timo = 1; 7246 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7247 /* Wait for RPCs on separate threads to complete. */ 7248 while (tdrpc->inprog != 0 && tdrpc->done == 0) 7249 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 7250 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 7251 *failposp = i; 7252 else if (error == 0 && tdrpc->err != 0) 7253 error = tdrpc->err; 7254 } 7255 free(drpc, M_TEMP); 7256 return (error); 7257 } 7258 #endif 7259 7260 static int 7261 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred, 7262 NFSPROC_T *p, struct nfsmount *nmp, struct nfsvattr *nap, 7263 struct nfsvattr *dsnap) 7264 { 7265 uint32_t *tl; 7266 struct nfsrv_descript *nd; 7267 nfsv4stateid_t st; 7268 nfsattrbit_t attrbits; 7269 int error; 7270 7271 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 7272 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 7273 /* 7274 * Use a stateid where other is an alternating 01010 pattern and 7275 * seqid is 0xffffffff. This value is not defined as special by 7276 * the RFC and is used by the FreeBSD NFS server to indicate an 7277 * MDS->DS proxy operation. 7278 */ 7279 st.other[0] = 0x55555555; 7280 st.other[1] = 0x55555555; 7281 st.other[2] = 0x55555555; 7282 st.seqid = 0xffffffff; 7283 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (uint8_t *)fhp, sizeof(*fhp), 7284 NULL, NULL, 0, 0, cred); 7285 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 7286 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 7287 7288 /* Do a Getattr for the attributes that change due to writing. */ 7289 NFSZERO_ATTRBIT(&attrbits); 7290 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 7291 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 7292 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 7293 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 7294 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 7295 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 7296 *tl = txdr_unsigned(NFSV4OP_GETATTR); 7297 (void) nfsrv_putattrbit(nd, &attrbits); 7298 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 7299 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 7300 if (error != 0) { 7301 free(nd, M_TEMP); 7302 return (error); 7303 } 7304 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 7305 nd->nd_repstat); 7306 /* Get rid of weak cache consistency data for now. */ 7307 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 7308 (ND_NFSV4 | ND_V4WCCATTR)) { 7309 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 7310 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 7311 NULL, NULL, NULL); 7312 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 7313 if (error != 0) 7314 goto nfsmout; 7315 /* 7316 * Get rid of Op# and status for next op. 7317 */ 7318 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 7319 if (*++tl != 0) 7320 nd->nd_flag |= ND_NOMOREDATA; 7321 } 7322 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 7323 if (error != 0) 7324 goto nfsmout; 7325 if (nd->nd_repstat != 0) 7326 error = nd->nd_repstat; 7327 /* 7328 * Get the Change, Size, Access Time and Modify Time attributes and set 7329 * on the Metadata file, so its attributes will be what the file's 7330 * would be if it had been written. 7331 */ 7332 if (error == 0) { 7333 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 7334 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 7335 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 7336 NULL, NULL, NULL); 7337 } 7338 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 7339 nfsmout: 7340 m_freem(nd->nd_mrep); 7341 free(nd, M_TEMP); 7342 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 7343 return (error); 7344 } 7345 7346 struct nfsrvsetattrdsdorpc { 7347 int done; 7348 int inprog; 7349 struct task tsk; 7350 fhandle_t fh; 7351 struct nfsmount *nmp; 7352 struct vnode *vp; 7353 struct ucred *cred; 7354 NFSPROC_T *p; 7355 struct nfsvattr na; 7356 struct nfsvattr dsna; 7357 int err; 7358 }; 7359 7360 /* 7361 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 7362 */ 7363 static void 7364 start_setattrdsdorpc(void *arg, int pending) 7365 { 7366 struct nfsrvsetattrdsdorpc *drpc; 7367 7368 drpc = (struct nfsrvsetattrdsdorpc *)arg; 7369 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->vp, drpc->cred, 7370 drpc->p, drpc->nmp, &drpc->na, &drpc->dsna); 7371 drpc->done = 1; 7372 } 7373 7374 static int 7375 nfsrv_setattrdsrpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred, 7376 NFSPROC_T *p, struct nfsmount **nmp, int mirrorcnt, int stripecnt, 7377 struct nfsvattr *nap, int *failposp) 7378 { 7379 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */ 7380 fhandle_t *tfhp; 7381 struct nfsmount **tnmp; 7382 struct nfsvattr na; 7383 int error, i, j, ret, timo; 7384 7385 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 7386 drpc = NULL; 7387 *failposp = -1; 7388 error = 0; 7389 if (mirrorcnt > 1 || stripecnt > 1) 7390 tdrpc = drpc = malloc(sizeof(*drpc) * stripecnt * mirrorcnt, 7391 M_TEMP, M_WAITOK); 7392 7393 /* For each stripe, write to all the mirrors. */ 7394 tfhp = fhp; 7395 tnmp = nmp; 7396 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++, tfhp++, 7397 tnmp++) { 7398 j = i / stripecnt; 7399 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: stripe=%d mirror=%d\n", 7400 i, j); 7401 tdrpc->done = 0; 7402 NFSBCOPY(nap, &tdrpc->na, sizeof(*nap)); 7403 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp)); 7404 tdrpc->vp = vp; 7405 tdrpc->nmp = *tnmp; 7406 tdrpc->cred = cred; 7407 tdrpc->p = p; 7408 tdrpc->inprog = 0; 7409 tdrpc->err = 0; 7410 ret = EIO; 7411 if (nfs_pnfsiothreads != 0) { 7412 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 7413 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: " 7414 "nfs_pnfsio=%d\n", ret); 7415 } 7416 if (ret != 0) { 7417 ret = nfsrv_setattrdsdorpc(tfhp, vp, cred, p, 7418 *tnmp, &tdrpc->na, &tdrpc->dsna); 7419 if (nfsds_failerr(ret) && *failposp == -1) 7420 *failposp = i; 7421 else if (error == 0 && ret != 0) 7422 tdrpc->err = ret; 7423 tdrpc->inprog = 0; 7424 tdrpc->done = 1; 7425 } 7426 } 7427 ret = nfsrv_setattrdsdorpc(tfhp, vp, cred, p, *tnmp, nap, &na); 7428 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 7429 *failposp = i; 7430 else if (error == 0 && ret != 0) 7431 error = ret; 7432 tdrpc = drpc; 7433 timo = hz / 50; /* Wait for 20msec. */ 7434 if (timo < 1) 7435 timo = 1; 7436 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++) { 7437 /* Wait for RPCs on separate threads to complete. */ 7438 while (tdrpc->inprog != 0 && tdrpc->done == 0) 7439 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 7440 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 7441 *failposp = i; 7442 else if (error == 0 && tdrpc->err != 0) 7443 error = tdrpc->err; 7444 } 7445 7446 /* Find the reply attribute with the largest size and set that one. */ 7447 if (error == 0 && (mirrorcnt > 1 || stripecnt > 1)) { 7448 tdrpc = drpc; 7449 for (i = 0; i < stripecnt * mirrorcnt - 1; i++, tdrpc++) { 7450 if (tdrpc->dsna.na_size > na.na_size) 7451 NFSBCOPY(&tdrpc->dsna, &na, sizeof(*nap)); 7452 } 7453 } 7454 if (error == 0) 7455 error = nfsrv_setextattr(vp, &na, p); 7456 free(drpc, M_TEMP); 7457 return (error); 7458 } 7459 7460 #ifdef notnow 7461 /* 7462 * Do a Setattr of an NFSv4 ACL on the DS file. 7463 */ 7464 static int 7465 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 7466 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 7467 { 7468 struct nfsrv_descript *nd; 7469 nfsv4stateid_t st; 7470 nfsattrbit_t attrbits; 7471 int error; 7472 7473 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 7474 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 7475 /* 7476 * Use a stateid where other is an alternating 01010 pattern and 7477 * seqid is 0xffffffff. This value is not defined as special by 7478 * the RFC and is used by the FreeBSD NFS server to indicate an 7479 * MDS->DS proxy operation. 7480 */ 7481 st.other[0] = 0x55555555; 7482 st.other[1] = 0x55555555; 7483 st.other[2] = 0x55555555; 7484 st.seqid = 0xffffffff; 7485 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 7486 NULL, NULL, 0, 0, cred); 7487 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 7488 NFSZERO_ATTRBIT(&attrbits); 7489 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 7490 /* 7491 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 7492 * so passing in the metadata "vp" will be ok, since it is of 7493 * the same type (VREG). 7494 */ 7495 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 7496 NULL, 0, 0, 0, 0, 0, NULL, false, false, false, 0, NULL, false); 7497 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 7498 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 7499 if (error != 0) { 7500 free(nd, M_TEMP); 7501 return (error); 7502 } 7503 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 7504 nd->nd_repstat); 7505 error = nd->nd_repstat; 7506 m_freem(nd->nd_mrep); 7507 free(nd, M_TEMP); 7508 return (error); 7509 } 7510 7511 struct nfsrvsetacldsdorpc { 7512 int done; 7513 int inprog; 7514 struct task tsk; 7515 fhandle_t fh; 7516 struct nfsmount *nmp; 7517 struct vnode *vp; 7518 struct ucred *cred; 7519 NFSPROC_T *p; 7520 struct acl *aclp; 7521 int err; 7522 }; 7523 7524 /* 7525 * Start up the thread that will execute nfsrv_setacldsdorpc(). 7526 */ 7527 static void 7528 start_setacldsdorpc(void *arg, int pending) 7529 { 7530 struct nfsrvsetacldsdorpc *drpc; 7531 7532 drpc = (struct nfsrvsetacldsdorpc *)arg; 7533 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 7534 drpc->vp, drpc->nmp, drpc->aclp); 7535 drpc->done = 1; 7536 } 7537 7538 static int 7539 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 7540 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 7541 int *failposp) 7542 { 7543 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 7544 int error, i, ret, timo; 7545 7546 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 7547 drpc = NULL; 7548 if (mirrorcnt > 1) 7549 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 7550 M_WAITOK); 7551 7552 /* 7553 * Do the setattr RPC for every DS, using a separate kernel process 7554 * for every DS except the last one. 7555 */ 7556 error = 0; 7557 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7558 tdrpc->done = 0; 7559 tdrpc->inprog = 0; 7560 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 7561 tdrpc->nmp = *nmpp; 7562 tdrpc->vp = vp; 7563 tdrpc->cred = cred; 7564 tdrpc->p = p; 7565 tdrpc->aclp = aclp; 7566 tdrpc->err = 0; 7567 ret = EIO; 7568 if (nfs_pnfsiothreads != 0) { 7569 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 7570 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 7571 ret); 7572 } 7573 if (ret != 0) { 7574 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 7575 aclp); 7576 if (nfsds_failerr(ret) && *failposp == -1) 7577 *failposp = i; 7578 else if (error == 0 && ret != 0) 7579 error = ret; 7580 } 7581 nmpp++; 7582 fhp++; 7583 } 7584 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 7585 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 7586 *failposp = mirrorcnt - 1; 7587 else if (error == 0 && ret != 0) 7588 error = ret; 7589 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 7590 tdrpc = drpc; 7591 timo = hz / 50; /* Wait for 20msec. */ 7592 if (timo < 1) 7593 timo = 1; 7594 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 7595 /* Wait for RPCs on separate threads to complete. */ 7596 while (tdrpc->inprog != 0 && tdrpc->done == 0) 7597 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 7598 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 7599 *failposp = i; 7600 else if (error == 0 && tdrpc->err != 0) 7601 error = tdrpc->err; 7602 } 7603 free(drpc, M_TEMP); 7604 return (error); 7605 } 7606 #endif 7607 7608 struct nfsrvgetattrdsdorpc { 7609 int done; 7610 int inprog; 7611 struct task tsk; 7612 fhandle_t fh; 7613 struct vnode *vp; 7614 struct nfsvattr na; 7615 struct nfsmount *nmp; 7616 struct ucred *cred; 7617 NFSPROC_T *p; 7618 int err; 7619 }; 7620 7621 /* 7622 * Start up the thread that will execute nfsrv_getattrdsdorpc(). 7623 */ 7624 static void 7625 start_getattrdsdorpc(void *arg, int pending) 7626 { 7627 struct nfsrvgetattrdsdorpc *drpc; 7628 7629 drpc = (struct nfsrvgetattrdsdorpc *)arg; 7630 drpc->err = nfsrv_getattrdsdorpc(&drpc->fh, drpc->vp, drpc->cred, 7631 drpc->p, drpc->nmp, &drpc->na); 7632 drpc->done = 1; 7633 NFSD_DEBUG(4, "start_getattrdsdorpc: err=%d\n", drpc->err); 7634 } 7635 7636 /* 7637 * For a striped configuration, a getattr RPC must be done on all stripes, 7638 * since there is no way of knowing which DS currently stores the last 7639 * bytes of the file. 7640 */ 7641 static int 7642 nfsrv_getattrdsrpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred, 7643 NFSPROC_T *p, struct nfsmount **nmp, int stripecnt, struct nfsvattr *nap, 7644 int *failposp) 7645 { 7646 struct nfsrvgetattrdsdorpc *drpc, *tdrpc = NULL; /* Shut up gcc. */ 7647 fhandle_t *tfhp; 7648 struct nfsmount **tnmp; 7649 int error, i, ret, timo; 7650 7651 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 7652 drpc = NULL; 7653 *failposp = -1; 7654 if (stripecnt > 1) 7655 tdrpc = drpc = malloc(sizeof(*drpc) * stripecnt, M_TEMP, 7656 M_WAITOK); 7657 7658 /* For each stripe except last one, do a Getattr.. */ 7659 tfhp = fhp; 7660 tnmp = nmp; 7661 for (i = 0; i < stripecnt - 1; i++, tdrpc++, tfhp++, tnmp++) { 7662 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: stripe=%d\n", i); 7663 error = 0; 7664 tdrpc->done = 0; 7665 NFSBCOPY(tfhp, &tdrpc->fh, sizeof(*tfhp)); 7666 tdrpc->nmp = *tnmp; 7667 tdrpc->vp = vp; 7668 tdrpc->cred = cred; 7669 tdrpc->p = p; 7670 tdrpc->inprog = 0; 7671 tdrpc->err = 0; 7672 ret = EIO; 7673 if (nfs_pnfsiothreads != 0) { 7674 ret = nfs_pnfsio(start_getattrdsdorpc, tdrpc); 7675 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: " 7676 "nfs_pnfsio=%d\n", ret); 7677 } 7678 if (ret != 0) { 7679 ret = nfsrv_getattrdsdorpc(tfhp, vp, cred, p, *tnmp, 7680 &tdrpc->na); 7681 if (nfsds_failerr(ret) && *failposp == -1) 7682 *failposp = i; 7683 else if (error == 0 && ret != 0) 7684 tdrpc->err = ret; 7685 tdrpc->inprog = 0; 7686 tdrpc->done = 1; 7687 } 7688 } 7689 ret = nfsrv_getattrdsdorpc(tfhp, vp, cred, p, *tnmp, nap); 7690 if (nfsds_failerr(ret) && *failposp == -1) 7691 *failposp = i; 7692 if (error == 0 && ret != 0) 7693 error = ret; 7694 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft stripes=%d\n", error); 7695 timo = hz / 50; /* Wait for 20msec. */ 7696 if (timo < 1) 7697 timo = 1; 7698 tdrpc = drpc; 7699 for (i = 0; i < stripecnt - 1; i++, tdrpc++) { 7700 /* Wait for RPCs on separate threads to complete. */ 7701 while (tdrpc->inprog != 0 && tdrpc->done == 0) 7702 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 7703 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 7704 *failposp = i; 7705 else if (error == 0 && tdrpc->err != 0) 7706 error = tdrpc->err; 7707 } 7708 7709 /* Find the attribute with the largest size and return that one. */ 7710 if (stripecnt > 1) { 7711 tdrpc = drpc; 7712 for (i = 0; i < stripecnt - 1; i++, tdrpc++) { 7713 if (tdrpc->na.na_size > nap->na_size) 7714 NFSBCOPY(&tdrpc->na, nap, sizeof(*nap)); 7715 } 7716 } 7717 /* 7718 * We can only save the updated values in the extended 7719 * attribute if the vp is exclusively locked. 7720 */ 7721 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 7722 error = nfsrv_setextattr(vp, nap, p); 7723 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 7724 error); 7725 } 7726 7727 free(drpc, M_TEMP); 7728 return (error); 7729 } 7730 7731 /* 7732 * Getattr call to the DS for the attributes that change due to writing. 7733 */ 7734 static int 7735 nfsrv_getattrdsdorpc(fhandle_t *fhp, struct vnode *vp, struct ucred *cred, 7736 NFSPROC_T *p, struct nfsmount *nmp, struct nfsvattr *nap) 7737 { 7738 struct nfsrv_descript *nd; 7739 int error; 7740 nfsattrbit_t attrbits; 7741 7742 NFSD_DEBUG(4, "in nfsrv_getattrdsdorpc\n"); 7743 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 7744 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 7745 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 7746 NFSZERO_ATTRBIT(&attrbits); 7747 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 7748 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 7749 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 7750 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 7751 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 7752 (void) nfsrv_putattrbit(nd, &attrbits); 7753 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 7754 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 7755 if (error != 0) { 7756 free(nd, M_TEMP); 7757 return (error); 7758 } 7759 NFSD_DEBUG(4, "nfsrv_getattrdsdorpc: aft getattrrpc=%d\n", 7760 nd->nd_repstat); 7761 if (nd->nd_repstat == 0) 7762 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 7763 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 7764 NULL, NULL, NULL, NULL, NULL); 7765 else 7766 error = nd->nd_repstat; 7767 m_freem(nd->nd_mrep); 7768 free(nd, M_TEMP); 7769 NFSD_DEBUG(4, "nfsrv_getattrdsdorpc error=%d\n", error); 7770 return (error); 7771 } 7772 7773 #ifdef notnow 7774 /* 7775 * Seek call to a DS. 7776 */ 7777 static int 7778 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 7779 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 7780 { 7781 uint32_t *tl; 7782 struct nfsrv_descript *nd; 7783 nfsv4stateid_t st; 7784 int error; 7785 7786 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 7787 /* 7788 * Use a stateid where other is an alternating 01010 pattern and 7789 * seqid is 0xffffffff. This value is not defined as special by 7790 * the RFC and is used by the FreeBSD NFS server to indicate an 7791 * MDS->DS proxy operation. 7792 */ 7793 st.other[0] = 0x55555555; 7794 st.other[1] = 0x55555555; 7795 st.other[2] = 0x55555555; 7796 st.seqid = 0xffffffff; 7797 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 7798 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 7799 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 7800 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 7801 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 7802 txdr_hyper(*offp, tl); tl += 2; 7803 *tl = txdr_unsigned(content); 7804 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 7805 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 7806 if (error != 0) { 7807 free(nd, M_TEMP); 7808 return (error); 7809 } 7810 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 7811 if (nd->nd_repstat == 0) { 7812 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 7813 if (*tl++ == newnfs_true) 7814 *eofp = true; 7815 else 7816 *eofp = false; 7817 *offp = fxdr_hyper(tl); 7818 } else 7819 error = nd->nd_repstat; 7820 nfsmout: 7821 m_freem(nd->nd_mrep); 7822 free(nd, M_TEMP); 7823 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 7824 return (error); 7825 } 7826 #endif 7827 7828 /* 7829 * Get the device id and file handle for a DS file. 7830 */ 7831 int 7832 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 7833 uint64_t *stripesizp, int *stripecntp, fhandle_t **fhpp, char **devid) 7834 { 7835 int buflen, error; 7836 char *buf; 7837 7838 buflen = sizeof(struct pnfsdsfile) * NFSDEV_MAXMIRRORS * 7839 NFSDEV_MAXSTRIPE; 7840 buf = malloc(buflen, M_TEMP, M_WAITOK); 7841 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, stripecntp, 7842 stripesizp, p, NULL, fhpp, devid, NULL, NULL, NULL, NULL, NULL, 7843 NULL); 7844 free(buf, M_TEMP); 7845 return (error); 7846 } 7847 7848 /* 7849 * Do a Lookup against the DS for the filename. 7850 */ 7851 static int 7852 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 7853 struct vnode **nvpp, NFSPROC_T *p) 7854 { 7855 struct nameidata named; 7856 struct ucred *tcred; 7857 char *bufp; 7858 u_long *hashp; 7859 struct vnode *nvp; 7860 int error; 7861 7862 tcred = newnfs_getcred(); 7863 named.ni_cnd.cn_nameiop = LOOKUP; 7864 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 7865 named.ni_cnd.cn_cred = tcred; 7866 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 7867 nfsvno_setpathbuf(&named, &bufp, &hashp); 7868 named.ni_cnd.cn_nameptr = bufp; 7869 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 7870 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 7871 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 7872 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 7873 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 7874 NFSFREECRED(tcred); 7875 nfsvno_relpathbuf(&named); 7876 if (error == 0) 7877 *nvpp = nvp; 7878 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 7879 return (error); 7880 } 7881 7882 /* 7883 * Set the file handle to the correct one. 7884 */ 7885 static void 7886 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char **devid, 7887 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 7888 { 7889 struct nfsnode *np; 7890 int ret = 0; 7891 7892 np = VTONFS(nvp); 7893 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 7894 /* 7895 * We can only do a vn_set_extattr() if the vnode is exclusively 7896 * locked and vn_start_write() has been done. If devid != NULL or 7897 * fnamep != NULL or the vnode is shared locked, vn_start_write() 7898 * may not have been done. 7899 * If not done now, it will be done on a future call. 7900 */ 7901 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 7902 LK_EXCLUSIVE) 7903 ret = vn_extattr_set(vp, IO_NODELOCKED, 7904 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 7905 (char *)pf, p); 7906 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 7907 } 7908 7909 /* 7910 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 7911 * when the DS has failed. 7912 */ 7913 void 7914 nfsrv_killrpcs(struct nfsmount *nmp) 7915 { 7916 7917 /* 7918 * Call newnfs_nmcancelreqs() to cause 7919 * any RPCs in progress on the mount point to 7920 * fail. 7921 * This will cause any process waiting for an 7922 * RPC to complete while holding a vnode lock 7923 * on the mounted-on vnode (such as "df" or 7924 * a non-forced "umount") to fail. 7925 * This will unlock the mounted-on vnode so 7926 * a forced dismount can succeed. 7927 * The NFSMNTP_CANCELRPCS flag should be set when this function is 7928 * called. 7929 */ 7930 newnfs_nmcancelreqs(nmp); 7931 } 7932 7933 /* 7934 * Sum up the statfs info for each of the DSs, so that the client will 7935 * receive the total for all DSs. 7936 */ 7937 static int 7938 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 7939 { 7940 struct statfs *tsf; 7941 struct nfsdevice *ds; 7942 struct vnode **dvpp, **tdvpp, *dvp; 7943 uint64_t tot; 7944 int cnt, error = 0, i; 7945 7946 if (nfsrv_devidcnt <= 0) 7947 return (ENXIO); 7948 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 7949 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 7950 7951 /* Get an array of the dvps for the DSs. */ 7952 tdvpp = dvpp; 7953 i = 0; 7954 NFSDDSLOCK(); 7955 /* First, search for matches for same file system. */ 7956 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 7957 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 7958 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 7959 if (++i > nfsrv_devidcnt) 7960 break; 7961 *tdvpp++ = ds->nfsdev_dvp; 7962 } 7963 } 7964 /* 7965 * If no matches for same file system, total all servers not assigned 7966 * to a file system. 7967 */ 7968 if (i == 0) { 7969 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 7970 if (ds->nfsdev_nmp != NULL && 7971 ds->nfsdev_mdsisset == 0) { 7972 if (++i > nfsrv_devidcnt) 7973 break; 7974 *tdvpp++ = ds->nfsdev_dvp; 7975 } 7976 } 7977 } 7978 NFSDDSUNLOCK(); 7979 cnt = i; 7980 7981 /* Do a VFS_STATFS() for each of the DSs and sum them up. */ 7982 tdvpp = dvpp; 7983 for (i = 0; i < cnt && error == 0; i++) { 7984 dvp = *tdvpp++; 7985 error = VFS_STATFS(dvp->v_mount, tsf); 7986 if (error == 0) { 7987 if (sf->f_bsize == 0) { 7988 if (tsf->f_bsize > 0) 7989 sf->f_bsize = tsf->f_bsize; 7990 else 7991 sf->f_bsize = 8192; 7992 } 7993 if (tsf->f_blocks > 0) { 7994 if (sf->f_bsize != tsf->f_bsize) { 7995 tot = tsf->f_blocks * tsf->f_bsize; 7996 sf->f_blocks += (tot / sf->f_bsize); 7997 } else 7998 sf->f_blocks += tsf->f_blocks; 7999 } 8000 if (tsf->f_bfree > 0) { 8001 if (sf->f_bsize != tsf->f_bsize) { 8002 tot = tsf->f_bfree * tsf->f_bsize; 8003 sf->f_bfree += (tot / sf->f_bsize); 8004 } else 8005 sf->f_bfree += tsf->f_bfree; 8006 } 8007 if (tsf->f_bavail > 0) { 8008 if (sf->f_bsize != tsf->f_bsize) { 8009 tot = tsf->f_bavail * tsf->f_bsize; 8010 sf->f_bavail += (tot / sf->f_bsize); 8011 } else 8012 sf->f_bavail += tsf->f_bavail; 8013 } 8014 } 8015 } 8016 free(tsf, M_TEMP); 8017 free(dvpp, M_TEMP); 8018 return (error); 8019 } 8020 8021 /* 8022 * Set an acl. 8023 */ 8024 int 8025 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, acl_type_t atype, 8026 struct ucred *cred, NFSPROC_T *p) 8027 { 8028 int error; 8029 8030 if (nfsrv_useacl == 0 || (atype == ACL_TYPE_NFS4 && 8031 nfs_supportsnfsv4acls(vp) == 0) || (atype != ACL_TYPE_NFS4 && 8032 nfs_supportsposixacls(vp) == 0)) { 8033 error = NFSERR_ATTRNOTSUPP; 8034 goto out; 8035 } 8036 /* 8037 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 8038 * Make sure it has enough room for that - splitting every entry 8039 * into two and appending "canonical six" entries at the end. 8040 * Cribbed out of kern/vfs_acl.c - Rick M. 8041 */ 8042 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 8043 error = NFSERR_ATTRNOTSUPP; 8044 goto out; 8045 } 8046 if (aclp->acl_cnt == 0) { 8047 if (atype != ACL_TYPE_DEFAULT || vp->v_type != VDIR) { 8048 error = NFSERR_INVAL; 8049 goto out; 8050 } 8051 error = VOP_SETACL(vp, atype, NULL, cred, p); 8052 } else 8053 error = VOP_SETACL(vp, atype, aclp, cred, p); 8054 if (error == 0) { 8055 error = nfsrv_dssetacl(vp, aclp, cred, p); 8056 if (error == ENOENT) 8057 error = 0; 8058 } 8059 8060 out: 8061 NFSEXITCODE(error); 8062 return (error); 8063 } 8064 8065 /* 8066 * Seek vnode op call (actually it is a VOP_IOCTL()). 8067 * This function is called with the vnode locked, but unlocks and vrele()s 8068 * the vp before returning. 8069 */ 8070 int 8071 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 8072 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 8073 { 8074 struct nfsvattr at; 8075 int error, ret; 8076 8077 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 8078 #ifdef notnow 8079 /* 8080 * Attempt to seek on a DS file. A return of ENOENT implies 8081 * there is no DS file to seek on. 8082 */ 8083 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 8084 NULL, NULL, NULL, NULL, offp, content, eofp); 8085 if (error != ENOENT) { 8086 vput(vp); 8087 return (error); 8088 } 8089 #endif 8090 8091 /* 8092 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 8093 * VOP_IOCTL() will return ENXIO. However, the correct reply for 8094 * NFSv4.2 is *eofp == true and error == 0 for this case. 8095 */ 8096 NFSVOPUNLOCK(vp); 8097 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 8098 *eofp = false; 8099 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 8100 /* Handle the cases where we might be at EOF. */ 8101 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 8102 if (ret == 0 && *offp == at.na_size) { 8103 *eofp = true; 8104 error = 0; 8105 } 8106 if (ret != 0 && error == 0) 8107 error = ret; 8108 } 8109 vrele(vp); 8110 NFSEXITCODE(error); 8111 return (error); 8112 } 8113 8114 /* 8115 * Allocate vnode op call. 8116 */ 8117 int 8118 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 8119 NFSPROC_T *p) 8120 { 8121 int error; 8122 off_t olen; 8123 8124 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 8125 #ifdef notnow 8126 /* 8127 * Attempt to allocate on a DS file. A return of ENOENT implies 8128 * there is no DS file to allocate on. 8129 */ 8130 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 8131 NULL, NULL, NULL, NULL, &len, 0, NULL); 8132 if (error != ENOENT) 8133 return (error); 8134 #endif 8135 8136 /* 8137 * Do the actual VOP_ALLOCATE(), looping so long as 8138 * progress is being made, to achieve completion. 8139 */ 8140 do { 8141 olen = len; 8142 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred); 8143 if (error == 0 && len > 0 && olen > len) 8144 maybe_yield(); 8145 } while (error == 0 && len > 0 && olen > len); 8146 if (error == 0 && len > 0) 8147 error = NFSERR_IO; 8148 NFSEXITCODE(error); 8149 return (error); 8150 } 8151 8152 /* 8153 * Deallocate vnode op call. 8154 */ 8155 int 8156 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 8157 NFSPROC_T *p) 8158 { 8159 int error; 8160 off_t olen; 8161 8162 ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp"); 8163 #ifdef notnow 8164 /* 8165 * Attempt to deallocate on a DS file. A return of ENOENT implies 8166 * there is no DS file to deallocate on. 8167 */ 8168 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL, 8169 NULL, NULL, NULL, NULL, &len, 0, NULL); 8170 if (error != ENOENT) 8171 return (error); 8172 #endif 8173 8174 /* 8175 * Do the actual VOP_DEALLOCATE(), looping so long as 8176 * progress is being made, to achieve completion. 8177 */ 8178 do { 8179 olen = len; 8180 error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); 8181 if (error == 0 && len > 0 && olen > len) 8182 maybe_yield(); 8183 } while (error == 0 && len > 0 && olen > len); 8184 if (error == 0 && len > 0) 8185 error = NFSERR_IO; 8186 NFSEXITCODE(error); 8187 return (error); 8188 } 8189 8190 /* 8191 * Get Extended Atribute vnode op into an mbuf list. 8192 */ 8193 int 8194 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, 8195 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, 8196 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 8197 { 8198 struct iovec *iv; 8199 struct uio io, *uiop = &io; 8200 struct mbuf *m, *m2; 8201 int alen, error, len, tlen; 8202 size_t siz; 8203 8204 /* First, find out the size of the extended attribute. */ 8205 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 8206 &siz, cred, p); 8207 if (error != 0) 8208 return (NFSERR_NOXATTR); 8209 if (siz > maxresp - NFS_MAXXDR) 8210 return (NFSERR_XATTR2BIG); 8211 len = siz; 8212 tlen = NFSM_RNDUP(len); 8213 if (tlen > 0) { 8214 /* 8215 * If cnt > MCLBYTES and the reply will not be saved, use 8216 * ext_pgs mbufs for TLS. 8217 * For NFSv4.0, we do not know for sure if the reply will 8218 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 8219 * Always use ext_pgs mbufs if ND_EXTPG is set. 8220 */ 8221 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && 8222 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && 8223 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) 8224 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, 8225 maxextsiz, &m, &m2, &iv); 8226 else 8227 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, 8228 &iv); 8229 uiop->uio_iov = iv; 8230 } else { 8231 uiop->uio_iovcnt = 0; 8232 uiop->uio_iov = iv = NULL; 8233 m = m2 = NULL; 8234 } 8235 uiop->uio_offset = 0; 8236 uiop->uio_resid = tlen; 8237 uiop->uio_rw = UIO_READ; 8238 uiop->uio_segflg = UIO_SYSSPACE; 8239 uiop->uio_td = p; 8240 #ifdef MAC 8241 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, 8242 name); 8243 if (error != 0) 8244 goto out; 8245 #endif 8246 8247 if (tlen > 0) 8248 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 8249 NULL, cred, p); 8250 if (error != 0) 8251 goto out; 8252 if (uiop->uio_resid > 0) { 8253 alen = tlen; 8254 len = tlen - uiop->uio_resid; 8255 tlen = NFSM_RNDUP(len); 8256 if (alen != tlen) 8257 printf("nfsvno_getxattr: weird size read\n"); 8258 if (tlen == 0) { 8259 m_freem(m); 8260 m = m2 = NULL; 8261 } else if (alen != tlen || tlen != len) 8262 m2 = nfsrv_adj(m, alen - tlen, tlen - len); 8263 } 8264 *lenp = len; 8265 *mpp = m; 8266 *mpendp = m2; 8267 8268 out: 8269 if (error != 0) { 8270 if (m != NULL) 8271 m_freem(m); 8272 *lenp = 0; 8273 } 8274 free(iv, M_TEMP); 8275 NFSEXITCODE(error); 8276 return (error); 8277 } 8278 8279 /* 8280 * Set Extended attribute vnode op from an mbuf list. 8281 */ 8282 int 8283 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, 8284 char *cp, struct ucred *cred, struct thread *p) 8285 { 8286 struct iovec *iv; 8287 struct uio uio, *uiop = &uio; 8288 int cnt, error; 8289 8290 error = 0; 8291 #ifdef MAC 8292 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, 8293 name); 8294 #endif 8295 if (error != 0) 8296 goto out; 8297 8298 uiop->uio_rw = UIO_WRITE; 8299 uiop->uio_segflg = UIO_SYSSPACE; 8300 uiop->uio_td = p; 8301 uiop->uio_offset = 0; 8302 uiop->uio_resid = len; 8303 if (len > 0) { 8304 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); 8305 uiop->uio_iov = iv; 8306 uiop->uio_iovcnt = cnt; 8307 } else { 8308 uiop->uio_iov = iv = NULL; 8309 uiop->uio_iovcnt = 0; 8310 } 8311 if (error == 0) { 8312 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 8313 cred, p); 8314 if (error == 0) { 8315 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 8316 nfsvno_updateds(vp, cred, p); 8317 error = VOP_FSYNC(vp, MNT_WAIT, p); 8318 } 8319 free(iv, M_TEMP); 8320 } 8321 8322 out: 8323 NFSEXITCODE(error); 8324 return (error); 8325 } 8326 8327 /* 8328 * For a pNFS server, the DS file's ctime and 8329 * va_filerev (TimeMetadata and Change) needs to 8330 * be updated. This is a hack, but works by 8331 * flipping the S_ISGID bit in va_mode and then 8332 * flipping it back. 8333 * It does result in two MDS->DS RPCs, but creating 8334 * a custom RPC just to do this seems overkill, since 8335 * Setxattr/Rmxattr will not be done that frequently. 8336 * If it fails part way through, that is not too 8337 * serious, since the DS file is never executed. 8338 */ 8339 static void 8340 nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p) 8341 { 8342 struct nfsvattr nva; 8343 int ret; 8344 u_short tmode; 8345 8346 ret = VOP_GETATTR(vp, &nva.na_vattr, cred); 8347 if (ret == 0) { 8348 tmode = nva.na_mode; 8349 NFSVNO_ATTRINIT(&nva); 8350 tmode ^= S_ISGID; 8351 NFSVNO_SETATTRVAL(&nva, mode, tmode); 8352 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 8353 NFSPROC_SETATTR, NULL, NULL, NULL, &nva, 8354 NULL, NULL, 0, NULL); 8355 if (ret == 0) { 8356 tmode ^= S_ISGID; 8357 NFSVNO_SETATTRVAL(&nva, mode, tmode); 8358 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 8359 NFSPROC_SETATTR, NULL, NULL, NULL, 8360 &nva, NULL, NULL, 0, NULL); 8361 } 8362 } 8363 } 8364 8365 /* 8366 * Remove Extended attribute vnode op. 8367 */ 8368 int 8369 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 8370 struct ucred *cred, struct thread *p) 8371 { 8372 int error; 8373 8374 /* 8375 * Get rid of any delegations. I am not sure why this is required, 8376 * but RFC-8276 says so. 8377 */ 8378 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 8379 if (error != 0) 8380 goto out; 8381 #ifdef MAC 8382 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 8383 name); 8384 if (error != 0) 8385 goto out; 8386 #endif 8387 8388 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 8389 if (error == EOPNOTSUPP) 8390 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 8391 cred, p); 8392 if (error == 0) { 8393 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 8394 nfsvno_updateds(vp, cred, p); 8395 error = VOP_FSYNC(vp, MNT_WAIT, p); 8396 } 8397 out: 8398 NFSEXITCODE(error); 8399 return (error); 8400 } 8401 8402 /* 8403 * List Extended Atribute vnode op into an mbuf list. 8404 */ 8405 int 8406 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 8407 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 8408 { 8409 struct iovec iv; 8410 struct uio io; 8411 int error; 8412 size_t siz; 8413 8414 *bufp = NULL; 8415 /* First, find out the size of the extended attribute. */ 8416 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 8417 p); 8418 if (error != 0) 8419 return (NFSERR_NOXATTR); 8420 if (siz <= cookie) { 8421 *lenp = 0; 8422 *eofp = true; 8423 goto out; 8424 } 8425 if (siz > cookie + *lenp) { 8426 siz = cookie + *lenp; 8427 *eofp = false; 8428 } else 8429 *eofp = true; 8430 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ 8431 if (siz > 10 * 1024 * 1024) { 8432 error = NFSERR_XATTR2BIG; 8433 goto out; 8434 } 8435 *bufp = malloc(siz, M_TEMP, M_WAITOK); 8436 iv.iov_base = *bufp; 8437 iv.iov_len = siz; 8438 io.uio_iovcnt = 1; 8439 io.uio_iov = &iv; 8440 io.uio_offset = 0; 8441 io.uio_resid = siz; 8442 io.uio_rw = UIO_READ; 8443 io.uio_segflg = UIO_SYSSPACE; 8444 io.uio_td = p; 8445 #ifdef MAC 8446 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 8447 if (error != 0) 8448 goto out; 8449 #endif 8450 8451 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 8452 p); 8453 if (error != 0) 8454 goto out; 8455 if (io.uio_resid > 0) 8456 siz -= io.uio_resid; 8457 *lenp = siz; 8458 8459 out: 8460 if (error != 0) { 8461 free(*bufp, M_TEMP); 8462 *bufp = NULL; 8463 } 8464 NFSEXITCODE(error); 8465 return (error); 8466 } 8467 8468 /* 8469 * Trim trailing data off the mbuf list being built. 8470 */ 8471 void 8472 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 8473 int bextpg, int bextpgsiz) 8474 { 8475 vm_page_t pg; 8476 int fullpgsiz, i; 8477 8478 if (mb->m_next != NULL) { 8479 m_freem(mb->m_next); 8480 mb->m_next = NULL; 8481 } 8482 if ((mb->m_flags & M_EXTPG) != 0) { 8483 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs, 8484 ("nfsm_trimtrailing: bextpg out of range")); 8485 KASSERT(bpos == (char *) 8486 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz, 8487 ("nfsm_trimtrailing: bextpgsiz bad!")); 8488 8489 /* First, get rid of any pages after this position. */ 8490 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 8491 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 8492 vm_page_unwire_noq(pg); 8493 vm_page_free(pg); 8494 } 8495 mb->m_epg_npgs = bextpg + 1; 8496 if (bextpg == 0) 8497 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 8498 else 8499 fullpgsiz = PAGE_SIZE; 8500 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 8501 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 8502 for (i = 1; i < mb->m_epg_npgs; i++) 8503 mb->m_len += m_epg_pagelen(mb, i, 0); 8504 nd->nd_bextpgsiz = bextpgsiz; 8505 nd->nd_bextpg = bextpg; 8506 } else 8507 mb->m_len = bpos - mtod(mb, char *); 8508 nd->nd_mb = mb; 8509 nd->nd_bpos = bpos; 8510 } 8511 8512 8513 /* 8514 * Check to see if a put file handle operation should test for 8515 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR. 8516 * When Open is the next operation, NFSERR_WRONGSEC cannot be 8517 * replied for the Open cases that use a component. This can 8518 * be identified by the fact that the file handle's type is VDIR. 8519 */ 8520 bool 8521 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, __enum_uint8(vtype) vtyp) 8522 { 8523 8524 if ((nd->nd_flag & ND_NFSV4) == 0) 8525 return (true); 8526 8527 if ((nd->nd_flag & ND_LASTOP) != 0) 8528 return (false); 8529 8530 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH || 8531 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH || 8532 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP || 8533 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME) 8534 return (false); 8535 if (nextop == NFSV4OP_OPEN && vtyp == VDIR) 8536 return (false); 8537 return (true); 8538 } 8539 8540 /* 8541 * Check DSs marked no space. 8542 */ 8543 void 8544 nfsrv_checknospc(void) 8545 { 8546 struct statfs *tsf; 8547 struct nfsdevice *ds; 8548 struct vnode **dvpp, **tdvpp, *dvp; 8549 char *devid, *tdevid; 8550 int cnt, error = 0, i; 8551 8552 if (nfsrv_devidcnt <= 0) 8553 return; 8554 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 8555 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK); 8556 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 8557 8558 /* Get an array of the dvps for the DSs. */ 8559 tdvpp = dvpp; 8560 tdevid = devid; 8561 i = 0; 8562 NFSDDSLOCK(); 8563 /* First, search for matches for same file system. */ 8564 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 8565 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) { 8566 if (++i > nfsrv_devidcnt) 8567 break; 8568 *tdvpp++ = ds->nfsdev_dvp; 8569 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID); 8570 tdevid += NFSX_V4DEVICEID; 8571 } 8572 } 8573 NFSDDSUNLOCK(); 8574 8575 /* Do a VFS_STATFS() for each of the DSs and clear no space. */ 8576 cnt = i; 8577 tdvpp = dvpp; 8578 tdevid = devid; 8579 for (i = 0; i < cnt && error == 0; i++) { 8580 dvp = *tdvpp++; 8581 error = VFS_STATFS(dvp->v_mount, tsf); 8582 if (error == 0 && tsf->f_bavail > 0) { 8583 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n"); 8584 nfsrv_marknospc(tdevid, false); 8585 } 8586 tdevid += NFSX_V4DEVICEID; 8587 } 8588 free(tsf, M_TEMP); 8589 free(dvpp, M_TEMP); 8590 free(devid, M_TEMP); 8591 } 8592 8593 /* 8594 * Return the correct ACL support value for a vnode. 8595 */ 8596 int 8597 nfs_supportsacls(struct vnode *vp) 8598 { 8599 8600 if (nfs_supportsnfsv4acls(vp) != 0) 8601 return (SUPPACL_NFSV4); 8602 else if (nfs_supportsposixacls(vp) != 0) 8603 return (SUPPACL_POSIX); 8604 return (SUPPACL_NONE); 8605 } 8606 8607 /* 8608 * Initialize everything that needs to be initialized for a vnet. 8609 */ 8610 static void 8611 nfsrv_vnetinit(const void *unused __unused) 8612 { 8613 8614 nfsd_mntinit(); 8615 } 8616 VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY, 8617 nfsrv_vnetinit, NULL); 8618 8619 /* 8620 * Clean up everything that is in a vnet and needs to be 8621 * done when the jail is destroyed or the module unloaded. 8622 */ 8623 static void 8624 nfsrv_cleanup(const void *unused __unused) 8625 { 8626 int i; 8627 8628 NFSD_LOCK(); 8629 if (!VNET(nfsrv_mntinited)) { 8630 NFSD_UNLOCK(); 8631 return; 8632 } 8633 VNET(nfsrv_mntinited) = false; 8634 NFSD_UNLOCK(); 8635 8636 /* Clean out all NFSv4 state. */ 8637 nfsrv_throwawayallstate(curthread); 8638 8639 /* Clean the NFS server reply cache */ 8640 nfsrvd_cleancache(); 8641 8642 /* Clean out v4root exports. */ 8643 if (VNET(nfsv4root_mnt)->mnt_export != NULL) { 8644 vfs_free_addrlist(VNET(nfsv4root_mnt)->mnt_export); 8645 free(VNET(nfsv4root_mnt)->mnt_export, M_MOUNT); 8646 VNET(nfsv4root_mnt)->mnt_export = NULL; 8647 } 8648 8649 /* Free up the krpc server pool. */ 8650 if (VNET(nfsrvd_pool) != NULL) 8651 svcpool_destroy(VNET(nfsrvd_pool)); 8652 8653 /* and get rid of the locks */ 8654 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 8655 mtx_destroy(&VNET(nfsrchash_table)[i].mtx); 8656 mtx_destroy(&VNET(nfsrcahash_table)[i].mtx); 8657 } 8658 mtx_destroy(&VNET(nfsv4root_mnt)->mnt_mtx); 8659 for (i = 0; i < nfsrv_sessionhashsize; i++) 8660 mtx_destroy(&VNET(nfssessionhash)[i].mtx); 8661 lockdestroy(&VNET(nfsv4root_mnt)->mnt_explock); 8662 free(VNET(nfsrvudphashtbl), M_NFSRVCACHE); 8663 free(VNET(nfsrchash_table), M_NFSRVCACHE); 8664 free(VNET(nfsrcahash_table), M_NFSRVCACHE); 8665 free(VNET(nfsclienthash), M_NFSDCLIENT); 8666 free(VNET(nfslockhash), M_NFSDLOCKFILE); 8667 free(VNET(nfssessionhash), M_NFSDSESSION); 8668 free(VNET(nfsv4root_mnt), M_TEMP); 8669 VNET(nfsv4root_mnt) = NULL; 8670 } 8671 VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY, 8672 nfsrv_cleanup, NULL); 8673 8674 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 8675 8676 /* 8677 * Called once to initialize data structures... 8678 */ 8679 static int 8680 nfsd_modevent(module_t mod, int type, void *data) 8681 { 8682 int error = 0, i; 8683 static int loaded = 0; 8684 8685 switch (type) { 8686 case MOD_LOAD: 8687 if (loaded) 8688 goto out; 8689 newnfs_portinit(); 8690 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 8691 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 8692 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 8693 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 8694 #ifdef VV_DISABLEDELEG 8695 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 8696 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 8697 #endif 8698 nfsd_call_nfsd = nfssvc_nfsd; 8699 loaded = 1; 8700 break; 8701 8702 case MOD_UNLOAD: 8703 if (newnfs_numnfsd != 0) { 8704 error = EBUSY; 8705 break; 8706 } 8707 8708 #ifdef VV_DISABLEDELEG 8709 vn_deleg_ops.vndeleg_recall = NULL; 8710 vn_deleg_ops.vndeleg_disable = NULL; 8711 #endif 8712 nfsd_call_nfsd = NULL; 8713 mtx_destroy(&nfsrc_udpmtx); 8714 mtx_destroy(&nfs_v4root_mutex); 8715 mtx_destroy(&nfsrv_dontlistlock_mtx); 8716 mtx_destroy(&nfsrv_recalllock_mtx); 8717 if (nfslayouthash != NULL) { 8718 for (i = 0; i < nfsrv_layouthashsize; i++) 8719 mtx_destroy(&nfslayouthash[i].mtx); 8720 free(nfslayouthash, M_NFSDSESSION); 8721 } 8722 loaded = 0; 8723 break; 8724 default: 8725 error = EOPNOTSUPP; 8726 break; 8727 } 8728 8729 out: 8730 NFSEXITCODE(error); 8731 return (error); 8732 } 8733 static moduledata_t nfsd_mod = { 8734 "nfsd", 8735 nfsd_modevent, 8736 NULL, 8737 }; 8738 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 8739 8740 /* So that loader and kldload(2) can find us, wherever we are.. */ 8741 MODULE_VERSION(nfsd, 1); 8742 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 8743 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 8744 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 8745 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 8746