1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 36 #include <sys/capsicum.h> 37 #include <sys/extattr.h> 38 39 /* 40 * Functions that perform the vfs operations required by the routines in 41 * nfsd_serv.c. 
It is hoped that this change will make the server more
 * portable.
 */

#include <fs/nfs/nfsport.h>
#include <security/mac/mac_framework.h>
#include <sys/callout.h>
#include <sys/filio.h>
#include <sys/hash.h>
#include <sys/osd.h>
#include <sys/sysctl.h>
#include <nlm/nlm_prot.h>
#include <nlm/nlm.h>
#include <vm/vm_param.h>
#include <vm/vnode_pager.h>

FEATURE(nfsd, "NFSv4 server");

/* Variables that are defined in other source files. */
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern int nfsrv_useacl;
extern int newnfs_numnfsd;
extern int nfsrv_sessionhashsize;
extern struct nfslayouthash *nfslayouthash;
extern int nfsrv_layouthashsize;
extern struct mtx nfsrv_dslock_mtx;
extern int nfs_pnfsiothreads;
extern volatile int nfsrv_devidcnt;
extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
extern int nfs_bufpackets;
extern u_long sb_max_adj;

/* Per-vnet variables that are defined in other source files. */
NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst);
NFSD_VNET_DECLARE(SVCPOOL *, nfsrvd_pool);
NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash);
NFSD_VNET_DECLARE(struct nfslockhashhead *, nfslockhash);
NFSD_VNET_DECLARE(struct nfssessionhash *, nfssessionhash);
NFSD_VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock);
NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);

/* Locks and other global state defined by this file. */
NFSDLOCKMUTEX;
NFSSTATESPINLOCK;
struct mtx nfsrc_udpmtx;
struct mtx nfs_v4root_mutex;
struct mtx nfsrv_dontlistlock_mtx;
struct mtx nfsrv_recalllock_mtx;
struct nfsrvfh nfs_pubfh;
int nfs_pubfhset = 0;
int nfsd_debuglevel = 0;
static pid_t nfsd_master_pid = (pid_t)-1;
static char nfsd_master_comm[MAXCOMLEN + 1];
static struct timeval nfsd_master_start;
static uint32_t nfsv4_sysid = 0;
static fhandle_t zerofh;

/* Per-vnet variables defined by this file. */
NFSD_VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL;
NFSD_VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl);
NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table);
NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table);
NFSD_VNET_DEFINE(struct nfsrvfh, nfs_rootfh);
NFSD_VNET_DEFINE(int, nfs_rootfhset) = 0;
NFSD_VNET_DEFINE(struct callout, nfsd_callout);
NFSD_VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt);
NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt);
NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt);
NFSD_VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false;
NFSD_VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false;

static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
    struct ucred *);
static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *);

/* Exported through the vfs.nfsd sysctl tree declared further down. */
int nfsrv_enable_crossmntpt = 1;
static int nfs_commit_blks;
static int nfs_commit_miss;
extern int nfsrv_issuedelegs;
extern int nfsrv_dolocallocks;
extern struct nfsdevicehead nfsrv_devidhead;

/*
 * Map d_type to vnode type.
 * The table is indexed by the DT_xxx value; NFS_DTYPETOVTYPE() yields VNON
 * for any value above DT_WHT so the table is never indexed past its end.
 */
static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR,
    VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON };
#define	NFS_DTYPETOVTYPE(t)	((t) <= DT_WHT ? dtype_to_vnode[(t)] : VNON)

/* Prototypes for the static functions used by this file. */
static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
    struct iovec **);
static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
    struct mbuf **, struct iovec **);
static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
    int *);
static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
    NFSPROC_T *);
static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
    int *, char *, fhandle_t *);
static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
    NFSPROC_T *);
static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
    struct thread *, int, struct mbuf **, char *, struct mbuf **,
    struct nfsvattr *, struct acl *, off_t *, int, bool *);
static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
    char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount *, struct nfsvattr *);
static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
    NFSPROC_T *, struct nfsmount *);
static int nfsrv_putfhname(fhandle_t *, char *);
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
    struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
    struct vnode *, NFSPROC_T *);
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
    NFSPROC_T *);
static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);

int nfs_pnfsio(task_fn_t *, void *);

/* The vfs.nfsd sysctl node and the simple integer knobs below it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "NFS server");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
    &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
    0, "Debug level for NFS server");
NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid);
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid,
    CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_enable_stringtouid),
    0, "Enable nfsd to accept numeric owner_names");
static int nfsrv_pnfsgetdsattr = 1;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
    &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");

/*
 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
 * not running.
196 * The dsN subdirectories for the increased values must have been created 197 * on all DS servers before this increase is done. 198 */ 199 u_int nfsrv_dsdirsize = 20; 200 static int 201 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 202 { 203 int error, newdsdirsize; 204 205 newdsdirsize = nfsrv_dsdirsize; 206 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 207 if (error != 0 || req->newptr == NULL) 208 return (error); 209 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 210 newnfs_numnfsd != 0) 211 return (EINVAL); 212 nfsrv_dsdirsize = newdsdirsize; 213 return (0); 214 } 215 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 216 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 217 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 218 219 /* 220 * nfs_srvmaxio can only be increased and only when the nfsd threads are 221 * not running. The setting must be a power of 2, with the current limit of 222 * 1Mbyte. 223 */ 224 static int 225 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS) 226 { 227 int error; 228 u_int newsrvmaxio; 229 uint64_t tval; 230 231 newsrvmaxio = nfs_srvmaxio; 232 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req); 233 if (error != 0 || req->newptr == NULL) 234 return (error); 235 if (newsrvmaxio == nfs_srvmaxio) 236 return (0); 237 if (newsrvmaxio < nfs_srvmaxio) { 238 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n"); 239 return (EINVAL); 240 } 241 if (newsrvmaxio > 1048576) { 242 printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n"); 243 return (EINVAL); 244 } 245 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) { 246 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n"); 247 return (EINVAL); 248 } 249 250 /* 251 * Check that kern.ipc.maxsockbuf is large enough for 252 * newsrviomax, given the setting of vfs.nfs.bufpackets. 253 */ 254 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets > 255 sb_max_adj) { 256 /* 257 * Suggest vfs.nfs.bufpackets * maximum RPC message for 258 * sb_max_adj. 
259 */ 260 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets; 261 262 /* 263 * Convert suggested sb_max_adj value to a suggested 264 * sb_max value, which is what is set via kern.ipc.maxsockbuf. 265 * Perform the inverse calculation of (from uipc_sockbuf.c): 266 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES / 267 * (MSIZE + MCLBYTES); 268 * XXX If the calculation of sb_max_adj from sb_max changes, 269 * this calculation must be changed as well. 270 */ 271 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */ 272 tval += MCLBYTES - 1; /* Round up divide. */ 273 tval /= MCLBYTES; 274 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of " 275 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval, 276 newsrvmaxio); 277 return (EINVAL); 278 } 279 280 NFSD_LOCK(); 281 if (newnfs_numnfsd != 0) { 282 NFSD_UNLOCK(); 283 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd " 284 "threads are running\n"); 285 return (EINVAL); 286 } 287 288 289 nfs_srvmaxio = newsrvmaxio; 290 NFSD_UNLOCK(); 291 return (0); 292 } 293 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio, 294 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 295 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes"); 296 297 #define MAX_REORDERED_RPC 16 298 #define NUM_HEURISTIC 1031 299 #define NHUSE_INIT 64 300 #define NHUSE_INC 16 301 #define NHUSE_MAX 2048 302 303 static struct nfsheur { 304 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 305 off_t nh_nextoff; /* next offset for sequential detection */ 306 int nh_use; /* use count for selection */ 307 int nh_seqcount; /* heuristic */ 308 } nfsheur[NUM_HEURISTIC]; 309 310 /* 311 * Heuristic to detect sequential operation. 312 */ 313 static struct nfsheur * 314 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 315 { 316 struct nfsheur *nh; 317 int hi, try; 318 319 /* Locate best candidate. 
*/ 320 try = 32; 321 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 322 nh = &nfsheur[hi]; 323 while (try--) { 324 if (nfsheur[hi].nh_vp == vp) { 325 nh = &nfsheur[hi]; 326 break; 327 } 328 if (nfsheur[hi].nh_use > 0) 329 --nfsheur[hi].nh_use; 330 hi = (hi + 1) % NUM_HEURISTIC; 331 if (nfsheur[hi].nh_use < nh->nh_use) 332 nh = &nfsheur[hi]; 333 } 334 335 /* Initialize hint if this is a new file. */ 336 if (nh->nh_vp != vp) { 337 nh->nh_vp = vp; 338 nh->nh_nextoff = uio->uio_offset; 339 nh->nh_use = NHUSE_INIT; 340 if (uio->uio_offset == 0) 341 nh->nh_seqcount = 4; 342 else 343 nh->nh_seqcount = 1; 344 } 345 346 /* Calculate heuristic. */ 347 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 348 uio->uio_offset == nh->nh_nextoff) { 349 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 350 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 351 if (nh->nh_seqcount > IO_SEQMAX) 352 nh->nh_seqcount = IO_SEQMAX; 353 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 354 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 355 /* Probably a reordered RPC, leave seqcount alone. */ 356 } else if (nh->nh_seqcount > 1) { 357 nh->nh_seqcount /= 2; 358 } else { 359 nh->nh_seqcount = 0; 360 } 361 nh->nh_use += NHUSE_INC; 362 if (nh->nh_use > NHUSE_MAX) 363 nh->nh_use = NHUSE_MAX; 364 return (nh); 365 } 366 367 /* 368 * Get attributes into nfsvattr structure. 369 */ 370 int 371 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 372 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 373 nfsattrbit_t *attrbitp) 374 { 375 int error, gotattr, lockedit = 0; 376 struct nfsvattr na; 377 378 if (vpislocked == 0) { 379 /* 380 * When vpislocked == 0, the vnode is either exclusively 381 * locked by this thread or not locked by this thread. 382 * As such, shared lock it, if not exclusively locked. 
383 */ 384 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 385 lockedit = 1; 386 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 387 } 388 } 389 390 /* 391 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 392 * attributes, as required. 393 * This needs to be done for regular files if: 394 * - non-NFSv4 RPCs or 395 * - when attrbitp == NULL or 396 * - an NFSv4 RPC with any of the above attributes in attrbitp. 397 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 398 * these attributes. nfsrv_proxyds() will return an error if the 399 * server is not a pNFS one. 400 */ 401 gotattr = 0; 402 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 403 (nd->nd_flag & ND_NFSV4) == 0 || 404 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 405 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 406 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 407 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 408 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 409 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 410 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 411 NULL); 412 if (error == 0) 413 gotattr = 1; 414 } 415 416 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 417 if (lockedit != 0) 418 NFSVOPUNLOCK(vp); 419 420 /* 421 * If we got the Change, Size and Modify Time from the DS, 422 * replace them. 423 */ 424 if (gotattr != 0) { 425 nvap->na_atime = na.na_atime; 426 nvap->na_mtime = na.na_mtime; 427 nvap->na_filerev = na.na_filerev; 428 nvap->na_size = na.na_size; 429 nvap->na_bytes = na.na_bytes; 430 } 431 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 432 error, (uintmax_t)na.na_filerev); 433 434 NFSEXITCODE(error); 435 return (error); 436 } 437 438 /* 439 * Get a file handle for a vnode. 
440 */ 441 int 442 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 443 { 444 int error; 445 446 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 447 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 448 error = VOP_VPTOFH(vp, &fhp->fh_fid); 449 450 NFSEXITCODE(error); 451 return (error); 452 } 453 454 /* 455 * Perform access checking for vnodes obtained from file handles that would 456 * refer to files already opened by a Unix client. You cannot just use 457 * vn_writechk() and VOP_ACCESSX() for two reasons. 458 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 459 * case. 460 * 2 - The owner is to be given access irrespective of mode bits for some 461 * operations, so that processes that chmod after opening a file don't 462 * break. 463 */ 464 int 465 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, 466 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, 467 u_int32_t *supportedtypep) 468 { 469 struct vattr vattr; 470 int error = 0, getret = 0; 471 472 if (vpislocked == 0) { 473 if (NFSVOPLOCK(vp, LK_SHARED) != 0) { 474 error = EPERM; 475 goto out; 476 } 477 } 478 if (accmode & VWRITE) { 479 /* Just vn_writechk() changed to check rdonly */ 480 /* 481 * Disallow write attempts on read-only file systems; 482 * unless the file is a socket or a block or character 483 * device resident on the file system. 484 */ 485 if (NFSVNO_EXRDONLY(exp) || 486 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 487 switch (vp->v_type) { 488 case VREG: 489 case VDIR: 490 case VLNK: 491 error = EROFS; 492 default: 493 break; 494 } 495 } 496 /* 497 * If there's shared text associated with 498 * the inode, try to free it up once. If 499 * we fail, we can't allow writing. 500 */ 501 if (VOP_IS_TEXT(vp) && error == 0) 502 error = ETXTBSY; 503 } 504 if (error != 0) { 505 if (vpislocked == 0) 506 NFSVOPUNLOCK(vp); 507 goto out; 508 } 509 510 /* 511 * Should the override still be applied when ACLs are enabled? 
512 */ 513 error = VOP_ACCESSX(vp, accmode, cred, p); 514 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { 515 /* 516 * Try again with VEXPLICIT_DENY, to see if the test for 517 * deletion is supported. 518 */ 519 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); 520 if (error == 0) { 521 if (vp->v_type == VDIR) { 522 accmode &= ~(VDELETE | VDELETE_CHILD); 523 accmode |= VWRITE; 524 error = VOP_ACCESSX(vp, accmode, cred, p); 525 } else if (supportedtypep != NULL) { 526 *supportedtypep &= ~NFSACCESS_DELETE; 527 } 528 } 529 } 530 531 /* 532 * Allow certain operations for the owner (reads and writes 533 * on files that are already open). 534 */ 535 if (override != NFSACCCHK_NOOVERRIDE && 536 (error == EPERM || error == EACCES)) { 537 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) 538 error = 0; 539 else if (override & NFSACCCHK_ALLOWOWNER) { 540 getret = VOP_GETATTR(vp, &vattr, cred); 541 if (getret == 0 && cred->cr_uid == vattr.va_uid) 542 error = 0; 543 } 544 } 545 if (vpislocked == 0) 546 NFSVOPUNLOCK(vp); 547 548 out: 549 NFSEXITCODE(error); 550 return (error); 551 } 552 553 /* 554 * Set attribute(s) vnop. 555 */ 556 int 557 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, 558 struct thread *p, struct nfsexstuff *exp) 559 { 560 u_quad_t savsize = 0; 561 int error, savedit; 562 time_t savbtime; 563 564 /* 565 * If this is an exported file system and a pNFS service is running, 566 * don't VOP_SETATTR() of size for the MDS file system. 
567 */ 568 savedit = 0; 569 error = 0; 570 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && 571 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && 572 nvap->na_vattr.va_size > 0) { 573 savsize = nvap->na_vattr.va_size; 574 nvap->na_vattr.va_size = VNOVAL; 575 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 576 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 577 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 578 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 579 nvap->na_vattr.va_mtime.tv_sec != VNOVAL) 580 savedit = 1; 581 else 582 savedit = 2; 583 } 584 if (savedit != 2) 585 error = VOP_SETATTR(vp, &nvap->na_vattr, cred); 586 if (savedit != 0) 587 nvap->na_vattr.va_size = savsize; 588 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || 589 nvap->na_vattr.va_gid != (gid_t)VNOVAL || 590 nvap->na_vattr.va_size != VNOVAL || 591 nvap->na_vattr.va_mode != (mode_t)VNOVAL || 592 nvap->na_vattr.va_atime.tv_sec != VNOVAL || 593 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { 594 /* Never modify birthtime on a DS file. */ 595 savbtime = nvap->na_vattr.va_birthtime.tv_sec; 596 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; 597 /* For a pNFS server, set the attributes on the DS file. */ 598 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, 599 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); 600 nvap->na_vattr.va_birthtime.tv_sec = savbtime; 601 if (error == ENOENT) 602 error = 0; 603 } 604 NFSEXITCODE(error); 605 return (error); 606 } 607 608 /* 609 * Set up nameidata for a lookup() call and do it. 
610 */ 611 int 612 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, 613 struct vnode *dp, int islocked, struct nfsexstuff *exp, 614 struct vnode **retdirp) 615 { 616 struct componentname *cnp = &ndp->ni_cnd; 617 int i; 618 struct iovec aiov; 619 struct uio auio; 620 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; 621 int error = 0; 622 char *cp; 623 624 *retdirp = NULL; 625 cnp->cn_nameptr = cnp->cn_pnbuf; 626 ndp->ni_lcf = 0; 627 /* 628 * Extract and set starting directory. 629 */ 630 if (dp->v_type != VDIR) { 631 if (islocked) 632 vput(dp); 633 else 634 vrele(dp); 635 nfsvno_relpathbuf(ndp); 636 error = ENOTDIR; 637 goto out1; 638 } 639 if (islocked) 640 NFSVOPUNLOCK(dp); 641 VREF(dp); 642 *retdirp = dp; 643 if (NFSVNO_EXRDONLY(exp)) 644 cnp->cn_flags |= RDONLY; 645 ndp->ni_segflg = UIO_SYSSPACE; 646 647 if (nd->nd_flag & ND_PUBLOOKUP) { 648 ndp->ni_loopcnt = 0; 649 if (cnp->cn_pnbuf[0] == '/') { 650 vrele(dp); 651 /* 652 * Check for degenerate pathnames here, since lookup() 653 * panics on them. 654 */ 655 for (i = 1; i < ndp->ni_pathlen; i++) 656 if (cnp->cn_pnbuf[i] != '/') 657 break; 658 if (i == ndp->ni_pathlen) { 659 error = NFSERR_ACCES; 660 goto out; 661 } 662 dp = rootvnode; 663 VREF(dp); 664 } 665 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || 666 (nd->nd_flag & ND_NFSV4) == 0) { 667 /* 668 * Only cross mount points for NFSv4 when doing a 669 * mount while traversing the file system above 670 * the mount point, unless nfsrv_enable_crossmntpt is set. 671 */ 672 cnp->cn_flags |= NOCROSSMOUNT; 673 } 674 675 /* 676 * Initialize for scan, set ni_startdir and bump ref on dp again 677 * because lookup() will dereference ni_startdir. 678 */ 679 680 ndp->ni_startdir = dp; 681 ndp->ni_rootdir = rootvnode; 682 ndp->ni_topdir = NULL; 683 684 if (!lockleaf) 685 cnp->cn_flags |= LOCKLEAF; 686 for (;;) { 687 cnp->cn_nameptr = cnp->cn_pnbuf; 688 /* 689 * Call lookup() to do the real work. 
If an error occurs, 690 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and 691 * we do not have to dereference anything before returning. 692 * In either case ni_startdir will be dereferenced and NULLed 693 * out. 694 */ 695 error = vfs_lookup(ndp); 696 if (error) 697 break; 698 699 /* 700 * Check for encountering a symbolic link. Trivial 701 * termination occurs if no symlink encountered. 702 */ 703 if ((cnp->cn_flags & ISSYMLINK) == 0) { 704 if (ndp->ni_vp && !lockleaf) 705 NFSVOPUNLOCK(ndp->ni_vp); 706 break; 707 } 708 709 /* 710 * Validate symlink 711 */ 712 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) 713 NFSVOPUNLOCK(ndp->ni_dvp); 714 if (!(nd->nd_flag & ND_PUBLOOKUP)) { 715 error = EINVAL; 716 goto badlink2; 717 } 718 719 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 720 error = ELOOP; 721 goto badlink2; 722 } 723 if (ndp->ni_pathlen > 1) 724 cp = uma_zalloc(namei_zone, M_WAITOK); 725 else 726 cp = cnp->cn_pnbuf; 727 aiov.iov_base = cp; 728 aiov.iov_len = MAXPATHLEN; 729 auio.uio_iov = &aiov; 730 auio.uio_iovcnt = 1; 731 auio.uio_offset = 0; 732 auio.uio_rw = UIO_READ; 733 auio.uio_segflg = UIO_SYSSPACE; 734 auio.uio_td = NULL; 735 auio.uio_resid = MAXPATHLEN; 736 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 737 if (error) { 738 badlink1: 739 if (ndp->ni_pathlen > 1) 740 uma_zfree(namei_zone, cp); 741 badlink2: 742 vrele(ndp->ni_dvp); 743 vput(ndp->ni_vp); 744 break; 745 } 746 linklen = MAXPATHLEN - auio.uio_resid; 747 if (linklen == 0) { 748 error = ENOENT; 749 goto badlink1; 750 } 751 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 752 error = ENAMETOOLONG; 753 goto badlink1; 754 } 755 756 /* 757 * Adjust or replace path 758 */ 759 if (ndp->ni_pathlen > 1) { 760 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 761 uma_zfree(namei_zone, cnp->cn_pnbuf); 762 cnp->cn_pnbuf = cp; 763 } else 764 cnp->cn_pnbuf[linklen] = '\0'; 765 ndp->ni_pathlen += linklen; 766 767 /* 768 * Cleanup refs for next loop and check if root directory 769 
* should replace current directory. Normally ni_dvp 770 * becomes the new base directory and is cleaned up when 771 * we loop. Explicitly null pointers after invalidation 772 * to clarify operation. 773 */ 774 vput(ndp->ni_vp); 775 ndp->ni_vp = NULL; 776 777 if (cnp->cn_pnbuf[0] == '/') { 778 vrele(ndp->ni_dvp); 779 ndp->ni_dvp = ndp->ni_rootdir; 780 VREF(ndp->ni_dvp); 781 } 782 ndp->ni_startdir = ndp->ni_dvp; 783 ndp->ni_dvp = NULL; 784 } 785 if (!lockleaf) 786 cnp->cn_flags &= ~LOCKLEAF; 787 788 out: 789 if (error) { 790 nfsvno_relpathbuf(ndp); 791 ndp->ni_vp = NULL; 792 ndp->ni_dvp = NULL; 793 ndp->ni_startdir = NULL; 794 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { 795 ndp->ni_dvp = NULL; 796 } 797 798 out1: 799 NFSEXITCODE2(error, nd); 800 return (error); 801 } 802 803 /* 804 * Set up a pathname buffer and return a pointer to it and, optionally 805 * set a hash pointer. 806 */ 807 void 808 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) 809 { 810 struct componentname *cnp = &ndp->ni_cnd; 811 812 cnp->cn_flags |= (NOMACCHECK); 813 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 814 if (hashpp != NULL) 815 *hashpp = NULL; 816 *bufpp = cnp->cn_pnbuf; 817 } 818 819 /* 820 * Release the above path buffer, if not released by nfsvno_namei(). 821 */ 822 void 823 nfsvno_relpathbuf(struct nameidata *ndp) 824 { 825 826 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 827 ndp->ni_cnd.cn_pnbuf = NULL; 828 } 829 830 /* 831 * Readlink vnode op into an mbuf list. 
832 */ 833 int 834 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, 835 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 836 { 837 struct iovec *iv; 838 struct uio io, *uiop = &io; 839 struct mbuf *mp, *mp3; 840 int len, tlen, error = 0; 841 842 len = NFS_MAXPATHLEN; 843 if (maxextsiz > 0) 844 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 845 &mp3, &mp, &iv); 846 else 847 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); 848 uiop->uio_iov = iv; 849 uiop->uio_offset = 0; 850 uiop->uio_resid = len; 851 uiop->uio_rw = UIO_READ; 852 uiop->uio_segflg = UIO_SYSSPACE; 853 uiop->uio_td = NULL; 854 error = VOP_READLINK(vp, uiop, cred); 855 free(iv, M_TEMP); 856 if (error) { 857 m_freem(mp3); 858 *lenp = 0; 859 goto out; 860 } 861 if (uiop->uio_resid > 0) { 862 len -= uiop->uio_resid; 863 tlen = NFSM_RNDUP(len); 864 if (tlen == 0) { 865 m_freem(mp3); 866 mp3 = mp = NULL; 867 } else if (tlen != NFS_MAXPATHLEN || tlen != len) 868 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, 869 tlen - len); 870 } 871 *lenp = len; 872 *mpp = mp3; 873 *mpendp = mp; 874 875 out: 876 NFSEXITCODE(error); 877 return (error); 878 } 879 880 /* 881 * Create an mbuf chain and an associated iovec that can be used to Read 882 * or Getextattr of data. 883 * Upon success, return pointers to the first and last mbufs in the chain 884 * plus the malloc'd iovec and its iovlen. 885 */ 886 static int 887 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, 888 struct iovec **ivp) 889 { 890 struct mbuf *m, *m2 = NULL, *m3; 891 struct iovec *iv; 892 int i, left, siz; 893 894 left = len; 895 m3 = NULL; 896 /* 897 * Generate the mbuf list with the uio_iov ref. to it. 
898 */ 899 i = 0; 900 while (left > 0) { 901 NFSMGET(m); 902 MCLGET(m, M_WAITOK); 903 m->m_len = 0; 904 siz = min(M_TRAILINGSPACE(m), left); 905 left -= siz; 906 i++; 907 if (m3) 908 m2->m_next = m; 909 else 910 m3 = m; 911 m2 = m; 912 } 913 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 914 m = m3; 915 left = len; 916 i = 0; 917 while (left > 0) { 918 if (m == NULL) 919 panic("nfsrv_createiovec iov"); 920 siz = min(M_TRAILINGSPACE(m), left); 921 if (siz > 0) { 922 iv->iov_base = mtod(m, caddr_t) + m->m_len; 923 iv->iov_len = siz; 924 m->m_len += siz; 925 left -= siz; 926 iv++; 927 i++; 928 } 929 m = m->m_next; 930 } 931 *mpp = m3; 932 *mpendp = m2; 933 return (i); 934 } 935 936 /* 937 * Create an mbuf chain and an associated iovec that can be used to Read 938 * or Getextattr of data. 939 * Upon success, return pointers to the first and last mbufs in the chain 940 * plus the malloc'd iovec and its iovlen. 941 * Same as above, but creates ext_pgs mbuf(s). 942 */ 943 static int 944 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, 945 struct mbuf **mpendp, struct iovec **ivp) 946 { 947 struct mbuf *m, *m2 = NULL, *m3; 948 struct iovec *iv; 949 int i, left, pgno, siz; 950 951 left = len; 952 m3 = NULL; 953 /* 954 * Generate the mbuf list with the uio_iov ref. to it. 
955 */ 956 i = 0; 957 while (left > 0) { 958 siz = min(left, maxextsiz); 959 m = mb_alloc_ext_plus_pages(siz, M_WAITOK); 960 left -= siz; 961 i += m->m_epg_npgs; 962 if (m3 != NULL) 963 m2->m_next = m; 964 else 965 m3 = m; 966 m2 = m; 967 } 968 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); 969 m = m3; 970 left = len; 971 i = 0; 972 pgno = 0; 973 while (left > 0) { 974 if (m == NULL) 975 panic("nfsvno_createiovec_extpgs iov"); 976 siz = min(PAGE_SIZE, left); 977 if (siz > 0) { 978 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); 979 iv->iov_len = siz; 980 m->m_len += siz; 981 if (pgno == m->m_epg_npgs - 1) 982 m->m_epg_last_len = siz; 983 left -= siz; 984 iv++; 985 i++; 986 pgno++; 987 } 988 if (pgno == m->m_epg_npgs && left > 0) { 989 m = m->m_next; 990 if (m == NULL) 991 panic("nfsvno_createiovec_extpgs iov"); 992 pgno = 0; 993 } 994 } 995 *mpp = m3; 996 *mpendp = m2; 997 return (i); 998 } 999 1000 /* 1001 * Read vnode op call into mbuf list. 1002 */ 1003 int 1004 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 1005 int maxextsiz, struct thread *p, struct mbuf **mpp, 1006 struct mbuf **mpendp) 1007 { 1008 struct mbuf *m; 1009 struct iovec *iv; 1010 int error = 0, len, tlen, ioflag = 0; 1011 struct mbuf *m3; 1012 struct uio io, *uiop = &io; 1013 struct nfsheur *nh; 1014 1015 /* 1016 * Attempt to read from a DS file. A return of ENOENT implies 1017 * there is no DS file to read. 
1018 */ 1019 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, 1020 NULL, mpendp, NULL, NULL, NULL, 0, NULL); 1021 if (error != ENOENT) 1022 return (error); 1023 1024 len = NFSM_RNDUP(cnt); 1025 if (maxextsiz > 0) 1026 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, 1027 &m3, &m, &iv); 1028 else 1029 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); 1030 uiop->uio_iov = iv; 1031 uiop->uio_offset = off; 1032 uiop->uio_resid = len; 1033 uiop->uio_rw = UIO_READ; 1034 uiop->uio_segflg = UIO_SYSSPACE; 1035 uiop->uio_td = NULL; 1036 nh = nfsrv_sequential_heuristic(uiop, vp); 1037 ioflag |= nh->nh_seqcount << IO_SEQSHIFT; 1038 /* XXX KDM make this more systematic? */ 1039 NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid; 1040 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); 1041 free(iv, M_TEMP); 1042 if (error) { 1043 m_freem(m3); 1044 *mpp = NULL; 1045 goto out; 1046 } 1047 nh->nh_nextoff = uiop->uio_offset; 1048 tlen = len - uiop->uio_resid; 1049 cnt = cnt < tlen ? cnt : tlen; 1050 tlen = NFSM_RNDUP(cnt); 1051 if (tlen == 0) { 1052 m_freem(m3); 1053 m3 = m = NULL; 1054 } else if (len != tlen || tlen != cnt) 1055 m = nfsrv_adj(m3, len - tlen, tlen - cnt); 1056 *mpp = m3; 1057 *mpendp = m; 1058 1059 out: 1060 NFSEXITCODE(error); 1061 return (error); 1062 } 1063 1064 /* 1065 * Create the iovec for the mbuf chain passed in as an argument. 1066 * The "cp" argument is where the data starts within the first mbuf in 1067 * the chain. It returns the iovec and the iovcnt. 1068 */ 1069 static int 1070 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, 1071 int *iovcntp) 1072 { 1073 struct mbuf *mp; 1074 struct iovec *ivp; 1075 int cnt, i, len; 1076 1077 /* 1078 * Loop through the mbuf chain, counting how many mbufs are a 1079 * part of this write operation, so the iovec size is known. 
1080 */ 1081 cnt = 0; 1082 len = retlen; 1083 mp = m; 1084 i = mtod(mp, caddr_t) + mp->m_len - cp; 1085 while (len > 0) { 1086 if (i > 0) { 1087 len -= i; 1088 cnt++; 1089 } 1090 mp = mp->m_next; 1091 if (!mp) { 1092 if (len > 0) 1093 return (EBADRPC); 1094 } else 1095 i = mp->m_len; 1096 } 1097 1098 /* Now, create the iovec. */ 1099 mp = m; 1100 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, 1101 M_WAITOK); 1102 *iovcntp = cnt; 1103 i = mtod(mp, caddr_t) + mp->m_len - cp; 1104 len = retlen; 1105 while (len > 0) { 1106 if (mp == NULL) 1107 panic("nfsrv_createiovecw"); 1108 if (i > 0) { 1109 i = min(i, len); 1110 ivp->iov_base = cp; 1111 ivp->iov_len = i; 1112 ivp++; 1113 len -= i; 1114 } 1115 mp = mp->m_next; 1116 if (mp) { 1117 i = mp->m_len; 1118 cp = mtod(mp, caddr_t); 1119 } 1120 } 1121 return (0); 1122 } 1123 1124 /* 1125 * Write vnode op from an mbuf list. 1126 */ 1127 int 1128 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, 1129 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) 1130 { 1131 struct iovec *iv; 1132 int cnt, ioflags, error; 1133 struct uio io, *uiop = &io; 1134 struct nfsheur *nh; 1135 1136 /* 1137 * Attempt to write to a DS file. A return of ENOENT implies 1138 * there is no DS file to write. 
1139 */ 1140 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, 1141 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); 1142 if (error != ENOENT) { 1143 *stable = NFSWRITE_FILESYNC; 1144 return (error); 1145 } 1146 1147 if (*stable == NFSWRITE_UNSTABLE) 1148 ioflags = IO_NODELOCKED; 1149 else 1150 ioflags = (IO_SYNC | IO_NODELOCKED); 1151 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); 1152 if (error != 0) 1153 return (error); 1154 uiop->uio_iov = iv; 1155 uiop->uio_iovcnt = cnt; 1156 uiop->uio_resid = retlen; 1157 uiop->uio_rw = UIO_WRITE; 1158 uiop->uio_segflg = UIO_SYSSPACE; 1159 NFSUIOPROC(uiop, p); 1160 uiop->uio_offset = off; 1161 nh = nfsrv_sequential_heuristic(uiop, vp); 1162 ioflags |= nh->nh_seqcount << IO_SEQSHIFT; 1163 /* XXX KDM make this more systematic? */ 1164 NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; 1165 error = VOP_WRITE(vp, uiop, ioflags, cred); 1166 if (error == 0) 1167 nh->nh_nextoff = uiop->uio_offset; 1168 free(iv, M_TEMP); 1169 1170 NFSEXITCODE(error); 1171 return (error); 1172 } 1173 1174 /* 1175 * Common code for creating a regular file (plus special files for V2). 1176 */ 1177 int 1178 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, 1179 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, 1180 int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) 1181 { 1182 u_quad_t tempsize; 1183 int error; 1184 struct thread *p = curthread; 1185 1186 error = nd->nd_repstat; 1187 if (!error && ndp->ni_vp == NULL) { 1188 if (nvap->na_type == VREG || nvap->na_type == VSOCK) { 1189 error = VOP_CREATE(ndp->ni_dvp, 1190 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1191 /* For a pNFS server, create the data file on a DS. */ 1192 if (error == 0 && nvap->na_type == VREG) { 1193 /* 1194 * Create a data file on a DS for a pNFS server. 1195 * This function just returns if not 1196 * running a pNFS DS or the creation fails. 
1197 */ 1198 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1199 nd->nd_cred, p); 1200 } 1201 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1202 NULL, false); 1203 nfsvno_relpathbuf(ndp); 1204 if (!error) { 1205 if (*exclusive_flagp) { 1206 *exclusive_flagp = 0; 1207 NFSVNO_ATTRINIT(nvap); 1208 nvap->na_atime.tv_sec = cverf[0]; 1209 nvap->na_atime.tv_nsec = cverf[1]; 1210 error = VOP_SETATTR(ndp->ni_vp, 1211 &nvap->na_vattr, nd->nd_cred); 1212 if (error != 0) { 1213 vput(ndp->ni_vp); 1214 ndp->ni_vp = NULL; 1215 error = NFSERR_NOTSUPP; 1216 } 1217 } 1218 } 1219 /* 1220 * NFS V2 Only. nfsrvd_mknod() does this for V3. 1221 * (This implies, just get out on an error.) 1222 */ 1223 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 1224 nvap->na_type == VFIFO) { 1225 if (nvap->na_type == VCHR && rdev == 0xffffffff) 1226 nvap->na_type = VFIFO; 1227 if (nvap->na_type != VFIFO && 1228 (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { 1229 nfsvno_relpathbuf(ndp); 1230 vput(ndp->ni_dvp); 1231 goto out; 1232 } 1233 nvap->na_rdev = rdev; 1234 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1235 &ndp->ni_cnd, &nvap->na_vattr); 1236 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1237 NULL, false); 1238 nfsvno_relpathbuf(ndp); 1239 if (error) 1240 goto out; 1241 } else { 1242 nfsvno_relpathbuf(ndp); 1243 vput(ndp->ni_dvp); 1244 error = ENXIO; 1245 goto out; 1246 } 1247 *vpp = ndp->ni_vp; 1248 } else { 1249 /* 1250 * Handle cases where error is already set and/or 1251 * the file exists. 
1252 * 1 - clean up the lookup 1253 * 2 - iff !error and na_size set, truncate it 1254 */ 1255 nfsvno_relpathbuf(ndp); 1256 *vpp = ndp->ni_vp; 1257 if (ndp->ni_dvp == *vpp) 1258 vrele(ndp->ni_dvp); 1259 else 1260 vput(ndp->ni_dvp); 1261 if (!error && nvap->na_size != VNOVAL) { 1262 error = nfsvno_accchk(*vpp, VWRITE, 1263 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1264 NFSACCCHK_VPISLOCKED, NULL); 1265 if (!error) { 1266 tempsize = nvap->na_size; 1267 NFSVNO_ATTRINIT(nvap); 1268 nvap->na_size = tempsize; 1269 error = nfsvno_setattr(*vpp, nvap, 1270 nd->nd_cred, p, exp); 1271 } 1272 } 1273 if (error) 1274 vput(*vpp); 1275 } 1276 1277 out: 1278 NFSEXITCODE(error); 1279 return (error); 1280 } 1281 1282 /* 1283 * Do a mknod vnode op. 1284 */ 1285 int 1286 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1287 struct thread *p) 1288 { 1289 int error = 0; 1290 __enum_uint8(vtype) vtyp; 1291 1292 vtyp = nvap->na_type; 1293 /* 1294 * Iff doesn't exist, create it. 1295 */ 1296 if (ndp->ni_vp) { 1297 nfsvno_relpathbuf(ndp); 1298 vput(ndp->ni_dvp); 1299 vrele(ndp->ni_vp); 1300 error = EEXIST; 1301 goto out; 1302 } 1303 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1304 nfsvno_relpathbuf(ndp); 1305 vput(ndp->ni_dvp); 1306 error = NFSERR_BADTYPE; 1307 goto out; 1308 } 1309 if (vtyp == VSOCK) { 1310 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1311 &ndp->ni_cnd, &nvap->na_vattr); 1312 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, 1313 false); 1314 nfsvno_relpathbuf(ndp); 1315 } else { 1316 if (nvap->na_type != VFIFO && 1317 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { 1318 nfsvno_relpathbuf(ndp); 1319 vput(ndp->ni_dvp); 1320 goto out; 1321 } 1322 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1323 &ndp->ni_cnd, &nvap->na_vattr); 1324 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? 
&ndp->ni_vp : NULL, 1325 false); 1326 nfsvno_relpathbuf(ndp); 1327 /* 1328 * Since VOP_MKNOD returns the ni_vp, I can't 1329 * see any reason to do the lookup. 1330 */ 1331 } 1332 1333 out: 1334 NFSEXITCODE(error); 1335 return (error); 1336 } 1337 1338 /* 1339 * Mkdir vnode op. 1340 */ 1341 int 1342 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1343 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1344 { 1345 int error = 0; 1346 1347 if (ndp->ni_vp != NULL) { 1348 if (ndp->ni_dvp == ndp->ni_vp) 1349 vrele(ndp->ni_dvp); 1350 else 1351 vput(ndp->ni_dvp); 1352 vrele(ndp->ni_vp); 1353 nfsvno_relpathbuf(ndp); 1354 error = EEXIST; 1355 goto out; 1356 } 1357 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1358 &nvap->na_vattr); 1359 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); 1360 nfsvno_relpathbuf(ndp); 1361 1362 out: 1363 NFSEXITCODE(error); 1364 return (error); 1365 } 1366 1367 /* 1368 * symlink vnode op. 1369 */ 1370 int 1371 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1372 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1373 struct nfsexstuff *exp) 1374 { 1375 int error = 0; 1376 1377 if (ndp->ni_vp) { 1378 nfsvno_relpathbuf(ndp); 1379 if (ndp->ni_dvp == ndp->ni_vp) 1380 vrele(ndp->ni_dvp); 1381 else 1382 vput(ndp->ni_dvp); 1383 vrele(ndp->ni_vp); 1384 error = EEXIST; 1385 goto out; 1386 } 1387 1388 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1389 &nvap->na_vattr, pathcp); 1390 /* 1391 * Although FreeBSD still had the lookup code in 1392 * it for 7/current, there doesn't seem to be any 1393 * point, since VOP_SYMLINK() returns the ni_vp. 1394 * Just vput it for v2. 1395 */ 1396 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0); 1397 nfsvno_relpathbuf(ndp); 1398 1399 out: 1400 NFSEXITCODE(error); 1401 return (error); 1402 } 1403 1404 /* 1405 * Parse symbolic link arguments. 
1406 * This function has an ugly side effect. It will malloc() an area for 1407 * the symlink and set iov_base to point to it, only if it succeeds. 1408 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1409 * be FREE'd later. 1410 */ 1411 int 1412 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1413 struct thread *p, char **pathcpp, int *lenp) 1414 { 1415 u_int32_t *tl; 1416 char *pathcp = NULL; 1417 int error = 0, len; 1418 struct nfsv2_sattr *sp; 1419 1420 *pathcpp = NULL; 1421 *lenp = 0; 1422 if ((nd->nd_flag & ND_NFSV3) && 1423 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1424 goto nfsmout; 1425 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1426 len = fxdr_unsigned(int, *tl); 1427 if (len > NFS_MAXPATHLEN || len <= 0) { 1428 error = EBADRPC; 1429 goto nfsmout; 1430 } 1431 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1432 error = nfsrv_mtostr(nd, pathcp, len); 1433 if (error) 1434 goto nfsmout; 1435 if (nd->nd_flag & ND_NFSV2) { 1436 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1437 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1438 } 1439 *pathcpp = pathcp; 1440 *lenp = len; 1441 NFSEXITCODE2(0, nd); 1442 return (0); 1443 nfsmout: 1444 if (pathcp) 1445 free(pathcp, M_TEMP); 1446 NFSEXITCODE2(error, nd); 1447 return (error); 1448 } 1449 1450 /* 1451 * Remove a non-directory object. 
1452 */ 1453 int 1454 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1455 struct thread *p, struct nfsexstuff *exp) 1456 { 1457 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1458 int error = 0, mirrorcnt; 1459 char fname[PNFS_FILENAME_LEN + 1]; 1460 fhandle_t fh; 1461 1462 vp = ndp->ni_vp; 1463 dsdvp[0] = NULL; 1464 if (vp->v_type == VDIR) 1465 error = NFSERR_ISDIR; 1466 else if (is_v4) 1467 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), 1468 p); 1469 if (error == 0) 1470 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1471 if (!error) 1472 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1473 if (error == 0 && dsdvp[0] != NULL) 1474 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1475 if (ndp->ni_dvp == vp) 1476 vrele(ndp->ni_dvp); 1477 else 1478 vput(ndp->ni_dvp); 1479 vput(vp); 1480 nfsvno_relpathbuf(ndp); 1481 NFSEXITCODE(error); 1482 return (error); 1483 } 1484 1485 /* 1486 * Remove a directory. 1487 */ 1488 int 1489 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1490 struct thread *p, struct nfsexstuff *exp) 1491 { 1492 struct vnode *vp; 1493 int error = 0; 1494 1495 vp = ndp->ni_vp; 1496 if (vp->v_type != VDIR) { 1497 error = ENOTDIR; 1498 goto out; 1499 } 1500 /* 1501 * No rmdir "." please. 1502 */ 1503 if (ndp->ni_dvp == vp) { 1504 error = EINVAL; 1505 goto out; 1506 } 1507 /* 1508 * The root of a mounted filesystem cannot be deleted. 1509 */ 1510 if (vp->v_vflag & VV_ROOT) 1511 error = EBUSY; 1512 out: 1513 if (!error) 1514 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1515 if (ndp->ni_dvp == vp) 1516 vrele(ndp->ni_dvp); 1517 else 1518 vput(ndp->ni_dvp); 1519 vput(vp); 1520 nfsvno_relpathbuf(ndp); 1521 NFSEXITCODE(error); 1522 return (error); 1523 } 1524 1525 /* 1526 * Rename vnode op. 
1527 */ 1528 int 1529 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1530 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1531 { 1532 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1533 int error = 0, mirrorcnt; 1534 char fname[PNFS_FILENAME_LEN + 1]; 1535 fhandle_t fh; 1536 1537 dsdvp[0] = NULL; 1538 fvp = fromndp->ni_vp; 1539 if (ndstat) { 1540 vrele(fromndp->ni_dvp); 1541 vrele(fvp); 1542 error = ndstat; 1543 goto out1; 1544 } 1545 tdvp = tondp->ni_dvp; 1546 tvp = tondp->ni_vp; 1547 if (tvp != NULL) { 1548 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1549 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1550 goto out; 1551 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1552 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1553 goto out; 1554 } 1555 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1556 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1557 goto out; 1558 } 1559 1560 /* 1561 * A rename to '.' or '..' results in a prematurely 1562 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1563 * here. 1564 */ 1565 if ((tondp->ni_cnd.cn_namelen == 1 && 1566 tondp->ni_cnd.cn_nameptr[0] == '.') || 1567 (tondp->ni_cnd.cn_namelen == 2 && 1568 tondp->ni_cnd.cn_nameptr[0] == '.' && 1569 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1570 error = EINVAL; 1571 goto out; 1572 } 1573 } 1574 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1575 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1576 goto out; 1577 } 1578 if (fvp->v_mount != tdvp->v_mount) { 1579 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1580 goto out; 1581 } 1582 if (fvp == tdvp) { 1583 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1584 goto out; 1585 } 1586 if (fvp == tvp) { 1587 /* 1588 * If source and destination are the same, there is nothing to 1589 * do. Set error to -1 to indicate this. 
1590 */ 1591 error = -1; 1592 goto out; 1593 } 1594 if (ndflag & ND_NFSV4) { 1595 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1596 error = nfsrv_checkremove(fvp, 0, NULL, 1597 (nfsquad_t)((u_quad_t)0), p); 1598 NFSVOPUNLOCK(fvp); 1599 } else 1600 error = EPERM; 1601 if (tvp && !error) 1602 error = nfsrv_checkremove(tvp, 1, NULL, 1603 (nfsquad_t)((u_quad_t)0), p); 1604 } else { 1605 /* 1606 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1607 * that the NFSv4 client won't be confused by the rename. 1608 * Since nfsd_recalldelegation() can only be called on an 1609 * unlocked vnode at this point and fvp is the file that will 1610 * still exist after the rename, just do fvp. 1611 */ 1612 nfsd_recalldelegation(fvp, p); 1613 } 1614 if (error == 0 && tvp != NULL) { 1615 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1616 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1617 " dsdvp=%p\n", dsdvp[0]); 1618 } 1619 out: 1620 if (!error) { 1621 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1622 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1623 &tondp->ni_cnd); 1624 } else { 1625 if (tdvp == tvp) 1626 vrele(tdvp); 1627 else 1628 vput(tdvp); 1629 if (tvp) 1630 vput(tvp); 1631 vrele(fromndp->ni_dvp); 1632 vrele(fvp); 1633 if (error == -1) 1634 error = 0; 1635 } 1636 1637 /* 1638 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1639 * if the rename succeeded, the DS file for the tvp needs to be 1640 * removed. 1641 */ 1642 if (error == 0 && dsdvp[0] != NULL) { 1643 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1644 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1645 } 1646 1647 nfsvno_relpathbuf(tondp); 1648 out1: 1649 nfsvno_relpathbuf(fromndp); 1650 NFSEXITCODE(error); 1651 return (error); 1652 } 1653 1654 /* 1655 * Link vnode op. 
1656 */ 1657 int 1658 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1659 struct thread *p, struct nfsexstuff *exp) 1660 { 1661 struct vnode *xp; 1662 int error = 0; 1663 1664 xp = ndp->ni_vp; 1665 if (xp != NULL) { 1666 error = EEXIST; 1667 } else { 1668 xp = ndp->ni_dvp; 1669 if (vp->v_mount != xp->v_mount) 1670 error = EXDEV; 1671 } 1672 if (!error) { 1673 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1674 if (!VN_IS_DOOMED(vp)) 1675 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1676 else 1677 error = EPERM; 1678 if (ndp->ni_dvp == vp) { 1679 vrele(ndp->ni_dvp); 1680 NFSVOPUNLOCK(vp); 1681 } else { 1682 vref(vp); 1683 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true); 1684 } 1685 } else { 1686 if (ndp->ni_dvp == ndp->ni_vp) 1687 vrele(ndp->ni_dvp); 1688 else 1689 vput(ndp->ni_dvp); 1690 if (ndp->ni_vp) 1691 vrele(ndp->ni_vp); 1692 } 1693 nfsvno_relpathbuf(ndp); 1694 NFSEXITCODE(error); 1695 return (error); 1696 } 1697 1698 /* 1699 * Do the fsync() appropriate for the commit. 1700 */ 1701 int 1702 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1703 struct thread *td) 1704 { 1705 int error = 0; 1706 1707 /* 1708 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1709 * file is done. At this time VOP_FSYNC does not accept offset and 1710 * byte count parameters so call VOP_FSYNC the whole file for now. 1711 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1712 * File systems that do not use the buffer cache (as indicated 1713 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 1714 */ 1715 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1716 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1717 /* 1718 * Give up and do the whole thing 1719 */ 1720 vnode_pager_clean_sync(vp); 1721 error = VOP_FSYNC(vp, MNT_WAIT, td); 1722 } else { 1723 /* 1724 * Locate and synchronously write any buffers that fall 1725 * into the requested range. Note: we are assuming that 1726 * f_iosize is a power of 2. 
1727 */ 1728 int iosize = vp->v_mount->mnt_stat.f_iosize; 1729 int iomask = iosize - 1; 1730 struct bufobj *bo; 1731 daddr_t lblkno; 1732 1733 /* 1734 * Align to iosize boundary, super-align to page boundary. 1735 */ 1736 if (off & iomask) { 1737 cnt += off & iomask; 1738 off &= ~(u_quad_t)iomask; 1739 } 1740 if (off & PAGE_MASK) { 1741 cnt += off & PAGE_MASK; 1742 off &= ~(u_quad_t)PAGE_MASK; 1743 } 1744 lblkno = off / iosize; 1745 1746 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1747 VM_OBJECT_WLOCK(vp->v_object); 1748 vm_object_page_clean(vp->v_object, off, off + cnt, 1749 OBJPC_SYNC); 1750 VM_OBJECT_WUNLOCK(vp->v_object); 1751 } 1752 1753 bo = &vp->v_bufobj; 1754 BO_LOCK(bo); 1755 while (cnt > 0) { 1756 struct buf *bp; 1757 1758 /* 1759 * If we have a buffer and it is marked B_DELWRI we 1760 * have to lock and write it. Otherwise the prior 1761 * write is assumed to have already been committed. 1762 * 1763 * gbincore() can return invalid buffers now so we 1764 * have to check that bit as well (though B_DELWRI 1765 * should not be set if B_INVAL is set there could be 1766 * a race here since we haven't locked the buffer). 1767 */ 1768 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1769 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1770 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1771 BO_LOCK(bo); 1772 continue; /* retry */ 1773 } 1774 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1775 B_DELWRI) { 1776 bremfree(bp); 1777 bp->b_flags &= ~B_ASYNC; 1778 bwrite(bp); 1779 ++nfs_commit_miss; 1780 } else 1781 BUF_UNLOCK(bp); 1782 BO_LOCK(bo); 1783 } 1784 ++nfs_commit_blks; 1785 if (cnt < iosize) 1786 break; 1787 cnt -= iosize; 1788 ++lblkno; 1789 } 1790 BO_UNLOCK(bo); 1791 } 1792 NFSEXITCODE(error); 1793 return (error); 1794 } 1795 1796 /* 1797 * Statfs vnode op. 
1798 */ 1799 int 1800 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1801 { 1802 struct statfs *tsf; 1803 int error; 1804 1805 tsf = NULL; 1806 if (nfsrv_devidcnt > 0) { 1807 /* For a pNFS service, get the DS numbers. */ 1808 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1809 error = nfsrv_pnfsstatfs(tsf, vp->v_mount); 1810 if (error != 0) { 1811 free(tsf, M_TEMP); 1812 tsf = NULL; 1813 } 1814 } 1815 error = VFS_STATFS(vp->v_mount, sf); 1816 if (error == 0) { 1817 if (tsf != NULL) { 1818 sf->f_blocks = tsf->f_blocks; 1819 sf->f_bavail = tsf->f_bavail; 1820 sf->f_bfree = tsf->f_bfree; 1821 sf->f_bsize = tsf->f_bsize; 1822 } 1823 /* 1824 * Since NFS handles these values as unsigned on the 1825 * wire, there is no way to represent negative values, 1826 * so set them to 0. Without this, they will appear 1827 * to be very large positive values for clients like 1828 * Solaris10. 1829 */ 1830 if (sf->f_bavail < 0) 1831 sf->f_bavail = 0; 1832 if (sf->f_ffree < 0) 1833 sf->f_ffree = 0; 1834 } 1835 free(tsf, M_TEMP); 1836 NFSEXITCODE(error); 1837 return (error); 1838 } 1839 1840 /* 1841 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1842 * must handle nfsrv_opencheck() calls after any other access checks. 1843 */ 1844 void 1845 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1846 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1847 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1848 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, bool done_namei, 1849 struct nfsexstuff *exp, struct vnode **vpp) 1850 { 1851 struct vnode *vp = NULL; 1852 u_quad_t tempsize; 1853 struct nfsexstuff nes; 1854 struct thread *p = curthread; 1855 uint32_t oldrepstat; 1856 1857 if (ndp->ni_vp == NULL) { 1858 /* 1859 * If nfsrv_opencheck() sets nd_repstat, done_namei needs to be 1860 * set true, since cleanup after nfsvno_namei() is needed. 
1861 */ 1862 oldrepstat = nd->nd_repstat; 1863 nd->nd_repstat = nfsrv_opencheck(clientid, 1864 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1865 if (nd->nd_repstat != 0 && oldrepstat == 0) 1866 done_namei = true; 1867 } 1868 if (!nd->nd_repstat) { 1869 if (ndp->ni_vp == NULL) { 1870 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1871 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1872 /* For a pNFS server, create the data file on a DS. */ 1873 if (nd->nd_repstat == 0) { 1874 /* 1875 * Create a data file on a DS for a pNFS server. 1876 * This function just returns if not 1877 * running a pNFS DS or the creation fails. 1878 */ 1879 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1880 cred, p); 1881 } 1882 VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ? 1883 &ndp->ni_vp : NULL, false); 1884 nfsvno_relpathbuf(ndp); 1885 if (!nd->nd_repstat) { 1886 if (*exclusive_flagp) { 1887 *exclusive_flagp = 0; 1888 NFSVNO_ATTRINIT(nvap); 1889 nvap->na_atime.tv_sec = cverf[0]; 1890 nvap->na_atime.tv_nsec = cverf[1]; 1891 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1892 &nvap->na_vattr, cred); 1893 if (nd->nd_repstat != 0) { 1894 vput(ndp->ni_vp); 1895 ndp->ni_vp = NULL; 1896 nd->nd_repstat = NFSERR_NOTSUPP; 1897 } else 1898 NFSSETBIT_ATTRBIT(attrbitp, 1899 NFSATTRBIT_TIMEACCESS); 1900 } else { 1901 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1902 aclp, p, attrbitp, exp); 1903 } 1904 } 1905 vp = ndp->ni_vp; 1906 } else { 1907 nfsvno_relpathbuf(ndp); 1908 vp = ndp->ni_vp; 1909 if (create == NFSV4OPEN_CREATE) { 1910 if (ndp->ni_dvp == vp) 1911 vrele(ndp->ni_dvp); 1912 else 1913 vput(ndp->ni_dvp); 1914 } 1915 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1916 if (ndp->ni_cnd.cn_flags & RDONLY) 1917 NFSVNO_SETEXRDONLY(&nes); 1918 else 1919 NFSVNO_EXINIT(&nes); 1920 nd->nd_repstat = nfsvno_accchk(vp, 1921 VWRITE, cred, &nes, p, 1922 NFSACCCHK_NOOVERRIDE, 1923 NFSACCCHK_VPISLOCKED, NULL); 1924 nd->nd_repstat = nfsrv_opencheck(clientid, 1925 stateidp, stp, vp, nd, p, nd->nd_repstat); 1926 if 
(!nd->nd_repstat) { 1927 tempsize = nvap->na_size; 1928 NFSVNO_ATTRINIT(nvap); 1929 nvap->na_size = tempsize; 1930 nd->nd_repstat = nfsvno_setattr(vp, 1931 nvap, cred, p, exp); 1932 } 1933 } else if (vp->v_type == VREG) { 1934 nd->nd_repstat = nfsrv_opencheck(clientid, 1935 stateidp, stp, vp, nd, p, nd->nd_repstat); 1936 } 1937 } 1938 } else if (done_namei) { 1939 KASSERT(create == NFSV4OPEN_CREATE, 1940 ("nfsvno_open: not create")); 1941 /* 1942 * done_namei is set when nfsvno_namei() has completed 1943 * successfully, but a subsequent error was set in 1944 * nd_repstat. As such, cleanup of the nfsvno_namei() 1945 * results is required. 1946 */ 1947 nfsvno_relpathbuf(ndp); 1948 if (ndp->ni_dvp == ndp->ni_vp) 1949 vrele(ndp->ni_dvp); 1950 else 1951 vput(ndp->ni_dvp); 1952 if (ndp->ni_vp) 1953 vput(ndp->ni_vp); 1954 } 1955 *vpp = vp; 1956 1957 NFSEXITCODE2(0, nd); 1958 } 1959 1960 /* 1961 * Updates the file rev and sets the mtime and ctime 1962 * to the current clock time, returning the va_filerev and va_Xtime 1963 * values. 1964 * Return ESTALE to indicate the vnode is VIRF_DOOMED. 1965 */ 1966 int 1967 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1968 struct nfsrv_descript *nd, struct thread *p) 1969 { 1970 struct vattr va; 1971 1972 VATTR_NULL(&va); 1973 vfs_timestamp(&va.va_mtime); 1974 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1975 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1976 if (VN_IS_DOOMED(vp)) 1977 return (ESTALE); 1978 } 1979 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1980 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1981 return (0); 1982 } 1983 1984 /* 1985 * Glue routine to nfsv4_fillattr(). 
1986 */ 1987 int 1988 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1989 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1990 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1991 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1992 { 1993 struct statfs *sf; 1994 int error; 1995 1996 sf = NULL; 1997 if (nfsrv_devidcnt > 0 && 1998 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1999 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 2000 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 2001 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 2002 error = nfsrv_pnfsstatfs(sf, mp); 2003 if (error != 0) { 2004 free(sf, M_TEMP); 2005 sf = NULL; 2006 } 2007 } 2008 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 2009 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 2010 mounted_on_fileno, sf); 2011 free(sf, M_TEMP); 2012 NFSEXITCODE2(0, nd); 2013 return (error); 2014 } 2015 2016 /* Since the Readdir vnode ops vary, put the entire functions in here. */ 2017 /* 2018 * nfs readdir service 2019 * - mallocs what it thinks is enough to read 2020 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR 2021 * - calls VOP_READDIR() 2022 * - loops around building the reply 2023 * if the output generated exceeds count break out of loop 2024 * The NFSM_CLGET macro is used here so that the reply will be packed 2025 * tightly in mbuf clusters. 2026 * - it trims out records with d_fileno == 0 2027 * this doesn't matter for Unix clients, but they might confuse clients 2028 * for other os'. 2029 * - it trims out records with d_type == DT_WHT 2030 * these cannot be seen through NFS (unless we extend the protocol) 2031 * The alternate call nfsrvd_readdirplus() does lookups as well. 2032 * PS: The NFS protocol spec. does not clarify what the "count" byte 2033 * argument is a count of.. 
just name strings and file id's or the 2034 * entire reply rpc or ... 2035 * I tried just file name and id sizes and it confused the Sun client, 2036 * so I am using the full rpc size now. The "paranoia.." comment refers 2037 * to including the status longwords that are not a part of the dir. 2038 * "entry" structures, but are in the rpc. 2039 */ 2040 int 2041 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 2042 struct vnode *vp, struct nfsexstuff *exp) 2043 { 2044 struct dirent *dp; 2045 u_int32_t *tl; 2046 int dirlen; 2047 char *cpos, *cend, *rbuf; 2048 struct nfsvattr at; 2049 int nlen, error = 0, getret = 1; 2050 int siz, cnt, fullsiz, eofflag, ncookies; 2051 u_int64_t off, toff, verf __unused; 2052 uint64_t *cookies = NULL, *cookiep; 2053 struct uio io; 2054 struct iovec iv; 2055 int is_ufs; 2056 struct thread *p = curthread; 2057 2058 if (nd->nd_repstat) { 2059 nfsrv_postopattr(nd, getret, &at); 2060 goto out; 2061 } 2062 if (nd->nd_flag & ND_NFSV2) { 2063 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2064 off = fxdr_unsigned(u_quad_t, *tl++); 2065 } else { 2066 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 2067 off = fxdr_hyper(tl); 2068 tl += 2; 2069 verf = fxdr_hyper(tl); 2070 tl += 2; 2071 } 2072 toff = off; 2073 cnt = fxdr_unsigned(int, *tl); 2074 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2075 cnt = NFS_SRVMAXDATA(nd); 2076 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2077 fullsiz = siz; 2078 if (nd->nd_flag & ND_NFSV3) { 2079 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 2080 NULL); 2081 #if 0 2082 /* 2083 * va_filerev is not sufficient as a cookie verifier, 2084 * since it is not supposed to change when entries are 2085 * removed/added unless that offset cookies returned to 2086 * the client are no longer valid. 
2087 */ 2088 if (!nd->nd_repstat && toff && verf != at.na_filerev) 2089 nd->nd_repstat = NFSERR_BAD_COOKIE; 2090 #endif 2091 } 2092 if (!nd->nd_repstat && vp->v_type != VDIR) 2093 nd->nd_repstat = NFSERR_NOTDIR; 2094 if (nd->nd_repstat == 0 && cnt == 0) { 2095 if (nd->nd_flag & ND_NFSV2) 2096 /* NFSv2 does not have NFSERR_TOOSMALL */ 2097 nd->nd_repstat = EPERM; 2098 else 2099 nd->nd_repstat = NFSERR_TOOSMALL; 2100 } 2101 if (!nd->nd_repstat) 2102 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2103 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2104 NFSACCCHK_VPISLOCKED, NULL); 2105 if (nd->nd_repstat) { 2106 vput(vp); 2107 if (nd->nd_flag & ND_NFSV3) 2108 nfsrv_postopattr(nd, getret, &at); 2109 goto out; 2110 } 2111 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2112 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2113 again: 2114 eofflag = 0; 2115 if (cookies) { 2116 free(cookies, M_TEMP); 2117 cookies = NULL; 2118 } 2119 2120 iv.iov_base = rbuf; 2121 iv.iov_len = siz; 2122 io.uio_iov = &iv; 2123 io.uio_iovcnt = 1; 2124 io.uio_offset = (off_t)off; 2125 io.uio_resid = siz; 2126 io.uio_segflg = UIO_SYSSPACE; 2127 io.uio_rw = UIO_READ; 2128 io.uio_td = NULL; 2129 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2130 &cookies); 2131 off = (u_int64_t)io.uio_offset; 2132 if (io.uio_resid) 2133 siz -= io.uio_resid; 2134 2135 if (!cookies && !nd->nd_repstat) 2136 nd->nd_repstat = NFSERR_PERM; 2137 if (nd->nd_flag & ND_NFSV3) { 2138 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2139 if (!nd->nd_repstat) 2140 nd->nd_repstat = getret; 2141 } 2142 2143 /* 2144 * Handles the failed cases. nd->nd_repstat == 0 past here. 
2145 */ 2146 if (nd->nd_repstat) { 2147 vput(vp); 2148 free(rbuf, M_TEMP); 2149 if (cookies) 2150 free(cookies, M_TEMP); 2151 if (nd->nd_flag & ND_NFSV3) 2152 nfsrv_postopattr(nd, getret, &at); 2153 goto out; 2154 } 2155 /* 2156 * If nothing read, return eof 2157 * rpc reply 2158 */ 2159 if (siz == 0) { 2160 vput(vp); 2161 if (nd->nd_flag & ND_NFSV2) { 2162 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2163 } else { 2164 nfsrv_postopattr(nd, getret, &at); 2165 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2166 txdr_hyper(at.na_filerev, tl); 2167 tl += 2; 2168 } 2169 *tl++ = newnfs_false; 2170 *tl = newnfs_true; 2171 free(rbuf, M_TEMP); 2172 free(cookies, M_TEMP); 2173 goto out; 2174 } 2175 2176 /* 2177 * Check for degenerate cases of nothing useful read. 2178 * If so go try again 2179 */ 2180 cpos = rbuf; 2181 cend = rbuf + siz; 2182 dp = (struct dirent *)cpos; 2183 cookiep = cookies; 2184 2185 /* 2186 * For some reason FreeBSD's ufs_readdir() chooses to back the 2187 * directory offset up to a block boundary, so it is necessary to 2188 * skip over the records that precede the requested offset. This 2189 * requires the assumption that file offset cookies monotonically 2190 * increase. 2191 */ 2192 while (cpos < cend && ncookies > 0 && 2193 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2194 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 2195 cpos += dp->d_reclen; 2196 dp = (struct dirent *)cpos; 2197 cookiep++; 2198 ncookies--; 2199 } 2200 if (cpos >= cend || ncookies == 0) { 2201 siz = fullsiz; 2202 toff = off; 2203 goto again; 2204 } 2205 vput(vp); 2206 2207 /* 2208 * If cnt > MCLBYTES and the reply will not be saved, use 2209 * ext_pgs mbufs for TLS. 2210 * For NFSv4.0, we do not know for sure if the reply will 2211 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2212 */ 2213 if (cnt > MCLBYTES && siz > MCLBYTES && 2214 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2215 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2216 nd->nd_flag |= ND_EXTPG; 2217 2218 /* 2219 * dirlen is the size of the reply, including all XDR and must 2220 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 2221 * if the XDR should be included in "count", but to be safe, we do. 2222 * (Include the two booleans at the end of the reply in dirlen now.) 2223 */ 2224 if (nd->nd_flag & ND_NFSV3) { 2225 nfsrv_postopattr(nd, getret, &at); 2226 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2227 txdr_hyper(at.na_filerev, tl); 2228 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2229 } else { 2230 dirlen = 2 * NFSX_UNSIGNED; 2231 } 2232 2233 /* Loop through the records and build reply */ 2234 while (cpos < cend && ncookies > 0) { 2235 nlen = dp->d_namlen; 2236 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2237 nlen <= NFS_MAXNAMLEN) { 2238 if (nd->nd_flag & ND_NFSV3) 2239 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2240 else 2241 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2242 if (dirlen > cnt) { 2243 eofflag = 0; 2244 break; 2245 } 2246 2247 /* 2248 * Build the directory record xdr from 2249 * the dirent entry. 
2250 */ 2251 if (nd->nd_flag & ND_NFSV3) { 2252 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2253 *tl++ = newnfs_true; 2254 txdr_hyper(dp->d_fileno, tl); 2255 } else { 2256 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2257 *tl++ = newnfs_true; 2258 *tl = txdr_unsigned(dp->d_fileno); 2259 } 2260 (void) nfsm_strtom(nd, dp->d_name, nlen); 2261 if (nd->nd_flag & ND_NFSV3) { 2262 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2263 txdr_hyper(*cookiep, tl); 2264 } else { 2265 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2266 *tl = txdr_unsigned(*cookiep); 2267 } 2268 } 2269 cpos += dp->d_reclen; 2270 dp = (struct dirent *)cpos; 2271 cookiep++; 2272 ncookies--; 2273 } 2274 if (cpos < cend) 2275 eofflag = 0; 2276 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2277 *tl++ = newnfs_false; 2278 if (eofflag) 2279 *tl = newnfs_true; 2280 else 2281 *tl = newnfs_false; 2282 free(rbuf, M_TEMP); 2283 free(cookies, M_TEMP); 2284 2285 out: 2286 NFSEXITCODE2(0, nd); 2287 return (0); 2288 nfsmout: 2289 vput(vp); 2290 NFSEXITCODE2(error, nd); 2291 return (error); 2292 } 2293 2294 /* 2295 * Readdirplus for V3 and Readdir for V4. 
2296 */ /* nfsrvd_readdirplus(): build the reply for an NFSv3 Readdirplus or NFSv4 Readdir on directory vp. nd is the request/reply descriptor (the status is left in nd->nd_repstat), isdgram is passed through to nfsvno_fillattr(), exp is the export information used for the access check. Returns 0 via the "out" label; if dissecting the request fails, the "nfsmout" path does vput(vp) and returns the error. */ 2297 int 2298 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2299 struct vnode *vp, struct nfsexstuff *exp) 2300 { 2301 struct dirent *dp; 2302 u_int32_t *tl; 2303 int dirlen; 2304 char *cpos, *cend, *rbuf; 2305 struct vnode *nvp; 2306 fhandle_t nfh; 2307 struct nfsvattr nva, at, *nvap = &nva; 2308 struct mbuf *mb0, *mb1; 2309 struct nfsreferral *refp; 2310 int nlen, r, error = 0, getret = 1, usevget = 1; 2311 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2312 caddr_t bpos0, bpos1; 2313 u_int64_t off, toff, verf __unused; 2314 uint64_t *cookies = NULL, *cookiep; 2315 nfsattrbit_t attrbits, rderrbits, savbits, refbits; 2316 struct uio io; 2317 struct iovec iv; 2318 struct componentname cn; 2319 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2320 struct mount *mp, *new_mp; 2321 uint64_t mounted_on_fileno; 2322 struct thread *p = curthread; 2323 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; 2324 /* A status was already set before this function was entered: only add the post-op attributes and reply. getret is still 1 here, presumably telling nfsrv_postopattr() that "at" holds no attributes -- TODO confirm. */ 2325 if (nd->nd_repstat) { 2326 nfsrv_postopattr(nd, getret, &at); 2327 goto out; 2328 } /* Request arguments, six 32-bit words: 64-bit directory offset cookie (off), 64-bit cookie verifier (verf), the directory-size hint (siz) and the maximum reply size (cnt). toff keeps the originally requested offset. */ 2329 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2330 off = fxdr_hyper(tl); 2331 toff = off; 2332 tl += 2; 2333 verf = fxdr_hyper(tl); 2334 tl += 2; 2335 siz = fxdr_unsigned(int, *tl++); 2336 cnt = fxdr_unsigned(int, *tl); 2337 2338 /* 2339 * Use the server's maximum data transfer size as the upper bound 2340 * on reply datalen. A negative cnt (a value that does not fit in an int) is clamped the same way. 2341 */ 2342 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2343 cnt = NFS_SRVMAXDATA(nd); 2344 2345 /* 2346 * siz is a "hint" of how much directory information (name, fileid, 2347 * cookie) should be in the reply. At least one client "hints" 0, 2348 * so I set it to cnt for that case. I also round it up to the 2349 * next multiple of DIRBLKSIZ. 2350 * Since the size of a Readdirplus directory entry reply will always 2351 * be greater than a directory entry returned by VOP_READDIR(), it 2352 * does not make sense to read more than NFS_SRVMAXDATA() via 2353 * VOP_READDIR(). 
2354 */ 2355 if (siz <= 0) 2356 siz = cnt; 2357 else if (siz > NFS_SRVMAXDATA(nd)) 2358 siz = NFS_SRVMAXDATA(nd); 2359 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2360 2361 if (nd->nd_flag & ND_NFSV4) { 2362 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2363 if (error) 2364 goto nfsmout; 2365 NFSSET_ATTRBIT(&savbits, &attrbits); 2366 NFSSET_ATTRBIT(&refbits, &attrbits); 2367 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); 2368 NFSZERO_ATTRBIT(&rderrbits); 2369 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2370 /* 2371 * If these 4 bits are the only attributes requested by the 2372 * client, they can be satisfied without acquiring the vnode 2373 * for the file object unless it is a directory. 2374 * This will be indicated by savbits being all 0s. 2375 */ 2376 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_TYPE); 2377 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_FILEID); 2378 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_MOUNTEDONFILEID); 2379 NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_RDATTRERROR); 2380 } else { 2381 NFSZERO_ATTRBIT(&attrbits); 2382 } 2383 fullsiz = siz; 2384 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2385 #if 0 2386 if (!nd->nd_repstat) { 2387 if (off && verf != at.na_filerev) { 2388 /* 2389 * va_filerev is not sufficient as a cookie verifier, 2390 * since it is not supposed to change when entries are 2391 * removed/added unless that offset cookies returned to 2392 * the client are no longer valid. 
2393 */ 2394 if (nd->nd_flag & ND_NFSV4) { 2395 nd->nd_repstat = NFSERR_NOTSAME; 2396 } else { 2397 nd->nd_repstat = NFSERR_BAD_COOKIE; 2398 } 2399 } 2400 } 2401 #endif 2402 if (!nd->nd_repstat && vp->v_type != VDIR) 2403 nd->nd_repstat = NFSERR_NOTDIR; 2404 if (!nd->nd_repstat && cnt == 0) 2405 nd->nd_repstat = NFSERR_TOOSMALL; 2406 if (!nd->nd_repstat) 2407 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2408 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2409 NFSACCCHK_VPISLOCKED, NULL); 2410 if (nd->nd_repstat) { 2411 vput(vp); 2412 if (nd->nd_flag & ND_NFSV3) 2413 nfsrv_postopattr(nd, getret, &at); 2414 goto out; 2415 } 2416 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2417 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2418 2419 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2420 again: 2421 eofflag = 0; 2422 if (cookies) { 2423 free(cookies, M_TEMP); 2424 cookies = NULL; 2425 } 2426 2427 iv.iov_base = rbuf; 2428 iv.iov_len = siz; 2429 io.uio_iov = &iv; 2430 io.uio_iovcnt = 1; 2431 io.uio_offset = (off_t)off; 2432 io.uio_resid = siz; 2433 io.uio_segflg = UIO_SYSSPACE; 2434 io.uio_rw = UIO_READ; 2435 io.uio_td = NULL; 2436 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2437 &cookies); 2438 off = (u_int64_t)io.uio_offset; 2439 if (io.uio_resid) 2440 siz -= io.uio_resid; 2441 2442 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2443 2444 if (!cookies && !nd->nd_repstat) 2445 nd->nd_repstat = NFSERR_PERM; 2446 if (!nd->nd_repstat) 2447 nd->nd_repstat = getret; 2448 if (nd->nd_repstat) { 2449 vput(vp); 2450 if (cookies) 2451 free(cookies, M_TEMP); 2452 free(rbuf, M_TEMP); 2453 if (nd->nd_flag & ND_NFSV3) 2454 nfsrv_postopattr(nd, getret, &at); 2455 goto out; 2456 } 2457 /* 2458 * If nothing read, return eof 2459 * rpc reply 2460 */ 2461 if (siz == 0) { 2462 vput(vp); 2463 if (nd->nd_flag & ND_NFSV3) 2464 nfsrv_postopattr(nd, getret, &at); 2465 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2466 txdr_hyper(at.na_filerev, tl); 
2467 tl += 2; 2468 *tl++ = newnfs_false; 2469 *tl = newnfs_true; 2470 free(cookies, M_TEMP); 2471 free(rbuf, M_TEMP); 2472 goto out; 2473 } 2474 2475 /* 2476 * Check for degenerate cases of nothing useful read. 2477 * If so go try again 2478 */ 2479 cpos = rbuf; 2480 cend = rbuf + siz; 2481 dp = (struct dirent *)cpos; 2482 cookiep = cookies; 2483 2484 /* 2485 * For some reason FreeBSD's ufs_readdir() chooses to back the 2486 * directory offset up to a block boundary, so it is necessary to 2487 * skip over the records that precede the requested offset. This 2488 * requires the assumption that file offset cookies monotonically 2489 * increase. 2490 */ 2491 while (cpos < cend && ncookies > 0 && 2492 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2493 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2494 ((nd->nd_flag & ND_NFSV4) && 2495 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2496 (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { 2497 cpos += dp->d_reclen; 2498 dp = (struct dirent *)cpos; 2499 cookiep++; 2500 ncookies--; 2501 } 2502 if (cpos >= cend || ncookies == 0) { 2503 siz = fullsiz; 2504 toff = off; 2505 goto again; 2506 } 2507 2508 /* 2509 * Busy the file system so that the mount point won't go away 2510 * and, as such, VFS_VGET() can be used safely. 2511 */ 2512 mp = vp->v_mount; 2513 vfs_ref(mp); 2514 NFSVOPUNLOCK(vp); 2515 nd->nd_repstat = vfs_busy(mp, 0); 2516 vfs_rel(mp); 2517 if (nd->nd_repstat != 0) { 2518 vrele(vp); 2519 free(cookies, M_TEMP); 2520 free(rbuf, M_TEMP); 2521 if (nd->nd_flag & ND_NFSV3) 2522 nfsrv_postopattr(nd, getret, &at); 2523 goto out; 2524 } 2525 2526 /* 2527 * Check to see if entries in this directory can be safely acquired 2528 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2529 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2530 * automount of the snapshot directory that is required will 2531 * be done. 
2532 * This needs to be done here for NFSv4, since NFSv4 never does 2533 * a VFS_VGET() for "." or "..". 2534 */ 2535 if (is_zfs == 1) { 2536 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2537 if (r == EOPNOTSUPP) { 2538 usevget = 0; 2539 cn.cn_nameiop = LOOKUP; 2540 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2541 cn.cn_cred = nd->nd_cred; 2542 } else if (r == 0) 2543 vput(nvp); 2544 } 2545 2546 /* 2547 * If the reply is likely to exceed MCLBYTES and the reply will 2548 * not be saved, use ext_pgs mbufs for TLS. 2549 * It is difficult to predict how large each entry will be and 2550 * how many entries have been read, so just assume the directory 2551 * entries grow by a factor of 4 when attributes are included. 2552 * For NFSv4.0, we do not know for sure if the reply will 2553 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 2554 */ 2555 if (cnt > MCLBYTES && siz > MCLBYTES / 4 && 2556 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2557 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2558 nd->nd_flag |= ND_EXTPG; 2559 2560 /* 2561 * Save this position, in case there is an error before one entry 2562 * is created. 2563 */ 2564 mb0 = nd->nd_mb; 2565 bpos0 = nd->nd_bpos; 2566 bextpg0 = nd->nd_bextpg; 2567 bextpgsiz0 = nd->nd_bextpgsiz; 2568 2569 /* 2570 * Fill in the first part of the reply. 2571 * dirlen is the reply length in bytes and cannot exceed cnt. 2572 * (Include the two booleans at the end of the reply in dirlen now, 2573 * so we recognize when we have exceeded cnt.) 2574 */ 2575 if (nd->nd_flag & ND_NFSV3) { 2576 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2577 nfsrv_postopattr(nd, getret, &at); 2578 } else { 2579 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2580 } 2581 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2582 txdr_hyper(at.na_filerev, tl); 2583 2584 /* 2585 * Save this position, in case there is an empty reply needed. 
2586 */ 2587 mb1 = nd->nd_mb; 2588 bpos1 = nd->nd_bpos; 2589 bextpg1 = nd->nd_bextpg; 2590 bextpgsiz1 = nd->nd_bextpgsiz; 2591 2592 /* Loop through the records and build reply */ 2593 entrycnt = 0; 2594 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2595 nlen = dp->d_namlen; 2596 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2597 nlen <= NFS_MAXNAMLEN && 2598 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2599 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2600 || (nlen == 1 && dp->d_name[0] != '.'))) { 2601 /* 2602 * Save the current position in the reply, in case 2603 * this entry exceeds cnt. 2604 */ 2605 mb1 = nd->nd_mb; 2606 bpos1 = nd->nd_bpos; 2607 bextpg1 = nd->nd_bextpg; 2608 bextpgsiz1 = nd->nd_bextpgsiz; 2609 2610 /* 2611 * For readdir_and_lookup get the vnode using 2612 * the file number. 2613 */ 2614 nvp = NULL; 2615 refp = NULL; 2616 r = 0; 2617 at_root = 0; 2618 needs_unbusy = 0; 2619 new_mp = mp; 2620 mounted_on_fileno = (uint64_t)dp->d_fileno; 2621 if ((nd->nd_flag & ND_NFSV3) || 2622 NFSNONZERO_ATTRBIT(&savbits) || 2623 dp->d_type == DT_UNKNOWN || 2624 (dp->d_type == DT_DIR && 2625 nfsrv_enable_crossmntpt != 0)) { 2626 if (nd->nd_flag & ND_NFSV4) 2627 refp = nfsv4root_getreferral(NULL, 2628 vp, dp->d_fileno); 2629 if (refp == NULL) { 2630 if (usevget) 2631 r = VFS_VGET(mp, dp->d_fileno, 2632 LK_SHARED, &nvp); 2633 else 2634 r = EOPNOTSUPP; 2635 if (r == EOPNOTSUPP) { 2636 if (usevget) { 2637 usevget = 0; 2638 cn.cn_nameiop = LOOKUP; 2639 cn.cn_lkflags = 2640 LK_SHARED | 2641 LK_RETRY; 2642 cn.cn_cred = 2643 nd->nd_cred; 2644 } 2645 cn.cn_nameptr = dp->d_name; 2646 cn.cn_namelen = nlen; 2647 cn.cn_flags = ISLASTCN | 2648 NOFOLLOW | LOCKLEAF; 2649 if (nlen == 2 && 2650 dp->d_name[0] == '.' 
&& 2651 dp->d_name[1] == '.') 2652 cn.cn_flags |= 2653 ISDOTDOT; 2654 if (NFSVOPLOCK(vp, LK_SHARED) 2655 != 0) { 2656 nd->nd_repstat = EPERM; 2657 break; 2658 } 2659 if ((vp->v_vflag & VV_ROOT) != 0 2660 && (cn.cn_flags & ISDOTDOT) 2661 != 0) { 2662 vref(vp); 2663 nvp = vp; 2664 r = 0; 2665 } else { 2666 r = VOP_LOOKUP(vp, &nvp, 2667 &cn); 2668 if (vp != nvp) 2669 NFSVOPUNLOCK(vp); 2670 } 2671 } 2672 2673 /* 2674 * For NFSv4, check to see if nvp is 2675 * a mount point and get the mount 2676 * point vnode, as required. 2677 */ 2678 if (r == 0 && 2679 nfsrv_enable_crossmntpt != 0 && 2680 (nd->nd_flag & ND_NFSV4) != 0 && 2681 nvp->v_type == VDIR && 2682 nvp->v_mountedhere != NULL) { 2683 new_mp = nvp->v_mountedhere; 2684 r = vfs_busy(new_mp, 0); 2685 vput(nvp); 2686 nvp = NULL; 2687 if (r == 0) { 2688 r = VFS_ROOT(new_mp, 2689 LK_SHARED, &nvp); 2690 needs_unbusy = 1; 2691 if (r == 0) 2692 at_root = 1; 2693 } 2694 } 2695 } 2696 2697 /* 2698 * If we failed to look up the entry, then it 2699 * has become invalid, most likely removed. 2700 */ 2701 if (r != 0) { 2702 if (needs_unbusy) 2703 vfs_unbusy(new_mp); 2704 goto invalid; 2705 } 2706 KASSERT(refp != NULL || nvp != NULL, 2707 ("%s: undetected lookup error", __func__)); 2708 2709 if (refp == NULL && 2710 ((nd->nd_flag & ND_NFSV3) || 2711 NFSNONZERO_ATTRBIT(&attrbits))) { 2712 r = nfsvno_getfh(nvp, &nfh, p); 2713 if (!r) 2714 r = nfsvno_getattr(nvp, nvap, nd, p, 2715 1, &attrbits); 2716 if (r == 0 && is_zfs == 1 && 2717 nfsrv_enable_crossmntpt != 0 && 2718 (nd->nd_flag & ND_NFSV4) != 0 && 2719 nvp->v_type == VDIR && 2720 vp->v_mount != nvp->v_mount) { 2721 /* 2722 * For a ZFS snapshot, there is a 2723 * pseudo mount that does not set 2724 * v_mountedhere, so it needs to 2725 * be detected via a different 2726 * mount structure. 
2727 */ 2728 at_root = 1; 2729 if (new_mp == mp) 2730 new_mp = nvp->v_mount; 2731 } 2732 } 2733 2734 /* 2735 * If we failed to get attributes of the entry, 2736 * then just skip it for NFSv3 (the traditional 2737 * behavior in the old NFS server). 2738 * For NFSv4 the behavior is controlled by 2739 * RDATTRERROR: we either ignore the error or 2740 * fail the request. 2741 * The exception is EOPNOTSUPP, which can be 2742 * returned by nfsvno_getfh() for certain 2743 * file systems, such as devfs. This indicates 2744 * that the file system cannot be exported, 2745 * so just skip over the entry. 2746 * Note that RDATTRERROR is never set for NFSv3. 2747 */ 2748 if (r != 0) { 2749 if (!NFSISSET_ATTRBIT(&attrbits, 2750 NFSATTRBIT_RDATTRERROR) || 2751 r == EOPNOTSUPP) { 2752 vput(nvp); 2753 if (needs_unbusy != 0) 2754 vfs_unbusy(new_mp); 2755 if ((nd->nd_flag & ND_NFSV3) || 2756 r == EOPNOTSUPP) 2757 goto invalid; 2758 nd->nd_repstat = r; 2759 break; 2760 } 2761 } 2762 } else if (NFSNONZERO_ATTRBIT(&attrbits)) { 2763 /* Only need Type and/or Fileid. 
*/ 2764 VATTR_NULL(&nvap->na_vattr); 2765 nvap->na_fileid = dp->d_fileno; 2766 nvap->na_type = NFS_DTYPETOVTYPE(dp->d_type); 2767 } 2768 2769 /* 2770 * Build the directory record xdr 2771 */ 2772 if (nd->nd_flag & ND_NFSV3) { 2773 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2774 *tl++ = newnfs_true; 2775 txdr_hyper(dp->d_fileno, tl); 2776 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2777 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2778 txdr_hyper(*cookiep, tl); 2779 nfsrv_postopattr(nd, 0, nvap); 2780 dirlen += nfsm_fhtom(NULL, nd, (u_int8_t *)&nfh, 2781 0, 1); 2782 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2783 if (nvp != NULL) 2784 vput(nvp); 2785 } else { 2786 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2787 *tl++ = newnfs_true; 2788 txdr_hyper(*cookiep, tl); 2789 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2790 if (nvp != NULL) { 2791 supports_nfsv4acls = 2792 nfs_supportsnfsv4acls(nvp); 2793 NFSVOPUNLOCK(nvp); 2794 } else 2795 supports_nfsv4acls = 0; 2796 if (refp != NULL) { 2797 dirlen += nfsrv_putreferralattr(nd, 2798 &refbits, refp, 0, 2799 &nd->nd_repstat); 2800 if (nd->nd_repstat) { 2801 if (nvp != NULL) 2802 vrele(nvp); 2803 if (needs_unbusy != 0) 2804 vfs_unbusy(new_mp); 2805 break; 2806 } 2807 } else if (r) { 2808 dirlen += nfsvno_fillattr(nd, new_mp, 2809 nvp, nvap, &nfh, r, &rderrbits, 2810 nd->nd_cred, p, isdgram, 0, 2811 supports_nfsv4acls, at_root, 2812 mounted_on_fileno); 2813 } else { 2814 dirlen += nfsvno_fillattr(nd, new_mp, 2815 nvp, nvap, &nfh, r, &attrbits, 2816 nd->nd_cred, p, isdgram, 0, 2817 supports_nfsv4acls, at_root, 2818 mounted_on_fileno); 2819 } 2820 if (nvp != NULL) 2821 vrele(nvp); 2822 dirlen += (3 * NFSX_UNSIGNED); 2823 } 2824 if (needs_unbusy != 0) 2825 vfs_unbusy(new_mp); 2826 if (dirlen <= cnt) 2827 entrycnt++; 2828 } 2829 invalid: 2830 cpos += dp->d_reclen; 2831 dp = (struct dirent *)cpos; 2832 cookiep++; 2833 ncookies--; 2834 } 2835 vrele(vp); 2836 vfs_unbusy(mp); 2837 2838 /* 2839 * If dirlen > cnt, we 
must strip off the last entry. If that 2840 * results in an empty reply, report NFSERR_TOOSMALL. 2841 */ 2842 if (dirlen > cnt || nd->nd_repstat) { 2843 if (!nd->nd_repstat && entrycnt == 0) 2844 nd->nd_repstat = NFSERR_TOOSMALL; 2845 if (nd->nd_repstat) { /* Error reply: discard everything built since position 0 was saved. */ 2846 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); 2847 if (nd->nd_flag & ND_NFSV3) 2848 nfsrv_postopattr(nd, getret, &at); 2849 } else /* Only the last entry overflowed cnt: cut the reply back to the position saved before that entry. */ 2850 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); 2851 eofflag = 0; 2852 } else if (cpos < cend) 2853 eofflag = 0; 2854 if (!nd->nd_repstat) { /* The two trailing booleans of the reply; the second one carries eofflag. */ 2855 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2856 *tl++ = newnfs_false; 2857 if (eofflag) 2858 *tl = newnfs_true; 2859 else 2860 *tl = newnfs_false; 2861 } 2862 free(cookies, M_TEMP); 2863 free(rbuf, M_TEMP); 2864 2865 out: 2866 NFSEXITCODE2(0, nd); 2867 return (0); 2868 nfsmout: 2869 vput(vp); 2870 NFSEXITCODE2(error, nd); 2871 return (error); 2872 } 2873 2874 /* 2875 * Get the settable attributes out of the mbuf list and store them in *nvap. The layout depends on the protocol version in nd->nd_flag: NFSv2 uses one fixed sattr structure, NFSv3 a sequence of optional fields, and NFSv4 is handed to nfsv4_sattr() (the only case that uses vp, attrbitp, aclp and p). 2876 * (Return 0 or EBADRPC) 2877 */ 2878 int 2879 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2880 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2881 { 2882 u_int32_t *tl; 2883 struct nfsv2_sattr *sp; 2884 int error = 0, toclient = 0; 2885 2886 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2887 case ND_NFSV2: /* NFSv2: a field equal to newnfs_xdrneg1 is left untouched in *nvap; the mode is tested on its low 16 bits instead. */ 2888 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2889 /* 2890 * Some old clients didn't fill in the high order 16bits. 
2891 * --> check the low order 2 bytes for 0xffff 2892 */ 2893 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2894 nvap->na_mode = nfstov_mode(sp->sa_mode); 2895 if (sp->sa_uid != newnfs_xdrneg1) 2896 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2897 if (sp->sa_gid != newnfs_xdrneg1) 2898 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2899 if (sp->sa_size != newnfs_xdrneg1) 2900 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2901 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { /* Unless "notyet" is defined, only the seconds of the access time are taken and tv_nsec is forced to 0. */ 2902 #ifdef notyet 2903 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2904 #else 2905 nvap->na_atime.tv_sec = 2906 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2907 nvap->na_atime.tv_nsec = 0; 2908 #endif 2909 } 2910 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2911 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2912 break; 2913 case ND_NFSV3: /* NFSv3: mode, uid, gid and size are each preceded by a boolean word; the value is dissected only when that word equals newnfs_true. */ 2914 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2915 if (*tl == newnfs_true) { 2916 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2917 nvap->na_mode = nfstov_mode(*tl); 2918 } 2919 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2920 if (*tl == newnfs_true) { 2921 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2922 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2923 } 2924 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2925 if (*tl == newnfs_true) { 2926 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2927 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2928 } 2929 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2930 if (*tl == newnfs_true) { 2931 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2932 nvap->na_size = fxdr_hyper(tl); 2933 } /* Access time: either a client-supplied time or the server's current time; any other discriminant leaves na_atime untouched. toclient records that the client supplied the time. */ 2934 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2935 switch (fxdr_unsigned(int, *tl)) { 2936 case NFSV3SATTRTIME_TOCLIENT: 2937 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2938 fxdr_nfsv3time(tl, &nvap->na_atime); 2939 toclient = 1; 2940 break; 2941 case NFSV3SATTRTIME_TOSERVER: 2942 vfs_timestamp(&nvap->na_atime); 2943 nvap->na_vaflags |= VA_UTIMES_NULL; 2944 break; 2945 } 2946 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2947 switch (fxdr_unsigned(int, *tl)) { 2948 case NFSV3SATTRTIME_TOCLIENT: 2949 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2950 fxdr_nfsv3time(tl, &nvap->na_mtime); 2951 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2952 break; 2953 case NFSV3SATTRTIME_TOSERVER: 2954 vfs_timestamp(&nvap->na_mtime); 2955 if (!toclient) 2956 nvap->na_vaflags |= VA_UTIMES_NULL; 2957 break; 2958 } 2959 break; 2960 case ND_NFSV4: 2961 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2962 } 2963 nfsmout: 2964 NFSEXITCODE2(error, nd); 2965 return (error); 2966 } 2967 2968 /* 2969 * Handle the setable attributes for V4. 2970 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2971 */ 2972 int 2973 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2974 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2975 { 2976 u_int32_t *tl; 2977 int attrsum = 0; 2978 int i, j; 2979 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2980 int moderet, toclient = 0; 2981 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2982 uid_t uid; 2983 gid_t gid; 2984 u_short mode, mask; /* Same type as va_mode. */ 2985 struct vattr va; 2986 2987 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2988 if (error) 2989 goto nfsmout; 2990 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2991 attrsize = fxdr_unsigned(int, *tl); 2992 2993 /* 2994 * Loop around getting the setable attributes. If an unsupported 2995 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 
2996 */ 2997 if (retnotsup) { 2998 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2999 bitpos = NFSATTRBIT_MAX; 3000 } else { 3001 bitpos = 0; 3002 } 3003 moderet = 0; 3004 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 3005 if (attrsum > attrsize) { 3006 error = NFSERR_BADXDR; 3007 goto nfsmout; 3008 } 3009 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 3010 switch (bitpos) { 3011 case NFSATTRBIT_SIZE: 3012 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 3013 if (vp != NULL && vp->v_type != VREG) { 3014 error = (vp->v_type == VDIR) ? NFSERR_ISDIR : 3015 NFSERR_INVAL; 3016 goto nfsmout; 3017 } 3018 nvap->na_size = fxdr_hyper(tl); 3019 attrsum += NFSX_HYPER; 3020 break; 3021 case NFSATTRBIT_ACL: 3022 error = nfsrv_dissectacl(nd, aclp, true, &aceerr, 3023 &aclsize, p); 3024 if (error) 3025 goto nfsmout; 3026 if (aceerr && !nd->nd_repstat) 3027 nd->nd_repstat = aceerr; 3028 attrsum += aclsize; 3029 break; 3030 case NFSATTRBIT_ARCHIVE: 3031 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3032 if (!nd->nd_repstat) 3033 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3034 attrsum += NFSX_UNSIGNED; 3035 break; 3036 case NFSATTRBIT_HIDDEN: 3037 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3038 if (!nd->nd_repstat) 3039 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3040 attrsum += NFSX_UNSIGNED; 3041 break; 3042 case NFSATTRBIT_MIMETYPE: 3043 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3044 i = fxdr_unsigned(int, *tl); 3045 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 3046 if (error) 3047 goto nfsmout; 3048 if (!nd->nd_repstat) 3049 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3050 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 3051 break; 3052 case NFSATTRBIT_MODE: 3053 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. 
*/ 3054 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3055 nvap->na_mode = nfstov_mode(*tl); 3056 attrsum += NFSX_UNSIGNED; 3057 break; 3058 case NFSATTRBIT_OWNER: 3059 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3060 j = fxdr_unsigned(int, *tl); 3061 if (j < 0) { 3062 error = NFSERR_BADXDR; 3063 goto nfsmout; 3064 } 3065 if (j > NFSV4_SMALLSTR) 3066 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3067 else 3068 cp = namestr; 3069 error = nfsrv_mtostr(nd, cp, j); 3070 if (error) { 3071 if (j > NFSV4_SMALLSTR) 3072 free(cp, M_NFSSTRING); 3073 goto nfsmout; 3074 } 3075 if (!nd->nd_repstat) { 3076 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 3077 &uid); 3078 if (!nd->nd_repstat) 3079 nvap->na_uid = uid; 3080 } 3081 if (j > NFSV4_SMALLSTR) 3082 free(cp, M_NFSSTRING); 3083 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3084 break; 3085 case NFSATTRBIT_OWNERGROUP: 3086 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3087 j = fxdr_unsigned(int, *tl); 3088 if (j < 0) { 3089 error = NFSERR_BADXDR; 3090 goto nfsmout; 3091 } 3092 if (j > NFSV4_SMALLSTR) 3093 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3094 else 3095 cp = namestr; 3096 error = nfsrv_mtostr(nd, cp, j); 3097 if (error) { 3098 if (j > NFSV4_SMALLSTR) 3099 free(cp, M_NFSSTRING); 3100 goto nfsmout; 3101 } 3102 if (!nd->nd_repstat) { 3103 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 3104 &gid); 3105 if (!nd->nd_repstat) 3106 nvap->na_gid = gid; 3107 } 3108 if (j > NFSV4_SMALLSTR) 3109 free(cp, M_NFSSTRING); 3110 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3111 break; 3112 case NFSATTRBIT_SYSTEM: 3113 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3114 if (!nd->nd_repstat) 3115 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3116 attrsum += NFSX_UNSIGNED; 3117 break; 3118 case NFSATTRBIT_TIMEACCESSSET: 3119 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3120 attrsum += NFSX_UNSIGNED; 3121 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3122 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3123 fxdr_nfsv4time(tl, &nvap->na_atime); 3124 
toclient = 1; 3125 attrsum += NFSX_V4TIME; 3126 } else { 3127 vfs_timestamp(&nvap->na_atime); 3128 nvap->na_vaflags |= VA_UTIMES_NULL; 3129 } 3130 break; 3131 case NFSATTRBIT_TIMEBACKUP: 3132 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3133 if (!nd->nd_repstat) 3134 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3135 attrsum += NFSX_V4TIME; 3136 break; 3137 case NFSATTRBIT_TIMECREATE: 3138 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3139 fxdr_nfsv4time(tl, &nvap->na_btime); 3140 attrsum += NFSX_V4TIME; 3141 break; 3142 case NFSATTRBIT_TIMEMODIFYSET: 3143 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3144 attrsum += NFSX_UNSIGNED; 3145 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3146 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3147 fxdr_nfsv4time(tl, &nvap->na_mtime); 3148 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3149 attrsum += NFSX_V4TIME; 3150 } else { 3151 vfs_timestamp(&nvap->na_mtime); 3152 if (!toclient) 3153 nvap->na_vaflags |= VA_UTIMES_NULL; 3154 } 3155 break; 3156 case NFSATTRBIT_MODESETMASKED: 3157 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 3158 mode = fxdr_unsigned(u_short, *tl++); 3159 mask = fxdr_unsigned(u_short, *tl); 3160 /* 3161 * vp == NULL implies an Open/Create operation. 3162 * This attribute can only be used for Setattr and 3163 * only for NFSv4.1 or higher. 3164 * If moderet != 0, a mode attribute has also been 3165 * specified and this attribute cannot be done in the 3166 * same Setattr operation. 
3167 */ 3168 if ((nd->nd_flag & ND_NFSV41) == 0) 3169 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3170 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || 3171 vp == NULL) 3172 nd->nd_repstat = NFSERR_INVAL; 3173 else if (moderet == 0) 3174 moderet = VOP_GETATTR(vp, &va, nd->nd_cred); 3175 if (moderet == 0) 3176 nvap->na_mode = (mode & mask) | 3177 (va.va_mode & ~mask); 3178 else 3179 nd->nd_repstat = moderet; 3180 attrsum += 2 * NFSX_UNSIGNED; 3181 break; 3182 default: 3183 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3184 /* 3185 * set bitpos so we drop out of the loop. 3186 */ 3187 bitpos = NFSATTRBIT_MAX; 3188 break; 3189 } 3190 } 3191 3192 /* 3193 * some clients pad the attrlist, so we need to skip over the 3194 * padding. 3195 */ 3196 if (attrsum > attrsize) { 3197 error = NFSERR_BADXDR; 3198 } else { 3199 attrsize = NFSM_RNDUP(attrsize); 3200 if (attrsum < attrsize) 3201 error = nfsm_advance(nd, attrsize - attrsum, -1); 3202 } 3203 nfsmout: 3204 NFSEXITCODE2(error, nd); 3205 return (error); 3206 } 3207 3208 /* 3209 * Check/setup export credentials. 3210 */ 3211 int 3212 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 3213 struct ucred *credanon, bool testsec) 3214 { 3215 int error; 3216 3217 /* 3218 * Check/setup credentials. 3219 */ 3220 if (nd->nd_flag & ND_GSS) 3221 exp->nes_exflag &= ~MNT_EXPORTANON; 3222 3223 /* 3224 * Check to see if the operation is allowed for this security flavor. 3225 */ 3226 error = 0; 3227 if (testsec) { 3228 error = nfsvno_testexp(nd, exp); 3229 if (error != 0) 3230 goto out; 3231 } 3232 3233 /* 3234 * Check to see if the file system is exported V4 only. 3235 */ 3236 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 3237 error = NFSERR_PROGNOTV4; 3238 goto out; 3239 } 3240 3241 /* 3242 * Now, map the user credentials. 3243 * (Note that ND_AUTHNONE will only be set for an NFSv3 3244 * Fsinfo RPC. If set for anything else, this code might need 3245 * to change.) 
3246 */ 3247 if (NFSVNO_EXPORTED(exp)) { /* Requests from uid 0 without GSS, anonymous exports and ND_AUTHNONE requests all get the uid, gid and group list of credanon. */ 3248 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 3249 NFSVNO_EXPORTANON(exp) || 3250 (nd->nd_flag & ND_AUTHNONE) != 0) { 3251 nd->nd_cred->cr_uid = credanon->cr_uid; 3252 nd->nd_cred->cr_gid = credanon->cr_gid; 3253 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 3254 credanon->cr_groups); 3255 } else if ((nd->nd_flag & ND_GSS) == 0) { 3256 /* 3257 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 3258 * if there is a replacement credential with a group 3259 * list set up by "nfsuserd -manage-gids". 3260 * If there is no replacement, nfsrv_getgrpscred() 3261 * simply returns its argument. 3262 */ 3263 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 3264 } 3265 } 3266 3267 out: 3268 NFSEXITCODE2(error, nd); 3269 return (error); 3270 } 3271 3272 /* 3273 * Check exports: look up the export settings of mount point mp for the client address nam. *credp is first set to NULL. The lookup is refused with EACCES when mp has no mnt_exjail or when that credential's prison differs from the current thread's; otherwise VFS_CHECKEXP() fills in exp->nes_exflag, the security flavors and *credp. If the check fails while nfs_rootfhset is set, the export flags and flavor count are cleared and 0 is returned instead. A flavor count outside 1..MAXSECFLAVORS is logged and rejected with EACCES. Returns 0 or an error number. 3274 */ 3275 int 3276 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 3277 struct ucred **credp) 3278 { 3279 int error; 3280 3281 error = 0; 3282 *credp = NULL; /* mnt_exjail is examined with the mount interlock held. */ 3283 MNT_ILOCK(mp); 3284 if (mp->mnt_exjail == NULL || 3285 mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison) 3286 error = EACCES; 3287 MNT_IUNLOCK(mp); 3288 if (error == 0) 3289 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3290 &exp->nes_numsecflavor, exp->nes_secflavors); 3291 if (error) { /* With nfs_rootfhset set the failure is not fatal: report "no export flags, no flavors" and succeed. */ 3292 if (NFSD_VNET(nfs_rootfhset)) { 3293 exp->nes_exflag = 0; 3294 exp->nes_numsecflavor = 0; 3295 error = 0; 3296 } 3297 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3298 MAXSECFLAVORS) { 3299 printf("nfsvno_checkexp: numsecflavors out of range\n"); 3300 exp->nes_numsecflavor = 0; 3301 error = EACCES; 3302 } 3303 NFSEXITCODE(error); 3304 return (error); 3305 } 3306 3307 /* 3308 * Get a vnode for a file handle and export stuff. 
 *
 * Converts "fhp" to a vnode on "mp" with VFS_FHTOVP(), locked as requested
 * by "lktype", and returns it in "*vpp".  Any VFS_FHTOVP() failure is
 * reported as ESTALE.
 *
 * When "nam" is non-NULL the export information for that client address
 * is also looked up, using the same rules as nfsvno_checkexp(): the
 * mount's mnt_exjail must match the current thread's prison, a failed
 * lookup is masked when nfs_rootfhset is set, and a flavor count outside
 * 1..MAXSECFLAVORS yields EACCES.
 *
 * On every error return after a successful VFS_FHTOVP() the vnode has
 * already been vput()'d, so the caller must not use "*vpp".
 */
int
nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
    int lktype, struct vnode **vpp, struct nfsexstuff *exp,
    struct ucred **credp)
{
	int error;

	*credp = NULL;
	exp->nes_numsecflavor = 0;
	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
	if (error != 0)
		/* Make sure the server replies ESTALE to the client. */
		error = ESTALE;
	if (nam && !error) {
		/* mnt_exjail is examined with the mount interlock held. */
		MNT_ILOCK(mp);
		if (mp->mnt_exjail == NULL ||
		    mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
			error = EACCES;
		MNT_IUNLOCK(mp);
		if (error == 0)
			error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
			    &exp->nes_numsecflavor, exp->nes_secflavors);
		if (error) {
			if (NFSD_VNET(nfs_rootfhset)) {
				/* Keep the vnode, but with no export info. */
				exp->nes_exflag = 0;
				exp->nes_numsecflavor = 0;
				error = 0;
			} else {
				vput(*vpp);
			}
		} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
		    MAXSECFLAVORS) {
			printf("nfsvno_fhtovp: numsecflavors out of range\n");
			exp->nes_numsecflavor = 0;
			error = EACCES;
			vput(*vpp);
		}
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * nfsd_fhtovp() - convert a fh to a vnode ptr
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp and export rights by calling nfsvno_fhtovp()
 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 *	  for AUTH_SYS
 *	- if mpp != NULL, return the mount point so that it can
 *	  be used for vn_finished_write() by the caller
 *
 * The result is reported through nd->nd_repstat rather than a return
 * value.  On failure "*vpp" is NULL and, when "startwrite" was set, the
 * write started with vn_start_write() has been finished again.  On
 * success with "startwrite" set and mpp != NULL, "*mpp" is the mount
 * point on which the write was started.
 */
void
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
    struct vnode **vpp, struct nfsexstuff *exp,
    struct mount **mpp, int startwrite, int nextop)
{
	struct mount *mp, *mpw;
	struct ucred *credanon;
	fhandle_t *fhp;
	int error;

	if (mpp != NULL)
		*mpp = NULL;
	*vpp = NULL;
	fhp = (fhandle_t *)nfp->nfsrvfh_data;
	mp = vfs_busyfs(&fhp->fh_fsid);
	if (mp == NULL) {
		nd->nd_repstat = ESTALE;
		goto out;
	}

	if (startwrite) {
		mpw = mp;
		error = vn_start_write(NULL, &mpw, V_WAIT);
		if (error != 0) {
			mpw = NULL;
			vfs_unbusy(mp);
			nd->nd_repstat = ESTALE;
			goto out;
		}
		/*
		 * Upgrade a shared lock request when the mount does not
		 * allow shared writes.
		 */
		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
			lktype = LK_EXCLUSIVE;
	} else
		mpw = NULL;

	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
	    &credanon);
	vfs_unbusy(mp);

	/*
	 * For NFSv4 without a pseudo root fs, unexported file handles
	 * can be returned, so that Lookup works everywhere.
	 */
	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
	    !(nd->nd_flag & ND_NFSV4)) {
		vput(*vpp);
		*vpp = NULL;
		nd->nd_repstat = EACCES;
	}

	/*
	 * Personally, I've never seen any point in requiring a
	 * reserved port#, since only in the rare case where the
	 * clients are all boxes with secure system privileges,
	 * does it provide any enhanced security, but... some people
	 * believe it to be useful and keep putting this code back in.
	 * (There is also some "security checker" out there that
	 * complains if the nfs server doesn't enforce this.)
	 * However, note the following:
	 * RFC3530 (NFSv4) specifies that a reserved port# not be
	 *	required.
	 * RFC2623 recommends that, if a reserved port# is checked for,
	 *	that there be a way to turn that off--> ifdef'd.
	 */
#ifdef NFS_REQRSVPORT
	if (!nd->nd_repstat) {
		struct sockaddr_in *saddr;
		struct sockaddr_in6 *saddr6;

		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
		if (!(nd->nd_flag & ND_NFSV4) &&
		    ((saddr->sin_family == AF_INET &&
		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
		     (saddr6->sin6_family == AF_INET6 &&
		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
			vput(*vpp);
			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
		}
	}
#endif	/* NFS_REQRSVPORT */

	/*
	 * Check/setup credentials.
	 */
	if (!nd->nd_repstat) {
		/* Remember the uid before nfsd_excred() may remap it. */
		nd->nd_saveduid = nd->nd_cred->cr_uid;
		nd->nd_repstat = nfsd_excred(nd, exp, credanon,
		    nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
		if (nd->nd_repstat)
			vput(*vpp);
	}
	if (credanon != NULL)
		crfree(credanon);
	if (nd->nd_repstat) {
		/* Every failure path above has already released the vnode. */
		vn_finished_write(mpw);
		*vpp = NULL;
	} else if (mpp != NULL) {
		*mpp = mpw;
	}

out:
	NFSEXITCODE2(0, nd);
}

/*
 * glue for fp.
 *
 * Looks up descriptor "fd" in the file descriptor table of the process
 * owning thread "p" and returns the file pointer in "*fpp".
 * Returns EBADF when "fd" is out of range or not open, otherwise 0.
 *
 * "vpp" is accepted but never written by this function.  No reference
 * on the file is acquired here.
 */
static int
fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
{
	struct filedesc *fdp;
	struct file *fp;
	int error = 0;

	fdp = p->td_proc->p_fd;
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
		error = EBADF;
		goto out;
	}
	*fpp = fp;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Called from nfssvc() to update the exports list. Just call
 * vfs_export(). This has to be done, since the v4 root fake fs isn't
 * in the mount list.
3492 */ 3493 int 3494 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3495 { 3496 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3497 int error = 0; 3498 struct nameidata nd; 3499 fhandle_t fh; 3500 3501 error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false); 3502 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3503 NFSD_VNET(nfs_rootfhset) = 0; 3504 else if (error == 0) { 3505 if (nfsexargp->fspec == NULL) { 3506 error = EPERM; 3507 goto out; 3508 } 3509 /* 3510 * If fspec != NULL, this is the v4root path. 3511 */ 3512 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec); 3513 if ((error = namei(&nd)) != 0) 3514 goto out; 3515 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3516 vrele(nd.ni_vp); 3517 if (!error) { 3518 NFSD_VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; 3519 NFSBCOPY((caddr_t)&fh, 3520 NFSD_VNET(nfs_rootfh).nfsrvfh_data, 3521 sizeof (fhandle_t)); 3522 NFSD_VNET(nfs_rootfhset) = 1; 3523 } 3524 } 3525 3526 out: 3527 NFSEXITCODE(error); 3528 return (error); 3529 } 3530 3531 /* 3532 * This function needs to test to see if the system is near its limit 3533 * for memory allocation via malloc() or mget() and return True iff 3534 * either of these resources are near their limit. 3535 * XXX (For now, this is just a stub.) 3536 */ 3537 int nfsrv_testmalloclimit = 0; 3538 int 3539 nfsrv_mallocmget_limit(void) 3540 { 3541 static int printmesg = 0; 3542 static int testval = 1; 3543 3544 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3545 if ((printmesg++ % 100) == 0) 3546 printf("nfsd: malloc/mget near limit\n"); 3547 return (1); 3548 } 3549 return (0); 3550 } 3551 3552 /* 3553 * BSD specific initialization of a mount point. 
3554 */ 3555 void 3556 nfsd_mntinit(void) 3557 { 3558 3559 NFSD_LOCK(); 3560 if (NFSD_VNET(nfsrv_mntinited)) { 3561 NFSD_UNLOCK(); 3562 return; 3563 } 3564 NFSD_VNET(nfsrv_mntinited) = true; 3565 nfsrvd_init(0); 3566 NFSD_UNLOCK(); 3567 3568 NFSD_VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP, 3569 M_WAITOK | M_ZERO); 3570 NFSD_VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3571 mtx_init(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF); 3572 lockinit(&NFSD_VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0); 3573 TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelist); 3574 TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelist); 3575 NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; 3576 TAILQ_INIT(&NFSD_VNET(nfsv4root_opt)); 3577 TAILQ_INIT(&NFSD_VNET(nfsv4root_newopt)); 3578 NFSD_VNET(nfsv4root_mnt)->mnt_opt = &NFSD_VNET(nfsv4root_opt); 3579 NFSD_VNET(nfsv4root_mnt)->mnt_optnew = &NFSD_VNET(nfsv4root_newopt); 3580 NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0; 3581 NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0; 3582 callout_init(&NFSD_VNET(nfsd_callout), 1); 3583 3584 nfsrvd_initcache(); 3585 nfsd_init(); 3586 } 3587 3588 static void 3589 nfsd_timer(void *arg) 3590 { 3591 struct vnet *vnetp; 3592 3593 vnetp = (struct vnet *)arg; 3594 NFSD_CURVNET_SET_QUIET(vnetp); 3595 nfsrv_servertimer(vnetp); 3596 callout_reset_sbt(&NFSD_VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, 3597 arg, 0); 3598 NFSD_CURVNET_RESTORE(); 3599 } 3600 3601 /* 3602 * Get a vnode for a file handle, without checking exports, etc. 3603 */ 3604 struct vnode * 3605 nfsvno_getvp(fhandle_t *fhp) 3606 { 3607 struct mount *mp; 3608 struct vnode *vp; 3609 int error; 3610 3611 mp = vfs_busyfs(&fhp->fh_fsid); 3612 if (mp == NULL) 3613 return (NULL); 3614 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3615 vfs_unbusy(mp); 3616 if (error) 3617 return (NULL); 3618 return (vp); 3619 } 3620 3621 /* 3622 * Do a local VOP_ADVLOCK(). 
3623 */ 3624 int 3625 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3626 u_int64_t end, struct thread *td) 3627 { 3628 int error = 0; 3629 struct flock fl; 3630 u_int64_t tlen; 3631 3632 if (nfsrv_dolocallocks == 0) 3633 goto out; 3634 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3635 3636 fl.l_whence = SEEK_SET; 3637 fl.l_type = ftype; 3638 fl.l_start = (off_t)first; 3639 if (end == NFS64BITSSET) { 3640 fl.l_len = 0; 3641 } else { 3642 tlen = end - first; 3643 fl.l_len = (off_t)tlen; 3644 } 3645 /* 3646 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3647 * values for all calls, so that all locks will be held by the 3648 * nfsd server. (The nfsd server handles conflicts between the 3649 * various clients.) 3650 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3651 * bytes, so it can't be put in l_sysid. 3652 */ 3653 if (nfsv4_sysid == 0) 3654 nfsv4_sysid = nlm_acquire_next_sysid(); 3655 fl.l_pid = (pid_t)0; 3656 fl.l_sysid = (int)nfsv4_sysid; 3657 3658 if (ftype == F_UNLCK) 3659 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3660 (F_POSIX | F_REMOTE)); 3661 else 3662 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3663 (F_POSIX | F_REMOTE)); 3664 3665 out: 3666 NFSEXITCODE(error); 3667 return (error); 3668 } 3669 3670 /* 3671 * Check the nfsv4 root exports. 
3672 */ 3673 int 3674 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3675 { 3676 struct ucred *credanon; 3677 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; 3678 uint64_t exflags; 3679 3680 error = vfs_stdcheckexp(NFSD_VNET(nfsv4root_mnt), nd->nd_nam, &exflags, 3681 &credanon, &numsecflavor, secflavors); 3682 if (error) { 3683 error = NFSERR_PROGUNAVAIL; 3684 goto out; 3685 } 3686 if (credanon != NULL) 3687 crfree(credanon); 3688 for (i = 0; i < numsecflavor; i++) { 3689 if (secflavors[i] == AUTH_SYS) 3690 nd->nd_flag |= ND_EXAUTHSYS; 3691 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3692 nd->nd_flag |= ND_EXGSS; 3693 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3694 nd->nd_flag |= ND_EXGSSINTEGRITY; 3695 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3696 nd->nd_flag |= ND_EXGSSPRIVACY; 3697 } 3698 3699 /* And set ND_EXxx flags for TLS. */ 3700 if ((exflags & MNT_EXTLS) != 0) { 3701 nd->nd_flag |= ND_EXTLS; 3702 if ((exflags & MNT_EXTLSCERT) != 0) 3703 nd->nd_flag |= ND_EXTLSCERT; 3704 if ((exflags & MNT_EXTLSCERTUSER) != 0) 3705 nd->nd_flag |= ND_EXTLSCERTUSER; 3706 } 3707 3708 out: 3709 NFSEXITCODE(error); 3710 return (error); 3711 } 3712 3713 /* 3714 * Nfs server pseudo system call for the nfsd's 3715 */ 3716 /* 3717 * MPSAFE 3718 */ 3719 static int 3720 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3721 { 3722 struct file *fp; 3723 struct nfsd_addsock_args sockarg; 3724 struct nfsd_nfsd_args nfsdarg; 3725 struct nfsd_nfsd_oargs onfsdarg; 3726 struct nfsd_pnfsd_args pnfsdarg; 3727 struct vnode *vp, *nvp, *curdvp; 3728 struct pnfsdsfile *pf; 3729 struct nfsdevice *ds, *fds; 3730 cap_rights_t rights; 3731 int buflen, error, ret; 3732 char *buf, *cp, *cp2, *cp3; 3733 char fname[PNFS_FILENAME_LEN + 1]; 3734 3735 NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td)); 3736 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3737 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3738 if (error) 3739 goto out; 3740 /* 3741 * Since we don't know what rights might be 
required, 3742 * pretend that we need them all. It is better to be too 3743 * careful than too reckless. 3744 */ 3745 error = fget(td, sockarg.sock, 3746 cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp); 3747 if (error != 0) 3748 goto out; 3749 if (fp->f_type != DTYPE_SOCKET) { 3750 fdrop(fp, td); 3751 error = EPERM; 3752 goto out; 3753 } 3754 error = nfsrvd_addsock(fp); 3755 fdrop(fp, td); 3756 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3757 if (uap->argp == NULL) { 3758 error = EINVAL; 3759 goto out; 3760 } 3761 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3762 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3763 if (error == 0) { 3764 nfsdarg.principal = onfsdarg.principal; 3765 nfsdarg.minthreads = onfsdarg.minthreads; 3766 nfsdarg.maxthreads = onfsdarg.maxthreads; 3767 nfsdarg.version = 1; 3768 nfsdarg.addr = NULL; 3769 nfsdarg.addrlen = 0; 3770 nfsdarg.dnshost = NULL; 3771 nfsdarg.dnshostlen = 0; 3772 nfsdarg.dspath = NULL; 3773 nfsdarg.dspathlen = 0; 3774 nfsdarg.mdspath = NULL; 3775 nfsdarg.mdspathlen = 0; 3776 nfsdarg.mirrorcnt = 1; 3777 } 3778 } else 3779 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3780 if (error) 3781 goto out; 3782 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3783 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3784 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3785 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 3786 nfsdarg.mirrorcnt >= 1 && 3787 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3788 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3789 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 3790 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3791 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 3792 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3793 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 3794 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3795 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3796 if (error != 0) { 3797 free(cp, M_TEMP); 3798 goto out; 3799 } 3800 
cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ 3801 nfsdarg.addr = cp; 3802 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3803 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3804 if (error != 0) { 3805 free(nfsdarg.addr, M_TEMP); 3806 free(cp, M_TEMP); 3807 goto out; 3808 } 3809 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3810 nfsdarg.dnshost = cp; 3811 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3812 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3813 if (error != 0) { 3814 free(nfsdarg.addr, M_TEMP); 3815 free(nfsdarg.dnshost, M_TEMP); 3816 free(cp, M_TEMP); 3817 goto out; 3818 } 3819 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3820 nfsdarg.dspath = cp; 3821 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 3822 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 3823 if (error != 0) { 3824 free(nfsdarg.addr, M_TEMP); 3825 free(nfsdarg.dnshost, M_TEMP); 3826 free(nfsdarg.dspath, M_TEMP); 3827 free(cp, M_TEMP); 3828 goto out; 3829 } 3830 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ 3831 nfsdarg.mdspath = cp; 3832 } else { 3833 nfsdarg.addr = NULL; 3834 nfsdarg.addrlen = 0; 3835 nfsdarg.dnshost = NULL; 3836 nfsdarg.dnshostlen = 0; 3837 nfsdarg.dspath = NULL; 3838 nfsdarg.dspathlen = 0; 3839 nfsdarg.mdspath = NULL; 3840 nfsdarg.mdspathlen = 0; 3841 nfsdarg.mirrorcnt = 1; 3842 } 3843 nfsd_timer(NFSD_TD_TO_VNET(td)); 3844 error = nfsrvd_nfsd(td, &nfsdarg); 3845 callout_drain(&NFSD_VNET(nfsd_callout)); 3846 free(nfsdarg.addr, M_TEMP); 3847 free(nfsdarg.dnshost, M_TEMP); 3848 free(nfsdarg.dspath, M_TEMP); 3849 free(nfsdarg.mdspath, M_TEMP); 3850 } else if (uap->flag & NFSSVC_PNFSDS) { 3851 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3852 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 3853 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 3854 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3855 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3856 NULL); 3857 if (error == 0) 3858 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 3859 free(cp, M_TEMP); 3860 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3861 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3862 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3863 buf = malloc(buflen, M_TEMP, M_WAITOK); 3864 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3865 NULL); 3866 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3867 if (error == 0 && pnfsdarg.dspath != NULL) { 3868 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3869 error = copyinstr(pnfsdarg.dspath, cp2, 3870 PATH_MAX + 1, NULL); 3871 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3872 error); 3873 } else 3874 cp2 = NULL; 3875 if (error == 0 && pnfsdarg.curdspath != NULL) { 3876 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3877 error = copyinstr(pnfsdarg.curdspath, cp3, 3878 PATH_MAX + 1, NULL); 3879 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3880 error); 3881 } else 3882 cp3 = NULL; 3883 curdvp = NULL; 3884 fds = NULL; 3885 if (error == 0) 3886 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3887 &buflen, fname, td, &vp, 
&nvp, &pf, &ds, 3888 &fds); 3889 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); 3890 if (error == 0) { 3891 if (pf->dsf_dir >= nfsrv_dsdirsize) { 3892 printf("copymr: dsdir out of range\n"); 3893 pf->dsf_dir = 0; 3894 } 3895 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 3896 error = nfsrv_copymr(vp, nvp, 3897 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 3898 (struct pnfsdsfile *)buf, 3899 buflen / sizeof(*pf), td->td_ucred, td); 3900 vput(vp); 3901 vput(nvp); 3902 if (fds != NULL && error == 0) { 3903 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 3904 ret = vn_lock(curdvp, LK_EXCLUSIVE); 3905 if (ret == 0) { 3906 nfsrv_dsremove(curdvp, fname, 3907 td->td_ucred, td); 3908 NFSVOPUNLOCK(curdvp); 3909 } 3910 } 3911 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 3912 } 3913 free(cp, M_TEMP); 3914 free(cp2, M_TEMP); 3915 free(cp3, M_TEMP); 3916 free(buf, M_TEMP); 3917 } 3918 } else { 3919 error = nfssvc_srvcall(td, uap, td->td_ucred); 3920 } 3921 3922 out: 3923 NFSD_CURVNET_RESTORE(); 3924 NFSEXITCODE(error); 3925 return (error); 3926 } 3927 3928 static int 3929 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3930 { 3931 struct nfsex_args export; 3932 struct nfsex_oldargs oexp; 3933 struct file *fp = NULL; 3934 int stablefd, i, len; 3935 struct nfsd_clid adminrevoke; 3936 struct nfsd_dumplist dumplist; 3937 struct nfsd_dumpclients *dumpclients; 3938 struct nfsd_dumplocklist dumplocklist; 3939 struct nfsd_dumplocks *dumplocks; 3940 struct nameidata nd; 3941 vnode_t vp; 3942 int error = EINVAL, igotlock; 3943 struct proc *procp; 3944 gid_t *grps; 3945 3946 if (uap->flag & NFSSVC_PUBLICFH) { 3947 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3948 sizeof (fhandle_t)); 3949 error = copyin(uap->argp, 3950 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3951 if (!error) 3952 nfs_pubfhset = 1; 3953 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3954 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { 3955 error = copyin(uap->argp,(caddr_t)&export, 3956 
sizeof (struct nfsex_args)); 3957 if (!error) { 3958 grps = NULL; 3959 if (export.export.ex_ngroups > NGROUPS_MAX || 3960 export.export.ex_ngroups < 0) 3961 error = EINVAL; 3962 else if (export.export.ex_ngroups > 0) { 3963 grps = malloc(export.export.ex_ngroups * 3964 sizeof(gid_t), M_TEMP, M_WAITOK); 3965 error = copyin(export.export.ex_groups, grps, 3966 export.export.ex_ngroups * sizeof(gid_t)); 3967 export.export.ex_groups = grps; 3968 } else 3969 export.export.ex_groups = NULL; 3970 if (!error) 3971 error = nfsrv_v4rootexport(&export, cred, p); 3972 free(grps, M_TEMP); 3973 } 3974 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3975 NFSSVC_V4ROOTEXPORT) { 3976 error = copyin(uap->argp,(caddr_t)&oexp, 3977 sizeof (struct nfsex_oldargs)); 3978 if (!error) { 3979 memset(&export.export, 0, sizeof(export.export)); 3980 export.export.ex_flags = (uint64_t)oexp.export.ex_flags; 3981 export.export.ex_root = oexp.export.ex_root; 3982 export.export.ex_uid = oexp.export.ex_anon.cr_uid; 3983 export.export.ex_ngroups = 3984 oexp.export.ex_anon.cr_ngroups; 3985 export.export.ex_groups = NULL; 3986 if (export.export.ex_ngroups > XU_NGROUPS || 3987 export.export.ex_ngroups < 0) 3988 error = EINVAL; 3989 else if (export.export.ex_ngroups > 0) { 3990 export.export.ex_groups = malloc( 3991 export.export.ex_ngroups * sizeof(gid_t), 3992 M_TEMP, M_WAITOK); 3993 for (i = 0; i < export.export.ex_ngroups; i++) 3994 export.export.ex_groups[i] = 3995 oexp.export.ex_anon.cr_groups[i]; 3996 } 3997 export.export.ex_addr = oexp.export.ex_addr; 3998 export.export.ex_addrlen = oexp.export.ex_addrlen; 3999 export.export.ex_mask = oexp.export.ex_mask; 4000 export.export.ex_masklen = oexp.export.ex_masklen; 4001 export.export.ex_indexfile = oexp.export.ex_indexfile; 4002 export.export.ex_numsecflavors = 4003 oexp.export.ex_numsecflavors; 4004 if (export.export.ex_numsecflavors >= MAXSECFLAVORS || 4005 export.export.ex_numsecflavors < 0) 4006 error = EINVAL; 4007 else { 4008 
for (i = 0; i < export.export.ex_numsecflavors; 4009 i++) 4010 export.export.ex_secflavors[i] = 4011 oexp.export.ex_secflavors[i]; 4012 } 4013 export.fspec = oexp.fspec; 4014 if (error == 0) 4015 error = nfsrv_v4rootexport(&export, cred, p); 4016 free(export.export.ex_groups, M_TEMP); 4017 } 4018 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 4019 nfs_pubfhset = 0; 4020 error = 0; 4021 } else if (uap->flag & NFSSVC_STABLERESTART) { 4022 error = copyin(uap->argp, (caddr_t)&stablefd, 4023 sizeof (int)); 4024 if (!error) 4025 error = fp_getfvp(p, stablefd, &fp, &vp); 4026 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 4027 error = EBADF; 4028 if (!error && NFSD_VNET(nfsrv_numnfsd) != 0) 4029 error = ENXIO; 4030 if (!error) { 4031 NFSD_VNET(nfsrv_stablefirst).nsf_fp = fp; 4032 nfsrv_setupstable(p); 4033 } 4034 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 4035 error = copyin(uap->argp, (caddr_t)&adminrevoke, 4036 sizeof (struct nfsd_clid)); 4037 if (!error) 4038 error = nfsrv_adminrevoke(&adminrevoke, p); 4039 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 4040 error = copyin(uap->argp, (caddr_t)&dumplist, 4041 sizeof (struct nfsd_dumplist)); 4042 if (!error && (dumplist.ndl_size < 1 || 4043 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 4044 error = EPERM; 4045 if (!error) { 4046 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 4047 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 4048 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 4049 error = copyout(dumpclients, dumplist.ndl_list, len); 4050 free(dumpclients, M_TEMP); 4051 } 4052 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 4053 error = copyin(uap->argp, (caddr_t)&dumplocklist, 4054 sizeof (struct nfsd_dumplocklist)); 4055 if (!error && (dumplocklist.ndllck_size < 1 || 4056 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 4057 error = EPERM; 4058 if (!error) 4059 error = nfsrv_lookupfilename(&nd, 4060 dumplocklist.ndllck_fname, p); 4061 if (!error) { 4062 len = sizeof (struct 
nfsd_dumplocks) * 4063 dumplocklist.ndllck_size; 4064 dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 4065 nfsrv_dumplocks(nd.ni_vp, dumplocks, 4066 dumplocklist.ndllck_size, p); 4067 vput(nd.ni_vp); 4068 error = copyout(dumplocks, dumplocklist.ndllck_list, 4069 len); 4070 free(dumplocks, M_TEMP); 4071 } 4072 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 4073 procp = p->td_proc; 4074 PROC_LOCK(procp); 4075 nfsd_master_pid = procp->p_pid; 4076 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 4077 nfsd_master_start = procp->p_stats->p_start; 4078 NFSD_VNET(nfsd_master_proc) = procp; 4079 PROC_UNLOCK(procp); 4080 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 4081 NFSLOCKV4ROOTMUTEX(); 4082 if (!NFSD_VNET(nfsrv_suspend_nfsd)) { 4083 /* Lock out all nfsd threads */ 4084 do { 4085 igotlock = nfsv4_lock( 4086 &NFSD_VNET(nfsd_suspend_lock), 1, NULL, 4087 NFSV4ROOTLOCKMUTEXPTR, NULL); 4088 } while (igotlock == 0 && 4089 !NFSD_VNET(nfsrv_suspend_nfsd)); 4090 NFSD_VNET(nfsrv_suspend_nfsd) = true; 4091 } 4092 NFSUNLOCKV4ROOTMUTEX(); 4093 error = 0; 4094 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 4095 NFSLOCKV4ROOTMUTEX(); 4096 if (NFSD_VNET(nfsrv_suspend_nfsd)) { 4097 nfsv4_unlock(&NFSD_VNET(nfsd_suspend_lock), 0); 4098 NFSD_VNET(nfsrv_suspend_nfsd) = false; 4099 } 4100 NFSUNLOCKV4ROOTMUTEX(); 4101 error = 0; 4102 } 4103 4104 NFSEXITCODE(error); 4105 return (error); 4106 } 4107 4108 /* 4109 * Check exports. 4110 * Returns 0 if ok, 1 otherwise. 4111 */ 4112 int 4113 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 4114 { 4115 int i; 4116 4117 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || 4118 (NFSVNO_EXTLSCERT(exp) && 4119 (nd->nd_flag & ND_TLSCERT) == 0) || 4120 (NFSVNO_EXTLSCERTUSER(exp) && 4121 (nd->nd_flag & ND_TLSCERTUSER) == 0)) { 4122 if ((nd->nd_flag & ND_NFSV4) != 0) 4123 return (NFSERR_WRONGSEC); 4124 #ifdef notnow 4125 /* There is currently no auth_stat for this. 
*/ 4126 else if ((nd->nd_flag & ND_TLS) == 0) 4127 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS); 4128 else 4129 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST); 4130 #endif 4131 else 4132 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4133 } 4134 4135 /* 4136 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use 4137 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 4138 */ 4139 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO) 4140 return (0); 4141 4142 /* 4143 * This seems odd, but allow the case where the security flavor 4144 * list is empty. This happens when NFSv4 is traversing non-exported 4145 * file systems. Exported file systems should always have a non-empty 4146 * security flavor list. 4147 */ 4148 if (exp->nes_numsecflavor == 0) 4149 return (0); 4150 4151 for (i = 0; i < exp->nes_numsecflavor; i++) { 4152 /* 4153 * The tests for privacy and integrity must be first, 4154 * since ND_GSS is set for everything but AUTH_SYS. 4155 */ 4156 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 4157 (nd->nd_flag & ND_GSSPRIVACY)) 4158 return (0); 4159 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 4160 (nd->nd_flag & ND_GSSINTEGRITY)) 4161 return (0); 4162 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 4163 (nd->nd_flag & ND_GSS)) 4164 return (0); 4165 if (exp->nes_secflavors[i] == AUTH_SYS && 4166 (nd->nd_flag & ND_GSS) == 0) 4167 return (0); 4168 } 4169 if ((nd->nd_flag & ND_NFSV4) != 0) 4170 return (NFSERR_WRONGSEC); 4171 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4172 } 4173 4174 /* 4175 * Calculate a hash value for the fid in a file handle. 4176 */ 4177 uint32_t 4178 nfsrv_hashfh(fhandle_t *fhp) 4179 { 4180 uint32_t hashval; 4181 4182 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 4183 return (hashval); 4184 } 4185 4186 /* 4187 * Calculate a hash value for the sessionid. 
4188 */ 4189 uint32_t 4190 nfsrv_hashsessionid(uint8_t *sessionid) 4191 { 4192 uint32_t hashval; 4193 4194 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 4195 return (hashval); 4196 } 4197 4198 /* 4199 * Signal the userland master nfsd to backup the stable restart file. 4200 */ 4201 void 4202 nfsrv_backupstable(void) 4203 { 4204 struct proc *procp; 4205 4206 if (NFSD_VNET(nfsd_master_proc) != NULL) { 4207 procp = pfind(nfsd_master_pid); 4208 /* Try to make sure it is the correct process. */ 4209 if (procp == NFSD_VNET(nfsd_master_proc) && 4210 procp->p_stats->p_start.tv_sec == 4211 nfsd_master_start.tv_sec && 4212 procp->p_stats->p_start.tv_usec == 4213 nfsd_master_start.tv_usec && 4214 strcmp(procp->p_comm, nfsd_master_comm) == 0) 4215 kern_psignal(procp, SIGUSR2); 4216 else 4217 NFSD_VNET(nfsd_master_proc) = NULL; 4218 4219 if (procp != NULL) 4220 PROC_UNLOCK(procp); 4221 } 4222 } 4223 4224 /* 4225 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 4226 * The arguments are in a structure, so that they can be passed through 4227 * taskqueue for a kernel process to execute this function. 
 */
struct nfsrvdscreate {
	int			done;		/* set to 1 by start_dscreate() when finished */
	int			inprog;		/* cleared by the submitter; NOTE(review): presumably set while the task runs - confirm */
	struct task		tsk;		/* NOTE(review): presumably the taskqueue entry - confirm */
	struct ucred		*tcred;		/* credentials passed to nfsrv_dscreate() */
	struct vnode		*dvp;		/* DS directory to create the file in */
	NFSPROC_T		*p;		/* thread passed to nfsrv_dscreate() */
	struct pnfsdsfile	*pf;		/* filled in with the DS file information */
	int			err;		/* result of nfsrv_dscreate() */
	fhandle_t		fh;		/* file handle the DS file name is made from */
	struct vattr		va;		/* attributes set after the create ("nvap") */
	struct vattr		createva;	/* attributes used for the create ("vap") */
};

/*
 * Create one DS data file in the directory "dvp" and describe it in "pf".
 *
 * dvp    - DS directory vnode; it is locked exclusively here
 * vap    - attributes for VOP_CREATE()
 * nvap   - attributes applied to the new file with VOP_SETATTR()
 * fhp    - file handle used to generate the file name when fnamep is NULL
 * pf     - on success, receives the DS file handle, the DS address and
 *          the file name
 * dsa    - when non-NULL, receives filerev, size, atime, mtime and bytes
 *          of the new file
 * fnamep - explicit file name, or NULL to derive it from "fhp"
 * tcred  - credentials used for all operations
 * p      - not referenced in this function
 * nvpp   - when non-NULL and the call succeeds, receives the new vnode;
 *          otherwise the new vnode is vput()'d
 *
 * Returns 0 or an errno value; ENOENT is used when the created file is
 * not a suitable NFS DS file.
 */
int
nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
    fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
    char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
{
	struct vnode *nvp;
	struct nameidata named;
	struct vattr va;
	char *bufp;
	u_long *hashp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error;

	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
	    LOCKPARENT | LOCKLEAF | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
	named.ni_cnd.cn_nameptr = bufp;
	/* Use the given name, or generate one from the file handle. */
	if (fnamep != NULL) {
		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
		named.ni_cnd.cn_namelen = strlen(bufp);
	} else
		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);

	/* Create the data file in the DS mount. */
	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
		/*
		 * NOTE(review): the extra reference presumably balances the
		 * one dropped by VOP_VPUT_PAIR() so that the caller's
		 * reference on dvp survives - confirm.
		 */
		vref(dvp);
		VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
		if (error == 0) {
			/* Set the ownership of the file. */
			error = VOP_SETATTR(nvp, nvap, tcred);
			NFSD_DEBUG(4, "nfsrv_dscreate:"
			    " setattr-uid=%d\n", error);
			if (error != 0)
				vput(nvp);
		}
		if (error != 0)
			printf("pNFS: pnfscreate failed=%d\n", error);
	} else
		printf("pNFS: pnfscreate vnlock=%d\n", error);
	if (error == 0) {
		np = VTONFS(nvp);
		nmp = VFSTONFS(nvp->v_mount);
		/*
		 * The new file must live on an "nfs" mount whose server
		 * address fits in a sockaddr_in6 and must have a file
		 * handle of exactly NFSX_MYFH bytes.
		 */
		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
		    != 0 || nmp->nm_nam->sa_len > sizeof(
		    struct sockaddr_in6) ||
		    np->n_fhp->nfh_len != NFSX_MYFH) {
			printf("Bad DS file: fstype=%s salen=%d"
			    " fhlen=%d\n",
			    nvp->v_mount->mnt_vfc->vfc_name,
			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
			error = ENOENT;
		}

		/* Set extattrs for the DS on the MDS file. */
		if (error == 0) {
			if (dsa != NULL) {
				error = VOP_GETATTR(nvp, &va, tcred);
				if (error == 0) {
					dsa->dsa_filerev = va.va_filerev;
					dsa->dsa_size = va.va_size;
					dsa->dsa_atime = va.va_atime;
					dsa->dsa_mtime = va.va_mtime;
					dsa->dsa_bytes = va.va_bytes;
				}
			}
			if (error == 0) {
				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
				    NFSX_MYFH);
				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
				    nmp->nm_nam->sa_len);
				NFSBCOPY(named.ni_cnd.cn_nameptr,
				    pf->dsf_filename,
				    sizeof(pf->dsf_filename));
			}
		} else
			/*
			 * NOTE(review): this else pairs with the check made
			 * right after the "Bad DS file" test, so the message
			 * is printed for that failure and not for a failed
			 * VOP_GETATTR().
			 */
			printf("pNFS: pnfscreate can't get DS"
			    " attr=%d\n", error);
		/* Hand the locked vnode to the caller only on success. */
		if (nvpp != NULL && error == 0)
			*nvpp = nvp;
		else
			vput(nvp);
	}
	nfsvno_relpathbuf(&named);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_dscreate().
 *
 * "arg" is a struct nfsrvdscreate describing the create.  The result of
 * nfsrv_dscreate() is stored in its "err" member, after which "done" is
 * set to 1.  "pending" is not used.
 */
static void
start_dscreate(void *arg, int pending)
{
	struct nfsrvdscreate *dsc;

	dsc = (struct nfsrvdscreate *)arg;
	/* No DS attributes, explicit file name or new vnode are requested. */
	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
	    dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
	dsc->done = 1;
	NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
}

/*
 * Create a pNFS data file on the Data Server(s).
 */
static void
nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrvdscreate *dsc, *tdsc = NULL;
	struct nfsdevice *ds, *tds, *fds;
	struct mount *mp;
	struct pnfsdsfile *pf, *tpf;
	struct pnfsdsattr dsattr;
	struct vattr va;
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsmount *nmp;
	fhandle_t fh;
	uid_t vauid;
	gid_t vagid;
	u_short vamode;
	struct ucred *tcred;
	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
	int failpos, timo;

	/* Get a DS server directory in a round-robin order. */
	mirrorcnt = 1;
	mp = vp->v_mount;
	ds = fds = NULL;
	NFSDDSLOCK();
	/*
	 * Search for the first entry that handles this MDS fs, but use the
	 * first entry for all MDS fs's otherwise.
4380 */ 4381 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { 4382 if (tds->nfsdev_nmp != NULL) { 4383 if (tds->nfsdev_mdsisset == 0 && ds == NULL) 4384 ds = tds; 4385 else if (tds->nfsdev_mdsisset != 0 && fsidcmp( 4386 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { 4387 ds = fds = tds; 4388 break; 4389 } 4390 } 4391 } 4392 if (ds == NULL) { 4393 NFSDDSUNLOCK(); 4394 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 4395 return; 4396 } 4397 i = dsdir[0] = ds->nfsdev_nextdir; 4398 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 4399 dvp[0] = ds->nfsdev_dsdir[i]; 4400 tds = TAILQ_NEXT(ds, nfsdev_list); 4401 if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { 4402 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { 4403 if (tds->nfsdev_nmp != NULL && 4404 ((tds->nfsdev_mdsisset == 0 && fds == NULL) || 4405 (tds->nfsdev_mdsisset != 0 && fds != NULL && 4406 fsidcmp(&mp->mnt_stat.f_fsid, 4407 &tds->nfsdev_mdsfsid) == 0))) { 4408 dsdir[mirrorcnt] = i; 4409 dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; 4410 mirrorcnt++; 4411 if (mirrorcnt >= nfsrv_maxpnfsmirror) 4412 break; 4413 } 4414 } 4415 } 4416 /* Put at end of list to implement round-robin usage. */ 4417 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 4418 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 4419 NFSDDSUNLOCK(); 4420 dsc = NULL; 4421 if (mirrorcnt > 1) 4422 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 4423 M_WAITOK | M_ZERO); 4424 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | 4425 M_ZERO); 4426 4427 error = nfsvno_getfh(vp, &fh, p); 4428 if (error == 0) 4429 error = VOP_GETATTR(vp, &va, cred); 4430 if (error == 0) { 4431 /* Set the attributes for "vp" to Setattr the DS vp. 
*/ 4432 vauid = va.va_uid; 4433 vagid = va.va_gid; 4434 vamode = va.va_mode; 4435 VATTR_NULL(&va); 4436 va.va_uid = vauid; 4437 va.va_gid = vagid; 4438 va.va_mode = vamode; 4439 va.va_size = 0; 4440 } else 4441 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 4442 4443 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 4444 cred->cr_gid); 4445 /* Make data file name based on FH. */ 4446 tcred = newnfs_getcred(); 4447 4448 /* 4449 * Create the file on each DS mirror, using kernel process(es) for the 4450 * additional mirrors. 4451 */ 4452 failpos = -1; 4453 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 4454 tpf->dsf_dir = dsdir[i]; 4455 tdsc->tcred = tcred; 4456 tdsc->p = p; 4457 tdsc->pf = tpf; 4458 tdsc->createva = *vap; 4459 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); 4460 tdsc->va = va; 4461 tdsc->dvp = dvp[i]; 4462 tdsc->done = 0; 4463 tdsc->inprog = 0; 4464 tdsc->err = 0; 4465 ret = EIO; 4466 if (nfs_pnfsiothreads != 0) { 4467 ret = nfs_pnfsio(start_dscreate, tdsc); 4468 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 4469 } 4470 if (ret != 0) { 4471 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 4472 NULL, tcred, p, NULL); 4473 if (ret != 0) { 4474 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 4475 error)); 4476 if (failpos == -1 && nfsds_failerr(ret)) 4477 failpos = i; 4478 else 4479 error = ret; 4480 } 4481 } 4482 } 4483 if (error == 0) { 4484 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 4485 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 4486 &dsattr, NULL, tcred, p, NULL); 4487 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 4488 failpos = mirrorcnt - 1; 4489 error = 0; 4490 } 4491 } 4492 timo = hz / 50; /* Wait for 20msec. */ 4493 if (timo < 1) 4494 timo = 1; 4495 /* Wait for kernel task(s) to complete. 
*/ 4496 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 4497 while (tdsc->inprog != 0 && tdsc->done == 0) 4498 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 4499 if (tdsc->err != 0) { 4500 if (failpos == -1 && nfsds_failerr(tdsc->err)) 4501 failpos = i; 4502 else if (error == 0) 4503 error = tdsc->err; 4504 } 4505 } 4506 4507 /* 4508 * If failpos has been set, that mirror has failed, so it needs 4509 * to be disabled. 4510 */ 4511 if (failpos >= 0) { 4512 nmp = VFSTONFS(dvp[failpos]->v_mount); 4513 NFSLOCKMNT(nmp); 4514 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4515 NFSMNTP_CANCELRPCS)) == 0) { 4516 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4517 NFSUNLOCKMNT(nmp); 4518 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4519 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 4520 ds); 4521 if (ds != NULL) 4522 nfsrv_killrpcs(nmp); 4523 NFSLOCKMNT(nmp); 4524 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4525 wakeup(nmp); 4526 } 4527 NFSUNLOCKMNT(nmp); 4528 } 4529 4530 NFSFREECRED(tcred); 4531 if (error == 0) { 4532 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 4533 4534 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", 4535 mirrorcnt, nfsrv_maxpnfsmirror); 4536 /* 4537 * For all mirrors that couldn't be created, fill in the 4538 * *pf structure, but with an IP address == 0.0.0.0. 
4539 */ 4540 tpf = pf + mirrorcnt; 4541 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { 4542 *tpf = *pf; 4543 tpf->dsf_sin.sin_family = AF_INET; 4544 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); 4545 tpf->dsf_sin.sin_addr.s_addr = 0; 4546 tpf->dsf_sin.sin_port = 0; 4547 } 4548 4549 error = vn_extattr_set(vp, IO_NODELOCKED, 4550 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 4551 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); 4552 if (error == 0) 4553 error = vn_extattr_set(vp, IO_NODELOCKED, 4554 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4555 sizeof(dsattr), (char *)&dsattr, p); 4556 if (error != 0) 4557 printf("pNFS: pnfscreate setextattr=%d\n", 4558 error); 4559 } else 4560 printf("pNFS: pnfscreate=%d\n", error); 4561 free(pf, M_TEMP); 4562 free(dsc, M_TEMP); 4563 } 4564 4565 /* 4566 * Get the information needed to remove the pNFS Data Server file from the 4567 * Metadata file. Upon success, ddvp is set non-NULL to the locked 4568 * DS directory vnode. The caller must unlock *ddvp when done with it. 4569 */ 4570 static void 4571 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, 4572 int *mirrorcntp, char *fname, fhandle_t *fhp) 4573 { 4574 struct vattr va; 4575 struct ucred *tcred; 4576 char *buf; 4577 int buflen, error; 4578 4579 dvpp[0] = NULL; 4580 /* If not an exported regular file or not a pNFS server, just return. */ 4581 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4582 nfsrv_devidcnt == 0) 4583 return; 4584 4585 /* Check to see if this is the last hard link. 
*/ 4586 tcred = newnfs_getcred(); 4587 error = VOP_GETATTR(vp, &va, tcred); 4588 NFSFREECRED(tcred); 4589 if (error != 0) { 4590 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 4591 return; 4592 } 4593 if (va.va_nlink > 1) 4594 return; 4595 4596 error = nfsvno_getfh(vp, fhp, p); 4597 if (error != 0) { 4598 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 4599 return; 4600 } 4601 4602 buflen = 1024; 4603 buf = malloc(buflen, M_TEMP, M_WAITOK); 4604 /* Get the directory vnode for the DS mount and the file handle. */ 4605 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, 4606 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 4607 free(buf, M_TEMP); 4608 if (error != 0) 4609 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 4610 } 4611 4612 /* 4613 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 4614 * The arguments are in a structure, so that they can be passed through 4615 * taskqueue for a kernel process to execute this function. 
4616 */ 4617 struct nfsrvdsremove { 4618 int done; 4619 int inprog; 4620 struct task tsk; 4621 struct ucred *tcred; 4622 struct vnode *dvp; 4623 NFSPROC_T *p; 4624 int err; 4625 char fname[PNFS_FILENAME_LEN + 1]; 4626 }; 4627 4628 static int 4629 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4630 NFSPROC_T *p) 4631 { 4632 struct nameidata named; 4633 struct vnode *nvp; 4634 char *bufp; 4635 u_long *hashp; 4636 int error; 4637 4638 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4639 if (error != 0) 4640 return (error); 4641 named.ni_cnd.cn_nameiop = DELETE; 4642 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4643 named.ni_cnd.cn_cred = tcred; 4644 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 4645 nfsvno_setpathbuf(&named, &bufp, &hashp); 4646 named.ni_cnd.cn_nameptr = bufp; 4647 named.ni_cnd.cn_namelen = strlen(fname); 4648 strlcpy(bufp, fname, NAME_MAX); 4649 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4650 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4651 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4652 if (error == 0) { 4653 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4654 vput(nvp); 4655 } 4656 NFSVOPUNLOCK(dvp); 4657 nfsvno_relpathbuf(&named); 4658 if (error != 0) 4659 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4660 return (error); 4661 } 4662 4663 /* 4664 * Start up the thread that will execute nfsrv_dsremove(). 4665 */ 4666 static void 4667 start_dsremove(void *arg, int pending) 4668 { 4669 struct nfsrvdsremove *dsrm; 4670 4671 dsrm = (struct nfsrvdsremove *)arg; 4672 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4673 dsrm->p); 4674 dsrm->done = 1; 4675 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4676 } 4677 4678 /* 4679 * Remove a pNFS data file from a Data Server. 4680 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4681 * removed to set up the dvp and fill in the FH. 
4682 */ 4683 static void 4684 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4685 NFSPROC_T *p) 4686 { 4687 struct ucred *tcred; 4688 struct nfsrvdsremove *dsrm, *tdsrm; 4689 struct nfsdevice *ds; 4690 struct nfsmount *nmp; 4691 int failpos, i, ret, timo; 4692 4693 tcred = newnfs_getcred(); 4694 dsrm = NULL; 4695 if (mirrorcnt > 1) 4696 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4697 /* 4698 * Remove the file on each DS mirror, using kernel process(es) for the 4699 * additional mirrors. 4700 */ 4701 failpos = -1; 4702 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4703 tdsrm->tcred = tcred; 4704 tdsrm->p = p; 4705 tdsrm->dvp = dvp[i]; 4706 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4707 tdsrm->inprog = 0; 4708 tdsrm->done = 0; 4709 tdsrm->err = 0; 4710 ret = EIO; 4711 if (nfs_pnfsiothreads != 0) { 4712 ret = nfs_pnfsio(start_dsremove, tdsrm); 4713 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4714 } 4715 if (ret != 0) { 4716 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4717 if (failpos == -1 && nfsds_failerr(ret)) 4718 failpos = i; 4719 } 4720 } 4721 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4722 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4723 failpos = mirrorcnt - 1; 4724 timo = hz / 50; /* Wait for 20msec. */ 4725 if (timo < 1) 4726 timo = 1; 4727 /* Wait for kernel task(s) to complete. */ 4728 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4729 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4730 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4731 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4732 failpos = i; 4733 } 4734 4735 /* 4736 * If failpos has been set, that mirror has failed, so it needs 4737 * to be disabled. 
4738 */ 4739 if (failpos >= 0) { 4740 nmp = VFSTONFS(dvp[failpos]->v_mount); 4741 NFSLOCKMNT(nmp); 4742 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4743 NFSMNTP_CANCELRPCS)) == 0) { 4744 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4745 NFSUNLOCKMNT(nmp); 4746 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4747 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4748 ds); 4749 if (ds != NULL) 4750 nfsrv_killrpcs(nmp); 4751 NFSLOCKMNT(nmp); 4752 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4753 wakeup(nmp); 4754 } 4755 NFSUNLOCKMNT(nmp); 4756 } 4757 4758 /* Get rid all layouts for the file. */ 4759 nfsrv_freefilelayouts(fhp); 4760 4761 NFSFREECRED(tcred); 4762 free(dsrm, M_TEMP); 4763 } 4764 4765 /* 4766 * Generate a file name based on the file handle and put it in *bufp. 4767 * Return the number of bytes generated. 4768 */ 4769 static int 4770 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4771 { 4772 int i; 4773 uint8_t *cp; 4774 const uint8_t *hexdigits = "0123456789abcdef"; 4775 4776 cp = (uint8_t *)fhp; 4777 for (i = 0; i < sizeof(*fhp); i++) { 4778 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4779 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4780 } 4781 bufp[2 * i] = '\0'; 4782 return (2 * i); 4783 } 4784 4785 /* 4786 * Update the Metadata file's attributes from the DS file when a Read/Write 4787 * layout is returned. 4788 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4789 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4790 */ 4791 int 4792 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4793 { 4794 struct ucred *tcred; 4795 int error; 4796 4797 /* Do this as root so that it won't fail with EACCES. 
*/ 4798 tcred = newnfs_getcred(); 4799 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4800 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 4801 NFSFREECRED(tcred); 4802 return (error); 4803 } 4804 4805 /* 4806 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 4807 */ 4808 static int 4809 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4810 NFSPROC_T *p) 4811 { 4812 int error; 4813 4814 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 4815 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 4816 return (error); 4817 } 4818 4819 static int 4820 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 4821 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 4822 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 4823 off_t *offp, int content, bool *eofp) 4824 { 4825 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4826 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4827 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4828 struct nfsdevice *ds; 4829 struct pnfsdsattr dsattr; 4830 struct opnfsdsattr odsattr; 4831 char *buf; 4832 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4833 4834 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4835 /* 4836 * If not a regular file, not exported or not a pNFS server, 4837 * just return ENOENT. 4838 */ 4839 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4840 nfsrv_devidcnt == 0) 4841 return (ENOENT); 4842 4843 buflen = 1024; 4844 buf = malloc(buflen, M_TEMP, M_WAITOK); 4845 error = 0; 4846 4847 /* 4848 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4849 * from the MetaData file's extended attribute. 
4850 */ 4851 if (ioproc == NFSPROC_GETATTR) { 4852 error = vn_extattr_get(vp, IO_NODELOCKED, 4853 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4854 p); 4855 if (error == 0) { 4856 if (buflen == sizeof(odsattr)) { 4857 NFSBCOPY(buf, &odsattr, buflen); 4858 nap->na_filerev = odsattr.dsa_filerev; 4859 nap->na_size = odsattr.dsa_size; 4860 nap->na_atime = odsattr.dsa_atime; 4861 nap->na_mtime = odsattr.dsa_mtime; 4862 /* 4863 * Fake na_bytes by rounding up na_size. 4864 * Since we don't know the block size, just 4865 * use BLKDEV_IOSIZE. 4866 */ 4867 nap->na_bytes = (odsattr.dsa_size + 4868 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 4869 } else if (buflen == sizeof(dsattr)) { 4870 NFSBCOPY(buf, &dsattr, buflen); 4871 nap->na_filerev = dsattr.dsa_filerev; 4872 nap->na_size = dsattr.dsa_size; 4873 nap->na_atime = dsattr.dsa_atime; 4874 nap->na_mtime = dsattr.dsa_mtime; 4875 nap->na_bytes = dsattr.dsa_bytes; 4876 } else 4877 error = ENXIO; 4878 } 4879 if (error == 0) { 4880 /* 4881 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4882 * returns 0, just return now. nfsrv_checkdsattr() 4883 * returns 0 if there is no Read/Write layout 4884 * plus either an Open/Write_access or Write 4885 * delegation issued to a client for the file. 4886 */ 4887 if (nfsrv_pnfsgetdsattr == 0 || 4888 nfsrv_checkdsattr(vp, p) == 0) { 4889 free(buf, M_TEMP); 4890 return (error); 4891 } 4892 } 4893 4894 /* 4895 * Clear ENOATTR so the code below will attempt to do a 4896 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4897 * the extended attribute. 
4898 */ 4899 if (error == ENOATTR) 4900 error = 0; 4901 } 4902 4903 origmircnt = -1; 4904 trycnt = 0; 4905 tryagain: 4906 if (error == 0) { 4907 buflen = 1024; 4908 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 4909 LK_EXCLUSIVE) 4910 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 4911 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4912 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4913 NULL, NULL); 4914 if (error == 0) { 4915 for (i = 0; i < mirrorcnt; i++) 4916 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4917 } else 4918 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4919 } else 4920 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4921 if (error == 0) { 4922 failpos = -1; 4923 if (origmircnt == -1) 4924 origmircnt = mirrorcnt; 4925 /* 4926 * If failpos is set to a mirror#, then that mirror has 4927 * failed and will be disabled. For Read, Getattr and Seek, the 4928 * function only tries one mirror, so if that mirror has 4929 * failed, it will need to be retried. As such, increment 4930 * tryitagain for these cases. 4931 * For Write, Setattr and Setacl, the function tries all 4932 * mirrors and will not return an error for the case where 4933 * one mirror has failed. For these cases, the functioning 4934 * mirror(s) will have been modified, so a retry isn't 4935 * necessary. These functions will set failpos for the 4936 * failed mirror#. 4937 */ 4938 if (ioproc == NFSPROC_READDS) { 4939 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4940 mpp, mpp2); 4941 if (nfsds_failerr(error) && mirrorcnt > 1) { 4942 /* 4943 * Setting failpos will cause the mirror 4944 * to be disabled and then a retry of this 4945 * read is required. 
4946 */ 4947 failpos = 0; 4948 error = 0; 4949 trycnt++; 4950 } 4951 } else if (ioproc == NFSPROC_WRITEDS) 4952 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4953 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4954 else if (ioproc == NFSPROC_SETATTR) 4955 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4956 mirrorcnt, nap, &failpos); 4957 else if (ioproc == NFSPROC_SETACL) 4958 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4959 mirrorcnt, aclp, &failpos); 4960 else if (ioproc == NFSPROC_SEEKDS) { 4961 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 4962 p, nmp[0]); 4963 if (nfsds_failerr(error) && mirrorcnt > 1) { 4964 /* 4965 * Setting failpos will cause the mirror 4966 * to be disabled and then a retry of this 4967 * read is required. 4968 */ 4969 failpos = 0; 4970 error = 0; 4971 trycnt++; 4972 } 4973 } else if (ioproc == NFSPROC_ALLOCATE) 4974 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 4975 &nmp[0], mirrorcnt, &failpos); 4976 else if (ioproc == NFSPROC_DEALLOCATE) 4977 error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p, 4978 vp, &nmp[0], mirrorcnt, &failpos); 4979 else { 4980 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4981 vp, nmp[mirrorcnt - 1], nap); 4982 if (nfsds_failerr(error) && mirrorcnt > 1) { 4983 /* 4984 * Setting failpos will cause the mirror 4985 * to be disabled and then a retry of this 4986 * getattr is required. 
4987 */ 4988 failpos = mirrorcnt - 1; 4989 error = 0; 4990 trycnt++; 4991 } 4992 } 4993 ds = NULL; 4994 if (failpos >= 0) { 4995 failnmp = nmp[failpos]; 4996 NFSLOCKMNT(failnmp); 4997 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4998 NFSMNTP_CANCELRPCS)) == 0) { 4999 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 5000 NFSUNLOCKMNT(failnmp); 5001 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 5002 failnmp, p); 5003 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 5004 failpos, ds); 5005 if (ds != NULL) 5006 nfsrv_killrpcs(failnmp); 5007 NFSLOCKMNT(failnmp); 5008 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 5009 wakeup(failnmp); 5010 } 5011 NFSUNLOCKMNT(failnmp); 5012 } 5013 for (i = 0; i < mirrorcnt; i++) 5014 NFSVOPUNLOCK(dvp[i]); 5015 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 5016 trycnt); 5017 /* Try the Read/Getattr again if a mirror was deleted. */ 5018 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 5019 goto tryagain; 5020 } else { 5021 /* Return ENOENT for any Extended Attribute error. */ 5022 error = ENOENT; 5023 } 5024 free(buf, M_TEMP); 5025 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 5026 return (error); 5027 } 5028 5029 /* 5030 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 5031 * attribute. 5032 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 5033 * to be checked. If it points to a NULL nmp, then it returns 5034 * a suitable destination. 5035 * curnmp - If non-NULL, it is the source mount for the copy. 
5036 */ 5037 int 5038 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 5039 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 5040 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 5041 struct nfsmount *curnmp, int *ippos, int *dsdirp) 5042 { 5043 struct vnode *dvp, *nvp = NULL, **tdvpp; 5044 struct mount *mp; 5045 struct nfsmount *nmp, *newnmp; 5046 struct sockaddr *sad; 5047 struct sockaddr_in *sin; 5048 struct nfsdevice *ds, *tds, *fndds; 5049 struct pnfsdsfile *pf; 5050 uint32_t dsdir; 5051 int error, fhiszero, fnd, gotone, i, mirrorcnt; 5052 5053 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 5054 *mirrorcntp = 1; 5055 tdvpp = dvpp; 5056 if (nvpp != NULL) 5057 *nvpp = NULL; 5058 if (dvpp != NULL) 5059 *dvpp = NULL; 5060 if (ippos != NULL) 5061 *ippos = -1; 5062 if (newnmpp != NULL) 5063 newnmp = *newnmpp; 5064 else 5065 newnmp = NULL; 5066 mp = vp->v_mount; 5067 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 5068 "pnfsd.dsfile", buflenp, buf, p); 5069 mirrorcnt = *buflenp / sizeof(*pf); 5070 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 5071 *buflenp != sizeof(*pf) * mirrorcnt)) 5072 error = ENOATTR; 5073 5074 pf = (struct pnfsdsfile *)buf; 5075 /* If curnmp != NULL, check for a match in the mirror list. 
*/ 5076 if (curnmp != NULL && error == 0) { 5077 fnd = 0; 5078 for (i = 0; i < mirrorcnt; i++, pf++) { 5079 sad = (struct sockaddr *)&pf->dsf_sin; 5080 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 5081 if (ippos != NULL) 5082 *ippos = i; 5083 fnd = 1; 5084 break; 5085 } 5086 } 5087 if (fnd == 0) 5088 error = ENXIO; 5089 } 5090 5091 gotone = 0; 5092 pf = (struct pnfsdsfile *)buf; 5093 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 5094 error); 5095 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 5096 fhiszero = 0; 5097 sad = (struct sockaddr *)&pf->dsf_sin; 5098 sin = &pf->dsf_sin; 5099 dsdir = pf->dsf_dir; 5100 if (dsdir >= nfsrv_dsdirsize) { 5101 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 5102 error = ENOATTR; 5103 } else if (nvpp != NULL && newnmp != NULL && 5104 nfsaddr2_match(sad, newnmp->nm_nam)) 5105 error = EEXIST; 5106 if (error == 0) { 5107 if (ippos != NULL && curnmp == NULL && 5108 sad->sa_family == AF_INET && 5109 sin->sin_addr.s_addr == 0) 5110 *ippos = i; 5111 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 5112 fhiszero = 1; 5113 /* Use the socket address to find the mount point. */ 5114 fndds = NULL; 5115 NFSDDSLOCK(); 5116 /* Find a match for the IP address. */ 5117 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5118 if (ds->nfsdev_nmp != NULL) { 5119 dvp = ds->nfsdev_dvp; 5120 nmp = VFSTONFS(dvp->v_mount); 5121 if (nmp != ds->nfsdev_nmp) 5122 printf("different2 nmp %p %p\n", 5123 nmp, ds->nfsdev_nmp); 5124 if (nfsaddr2_match(sad, nmp->nm_nam)) { 5125 fndds = ds; 5126 break; 5127 } 5128 } 5129 } 5130 if (fndds != NULL && newnmpp != NULL && 5131 newnmp == NULL) { 5132 /* Search for a place to make a mirror copy. 
*/ 5133 TAILQ_FOREACH(tds, &nfsrv_devidhead, 5134 nfsdev_list) { 5135 if (tds->nfsdev_nmp != NULL && 5136 fndds != tds && 5137 ((tds->nfsdev_mdsisset == 0 && 5138 fndds->nfsdev_mdsisset == 0) || 5139 (tds->nfsdev_mdsisset != 0 && 5140 fndds->nfsdev_mdsisset != 0 && 5141 fsidcmp(&tds->nfsdev_mdsfsid, 5142 &mp->mnt_stat.f_fsid) == 0))) { 5143 *newnmpp = tds->nfsdev_nmp; 5144 break; 5145 } 5146 } 5147 if (tds != NULL) { 5148 /* 5149 * Move this entry to the end of the 5150 * list, so it won't be selected as 5151 * easily the next time. 5152 */ 5153 TAILQ_REMOVE(&nfsrv_devidhead, tds, 5154 nfsdev_list); 5155 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 5156 nfsdev_list); 5157 } 5158 } 5159 NFSDDSUNLOCK(); 5160 if (fndds != NULL) { 5161 dvp = fndds->nfsdev_dsdir[dsdir]; 5162 if (lktype != 0 || fhiszero != 0 || 5163 (nvpp != NULL && *nvpp == NULL)) { 5164 if (fhiszero != 0) 5165 error = vn_lock(dvp, 5166 LK_EXCLUSIVE); 5167 else if (lktype != 0) 5168 error = vn_lock(dvp, lktype); 5169 else 5170 error = vn_lock(dvp, LK_SHARED); 5171 /* 5172 * If the file handle is all 0's, try to 5173 * do a Lookup against the DS to acquire 5174 * it. 5175 * If dvpp == NULL or the Lookup fails, 5176 * unlock dvp after the call. 
5177 */ 5178 if (error == 0 && (fhiszero != 0 || 5179 (nvpp != NULL && *nvpp == NULL))) { 5180 error = nfsrv_pnfslookupds(vp, 5181 dvp, pf, &nvp, p); 5182 if (error == 0) { 5183 if (fhiszero != 0) 5184 nfsrv_pnfssetfh( 5185 vp, pf, 5186 devid, 5187 fnamep, 5188 nvp, p); 5189 if (nvpp != NULL && 5190 *nvpp == NULL) { 5191 *nvpp = nvp; 5192 *dsdirp = dsdir; 5193 } else 5194 vput(nvp); 5195 } 5196 if (error != 0 || lktype == 0) 5197 NFSVOPUNLOCK(dvp); 5198 } 5199 } 5200 if (error == 0) { 5201 gotone++; 5202 NFSD_DEBUG(4, "gotone=%d\n", gotone); 5203 if (devid != NULL) { 5204 NFSBCOPY(fndds->nfsdev_deviceid, 5205 devid, NFSX_V4DEVICEID); 5206 devid += NFSX_V4DEVICEID; 5207 } 5208 if (dvpp != NULL) 5209 *tdvpp++ = dvp; 5210 if (fhp != NULL) 5211 NFSBCOPY(&pf->dsf_fh, fhp++, 5212 NFSX_MYFH); 5213 if (fnamep != NULL && gotone == 1) 5214 strlcpy(fnamep, 5215 pf->dsf_filename, 5216 sizeof(pf->dsf_filename)); 5217 } else 5218 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 5219 "err=%d\n", error); 5220 } 5221 } 5222 } 5223 if (error == 0 && gotone == 0) 5224 error = ENOENT; 5225 5226 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 5227 error); 5228 if (error == 0) 5229 *mirrorcntp = gotone; 5230 else { 5231 if (gotone > 0 && dvpp != NULL) { 5232 /* 5233 * If the error didn't occur on the first one and 5234 * dvpp != NULL, the one(s) prior to the failure will 5235 * have locked dvp's that need to be unlocked. 5236 */ 5237 for (i = 0; i < gotone; i++) { 5238 NFSVOPUNLOCK(*dvpp); 5239 *dvpp++ = NULL; 5240 } 5241 } 5242 /* 5243 * If it found the vnode to be copied from before a failure, 5244 * it needs to be vput()'d. 5245 */ 5246 if (nvpp != NULL && *nvpp != NULL) { 5247 vput(*nvpp); 5248 *nvpp = NULL; 5249 } 5250 } 5251 return (error); 5252 } 5253 5254 /* 5255 * Set the extended attribute for the Change attribute. 
*/
/*
 * Copies na_filerev, na_size, na_atime, na_mtime and na_bytes from *nap
 * into a struct pnfsdsattr and stores it as the "pnfsd.dsattr" system
 * extended attribute of vp, which must be exclusively locked.
 * Returns the value from vn_extattr_set(); a failure is also logged.
 */
static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
	struct pnfsdsattr dsattr;
	int error;

	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
	dsattr.dsa_filerev = nap->na_filerev;
	dsattr.dsa_size = nap->na_size;
	dsattr.dsa_atime = nap->na_atime;
	dsattr.dsa_mtime = nap->na_mtime;
	dsattr.dsa_bytes = nap->na_bytes;
	error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
	if (error != 0)
		printf("pNFS: setextattr=%d\n", error);
	return (error);
}

/*
 * Do a Read RPC of len bytes at offset off against the DS file with file
 * handle *fhp on the DS mount nmp.
 * Upon success with a non-zero length reply, *mpp is set to the mbuf
 * chain holding the data (trimmed to the data rounded up to a multiple
 * of 4 bytes) and *mpendp to the last mbuf of that chain.  *mpp is set to
 * NULL otherwise; *mpendp is not set in that case.
 * Returns 0, the error from newnfs_request(), the RPC's reply status or
 * ENOENT if the reply's mbuf chain is not as expected.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0, cred);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* Read arguments: 64bit offset followed by the 32bit length. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		/*
		 * NOTE(review): the NFSM_DISSECT()/NFSM_STRSIZ() macros are
		 * presumed to set error and "goto nfsmout" upon failure -
		 * confirm against their definitions.  The first word
		 * dissected here is skipped without being looked at.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			/* Ran off the end of the chain before tlen bytes. */
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the data chain to the caller. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}

/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 */
struct nfsrvwritedsdorpc {
	int			done;
	int			inprog;
	struct task		tsk;
	fhandle_t		fh;
	off_t			off;
	int			len;
	struct nfsmount		*nmp;
	struct ucred		*cred;
	NFSPROC_T		*p;
	struct mbuf		*m;
	int			err;
};

/*
 * Do a FileSync Write RPC, followed by a Getattr, of the len bytes of
 * data in the mbuf chain m at offset off against the DS file with file
 * handle *fhp on the DS mount nmp.
 * The mbuf chain m is linked onto the request, so it must be non-NULL.
 * The attributes from the reply are loaded via nfsv4_loadattr() using
 * nap.
 * Returns 0, the error from newnfs_request(), the RPC's reply status,
 * NFSERR_IO if the reply did not indicate FileSync or an error from
 * parsing the reply.
 */
static int
nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
    struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int commit, error, retlen;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	txdr_hyper(off, tl);
	tl += 2;
	/*
	 * Do all writes FileSync, since the server doesn't hold onto dirty
	 * buffers.  Since clients should be accessing the DS servers directly
	 * using the pNFS layouts, this just needs to work correctly as a
	 * fallback.
	 */
	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
	*tl = txdr_unsigned(len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);

	/* Put data in mbuf chain. */
	nd->nd_mb->m_next = m;

	/* Set nd_mb and nd_bpos to end of data. */
	while (m->m_next != NULL)
		m = m->m_next;
	nd->nd_mb = m;
	nfsm_set(nd, m->m_len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);

	/* Do a Getattr for the attributes that change upon writing. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		/* A non-zero status means there is nothing more to parse. */
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		retlen = fxdr_unsigned(int, *tl++);
		commit = fxdr_unsigned(int, *tl);
		/* Anything other than FileSync is treated as an I/O error. */
		if (commit != NFSWRITE_FILESYNC)
			error = NFSERR_IO;
		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
		    retlen, commit, error);
	} else
		error = nd->nd_repstat;
	/* We have no use for the Write Verifier since we use FileSync. */

	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		/* Skip the two words preceding the Getattr reply. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_writedsdorpc().
5508 */ 5509 static void 5510 start_writedsdorpc(void *arg, int pending) 5511 { 5512 struct nfsrvwritedsdorpc *drpc; 5513 5514 drpc = (struct nfsrvwritedsdorpc *)arg; 5515 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5516 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 5517 drpc->done = 1; 5518 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 5519 } 5520 5521 static int 5522 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5523 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5524 struct mbuf **mpp, char *cp, int *failposp) 5525 { 5526 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; 5527 struct nfsvattr na; 5528 struct mbuf *m; 5529 int error, i, offs, ret, timo; 5530 5531 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5532 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5533 drpc = NULL; 5534 if (mirrorcnt > 1) 5535 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5536 M_WAITOK); 5537 5538 /* Calculate offset in mbuf chain that data starts. */ 5539 offs = cp - mtod(*mpp, char *); 5540 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5541 5542 /* 5543 * Do the write RPC for every DS, using a separate kernel process 5544 * for every DS except the last one. 
5545 */ 5546 error = 0; 5547 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5548 tdrpc->done = 0; 5549 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5550 tdrpc->off = off; 5551 tdrpc->len = len; 5552 tdrpc->nmp = *nmpp; 5553 tdrpc->cred = cred; 5554 tdrpc->p = p; 5555 tdrpc->inprog = 0; 5556 tdrpc->err = 0; 5557 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5558 ret = EIO; 5559 if (nfs_pnfsiothreads != 0) { 5560 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5561 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5562 ret); 5563 } 5564 if (ret != 0) { 5565 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5566 tdrpc->m, cred, p); 5567 if (nfsds_failerr(ret) && *failposp == -1) 5568 *failposp = i; 5569 else if (error == 0 && ret != 0) 5570 error = ret; 5571 } 5572 nmpp++; 5573 fhp++; 5574 } 5575 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5576 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5577 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5578 *failposp = mirrorcnt - 1; 5579 else if (error == 0 && ret != 0) 5580 error = ret; 5581 if (error == 0) 5582 error = nfsrv_setextattr(vp, &na, p); 5583 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5584 tdrpc = drpc; 5585 timo = hz / 50; /* Wait for 20msec. */ 5586 if (timo < 1) 5587 timo = 1; 5588 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5589 /* Wait for RPCs on separate threads to complete. */ 5590 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5591 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5592 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5593 *failposp = i; 5594 else if (error == 0 && tdrpc->err != 0) 5595 error = tdrpc->err; 5596 } 5597 free(drpc, M_TEMP); 5598 return (error); 5599 } 5600 5601 /* 5602 * Do a allocate RPC on a DS data file, using this structure for the arguments, 5603 * so that this function can be executed by a separate kernel process. 
5604 */ 5605 struct nfsrvallocatedsdorpc { 5606 int done; 5607 int inprog; 5608 struct task tsk; 5609 fhandle_t fh; 5610 off_t off; 5611 off_t len; 5612 struct nfsmount *nmp; 5613 struct ucred *cred; 5614 NFSPROC_T *p; 5615 int err; 5616 }; 5617 5618 static int 5619 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5620 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5621 { 5622 uint32_t *tl; 5623 struct nfsrv_descript *nd; 5624 nfsattrbit_t attrbits; 5625 nfsv4stateid_t st; 5626 int error; 5627 5628 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5629 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 5630 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 5631 5632 /* 5633 * Use a stateid where other is an alternating 01010 pattern and 5634 * seqid is 0xffffffff. This value is not defined as special by 5635 * the RFC and is used by the FreeBSD NFS server to indicate an 5636 * MDS->DS proxy operation. 5637 */ 5638 st.other[0] = 0x55555555; 5639 st.other[1] = 0x55555555; 5640 st.other[2] = 0x55555555; 5641 st.seqid = 0xffffffff; 5642 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5643 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5644 txdr_hyper(off, tl); tl += 2; 5645 txdr_hyper(len, tl); tl += 2; 5646 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 5647 5648 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5649 NFSGETATTR_ATTRBIT(&attrbits); 5650 nfsrv_putattrbit(nd, &attrbits); 5651 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5652 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5653 if (error != 0) { 5654 free(nd, M_TEMP); 5655 return (error); 5656 } 5657 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 5658 nd->nd_repstat); 5659 if (nd->nd_repstat == 0) { 5660 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5661 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5662 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5663 } else 5664 error = 
nd->nd_repstat; 5665 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 5666 nfsmout: 5667 m_freem(nd->nd_mrep); 5668 free(nd, M_TEMP); 5669 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 5670 return (error); 5671 } 5672 5673 /* 5674 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 5675 */ 5676 static void 5677 start_allocatedsdorpc(void *arg, int pending) 5678 { 5679 struct nfsrvallocatedsdorpc *drpc; 5680 5681 drpc = (struct nfsrvallocatedsdorpc *)arg; 5682 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5683 drpc->len, NULL, drpc->cred, drpc->p); 5684 drpc->done = 1; 5685 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 5686 } 5687 5688 static int 5689 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5690 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5691 int *failposp) 5692 { 5693 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 5694 struct nfsvattr na; 5695 int error, i, ret, timo; 5696 5697 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 5698 drpc = NULL; 5699 if (mirrorcnt > 1) 5700 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5701 M_WAITOK); 5702 5703 /* 5704 * Do the allocate RPC for every DS, using a separate kernel process 5705 * for every DS except the last one. 
5706 */ 5707 error = 0; 5708 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5709 tdrpc->done = 0; 5710 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5711 tdrpc->off = off; 5712 tdrpc->len = len; 5713 tdrpc->nmp = *nmpp; 5714 tdrpc->cred = cred; 5715 tdrpc->p = p; 5716 tdrpc->inprog = 0; 5717 tdrpc->err = 0; 5718 ret = EIO; 5719 if (nfs_pnfsiothreads != 0) { 5720 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 5721 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 5722 ret); 5723 } 5724 if (ret != 0) { 5725 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 5726 cred, p); 5727 if (nfsds_failerr(ret) && *failposp == -1) 5728 *failposp = i; 5729 else if (error == 0 && ret != 0) 5730 error = ret; 5731 } 5732 nmpp++; 5733 fhp++; 5734 } 5735 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5736 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5737 *failposp = mirrorcnt - 1; 5738 else if (error == 0 && ret != 0) 5739 error = ret; 5740 if (error == 0) 5741 error = nfsrv_setextattr(vp, &na, p); 5742 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 5743 tdrpc = drpc; 5744 timo = hz / 50; /* Wait for 20msec. */ 5745 if (timo < 1) 5746 timo = 1; 5747 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5748 /* Wait for RPCs on separate threads to complete. */ 5749 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5750 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5751 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5752 *failposp = i; 5753 else if (error == 0 && tdrpc->err != 0) 5754 error = tdrpc->err; 5755 } 5756 free(drpc, M_TEMP); 5757 return (error); 5758 } 5759 5760 /* 5761 * Do a deallocate RPC on a DS data file, using this structure for the 5762 * arguments, so that this function can be executed by a separate kernel 5763 * process. 
*/
struct nfsrvdeallocatedsdorpc {
	int			done;	/* Set to 1 by
					   start_deallocatedsdorpc(). */
	int			inprog;	/* Cleared before dispatch; polled by
					   the waiter.  NOTE(review): presumably
					   set by nfs_pnfsio() - confirm. */
	struct task		tsk;	/* Address is the tsleep() channel. */
	fhandle_t		fh;	/* Copy of the DS file handle. */
	off_t			off;	/* Start of the range. */
	off_t			len;	/* Length of the range. */
	struct nfsmount		*nmp;	/* DS mount the RPC is sent to. */
	struct ucred		*cred;
	NFSPROC_T		*p;
	int			err;	/* Result of
					   nfsrv_deallocatedsdorpc(). */
};

/*
 * Perform one Deallocate followed by a Getattr against a single DS.
 *
 * nmp, fhp - DS mount point and file handle of the DS data file
 * off, len - the byte range put in the Deallocate arguments
 * nap	    - passed to nfsv4_loadattr() for the reply attributes; callers
 *	      in this file pass NULL when they do not want them
 * cred, p  - credentials and thread handed to newnfs_request()
 *
 * Returns 0 on success, otherwise the newnfs_request() error, the reply
 * status (nd_repstat) or an nfsv4_loadattr() error.
 *
 * NOTE(review): NFSM_DISSECT() is defined elsewhere; it presumably sets
 * "error" and jumps to nfsmout when the reply is too short - confirm.
 */
static int
nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
    off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int error;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* Offset and length, plus room for the Getattr operation number. */
	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
	txdr_hyper(off, tl); tl += 2;
	txdr_hyper(len, tl); tl += 2;
	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);

	/* Do a Getattr for the attributes that change upon writing. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		/* No reply to free on this path; only the descriptor. */
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n",
	    nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		/* A non-zero status means nothing more follows. */
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		/* Skip the Getattr Op# and status, then load the attributes. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	} else
		error = nd->nd_repstat;
	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_deallocatedsdorpc().
5856 */ 5857 static void 5858 start_deallocatedsdorpc(void *arg, int pending) 5859 { 5860 struct nfsrvdeallocatedsdorpc *drpc; 5861 5862 drpc = (struct nfsrvdeallocatedsdorpc *)arg; 5863 drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5864 drpc->len, NULL, drpc->cred, drpc->p); 5865 drpc->done = 1; 5866 NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err); 5867 } 5868 5869 static int 5870 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5871 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5872 int *failposp) 5873 { 5874 struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL; 5875 struct nfsvattr na; 5876 int error, i, ret, timo; 5877 5878 NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n"); 5879 drpc = NULL; 5880 if (mirrorcnt > 1) 5881 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5882 M_WAITOK); 5883 5884 /* 5885 * Do the deallocate RPC for every DS, using a separate kernel process 5886 * for every DS except the last one. 
5887 */ 5888 error = 0; 5889 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5890 tdrpc->done = 0; 5891 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5892 tdrpc->off = off; 5893 tdrpc->len = len; 5894 tdrpc->nmp = *nmpp; 5895 tdrpc->cred = cred; 5896 tdrpc->p = p; 5897 tdrpc->inprog = 0; 5898 tdrpc->err = 0; 5899 ret = EIO; 5900 if (nfs_pnfsiothreads != 0) { 5901 ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc); 5902 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n", 5903 ret); 5904 } 5905 if (ret != 0) { 5906 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, 5907 NULL, cred, p); 5908 if (nfsds_failerr(ret) && *failposp == -1) 5909 *failposp = i; 5910 else if (error == 0 && ret != 0) 5911 error = ret; 5912 } 5913 nmpp++; 5914 fhp++; 5915 } 5916 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5917 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5918 *failposp = mirrorcnt - 1; 5919 else if (error == 0 && ret != 0) 5920 error = ret; 5921 if (error == 0) 5922 error = nfsrv_setextattr(vp, &na, p); 5923 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error); 5924 tdrpc = drpc; 5925 timo = hz / 50; /* Wait for 20msec. */ 5926 if (timo < 1) 5927 timo = 1; 5928 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5929 /* Wait for RPCs on separate threads to complete. 
*/ 5930 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5931 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5932 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5933 *failposp = i; 5934 else if (error == 0 && tdrpc->err != 0) 5935 error = tdrpc->err; 5936 } 5937 free(drpc, M_TEMP); 5938 return (error); 5939 } 5940 5941 static int 5942 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5943 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5944 struct nfsvattr *dsnap) 5945 { 5946 uint32_t *tl; 5947 struct nfsrv_descript *nd; 5948 nfsv4stateid_t st; 5949 nfsattrbit_t attrbits; 5950 int error; 5951 5952 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5953 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5954 /* 5955 * Use a stateid where other is an alternating 01010 pattern and 5956 * seqid is 0xffffffff. This value is not defined as special by 5957 * the RFC and is used by the FreeBSD NFS server to indicate an 5958 * MDS->DS proxy operation. 5959 */ 5960 st.other[0] = 0x55555555; 5961 st.other[1] = 0x55555555; 5962 st.other[2] = 0x55555555; 5963 st.seqid = 0xffffffff; 5964 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5965 NULL, NULL, 0, 0, cred); 5966 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5967 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5968 5969 /* Do a Getattr for the attributes that change due to writing. 
*/ 5970 NFSZERO_ATTRBIT(&attrbits); 5971 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5972 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5973 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5974 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5975 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5976 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5977 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5978 (void) nfsrv_putattrbit(nd, &attrbits); 5979 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5980 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5981 if (error != 0) { 5982 free(nd, M_TEMP); 5983 return (error); 5984 } 5985 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5986 nd->nd_repstat); 5987 /* Get rid of weak cache consistency data for now. */ 5988 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5989 (ND_NFSV4 | ND_V4WCCATTR)) { 5990 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5991 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5992 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5993 if (error != 0) 5994 goto nfsmout; 5995 /* 5996 * Get rid of Op# and status for next op. 5997 */ 5998 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5999 if (*++tl != 0) 6000 nd->nd_flag |= ND_NOMOREDATA; 6001 } 6002 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 6003 if (error != 0) 6004 goto nfsmout; 6005 if (nd->nd_repstat != 0) 6006 error = nd->nd_repstat; 6007 /* 6008 * Get the Change, Size, Access Time and Modify Time attributes and set 6009 * on the Metadata file, so its attributes will be what the file's 6010 * would be if it had been written. 
6011 */ 6012 if (error == 0) { 6013 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 6014 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 6015 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 6016 } 6017 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 6018 nfsmout: 6019 m_freem(nd->nd_mrep); 6020 free(nd, M_TEMP); 6021 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 6022 return (error); 6023 } 6024 6025 struct nfsrvsetattrdsdorpc { 6026 int done; 6027 int inprog; 6028 struct task tsk; 6029 fhandle_t fh; 6030 struct nfsmount *nmp; 6031 struct vnode *vp; 6032 struct ucred *cred; 6033 NFSPROC_T *p; 6034 struct nfsvattr na; 6035 struct nfsvattr dsna; 6036 int err; 6037 }; 6038 6039 /* 6040 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 6041 */ 6042 static void 6043 start_setattrdsdorpc(void *arg, int pending) 6044 { 6045 struct nfsrvsetattrdsdorpc *drpc; 6046 6047 drpc = (struct nfsrvsetattrdsdorpc *)arg; 6048 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 6049 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 6050 drpc->done = 1; 6051 } 6052 6053 static int 6054 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6055 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 6056 struct nfsvattr *nap, int *failposp) 6057 { 6058 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; 6059 struct nfsvattr na; 6060 int error, i, ret, timo; 6061 6062 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 6063 drpc = NULL; 6064 if (mirrorcnt > 1) 6065 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6066 M_WAITOK); 6067 6068 /* 6069 * Do the setattr RPC for every DS, using a separate kernel process 6070 * for every DS except the last one. 
6071 */ 6072 error = 0; 6073 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6074 tdrpc->done = 0; 6075 tdrpc->inprog = 0; 6076 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6077 tdrpc->nmp = *nmpp; 6078 tdrpc->vp = vp; 6079 tdrpc->cred = cred; 6080 tdrpc->p = p; 6081 tdrpc->na = *nap; 6082 tdrpc->err = 0; 6083 ret = EIO; 6084 if (nfs_pnfsiothreads != 0) { 6085 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 6086 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 6087 ret); 6088 } 6089 if (ret != 0) { 6090 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 6091 &na); 6092 if (nfsds_failerr(ret) && *failposp == -1) 6093 *failposp = i; 6094 else if (error == 0 && ret != 0) 6095 error = ret; 6096 } 6097 nmpp++; 6098 fhp++; 6099 } 6100 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 6101 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6102 *failposp = mirrorcnt - 1; 6103 else if (error == 0 && ret != 0) 6104 error = ret; 6105 if (error == 0) 6106 error = nfsrv_setextattr(vp, &na, p); 6107 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 6108 tdrpc = drpc; 6109 timo = hz / 50; /* Wait for 20msec. */ 6110 if (timo < 1) 6111 timo = 1; 6112 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6113 /* Wait for RPCs on separate threads to complete. */ 6114 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6115 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 6116 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6117 *failposp = i; 6118 else if (error == 0 && tdrpc->err != 0) 6119 error = tdrpc->err; 6120 } 6121 free(drpc, M_TEMP); 6122 return (error); 6123 } 6124 6125 /* 6126 * Do a Setattr of an NFSv4 ACL on the DS file. 
6127 */ 6128 static int 6129 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6130 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 6131 { 6132 struct nfsrv_descript *nd; 6133 nfsv4stateid_t st; 6134 nfsattrbit_t attrbits; 6135 int error; 6136 6137 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 6138 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6139 /* 6140 * Use a stateid where other is an alternating 01010 pattern and 6141 * seqid is 0xffffffff. This value is not defined as special by 6142 * the RFC and is used by the FreeBSD NFS server to indicate an 6143 * MDS->DS proxy operation. 6144 */ 6145 st.other[0] = 0x55555555; 6146 st.other[1] = 0x55555555; 6147 st.other[2] = 0x55555555; 6148 st.seqid = 0xffffffff; 6149 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 6150 NULL, NULL, 0, 0, cred); 6151 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6152 NFSZERO_ATTRBIT(&attrbits); 6153 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 6154 /* 6155 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 6156 * so passing in the metadata "vp" will be ok, since it is of 6157 * the same type (VREG). 
6158 */ 6159 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 6160 NULL, 0, 0, 0, 0, 0, NULL); 6161 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6162 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6163 if (error != 0) { 6164 free(nd, M_TEMP); 6165 return (error); 6166 } 6167 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 6168 nd->nd_repstat); 6169 error = nd->nd_repstat; 6170 m_freem(nd->nd_mrep); 6171 free(nd, M_TEMP); 6172 return (error); 6173 } 6174 6175 struct nfsrvsetacldsdorpc { 6176 int done; 6177 int inprog; 6178 struct task tsk; 6179 fhandle_t fh; 6180 struct nfsmount *nmp; 6181 struct vnode *vp; 6182 struct ucred *cred; 6183 NFSPROC_T *p; 6184 struct acl *aclp; 6185 int err; 6186 }; 6187 6188 /* 6189 * Start up the thread that will execute nfsrv_setacldsdorpc(). 6190 */ 6191 static void 6192 start_setacldsdorpc(void *arg, int pending) 6193 { 6194 struct nfsrvsetacldsdorpc *drpc; 6195 6196 drpc = (struct nfsrvsetacldsdorpc *)arg; 6197 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 6198 drpc->vp, drpc->nmp, drpc->aclp); 6199 drpc->done = 1; 6200 } 6201 6202 static int 6203 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6204 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 6205 int *failposp) 6206 { 6207 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 6208 int error, i, ret, timo; 6209 6210 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 6211 drpc = NULL; 6212 if (mirrorcnt > 1) 6213 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6214 M_WAITOK); 6215 6216 /* 6217 * Do the setattr RPC for every DS, using a separate kernel process 6218 * for every DS except the last one. 
6219 */ 6220 error = 0; 6221 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6222 tdrpc->done = 0; 6223 tdrpc->inprog = 0; 6224 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6225 tdrpc->nmp = *nmpp; 6226 tdrpc->vp = vp; 6227 tdrpc->cred = cred; 6228 tdrpc->p = p; 6229 tdrpc->aclp = aclp; 6230 tdrpc->err = 0; 6231 ret = EIO; 6232 if (nfs_pnfsiothreads != 0) { 6233 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 6234 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 6235 ret); 6236 } 6237 if (ret != 0) { 6238 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 6239 aclp); 6240 if (nfsds_failerr(ret) && *failposp == -1) 6241 *failposp = i; 6242 else if (error == 0 && ret != 0) 6243 error = ret; 6244 } 6245 nmpp++; 6246 fhp++; 6247 } 6248 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 6249 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6250 *failposp = mirrorcnt - 1; 6251 else if (error == 0 && ret != 0) 6252 error = ret; 6253 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 6254 tdrpc = drpc; 6255 timo = hz / 50; /* Wait for 20msec. */ 6256 if (timo < 1) 6257 timo = 1; 6258 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6259 /* Wait for RPCs on separate threads to complete. */ 6260 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6261 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 6262 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6263 *failposp = i; 6264 else if (error == 0 && tdrpc->err != 0) 6265 error = tdrpc->err; 6266 } 6267 free(drpc, M_TEMP); 6268 return (error); 6269 } 6270 6271 /* 6272 * Getattr call to the DS for the attributes that change due to writing. 
6273 */ 6274 static int 6275 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6276 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 6277 { 6278 struct nfsrv_descript *nd; 6279 int error; 6280 nfsattrbit_t attrbits; 6281 6282 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 6283 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6284 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 6285 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6286 NFSZERO_ATTRBIT(&attrbits); 6287 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6288 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6289 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6290 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6291 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6292 (void) nfsrv_putattrbit(nd, &attrbits); 6293 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6294 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6295 if (error != 0) { 6296 free(nd, M_TEMP); 6297 return (error); 6298 } 6299 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 6300 nd->nd_repstat); 6301 if (nd->nd_repstat == 0) { 6302 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 6303 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 6304 NULL, NULL); 6305 /* 6306 * We can only save the updated values in the extended 6307 * attribute if the vp is exclusively locked. 6308 * This should happen when any of the following operations 6309 * occur on the vnode: 6310 * Close, Delegreturn, LayoutCommit, LayoutReturn 6311 * As such, the updated extended attribute should get saved 6312 * before nfsrv_checkdsattr() returns 0 and allows the cached 6313 * attributes to be returned without calling this function. 
6314 */ 6315 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 6316 error = nfsrv_setextattr(vp, nap, p); 6317 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 6318 error); 6319 } 6320 } else 6321 error = nd->nd_repstat; 6322 m_freem(nd->nd_mrep); 6323 free(nd, M_TEMP); 6324 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 6325 return (error); 6326 } 6327 6328 /* 6329 * Seek call to a DS. 6330 */ 6331 static int 6332 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 6333 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 6334 { 6335 uint32_t *tl; 6336 struct nfsrv_descript *nd; 6337 nfsv4stateid_t st; 6338 int error; 6339 6340 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 6341 /* 6342 * Use a stateid where other is an alternating 01010 pattern and 6343 * seqid is 0xffffffff. This value is not defined as special by 6344 * the RFC and is used by the FreeBSD NFS server to indicate an 6345 * MDS->DS proxy operation. 6346 */ 6347 st.other[0] = 0x55555555; 6348 st.other[1] = 0x55555555; 6349 st.other[2] = 0x55555555; 6350 st.seqid = 0xffffffff; 6351 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6352 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 6353 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6354 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6355 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 6356 txdr_hyper(*offp, tl); tl += 2; 6357 *tl = txdr_unsigned(content); 6358 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6359 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6360 if (error != 0) { 6361 free(nd, M_TEMP); 6362 return (error); 6363 } 6364 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 6365 if (nd->nd_repstat == 0) { 6366 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 6367 if (*tl++ == newnfs_true) 6368 *eofp = true; 6369 else 6370 *eofp = false; 6371 *offp = fxdr_hyper(tl); 6372 } else 6373 error = nd->nd_repstat; 6374 nfsmout: 6375 
m_freem(nd->nd_mrep); 6376 free(nd, M_TEMP); 6377 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 6378 return (error); 6379 } 6380 6381 /* 6382 * Get the device id and file handle for a DS file. 6383 */ 6384 int 6385 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 6386 fhandle_t *fhp, char *devid) 6387 { 6388 int buflen, error; 6389 char *buf; 6390 6391 buflen = 1024; 6392 buf = malloc(buflen, M_TEMP, M_WAITOK); 6393 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 6394 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 6395 free(buf, M_TEMP); 6396 return (error); 6397 } 6398 6399 /* 6400 * Do a Lookup against the DS for the filename. 6401 */ 6402 static int 6403 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 6404 struct vnode **nvpp, NFSPROC_T *p) 6405 { 6406 struct nameidata named; 6407 struct ucred *tcred; 6408 char *bufp; 6409 u_long *hashp; 6410 struct vnode *nvp; 6411 int error; 6412 6413 tcred = newnfs_getcred(); 6414 named.ni_cnd.cn_nameiop = LOOKUP; 6415 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 6416 named.ni_cnd.cn_cred = tcred; 6417 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 6418 nfsvno_setpathbuf(&named, &bufp, &hashp); 6419 named.ni_cnd.cn_nameptr = bufp; 6420 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 6421 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 6422 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 6423 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 6424 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 6425 NFSFREECRED(tcred); 6426 nfsvno_relpathbuf(&named); 6427 if (error == 0) 6428 *nvpp = nvp; 6429 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 6430 return (error); 6431 } 6432 6433 /* 6434 * Set the file handle to the correct one. 
6435 */ 6436 static void 6437 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, 6438 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 6439 { 6440 struct nfsnode *np; 6441 int ret = 0; 6442 6443 np = VTONFS(nvp); 6444 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 6445 /* 6446 * We can only do a vn_set_extattr() if the vnode is exclusively 6447 * locked and vn_start_write() has been done. If devid != NULL or 6448 * fnamep != NULL or the vnode is shared locked, vn_start_write() 6449 * may not have been done. 6450 * If not done now, it will be done on a future call. 6451 */ 6452 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 6453 LK_EXCLUSIVE) 6454 ret = vn_extattr_set(vp, IO_NODELOCKED, 6455 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 6456 (char *)pf, p); 6457 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 6458 } 6459 6460 /* 6461 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 6462 * when the DS has failed. 6463 */ 6464 void 6465 nfsrv_killrpcs(struct nfsmount *nmp) 6466 { 6467 6468 /* 6469 * Call newnfs_nmcancelreqs() to cause 6470 * any RPCs in progress on the mount point to 6471 * fail. 6472 * This will cause any process waiting for an 6473 * RPC to complete while holding a vnode lock 6474 * on the mounted-on vnode (such as "df" or 6475 * a non-forced "umount") to fail. 6476 * This will unlock the mounted-on vnode so 6477 * a forced dismount can succeed. 6478 * The NFSMNTP_CANCELRPCS flag should be set when this function is 6479 * called. 6480 */ 6481 newnfs_nmcancelreqs(nmp); 6482 } 6483 6484 /* 6485 * Sum up the statfs info for each of the DSs, so that the client will 6486 * receive the total for all DSs. 
6487 */ 6488 static int 6489 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 6490 { 6491 struct statfs *tsf; 6492 struct nfsdevice *ds; 6493 struct vnode **dvpp, **tdvpp, *dvp; 6494 uint64_t tot; 6495 int cnt, error = 0, i; 6496 6497 if (nfsrv_devidcnt <= 0) 6498 return (ENXIO); 6499 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 6500 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 6501 6502 /* Get an array of the dvps for the DSs. */ 6503 tdvpp = dvpp; 6504 i = 0; 6505 NFSDDSLOCK(); 6506 /* First, search for matches for same file system. */ 6507 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6508 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 6509 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 6510 if (++i > nfsrv_devidcnt) 6511 break; 6512 *tdvpp++ = ds->nfsdev_dvp; 6513 } 6514 } 6515 /* 6516 * If no matches for same file system, total all servers not assigned 6517 * to a file system. 6518 */ 6519 if (i == 0) { 6520 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6521 if (ds->nfsdev_nmp != NULL && 6522 ds->nfsdev_mdsisset == 0) { 6523 if (++i > nfsrv_devidcnt) 6524 break; 6525 *tdvpp++ = ds->nfsdev_dvp; 6526 } 6527 } 6528 } 6529 NFSDDSUNLOCK(); 6530 cnt = i; 6531 6532 /* Do a VFS_STATFS() for each of the DSs and sum them up. 
*/ 6533 tdvpp = dvpp; 6534 for (i = 0; i < cnt && error == 0; i++) { 6535 dvp = *tdvpp++; 6536 error = VFS_STATFS(dvp->v_mount, tsf); 6537 if (error == 0) { 6538 if (sf->f_bsize == 0) { 6539 if (tsf->f_bsize > 0) 6540 sf->f_bsize = tsf->f_bsize; 6541 else 6542 sf->f_bsize = 8192; 6543 } 6544 if (tsf->f_blocks > 0) { 6545 if (sf->f_bsize != tsf->f_bsize) { 6546 tot = tsf->f_blocks * tsf->f_bsize; 6547 sf->f_blocks += (tot / sf->f_bsize); 6548 } else 6549 sf->f_blocks += tsf->f_blocks; 6550 } 6551 if (tsf->f_bfree > 0) { 6552 if (sf->f_bsize != tsf->f_bsize) { 6553 tot = tsf->f_bfree * tsf->f_bsize; 6554 sf->f_bfree += (tot / sf->f_bsize); 6555 } else 6556 sf->f_bfree += tsf->f_bfree; 6557 } 6558 if (tsf->f_bavail > 0) { 6559 if (sf->f_bsize != tsf->f_bsize) { 6560 tot = tsf->f_bavail * tsf->f_bsize; 6561 sf->f_bavail += (tot / sf->f_bsize); 6562 } else 6563 sf->f_bavail += tsf->f_bavail; 6564 } 6565 } 6566 } 6567 free(tsf, M_TEMP); 6568 free(dvpp, M_TEMP); 6569 return (error); 6570 } 6571 6572 /* 6573 * Set an NFSv4 acl. 6574 */ 6575 int 6576 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 6577 { 6578 int error; 6579 6580 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 6581 error = NFSERR_ATTRNOTSUPP; 6582 goto out; 6583 } 6584 /* 6585 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 6586 * Make sure it has enough room for that - splitting every entry 6587 * into two and appending "canonical six" entries at the end. 6588 * Cribbed out of kern/vfs_acl.c - Rick M. 6589 */ 6590 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 6591 error = NFSERR_ATTRNOTSUPP; 6592 goto out; 6593 } 6594 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 6595 if (error == 0) { 6596 error = nfsrv_dssetacl(vp, aclp, cred, p); 6597 if (error == ENOENT) 6598 error = 0; 6599 } 6600 6601 out: 6602 NFSEXITCODE(error); 6603 return (error); 6604 } 6605 6606 /* 6607 * Seek vnode op call (actually it is a VOP_IOCTL()). 
6608 * This function is called with the vnode locked, but unlocks and vrele()s 6609 * the vp before returning. 6610 */ 6611 int 6612 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 6613 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 6614 { 6615 struct nfsvattr at; 6616 int error, ret; 6617 6618 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 6619 /* 6620 * Attempt to seek on a DS file. A return of ENOENT implies 6621 * there is no DS file to seek on. 6622 */ 6623 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 6624 NULL, NULL, NULL, NULL, offp, content, eofp); 6625 if (error != ENOENT) { 6626 vput(vp); 6627 return (error); 6628 } 6629 6630 /* 6631 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 6632 * VOP_IOCTL() will return ENXIO. However, the correct reply for 6633 * NFSv4.2 is *eofp == true and error == 0 for this case. 6634 */ 6635 NFSVOPUNLOCK(vp); 6636 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 6637 *eofp = false; 6638 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 6639 /* Handle the cases where we might be at EOF. */ 6640 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 6641 if (ret == 0 && *offp == at.na_size) { 6642 *eofp = true; 6643 error = 0; 6644 } 6645 if (ret != 0 && error == 0) 6646 error = ret; 6647 } 6648 vrele(vp); 6649 NFSEXITCODE(error); 6650 return (error); 6651 } 6652 6653 /* 6654 * Allocate vnode op call. 6655 */ 6656 int 6657 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6658 NFSPROC_T *p) 6659 { 6660 int error; 6661 off_t olen; 6662 6663 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 6664 /* 6665 * Attempt to allocate on a DS file. A return of ENOENT implies 6666 * there is no DS file to allocate on. 
6667 */ 6668 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 6669 NULL, NULL, NULL, NULL, &len, 0, NULL); 6670 if (error != ENOENT) 6671 return (error); 6672 6673 /* 6674 * Do the actual VOP_ALLOCATE(), looping so long as 6675 * progress is being made, to achieve completion. 6676 */ 6677 do { 6678 olen = len; 6679 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred); 6680 if (error == 0 && len > 0 && olen > len) 6681 maybe_yield(); 6682 } while (error == 0 && len > 0 && olen > len); 6683 if (error == 0 && len > 0) 6684 error = NFSERR_IO; 6685 NFSEXITCODE(error); 6686 return (error); 6687 } 6688 6689 /* 6690 * Deallocate vnode op call. 6691 */ 6692 int 6693 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6694 NFSPROC_T *p) 6695 { 6696 int error; 6697 off_t olen; 6698 6699 ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp"); 6700 /* 6701 * Attempt to deallocate on a DS file. A return of ENOENT implies 6702 * there is no DS file to deallocate on. 6703 */ 6704 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL, 6705 NULL, NULL, NULL, NULL, &len, 0, NULL); 6706 if (error != ENOENT) 6707 return (error); 6708 6709 /* 6710 * Do the actual VOP_DEALLOCATE(), looping so long as 6711 * progress is being made, to achieve completion. 6712 */ 6713 do { 6714 olen = len; 6715 error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); 6716 if (error == 0 && len > 0 && olen > len) 6717 maybe_yield(); 6718 } while (error == 0 && len > 0 && olen > len); 6719 if (error == 0 && len > 0) 6720 error = NFSERR_IO; 6721 NFSEXITCODE(error); 6722 return (error); 6723 } 6724 6725 /* 6726 * Get Extended Atribute vnode op into an mbuf list. 
6727 */ 6728 int 6729 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, 6730 struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, 6731 struct mbuf **mpp, struct mbuf **mpendp, int *lenp) 6732 { 6733 struct iovec *iv; 6734 struct uio io, *uiop = &io; 6735 struct mbuf *m, *m2; 6736 int alen, error, len, tlen; 6737 size_t siz; 6738 6739 /* First, find out the size of the extended attribute. */ 6740 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6741 &siz, cred, p); 6742 if (error != 0) 6743 return (NFSERR_NOXATTR); 6744 if (siz > maxresp - NFS_MAXXDR) 6745 return (NFSERR_XATTR2BIG); 6746 len = siz; 6747 tlen = NFSM_RNDUP(len); 6748 if (tlen > 0) { 6749 /* 6750 * If cnt > MCLBYTES and the reply will not be saved, use 6751 * ext_pgs mbufs for TLS. 6752 * For NFSv4.0, we do not know for sure if the reply will 6753 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 6754 * Always use ext_pgs mbufs if ND_EXTPG is set. 6755 */ 6756 if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && 6757 (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && 6758 (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) 6759 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, 6760 maxextsiz, &m, &m2, &iv); 6761 else 6762 uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, 6763 &iv); 6764 uiop->uio_iov = iv; 6765 } else { 6766 uiop->uio_iovcnt = 0; 6767 uiop->uio_iov = iv = NULL; 6768 m = m2 = NULL; 6769 } 6770 uiop->uio_offset = 0; 6771 uiop->uio_resid = tlen; 6772 uiop->uio_rw = UIO_READ; 6773 uiop->uio_segflg = UIO_SYSSPACE; 6774 uiop->uio_td = p; 6775 #ifdef MAC 6776 error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6777 name); 6778 if (error != 0) 6779 goto out; 6780 #endif 6781 6782 if (tlen > 0) 6783 error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6784 NULL, cred, p); 6785 if (error != 0) 6786 goto out; 6787 if (uiop->uio_resid > 0) { 6788 alen = tlen; 6789 len = tlen - uiop->uio_resid; 6790 tlen = NFSM_RNDUP(len); 6791 if (alen 
!= tlen) 6792 printf("nfsvno_getxattr: weird size read\n"); 6793 if (tlen == 0) { 6794 m_freem(m); 6795 m = m2 = NULL; 6796 } else if (alen != tlen || tlen != len) 6797 m2 = nfsrv_adj(m, alen - tlen, tlen - len); 6798 } 6799 *lenp = len; 6800 *mpp = m; 6801 *mpendp = m2; 6802 6803 out: 6804 if (error != 0) { 6805 if (m != NULL) 6806 m_freem(m); 6807 *lenp = 0; 6808 } 6809 free(iv, M_TEMP); 6810 NFSEXITCODE(error); 6811 return (error); 6812 } 6813 6814 /* 6815 * Set Extended attribute vnode op from an mbuf list. 6816 */ 6817 int 6818 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, 6819 char *cp, struct ucred *cred, struct thread *p) 6820 { 6821 struct iovec *iv; 6822 struct uio uio, *uiop = &uio; 6823 int cnt, error; 6824 6825 error = 0; 6826 #ifdef MAC 6827 error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6828 name); 6829 #endif 6830 if (error != 0) 6831 goto out; 6832 6833 uiop->uio_rw = UIO_WRITE; 6834 uiop->uio_segflg = UIO_SYSSPACE; 6835 uiop->uio_td = p; 6836 uiop->uio_offset = 0; 6837 uiop->uio_resid = len; 6838 if (len > 0) { 6839 error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); 6840 uiop->uio_iov = iv; 6841 uiop->uio_iovcnt = cnt; 6842 } else { 6843 uiop->uio_iov = iv = NULL; 6844 uiop->uio_iovcnt = 0; 6845 } 6846 if (error == 0) { 6847 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, 6848 cred, p); 6849 if (error == 0) { 6850 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 6851 nfsvno_updateds(vp, cred, p); 6852 error = VOP_FSYNC(vp, MNT_WAIT, p); 6853 } 6854 free(iv, M_TEMP); 6855 } 6856 6857 out: 6858 NFSEXITCODE(error); 6859 return (error); 6860 } 6861 6862 /* 6863 * For a pNFS server, the DS file's ctime and 6864 * va_filerev (TimeMetadata and Change) needs to 6865 * be updated. This is a hack, but works by 6866 * flipping the S_ISGID bit in va_mode and then 6867 * flipping it back. 
6868 * It does result in two MDS->DS RPCs, but creating 6869 * a custom RPC just to do this seems overkill, since 6870 * Setxattr/Rmxattr will not be done that frequently. 6871 * If it fails part way through, that is not too 6872 * serious, since the DS file is never executed. 6873 */ 6874 static void 6875 nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p) 6876 { 6877 struct nfsvattr nva; 6878 int ret; 6879 u_short tmode; 6880 6881 ret = VOP_GETATTR(vp, &nva.na_vattr, cred); 6882 if (ret == 0) { 6883 tmode = nva.na_mode; 6884 NFSVNO_ATTRINIT(&nva); 6885 tmode ^= S_ISGID; 6886 NFSVNO_SETATTRVAL(&nva, mode, tmode); 6887 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 6888 NFSPROC_SETATTR, NULL, NULL, NULL, &nva, 6889 NULL, NULL, 0, NULL); 6890 if (ret == 0) { 6891 tmode ^= S_ISGID; 6892 NFSVNO_SETATTRVAL(&nva, mode, tmode); 6893 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 6894 NFSPROC_SETATTR, NULL, NULL, NULL, 6895 &nva, NULL, NULL, 0, NULL); 6896 } 6897 } 6898 } 6899 6900 /* 6901 * Remove Extended attribute vnode op. 6902 */ 6903 int 6904 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 6905 struct ucred *cred, struct thread *p) 6906 { 6907 int error; 6908 6909 /* 6910 * Get rid of any delegations. I am not sure why this is required, 6911 * but RFC-8276 says so. 
6912 */ 6913 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 6914 if (error != 0) 6915 goto out; 6916 #ifdef MAC 6917 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6918 name); 6919 if (error != 0) 6920 goto out; 6921 #endif 6922 6923 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 6924 if (error == EOPNOTSUPP) 6925 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6926 cred, p); 6927 if (error == 0) { 6928 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 6929 nfsvno_updateds(vp, cred, p); 6930 error = VOP_FSYNC(vp, MNT_WAIT, p); 6931 } 6932 out: 6933 NFSEXITCODE(error); 6934 return (error); 6935 } 6936 6937 /* 6938 * List Extended Atribute vnode op into an mbuf list. 6939 */ 6940 int 6941 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 6942 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 6943 { 6944 struct iovec iv; 6945 struct uio io; 6946 int error; 6947 size_t siz; 6948 6949 *bufp = NULL; 6950 /* First, find out the size of the extended attribute. */ 6951 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 6952 p); 6953 if (error != 0) 6954 return (NFSERR_NOXATTR); 6955 if (siz <= cookie) { 6956 *lenp = 0; 6957 *eofp = true; 6958 goto out; 6959 } 6960 if (siz > cookie + *lenp) { 6961 siz = cookie + *lenp; 6962 *eofp = false; 6963 } else 6964 *eofp = true; 6965 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). 
*/ 6966 if (siz > 10 * 1024 * 1024) { 6967 error = NFSERR_XATTR2BIG; 6968 goto out; 6969 } 6970 *bufp = malloc(siz, M_TEMP, M_WAITOK); 6971 iv.iov_base = *bufp; 6972 iv.iov_len = siz; 6973 io.uio_iovcnt = 1; 6974 io.uio_iov = &iv; 6975 io.uio_offset = 0; 6976 io.uio_resid = siz; 6977 io.uio_rw = UIO_READ; 6978 io.uio_segflg = UIO_SYSSPACE; 6979 io.uio_td = p; 6980 #ifdef MAC 6981 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 6982 if (error != 0) 6983 goto out; 6984 #endif 6985 6986 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 6987 p); 6988 if (error != 0) 6989 goto out; 6990 if (io.uio_resid > 0) 6991 siz -= io.uio_resid; 6992 *lenp = siz; 6993 6994 out: 6995 if (error != 0) { 6996 free(*bufp, M_TEMP); 6997 *bufp = NULL; 6998 } 6999 NFSEXITCODE(error); 7000 return (error); 7001 } 7002 7003 /* 7004 * Trim trailing data off the mbuf list being built. 7005 */ 7006 void 7007 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 7008 int bextpg, int bextpgsiz) 7009 { 7010 vm_page_t pg; 7011 int fullpgsiz, i; 7012 7013 if (mb->m_next != NULL) { 7014 m_freem(mb->m_next); 7015 mb->m_next = NULL; 7016 } 7017 if ((mb->m_flags & M_EXTPG) != 0) { 7018 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs, 7019 ("nfsm_trimtrailing: bextpg out of range")); 7020 KASSERT(bpos == (char *)(void *) 7021 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz, 7022 ("nfsm_trimtrailing: bextpgsiz bad!")); 7023 7024 /* First, get rid of any pages after this position. 
*/ 7025 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 7026 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 7027 vm_page_unwire_noq(pg); 7028 vm_page_free(pg); 7029 } 7030 mb->m_epg_npgs = bextpg + 1; 7031 if (bextpg == 0) 7032 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 7033 else 7034 fullpgsiz = PAGE_SIZE; 7035 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 7036 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 7037 for (i = 1; i < mb->m_epg_npgs; i++) 7038 mb->m_len += m_epg_pagelen(mb, i, 0); 7039 nd->nd_bextpgsiz = bextpgsiz; 7040 nd->nd_bextpg = bextpg; 7041 } else 7042 mb->m_len = bpos - mtod(mb, char *); 7043 nd->nd_mb = mb; 7044 nd->nd_bpos = bpos; 7045 } 7046 7047 7048 /* 7049 * Check to see if a put file handle operation should test for 7050 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR. 7051 * When Open is the next operation, NFSERR_WRONGSEC cannot be 7052 * replied for the Open cases that use a component. This can 7053 * be identified by the fact that the file handle's type is VDIR. 7054 */ 7055 bool 7056 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, __enum_uint8(vtype) vtyp) 7057 { 7058 7059 if ((nd->nd_flag & ND_NFSV4) == 0) 7060 return (true); 7061 7062 if ((nd->nd_flag & ND_LASTOP) != 0) 7063 return (false); 7064 7065 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH || 7066 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH || 7067 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP || 7068 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME) 7069 return (false); 7070 if (nextop == NFSV4OP_OPEN && vtyp == VDIR) 7071 return (false); 7072 return (true); 7073 } 7074 7075 /* 7076 * Check DSs marked no space. 
7077 */ 7078 void 7079 nfsrv_checknospc(void) 7080 { 7081 struct statfs *tsf; 7082 struct nfsdevice *ds; 7083 struct vnode **dvpp, **tdvpp, *dvp; 7084 char *devid, *tdevid; 7085 int cnt, error = 0, i; 7086 7087 if (nfsrv_devidcnt <= 0) 7088 return; 7089 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 7090 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK); 7091 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 7092 7093 /* Get an array of the dvps for the DSs. */ 7094 tdvpp = dvpp; 7095 tdevid = devid; 7096 i = 0; 7097 NFSDDSLOCK(); 7098 /* First, search for matches for same file system. */ 7099 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 7100 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) { 7101 if (++i > nfsrv_devidcnt) 7102 break; 7103 *tdvpp++ = ds->nfsdev_dvp; 7104 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID); 7105 tdevid += NFSX_V4DEVICEID; 7106 } 7107 } 7108 NFSDDSUNLOCK(); 7109 7110 /* Do a VFS_STATFS() for each of the DSs and clear no space. */ 7111 cnt = i; 7112 tdvpp = dvpp; 7113 tdevid = devid; 7114 for (i = 0; i < cnt && error == 0; i++) { 7115 dvp = *tdvpp++; 7116 error = VFS_STATFS(dvp->v_mount, tsf); 7117 if (error == 0 && tsf->f_bavail > 0) { 7118 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n"); 7119 nfsrv_marknospc(tdevid, false); 7120 } 7121 tdevid += NFSX_V4DEVICEID; 7122 } 7123 free(tsf, M_TEMP); 7124 free(dvpp, M_TEMP); 7125 free(devid, M_TEMP); 7126 } 7127 7128 /* 7129 * Initialize everything that needs to be initialized for a vnet. 7130 */ 7131 static void 7132 nfsrv_vnetinit(const void *unused __unused) 7133 { 7134 7135 nfsd_mntinit(); 7136 } 7137 VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY, 7138 nfsrv_vnetinit, NULL); 7139 7140 /* 7141 * Clean up everything that is in a vnet and needs to be 7142 * done when the jail is destroyed or the module unloaded. 
7143 */ 7144 static void 7145 nfsrv_cleanup(const void *unused __unused) 7146 { 7147 int i; 7148 7149 NFSD_LOCK(); 7150 if (!NFSD_VNET(nfsrv_mntinited)) { 7151 NFSD_UNLOCK(); 7152 return; 7153 } 7154 NFSD_VNET(nfsrv_mntinited) = false; 7155 NFSD_UNLOCK(); 7156 7157 /* Clean out all NFSv4 state. */ 7158 nfsrv_throwawayallstate(curthread); 7159 7160 /* Clean the NFS server reply cache */ 7161 nfsrvd_cleancache(); 7162 7163 /* Clean out v4root exports. */ 7164 if (NFSD_VNET(nfsv4root_mnt)->mnt_export != NULL) { 7165 vfs_free_addrlist(NFSD_VNET(nfsv4root_mnt)->mnt_export); 7166 free(NFSD_VNET(nfsv4root_mnt)->mnt_export, M_MOUNT); 7167 NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; 7168 } 7169 7170 /* Free up the krpc server pool. */ 7171 if (NFSD_VNET(nfsrvd_pool) != NULL) 7172 svcpool_destroy(NFSD_VNET(nfsrvd_pool)); 7173 7174 /* and get rid of the locks */ 7175 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { 7176 mtx_destroy(&NFSD_VNET(nfsrchash_table)[i].mtx); 7177 mtx_destroy(&NFSD_VNET(nfsrcahash_table)[i].mtx); 7178 } 7179 mtx_destroy(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx); 7180 for (i = 0; i < nfsrv_sessionhashsize; i++) 7181 mtx_destroy(&NFSD_VNET(nfssessionhash)[i].mtx); 7182 lockdestroy(&NFSD_VNET(nfsv4root_mnt)->mnt_explock); 7183 free(NFSD_VNET(nfsrvudphashtbl), M_NFSRVCACHE); 7184 free(NFSD_VNET(nfsrchash_table), M_NFSRVCACHE); 7185 free(NFSD_VNET(nfsrcahash_table), M_NFSRVCACHE); 7186 free(NFSD_VNET(nfsclienthash), M_NFSDCLIENT); 7187 free(NFSD_VNET(nfslockhash), M_NFSDLOCKFILE); 7188 free(NFSD_VNET(nfssessionhash), M_NFSDSESSION); 7189 free(NFSD_VNET(nfsv4root_mnt), M_TEMP); 7190 NFSD_VNET(nfsv4root_mnt) = NULL; 7191 } 7192 VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY, 7193 nfsrv_cleanup, NULL); 7194 7195 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 7196 7197 /* 7198 * Called once to initialize data structures... 
7199 */ 7200 static int 7201 nfsd_modevent(module_t mod, int type, void *data) 7202 { 7203 int error = 0, i; 7204 static int loaded = 0; 7205 7206 switch (type) { 7207 case MOD_LOAD: 7208 if (loaded) 7209 goto out; 7210 newnfs_portinit(); 7211 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); 7212 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); 7213 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); 7214 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); 7215 #ifdef VV_DISABLEDELEG 7216 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; 7217 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; 7218 #endif 7219 nfsd_call_nfsd = nfssvc_nfsd; 7220 loaded = 1; 7221 break; 7222 7223 case MOD_UNLOAD: 7224 if (newnfs_numnfsd != 0) { 7225 error = EBUSY; 7226 break; 7227 } 7228 7229 #ifdef VV_DISABLEDELEG 7230 vn_deleg_ops.vndeleg_recall = NULL; 7231 vn_deleg_ops.vndeleg_disable = NULL; 7232 #endif 7233 nfsd_call_nfsd = NULL; 7234 mtx_destroy(&nfsrc_udpmtx); 7235 mtx_destroy(&nfs_v4root_mutex); 7236 mtx_destroy(&nfsrv_dontlistlock_mtx); 7237 mtx_destroy(&nfsrv_recalllock_mtx); 7238 if (nfslayouthash != NULL) { 7239 for (i = 0; i < nfsrv_layouthashsize; i++) 7240 mtx_destroy(&nfslayouthash[i].mtx); 7241 free(nfslayouthash, M_NFSDSESSION); 7242 } 7243 loaded = 0; 7244 break; 7245 default: 7246 error = EOPNOTSUPP; 7247 break; 7248 } 7249 7250 out: 7251 NFSEXITCODE(error); 7252 return (error); 7253 } 7254 static moduledata_t nfsd_mod = { 7255 "nfsd", 7256 nfsd_modevent, 7257 NULL, 7258 }; 7259 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); 7260 7261 /* So that loader and kldload(2) can find us, wherever we are.. */ 7262 MODULE_VERSION(nfsd, 1); 7263 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); 7264 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); 7265 MODULE_DEPEND(nfsd, krpc, 1, 1, 1); 7266 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); 7267