1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/capsicum.h> 40 #include <sys/extattr.h> 41 42 /* 43 * Functions that perform the vfs operations required by the routines in 44 * nfsd_serv.c. It is hoped that this change will make the server more 45 * portable. 46 */ 47 48 #include <fs/nfs/nfsport.h> 49 #include <security/mac/mac_framework.h> 50 #include <sys/callout.h> 51 #include <sys/filio.h> 52 #include <sys/hash.h> 53 #include <sys/sysctl.h> 54 #include <nlm/nlm_prot.h> 55 #include <nlm/nlm.h> 56 57 FEATURE(nfsd, "NFSv4 server"); 58 59 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; 60 extern int nfsrv_useacl; 61 extern int newnfs_numnfsd; 62 extern struct mount nfsv4root_mnt; 63 extern struct nfsrv_stablefirst nfsrv_stablefirst; 64 extern SVCPOOL *nfsrvd_pool; 65 extern struct nfsv4lock nfsd_suspend_lock; 66 extern struct nfsclienthashhead *nfsclienthash; 67 extern struct nfslockhashhead *nfslockhash; 68 extern struct nfssessionhash *nfssessionhash; 69 extern int nfsrv_sessionhashsize; 70 extern struct nfsstatsv1 nfsstatsv1; 71 extern struct nfslayouthash *nfslayouthash; 72 extern int nfsrv_layouthashsize; 73 extern struct mtx nfsrv_dslock_mtx; 74 extern int nfs_pnfsiothreads; 75 extern struct nfsdontlisthead nfsrv_dontlisthead; 76 extern volatile int nfsrv_dontlistlen; 77 extern volatile int nfsrv_devidcnt; 78 extern int nfsrv_maxpnfsmirror; 79 extern uint32_t nfs_srvmaxio; 80 extern int nfs_bufpackets; 81 extern u_long sb_max_adj; 82 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; 83 NFSDLOCKMUTEX; 84 NFSSTATESPINLOCK; 85 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; 86 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; 87 struct mtx nfsrc_udpmtx; 88 struct mtx nfs_v4root_mutex; 89 struct mtx nfsrv_dontlistlock_mtx; 90 struct mtx nfsrv_recalllock_mtx; 91 struct nfsrvfh nfs_rootfh, nfs_pubfh; 92 int nfs_pubfhset = 0, nfs_rootfhset = 0; 93 struct proc *nfsd_master_proc = NULL; 94 
/* Debug verbosity for the NFS server (compared against by NFSD_DEBUG()). */
int nfsd_debuglevel = 0;

/* File-local state. */
static pid_t nfsd_master_pid = (pid_t)-1;
static char nfsd_master_comm[MAXCOMLEN + 1];
static struct timeval nfsd_master_start;
static uint32_t nfsv4_sysid = 0;
static fhandle_t zerofh;
struct callout nfsd_callout;

static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
    struct ucred *);
static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *);

/* Backing variables for the vfs.nfsd sysctl knobs registered below. */
int nfsrv_enable_crossmntpt = 1;
static int nfs_commit_blks;
static int nfs_commit_miss;
extern int nfsrv_issuedelegs;
extern int nfsrv_dolocallocks;
extern int nfsd_enable_stringtouid;
extern struct nfsdevicehead nfsrv_devidhead;

/* Forward declarations of the file-local (static) helpers. */
static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
    struct iovec **);
static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
    struct mbuf **, struct iovec **);
static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
    int *);
static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
    NFSPROC_T *);
static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
    int *, char *, fhandle_t *);
static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
    NFSPROC_T *);
static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
    struct thread *, int, struct mbuf **, char *, struct mbuf **,
    struct nfsvattr *, struct acl *, off_t *, int, bool *);
static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
    char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount *, struct nfsvattr *);
static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
    NFSPROC_T *, struct nfsmount *);
static int nfsrv_putfhname(fhandle_t *, char *);
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
    struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
    struct vnode *, NFSPROC_T *);
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
    NFSPROC_T *);
static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);

int nfs_pnfsio(task_fn_t *, void *);

/* The vfs.nfsd sysctl node and its simple integer knobs. */
SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "NFS server");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
    &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 172 0, "Debug level for NFS server"); 173 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, 174 &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); 175 static int nfsrv_pnfsgetdsattr = 1; 176 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, 177 &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); 178 179 /* 180 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are 181 * not running. 182 * The dsN subdirectories for the increased values must have been created 183 * on all DS servers before this increase is done. 184 */ 185 u_int nfsrv_dsdirsize = 20; 186 static int 187 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) 188 { 189 int error, newdsdirsize; 190 191 newdsdirsize = nfsrv_dsdirsize; 192 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); 193 if (error != 0 || req->newptr == NULL) 194 return (error); 195 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || 196 newnfs_numnfsd != 0) 197 return (EINVAL); 198 nfsrv_dsdirsize = newdsdirsize; 199 return (0); 200 } 201 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, 202 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), 203 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); 204 205 /* 206 * nfs_srvmaxio can only be increased and only when the nfsd threads are 207 * not running. The setting must be a power of 2, with the current limit of 208 * 1Mbyte. 
209 */ 210 static int 211 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS) 212 { 213 int error; 214 u_int newsrvmaxio; 215 uint64_t tval; 216 217 newsrvmaxio = nfs_srvmaxio; 218 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req); 219 if (error != 0 || req->newptr == NULL) 220 return (error); 221 if (newsrvmaxio == nfs_srvmaxio) 222 return (0); 223 if (newsrvmaxio < nfs_srvmaxio) { 224 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n"); 225 return (EINVAL); 226 } 227 if (newsrvmaxio > 1048576) { 228 printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n"); 229 return (EINVAL); 230 } 231 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) { 232 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n"); 233 return (EINVAL); 234 } 235 236 /* 237 * Check that kern.ipc.maxsockbuf is large enough for 238 * newsrviomax, given the setting of vfs.nfs.bufpackets. 239 */ 240 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets > 241 sb_max_adj) { 242 /* 243 * Suggest vfs.nfs.bufpackets * maximum RPC message for 244 * sb_max_adj. 245 */ 246 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets; 247 248 /* 249 * Convert suggested sb_max_adj value to a suggested 250 * sb_max value, which is what is set via kern.ipc.maxsockbuf. 251 * Perform the inverse calculation of (from uipc_sockbuf.c): 252 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES / 253 * (MSIZE + MCLBYTES); 254 * XXX If the calculation of sb_max_adj from sb_max changes, 255 * this calculation must be changed as well. 256 */ 257 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */ 258 tval += MCLBYTES - 1; /* Round up divide. 
*/ 259 tval /= MCLBYTES; 260 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of " 261 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval, 262 newsrvmaxio); 263 return (EINVAL); 264 } 265 266 NFSD_LOCK(); 267 if (newnfs_numnfsd != 0) { 268 NFSD_UNLOCK(); 269 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd " 270 "threads are running\n"); 271 return (EINVAL); 272 } 273 274 275 nfs_srvmaxio = newsrvmaxio; 276 NFSD_UNLOCK(); 277 return (0); 278 } 279 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio, 280 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, 281 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes"); 282 283 #define MAX_REORDERED_RPC 16 284 #define NUM_HEURISTIC 1031 285 #define NHUSE_INIT 64 286 #define NHUSE_INC 16 287 #define NHUSE_MAX 2048 288 289 static struct nfsheur { 290 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ 291 off_t nh_nextoff; /* next offset for sequential detection */ 292 int nh_use; /* use count for selection */ 293 int nh_seqcount; /* heuristic */ 294 } nfsheur[NUM_HEURISTIC]; 295 296 /* 297 * Heuristic to detect sequential operation. 298 */ 299 static struct nfsheur * 300 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) 301 { 302 struct nfsheur *nh; 303 int hi, try; 304 305 /* Locate best candidate. */ 306 try = 32; 307 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; 308 nh = &nfsheur[hi]; 309 while (try--) { 310 if (nfsheur[hi].nh_vp == vp) { 311 nh = &nfsheur[hi]; 312 break; 313 } 314 if (nfsheur[hi].nh_use > 0) 315 --nfsheur[hi].nh_use; 316 hi = (hi + 1) % NUM_HEURISTIC; 317 if (nfsheur[hi].nh_use < nh->nh_use) 318 nh = &nfsheur[hi]; 319 } 320 321 /* Initialize hint if this is a new file. */ 322 if (nh->nh_vp != vp) { 323 nh->nh_vp = vp; 324 nh->nh_nextoff = uio->uio_offset; 325 nh->nh_use = NHUSE_INIT; 326 if (uio->uio_offset == 0) 327 nh->nh_seqcount = 4; 328 else 329 nh->nh_seqcount = 1; 330 } 331 332 /* Calculate heuristic. 
*/ 333 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || 334 uio->uio_offset == nh->nh_nextoff) { 335 /* See comments in vfs_vnops.c:sequential_heuristic(). */ 336 nh->nh_seqcount += howmany(uio->uio_resid, 16384); 337 if (nh->nh_seqcount > IO_SEQMAX) 338 nh->nh_seqcount = IO_SEQMAX; 339 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * 340 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { 341 /* Probably a reordered RPC, leave seqcount alone. */ 342 } else if (nh->nh_seqcount > 1) { 343 nh->nh_seqcount /= 2; 344 } else { 345 nh->nh_seqcount = 0; 346 } 347 nh->nh_use += NHUSE_INC; 348 if (nh->nh_use > NHUSE_MAX) 349 nh->nh_use = NHUSE_MAX; 350 return (nh); 351 } 352 353 /* 354 * Get attributes into nfsvattr structure. 355 */ 356 int 357 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, 358 struct nfsrv_descript *nd, struct thread *p, int vpislocked, 359 nfsattrbit_t *attrbitp) 360 { 361 int error, gotattr, lockedit = 0; 362 struct nfsvattr na; 363 364 if (vpislocked == 0) { 365 /* 366 * When vpislocked == 0, the vnode is either exclusively 367 * locked by this thread or not locked by this thread. 368 * As such, shared lock it, if not exclusively locked. 369 */ 370 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 371 lockedit = 1; 372 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); 373 } 374 } 375 376 /* 377 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed 378 * attributes, as required. 379 * This needs to be done for regular files if: 380 * - non-NFSv4 RPCs or 381 * - when attrbitp == NULL or 382 * - an NFSv4 RPC with any of the above attributes in attrbitp. 383 * A return of 0 for nfsrv_proxyds() indicates that it has acquired 384 * these attributes. nfsrv_proxyds() will return an error if the 385 * server is not a pNFS one. 
386 */ 387 gotattr = 0; 388 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || 389 (nd->nd_flag & ND_NFSV4) == 0 || 390 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || 391 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || 392 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || 393 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || 394 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { 395 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, 396 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, 397 NULL); 398 if (error == 0) 399 gotattr = 1; 400 } 401 402 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); 403 if (lockedit != 0) 404 NFSVOPUNLOCK(vp); 405 406 /* 407 * If we got the Change, Size and Modify Time from the DS, 408 * replace them. 409 */ 410 if (gotattr != 0) { 411 nvap->na_atime = na.na_atime; 412 nvap->na_mtime = na.na_mtime; 413 nvap->na_filerev = na.na_filerev; 414 nvap->na_size = na.na_size; 415 nvap->na_bytes = na.na_bytes; 416 } 417 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, 418 error, (uintmax_t)na.na_filerev); 419 420 NFSEXITCODE(error); 421 return (error); 422 } 423 424 /* 425 * Get a file handle for a vnode. 426 */ 427 int 428 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) 429 { 430 int error; 431 432 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); 433 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; 434 error = VOP_VPTOFH(vp, &fhp->fh_fid); 435 436 NFSEXITCODE(error); 437 return (error); 438 } 439 440 /* 441 * Perform access checking for vnodes obtained from file handles that would 442 * refer to files already opened by a Unix client. You cannot just use 443 * vn_writechk() and VOP_ACCESSX() for two reasons. 444 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write 445 * case. 446 * 2 - The owner is to be given access irrespective of mode bits for some 447 * operations, so that processes that chmod after opening a file don't 448 * break. 
*/
/*
 * vp             - vnode to check.
 * accmode        - requested access bits.
 * cred           - credentials to check against.
 * exp            - export information, consulted for the read-only test.
 * p              - calling thread, passed to VOP_ACCESSX().
 * override       - NFSACCCHK_* flags; selects whether root and/or the
 *                  file's owner get access despite EPERM/EACCES.
 * vpislocked     - when 0, vp is share locked here and unlocked before
 *                  returning; EPERM is returned if the lock fails.
 * supportedtypep - if not NULL, NFSACCESS_DELETE is cleared in it when
 *                  the VEXPLICIT_DENY retry succeeds on a non-directory.
 *
 * Returns 0 when access is allowed, otherwise an errno value.
 */
int
nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
    u_int32_t *supportedtypep)
{
	struct vattr vattr;
	int error = 0, getret = 0;

	if (vpislocked == 0) {
		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
			error = EPERM;
			goto out;
		}
	}
	if (accmode & VWRITE) {
		/* Just vn_writechk() changed to check rdonly */
		/*
		 * Disallow write attempts on read-only file systems;
		 * unless the file is a socket or a block or character
		 * device resident on the file system.
		 */
		if (NFSVNO_EXRDONLY(exp) ||
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			switch (vp->v_type) {
			case VREG:
			case VDIR:
			case VLNK:
				error = EROFS;
				/* FALLTHROUGH */
			default:
				break;
			}
		}
		/*
		 * If there's shared text associated with
		 * the inode, try to free it up once.  If
		 * we fail, we can't allow writing.
		 */
		if (VOP_IS_TEXT(vp) && error == 0)
			error = ETXTBSY;
	}
	if (error != 0) {
		if (vpislocked == 0)
			NFSVOPUNLOCK(vp);
		goto out;
	}

	/*
	 * Should the override still be applied when ACLs are enabled?
	 */
	error = VOP_ACCESSX(vp, accmode, cred, p);
	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
		/*
		 * Try again with VEXPLICIT_DENY, to see if the test for
		 * deletion is supported.
		 */
		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
		if (error == 0) {
			if (vp->v_type == VDIR) {
				/* For a directory, retest as a plain write. */
				accmode &= ~(VDELETE | VDELETE_CHILD);
				accmode |= VWRITE;
				error = VOP_ACCESSX(vp, accmode, cred, p);
			} else if (supportedtypep != NULL) {
				*supportedtypep &= ~NFSACCESS_DELETE;
			}
		}
	}

	/*
	 * Allow certain operations for the owner (reads and writes
	 * on files that are already open).
	 */
	if (override != NFSACCCHK_NOOVERRIDE &&
	    (error == EPERM || error == EACCES)) {
		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
			error = 0;
		else if (override & NFSACCCHK_ALLOWOWNER) {
			getret = VOP_GETATTR(vp, &vattr, cred);
			if (getret == 0 && cred->cr_uid == vattr.va_uid)
				error = 0;
		}
	}
	if (vpislocked == 0)
		NFSVOPUNLOCK(vp);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set attribute(s) vnop.
 *
 * Applies nvap to vp with VOP_SETATTR() and then, when any of uid, gid,
 * size, mode, atime or mtime is being set, forwards the change to
 * nfsrv_proxyds() for the DS file.  ENOENT from nfsrv_proxyds() is
 * treated as success.  The exp argument is not used.
 *
 * Returns 0 or an errno value.
 */
int
nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	u_quad_t savsize = 0;
	int error, savedit;
	time_t savbtime;

	/*
	 * If this is an exported file system and a pNFS service is running,
	 * don't VOP_SETATTR() of size for the MDS file system.
	 *
	 * savedit records what was done with the size:
	 * 0 - size left in place,
	 * 1 - size stashed in savsize, other attributes still need
	 *     VOP_SETATTR(),
	 * 2 - size stashed and nothing else is being set, so
	 *     VOP_SETATTR() is skipped.
	 */
	savedit = 0;
	error = 0;
	if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
	    nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
	    nvap->na_vattr.va_size > 0) {
		savsize = nvap->na_vattr.va_size;
		nvap->na_vattr.va_size = VNOVAL;
		if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
		    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
		    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
		    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
		    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
			savedit = 1;
		else
			savedit = 2;
	}
	if (savedit != 2)
		error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
	/* Put the size back so it is included in the DS setattr below. */
	if (savedit != 0)
		nvap->na_vattr.va_size = savsize;
	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
	    nvap->na_vattr.va_size != VNOVAL ||
	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
		/* Never modify birthtime on a DS file. */
		savbtime = nvap->na_vattr.va_birthtime.tv_sec;
		nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
		/* For a pNFS server, set the attributes on the DS file. */
		error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
		    NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
		nvap->na_vattr.va_birthtime.tv_sec = savbtime;
		if (error == ENOENT)
			error = 0;
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set up nameidata for a lookup() call and do it.
 *
 * nd       - request descriptor; nd_flag selects public-filehandle and
 *            NFSv4 behaviour.
 * ndp      - nameidata whose ni_cnd.cn_pnbuf already holds the path.
 * dp       - starting directory; the caller's reference is consumed when
 *            dp is not a directory, otherwise it is handed back through
 *            *retdirp.
 * islocked - non-zero if dp is locked on entry; it is unlocked here.
 * exp      - export information (read-only and exported tests).
 * retdirp  - set to dp once dp is known to be a directory, else NULL.
 *
 * Returns 0 or an error.  On error the path buffer has been released and
 * ni_vp, ni_dvp and ni_startdir are NULL (except on the ENOTDIR path,
 * which returns before those fields are touched).
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * LOCKLEAF is always set for the lookup; if the caller did not
	 * ask for it, the leaf is unlocked below and the flag is cleared
	 * again after the loop.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = vfs_lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp);
		/* Symlinks are only followed for public filehandle lookups. */
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * With path components still to be processed a fresh buffer
		 * is needed so the remainder can be appended to the link
		 * text; otherwise the link is read into the existing buffer.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/* badlink1 also frees the freshly allocated buffer. */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Set up a pathname buffer and return a pointer to it and, optionally
 * set a hash pointer.
 *
 * The buffer comes from namei_zone and is stored in ndp->ni_cnd.cn_pnbuf;
 * NOMACCHECK is also set in the component name flags.  When hashpp is
 * not NULL, *hashpp is set to NULL.
 */
void
nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
{
	struct componentname *cnp = &ndp->ni_cnd;

	cnp->cn_flags |= (NOMACCHECK);
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	if (hashpp != NULL)
		*hashpp = NULL;
	*bufpp = cnp->cn_pnbuf;
}

/*
 * Release the above path buffer, if not released by nfsvno_namei().
 * The buffer pointer in ndp is set to NULL afterwards.
 */
void
nfsvno_relpathbuf(struct nameidata *ndp)
{

	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
	ndp->ni_cnd.cn_pnbuf = NULL;
}

/*
 * Readlink vnode op into an mbuf list.
*/
/*
 * vp        - symbolic link vnode to read.
 * cred      - credentials for VOP_READLINK().
 * maxextsiz - when > 0, build the reply out of ext_pgs mbufs of at most
 *             this size; otherwise use ordinary cluster mbufs.
 * p         - not used.
 * mpp       - set to the head of the mbuf chain holding the link text.
 * mpendp    - set to the last mbuf of that chain.
 * lenp      - set to the link length in bytes (0 on error).
 *
 * Returns 0 or the VOP_READLINK() error.  On error the mbuf chain is
 * freed and *mpp / *mpendp are not written.
 */
int
nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
{
	struct iovec *iv;
	struct uio io, *uiop = &io;
	struct mbuf *mp, *mp3;
	int len, tlen, error = 0;

	/* Always offer room for a maximum length path. */
	len = NFS_MAXPATHLEN;
	if (maxextsiz > 0)
		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
		    &mp3, &mp, &iv);
	else
		uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
	uiop->uio_iov = iv;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	error = VOP_READLINK(vp, uiop, cred);
	free(iv, M_TEMP);
	if (error) {
		m_freem(mp3);
		*lenp = 0;
		goto out;
	}
	/* Shorter than the space offered: trim the chain to fit. */
	if (uiop->uio_resid > 0) {
		len -= uiop->uio_resid;
		tlen = NFSM_RNDUP(len);
		if (tlen == 0) {
			m_freem(mp3);
			mp3 = mp = NULL;
		} else if (tlen != NFS_MAXPATHLEN || tlen != len)
			mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
			    tlen - len);
	}
	*lenp = len;
	*mpp = mp3;
	*mpendp = mp;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Create an mbuf chain and an associated iovec that can be used to Read
 * or Getextattr of data.
 * Upon success, return pointers to the first and last mbufs in the chain
 * plus the malloc'd iovec and its iovlen.
 *
 * The iovec is allocated from M_TEMP and must be freed by the caller.
 * Each mbuf's m_len is set to the number of bytes of it that the iovec
 * covers.
 */
static int
nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
    struct iovec **ivp)
{
	struct mbuf *m, *m2 = NULL, *m3;
	struct iovec *iv;
	int i, left, siz;

	left = len;
	m3 = NULL;
	/*
	 * Generate the mbuf list with the uio_iov ref. to it.
	 * First pass: allocate cluster mbufs until "len" bytes fit,
	 * counting them so the iovec can be sized.
	 */
	i = 0;
	while (left > 0) {
		NFSMGET(m);
		MCLGET(m, M_WAITOK);
		m->m_len = 0;
		siz = min(M_TRAILINGSPACE(m), left);
		left -= siz;
		i++;
		if (m3)
			m2->m_next = m;
		else
			m3 = m;
		m2 = m;
	}
	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
	/* Second pass: point one iovec entry at each mbuf's data area. */
	m = m3;
	left = len;
	i = 0;
	while (left > 0) {
		if (m == NULL)
			panic("nfsrv_createiovec iov");
		siz = min(M_TRAILINGSPACE(m), left);
		if (siz > 0) {
			iv->iov_base = mtod(m, caddr_t) + m->m_len;
			iv->iov_len = siz;
			m->m_len += siz;
			left -= siz;
			iv++;
			i++;
		}
		m = m->m_next;
	}
	*mpp = m3;
	*mpendp = m2;
	return (i);
}

/*
 * Create an mbuf chain and an associated iovec that can be used to Read
 * or Getextattr of data.
 * Upon success, return pointers to the first and last mbufs in the chain
 * plus the malloc'd iovec and its iovlen.
 * Same as above, but creates ext_pgs mbuf(s).
 *
 * Each mbuf holds at most maxextsiz bytes and the iovec gets one entry
 * per page, addressed through PHYS_TO_DMAP().
 */
static int
nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
    struct mbuf **mpendp, struct iovec **ivp)
{
	struct mbuf *m, *m2 = NULL, *m3;
	struct iovec *iv;
	int i, left, pgno, siz;

	left = len;
	m3 = NULL;
	/*
	 * Generate the mbuf list with the uio_iov ref. to it.
	 * First pass: allocate the ext_pgs mbufs and count their pages.
	 */
	i = 0;
	while (left > 0) {
		siz = min(left, maxextsiz);
		m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
		left -= siz;
		i += m->m_epg_npgs;
		if (m3 != NULL)
			m2->m_next = m;
		else
			m3 = m;
		m2 = m;
	}
	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
	/* Second pass: one iovec entry per page, up to PAGE_SIZE bytes each. */
	m = m3;
	left = len;
	i = 0;
	pgno = 0;
	while (left > 0) {
		if (m == NULL)
			panic("nfsvno_createiovec_extpgs iov");
		siz = min(PAGE_SIZE, left);
		if (siz > 0) {
			iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
			iv->iov_len = siz;
			m->m_len += siz;
			if (pgno == m->m_epg_npgs - 1)
				m->m_epg_last_len = siz;
			left -= siz;
			iv++;
			i++;
			pgno++;
		}
		/* Move on to the next mbuf once all of its pages are used. */
		if (pgno == m->m_epg_npgs && left > 0) {
			m = m->m_next;
			if (m == NULL)
				panic("nfsvno_createiovec_extpgs iov");
			pgno = 0;
		}
	}
	*mpp = m3;
	*mpendp = m2;
	return (i);
}

/*
 * Read vnode op call into mbuf list.
 *
 * vp        - vnode to read; VOP_READ() is called with IO_NODELOCKED.
 * off, cnt  - file offset and byte count requested.
 * cred      - credentials for the read.
 * maxextsiz - when > 0, use ext_pgs mbufs of at most this size.
 * p         - thread handed to nfsrv_proxyds().
 * mpp       - set to the head of the mbuf chain with the data (NULL when
 *             nothing was read or on a local read error).
 * mpendp    - set to the last mbuf of the chain.
 *
 * Returns 0 or an errno value.  Any nfsrv_proxyds() result other than
 * ENOENT is returned directly.
 */
int
nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
    int maxextsiz, struct thread *p, struct mbuf **mpp,
    struct mbuf **mpendp)
{
	struct mbuf *m;
	struct iovec *iv;
	int error = 0, len, tlen, ioflag = 0;
	struct mbuf *m3;
	struct uio io, *uiop = &io;
	struct nfsheur *nh;

	/*
	 * Attempt to read from a DS file. A return of ENOENT implies
	 * there is no DS file to read.
	 */
	error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
	    NULL, mpendp, NULL, NULL, NULL, 0, NULL);
	if (error != ENOENT)
		return (error);

	len = NFSM_RNDUP(cnt);
	if (maxextsiz > 0)
		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
		    &m3, &m, &iv);
	else
		uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
	uiop->uio_iov = iv;
	uiop->uio_offset = off;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	/* Fold the sequential access hint into the I/O flags. */
	nh = nfsrv_sequential_heuristic(uiop, vp);
	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
	/* XXX KDM make this more systematic? */
	nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
	free(iv, M_TEMP);
	if (error) {
		m_freem(m3);
		*mpp = NULL;
		goto out;
	}
	nh->nh_nextoff = uiop->uio_offset;
	/* Bytes actually read, capped at the count asked for. */
	tlen = len - uiop->uio_resid;
	cnt = cnt < tlen ? cnt : tlen;
	tlen = NFSM_RNDUP(cnt);
	if (tlen == 0) {
		m_freem(m3);
		m3 = m = NULL;
	} else if (len != tlen || tlen != cnt)
		m = nfsrv_adj(m3, len - tlen, tlen - cnt);
	*mpp = m3;
	*mpendp = m;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Create the iovec for the mbuf chain passed in as an argument.
 * The "cp" argument is where the data starts within the first mbuf in
 * the chain. It returns the iovec and the iovcnt.
 *
 * Returns 0 on success or EBADRPC if the chain holds fewer than retlen
 * bytes; in the latter case nothing has been allocated.  On success the
 * iovec comes from M_TEMP and must be freed by the caller.
 */
static int
nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
    int *iovcntp)
{
	struct mbuf *mp;
	struct iovec *ivp;
	int cnt, i, len;

	/*
	 * Loop through the mbuf chain, counting how many mbufs are a
	 * part of this write operation, so the iovec size is known.
	 */
	cnt = 0;
	len = retlen;
	mp = m;
	/* Bytes of the first mbuf that lie at or after cp. */
	i = mtod(mp, caddr_t) + mp->m_len - cp;
	while (len > 0) {
		if (i > 0) {
			len -= i;
			cnt++;
		}
		mp = mp->m_next;
		if (!mp) {
			if (len > 0)
				return (EBADRPC);
		} else
			i = mp->m_len;
	}

	/* Now, create the iovec. */
	mp = m;
	*ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
	    M_WAITOK);
	*iovcntp = cnt;
	i = mtod(mp, caddr_t) + mp->m_len - cp;
	len = retlen;
	while (len > 0) {
		if (mp == NULL)
			panic("nfsrv_createiovecw");
		if (i > 0) {
			i = min(i, len);
			ivp->iov_base = cp;
			ivp->iov_len = i;
			ivp++;
			len -= i;
		}
		mp = mp->m_next;
		if (mp) {
			i = mp->m_len;
			cp = mtod(mp, caddr_t);
		}
	}
	return (0);
}

/*
 * Write vnode op from an mbuf list.
 *
 * vp          - vnode to write; VOP_WRITE() is called with IO_NODELOCKED.
 * off, retlen - file offset and number of bytes to write.
 * stable      - in: requested stability (NFSWRITE_UNSTABLE avoids
 *               IO_SYNC); out: set to NFSWRITE_FILESYNC whenever
 *               nfsrv_proxyds() returns something other than ENOENT.
 * mp, cp      - mbuf chain holding the data and the position of the first
 *               data byte within the first mbuf.
 * cred, p     - credentials and thread for the write.
 *
 * Returns 0 or an errno value.
 */
int
nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
{
	struct iovec *iv;
	int cnt, ioflags, error;
	struct uio io, *uiop = &io;
	struct nfsheur *nh;

	/*
	 * Attempt to write to a DS file. A return of ENOENT implies
	 * there is no DS file to write.
	 */
	error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
	    &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
	if (error != ENOENT) {
		*stable = NFSWRITE_FILESYNC;
		return (error);
	}

	if (*stable == NFSWRITE_UNSTABLE)
		ioflags = IO_NODELOCKED;
	else
		ioflags = (IO_SYNC | IO_NODELOCKED);
	error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
	if (error != 0)
		return (error);
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = cnt;
	uiop->uio_resid = retlen;
	uiop->uio_rw = UIO_WRITE;
	uiop->uio_segflg = UIO_SYSSPACE;
	NFSUIOPROC(uiop, p);
	uiop->uio_offset = off;
	/* Fold the sequential access hint into the I/O flags. */
	nh = nfsrv_sequential_heuristic(uiop, vp);
	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
	/* XXX KDM make this more systematic? */
	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
	error = VOP_WRITE(vp, uiop, ioflags, cred);
	if (error == 0)
		nh->nh_nextoff = uiop->uio_offset;
	free(iv, M_TEMP);

	NFSEXITCODE(error);
	return (error);
}

/*
 * Common code for creating a regular file (plus special files for V2).
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;
	struct thread *p = curthread;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
1184 */ 1185 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1186 nd->nd_cred, p); 1187 } 1188 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1189 NULL, false); 1190 nfsvno_relpathbuf(ndp); 1191 if (!error) { 1192 if (*exclusive_flagp) { 1193 *exclusive_flagp = 0; 1194 NFSVNO_ATTRINIT(nvap); 1195 nvap->na_atime.tv_sec = cverf[0]; 1196 nvap->na_atime.tv_nsec = cverf[1]; 1197 error = VOP_SETATTR(ndp->ni_vp, 1198 &nvap->na_vattr, nd->nd_cred); 1199 if (error != 0) { 1200 vput(ndp->ni_vp); 1201 ndp->ni_vp = NULL; 1202 error = NFSERR_NOTSUPP; 1203 } 1204 } 1205 } 1206 /* 1207 * NFS V2 Only. nfsrvd_mknod() does this for V3. 1208 * (This implies, just get out on an error.) 1209 */ 1210 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || 1211 nvap->na_type == VFIFO) { 1212 if (nvap->na_type == VCHR && rdev == 0xffffffff) 1213 nvap->na_type = VFIFO; 1214 if (nvap->na_type != VFIFO && 1215 (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { 1216 vrele(ndp->ni_startdir); 1217 nfsvno_relpathbuf(ndp); 1218 vput(ndp->ni_dvp); 1219 goto out; 1220 } 1221 nvap->na_rdev = rdev; 1222 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1223 &ndp->ni_cnd, &nvap->na_vattr); 1224 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : 1225 NULL, false); 1226 nfsvno_relpathbuf(ndp); 1227 vrele(ndp->ni_startdir); 1228 if (error) 1229 goto out; 1230 } else { 1231 vrele(ndp->ni_startdir); 1232 nfsvno_relpathbuf(ndp); 1233 vput(ndp->ni_dvp); 1234 error = ENXIO; 1235 goto out; 1236 } 1237 *vpp = ndp->ni_vp; 1238 } else { 1239 /* 1240 * Handle cases where error is already set and/or 1241 * the file exists. 
1242 * 1 - clean up the lookup 1243 * 2 - iff !error and na_size set, truncate it 1244 */ 1245 vrele(ndp->ni_startdir); 1246 nfsvno_relpathbuf(ndp); 1247 *vpp = ndp->ni_vp; 1248 if (ndp->ni_dvp == *vpp) 1249 vrele(ndp->ni_dvp); 1250 else 1251 vput(ndp->ni_dvp); 1252 if (!error && nvap->na_size != VNOVAL) { 1253 error = nfsvno_accchk(*vpp, VWRITE, 1254 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 1255 NFSACCCHK_VPISLOCKED, NULL); 1256 if (!error) { 1257 tempsize = nvap->na_size; 1258 NFSVNO_ATTRINIT(nvap); 1259 nvap->na_size = tempsize; 1260 error = nfsvno_setattr(*vpp, nvap, 1261 nd->nd_cred, p, exp); 1262 } 1263 } 1264 if (error) 1265 vput(*vpp); 1266 } 1267 1268 out: 1269 NFSEXITCODE(error); 1270 return (error); 1271 } 1272 1273 /* 1274 * Do a mknod vnode op. 1275 */ 1276 int 1277 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, 1278 struct thread *p) 1279 { 1280 int error = 0; 1281 enum vtype vtyp; 1282 1283 vtyp = nvap->na_type; 1284 /* 1285 * Iff doesn't exist, create it. 1286 */ 1287 if (ndp->ni_vp) { 1288 vrele(ndp->ni_startdir); 1289 nfsvno_relpathbuf(ndp); 1290 vput(ndp->ni_dvp); 1291 vrele(ndp->ni_vp); 1292 error = EEXIST; 1293 goto out; 1294 } 1295 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { 1296 vrele(ndp->ni_startdir); 1297 nfsvno_relpathbuf(ndp); 1298 vput(ndp->ni_dvp); 1299 error = NFSERR_BADTYPE; 1300 goto out; 1301 } 1302 if (vtyp == VSOCK) { 1303 vrele(ndp->ni_startdir); 1304 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 1305 &ndp->ni_cnd, &nvap->na_vattr); 1306 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? 
&ndp->ni_vp : NULL, 1307 false); 1308 nfsvno_relpathbuf(ndp); 1309 } else { 1310 if (nvap->na_type != VFIFO && 1311 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { 1312 vrele(ndp->ni_startdir); 1313 nfsvno_relpathbuf(ndp); 1314 vput(ndp->ni_dvp); 1315 goto out; 1316 } 1317 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, 1318 &ndp->ni_cnd, &nvap->na_vattr); 1319 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, 1320 false); 1321 nfsvno_relpathbuf(ndp); 1322 vrele(ndp->ni_startdir); 1323 /* 1324 * Since VOP_MKNOD returns the ni_vp, I can't 1325 * see any reason to do the lookup. 1326 */ 1327 } 1328 1329 out: 1330 NFSEXITCODE(error); 1331 return (error); 1332 } 1333 1334 /* 1335 * Mkdir vnode op. 1336 */ 1337 int 1338 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, 1339 struct ucred *cred, struct thread *p, struct nfsexstuff *exp) 1340 { 1341 int error = 0; 1342 1343 if (ndp->ni_vp != NULL) { 1344 if (ndp->ni_dvp == ndp->ni_vp) 1345 vrele(ndp->ni_dvp); 1346 else 1347 vput(ndp->ni_dvp); 1348 vrele(ndp->ni_vp); 1349 nfsvno_relpathbuf(ndp); 1350 error = EEXIST; 1351 goto out; 1352 } 1353 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1354 &nvap->na_vattr); 1355 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); 1356 nfsvno_relpathbuf(ndp); 1357 1358 out: 1359 NFSEXITCODE(error); 1360 return (error); 1361 } 1362 1363 /* 1364 * symlink vnode op. 
1365 */ 1366 int 1367 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, 1368 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, 1369 struct nfsexstuff *exp) 1370 { 1371 int error = 0; 1372 1373 if (ndp->ni_vp) { 1374 vrele(ndp->ni_startdir); 1375 nfsvno_relpathbuf(ndp); 1376 if (ndp->ni_dvp == ndp->ni_vp) 1377 vrele(ndp->ni_dvp); 1378 else 1379 vput(ndp->ni_dvp); 1380 vrele(ndp->ni_vp); 1381 error = EEXIST; 1382 goto out; 1383 } 1384 1385 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, 1386 &nvap->na_vattr, pathcp); 1387 /* 1388 * Although FreeBSD still had the lookup code in 1389 * it for 7/current, there doesn't seem to be any 1390 * point, since VOP_SYMLINK() returns the ni_vp. 1391 * Just vput it for v2. 1392 */ 1393 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0); 1394 vrele(ndp->ni_startdir); 1395 nfsvno_relpathbuf(ndp); 1396 1397 out: 1398 NFSEXITCODE(error); 1399 return (error); 1400 } 1401 1402 /* 1403 * Parse symbolic link arguments. 1404 * This function has an ugly side effect. It will malloc() an area for 1405 * the symlink and set iov_base to point to it, only if it succeeds. 1406 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must 1407 * be FREE'd later. 
1408 */ 1409 int 1410 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, 1411 struct thread *p, char **pathcpp, int *lenp) 1412 { 1413 u_int32_t *tl; 1414 char *pathcp = NULL; 1415 int error = 0, len; 1416 struct nfsv2_sattr *sp; 1417 1418 *pathcpp = NULL; 1419 *lenp = 0; 1420 if ((nd->nd_flag & ND_NFSV3) && 1421 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) 1422 goto nfsmout; 1423 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 1424 len = fxdr_unsigned(int, *tl); 1425 if (len > NFS_MAXPATHLEN || len <= 0) { 1426 error = EBADRPC; 1427 goto nfsmout; 1428 } 1429 pathcp = malloc(len + 1, M_TEMP, M_WAITOK); 1430 error = nfsrv_mtostr(nd, pathcp, len); 1431 if (error) 1432 goto nfsmout; 1433 if (nd->nd_flag & ND_NFSV2) { 1434 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 1435 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); 1436 } 1437 *pathcpp = pathcp; 1438 *lenp = len; 1439 NFSEXITCODE2(0, nd); 1440 return (0); 1441 nfsmout: 1442 if (pathcp) 1443 free(pathcp, M_TEMP); 1444 NFSEXITCODE2(error, nd); 1445 return (error); 1446 } 1447 1448 /* 1449 * Remove a non-directory object. 
1450 */ 1451 int 1452 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1453 struct thread *p, struct nfsexstuff *exp) 1454 { 1455 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; 1456 int error = 0, mirrorcnt; 1457 char fname[PNFS_FILENAME_LEN + 1]; 1458 fhandle_t fh; 1459 1460 vp = ndp->ni_vp; 1461 dsdvp[0] = NULL; 1462 if (vp->v_type == VDIR) 1463 error = NFSERR_ISDIR; 1464 else if (is_v4) 1465 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), 1466 p); 1467 if (error == 0) 1468 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); 1469 if (!error) 1470 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); 1471 if (error == 0 && dsdvp[0] != NULL) 1472 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1473 if (ndp->ni_dvp == vp) 1474 vrele(ndp->ni_dvp); 1475 else 1476 vput(ndp->ni_dvp); 1477 vput(vp); 1478 nfsvno_relpathbuf(ndp); 1479 NFSEXITCODE(error); 1480 return (error); 1481 } 1482 1483 /* 1484 * Remove a directory. 1485 */ 1486 int 1487 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, 1488 struct thread *p, struct nfsexstuff *exp) 1489 { 1490 struct vnode *vp; 1491 int error = 0; 1492 1493 vp = ndp->ni_vp; 1494 if (vp->v_type != VDIR) { 1495 error = ENOTDIR; 1496 goto out; 1497 } 1498 /* 1499 * No rmdir "." please. 1500 */ 1501 if (ndp->ni_dvp == vp) { 1502 error = EINVAL; 1503 goto out; 1504 } 1505 /* 1506 * The root of a mounted filesystem cannot be deleted. 1507 */ 1508 if (vp->v_vflag & VV_ROOT) 1509 error = EBUSY; 1510 out: 1511 if (!error) 1512 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); 1513 if (ndp->ni_dvp == vp) 1514 vrele(ndp->ni_dvp); 1515 else 1516 vput(ndp->ni_dvp); 1517 vput(vp); 1518 nfsvno_relpathbuf(ndp); 1519 NFSEXITCODE(error); 1520 return (error); 1521 } 1522 1523 /* 1524 * Rename vnode op. 
1525 */ 1526 int 1527 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, 1528 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) 1529 { 1530 struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; 1531 int error = 0, mirrorcnt; 1532 char fname[PNFS_FILENAME_LEN + 1]; 1533 fhandle_t fh; 1534 1535 dsdvp[0] = NULL; 1536 fvp = fromndp->ni_vp; 1537 if (ndstat) { 1538 vrele(fromndp->ni_dvp); 1539 vrele(fvp); 1540 error = ndstat; 1541 goto out1; 1542 } 1543 tdvp = tondp->ni_dvp; 1544 tvp = tondp->ni_vp; 1545 if (tvp != NULL) { 1546 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 1547 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; 1548 goto out; 1549 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 1550 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; 1551 goto out; 1552 } 1553 if (tvp->v_type == VDIR && tvp->v_mountedhere) { 1554 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1555 goto out; 1556 } 1557 1558 /* 1559 * A rename to '.' or '..' results in a prematurely 1560 * unlocked vnode on FreeBSD5, so I'm just going to fail that 1561 * here. 1562 */ 1563 if ((tondp->ni_cnd.cn_namelen == 1 && 1564 tondp->ni_cnd.cn_nameptr[0] == '.') || 1565 (tondp->ni_cnd.cn_namelen == 2 && 1566 tondp->ni_cnd.cn_nameptr[0] == '.' && 1567 tondp->ni_cnd.cn_nameptr[1] == '.')) { 1568 error = EINVAL; 1569 goto out; 1570 } 1571 } 1572 if (fvp->v_type == VDIR && fvp->v_mountedhere) { 1573 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1574 goto out; 1575 } 1576 if (fvp->v_mount != tdvp->v_mount) { 1577 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; 1578 goto out; 1579 } 1580 if (fvp == tdvp) { 1581 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; 1582 goto out; 1583 } 1584 if (fvp == tvp) { 1585 /* 1586 * If source and destination are the same, there is nothing to 1587 * do. Set error to -1 to indicate this. 
1588 */ 1589 error = -1; 1590 goto out; 1591 } 1592 if (ndflag & ND_NFSV4) { 1593 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { 1594 error = nfsrv_checkremove(fvp, 0, NULL, 1595 (nfsquad_t)((u_quad_t)0), p); 1596 NFSVOPUNLOCK(fvp); 1597 } else 1598 error = EPERM; 1599 if (tvp && !error) 1600 error = nfsrv_checkremove(tvp, 1, NULL, 1601 (nfsquad_t)((u_quad_t)0), p); 1602 } else { 1603 /* 1604 * For NFSv2 and NFSv3, try to get rid of the delegation, so 1605 * that the NFSv4 client won't be confused by the rename. 1606 * Since nfsd_recalldelegation() can only be called on an 1607 * unlocked vnode at this point and fvp is the file that will 1608 * still exist after the rename, just do fvp. 1609 */ 1610 nfsd_recalldelegation(fvp, p); 1611 } 1612 if (error == 0 && tvp != NULL) { 1613 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); 1614 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" 1615 " dsdvp=%p\n", dsdvp[0]); 1616 } 1617 out: 1618 if (!error) { 1619 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, 1620 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, 1621 &tondp->ni_cnd); 1622 } else { 1623 if (tdvp == tvp) 1624 vrele(tdvp); 1625 else 1626 vput(tdvp); 1627 if (tvp) 1628 vput(tvp); 1629 vrele(fromndp->ni_dvp); 1630 vrele(fvp); 1631 if (error == -1) 1632 error = 0; 1633 } 1634 1635 /* 1636 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and 1637 * if the rename succeeded, the DS file for the tvp needs to be 1638 * removed. 1639 */ 1640 if (error == 0 && dsdvp[0] != NULL) { 1641 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); 1642 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); 1643 } 1644 1645 vrele(tondp->ni_startdir); 1646 nfsvno_relpathbuf(tondp); 1647 out1: 1648 vrele(fromndp->ni_startdir); 1649 nfsvno_relpathbuf(fromndp); 1650 NFSEXITCODE(error); 1651 return (error); 1652 } 1653 1654 /* 1655 * Link vnode op. 
1656 */ 1657 int 1658 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, 1659 struct thread *p, struct nfsexstuff *exp) 1660 { 1661 struct vnode *xp; 1662 int error = 0; 1663 1664 xp = ndp->ni_vp; 1665 if (xp != NULL) { 1666 error = EEXIST; 1667 } else { 1668 xp = ndp->ni_dvp; 1669 if (vp->v_mount != xp->v_mount) 1670 error = EXDEV; 1671 } 1672 if (!error) { 1673 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 1674 if (!VN_IS_DOOMED(vp)) 1675 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); 1676 else 1677 error = EPERM; 1678 if (ndp->ni_dvp == vp) { 1679 vrele(ndp->ni_dvp); 1680 NFSVOPUNLOCK(vp); 1681 } else { 1682 vref(vp); 1683 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true); 1684 } 1685 } else { 1686 if (ndp->ni_dvp == ndp->ni_vp) 1687 vrele(ndp->ni_dvp); 1688 else 1689 vput(ndp->ni_dvp); 1690 if (ndp->ni_vp) 1691 vrele(ndp->ni_vp); 1692 } 1693 nfsvno_relpathbuf(ndp); 1694 NFSEXITCODE(error); 1695 return (error); 1696 } 1697 1698 /* 1699 * Do the fsync() appropriate for the commit. 1700 */ 1701 int 1702 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, 1703 struct thread *td) 1704 { 1705 int error = 0; 1706 1707 /* 1708 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of 1709 * file is done. At this time VOP_FSYNC does not accept offset and 1710 * byte count parameters so call VOP_FSYNC the whole file for now. 1711 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. 1712 * File systems that do not use the buffer cache (as indicated 1713 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). 
1714 */ 1715 if (cnt == 0 || cnt > MAX_COMMIT_COUNT || 1716 (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { 1717 /* 1718 * Give up and do the whole thing 1719 */ 1720 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1721 VM_OBJECT_WLOCK(vp->v_object); 1722 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 1723 VM_OBJECT_WUNLOCK(vp->v_object); 1724 } 1725 error = VOP_FSYNC(vp, MNT_WAIT, td); 1726 } else { 1727 /* 1728 * Locate and synchronously write any buffers that fall 1729 * into the requested range. Note: we are assuming that 1730 * f_iosize is a power of 2. 1731 */ 1732 int iosize = vp->v_mount->mnt_stat.f_iosize; 1733 int iomask = iosize - 1; 1734 struct bufobj *bo; 1735 daddr_t lblkno; 1736 1737 /* 1738 * Align to iosize boundary, super-align to page boundary. 1739 */ 1740 if (off & iomask) { 1741 cnt += off & iomask; 1742 off &= ~(u_quad_t)iomask; 1743 } 1744 if (off & PAGE_MASK) { 1745 cnt += off & PAGE_MASK; 1746 off &= ~(u_quad_t)PAGE_MASK; 1747 } 1748 lblkno = off / iosize; 1749 1750 if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { 1751 VM_OBJECT_WLOCK(vp->v_object); 1752 vm_object_page_clean(vp->v_object, off, off + cnt, 1753 OBJPC_SYNC); 1754 VM_OBJECT_WUNLOCK(vp->v_object); 1755 } 1756 1757 bo = &vp->v_bufobj; 1758 BO_LOCK(bo); 1759 while (cnt > 0) { 1760 struct buf *bp; 1761 1762 /* 1763 * If we have a buffer and it is marked B_DELWRI we 1764 * have to lock and write it. Otherwise the prior 1765 * write is assumed to have already been committed. 1766 * 1767 * gbincore() can return invalid buffers now so we 1768 * have to check that bit as well (though B_DELWRI 1769 * should not be set if B_INVAL is set there could be 1770 * a race here since we haven't locked the buffer). 
1771 */ 1772 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { 1773 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | 1774 LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { 1775 BO_LOCK(bo); 1776 continue; /* retry */ 1777 } 1778 if ((bp->b_flags & (B_DELWRI|B_INVAL)) == 1779 B_DELWRI) { 1780 bremfree(bp); 1781 bp->b_flags &= ~B_ASYNC; 1782 bwrite(bp); 1783 ++nfs_commit_miss; 1784 } else 1785 BUF_UNLOCK(bp); 1786 BO_LOCK(bo); 1787 } 1788 ++nfs_commit_blks; 1789 if (cnt < iosize) 1790 break; 1791 cnt -= iosize; 1792 ++lblkno; 1793 } 1794 BO_UNLOCK(bo); 1795 } 1796 NFSEXITCODE(error); 1797 return (error); 1798 } 1799 1800 /* 1801 * Statfs vnode op. 1802 */ 1803 int 1804 nfsvno_statfs(struct vnode *vp, struct statfs *sf) 1805 { 1806 struct statfs *tsf; 1807 int error; 1808 1809 tsf = NULL; 1810 if (nfsrv_devidcnt > 0) { 1811 /* For a pNFS service, get the DS numbers. */ 1812 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); 1813 error = nfsrv_pnfsstatfs(tsf, vp->v_mount); 1814 if (error != 0) { 1815 free(tsf, M_TEMP); 1816 tsf = NULL; 1817 } 1818 } 1819 error = VFS_STATFS(vp->v_mount, sf); 1820 if (error == 0) { 1821 if (tsf != NULL) { 1822 sf->f_blocks = tsf->f_blocks; 1823 sf->f_bavail = tsf->f_bavail; 1824 sf->f_bfree = tsf->f_bfree; 1825 sf->f_bsize = tsf->f_bsize; 1826 } 1827 /* 1828 * Since NFS handles these values as unsigned on the 1829 * wire, there is no way to represent negative values, 1830 * so set them to 0. Without this, they will appear 1831 * to be very large positive values for clients like 1832 * Solaris10. 1833 */ 1834 if (sf->f_bavail < 0) 1835 sf->f_bavail = 0; 1836 if (sf->f_ffree < 0) 1837 sf->f_ffree = 0; 1838 } 1839 free(tsf, M_TEMP); 1840 NFSEXITCODE(error); 1841 return (error); 1842 } 1843 1844 /* 1845 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but 1846 * must handle nfsrv_opencheck() calls after any other access checks. 
1847 */ 1848 void 1849 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, 1850 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, 1851 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, 1852 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, 1853 struct nfsexstuff *exp, struct vnode **vpp) 1854 { 1855 struct vnode *vp = NULL; 1856 u_quad_t tempsize; 1857 struct nfsexstuff nes; 1858 struct thread *p = curthread; 1859 1860 if (ndp->ni_vp == NULL) 1861 nd->nd_repstat = nfsrv_opencheck(clientid, 1862 stateidp, stp, NULL, nd, p, nd->nd_repstat); 1863 if (!nd->nd_repstat) { 1864 if (ndp->ni_vp == NULL) { 1865 vrele(ndp->ni_startdir); 1866 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, 1867 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); 1868 /* For a pNFS server, create the data file on a DS. */ 1869 if (nd->nd_repstat == 0) { 1870 /* 1871 * Create a data file on a DS for a pNFS server. 1872 * This function just returns if not 1873 * running a pNFS DS or the creation fails. 1874 */ 1875 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, 1876 cred, p); 1877 } 1878 VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ? 
1879 &ndp->ni_vp : NULL, false); 1880 nfsvno_relpathbuf(ndp); 1881 if (!nd->nd_repstat) { 1882 if (*exclusive_flagp) { 1883 *exclusive_flagp = 0; 1884 NFSVNO_ATTRINIT(nvap); 1885 nvap->na_atime.tv_sec = cverf[0]; 1886 nvap->na_atime.tv_nsec = cverf[1]; 1887 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, 1888 &nvap->na_vattr, cred); 1889 if (nd->nd_repstat != 0) { 1890 vput(ndp->ni_vp); 1891 ndp->ni_vp = NULL; 1892 nd->nd_repstat = NFSERR_NOTSUPP; 1893 } else 1894 NFSSETBIT_ATTRBIT(attrbitp, 1895 NFSATTRBIT_TIMEACCESS); 1896 } else { 1897 nfsrv_fixattr(nd, ndp->ni_vp, nvap, 1898 aclp, p, attrbitp, exp); 1899 } 1900 } 1901 vp = ndp->ni_vp; 1902 } else { 1903 if (ndp->ni_startdir) 1904 vrele(ndp->ni_startdir); 1905 nfsvno_relpathbuf(ndp); 1906 vp = ndp->ni_vp; 1907 if (create == NFSV4OPEN_CREATE) { 1908 if (ndp->ni_dvp == vp) 1909 vrele(ndp->ni_dvp); 1910 else 1911 vput(ndp->ni_dvp); 1912 } 1913 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { 1914 if (ndp->ni_cnd.cn_flags & RDONLY) 1915 NFSVNO_SETEXRDONLY(&nes); 1916 else 1917 NFSVNO_EXINIT(&nes); 1918 nd->nd_repstat = nfsvno_accchk(vp, 1919 VWRITE, cred, &nes, p, 1920 NFSACCCHK_NOOVERRIDE, 1921 NFSACCCHK_VPISLOCKED, NULL); 1922 nd->nd_repstat = nfsrv_opencheck(clientid, 1923 stateidp, stp, vp, nd, p, nd->nd_repstat); 1924 if (!nd->nd_repstat) { 1925 tempsize = nvap->na_size; 1926 NFSVNO_ATTRINIT(nvap); 1927 nvap->na_size = tempsize; 1928 nd->nd_repstat = nfsvno_setattr(vp, 1929 nvap, cred, p, exp); 1930 } 1931 } else if (vp->v_type == VREG) { 1932 nd->nd_repstat = nfsrv_opencheck(clientid, 1933 stateidp, stp, vp, nd, p, nd->nd_repstat); 1934 } 1935 } 1936 } else { 1937 nfsvno_relpathbuf(ndp); 1938 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { 1939 vrele(ndp->ni_startdir); 1940 if (ndp->ni_dvp == ndp->ni_vp) 1941 vrele(ndp->ni_dvp); 1942 else 1943 vput(ndp->ni_dvp); 1944 if (ndp->ni_vp) 1945 vput(ndp->ni_vp); 1946 } 1947 } 1948 *vpp = vp; 1949 1950 NFSEXITCODE2(0, nd); 1951 } 1952 1953 /* 1954 * Updates the file 
rev and sets the mtime and ctime 1955 * to the current clock time, returning the va_filerev and va_Xtime 1956 * values. 1957 * Return ESTALE to indicate the vnode is VIRF_DOOMED. 1958 */ 1959 int 1960 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, 1961 struct nfsrv_descript *nd, struct thread *p) 1962 { 1963 struct vattr va; 1964 1965 VATTR_NULL(&va); 1966 vfs_timestamp(&va.va_mtime); 1967 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { 1968 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 1969 if (VN_IS_DOOMED(vp)) 1970 return (ESTALE); 1971 } 1972 (void) VOP_SETATTR(vp, &va, nd->nd_cred); 1973 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); 1974 return (0); 1975 } 1976 1977 /* 1978 * Glue routine to nfsv4_fillattr(). 1979 */ 1980 int 1981 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, 1982 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, 1983 struct ucred *cred, struct thread *p, int isdgram, int reterr, 1984 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) 1985 { 1986 struct statfs *sf; 1987 int error; 1988 1989 sf = NULL; 1990 if (nfsrv_devidcnt > 0 && 1991 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || 1992 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || 1993 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { 1994 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); 1995 error = nfsrv_pnfsstatfs(sf, mp); 1996 if (error != 0) { 1997 free(sf, M_TEMP); 1998 sf = NULL; 1999 } 2000 } 2001 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, 2002 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, 2003 mounted_on_fileno, sf); 2004 free(sf, M_TEMP); 2005 NFSEXITCODE2(0, nd); 2006 return (error); 2007 } 2008 2009 /* Since the Readdir vnode ops vary, put the entire functions in here. 
 */
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count, break out of the loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other operating systems.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 * The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of: just name strings and file id's, or the
 *	entire reply rpc, or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
2032 */ 2033 int 2034 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, 2035 struct vnode *vp, struct nfsexstuff *exp) 2036 { 2037 struct dirent *dp; 2038 u_int32_t *tl; 2039 int dirlen; 2040 char *cpos, *cend, *rbuf; 2041 struct nfsvattr at; 2042 int nlen, error = 0, getret = 1; 2043 int siz, cnt, fullsiz, eofflag, ncookies; 2044 u_int64_t off, toff, verf __unused; 2045 uint64_t *cookies = NULL, *cookiep; 2046 struct uio io; 2047 struct iovec iv; 2048 int is_ufs; 2049 struct thread *p = curthread; 2050 2051 if (nd->nd_repstat) { 2052 nfsrv_postopattr(nd, getret, &at); 2053 goto out; 2054 } 2055 if (nd->nd_flag & ND_NFSV2) { 2056 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2057 off = fxdr_unsigned(u_quad_t, *tl++); 2058 } else { 2059 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); 2060 off = fxdr_hyper(tl); 2061 tl += 2; 2062 verf = fxdr_hyper(tl); 2063 tl += 2; 2064 } 2065 toff = off; 2066 cnt = fxdr_unsigned(int, *tl); 2067 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2068 cnt = NFS_SRVMAXDATA(nd); 2069 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2070 fullsiz = siz; 2071 if (nd->nd_flag & ND_NFSV3) { 2072 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, 2073 NULL); 2074 #if 0 2075 /* 2076 * va_filerev is not sufficient as a cookie verifier, 2077 * since it is not supposed to change when entries are 2078 * removed/added unless that offset cookies returned to 2079 * the client are no longer valid. 
2080 */ 2081 if (!nd->nd_repstat && toff && verf != at.na_filerev) 2082 nd->nd_repstat = NFSERR_BAD_COOKIE; 2083 #endif 2084 } 2085 if (!nd->nd_repstat && vp->v_type != VDIR) 2086 nd->nd_repstat = NFSERR_NOTDIR; 2087 if (nd->nd_repstat == 0 && cnt == 0) { 2088 if (nd->nd_flag & ND_NFSV2) 2089 /* NFSv2 does not have NFSERR_TOOSMALL */ 2090 nd->nd_repstat = EPERM; 2091 else 2092 nd->nd_repstat = NFSERR_TOOSMALL; 2093 } 2094 if (!nd->nd_repstat) 2095 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2096 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2097 NFSACCCHK_VPISLOCKED, NULL); 2098 if (nd->nd_repstat) { 2099 vput(vp); 2100 if (nd->nd_flag & ND_NFSV3) 2101 nfsrv_postopattr(nd, getret, &at); 2102 goto out; 2103 } 2104 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2105 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2106 again: 2107 eofflag = 0; 2108 if (cookies) { 2109 free(cookies, M_TEMP); 2110 cookies = NULL; 2111 } 2112 2113 iv.iov_base = rbuf; 2114 iv.iov_len = siz; 2115 io.uio_iov = &iv; 2116 io.uio_iovcnt = 1; 2117 io.uio_offset = (off_t)off; 2118 io.uio_resid = siz; 2119 io.uio_segflg = UIO_SYSSPACE; 2120 io.uio_rw = UIO_READ; 2121 io.uio_td = NULL; 2122 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2123 &cookies); 2124 off = (u_int64_t)io.uio_offset; 2125 if (io.uio_resid) 2126 siz -= io.uio_resid; 2127 2128 if (!cookies && !nd->nd_repstat) 2129 nd->nd_repstat = NFSERR_PERM; 2130 if (nd->nd_flag & ND_NFSV3) { 2131 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2132 if (!nd->nd_repstat) 2133 nd->nd_repstat = getret; 2134 } 2135 2136 /* 2137 * Handles the failed cases. nd->nd_repstat == 0 past here. 
2138 */ 2139 if (nd->nd_repstat) { 2140 vput(vp); 2141 free(rbuf, M_TEMP); 2142 if (cookies) 2143 free(cookies, M_TEMP); 2144 if (nd->nd_flag & ND_NFSV3) 2145 nfsrv_postopattr(nd, getret, &at); 2146 goto out; 2147 } 2148 /* 2149 * If nothing read, return eof 2150 * rpc reply 2151 */ 2152 if (siz == 0) { 2153 vput(vp); 2154 if (nd->nd_flag & ND_NFSV2) { 2155 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2156 } else { 2157 nfsrv_postopattr(nd, getret, &at); 2158 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2159 txdr_hyper(at.na_filerev, tl); 2160 tl += 2; 2161 } 2162 *tl++ = newnfs_false; 2163 *tl = newnfs_true; 2164 free(rbuf, M_TEMP); 2165 free(cookies, M_TEMP); 2166 goto out; 2167 } 2168 2169 /* 2170 * Check for degenerate cases of nothing useful read. 2171 * If so go try again 2172 */ 2173 cpos = rbuf; 2174 cend = rbuf + siz; 2175 dp = (struct dirent *)cpos; 2176 cookiep = cookies; 2177 2178 /* 2179 * For some reason FreeBSD's ufs_readdir() chooses to back the 2180 * directory offset up to a block boundary, so it is necessary to 2181 * skip over the records that precede the requested offset. This 2182 * requires the assumption that file offset cookies monotonically 2183 * increase. 2184 */ 2185 while (cpos < cend && ncookies > 0 && 2186 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2187 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { 2188 cpos += dp->d_reclen; 2189 dp = (struct dirent *)cpos; 2190 cookiep++; 2191 ncookies--; 2192 } 2193 if (cpos >= cend || ncookies == 0) { 2194 siz = fullsiz; 2195 toff = off; 2196 goto again; 2197 } 2198 vput(vp); 2199 2200 /* 2201 * If cnt > MCLBYTES and the reply will not be saved, use 2202 * ext_pgs mbufs for TLS. 2203 * For NFSv4.0, we do not know for sure if the reply will 2204 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2205 */ 2206 if (cnt > MCLBYTES && siz > MCLBYTES && 2207 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2208 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2209 nd->nd_flag |= ND_EXTPG; 2210 2211 /* 2212 * dirlen is the size of the reply, including all XDR and must 2213 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate 2214 * if the XDR should be included in "count", but to be safe, we do. 2215 * (Include the two booleans at the end of the reply in dirlen now.) 2216 */ 2217 if (nd->nd_flag & ND_NFSV3) { 2218 nfsrv_postopattr(nd, getret, &at); 2219 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2220 txdr_hyper(at.na_filerev, tl); 2221 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2222 } else { 2223 dirlen = 2 * NFSX_UNSIGNED; 2224 } 2225 2226 /* Loop through the records and build reply */ 2227 while (cpos < cend && ncookies > 0) { 2228 nlen = dp->d_namlen; 2229 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2230 nlen <= NFS_MAXNAMLEN) { 2231 if (nd->nd_flag & ND_NFSV3) 2232 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2233 else 2234 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); 2235 if (dirlen > cnt) { 2236 eofflag = 0; 2237 break; 2238 } 2239 2240 /* 2241 * Build the directory record xdr from 2242 * the dirent entry. 
2243 */ 2244 if (nd->nd_flag & ND_NFSV3) { 2245 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2246 *tl++ = newnfs_true; 2247 *tl++ = 0; 2248 } else { 2249 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2250 *tl++ = newnfs_true; 2251 } 2252 *tl = txdr_unsigned(dp->d_fileno); 2253 (void) nfsm_strtom(nd, dp->d_name, nlen); 2254 if (nd->nd_flag & ND_NFSV3) { 2255 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2256 txdr_hyper(*cookiep, tl); 2257 } else { 2258 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 2259 *tl = txdr_unsigned(*cookiep); 2260 } 2261 } 2262 cpos += dp->d_reclen; 2263 dp = (struct dirent *)cpos; 2264 cookiep++; 2265 ncookies--; 2266 } 2267 if (cpos < cend) 2268 eofflag = 0; 2269 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2270 *tl++ = newnfs_false; 2271 if (eofflag) 2272 *tl = newnfs_true; 2273 else 2274 *tl = newnfs_false; 2275 free(rbuf, M_TEMP); 2276 free(cookies, M_TEMP); 2277 2278 out: 2279 NFSEXITCODE2(0, nd); 2280 return (0); 2281 nfsmout: 2282 vput(vp); 2283 NFSEXITCODE2(error, nd); 2284 return (error); 2285 } 2286 2287 /* 2288 * Readdirplus for V3 and Readdir for V4. 
2289 */ /* nfsrvd_readdirplus: dissects the cookie offset, cookie verifier, siz and cnt from the request, reads directory entries with VOP_READDIR() into a malloc'd buffer and builds one reply record per usable entry until cnt bytes would be exceeded. For NFSv4 the requested attribute bits are parsed first and the "." and ".." entries are not returned. Returns 0 with the status left in nd->nd_repstat, except on the nfsmout path, where vp is released with vput() and the parse error is returned. */ 2290 int 2291 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, 2292 struct vnode *vp, struct nfsexstuff *exp) 2293 { 2294 struct dirent *dp; 2295 u_int32_t *tl; 2296 int dirlen; 2297 char *cpos, *cend, *rbuf; 2298 struct vnode *nvp; 2299 fhandle_t nfh; 2300 struct nfsvattr nva, at, *nvap = &nva; 2301 struct mbuf *mb0, *mb1; 2302 struct nfsreferral *refp; 2303 int nlen, r, error = 0, getret = 1, usevget = 1; 2304 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; 2305 caddr_t bpos0, bpos1; 2306 u_int64_t off, toff, verf __unused; 2307 uint64_t *cookies = NULL, *cookiep; 2308 nfsattrbit_t attrbits, rderrbits, savbits; 2309 struct uio io; 2310 struct iovec iv; 2311 struct componentname cn; 2312 int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; 2313 struct mount *mp, *new_mp; 2314 uint64_t mounted_on_fileno; 2315 struct thread *p = curthread; 2316 int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; 2317 /* A failure is already recorded in nd_repstat; getret is still 1 here and at has not been filled in. */ 2318 if (nd->nd_repstat) { 2319 nfsrv_postopattr(nd, getret, &at); 2320 goto out; 2321 } 2322 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); 2323 off = fxdr_hyper(tl); 2324 toff = off; 2325 tl += 2; 2326 verf = fxdr_hyper(tl); 2327 tl += 2; 2328 siz = fxdr_unsigned(int, *tl++); 2329 cnt = fxdr_unsigned(int, *tl); 2330 2331 /* 2332 * Use the server's maximum data transfer size as the upper bound 2333 * on reply datalen. 2334 */ 2335 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) 2336 cnt = NFS_SRVMAXDATA(nd); 2337 2338 /* 2339 * siz is a "hint" of how much directory information (name, fileid, 2340 * cookie) should be in the reply. At least one client "hints" 0, 2341 * so I set it to cnt for that case. I also round it up to the 2342 * next multiple of DIRBLKSIZ. 2343 * Since the size of a Readdirplus directory entry reply will always 2344 * be greater than a directory entry returned by VOP_READDIR(), it 2345 * does not make sense to read more than NFS_SRVMAXDATA() via 2346 * VOP_READDIR(). 
2347 */ 2348 if (siz <= 0) 2349 siz = cnt; 2350 else if (siz > NFS_SRVMAXDATA(nd)) 2351 siz = NFS_SRVMAXDATA(nd); 2352 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); 2353 2354 if (nd->nd_flag & ND_NFSV4) { 2355 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 2356 if (error) 2357 goto nfsmout; 2358 NFSSET_ATTRBIT(&savbits, &attrbits); 2359 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); 2360 NFSZERO_ATTRBIT(&rderrbits); 2361 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); 2362 } else { 2363 NFSZERO_ATTRBIT(&attrbits); 2364 } /* fullsiz remembers the rounded read size so that a retry can restore siz after a short read. */ 2365 fullsiz = siz; 2366 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2367 #if 0 2368 if (!nd->nd_repstat) { 2369 if (off && verf != at.na_filerev) { 2370 /* 2371 * va_filerev is not sufficient as a cookie verifier, 2372 * since it is not supposed to change when entries are 2373 * removed/added unless that offset cookies returned to 2374 * the client are no longer valid. 2375 */ 2376 if (nd->nd_flag & ND_NFSV4) { 2377 nd->nd_repstat = NFSERR_NOTSAME; 2378 } else { 2379 nd->nd_repstat = NFSERR_BAD_COOKIE; 2380 } 2381 } 2382 } 2383 #endif 2384 if (!nd->nd_repstat && vp->v_type != VDIR) 2385 nd->nd_repstat = NFSERR_NOTDIR; 2386 if (!nd->nd_repstat && cnt == 0) 2387 nd->nd_repstat = NFSERR_TOOSMALL; 2388 if (!nd->nd_repstat) 2389 nd->nd_repstat = nfsvno_accchk(vp, VEXEC, 2390 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 2391 NFSACCCHK_VPISLOCKED, NULL); 2392 if (nd->nd_repstat) { 2393 vput(vp); 2394 if (nd->nd_flag & ND_NFSV3) 2395 nfsrv_postopattr(nd, getret, &at); 2396 goto out; 2397 } 2398 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; 2399 is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; 2400 2401 rbuf = malloc(siz, M_TEMP, M_WAITOK); 2402 again: 2403 eofflag = 0; 2404 if (cookies) { 2405 free(cookies, M_TEMP); 2406 cookies = NULL; 2407 } 2408 2409 iv.iov_base = rbuf; 2410 iv.iov_len = siz; 2411 io.uio_iov = &iv; 2412 io.uio_iovcnt = 1; 2413 io.uio_offset = (off_t)off; 2414 io.uio_resid = 
siz; 2415 io.uio_segflg = UIO_SYSSPACE; 2416 io.uio_rw = UIO_READ; 2417 io.uio_td = NULL; 2418 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, 2419 &cookies); 2420 off = (u_int64_t)io.uio_offset; /* After this adjustment siz is the number of bytes VOP_READDIR() actually stored in rbuf. */ 2421 if (io.uio_resid) 2422 siz -= io.uio_resid; 2423 /* The directory attributes are fetched again after the read; at.na_filerev is sent as the verifier and, for NFSv3, at is sent as post-op attributes. */ 2424 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); 2425 2426 if (!cookies && !nd->nd_repstat) 2427 nd->nd_repstat = NFSERR_PERM; 2428 if (!nd->nd_repstat) 2429 nd->nd_repstat = getret; 2430 if (nd->nd_repstat) { 2431 vput(vp); 2432 if (cookies) 2433 free(cookies, M_TEMP); 2434 free(rbuf, M_TEMP); 2435 if (nd->nd_flag & ND_NFSV3) 2436 nfsrv_postopattr(nd, getret, &at); 2437 goto out; 2438 } 2439 /* 2440 * If nothing read, return eof 2441 * rpc reply 2442 */ 2443 if (siz == 0) { 2444 vput(vp); 2445 if (nd->nd_flag & ND_NFSV3) 2446 nfsrv_postopattr(nd, getret, &at); 2447 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 2448 txdr_hyper(at.na_filerev, tl); 2449 tl += 2; 2450 *tl++ = newnfs_false; 2451 *tl = newnfs_true; 2452 free(cookies, M_TEMP); 2453 free(rbuf, M_TEMP); 2454 goto out; 2455 } 2456 2457 /* 2458 * Check for degenerate cases of nothing useful read. 2459 * If so go try again 2460 */ 2461 cpos = rbuf; 2462 cend = rbuf + siz; 2463 dp = (struct dirent *)cpos; 2464 cookiep = cookies; 2465 2466 /* 2467 * For some reason FreeBSD's ufs_readdir() chooses to back the 2468 * directory offset up to a block boundary, so it is necessary to 2469 * skip over the records that precede the requested offset. This 2470 * requires the assumption that file offset cookies monotonically 2471 * increase. 2472 */ 2473 while (cpos < cend && ncookies > 0 && 2474 (dp->d_fileno == 0 || dp->d_type == DT_WHT || 2475 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || 2476 ((nd->nd_flag & ND_NFSV4) && 2477 ((dp->d_namlen == 1 && dp->d_name[0] == '.') || 2478 (dp->d_namlen==2 && dp->d_name[0]=='.' 
&& dp->d_name[1]=='.'))))) { 2479 cpos += dp->d_reclen; 2480 dp = (struct dirent *)cpos; 2481 cookiep++; 2482 ncookies--; 2483 } 2484 if (cpos >= cend || ncookies == 0) { 2485 siz = fullsiz; 2486 toff = off; 2487 goto again; 2488 } 2489 2490 /* 2491 * Busy the file system so that the mount point won't go away 2492 * and, as such, VFS_VGET() can be used safely. 2493 */ /* A reference on mp is taken before vp is unlocked and dropped right after vfs_busy() returns, presumably so that mp stays valid across the unlock. From here on vp is referenced but not locked. */ 2494 mp = vp->v_mount; 2495 vfs_ref(mp); 2496 NFSVOPUNLOCK(vp); 2497 nd->nd_repstat = vfs_busy(mp, 0); 2498 vfs_rel(mp); 2499 if (nd->nd_repstat != 0) { 2500 vrele(vp); 2501 free(cookies, M_TEMP); 2502 free(rbuf, M_TEMP); 2503 if (nd->nd_flag & ND_NFSV3) 2504 nfsrv_postopattr(nd, getret, &at); 2505 goto out; 2506 } 2507 2508 /* 2509 * Check to see if entries in this directory can be safely acquired 2510 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. 2511 * ZFS snapshot directories need VOP_LOOKUP(), so that any 2512 * automount of the snapshot directory that is required will 2513 * be done. 2514 * This needs to be done here for NFSv4, since NFSv4 never does 2515 * a VFS_VGET() for "." or "..". 2516 */ 2517 if (is_zfs == 1) { 2518 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); 2519 if (r == EOPNOTSUPP) { 2520 usevget = 0; 2521 cn.cn_nameiop = LOOKUP; 2522 cn.cn_lkflags = LK_SHARED | LK_RETRY; 2523 cn.cn_cred = nd->nd_cred; 2524 } else if (r == 0) 2525 vput(nvp); 2526 } 2527 2528 /* 2529 * If the reply is likely to exceed MCLBYTES and the reply will 2530 * not be saved, use ext_pgs mbufs for TLS. 2531 * It is difficult to predict how large each entry will be and 2532 * how many entries have been read, so just assume the directory 2533 * entries grow by a factor of 4 when attributes are included. 2534 * For NFSv4.0, we do not know for sure if the reply will 2535 * be saved, so do not use ext_pgs mbufs for NFSv4.0. 
2536 */ 2537 if (cnt > MCLBYTES && siz > MCLBYTES / 4 && 2538 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && 2539 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) 2540 nd->nd_flag |= ND_EXTPG; 2541 2542 /* 2543 * Save this position, in case there is an error before one entry 2544 * is created. 2545 */ 2546 mb0 = nd->nd_mb; 2547 bpos0 = nd->nd_bpos; 2548 bextpg0 = nd->nd_bextpg; 2549 bextpgsiz0 = nd->nd_bextpgsiz; 2550 2551 /* 2552 * Fill in the first part of the reply. 2553 * dirlen is the reply length in bytes and cannot exceed cnt. 2554 * (Include the two booleans at the end of the reply in dirlen now, 2555 * so we recognize when we have exceeded cnt.) 2556 */ 2557 if (nd->nd_flag & ND_NFSV3) { 2558 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; 2559 nfsrv_postopattr(nd, getret, &at); 2560 } else { 2561 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; 2562 } 2563 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); 2564 txdr_hyper(at.na_filerev, tl); 2565 2566 /* 2567 * Save this position, in case there is an empty reply needed. 2568 */ 2569 mb1 = nd->nd_mb; 2570 bpos1 = nd->nd_bpos; 2571 bextpg1 = nd->nd_bextpg; 2572 bextpgsiz1 = nd->nd_bextpgsiz; 2573 2574 /* Loop through the records and build reply */ /* entrycnt counts the entries that fit within cnt; it is examined after the loop to choose between trimming the last entry and NFSERR_TOOSMALL. */ 2575 entrycnt = 0; 2576 while (cpos < cend && ncookies > 0 && dirlen < cnt) { 2577 nlen = dp->d_namlen; 2578 if (dp->d_fileno != 0 && dp->d_type != DT_WHT && 2579 nlen <= NFS_MAXNAMLEN && 2580 ((nd->nd_flag & ND_NFSV3) || nlen > 2 || 2581 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) 2582 || (nlen == 1 && dp->d_name[0] != '.'))) { 2583 /* 2584 * Save the current position in the reply, in case 2585 * this entry exceeds cnt. 2586 */ 2587 mb1 = nd->nd_mb; 2588 bpos1 = nd->nd_bpos; 2589 bextpg1 = nd->nd_bextpg; 2590 bextpgsiz1 = nd->nd_bextpgsiz; 2591 2592 /* 2593 * For readdir_and_lookup get the vnode using 2594 * the file number. 
2595 */ 2596 nvp = NULL; 2597 refp = NULL; 2598 r = 0; 2599 at_root = 0; 2600 needs_unbusy = 0; 2601 new_mp = mp; 2602 mounted_on_fileno = (uint64_t)dp->d_fileno; 2603 if ((nd->nd_flag & ND_NFSV3) || 2604 NFSNONZERO_ATTRBIT(&savbits)) { 2605 if (nd->nd_flag & ND_NFSV4) 2606 refp = nfsv4root_getreferral(NULL, 2607 vp, dp->d_fileno); 2608 if (refp == NULL) { 2609 if (usevget) 2610 r = VFS_VGET(mp, dp->d_fileno, 2611 LK_SHARED, &nvp); 2612 else 2613 r = EOPNOTSUPP; 2614 if (r == EOPNOTSUPP) { 2615 if (usevget) { 2616 usevget = 0; 2617 cn.cn_nameiop = LOOKUP; 2618 cn.cn_lkflags = 2619 LK_SHARED | 2620 LK_RETRY; 2621 cn.cn_cred = 2622 nd->nd_cred; 2623 } 2624 cn.cn_nameptr = dp->d_name; 2625 cn.cn_namelen = nlen; 2626 cn.cn_flags = ISLASTCN | 2627 NOFOLLOW | LOCKLEAF; 2628 if (nlen == 2 && 2629 dp->d_name[0] == '.' && 2630 dp->d_name[1] == '.') 2631 cn.cn_flags |= 2632 ISDOTDOT; /* vp was unlocked before vfs_busy(), so it is locked again around the lookup. */ 2633 if (NFSVOPLOCK(vp, LK_SHARED) 2634 != 0) { 2635 nd->nd_repstat = EPERM; 2636 break; 2637 } 2638 if ((vp->v_vflag & VV_ROOT) != 0 2639 && (cn.cn_flags & ISDOTDOT) 2640 != 0) { 2641 vref(vp); 2642 nvp = vp; 2643 r = 0; 2644 } else { 2645 r = VOP_LOOKUP(vp, &nvp, 2646 &cn); 2647 if (vp != nvp) 2648 NFSVOPUNLOCK(vp); 2649 } 2650 } 2651 2652 /* 2653 * For NFSv4, check to see if nvp is 2654 * a mount point and get the mount 2655 * point vnode, as required. 2656 */ 2657 if (r == 0 && 2658 nfsrv_enable_crossmntpt != 0 && 2659 (nd->nd_flag & ND_NFSV4) != 0 && 2660 nvp->v_type == VDIR && 2661 nvp->v_mountedhere != NULL) { 2662 new_mp = nvp->v_mountedhere; 2663 r = vfs_busy(new_mp, 0); 2664 vput(nvp); 2665 nvp = NULL; 2666 if (r == 0) { 2667 r = VFS_ROOT(new_mp, 2668 LK_SHARED, &nvp); 2669 needs_unbusy = 1; 2670 if (r == 0) 2671 at_root = 1; 2672 } 2673 } 2674 } 2675 2676 /* 2677 * If we failed to look up the entry, then it 2678 * has become invalid, most likely removed. 
2679 */ 2680 if (r != 0) { 2681 if (needs_unbusy) 2682 vfs_unbusy(new_mp); 2683 goto invalid; 2684 } 2685 KASSERT(refp != NULL || nvp != NULL, 2686 ("%s: undetected lookup error", __func__)); 2687 /* The file handle and attributes are fetched only when there is no referral and either the request is NFSv3 or some fillable attribute bit is set. */ 2688 if (refp == NULL && 2689 ((nd->nd_flag & ND_NFSV3) || 2690 NFSNONZERO_ATTRBIT(&attrbits))) { 2691 r = nfsvno_getfh(nvp, &nfh, p); 2692 if (!r) 2693 r = nfsvno_getattr(nvp, nvap, nd, p, 2694 1, &attrbits); 2695 if (r == 0 && is_zfs == 1 && 2696 nfsrv_enable_crossmntpt != 0 && 2697 (nd->nd_flag & ND_NFSV4) != 0 && 2698 nvp->v_type == VDIR && 2699 vp->v_mount != nvp->v_mount) { 2700 /* 2701 * For a ZFS snapshot, there is a 2702 * pseudo mount that does not set 2703 * v_mountedhere, so it needs to 2704 * be detected via a different 2705 * mount structure. 2706 */ 2707 at_root = 1; 2708 if (new_mp == mp) 2709 new_mp = nvp->v_mount; 2710 } 2711 } 2712 2713 /* 2714 * If we failed to get attributes of the entry, 2715 * then just skip it for NFSv3 (the traditional 2716 * behavior in the old NFS server). 2717 * For NFSv4 the behavior is controlled by 2718 * RDATTRERROR: we either ignore the error or 2719 * fail the request. 2720 * Note that RDATTRERROR is never set for NFSv3. 
2721 */ 2722 if (r != 0) { 2723 if (!NFSISSET_ATTRBIT(&attrbits, 2724 NFSATTRBIT_RDATTRERROR)) { 2725 vput(nvp); 2726 if (needs_unbusy != 0) 2727 vfs_unbusy(new_mp); 2728 if ((nd->nd_flag & ND_NFSV3)) 2729 goto invalid; 2730 nd->nd_repstat = r; 2731 break; 2732 } 2733 } 2734 } 2735 2736 /* 2737 * Build the directory record xdr 2738 */ 2739 if (nd->nd_flag & ND_NFSV3) { 2740 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2741 *tl++ = newnfs_true; 2742 *tl++ = 0; 2743 *tl = txdr_unsigned(dp->d_fileno); 2744 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2745 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2746 txdr_hyper(*cookiep, tl); 2747 nfsrv_postopattr(nd, 0, nvap); 2748 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); 2749 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); 2750 if (nvp != NULL) 2751 vput(nvp); 2752 } else { 2753 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2754 *tl++ = newnfs_true; 2755 txdr_hyper(*cookiep, tl); 2756 dirlen += nfsm_strtom(nd, dp->d_name, nlen); 2757 if (nvp != NULL) { 2758 supports_nfsv4acls = 2759 nfs_supportsnfsv4acls(nvp); 2760 NFSVOPUNLOCK(nvp); 2761 } else 2762 supports_nfsv4acls = 0; 2763 if (refp != NULL) { 2764 dirlen += nfsrv_putreferralattr(nd, 2765 &savbits, refp, 0, 2766 &nd->nd_repstat); 2767 if (nd->nd_repstat) { 2768 if (nvp != NULL) 2769 vrele(nvp); 2770 if (needs_unbusy != 0) 2771 vfs_unbusy(new_mp); 2772 break; 2773 } 2774 } else if (r) { 2775 dirlen += nfsvno_fillattr(nd, new_mp, 2776 nvp, nvap, &nfh, r, &rderrbits, 2777 nd->nd_cred, p, isdgram, 0, 2778 supports_nfsv4acls, at_root, 2779 mounted_on_fileno); 2780 } else { 2781 dirlen += nfsvno_fillattr(nd, new_mp, 2782 nvp, nvap, &nfh, r, &attrbits, 2783 nd->nd_cred, p, isdgram, 0, 2784 supports_nfsv4acls, at_root, 2785 mounted_on_fileno); 2786 } 2787 if (nvp != NULL) 2788 vrele(nvp); 2789 dirlen += (3 * NFSX_UNSIGNED); 2790 } 2791 if (needs_unbusy != 0) 2792 vfs_unbusy(new_mp); /* An entry that pushed dirlen past cnt is not counted; it is trimmed from the reply after the loop. */ 2793 if (dirlen <= cnt) 2794 entrycnt++; 2795 } 2796 invalid: 2797 cpos += 
dp->d_reclen; 2798 dp = (struct dirent *)cpos; 2799 cookiep++; 2800 ncookies--; 2801 } /* vp was unlocked before vfs_busy(), so only its reference is dropped here, followed by the busy hold on mp. */ 2802 vrele(vp); 2803 vfs_unbusy(mp); 2804 2805 /* 2806 * If dirlen > cnt, we must strip off the last entry. If that 2807 * results in an empty reply, report NFSERR_TOOSMALL. 2808 */ 2809 if (dirlen > cnt || nd->nd_repstat) { 2810 if (!nd->nd_repstat && entrycnt == 0) 2811 nd->nd_repstat = NFSERR_TOOSMALL; 2812 if (nd->nd_repstat) { 2813 nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); 2814 if (nd->nd_flag & ND_NFSV3) 2815 nfsrv_postopattr(nd, getret, &at); 2816 } else 2817 nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); 2818 eofflag = 0; 2819 } else if (cpos < cend) 2820 eofflag = 0; 2821 if (!nd->nd_repstat) { 2822 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2823 *tl++ = newnfs_false; 2824 if (eofflag) 2825 *tl = newnfs_true; 2826 else 2827 *tl = newnfs_false; 2828 } 2829 free(cookies, M_TEMP); 2830 free(rbuf, M_TEMP); 2831 2832 out: 2833 NFSEXITCODE2(0, nd); 2834 return (0); 2835 nfsmout: 2836 vput(vp); 2837 NFSEXITCODE2(error, nd); 2838 return (error); 2839 } 2840 2841 /* 2842 * Get the settable attributes out of the mbuf list. 2843 * (Return 0 or EBADRPC) 2844 */ /* nfsrv_sattr dispatches on the NFS version flag in nd->nd_flag: NFSv2 and NFSv3 attributes are decoded in this function into nvap, while NFSv4 is handed to nfsv4_sattr(). */ 2845 int 2846 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2847 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2848 { 2849 u_int32_t *tl; 2850 struct nfsv2_sattr *sp; 2851 int error = 0, toclient = 0; 2852 2853 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { 2854 case ND_NFSV2: 2855 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); 2856 /* 2857 * Some old clients didn't fill in the high order 16bits. 
2858 * --> check the low order 2 bytes for 0xffff 2859 */ 2860 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) 2861 nvap->na_mode = nfstov_mode(sp->sa_mode); /* For the remaining NFSv2 fields, a wire value equal to newnfs_xdrneg1 leaves the corresponding nvap field untouched. */ 2862 if (sp->sa_uid != newnfs_xdrneg1) 2863 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); 2864 if (sp->sa_gid != newnfs_xdrneg1) 2865 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); 2866 if (sp->sa_size != newnfs_xdrneg1) 2867 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); 2868 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { 2869 #ifdef notyet 2870 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); 2871 #else 2872 nvap->na_atime.tv_sec = 2873 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); 2874 nvap->na_atime.tv_nsec = 0; 2875 #endif 2876 } 2877 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) 2878 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); 2879 break; /* NFSv3: mode, uid, gid and size are each preceded by one word; the value is dissected only when that word equals newnfs_true. */ 2880 case ND_NFSV3: 2881 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2882 if (*tl == newnfs_true) { 2883 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2884 nvap->na_mode = nfstov_mode(*tl); 2885 } 2886 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2887 if (*tl == newnfs_true) { 2888 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2889 nvap->na_uid = fxdr_unsigned(uid_t, *tl); 2890 } 2891 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2892 if (*tl == newnfs_true) { 2893 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2894 nvap->na_gid = fxdr_unsigned(gid_t, *tl); 2895 } 2896 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2897 if (*tl == newnfs_true) { 2898 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2899 nvap->na_size = fxdr_hyper(tl); 2900 } 2901 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); /* Access time: toclient records that the client supplied an explicit atime; the modify-time handling that follows reads it before setting VA_UTIMES_NULL. */ 2902 switch (fxdr_unsigned(int, *tl)) { 2903 case NFSV3SATTRTIME_TOCLIENT: 2904 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2905 fxdr_nfsv3time(tl, &nvap->na_atime); 2906 toclient = 1; 2907 break; 2908 case NFSV3SATTRTIME_TOSERVER: 2909 vfs_timestamp(&nvap->na_atime); 2910 nvap->na_vaflags |= VA_UTIMES_NULL; 2911 break; 2912 } 2913 
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2914 switch (fxdr_unsigned(int, *tl)) { 2915 case NFSV3SATTRTIME_TOCLIENT: 2916 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2917 fxdr_nfsv3time(tl, &nvap->na_mtime); 2918 nvap->na_vaflags &= ~VA_UTIMES_NULL; 2919 break; 2920 case NFSV3SATTRTIME_TOSERVER: 2921 vfs_timestamp(&nvap->na_mtime); 2922 if (!toclient) 2923 nvap->na_vaflags |= VA_UTIMES_NULL; 2924 break; 2925 } 2926 break; 2927 case ND_NFSV4: 2928 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); 2929 } 2930 nfsmout: 2931 NFSEXITCODE2(error, nd); 2932 return (error); 2933 } 2934 2935 /* 2936 * Handle the setable attributes for V4. 2937 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. 2938 */ 2939 int 2940 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, 2941 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) 2942 { 2943 u_int32_t *tl; 2944 int attrsum = 0; 2945 int i, j; 2946 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; 2947 int moderet, toclient = 0; 2948 u_char *cp, namestr[NFSV4_SMALLSTR + 1]; 2949 uid_t uid; 2950 gid_t gid; 2951 u_short mode, mask; /* Same type as va_mode. */ 2952 struct vattr va; 2953 2954 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); 2955 if (error) 2956 goto nfsmout; 2957 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2958 attrsize = fxdr_unsigned(int, *tl); 2959 2960 /* 2961 * Loop around getting the setable attributes. If an unsupported 2962 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. 
2963 */ 2964 if (retnotsup) { 2965 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 2966 bitpos = NFSATTRBIT_MAX; 2967 } else { 2968 bitpos = 0; 2969 } 2970 moderet = 0; 2971 for (; bitpos < NFSATTRBIT_MAX; bitpos++) { 2972 if (attrsum > attrsize) { 2973 error = NFSERR_BADXDR; 2974 goto nfsmout; 2975 } 2976 if (NFSISSET_ATTRBIT(attrbitp, bitpos)) 2977 switch (bitpos) { 2978 case NFSATTRBIT_SIZE: 2979 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); 2980 if (vp != NULL && vp->v_type != VREG) { 2981 error = (vp->v_type == VDIR) ? NFSERR_ISDIR : 2982 NFSERR_INVAL; 2983 goto nfsmout; 2984 } 2985 nvap->na_size = fxdr_hyper(tl); 2986 attrsum += NFSX_HYPER; 2987 break; 2988 case NFSATTRBIT_ACL: 2989 error = nfsrv_dissectacl(nd, aclp, true, &aceerr, 2990 &aclsize, p); 2991 if (error) 2992 goto nfsmout; 2993 if (aceerr && !nd->nd_repstat) 2994 nd->nd_repstat = aceerr; 2995 attrsum += aclsize; 2996 break; 2997 case NFSATTRBIT_ARCHIVE: 2998 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 2999 if (!nd->nd_repstat) 3000 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3001 attrsum += NFSX_UNSIGNED; 3002 break; 3003 case NFSATTRBIT_HIDDEN: 3004 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3005 if (!nd->nd_repstat) 3006 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3007 attrsum += NFSX_UNSIGNED; 3008 break; 3009 case NFSATTRBIT_MIMETYPE: 3010 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3011 i = fxdr_unsigned(int, *tl); 3012 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 3013 if (error) 3014 goto nfsmout; 3015 if (!nd->nd_repstat) 3016 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3017 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); 3018 break; 3019 case NFSATTRBIT_MODE: 3020 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. 
*/ 3021 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3022 nvap->na_mode = nfstov_mode(*tl); 3023 attrsum += NFSX_UNSIGNED; 3024 break; 3025 case NFSATTRBIT_OWNER: 3026 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3027 j = fxdr_unsigned(int, *tl); 3028 if (j < 0) { 3029 error = NFSERR_BADXDR; 3030 goto nfsmout; 3031 } 3032 if (j > NFSV4_SMALLSTR) 3033 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3034 else 3035 cp = namestr; 3036 error = nfsrv_mtostr(nd, cp, j); 3037 if (error) { 3038 if (j > NFSV4_SMALLSTR) 3039 free(cp, M_NFSSTRING); 3040 goto nfsmout; 3041 } 3042 if (!nd->nd_repstat) { 3043 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, 3044 &uid); 3045 if (!nd->nd_repstat) 3046 nvap->na_uid = uid; 3047 } 3048 if (j > NFSV4_SMALLSTR) 3049 free(cp, M_NFSSTRING); 3050 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3051 break; 3052 case NFSATTRBIT_OWNERGROUP: 3053 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3054 j = fxdr_unsigned(int, *tl); 3055 if (j < 0) { 3056 error = NFSERR_BADXDR; 3057 goto nfsmout; 3058 } 3059 if (j > NFSV4_SMALLSTR) 3060 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); 3061 else 3062 cp = namestr; 3063 error = nfsrv_mtostr(nd, cp, j); 3064 if (error) { 3065 if (j > NFSV4_SMALLSTR) 3066 free(cp, M_NFSSTRING); 3067 goto nfsmout; 3068 } 3069 if (!nd->nd_repstat) { 3070 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, 3071 &gid); 3072 if (!nd->nd_repstat) 3073 nvap->na_gid = gid; 3074 } 3075 if (j > NFSV4_SMALLSTR) 3076 free(cp, M_NFSSTRING); 3077 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); 3078 break; 3079 case NFSATTRBIT_SYSTEM: 3080 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3081 if (!nd->nd_repstat) 3082 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3083 attrsum += NFSX_UNSIGNED; 3084 break; 3085 case NFSATTRBIT_TIMEACCESSSET: 3086 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3087 attrsum += NFSX_UNSIGNED; 3088 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3089 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3090 fxdr_nfsv4time(tl, &nvap->na_atime); 3091 
toclient = 1; 3092 attrsum += NFSX_V4TIME; 3093 } else { 3094 vfs_timestamp(&nvap->na_atime); 3095 nvap->na_vaflags |= VA_UTIMES_NULL; 3096 } 3097 break; 3098 case NFSATTRBIT_TIMEBACKUP: 3099 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3100 if (!nd->nd_repstat) 3101 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3102 attrsum += NFSX_V4TIME; 3103 break; 3104 case NFSATTRBIT_TIMECREATE: 3105 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3106 fxdr_nfsv4time(tl, &nvap->na_btime); 3107 attrsum += NFSX_V4TIME; 3108 break; 3109 case NFSATTRBIT_TIMEMODIFYSET: 3110 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 3111 attrsum += NFSX_UNSIGNED; 3112 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { 3113 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); 3114 fxdr_nfsv4time(tl, &nvap->na_mtime); 3115 nvap->na_vaflags &= ~VA_UTIMES_NULL; 3116 attrsum += NFSX_V4TIME; 3117 } else { 3118 vfs_timestamp(&nvap->na_mtime); 3119 if (!toclient) 3120 nvap->na_vaflags |= VA_UTIMES_NULL; 3121 } 3122 break; 3123 case NFSATTRBIT_MODESETMASKED: 3124 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 3125 mode = fxdr_unsigned(u_short, *tl++); 3126 mask = fxdr_unsigned(u_short, *tl); 3127 /* 3128 * vp == NULL implies an Open/Create operation. 3129 * This attribute can only be used for Setattr and 3130 * only for NFSv4.1 or higher. 3131 * If moderet != 0, a mode attribute has also been 3132 * specified and this attribute cannot be done in the 3133 * same Setattr operation. 
3134 */ 3135 if ((nd->nd_flag & ND_NFSV41) == 0) 3136 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3137 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || 3138 vp == NULL) 3139 nd->nd_repstat = NFSERR_INVAL; 3140 else if (moderet == 0) 3141 moderet = VOP_GETATTR(vp, &va, nd->nd_cred); 3142 if (moderet == 0) 3143 nvap->na_mode = (mode & mask) | 3144 (va.va_mode & ~mask); 3145 else 3146 nd->nd_repstat = moderet; 3147 attrsum += 2 * NFSX_UNSIGNED; 3148 break; 3149 default: 3150 nd->nd_repstat = NFSERR_ATTRNOTSUPP; 3151 /* 3152 * set bitpos so we drop out of the loop. 3153 */ 3154 bitpos = NFSATTRBIT_MAX; 3155 break; 3156 } 3157 } 3158 3159 /* 3160 * some clients pad the attrlist, so we need to skip over the 3161 * padding. 3162 */ 3163 if (attrsum > attrsize) { 3164 error = NFSERR_BADXDR; 3165 } else { 3166 attrsize = NFSM_RNDUP(attrsize); 3167 if (attrsum < attrsize) 3168 error = nfsm_advance(nd, attrsize - attrsum, -1); 3169 } 3170 nfsmout: 3171 NFSEXITCODE2(error, nd); 3172 return (error); 3173 } 3174 3175 /* 3176 * Check/setup export credentials. 3177 */ 3178 int 3179 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, 3180 struct ucred *credanon, bool testsec) 3181 { 3182 int error; 3183 3184 /* 3185 * Check/setup credentials. 3186 */ 3187 if (nd->nd_flag & ND_GSS) 3188 exp->nes_exflag &= ~MNT_EXPORTANON; 3189 3190 /* 3191 * Check to see if the operation is allowed for this security flavor. 3192 */ 3193 error = 0; 3194 if (testsec) { 3195 error = nfsvno_testexp(nd, exp); 3196 if (error != 0) 3197 goto out; 3198 } 3199 3200 /* 3201 * Check to see if the file system is exported V4 only. 3202 */ 3203 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { 3204 error = NFSERR_PROGNOTV4; 3205 goto out; 3206 } 3207 3208 /* 3209 * Now, map the user credentials. 3210 * (Note that ND_AUTHNONE will only be set for an NFSv3 3211 * Fsinfo RPC. If set for anything else, this code might need 3212 * to change.) 
3213 */ 3214 if (NFSVNO_EXPORTED(exp)) { 3215 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || 3216 NFSVNO_EXPORTANON(exp) || 3217 (nd->nd_flag & ND_AUTHNONE) != 0) { /* Overwrite the uid, gid and group list of the request credential with those of credanon. */ 3218 nd->nd_cred->cr_uid = credanon->cr_uid; 3219 nd->nd_cred->cr_gid = credanon->cr_gid; 3220 crsetgroups(nd->nd_cred, credanon->cr_ngroups, 3221 credanon->cr_groups); 3222 } else if ((nd->nd_flag & ND_GSS) == 0) { 3223 /* 3224 * If using AUTH_SYS, call nfsrv_getgrpscred() to see 3225 * if there is a replacement credential with a group 3226 * list set up by "nfsuserd -manage-gids". 3227 * If there is no replacement, nfsrv_getgrpscred() 3228 * simply returns its argument. 3229 */ 3230 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); 3231 } 3232 } 3233 3234 out: 3235 NFSEXITCODE2(error, nd); 3236 return (error); 3237 } 3238 3239 /* 3240 * Check exports. 3241 */ /* nfsvno_checkexp calls VFS_CHECKEXP() for mp and nam, filling in exp and *credp. When VFS_CHECKEXP() fails and nfs_rootfhset is set, the failure is turned into success with empty export flags and no security flavors. When it succeeds but the flavor count is outside 1..MAXSECFLAVORS, the count is zeroed and EACCES is returned. */ 3242 int 3243 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, 3244 struct ucred **credp) 3245 { 3246 int error; 3247 3248 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3249 &exp->nes_numsecflavor, exp->nes_secflavors); 3250 if (error) { 3251 if (nfs_rootfhset) { 3252 exp->nes_exflag = 0; 3253 exp->nes_numsecflavor = 0; 3254 error = 0; 3255 } 3256 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3257 MAXSECFLAVORS) { 3258 printf("nfsvno_checkexp: numsecflavors out of range\n"); 3259 exp->nes_numsecflavor = 0; 3260 error = EACCES; 3261 } 3262 NFSEXITCODE(error); 3263 return (error); 3264 } 3265 3266 /* 3267 * Get a vnode for a file handle and export stuff. 3268 */ /* nfsvno_fhtovp clears *credp and the flavor count before calling VFS_FHTOVP(); the export check is performed only when nam is non-NULL and the vnode was obtained. */ 3269 int 3270 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, 3271 int lktype, struct vnode **vpp, struct nfsexstuff *exp, 3272 struct ucred **credp) 3273 { 3274 int error; 3275 3276 *credp = NULL; 3277 exp->nes_numsecflavor = 0; 3278 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); 3279 if (error != 0) 3280 /* Make sure the server replies ESTALE to the client. 
*/ 3281 error = ESTALE; 3282 if (nam && !error) { 3283 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, 3284 &exp->nes_numsecflavor, exp->nes_secflavors); 3285 if (error) { 3286 if (nfs_rootfhset) { 3287 exp->nes_exflag = 0; 3288 exp->nes_numsecflavor = 0; 3289 error = 0; 3290 } else { /* The export check failed and nfs_rootfhset is clear: release the vnode obtained from VFS_FHTOVP(). */ 3291 vput(*vpp); 3292 } 3293 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > 3294 MAXSECFLAVORS) { 3295 printf("nfsvno_fhtovp: numsecflavors out of range\n"); 3296 exp->nes_numsecflavor = 0; 3297 error = EACCES; 3298 vput(*vpp); 3299 } 3300 } 3301 NFSEXITCODE(error); 3302 return (error); 3303 } 3304 3305 /* 3306 * nfsd_fhtovp() - convert a fh to a vnode ptr 3307 * - look up fsid in mount list (if not found ret error) 3308 * - get vp and export rights by calling nfsvno_fhtovp() 3309 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 3310 * for AUTH_SYS 3311 * - if mpp != NULL, return the mount point so that it can 3312 * be used for vn_finished_write() by the caller 3313 */ /* The outcome is reported through nd->nd_repstat; on any failure *vpp is left NULL. */ 3314 void 3315 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, 3316 struct vnode **vpp, struct nfsexstuff *exp, 3317 struct mount **mpp, int startwrite, int nextop) 3318 { 3319 struct mount *mp, *mpw; 3320 struct ucred *credanon; 3321 fhandle_t *fhp; 3322 int error; 3323 3324 if (mpp != NULL) 3325 *mpp = NULL; 3326 *vpp = NULL; 3327 fhp = (fhandle_t *)nfp->nfsrvfh_data; 3328 mp = vfs_busyfs(&fhp->fh_fsid); 3329 if (mp == NULL) { 3330 nd->nd_repstat = ESTALE; 3331 goto out; 3332 } 3333 /* When startwrite is set, vn_start_write() is entered before the vnode is looked up and a failure is reported as ESTALE. A shared lock request is upgraded to exclusive unless the mount allows shared writes. */ 3334 if (startwrite) { 3335 mpw = mp; 3336 error = vn_start_write(NULL, &mpw, V_WAIT); 3337 if (error != 0) { 3338 mpw = NULL; 3339 vfs_unbusy(mp); 3340 nd->nd_repstat = ESTALE; 3341 goto out; 3342 } 3343 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) 3344 lktype = LK_EXCLUSIVE; 3345 } else 3346 mpw = NULL; 3347 3348 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, 3349 &credanon); /* The busy hold taken by vfs_busyfs() is dropped as soon as the vnode lookup returns. */ 3350 vfs_unbusy(mp); 3351 3352 /* 3353 * For NFSv4 without a pseudo 
root fs, unexported file handles 3354 * can be returned, so that Lookup works everywhere. 3355 */ /* For every other NFS version, a handle whose export flags are empty is rejected with EACCES and its vnode is released. */ 3356 if (!nd->nd_repstat && exp->nes_exflag == 0 && 3357 !(nd->nd_flag & ND_NFSV4)) { 3358 vput(*vpp); 3359 *vpp = NULL; 3360 nd->nd_repstat = EACCES; 3361 } 3362 3363 /* 3364 * Personally, I've never seen any point in requiring a 3365 * reserved port#, since only in the rare case where the 3366 * clients are all boxes with secure system privileges, 3367 * does it provide any enhanced security, but... some people 3368 * believe it to be useful and keep putting this code back in. 3369 * (There is also some "security checker" out there that 3370 * complains if the nfs server doesn't enforce this.) 3371 * However, note the following: 3372 * RFC3530 (NFSv4) specifies that a reserved port# not be 3373 * required. 3374 * RFC2623 recommends that, if a reserved port# is checked for, 3375 * that there be a way to turn that off--> ifdef'd. 3376 */ 3377 #ifdef NFS_REQRSVPORT 3378 if (!nd->nd_repstat) { 3379 struct sockaddr_in *saddr; 3380 struct sockaddr_in6 *saddr6; 3381 3382 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); 3383 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); 3384 if (!(nd->nd_flag & ND_NFSV4) && 3385 ((saddr->sin_family == AF_INET && 3386 ntohs(saddr->sin_port) >= IPPORT_RESERVED) || 3387 (saddr6->sin6_family == AF_INET6 && 3388 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { 3389 vput(*vpp); 3390 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 3391 } 3392 } 3393 #endif /* NFS_REQRSVPORT */ 3394 3395 /* 3396 * Check/setup credentials. 
3397 */ 3398 if (!nd->nd_repstat) { 3399 nd->nd_saveduid = nd->nd_cred->cr_uid; 3400 nd->nd_repstat = nfsd_excred(nd, exp, credanon, 3401 nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type)); 3402 if (nd->nd_repstat) 3403 vput(*vpp); 3404 } 3405 if (credanon != NULL) 3406 crfree(credanon); 3407 if (nd->nd_repstat) { 3408 vn_finished_write(mpw); 3409 *vpp = NULL; 3410 } else if (mpp != NULL) { 3411 *mpp = mpw; 3412 } 3413 3414 out: 3415 NFSEXITCODE2(0, nd); 3416 } 3417 3418 /* 3419 * glue for fp. 3420 */ 3421 static int 3422 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) 3423 { 3424 struct filedesc *fdp; 3425 struct file *fp; 3426 int error = 0; 3427 3428 fdp = p->td_proc->p_fd; 3429 if (fd < 0 || fd >= fdp->fd_nfiles || 3430 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { 3431 error = EBADF; 3432 goto out; 3433 } 3434 *fpp = fp; 3435 3436 out: 3437 NFSEXITCODE(error); 3438 return (error); 3439 } 3440 3441 /* 3442 * Called from nfssvc() to update the exports list. Just call 3443 * vfs_export(). This has to be done, since the v4 root fake fs isn't 3444 * in the mount list. 3445 */ 3446 int 3447 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) 3448 { 3449 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; 3450 int error = 0; 3451 struct nameidata nd; 3452 fhandle_t fh; 3453 3454 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); 3455 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) 3456 nfs_rootfhset = 0; 3457 else if (error == 0) { 3458 if (nfsexargp->fspec == NULL) { 3459 error = EPERM; 3460 goto out; 3461 } 3462 /* 3463 * If fspec != NULL, this is the v4root path. 
3464 */ 3465 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec); 3466 if ((error = namei(&nd)) != 0) 3467 goto out; 3468 error = nfsvno_getfh(nd.ni_vp, &fh, p); 3469 vrele(nd.ni_vp); 3470 if (!error) { 3471 nfs_rootfh.nfsrvfh_len = NFSX_MYFH; 3472 NFSBCOPY((caddr_t)&fh, 3473 nfs_rootfh.nfsrvfh_data, 3474 sizeof (fhandle_t)); 3475 nfs_rootfhset = 1; 3476 } 3477 } 3478 3479 out: 3480 NFSEXITCODE(error); 3481 return (error); 3482 } 3483 3484 /* 3485 * This function needs to test to see if the system is near its limit 3486 * for memory allocation via malloc() or mget() and return True iff 3487 * either of these resources are near their limit. 3488 * XXX (For now, this is just a stub.) 3489 */ 3490 int nfsrv_testmalloclimit = 0; 3491 int 3492 nfsrv_mallocmget_limit(void) 3493 { 3494 static int printmesg = 0; 3495 static int testval = 1; 3496 3497 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { 3498 if ((printmesg++ % 100) == 0) 3499 printf("nfsd: malloc/mget near limit\n"); 3500 return (1); 3501 } 3502 return (0); 3503 } 3504 3505 /* 3506 * BSD specific initialization of a mount point. 3507 */ 3508 void 3509 nfsd_mntinit(void) 3510 { 3511 static int inited = 0; 3512 3513 if (inited) 3514 return; 3515 inited = 1; 3516 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); 3517 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); 3518 TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); 3519 nfsv4root_mnt.mnt_export = NULL; 3520 TAILQ_INIT(&nfsv4root_opt); 3521 TAILQ_INIT(&nfsv4root_newopt); 3522 nfsv4root_mnt.mnt_opt = &nfsv4root_opt; 3523 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; 3524 nfsv4root_mnt.mnt_nvnodelistsize = 0; 3525 nfsv4root_mnt.mnt_lazyvnodelistsize = 0; 3526 } 3527 3528 static void 3529 nfsd_timer(void *arg) 3530 { 3531 3532 nfsrv_servertimer(); 3533 callout_reset_sbt(&nfsd_callout, SBT_1S, SBT_1S, nfsd_timer, NULL, 0); 3534 } 3535 3536 /* 3537 * Get a vnode for a file handle, without checking exports, etc. 
3538 */ 3539 struct vnode * 3540 nfsvno_getvp(fhandle_t *fhp) 3541 { 3542 struct mount *mp; 3543 struct vnode *vp; 3544 int error; 3545 3546 mp = vfs_busyfs(&fhp->fh_fsid); 3547 if (mp == NULL) 3548 return (NULL); 3549 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); 3550 vfs_unbusy(mp); 3551 if (error) 3552 return (NULL); 3553 return (vp); 3554 } 3555 3556 /* 3557 * Do a local VOP_ADVLOCK(). 3558 */ 3559 int 3560 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, 3561 u_int64_t end, struct thread *td) 3562 { 3563 int error = 0; 3564 struct flock fl; 3565 u_int64_t tlen; 3566 3567 if (nfsrv_dolocallocks == 0) 3568 goto out; 3569 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); 3570 3571 fl.l_whence = SEEK_SET; 3572 fl.l_type = ftype; 3573 fl.l_start = (off_t)first; 3574 if (end == NFS64BITSSET) { 3575 fl.l_len = 0; 3576 } else { 3577 tlen = end - first; 3578 fl.l_len = (off_t)tlen; 3579 } 3580 /* 3581 * For FreeBSD8, the l_pid and l_sysid must be set to the same 3582 * values for all calls, so that all locks will be held by the 3583 * nfsd server. (The nfsd server handles conflicts between the 3584 * various clients.) 3585 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 3586 * bytes, so it can't be put in l_sysid. 3587 */ 3588 if (nfsv4_sysid == 0) 3589 nfsv4_sysid = nlm_acquire_next_sysid(); 3590 fl.l_pid = (pid_t)0; 3591 fl.l_sysid = (int)nfsv4_sysid; 3592 3593 if (ftype == F_UNLCK) 3594 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, 3595 (F_POSIX | F_REMOTE)); 3596 else 3597 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, 3598 (F_POSIX | F_REMOTE)); 3599 3600 out: 3601 NFSEXITCODE(error); 3602 return (error); 3603 } 3604 3605 /* 3606 * Check the nfsv4 root exports. 
3607 */ 3608 int 3609 nfsvno_v4rootexport(struct nfsrv_descript *nd) 3610 { 3611 struct ucred *credanon; 3612 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; 3613 uint64_t exflags; 3614 3615 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, 3616 &credanon, &numsecflavor, secflavors); 3617 if (error) { 3618 error = NFSERR_PROGUNAVAIL; 3619 goto out; 3620 } 3621 if (credanon != NULL) 3622 crfree(credanon); 3623 for (i = 0; i < numsecflavor; i++) { 3624 if (secflavors[i] == AUTH_SYS) 3625 nd->nd_flag |= ND_EXAUTHSYS; 3626 else if (secflavors[i] == RPCSEC_GSS_KRB5) 3627 nd->nd_flag |= ND_EXGSS; 3628 else if (secflavors[i] == RPCSEC_GSS_KRB5I) 3629 nd->nd_flag |= ND_EXGSSINTEGRITY; 3630 else if (secflavors[i] == RPCSEC_GSS_KRB5P) 3631 nd->nd_flag |= ND_EXGSSPRIVACY; 3632 } 3633 3634 /* And set ND_EXxx flags for TLS. */ 3635 if ((exflags & MNT_EXTLS) != 0) { 3636 nd->nd_flag |= ND_EXTLS; 3637 if ((exflags & MNT_EXTLSCERT) != 0) 3638 nd->nd_flag |= ND_EXTLSCERT; 3639 if ((exflags & MNT_EXTLSCERTUSER) != 0) 3640 nd->nd_flag |= ND_EXTLSCERTUSER; 3641 } 3642 3643 out: 3644 NFSEXITCODE(error); 3645 return (error); 3646 } 3647 3648 /* 3649 * Nfs server pseudo system call for the nfsd's 3650 */ 3651 /* 3652 * MPSAFE 3653 */ 3654 static int 3655 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) 3656 { 3657 struct file *fp; 3658 struct nfsd_addsock_args sockarg; 3659 struct nfsd_nfsd_args nfsdarg; 3660 struct nfsd_nfsd_oargs onfsdarg; 3661 struct nfsd_pnfsd_args pnfsdarg; 3662 struct vnode *vp, *nvp, *curdvp; 3663 struct pnfsdsfile *pf; 3664 struct nfsdevice *ds, *fds; 3665 cap_rights_t rights; 3666 int buflen, error, ret; 3667 char *buf, *cp, *cp2, *cp3; 3668 char fname[PNFS_FILENAME_LEN + 1]; 3669 3670 if (uap->flag & NFSSVC_NFSDADDSOCK) { 3671 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); 3672 if (error) 3673 goto out; 3674 /* 3675 * Since we don't know what rights might be required, 3676 * pretend that we need them all. 
It is better to be too 3677 * careful than too reckless. 3678 */ 3679 error = fget(td, sockarg.sock, 3680 cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp); 3681 if (error != 0) 3682 goto out; 3683 if (fp->f_type != DTYPE_SOCKET) { 3684 fdrop(fp, td); 3685 error = EPERM; 3686 goto out; 3687 } 3688 error = nfsrvd_addsock(fp); 3689 fdrop(fp, td); 3690 } else if (uap->flag & NFSSVC_NFSDNFSD) { 3691 if (uap->argp == NULL) { 3692 error = EINVAL; 3693 goto out; 3694 } 3695 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { 3696 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); 3697 if (error == 0) { 3698 nfsdarg.principal = onfsdarg.principal; 3699 nfsdarg.minthreads = onfsdarg.minthreads; 3700 nfsdarg.maxthreads = onfsdarg.maxthreads; 3701 nfsdarg.version = 1; 3702 nfsdarg.addr = NULL; 3703 nfsdarg.addrlen = 0; 3704 nfsdarg.dnshost = NULL; 3705 nfsdarg.dnshostlen = 0; 3706 nfsdarg.dspath = NULL; 3707 nfsdarg.dspathlen = 0; 3708 nfsdarg.mdspath = NULL; 3709 nfsdarg.mdspathlen = 0; 3710 nfsdarg.mirrorcnt = 1; 3711 } 3712 } else 3713 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); 3714 if (error) 3715 goto out; 3716 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && 3717 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && 3718 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && 3719 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && 3720 nfsdarg.mirrorcnt >= 1 && 3721 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && 3722 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && 3723 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { 3724 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" 3725 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, 3726 nfsdarg.dspathlen, nfsdarg.dnshostlen, 3727 nfsdarg.mdspathlen, nfsdarg.mirrorcnt); 3728 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); 3729 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); 3730 if (error != 0) { 3731 free(cp, M_TEMP); 3732 goto out; 3733 } 3734 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. 
*/ 3735 nfsdarg.addr = cp; 3736 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); 3737 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); 3738 if (error != 0) { 3739 free(nfsdarg.addr, M_TEMP); 3740 free(cp, M_TEMP); 3741 goto out; 3742 } 3743 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ 3744 nfsdarg.dnshost = cp; 3745 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); 3746 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); 3747 if (error != 0) { 3748 free(nfsdarg.addr, M_TEMP); 3749 free(nfsdarg.dnshost, M_TEMP); 3750 free(cp, M_TEMP); 3751 goto out; 3752 } 3753 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ 3754 nfsdarg.dspath = cp; 3755 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); 3756 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); 3757 if (error != 0) { 3758 free(nfsdarg.addr, M_TEMP); 3759 free(nfsdarg.dnshost, M_TEMP); 3760 free(nfsdarg.dspath, M_TEMP); 3761 free(cp, M_TEMP); 3762 goto out; 3763 } 3764 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. 
*/ 3765 nfsdarg.mdspath = cp; 3766 } else { 3767 nfsdarg.addr = NULL; 3768 nfsdarg.addrlen = 0; 3769 nfsdarg.dnshost = NULL; 3770 nfsdarg.dnshostlen = 0; 3771 nfsdarg.dspath = NULL; 3772 nfsdarg.dspathlen = 0; 3773 nfsdarg.mdspath = NULL; 3774 nfsdarg.mdspathlen = 0; 3775 nfsdarg.mirrorcnt = 1; 3776 } 3777 nfsd_timer(NULL); 3778 error = nfsrvd_nfsd(td, &nfsdarg); 3779 free(nfsdarg.addr, M_TEMP); 3780 free(nfsdarg.dnshost, M_TEMP); 3781 free(nfsdarg.dspath, M_TEMP); 3782 free(nfsdarg.mdspath, M_TEMP); 3783 } else if (uap->flag & NFSSVC_PNFSDS) { 3784 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); 3785 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || 3786 pnfsdarg.op == PNFSDOP_FORCEDELDS)) { 3787 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3788 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, 3789 NULL); 3790 if (error == 0) 3791 error = nfsrv_deldsserver(pnfsdarg.op, cp, td); 3792 free(cp, M_TEMP); 3793 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { 3794 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3795 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; 3796 buf = malloc(buflen, M_TEMP, M_WAITOK); 3797 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, 3798 NULL); 3799 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); 3800 if (error == 0 && pnfsdarg.dspath != NULL) { 3801 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3802 error = copyinstr(pnfsdarg.dspath, cp2, 3803 PATH_MAX + 1, NULL); 3804 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", 3805 error); 3806 } else 3807 cp2 = NULL; 3808 if (error == 0 && pnfsdarg.curdspath != NULL) { 3809 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); 3810 error = copyinstr(pnfsdarg.curdspath, cp3, 3811 PATH_MAX + 1, NULL); 3812 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", 3813 error); 3814 } else 3815 cp3 = NULL; 3816 curdvp = NULL; 3817 fds = NULL; 3818 if (error == 0) 3819 error = nfsrv_mdscopymr(cp, cp2, cp3, buf, 3820 &buflen, fname, td, &vp, &nvp, &pf, &ds, 3821 &fds); 3822 NFSD_DEBUG(4, 
"nfsrv_mdscopymr=%d\n", error); 3823 if (error == 0) { 3824 if (pf->dsf_dir >= nfsrv_dsdirsize) { 3825 printf("copymr: dsdir out of range\n"); 3826 pf->dsf_dir = 0; 3827 } 3828 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); 3829 error = nfsrv_copymr(vp, nvp, 3830 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, 3831 (struct pnfsdsfile *)buf, 3832 buflen / sizeof(*pf), td->td_ucred, td); 3833 vput(vp); 3834 vput(nvp); 3835 if (fds != NULL && error == 0) { 3836 curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; 3837 ret = vn_lock(curdvp, LK_EXCLUSIVE); 3838 if (ret == 0) { 3839 nfsrv_dsremove(curdvp, fname, 3840 td->td_ucred, td); 3841 NFSVOPUNLOCK(curdvp); 3842 } 3843 } 3844 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); 3845 } 3846 free(cp, M_TEMP); 3847 free(cp2, M_TEMP); 3848 free(cp3, M_TEMP); 3849 free(buf, M_TEMP); 3850 } 3851 } else { 3852 error = nfssvc_srvcall(td, uap, td->td_ucred); 3853 } 3854 3855 out: 3856 NFSEXITCODE(error); 3857 return (error); 3858 } 3859 3860 static int 3861 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 3862 { 3863 struct nfsex_args export; 3864 struct nfsex_oldargs oexp; 3865 struct file *fp = NULL; 3866 int stablefd, i, len; 3867 struct nfsd_clid adminrevoke; 3868 struct nfsd_dumplist dumplist; 3869 struct nfsd_dumpclients *dumpclients; 3870 struct nfsd_dumplocklist dumplocklist; 3871 struct nfsd_dumplocks *dumplocks; 3872 struct nameidata nd; 3873 vnode_t vp; 3874 int error = EINVAL, igotlock; 3875 struct proc *procp; 3876 gid_t *grps; 3877 static int suspend_nfsd = 0; 3878 3879 if (uap->flag & NFSSVC_PUBLICFH) { 3880 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, 3881 sizeof (fhandle_t)); 3882 error = copyin(uap->argp, 3883 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); 3884 if (!error) 3885 nfs_pubfhset = 1; 3886 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3887 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { 3888 error = copyin(uap->argp,(caddr_t)&export, 3889 sizeof (struct nfsex_args)); 3890 if (!error) 
{ 3891 grps = NULL; 3892 if (export.export.ex_ngroups > NGROUPS_MAX || 3893 export.export.ex_ngroups < 0) 3894 error = EINVAL; 3895 else if (export.export.ex_ngroups > 0) { 3896 grps = malloc(export.export.ex_ngroups * 3897 sizeof(gid_t), M_TEMP, M_WAITOK); 3898 error = copyin(export.export.ex_groups, grps, 3899 export.export.ex_ngroups * sizeof(gid_t)); 3900 export.export.ex_groups = grps; 3901 } else 3902 export.export.ex_groups = NULL; 3903 if (!error) 3904 error = nfsrv_v4rootexport(&export, cred, p); 3905 free(grps, M_TEMP); 3906 } 3907 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == 3908 NFSSVC_V4ROOTEXPORT) { 3909 error = copyin(uap->argp,(caddr_t)&oexp, 3910 sizeof (struct nfsex_oldargs)); 3911 if (!error) { 3912 memset(&export.export, 0, sizeof(export.export)); 3913 export.export.ex_flags = (uint64_t)oexp.export.ex_flags; 3914 export.export.ex_root = oexp.export.ex_root; 3915 export.export.ex_uid = oexp.export.ex_anon.cr_uid; 3916 export.export.ex_ngroups = 3917 oexp.export.ex_anon.cr_ngroups; 3918 export.export.ex_groups = NULL; 3919 if (export.export.ex_ngroups > XU_NGROUPS || 3920 export.export.ex_ngroups < 0) 3921 error = EINVAL; 3922 else if (export.export.ex_ngroups > 0) { 3923 export.export.ex_groups = malloc( 3924 export.export.ex_ngroups * sizeof(gid_t), 3925 M_TEMP, M_WAITOK); 3926 for (i = 0; i < export.export.ex_ngroups; i++) 3927 export.export.ex_groups[i] = 3928 oexp.export.ex_anon.cr_groups[i]; 3929 } 3930 export.export.ex_addr = oexp.export.ex_addr; 3931 export.export.ex_addrlen = oexp.export.ex_addrlen; 3932 export.export.ex_mask = oexp.export.ex_mask; 3933 export.export.ex_masklen = oexp.export.ex_masklen; 3934 export.export.ex_indexfile = oexp.export.ex_indexfile; 3935 export.export.ex_numsecflavors = 3936 oexp.export.ex_numsecflavors; 3937 if (export.export.ex_numsecflavors >= MAXSECFLAVORS || 3938 export.export.ex_numsecflavors < 0) 3939 error = EINVAL; 3940 else { 3941 for (i = 0; i < 
export.export.ex_numsecflavors; 3942 i++) 3943 export.export.ex_secflavors[i] = 3944 oexp.export.ex_secflavors[i]; 3945 } 3946 export.fspec = oexp.fspec; 3947 if (error == 0) 3948 error = nfsrv_v4rootexport(&export, cred, p); 3949 free(export.export.ex_groups, M_TEMP); 3950 } 3951 } else if (uap->flag & NFSSVC_NOPUBLICFH) { 3952 nfs_pubfhset = 0; 3953 error = 0; 3954 } else if (uap->flag & NFSSVC_STABLERESTART) { 3955 error = copyin(uap->argp, (caddr_t)&stablefd, 3956 sizeof (int)); 3957 if (!error) 3958 error = fp_getfvp(p, stablefd, &fp, &vp); 3959 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) 3960 error = EBADF; 3961 if (!error && newnfs_numnfsd != 0) 3962 error = EPERM; 3963 if (!error) { 3964 nfsrv_stablefirst.nsf_fp = fp; 3965 nfsrv_setupstable(p); 3966 } 3967 } else if (uap->flag & NFSSVC_ADMINREVOKE) { 3968 error = copyin(uap->argp, (caddr_t)&adminrevoke, 3969 sizeof (struct nfsd_clid)); 3970 if (!error) 3971 error = nfsrv_adminrevoke(&adminrevoke, p); 3972 } else if (uap->flag & NFSSVC_DUMPCLIENTS) { 3973 error = copyin(uap->argp, (caddr_t)&dumplist, 3974 sizeof (struct nfsd_dumplist)); 3975 if (!error && (dumplist.ndl_size < 1 || 3976 dumplist.ndl_size > NFSRV_MAXDUMPLIST)) 3977 error = EPERM; 3978 if (!error) { 3979 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; 3980 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 3981 nfsrv_dumpclients(dumpclients, dumplist.ndl_size); 3982 error = copyout(dumpclients, dumplist.ndl_list, len); 3983 free(dumpclients, M_TEMP); 3984 } 3985 } else if (uap->flag & NFSSVC_DUMPLOCKS) { 3986 error = copyin(uap->argp, (caddr_t)&dumplocklist, 3987 sizeof (struct nfsd_dumplocklist)); 3988 if (!error && (dumplocklist.ndllck_size < 1 || 3989 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) 3990 error = EPERM; 3991 if (!error) 3992 error = nfsrv_lookupfilename(&nd, 3993 dumplocklist.ndllck_fname, p); 3994 if (!error) { 3995 len = sizeof (struct nfsd_dumplocks) * 3996 dumplocklist.ndllck_size; 
3997 dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); 3998 nfsrv_dumplocks(nd.ni_vp, dumplocks, 3999 dumplocklist.ndllck_size, p); 4000 vput(nd.ni_vp); 4001 error = copyout(dumplocks, dumplocklist.ndllck_list, 4002 len); 4003 free(dumplocks, M_TEMP); 4004 } 4005 } else if (uap->flag & NFSSVC_BACKUPSTABLE) { 4006 procp = p->td_proc; 4007 PROC_LOCK(procp); 4008 nfsd_master_pid = procp->p_pid; 4009 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); 4010 nfsd_master_start = procp->p_stats->p_start; 4011 nfsd_master_proc = procp; 4012 PROC_UNLOCK(procp); 4013 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { 4014 NFSLOCKV4ROOTMUTEX(); 4015 if (suspend_nfsd == 0) { 4016 /* Lock out all nfsd threads */ 4017 do { 4018 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, 4019 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 4020 } while (igotlock == 0 && suspend_nfsd == 0); 4021 suspend_nfsd = 1; 4022 } 4023 NFSUNLOCKV4ROOTMUTEX(); 4024 error = 0; 4025 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { 4026 NFSLOCKV4ROOTMUTEX(); 4027 if (suspend_nfsd != 0) { 4028 nfsv4_unlock(&nfsd_suspend_lock, 0); 4029 suspend_nfsd = 0; 4030 } 4031 NFSUNLOCKV4ROOTMUTEX(); 4032 error = 0; 4033 } 4034 4035 NFSEXITCODE(error); 4036 return (error); 4037 } 4038 4039 /* 4040 * Check exports. 4041 * Returns 0 if ok, 1 otherwise. 4042 */ 4043 int 4044 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) 4045 { 4046 int i; 4047 4048 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || 4049 (NFSVNO_EXTLSCERT(exp) && 4050 (nd->nd_flag & ND_TLSCERT) == 0) || 4051 (NFSVNO_EXTLSCERTUSER(exp) && 4052 (nd->nd_flag & ND_TLSCERTUSER) == 0)) { 4053 if ((nd->nd_flag & ND_NFSV4) != 0) 4054 return (NFSERR_WRONGSEC); 4055 #ifdef notnow 4056 /* There is currently no auth_stat for this. 
*/ 4057 else if ((nd->nd_flag & ND_TLS) == 0) 4058 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS); 4059 else 4060 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST); 4061 #endif 4062 else 4063 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4064 } 4065 4066 /* 4067 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use 4068 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. 4069 */ 4070 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO) 4071 return (0); 4072 4073 /* 4074 * This seems odd, but allow the case where the security flavor 4075 * list is empty. This happens when NFSv4 is traversing non-exported 4076 * file systems. Exported file systems should always have a non-empty 4077 * security flavor list. 4078 */ 4079 if (exp->nes_numsecflavor == 0) 4080 return (0); 4081 4082 for (i = 0; i < exp->nes_numsecflavor; i++) { 4083 /* 4084 * The tests for privacy and integrity must be first, 4085 * since ND_GSS is set for everything but AUTH_SYS. 4086 */ 4087 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && 4088 (nd->nd_flag & ND_GSSPRIVACY)) 4089 return (0); 4090 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && 4091 (nd->nd_flag & ND_GSSINTEGRITY)) 4092 return (0); 4093 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && 4094 (nd->nd_flag & ND_GSS)) 4095 return (0); 4096 if (exp->nes_secflavors[i] == AUTH_SYS && 4097 (nd->nd_flag & ND_GSS) == 0) 4098 return (0); 4099 } 4100 if ((nd->nd_flag & ND_NFSV4) != 0) 4101 return (NFSERR_WRONGSEC); 4102 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 4103 } 4104 4105 /* 4106 * Calculate a hash value for the fid in a file handle. 4107 */ 4108 uint32_t 4109 nfsrv_hashfh(fhandle_t *fhp) 4110 { 4111 uint32_t hashval; 4112 4113 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); 4114 return (hashval); 4115 } 4116 4117 /* 4118 * Calculate a hash value for the sessionid. 
4119 */ 4120 uint32_t 4121 nfsrv_hashsessionid(uint8_t *sessionid) 4122 { 4123 uint32_t hashval; 4124 4125 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); 4126 return (hashval); 4127 } 4128 4129 /* 4130 * Signal the userland master nfsd to backup the stable restart file. 4131 */ 4132 void 4133 nfsrv_backupstable(void) 4134 { 4135 struct proc *procp; 4136 4137 if (nfsd_master_proc != NULL) { 4138 procp = pfind(nfsd_master_pid); 4139 /* Try to make sure it is the correct process. */ 4140 if (procp == nfsd_master_proc && 4141 procp->p_stats->p_start.tv_sec == 4142 nfsd_master_start.tv_sec && 4143 procp->p_stats->p_start.tv_usec == 4144 nfsd_master_start.tv_usec && 4145 strcmp(procp->p_comm, nfsd_master_comm) == 0) 4146 kern_psignal(procp, SIGUSR2); 4147 else 4148 nfsd_master_proc = NULL; 4149 4150 if (procp != NULL) 4151 PROC_UNLOCK(procp); 4152 } 4153 } 4154 4155 /* 4156 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. 4157 * The arguments are in a structure, so that they can be passed through 4158 * taskqueue for a kernel process to execute this function. 
4159 */ 4160 struct nfsrvdscreate { 4161 int done; 4162 int inprog; 4163 struct task tsk; 4164 struct ucred *tcred; 4165 struct vnode *dvp; 4166 NFSPROC_T *p; 4167 struct pnfsdsfile *pf; 4168 int err; 4169 fhandle_t fh; 4170 struct vattr va; 4171 struct vattr createva; 4172 }; 4173 4174 int 4175 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, 4176 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, 4177 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) 4178 { 4179 struct vnode *nvp; 4180 struct nameidata named; 4181 struct vattr va; 4182 char *bufp; 4183 u_long *hashp; 4184 struct nfsnode *np; 4185 struct nfsmount *nmp; 4186 int error; 4187 4188 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, 4189 LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); 4190 nfsvno_setpathbuf(&named, &bufp, &hashp); 4191 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; 4192 named.ni_cnd.cn_nameptr = bufp; 4193 if (fnamep != NULL) { 4194 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); 4195 named.ni_cnd.cn_namelen = strlen(bufp); 4196 } else 4197 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); 4198 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); 4199 4200 /* Create the date file in the DS mount. */ 4201 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4202 if (error == 0) { 4203 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); 4204 vref(dvp); 4205 VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false); 4206 if (error == 0) { 4207 /* Set the ownership of the file. 
*/ 4208 error = VOP_SETATTR(nvp, nvap, tcred); 4209 NFSD_DEBUG(4, "nfsrv_dscreate:" 4210 " setattr-uid=%d\n", error); 4211 if (error != 0) 4212 vput(nvp); 4213 } 4214 if (error != 0) 4215 printf("pNFS: pnfscreate failed=%d\n", error); 4216 } else 4217 printf("pNFS: pnfscreate vnlock=%d\n", error); 4218 if (error == 0) { 4219 np = VTONFS(nvp); 4220 nmp = VFSTONFS(nvp->v_mount); 4221 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") 4222 != 0 || nmp->nm_nam->sa_len > sizeof( 4223 struct sockaddr_in6) || 4224 np->n_fhp->nfh_len != NFSX_MYFH) { 4225 printf("Bad DS file: fstype=%s salen=%d" 4226 " fhlen=%d\n", 4227 nvp->v_mount->mnt_vfc->vfc_name, 4228 nmp->nm_nam->sa_len, np->n_fhp->nfh_len); 4229 error = ENOENT; 4230 } 4231 4232 /* Set extattrs for the DS on the MDS file. */ 4233 if (error == 0) { 4234 if (dsa != NULL) { 4235 error = VOP_GETATTR(nvp, &va, tcred); 4236 if (error == 0) { 4237 dsa->dsa_filerev = va.va_filerev; 4238 dsa->dsa_size = va.va_size; 4239 dsa->dsa_atime = va.va_atime; 4240 dsa->dsa_mtime = va.va_mtime; 4241 dsa->dsa_bytes = va.va_bytes; 4242 } 4243 } 4244 if (error == 0) { 4245 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, 4246 NFSX_MYFH); 4247 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, 4248 nmp->nm_nam->sa_len); 4249 NFSBCOPY(named.ni_cnd.cn_nameptr, 4250 pf->dsf_filename, 4251 sizeof(pf->dsf_filename)); 4252 } 4253 } else 4254 printf("pNFS: pnfscreate can't get DS" 4255 " attr=%d\n", error); 4256 if (nvpp != NULL && error == 0) 4257 *nvpp = nvp; 4258 else 4259 vput(nvp); 4260 } 4261 nfsvno_relpathbuf(&named); 4262 return (error); 4263 } 4264 4265 /* 4266 * Start up the thread that will execute nfsrv_dscreate(). 
4267 */ 4268 static void 4269 start_dscreate(void *arg, int pending) 4270 { 4271 struct nfsrvdscreate *dsc; 4272 4273 dsc = (struct nfsrvdscreate *)arg; 4274 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, 4275 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); 4276 dsc->done = 1; 4277 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); 4278 } 4279 4280 /* 4281 * Create a pNFS data file on the Data Server(s). 4282 */ 4283 static void 4284 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, 4285 NFSPROC_T *p) 4286 { 4287 struct nfsrvdscreate *dsc, *tdsc = NULL; 4288 struct nfsdevice *ds, *tds, *fds; 4289 struct mount *mp; 4290 struct pnfsdsfile *pf, *tpf; 4291 struct pnfsdsattr dsattr; 4292 struct vattr va; 4293 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4294 struct nfsmount *nmp; 4295 fhandle_t fh; 4296 uid_t vauid; 4297 gid_t vagid; 4298 u_short vamode; 4299 struct ucred *tcred; 4300 int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; 4301 int failpos, timo; 4302 4303 /* Get a DS server directory in a round-robin order. */ 4304 mirrorcnt = 1; 4305 mp = vp->v_mount; 4306 ds = fds = NULL; 4307 NFSDDSLOCK(); 4308 /* 4309 * Search for the first entry that handles this MDS fs, but use the 4310 * first entry for all MDS fs's otherwise. 
4311 */ 4312 TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { 4313 if (tds->nfsdev_nmp != NULL) { 4314 if (tds->nfsdev_mdsisset == 0 && ds == NULL) 4315 ds = tds; 4316 else if (tds->nfsdev_mdsisset != 0 && fsidcmp( 4317 &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { 4318 ds = fds = tds; 4319 break; 4320 } 4321 } 4322 } 4323 if (ds == NULL) { 4324 NFSDDSUNLOCK(); 4325 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); 4326 return; 4327 } 4328 i = dsdir[0] = ds->nfsdev_nextdir; 4329 ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; 4330 dvp[0] = ds->nfsdev_dsdir[i]; 4331 tds = TAILQ_NEXT(ds, nfsdev_list); 4332 if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { 4333 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { 4334 if (tds->nfsdev_nmp != NULL && 4335 ((tds->nfsdev_mdsisset == 0 && fds == NULL) || 4336 (tds->nfsdev_mdsisset != 0 && fds != NULL && 4337 fsidcmp(&mp->mnt_stat.f_fsid, 4338 &tds->nfsdev_mdsfsid) == 0))) { 4339 dsdir[mirrorcnt] = i; 4340 dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; 4341 mirrorcnt++; 4342 if (mirrorcnt >= nfsrv_maxpnfsmirror) 4343 break; 4344 } 4345 } 4346 } 4347 /* Put at end of list to implement round-robin usage. */ 4348 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); 4349 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); 4350 NFSDDSUNLOCK(); 4351 dsc = NULL; 4352 if (mirrorcnt > 1) 4353 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, 4354 M_WAITOK | M_ZERO); 4355 tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | 4356 M_ZERO); 4357 4358 error = nfsvno_getfh(vp, &fh, p); 4359 if (error == 0) 4360 error = VOP_GETATTR(vp, &va, cred); 4361 if (error == 0) { 4362 /* Set the attributes for "vp" to Setattr the DS vp. 
*/ 4363 vauid = va.va_uid; 4364 vagid = va.va_gid; 4365 vamode = va.va_mode; 4366 VATTR_NULL(&va); 4367 va.va_uid = vauid; 4368 va.va_gid = vagid; 4369 va.va_mode = vamode; 4370 va.va_size = 0; 4371 } else 4372 printf("pNFS: pnfscreate getfh+attr=%d\n", error); 4373 4374 NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, 4375 cred->cr_gid); 4376 /* Make data file name based on FH. */ 4377 tcred = newnfs_getcred(); 4378 4379 /* 4380 * Create the file on each DS mirror, using kernel process(es) for the 4381 * additional mirrors. 4382 */ 4383 failpos = -1; 4384 for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { 4385 tpf->dsf_dir = dsdir[i]; 4386 tdsc->tcred = tcred; 4387 tdsc->p = p; 4388 tdsc->pf = tpf; 4389 tdsc->createva = *vap; 4390 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); 4391 tdsc->va = va; 4392 tdsc->dvp = dvp[i]; 4393 tdsc->done = 0; 4394 tdsc->inprog = 0; 4395 tdsc->err = 0; 4396 ret = EIO; 4397 if (nfs_pnfsiothreads != 0) { 4398 ret = nfs_pnfsio(start_dscreate, tdsc); 4399 NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); 4400 } 4401 if (ret != 0) { 4402 ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, 4403 NULL, tcred, p, NULL); 4404 if (ret != 0) { 4405 KASSERT(error == 0, ("nfsrv_dscreate err=%d", 4406 error)); 4407 if (failpos == -1 && nfsds_failerr(ret)) 4408 failpos = i; 4409 else 4410 error = ret; 4411 } 4412 } 4413 } 4414 if (error == 0) { 4415 tpf->dsf_dir = dsdir[mirrorcnt - 1]; 4416 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, 4417 &dsattr, NULL, tcred, p, NULL); 4418 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { 4419 failpos = mirrorcnt - 1; 4420 error = 0; 4421 } 4422 } 4423 timo = hz / 50; /* Wait for 20msec. */ 4424 if (timo < 1) 4425 timo = 1; 4426 /* Wait for kernel task(s) to complete. 
*/ 4427 for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { 4428 while (tdsc->inprog != 0 && tdsc->done == 0) 4429 tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); 4430 if (tdsc->err != 0) { 4431 if (failpos == -1 && nfsds_failerr(tdsc->err)) 4432 failpos = i; 4433 else if (error == 0) 4434 error = tdsc->err; 4435 } 4436 } 4437 4438 /* 4439 * If failpos has been set, that mirror has failed, so it needs 4440 * to be disabled. 4441 */ 4442 if (failpos >= 0) { 4443 nmp = VFSTONFS(dvp[failpos]->v_mount); 4444 NFSLOCKMNT(nmp); 4445 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4446 NFSMNTP_CANCELRPCS)) == 0) { 4447 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4448 NFSUNLOCKMNT(nmp); 4449 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4450 NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, 4451 ds); 4452 if (ds != NULL) 4453 nfsrv_killrpcs(nmp); 4454 NFSLOCKMNT(nmp); 4455 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4456 wakeup(nmp); 4457 } 4458 NFSUNLOCKMNT(nmp); 4459 } 4460 4461 NFSFREECRED(tcred); 4462 if (error == 0) { 4463 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); 4464 4465 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", 4466 mirrorcnt, nfsrv_maxpnfsmirror); 4467 /* 4468 * For all mirrors that couldn't be created, fill in the 4469 * *pf structure, but with an IP address == 0.0.0.0. 
4470 */ 4471 tpf = pf + mirrorcnt; 4472 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { 4473 *tpf = *pf; 4474 tpf->dsf_sin.sin_family = AF_INET; 4475 tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); 4476 tpf->dsf_sin.sin_addr.s_addr = 0; 4477 tpf->dsf_sin.sin_port = 0; 4478 } 4479 4480 error = vn_extattr_set(vp, IO_NODELOCKED, 4481 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", 4482 sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); 4483 if (error == 0) 4484 error = vn_extattr_set(vp, IO_NODELOCKED, 4485 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", 4486 sizeof(dsattr), (char *)&dsattr, p); 4487 if (error != 0) 4488 printf("pNFS: pnfscreate setextattr=%d\n", 4489 error); 4490 } else 4491 printf("pNFS: pnfscreate=%d\n", error); 4492 free(pf, M_TEMP); 4493 free(dsc, M_TEMP); 4494 } 4495 4496 /* 4497 * Get the information needed to remove the pNFS Data Server file from the 4498 * Metadata file. Upon success, ddvp is set non-NULL to the locked 4499 * DS directory vnode. The caller must unlock *ddvp when done with it. 4500 */ 4501 static void 4502 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, 4503 int *mirrorcntp, char *fname, fhandle_t *fhp) 4504 { 4505 struct vattr va; 4506 struct ucred *tcred; 4507 char *buf; 4508 int buflen, error; 4509 4510 dvpp[0] = NULL; 4511 /* If not an exported regular file or not a pNFS server, just return. */ 4512 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4513 nfsrv_devidcnt == 0) 4514 return; 4515 4516 /* Check to see if this is the last hard link. 
*/ 4517 tcred = newnfs_getcred(); 4518 error = VOP_GETATTR(vp, &va, tcred); 4519 NFSFREECRED(tcred); 4520 if (error != 0) { 4521 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); 4522 return; 4523 } 4524 if (va.va_nlink > 1) 4525 return; 4526 4527 error = nfsvno_getfh(vp, fhp, p); 4528 if (error != 0) { 4529 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); 4530 return; 4531 } 4532 4533 buflen = 1024; 4534 buf = malloc(buflen, M_TEMP, M_WAITOK); 4535 /* Get the directory vnode for the DS mount and the file handle. */ 4536 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, 4537 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); 4538 free(buf, M_TEMP); 4539 if (error != 0) 4540 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); 4541 } 4542 4543 /* 4544 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. 4545 * The arguments are in a structure, so that they can be passed through 4546 * taskqueue for a kernel process to execute this function. 
4547 */ 4548 struct nfsrvdsremove { 4549 int done; 4550 int inprog; 4551 struct task tsk; 4552 struct ucred *tcred; 4553 struct vnode *dvp; 4554 NFSPROC_T *p; 4555 int err; 4556 char fname[PNFS_FILENAME_LEN + 1]; 4557 }; 4558 4559 static int 4560 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, 4561 NFSPROC_T *p) 4562 { 4563 struct nameidata named; 4564 struct vnode *nvp; 4565 char *bufp; 4566 u_long *hashp; 4567 int error; 4568 4569 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); 4570 if (error != 0) 4571 return (error); 4572 named.ni_cnd.cn_nameiop = DELETE; 4573 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 4574 named.ni_cnd.cn_cred = tcred; 4575 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 4576 nfsvno_setpathbuf(&named, &bufp, &hashp); 4577 named.ni_cnd.cn_nameptr = bufp; 4578 named.ni_cnd.cn_namelen = strlen(fname); 4579 strlcpy(bufp, fname, NAME_MAX); 4580 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); 4581 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 4582 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); 4583 if (error == 0) { 4584 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); 4585 vput(nvp); 4586 } 4587 NFSVOPUNLOCK(dvp); 4588 nfsvno_relpathbuf(&named); 4589 if (error != 0) 4590 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); 4591 return (error); 4592 } 4593 4594 /* 4595 * Start up the thread that will execute nfsrv_dsremove(). 4596 */ 4597 static void 4598 start_dsremove(void *arg, int pending) 4599 { 4600 struct nfsrvdsremove *dsrm; 4601 4602 dsrm = (struct nfsrvdsremove *)arg; 4603 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, 4604 dsrm->p); 4605 dsrm->done = 1; 4606 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); 4607 } 4608 4609 /* 4610 * Remove a pNFS data file from a Data Server. 4611 * nfsrv_pnfsremovesetup() must have been called before the MDS file was 4612 * removed to set up the dvp and fill in the FH. 
4613 */ 4614 static void 4615 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, 4616 NFSPROC_T *p) 4617 { 4618 struct ucred *tcred; 4619 struct nfsrvdsremove *dsrm, *tdsrm; 4620 struct nfsdevice *ds; 4621 struct nfsmount *nmp; 4622 int failpos, i, ret, timo; 4623 4624 tcred = newnfs_getcred(); 4625 dsrm = NULL; 4626 if (mirrorcnt > 1) 4627 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); 4628 /* 4629 * Remove the file on each DS mirror, using kernel process(es) for the 4630 * additional mirrors. 4631 */ 4632 failpos = -1; 4633 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4634 tdsrm->tcred = tcred; 4635 tdsrm->p = p; 4636 tdsrm->dvp = dvp[i]; 4637 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); 4638 tdsrm->inprog = 0; 4639 tdsrm->done = 0; 4640 tdsrm->err = 0; 4641 ret = EIO; 4642 if (nfs_pnfsiothreads != 0) { 4643 ret = nfs_pnfsio(start_dsremove, tdsrm); 4644 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); 4645 } 4646 if (ret != 0) { 4647 ret = nfsrv_dsremove(dvp[i], fname, tcred, p); 4648 if (failpos == -1 && nfsds_failerr(ret)) 4649 failpos = i; 4650 } 4651 } 4652 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); 4653 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) 4654 failpos = mirrorcnt - 1; 4655 timo = hz / 50; /* Wait for 20msec. */ 4656 if (timo < 1) 4657 timo = 1; 4658 /* Wait for kernel task(s) to complete. */ 4659 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { 4660 while (tdsrm->inprog != 0 && tdsrm->done == 0) 4661 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); 4662 if (failpos == -1 && nfsds_failerr(tdsrm->err)) 4663 failpos = i; 4664 } 4665 4666 /* 4667 * If failpos has been set, that mirror has failed, so it needs 4668 * to be disabled. 
4669 */ 4670 if (failpos >= 0) { 4671 nmp = VFSTONFS(dvp[failpos]->v_mount); 4672 NFSLOCKMNT(nmp); 4673 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | 4674 NFSMNTP_CANCELRPCS)) == 0) { 4675 nmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4676 NFSUNLOCKMNT(nmp); 4677 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); 4678 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, 4679 ds); 4680 if (ds != NULL) 4681 nfsrv_killrpcs(nmp); 4682 NFSLOCKMNT(nmp); 4683 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4684 wakeup(nmp); 4685 } 4686 NFSUNLOCKMNT(nmp); 4687 } 4688 4689 /* Get rid all layouts for the file. */ 4690 nfsrv_freefilelayouts(fhp); 4691 4692 NFSFREECRED(tcred); 4693 free(dsrm, M_TEMP); 4694 } 4695 4696 /* 4697 * Generate a file name based on the file handle and put it in *bufp. 4698 * Return the number of bytes generated. 4699 */ 4700 static int 4701 nfsrv_putfhname(fhandle_t *fhp, char *bufp) 4702 { 4703 int i; 4704 uint8_t *cp; 4705 const uint8_t *hexdigits = "0123456789abcdef"; 4706 4707 cp = (uint8_t *)fhp; 4708 for (i = 0; i < sizeof(*fhp); i++) { 4709 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; 4710 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; 4711 } 4712 bufp[2 * i] = '\0'; 4713 return (2 * i); 4714 } 4715 4716 /* 4717 * Update the Metadata file's attributes from the DS file when a Read/Write 4718 * layout is returned. 4719 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN 4720 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. 4721 */ 4722 int 4723 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) 4724 { 4725 struct ucred *tcred; 4726 int error; 4727 4728 /* Do this as root so that it won't fail with EACCES. 
*/ 4729 tcred = newnfs_getcred(); 4730 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, 4731 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); 4732 NFSFREECRED(tcred); 4733 return (error); 4734 } 4735 4736 /* 4737 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 4738 */ 4739 static int 4740 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, 4741 NFSPROC_T *p) 4742 { 4743 int error; 4744 4745 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, 4746 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); 4747 return (error); 4748 } 4749 4750 static int 4751 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, 4752 struct thread *p, int ioproc, struct mbuf **mpp, char *cp, 4753 struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, 4754 off_t *offp, int content, bool *eofp) 4755 { 4756 struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; 4757 fhandle_t fh[NFSDEV_MAXMIRRORS]; 4758 struct vnode *dvp[NFSDEV_MAXMIRRORS]; 4759 struct nfsdevice *ds; 4760 struct pnfsdsattr dsattr; 4761 struct opnfsdsattr odsattr; 4762 char *buf; 4763 int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; 4764 4765 NFSD_DEBUG(4, "in nfsrv_proxyds\n"); 4766 /* 4767 * If not a regular file, not exported or not a pNFS server, 4768 * just return ENOENT. 4769 */ 4770 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || 4771 nfsrv_devidcnt == 0) 4772 return (ENOENT); 4773 4774 buflen = 1024; 4775 buf = malloc(buflen, M_TEMP, M_WAITOK); 4776 error = 0; 4777 4778 /* 4779 * For Getattr, get the Change attribute (va_filerev) and size (va_size) 4780 * from the MetaData file's extended attribute. 
4781 */ 4782 if (ioproc == NFSPROC_GETATTR) { 4783 error = vn_extattr_get(vp, IO_NODELOCKED, 4784 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, 4785 p); 4786 if (error == 0) { 4787 if (buflen == sizeof(odsattr)) { 4788 NFSBCOPY(buf, &odsattr, buflen); 4789 nap->na_filerev = odsattr.dsa_filerev; 4790 nap->na_size = odsattr.dsa_size; 4791 nap->na_atime = odsattr.dsa_atime; 4792 nap->na_mtime = odsattr.dsa_mtime; 4793 /* 4794 * Fake na_bytes by rounding up na_size. 4795 * Since we don't know the block size, just 4796 * use BLKDEV_IOSIZE. 4797 */ 4798 nap->na_bytes = (odsattr.dsa_size + 4799 BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); 4800 } else if (buflen == sizeof(dsattr)) { 4801 NFSBCOPY(buf, &dsattr, buflen); 4802 nap->na_filerev = dsattr.dsa_filerev; 4803 nap->na_size = dsattr.dsa_size; 4804 nap->na_atime = dsattr.dsa_atime; 4805 nap->na_mtime = dsattr.dsa_mtime; 4806 nap->na_bytes = dsattr.dsa_bytes; 4807 } else 4808 error = ENXIO; 4809 } 4810 if (error == 0) { 4811 /* 4812 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() 4813 * returns 0, just return now. nfsrv_checkdsattr() 4814 * returns 0 if there is no Read/Write layout 4815 * plus either an Open/Write_access or Write 4816 * delegation issued to a client for the file. 4817 */ 4818 if (nfsrv_pnfsgetdsattr == 0 || 4819 nfsrv_checkdsattr(vp, p) == 0) { 4820 free(buf, M_TEMP); 4821 return (error); 4822 } 4823 } 4824 4825 /* 4826 * Clear ENOATTR so the code below will attempt to do a 4827 * nfsrv_getattrdsrpc() to get the attributes and (re)create 4828 * the extended attribute. 
4829 */ 4830 if (error == ENOATTR) 4831 error = 0; 4832 } 4833 4834 origmircnt = -1; 4835 trycnt = 0; 4836 tryagain: 4837 if (error == 0) { 4838 buflen = 1024; 4839 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == 4840 LK_EXCLUSIVE) 4841 printf("nfsrv_proxyds: Readds vp exclusively locked\n"); 4842 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, 4843 &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, 4844 NULL, NULL); 4845 if (error == 0) { 4846 for (i = 0; i < mirrorcnt; i++) 4847 nmp[i] = VFSTONFS(dvp[i]->v_mount); 4848 } else 4849 printf("pNFS: proxy getextattr sockaddr=%d\n", error); 4850 } else 4851 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); 4852 if (error == 0) { 4853 failpos = -1; 4854 if (origmircnt == -1) 4855 origmircnt = mirrorcnt; 4856 /* 4857 * If failpos is set to a mirror#, then that mirror has 4858 * failed and will be disabled. For Read, Getattr and Seek, the 4859 * function only tries one mirror, so if that mirror has 4860 * failed, it will need to be retried. As such, increment 4861 * tryitagain for these cases. 4862 * For Write, Setattr and Setacl, the function tries all 4863 * mirrors and will not return an error for the case where 4864 * one mirror has failed. For these cases, the functioning 4865 * mirror(s) will have been modified, so a retry isn't 4866 * necessary. These functions will set failpos for the 4867 * failed mirror#. 4868 */ 4869 if (ioproc == NFSPROC_READDS) { 4870 error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], 4871 mpp, mpp2); 4872 if (nfsds_failerr(error) && mirrorcnt > 1) { 4873 /* 4874 * Setting failpos will cause the mirror 4875 * to be disabled and then a retry of this 4876 * read is required. 
4877 */ 4878 failpos = 0; 4879 error = 0; 4880 trycnt++; 4881 } 4882 } else if (ioproc == NFSPROC_WRITEDS) 4883 error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, 4884 &nmp[0], mirrorcnt, mpp, cp, &failpos); 4885 else if (ioproc == NFSPROC_SETATTR) 4886 error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], 4887 mirrorcnt, nap, &failpos); 4888 else if (ioproc == NFSPROC_SETACL) 4889 error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], 4890 mirrorcnt, aclp, &failpos); 4891 else if (ioproc == NFSPROC_SEEKDS) { 4892 error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, 4893 p, nmp[0]); 4894 if (nfsds_failerr(error) && mirrorcnt > 1) { 4895 /* 4896 * Setting failpos will cause the mirror 4897 * to be disabled and then a retry of this 4898 * read is required. 4899 */ 4900 failpos = 0; 4901 error = 0; 4902 trycnt++; 4903 } 4904 } else if (ioproc == NFSPROC_ALLOCATE) 4905 error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, 4906 &nmp[0], mirrorcnt, &failpos); 4907 else if (ioproc == NFSPROC_DEALLOCATE) 4908 error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p, 4909 vp, &nmp[0], mirrorcnt, &failpos); 4910 else { 4911 error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, 4912 vp, nmp[mirrorcnt - 1], nap); 4913 if (nfsds_failerr(error) && mirrorcnt > 1) { 4914 /* 4915 * Setting failpos will cause the mirror 4916 * to be disabled and then a retry of this 4917 * getattr is required. 
4918 */ 4919 failpos = mirrorcnt - 1; 4920 error = 0; 4921 trycnt++; 4922 } 4923 } 4924 ds = NULL; 4925 if (failpos >= 0) { 4926 failnmp = nmp[failpos]; 4927 NFSLOCKMNT(failnmp); 4928 if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | 4929 NFSMNTP_CANCELRPCS)) == 0) { 4930 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; 4931 NFSUNLOCKMNT(failnmp); 4932 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, 4933 failnmp, p); 4934 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", 4935 failpos, ds); 4936 if (ds != NULL) 4937 nfsrv_killrpcs(failnmp); 4938 NFSLOCKMNT(failnmp); 4939 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; 4940 wakeup(failnmp); 4941 } 4942 NFSUNLOCKMNT(failnmp); 4943 } 4944 for (i = 0; i < mirrorcnt; i++) 4945 NFSVOPUNLOCK(dvp[i]); 4946 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, 4947 trycnt); 4948 /* Try the Read/Getattr again if a mirror was deleted. */ 4949 if (ds != NULL && trycnt > 0 && trycnt < origmircnt) 4950 goto tryagain; 4951 } else { 4952 /* Return ENOENT for any Extended Attribute error. */ 4953 error = ENOENT; 4954 } 4955 free(buf, M_TEMP); 4956 NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); 4957 return (error); 4958 } 4959 4960 /* 4961 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended 4962 * attribute. 4963 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs 4964 * to be checked. If it points to a NULL nmp, then it returns 4965 * a suitable destination. 4966 * curnmp - If non-NULL, it is the source mount for the copy. 
4967 */ 4968 int 4969 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, 4970 int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, 4971 char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, 4972 struct nfsmount *curnmp, int *ippos, int *dsdirp) 4973 { 4974 struct vnode *dvp, *nvp = NULL, **tdvpp; 4975 struct mount *mp; 4976 struct nfsmount *nmp, *newnmp; 4977 struct sockaddr *sad; 4978 struct sockaddr_in *sin; 4979 struct nfsdevice *ds, *tds, *fndds; 4980 struct pnfsdsfile *pf; 4981 uint32_t dsdir; 4982 int error, fhiszero, fnd, gotone, i, mirrorcnt; 4983 4984 ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); 4985 *mirrorcntp = 1; 4986 tdvpp = dvpp; 4987 if (nvpp != NULL) 4988 *nvpp = NULL; 4989 if (dvpp != NULL) 4990 *dvpp = NULL; 4991 if (ippos != NULL) 4992 *ippos = -1; 4993 if (newnmpp != NULL) 4994 newnmp = *newnmpp; 4995 else 4996 newnmp = NULL; 4997 mp = vp->v_mount; 4998 error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, 4999 "pnfsd.dsfile", buflenp, buf, p); 5000 mirrorcnt = *buflenp / sizeof(*pf); 5001 if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || 5002 *buflenp != sizeof(*pf) * mirrorcnt)) 5003 error = ENOATTR; 5004 5005 pf = (struct pnfsdsfile *)buf; 5006 /* If curnmp != NULL, check for a match in the mirror list. 
*/ 5007 if (curnmp != NULL && error == 0) { 5008 fnd = 0; 5009 for (i = 0; i < mirrorcnt; i++, pf++) { 5010 sad = (struct sockaddr *)&pf->dsf_sin; 5011 if (nfsaddr2_match(sad, curnmp->nm_nam)) { 5012 if (ippos != NULL) 5013 *ippos = i; 5014 fnd = 1; 5015 break; 5016 } 5017 } 5018 if (fnd == 0) 5019 error = ENXIO; 5020 } 5021 5022 gotone = 0; 5023 pf = (struct pnfsdsfile *)buf; 5024 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, 5025 error); 5026 for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { 5027 fhiszero = 0; 5028 sad = (struct sockaddr *)&pf->dsf_sin; 5029 sin = &pf->dsf_sin; 5030 dsdir = pf->dsf_dir; 5031 if (dsdir >= nfsrv_dsdirsize) { 5032 printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); 5033 error = ENOATTR; 5034 } else if (nvpp != NULL && newnmp != NULL && 5035 nfsaddr2_match(sad, newnmp->nm_nam)) 5036 error = EEXIST; 5037 if (error == 0) { 5038 if (ippos != NULL && curnmp == NULL && 5039 sad->sa_family == AF_INET && 5040 sin->sin_addr.s_addr == 0) 5041 *ippos = i; 5042 if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) 5043 fhiszero = 1; 5044 /* Use the socket address to find the mount point. */ 5045 fndds = NULL; 5046 NFSDDSLOCK(); 5047 /* Find a match for the IP address. */ 5048 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 5049 if (ds->nfsdev_nmp != NULL) { 5050 dvp = ds->nfsdev_dvp; 5051 nmp = VFSTONFS(dvp->v_mount); 5052 if (nmp != ds->nfsdev_nmp) 5053 printf("different2 nmp %p %p\n", 5054 nmp, ds->nfsdev_nmp); 5055 if (nfsaddr2_match(sad, nmp->nm_nam)) { 5056 fndds = ds; 5057 break; 5058 } 5059 } 5060 } 5061 if (fndds != NULL && newnmpp != NULL && 5062 newnmp == NULL) { 5063 /* Search for a place to make a mirror copy. 
*/ 5064 TAILQ_FOREACH(tds, &nfsrv_devidhead, 5065 nfsdev_list) { 5066 if (tds->nfsdev_nmp != NULL && 5067 fndds != tds && 5068 ((tds->nfsdev_mdsisset == 0 && 5069 fndds->nfsdev_mdsisset == 0) || 5070 (tds->nfsdev_mdsisset != 0 && 5071 fndds->nfsdev_mdsisset != 0 && 5072 fsidcmp(&tds->nfsdev_mdsfsid, 5073 &mp->mnt_stat.f_fsid) == 0))) { 5074 *newnmpp = tds->nfsdev_nmp; 5075 break; 5076 } 5077 } 5078 if (tds != NULL) { 5079 /* 5080 * Move this entry to the end of the 5081 * list, so it won't be selected as 5082 * easily the next time. 5083 */ 5084 TAILQ_REMOVE(&nfsrv_devidhead, tds, 5085 nfsdev_list); 5086 TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, 5087 nfsdev_list); 5088 } 5089 } 5090 NFSDDSUNLOCK(); 5091 if (fndds != NULL) { 5092 dvp = fndds->nfsdev_dsdir[dsdir]; 5093 if (lktype != 0 || fhiszero != 0 || 5094 (nvpp != NULL && *nvpp == NULL)) { 5095 if (fhiszero != 0) 5096 error = vn_lock(dvp, 5097 LK_EXCLUSIVE); 5098 else if (lktype != 0) 5099 error = vn_lock(dvp, lktype); 5100 else 5101 error = vn_lock(dvp, LK_SHARED); 5102 /* 5103 * If the file handle is all 0's, try to 5104 * do a Lookup against the DS to acquire 5105 * it. 5106 * If dvpp == NULL or the Lookup fails, 5107 * unlock dvp after the call. 
5108 */ 5109 if (error == 0 && (fhiszero != 0 || 5110 (nvpp != NULL && *nvpp == NULL))) { 5111 error = nfsrv_pnfslookupds(vp, 5112 dvp, pf, &nvp, p); 5113 if (error == 0) { 5114 if (fhiszero != 0) 5115 nfsrv_pnfssetfh( 5116 vp, pf, 5117 devid, 5118 fnamep, 5119 nvp, p); 5120 if (nvpp != NULL && 5121 *nvpp == NULL) { 5122 *nvpp = nvp; 5123 *dsdirp = dsdir; 5124 } else 5125 vput(nvp); 5126 } 5127 if (error != 0 || lktype == 0) 5128 NFSVOPUNLOCK(dvp); 5129 } 5130 } 5131 if (error == 0) { 5132 gotone++; 5133 NFSD_DEBUG(4, "gotone=%d\n", gotone); 5134 if (devid != NULL) { 5135 NFSBCOPY(fndds->nfsdev_deviceid, 5136 devid, NFSX_V4DEVICEID); 5137 devid += NFSX_V4DEVICEID; 5138 } 5139 if (dvpp != NULL) 5140 *tdvpp++ = dvp; 5141 if (fhp != NULL) 5142 NFSBCOPY(&pf->dsf_fh, fhp++, 5143 NFSX_MYFH); 5144 if (fnamep != NULL && gotone == 1) 5145 strlcpy(fnamep, 5146 pf->dsf_filename, 5147 sizeof(pf->dsf_filename)); 5148 } else 5149 NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " 5150 "err=%d\n", error); 5151 } 5152 } 5153 } 5154 if (error == 0 && gotone == 0) 5155 error = ENOENT; 5156 5157 NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, 5158 error); 5159 if (error == 0) 5160 *mirrorcntp = gotone; 5161 else { 5162 if (gotone > 0 && dvpp != NULL) { 5163 /* 5164 * If the error didn't occur on the first one and 5165 * dvpp != NULL, the one(s) prior to the failure will 5166 * have locked dvp's that need to be unlocked. 5167 */ 5168 for (i = 0; i < gotone; i++) { 5169 NFSVOPUNLOCK(*dvpp); 5170 *dvpp++ = NULL; 5171 } 5172 } 5173 /* 5174 * If it found the vnode to be copied from before a failure, 5175 * it needs to be vput()'d. 5176 */ 5177 if (nvpp != NULL && *nvpp != NULL) { 5178 vput(*nvpp); 5179 *nvpp = NULL; 5180 } 5181 } 5182 return (error); 5183 } 5184 5185 /* 5186 * Set the extended attribute for the Change attribute. 
 * The Change, size, access/modify times and bytes used are copied from
 * *nap into a struct pnfsdsattr and stored as the "pnfsd.dsattr" system
 * extended attribute of vp, which must be exclusively locked.
 * Returns 0 or the error from vn_extattr_set().
 */
static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
	struct pnfsdsattr dsattr;
	int error;

	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
	dsattr.dsa_filerev = nap->na_filerev;
	dsattr.dsa_size = nap->na_size;
	dsattr.dsa_atime = nap->na_atime;
	dsattr.dsa_mtime = nap->na_mtime;
	dsattr.dsa_bytes = nap->na_bytes;
	error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
	if (error != 0)
		printf("pNFS: setextattr=%d\n", error);
	return (error);
}

/*
 * Do a Read RPC of "len" bytes at offset "off" against the DS data file
 * with file handle *fhp on the DS mount nmp.
 * Upon success with data returned, *mpp is set to the reply mbuf chain
 * trimmed so that it holds only the read data (plus XDR padding) and
 * *mpendp is set to the last mbuf of that chain.  *mpp is left NULL when
 * no data is returned.
 * Returns 0, the error from newnfs_request(), the reply status of the RPC
 * or ENOENT if the reply mbuf chain is malformed.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0, cred);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* The Read arguments are the 64bit offset followed by the length. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		/*
		 * NOTE(review): NFSM_DISSECT()/NFSM_STRSIZ() presumably set
		 * error and branch to nfsmout when the reply is too short or
		 * the length is out of range - confirm against the macros.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			/* The chain ended before tlen bytes were found. */
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the chain to the caller, so it isn't freed below. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}

/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 */
struct nfsrvwritedsdorpc {
	int			done;	/* Set to 1 by start_writedsdorpc(). */
	int			inprog;	/* Tested, along with done, by the waiter. */
	struct task		tsk;
	fhandle_t		fh;
	off_t			off;
	int			len;
	struct nfsmount		*nmp;
	struct ucred		*cred;
	NFSPROC_T		*p;
	struct mbuf		*m;	/* Copy of the data to be written. */
	int			err;	/* Result of nfsrv_writedsdorpc(). */
};

/*
 * Do a Write RPC, followed by a Getattr, against the DS data file with
 * file handle *fhp on the DS mount nmp.
 * The mbuf chain "m" holds the data and is linked into the request.
 * The attributes in the reply are loaded via nfsv4_loadattr() using nap
 * (callers in this file pass NULL when they do not need them).
 * Returns 0, the error from newnfs_request(), the reply status of the RPC,
 * NFSERR_IO if the DS did not commit the data FileSync or an error from
 * parsing the reply.
 */
static int
nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
    struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsattrbit_t attrbits;
	nfsv4stateid_t st;
	int commit, error, retlen;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);

	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
	txdr_hyper(off, tl);
	tl += 2;
	/*
	 * Do all writes FileSync, since the server doesn't hold onto dirty
	 * buffers.  Since clients should be accessing the DS servers directly
	 * using the pNFS layouts, this just needs to work correctly as a
	 * fallback.
	 */
	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
	*tl = txdr_unsigned(len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);

	/* Put data in mbuf chain. */
	nd->nd_mb->m_next = m;

	/* Set nd_mb and nd_bpos to end of data. */
	while (m->m_next != NULL)
		m = m->m_next;
	nd->nd_mb = m;
	nfsm_set(nd, m->m_len);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);

	/* Do a Getattr for the attributes that change upon writing. */
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
	/* Get rid of weak cache consistency data for now. */
	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
	    (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
		if (error != 0)
			goto nfsmout;
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		/* A non-zero status means nothing more can be parsed. */
		if (*++tl != 0)
			nd->nd_flag |= ND_NOMOREDATA;
	}
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
		/* retlen is only used for the debug output below. */
		retlen = fxdr_unsigned(int, *tl++);
		commit = fxdr_unsigned(int, *tl);
		if (commit != NFSWRITE_FILESYNC)
			error = NFSERR_IO;
		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
		    retlen, commit, error);
	} else
		error = nd->nd_repstat;
	/* We have no use for the Write Verifier since we use FileSync. */

	/*
	 * Get the Change, Size, Access Time and Modify Time attributes and set
	 * on the Metadata file, so its attributes will be what the file's
	 * would be if it had been written.
	 */
	if (error == 0) {
		/* Skip over the Getattr Op# and status. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
	}
	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
nfsmout:
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_writedsdorpc().
5439 */ 5440 static void 5441 start_writedsdorpc(void *arg, int pending) 5442 { 5443 struct nfsrvwritedsdorpc *drpc; 5444 5445 drpc = (struct nfsrvwritedsdorpc *)arg; 5446 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5447 drpc->len, NULL, drpc->m, drpc->cred, drpc->p); 5448 drpc->done = 1; 5449 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); 5450 } 5451 5452 static int 5453 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, 5454 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5455 struct mbuf **mpp, char *cp, int *failposp) 5456 { 5457 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; 5458 struct nfsvattr na; 5459 struct mbuf *m; 5460 int error, i, offs, ret, timo; 5461 5462 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); 5463 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); 5464 drpc = NULL; 5465 if (mirrorcnt > 1) 5466 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5467 M_WAITOK); 5468 5469 /* Calculate offset in mbuf chain that data starts. */ 5470 offs = cp - mtod(*mpp, char *); 5471 NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); 5472 5473 /* 5474 * Do the write RPC for every DS, using a separate kernel process 5475 * for every DS except the last one. 
5476 */ 5477 error = 0; 5478 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5479 tdrpc->done = 0; 5480 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5481 tdrpc->off = off; 5482 tdrpc->len = len; 5483 tdrpc->nmp = *nmpp; 5484 tdrpc->cred = cred; 5485 tdrpc->p = p; 5486 tdrpc->inprog = 0; 5487 tdrpc->err = 0; 5488 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5489 ret = EIO; 5490 if (nfs_pnfsiothreads != 0) { 5491 ret = nfs_pnfsio(start_writedsdorpc, tdrpc); 5492 NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", 5493 ret); 5494 } 5495 if (ret != 0) { 5496 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, 5497 tdrpc->m, cred, p); 5498 if (nfsds_failerr(ret) && *failposp == -1) 5499 *failposp = i; 5500 else if (error == 0 && ret != 0) 5501 error = ret; 5502 } 5503 nmpp++; 5504 fhp++; 5505 } 5506 m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); 5507 ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); 5508 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5509 *failposp = mirrorcnt - 1; 5510 else if (error == 0 && ret != 0) 5511 error = ret; 5512 if (error == 0) 5513 error = nfsrv_setextattr(vp, &na, p); 5514 NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); 5515 tdrpc = drpc; 5516 timo = hz / 50; /* Wait for 20msec. */ 5517 if (timo < 1) 5518 timo = 1; 5519 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5520 /* Wait for RPCs on separate threads to complete. */ 5521 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5522 tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); 5523 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5524 *failposp = i; 5525 else if (error == 0 && tdrpc->err != 0) 5526 error = tdrpc->err; 5527 } 5528 free(drpc, M_TEMP); 5529 return (error); 5530 } 5531 5532 /* 5533 * Do a allocate RPC on a DS data file, using this structure for the arguments, 5534 * so that this function can be executed by a separate kernel process. 
5535 */ 5536 struct nfsrvallocatedsdorpc { 5537 int done; 5538 int inprog; 5539 struct task tsk; 5540 fhandle_t fh; 5541 off_t off; 5542 off_t len; 5543 struct nfsmount *nmp; 5544 struct ucred *cred; 5545 NFSPROC_T *p; 5546 int err; 5547 }; 5548 5549 static int 5550 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5551 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5552 { 5553 uint32_t *tl; 5554 struct nfsrv_descript *nd; 5555 nfsattrbit_t attrbits; 5556 nfsv4stateid_t st; 5557 int error; 5558 5559 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5560 nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, 5561 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 5562 5563 /* 5564 * Use a stateid where other is an alternating 01010 pattern and 5565 * seqid is 0xffffffff. This value is not defined as special by 5566 * the RFC and is used by the FreeBSD NFS server to indicate an 5567 * MDS->DS proxy operation. 5568 */ 5569 st.other[0] = 0x55555555; 5570 st.other[1] = 0x55555555; 5571 st.other[2] = 0x55555555; 5572 st.seqid = 0xffffffff; 5573 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5574 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5575 txdr_hyper(off, tl); tl += 2; 5576 txdr_hyper(len, tl); tl += 2; 5577 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); 5578 5579 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5580 NFSGETATTR_ATTRBIT(&attrbits); 5581 nfsrv_putattrbit(nd, &attrbits); 5582 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5583 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5584 if (error != 0) { 5585 free(nd, M_TEMP); 5586 return (error); 5587 } 5588 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", 5589 nd->nd_repstat); 5590 if (nd->nd_repstat == 0) { 5591 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5592 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5593 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5594 } else 5595 error = 
nd->nd_repstat; 5596 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); 5597 nfsmout: 5598 m_freem(nd->nd_mrep); 5599 free(nd, M_TEMP); 5600 NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); 5601 return (error); 5602 } 5603 5604 /* 5605 * Start up the thread that will execute nfsrv_allocatedsdorpc(). 5606 */ 5607 static void 5608 start_allocatedsdorpc(void *arg, int pending) 5609 { 5610 struct nfsrvallocatedsdorpc *drpc; 5611 5612 drpc = (struct nfsrvallocatedsdorpc *)arg; 5613 drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5614 drpc->len, NULL, drpc->cred, drpc->p); 5615 drpc->done = 1; 5616 NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); 5617 } 5618 5619 static int 5620 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5621 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5622 int *failposp) 5623 { 5624 struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; 5625 struct nfsvattr na; 5626 int error, i, ret, timo; 5627 5628 NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); 5629 drpc = NULL; 5630 if (mirrorcnt > 1) 5631 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5632 M_WAITOK); 5633 5634 /* 5635 * Do the allocate RPC for every DS, using a separate kernel process 5636 * for every DS except the last one. 
5637 */ 5638 error = 0; 5639 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5640 tdrpc->done = 0; 5641 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5642 tdrpc->off = off; 5643 tdrpc->len = len; 5644 tdrpc->nmp = *nmpp; 5645 tdrpc->cred = cred; 5646 tdrpc->p = p; 5647 tdrpc->inprog = 0; 5648 tdrpc->err = 0; 5649 ret = EIO; 5650 if (nfs_pnfsiothreads != 0) { 5651 ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); 5652 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", 5653 ret); 5654 } 5655 if (ret != 0) { 5656 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, 5657 cred, p); 5658 if (nfsds_failerr(ret) && *failposp == -1) 5659 *failposp = i; 5660 else if (error == 0 && ret != 0) 5661 error = ret; 5662 } 5663 nmpp++; 5664 fhp++; 5665 } 5666 ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5667 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5668 *failposp = mirrorcnt - 1; 5669 else if (error == 0 && ret != 0) 5670 error = ret; 5671 if (error == 0) 5672 error = nfsrv_setextattr(vp, &na, p); 5673 NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); 5674 tdrpc = drpc; 5675 timo = hz / 50; /* Wait for 20msec. */ 5676 if (timo < 1) 5677 timo = 1; 5678 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5679 /* Wait for RPCs on separate threads to complete. */ 5680 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5681 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5682 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5683 *failposp = i; 5684 else if (error == 0 && tdrpc->err != 0) 5685 error = tdrpc->err; 5686 } 5687 free(drpc, M_TEMP); 5688 return (error); 5689 } 5690 5691 /* 5692 * Do a deallocate RPC on a DS data file, using this structure for the 5693 * arguments, so that this function can be executed by a separate kernel 5694 * process. 
5695 */ 5696 struct nfsrvdeallocatedsdorpc { 5697 int done; 5698 int inprog; 5699 struct task tsk; 5700 fhandle_t fh; 5701 off_t off; 5702 off_t len; 5703 struct nfsmount *nmp; 5704 struct ucred *cred; 5705 NFSPROC_T *p; 5706 int err; 5707 }; 5708 5709 static int 5710 nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, 5711 off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) 5712 { 5713 uint32_t *tl; 5714 struct nfsrv_descript *nd; 5715 nfsattrbit_t attrbits; 5716 nfsv4stateid_t st; 5717 int error; 5718 5719 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5720 nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp, 5721 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 5722 5723 /* 5724 * Use a stateid where other is an alternating 01010 pattern and 5725 * seqid is 0xffffffff. This value is not defined as special by 5726 * the RFC and is used by the FreeBSD NFS server to indicate an 5727 * MDS->DS proxy operation. 5728 */ 5729 st.other[0] = 0x55555555; 5730 st.other[1] = 0x55555555; 5731 st.other[2] = 0x55555555; 5732 st.seqid = 0xffffffff; 5733 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5734 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); 5735 txdr_hyper(off, tl); tl += 2; 5736 txdr_hyper(len, tl); tl += 2; 5737 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len); 5738 5739 /* Do a Getattr for the attributes that change upon writing. 
*/ 5740 NFSZERO_ATTRBIT(&attrbits); 5741 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5742 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5743 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5744 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5745 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5746 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5747 nfsrv_putattrbit(nd, &attrbits); 5748 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, 5749 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5750 if (error != 0) { 5751 free(nd, M_TEMP); 5752 return (error); 5753 } 5754 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n", 5755 nd->nd_repstat); 5756 /* Get rid of weak cache consistency data for now. */ 5757 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5758 (ND_NFSV4 | ND_V4WCCATTR)) { 5759 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5760 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5761 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error); 5762 if (error != 0) 5763 goto nfsmout; 5764 /* 5765 * Get rid of Op# and status for next op. 5766 */ 5767 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5768 if (*++tl != 0) 5769 nd->nd_flag |= ND_NOMOREDATA; 5770 } 5771 if (nd->nd_repstat == 0) { 5772 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5773 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, 5774 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5775 } else 5776 error = nd->nd_repstat; 5777 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error); 5778 nfsmout: 5779 m_freem(nd->nd_mrep); 5780 free(nd, M_TEMP); 5781 NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error); 5782 return (error); 5783 } 5784 5785 /* 5786 * Start up the thread that will execute nfsrv_deallocatedsdorpc(). 
5787 */ 5788 static void 5789 start_deallocatedsdorpc(void *arg, int pending) 5790 { 5791 struct nfsrvdeallocatedsdorpc *drpc; 5792 5793 drpc = (struct nfsrvdeallocatedsdorpc *)arg; 5794 drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, 5795 drpc->len, NULL, drpc->cred, drpc->p); 5796 drpc->done = 1; 5797 NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err); 5798 } 5799 5800 static int 5801 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, 5802 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5803 int *failposp) 5804 { 5805 struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL; 5806 struct nfsvattr na; 5807 int error, i, ret, timo; 5808 5809 NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n"); 5810 drpc = NULL; 5811 if (mirrorcnt > 1) 5812 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5813 M_WAITOK); 5814 5815 /* 5816 * Do the deallocate RPC for every DS, using a separate kernel process 5817 * for every DS except the last one. 
5818 */ 5819 error = 0; 5820 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5821 tdrpc->done = 0; 5822 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 5823 tdrpc->off = off; 5824 tdrpc->len = len; 5825 tdrpc->nmp = *nmpp; 5826 tdrpc->cred = cred; 5827 tdrpc->p = p; 5828 tdrpc->inprog = 0; 5829 tdrpc->err = 0; 5830 ret = EIO; 5831 if (nfs_pnfsiothreads != 0) { 5832 ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc); 5833 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n", 5834 ret); 5835 } 5836 if (ret != 0) { 5837 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, 5838 NULL, cred, p); 5839 if (nfsds_failerr(ret) && *failposp == -1) 5840 *failposp = i; 5841 else if (error == 0 && ret != 0) 5842 error = ret; 5843 } 5844 nmpp++; 5845 fhp++; 5846 } 5847 ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); 5848 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 5849 *failposp = mirrorcnt - 1; 5850 else if (error == 0 && ret != 0) 5851 error = ret; 5852 if (error == 0) 5853 error = nfsrv_setextattr(vp, &na, p); 5854 NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error); 5855 tdrpc = drpc; 5856 timo = hz / 50; /* Wait for 20msec. */ 5857 if (timo < 1) 5858 timo = 1; 5859 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 5860 /* Wait for RPCs on separate threads to complete. 
*/ 5861 while (tdrpc->inprog != 0 && tdrpc->done == 0) 5862 tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); 5863 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 5864 *failposp = i; 5865 else if (error == 0 && tdrpc->err != 0) 5866 error = tdrpc->err; 5867 } 5868 free(drpc, M_TEMP); 5869 return (error); 5870 } 5871 5872 static int 5873 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5874 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, 5875 struct nfsvattr *dsnap) 5876 { 5877 uint32_t *tl; 5878 struct nfsrv_descript *nd; 5879 nfsv4stateid_t st; 5880 nfsattrbit_t attrbits; 5881 int error; 5882 5883 NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); 5884 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 5885 /* 5886 * Use a stateid where other is an alternating 01010 pattern and 5887 * seqid is 0xffffffff. This value is not defined as special by 5888 * the RFC and is used by the FreeBSD NFS server to indicate an 5889 * MDS->DS proxy operation. 5890 */ 5891 st.other[0] = 0x55555555; 5892 st.other[1] = 0x55555555; 5893 st.other[2] = 0x55555555; 5894 st.seqid = 0xffffffff; 5895 nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), 5896 NULL, NULL, 0, 0, cred); 5897 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 5898 nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); 5899 5900 /* Do a Getattr for the attributes that change due to writing. 
*/ 5901 NFSZERO_ATTRBIT(&attrbits); 5902 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 5903 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 5904 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 5905 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 5906 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 5907 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); 5908 *tl = txdr_unsigned(NFSV4OP_GETATTR); 5909 (void) nfsrv_putattrbit(nd, &attrbits); 5910 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 5911 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 5912 if (error != 0) { 5913 free(nd, M_TEMP); 5914 return (error); 5915 } 5916 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", 5917 nd->nd_repstat); 5918 /* Get rid of weak cache consistency data for now. */ 5919 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == 5920 (ND_NFSV4 | ND_V4WCCATTR)) { 5921 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5922 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5923 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); 5924 if (error != 0) 5925 goto nfsmout; 5926 /* 5927 * Get rid of Op# and status for next op. 5928 */ 5929 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5930 if (*++tl != 0) 5931 nd->nd_flag |= ND_NOMOREDATA; 5932 } 5933 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); 5934 if (error != 0) 5935 goto nfsmout; 5936 if (nd->nd_repstat != 0) 5937 error = nd->nd_repstat; 5938 /* 5939 * Get the Change, Size, Access Time and Modify Time attributes and set 5940 * on the Metadata file, so its attributes will be what the file's 5941 * would be if it had been written. 
5942 */ 5943 if (error == 0) { 5944 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); 5945 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, 5946 NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); 5947 } 5948 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); 5949 nfsmout: 5950 m_freem(nd->nd_mrep); 5951 free(nd, M_TEMP); 5952 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); 5953 return (error); 5954 } 5955 5956 struct nfsrvsetattrdsdorpc { 5957 int done; 5958 int inprog; 5959 struct task tsk; 5960 fhandle_t fh; 5961 struct nfsmount *nmp; 5962 struct vnode *vp; 5963 struct ucred *cred; 5964 NFSPROC_T *p; 5965 struct nfsvattr na; 5966 struct nfsvattr dsna; 5967 int err; 5968 }; 5969 5970 /* 5971 * Start up the thread that will execute nfsrv_setattrdsdorpc(). 5972 */ 5973 static void 5974 start_setattrdsdorpc(void *arg, int pending) 5975 { 5976 struct nfsrvsetattrdsdorpc *drpc; 5977 5978 drpc = (struct nfsrvsetattrdsdorpc *)arg; 5979 drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, 5980 drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); 5981 drpc->done = 1; 5982 } 5983 5984 static int 5985 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 5986 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, 5987 struct nfsvattr *nap, int *failposp) 5988 { 5989 struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; 5990 struct nfsvattr na; 5991 int error, i, ret, timo; 5992 5993 NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); 5994 drpc = NULL; 5995 if (mirrorcnt > 1) 5996 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 5997 M_WAITOK); 5998 5999 /* 6000 * Do the setattr RPC for every DS, using a separate kernel process 6001 * for every DS except the last one. 
6002 */ 6003 error = 0; 6004 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6005 tdrpc->done = 0; 6006 tdrpc->inprog = 0; 6007 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6008 tdrpc->nmp = *nmpp; 6009 tdrpc->vp = vp; 6010 tdrpc->cred = cred; 6011 tdrpc->p = p; 6012 tdrpc->na = *nap; 6013 tdrpc->err = 0; 6014 ret = EIO; 6015 if (nfs_pnfsiothreads != 0) { 6016 ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); 6017 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", 6018 ret); 6019 } 6020 if (ret != 0) { 6021 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, 6022 &na); 6023 if (nfsds_failerr(ret) && *failposp == -1) 6024 *failposp = i; 6025 else if (error == 0 && ret != 0) 6026 error = ret; 6027 } 6028 nmpp++; 6029 fhp++; 6030 } 6031 ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); 6032 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6033 *failposp = mirrorcnt - 1; 6034 else if (error == 0 && ret != 0) 6035 error = ret; 6036 if (error == 0) 6037 error = nfsrv_setextattr(vp, &na, p); 6038 NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); 6039 tdrpc = drpc; 6040 timo = hz / 50; /* Wait for 20msec. */ 6041 if (timo < 1) 6042 timo = 1; 6043 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6044 /* Wait for RPCs on separate threads to complete. */ 6045 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6046 tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); 6047 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6048 *failposp = i; 6049 else if (error == 0 && tdrpc->err != 0) 6050 error = tdrpc->err; 6051 } 6052 free(drpc, M_TEMP); 6053 return (error); 6054 } 6055 6056 /* 6057 * Do a Setattr of an NFSv4 ACL on the DS file. 
6058 */ 6059 static int 6060 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6061 struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) 6062 { 6063 struct nfsrv_descript *nd; 6064 nfsv4stateid_t st; 6065 nfsattrbit_t attrbits; 6066 int error; 6067 6068 NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); 6069 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6070 /* 6071 * Use a stateid where other is an alternating 01010 pattern and 6072 * seqid is 0xffffffff. This value is not defined as special by 6073 * the RFC and is used by the FreeBSD NFS server to indicate an 6074 * MDS->DS proxy operation. 6075 */ 6076 st.other[0] = 0x55555555; 6077 st.other[1] = 0x55555555; 6078 st.other[2] = 0x55555555; 6079 st.seqid = 0xffffffff; 6080 nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), 6081 NULL, NULL, 0, 0, cred); 6082 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6083 NFSZERO_ATTRBIT(&attrbits); 6084 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); 6085 /* 6086 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), 6087 * so passing in the metadata "vp" will be ok, since it is of 6088 * the same type (VREG). 
6089 */ 6090 nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, 6091 NULL, 0, 0, 0, 0, 0, NULL); 6092 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6093 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6094 if (error != 0) { 6095 free(nd, M_TEMP); 6096 return (error); 6097 } 6098 NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", 6099 nd->nd_repstat); 6100 error = nd->nd_repstat; 6101 m_freem(nd->nd_mrep); 6102 free(nd, M_TEMP); 6103 return (error); 6104 } 6105 6106 struct nfsrvsetacldsdorpc { 6107 int done; 6108 int inprog; 6109 struct task tsk; 6110 fhandle_t fh; 6111 struct nfsmount *nmp; 6112 struct vnode *vp; 6113 struct ucred *cred; 6114 NFSPROC_T *p; 6115 struct acl *aclp; 6116 int err; 6117 }; 6118 6119 /* 6120 * Start up the thread that will execute nfsrv_setacldsdorpc(). 6121 */ 6122 static void 6123 start_setacldsdorpc(void *arg, int pending) 6124 { 6125 struct nfsrvsetacldsdorpc *drpc; 6126 6127 drpc = (struct nfsrvsetacldsdorpc *)arg; 6128 drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, 6129 drpc->vp, drpc->nmp, drpc->aclp); 6130 drpc->done = 1; 6131 } 6132 6133 static int 6134 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6135 struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, 6136 int *failposp) 6137 { 6138 struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; 6139 int error, i, ret, timo; 6140 6141 NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); 6142 drpc = NULL; 6143 if (mirrorcnt > 1) 6144 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, 6145 M_WAITOK); 6146 6147 /* 6148 * Do the setattr RPC for every DS, using a separate kernel process 6149 * for every DS except the last one. 
6150 */ 6151 error = 0; 6152 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6153 tdrpc->done = 0; 6154 tdrpc->inprog = 0; 6155 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); 6156 tdrpc->nmp = *nmpp; 6157 tdrpc->vp = vp; 6158 tdrpc->cred = cred; 6159 tdrpc->p = p; 6160 tdrpc->aclp = aclp; 6161 tdrpc->err = 0; 6162 ret = EIO; 6163 if (nfs_pnfsiothreads != 0) { 6164 ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); 6165 NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", 6166 ret); 6167 } 6168 if (ret != 0) { 6169 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, 6170 aclp); 6171 if (nfsds_failerr(ret) && *failposp == -1) 6172 *failposp = i; 6173 else if (error == 0 && ret != 0) 6174 error = ret; 6175 } 6176 nmpp++; 6177 fhp++; 6178 } 6179 ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); 6180 if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) 6181 *failposp = mirrorcnt - 1; 6182 else if (error == 0 && ret != 0) 6183 error = ret; 6184 NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); 6185 tdrpc = drpc; 6186 timo = hz / 50; /* Wait for 20msec. */ 6187 if (timo < 1) 6188 timo = 1; 6189 for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { 6190 /* Wait for RPCs on separate threads to complete. */ 6191 while (tdrpc->inprog != 0 && tdrpc->done == 0) 6192 tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); 6193 if (nfsds_failerr(tdrpc->err) && *failposp == -1) 6194 *failposp = i; 6195 else if (error == 0 && tdrpc->err != 0) 6196 error = tdrpc->err; 6197 } 6198 free(drpc, M_TEMP); 6199 return (error); 6200 } 6201 6202 /* 6203 * Getattr call to the DS for the attributes that change due to writing. 
6204 */ 6205 static int 6206 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, 6207 struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) 6208 { 6209 struct nfsrv_descript *nd; 6210 int error; 6211 nfsattrbit_t attrbits; 6212 6213 NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); 6214 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6215 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, 6216 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6217 NFSZERO_ATTRBIT(&attrbits); 6218 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); 6219 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); 6220 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); 6221 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); 6222 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); 6223 (void) nfsrv_putattrbit(nd, &attrbits); 6224 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6225 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6226 if (error != 0) { 6227 free(nd, M_TEMP); 6228 return (error); 6229 } 6230 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", 6231 nd->nd_repstat); 6232 if (nd->nd_repstat == 0) { 6233 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, 6234 NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, 6235 NULL, NULL); 6236 /* 6237 * We can only save the updated values in the extended 6238 * attribute if the vp is exclusively locked. 6239 * This should happen when any of the following operations 6240 * occur on the vnode: 6241 * Close, Delegreturn, LayoutCommit, LayoutReturn 6242 * As such, the updated extended attribute should get saved 6243 * before nfsrv_checkdsattr() returns 0 and allows the cached 6244 * attributes to be returned without calling this function. 
6245 */ 6246 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { 6247 error = nfsrv_setextattr(vp, nap, p); 6248 NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", 6249 error); 6250 } 6251 } else 6252 error = nd->nd_repstat; 6253 m_freem(nd->nd_mrep); 6254 free(nd, M_TEMP); 6255 NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); 6256 return (error); 6257 } 6258 6259 /* 6260 * Seek call to a DS. 6261 */ 6262 static int 6263 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, 6264 struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) 6265 { 6266 uint32_t *tl; 6267 struct nfsrv_descript *nd; 6268 nfsv4stateid_t st; 6269 int error; 6270 6271 NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); 6272 /* 6273 * Use a stateid where other is an alternating 01010 pattern and 6274 * seqid is 0xffffffff. This value is not defined as special by 6275 * the RFC and is used by the FreeBSD NFS server to indicate an 6276 * MDS->DS proxy operation. 6277 */ 6278 st.other[0] = 0x55555555; 6279 st.other[1] = 0x55555555; 6280 st.other[2] = 0x55555555; 6281 st.seqid = 0xffffffff; 6282 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); 6283 nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, 6284 sizeof(fhandle_t), NULL, NULL, 0, 0, cred); 6285 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); 6286 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); 6287 txdr_hyper(*offp, tl); tl += 2; 6288 *tl = txdr_unsigned(content); 6289 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, 6290 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); 6291 if (error != 0) { 6292 free(nd, M_TEMP); 6293 return (error); 6294 } 6295 NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); 6296 if (nd->nd_repstat == 0) { 6297 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); 6298 if (*tl++ == newnfs_true) 6299 *eofp = true; 6300 else 6301 *eofp = false; 6302 *offp = fxdr_hyper(tl); 6303 } else 6304 error = nd->nd_repstat; 6305 nfsmout: 6306 
m_freem(nd->nd_mrep); 6307 free(nd, M_TEMP); 6308 NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); 6309 return (error); 6310 } 6311 6312 /* 6313 * Get the device id and file handle for a DS file. 6314 */ 6315 int 6316 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, 6317 fhandle_t *fhp, char *devid) 6318 { 6319 int buflen, error; 6320 char *buf; 6321 6322 buflen = 1024; 6323 buf = malloc(buflen, M_TEMP, M_WAITOK); 6324 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, 6325 fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); 6326 free(buf, M_TEMP); 6327 return (error); 6328 } 6329 6330 /* 6331 * Do a Lookup against the DS for the filename. 6332 */ 6333 static int 6334 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, 6335 struct vnode **nvpp, NFSPROC_T *p) 6336 { 6337 struct nameidata named; 6338 struct ucred *tcred; 6339 char *bufp; 6340 u_long *hashp; 6341 struct vnode *nvp; 6342 int error; 6343 6344 tcred = newnfs_getcred(); 6345 named.ni_cnd.cn_nameiop = LOOKUP; 6346 named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; 6347 named.ni_cnd.cn_cred = tcred; 6348 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF; 6349 nfsvno_setpathbuf(&named, &bufp, &hashp); 6350 named.ni_cnd.cn_nameptr = bufp; 6351 named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); 6352 strlcpy(bufp, pf->dsf_filename, NAME_MAX); 6353 NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); 6354 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); 6355 NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); 6356 NFSFREECRED(tcred); 6357 nfsvno_relpathbuf(&named); 6358 if (error == 0) 6359 *nvpp = nvp; 6360 NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); 6361 return (error); 6362 } 6363 6364 /* 6365 * Set the file handle to the correct one. 
6366 */ 6367 static void 6368 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, 6369 char *fnamep, struct vnode *nvp, NFSPROC_T *p) 6370 { 6371 struct nfsnode *np; 6372 int ret = 0; 6373 6374 np = VTONFS(nvp); 6375 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); 6376 /* 6377 * We can only do a vn_set_extattr() if the vnode is exclusively 6378 * locked and vn_start_write() has been done. If devid != NULL or 6379 * fnamep != NULL or the vnode is shared locked, vn_start_write() 6380 * may not have been done. 6381 * If not done now, it will be done on a future call. 6382 */ 6383 if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == 6384 LK_EXCLUSIVE) 6385 ret = vn_extattr_set(vp, IO_NODELOCKED, 6386 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), 6387 (char *)pf, p); 6388 NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); 6389 } 6390 6391 /* 6392 * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point 6393 * when the DS has failed. 6394 */ 6395 void 6396 nfsrv_killrpcs(struct nfsmount *nmp) 6397 { 6398 6399 /* 6400 * Call newnfs_nmcancelreqs() to cause 6401 * any RPCs in progress on the mount point to 6402 * fail. 6403 * This will cause any process waiting for an 6404 * RPC to complete while holding a vnode lock 6405 * on the mounted-on vnode (such as "df" or 6406 * a non-forced "umount") to fail. 6407 * This will unlock the mounted-on vnode so 6408 * a forced dismount can succeed. 6409 * The NFSMNTP_CANCELRPCS flag should be set when this function is 6410 * called. 6411 */ 6412 newnfs_nmcancelreqs(nmp); 6413 } 6414 6415 /* 6416 * Sum up the statfs info for each of the DSs, so that the client will 6417 * receive the total for all DSs. 
6418 */ 6419 static int 6420 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) 6421 { 6422 struct statfs *tsf; 6423 struct nfsdevice *ds; 6424 struct vnode **dvpp, **tdvpp, *dvp; 6425 uint64_t tot; 6426 int cnt, error = 0, i; 6427 6428 if (nfsrv_devidcnt <= 0) 6429 return (ENXIO); 6430 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 6431 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 6432 6433 /* Get an array of the dvps for the DSs. */ 6434 tdvpp = dvpp; 6435 i = 0; 6436 NFSDDSLOCK(); 6437 /* First, search for matches for same file system. */ 6438 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6439 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && 6440 fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { 6441 if (++i > nfsrv_devidcnt) 6442 break; 6443 *tdvpp++ = ds->nfsdev_dvp; 6444 } 6445 } 6446 /* 6447 * If no matches for same file system, total all servers not assigned 6448 * to a file system. 6449 */ 6450 if (i == 0) { 6451 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 6452 if (ds->nfsdev_nmp != NULL && 6453 ds->nfsdev_mdsisset == 0) { 6454 if (++i > nfsrv_devidcnt) 6455 break; 6456 *tdvpp++ = ds->nfsdev_dvp; 6457 } 6458 } 6459 } 6460 NFSDDSUNLOCK(); 6461 cnt = i; 6462 6463 /* Do a VFS_STATFS() for each of the DSs and sum them up. 
*/ 6464 tdvpp = dvpp; 6465 for (i = 0; i < cnt && error == 0; i++) { 6466 dvp = *tdvpp++; 6467 error = VFS_STATFS(dvp->v_mount, tsf); 6468 if (error == 0) { 6469 if (sf->f_bsize == 0) { 6470 if (tsf->f_bsize > 0) 6471 sf->f_bsize = tsf->f_bsize; 6472 else 6473 sf->f_bsize = 8192; 6474 } 6475 if (tsf->f_blocks > 0) { 6476 if (sf->f_bsize != tsf->f_bsize) { 6477 tot = tsf->f_blocks * tsf->f_bsize; 6478 sf->f_blocks += (tot / sf->f_bsize); 6479 } else 6480 sf->f_blocks += tsf->f_blocks; 6481 } 6482 if (tsf->f_bfree > 0) { 6483 if (sf->f_bsize != tsf->f_bsize) { 6484 tot = tsf->f_bfree * tsf->f_bsize; 6485 sf->f_bfree += (tot / sf->f_bsize); 6486 } else 6487 sf->f_bfree += tsf->f_bfree; 6488 } 6489 if (tsf->f_bavail > 0) { 6490 if (sf->f_bsize != tsf->f_bsize) { 6491 tot = tsf->f_bavail * tsf->f_bsize; 6492 sf->f_bavail += (tot / sf->f_bsize); 6493 } else 6494 sf->f_bavail += tsf->f_bavail; 6495 } 6496 } 6497 } 6498 free(tsf, M_TEMP); 6499 free(dvpp, M_TEMP); 6500 return (error); 6501 } 6502 6503 /* 6504 * Set an NFSv4 acl. 6505 */ 6506 int 6507 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) 6508 { 6509 int error; 6510 6511 if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { 6512 error = NFSERR_ATTRNOTSUPP; 6513 goto out; 6514 } 6515 /* 6516 * With NFSv4 ACLs, chmod(2) may need to add additional entries. 6517 * Make sure it has enough room for that - splitting every entry 6518 * into two and appending "canonical six" entries at the end. 6519 * Cribbed out of kern/vfs_acl.c - Rick M. 6520 */ 6521 if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { 6522 error = NFSERR_ATTRNOTSUPP; 6523 goto out; 6524 } 6525 error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); 6526 if (error == 0) { 6527 error = nfsrv_dssetacl(vp, aclp, cred, p); 6528 if (error == ENOENT) 6529 error = 0; 6530 } 6531 6532 out: 6533 NFSEXITCODE(error); 6534 return (error); 6535 } 6536 6537 /* 6538 * Seek vnode op call (actually it is a VOP_IOCTL()). 
6539 * This function is called with the vnode locked, but unlocks and vrele()s 6540 * the vp before returning. 6541 */ 6542 int 6543 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, 6544 off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) 6545 { 6546 struct nfsvattr at; 6547 int error, ret; 6548 6549 ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); 6550 /* 6551 * Attempt to seek on a DS file. A return of ENOENT implies 6552 * there is no DS file to seek on. 6553 */ 6554 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, 6555 NULL, NULL, NULL, NULL, offp, content, eofp); 6556 if (error != ENOENT) { 6557 vput(vp); 6558 return (error); 6559 } 6560 6561 /* 6562 * Do the VOP_IOCTL() call. For the case where *offp == file_size, 6563 * VOP_IOCTL() will return ENXIO. However, the correct reply for 6564 * NFSv4.2 is *eofp == true and error == 0 for this case. 6565 */ 6566 NFSVOPUNLOCK(vp); 6567 error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); 6568 *eofp = false; 6569 if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { 6570 /* Handle the cases where we might be at EOF. */ 6571 ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); 6572 if (ret == 0 && *offp == at.na_size) { 6573 *eofp = true; 6574 error = 0; 6575 } 6576 if (ret != 0 && error == 0) 6577 error = ret; 6578 } 6579 vrele(vp); 6580 NFSEXITCODE(error); 6581 return (error); 6582 } 6583 6584 /* 6585 * Allocate vnode op call. 6586 */ 6587 int 6588 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6589 NFSPROC_T *p) 6590 { 6591 int error; 6592 off_t olen; 6593 6594 ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); 6595 /* 6596 * Attempt to allocate on a DS file. A return of ENOENT implies 6597 * there is no DS file to allocate on. 
6598 */ 6599 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, 6600 NULL, NULL, NULL, NULL, &len, 0, NULL); 6601 if (error != ENOENT) 6602 return (error); 6603 6604 /* 6605 * Do the actual VOP_ALLOCATE(), looping so long as 6606 * progress is being made, to achieve completion. 6607 */ 6608 do { 6609 olen = len; 6610 error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred); 6611 if (error == 0 && len > 0 && olen > len) 6612 maybe_yield(); 6613 } while (error == 0 && len > 0 && olen > len); 6614 if (error == 0 && len > 0) 6615 error = NFSERR_IO; 6616 NFSEXITCODE(error); 6617 return (error); 6618 } 6619 6620 /* 6621 * Deallocate vnode op call. 6622 */ 6623 int 6624 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, 6625 NFSPROC_T *p) 6626 { 6627 int error; 6628 off_t olen; 6629 6630 ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp"); 6631 /* 6632 * Attempt to deallocate on a DS file. A return of ENOENT implies 6633 * there is no DS file to deallocate on. 6634 */ 6635 error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL, 6636 NULL, NULL, NULL, NULL, &len, 0, NULL); 6637 if (error != ENOENT) 6638 return (error); 6639 6640 /* 6641 * Do the actual VOP_DEALLOCATE(), looping so long as 6642 * progress is being made, to achieve completion. 6643 */ 6644 do { 6645 olen = len; 6646 error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); 6647 if (error == 0 && len > 0 && olen > len) 6648 maybe_yield(); 6649 } while (error == 0 && len > 0 && olen > len); 6650 if (error == 0 && len > 0) 6651 error = NFSERR_IO; 6652 NFSEXITCODE(error); 6653 return (error); 6654 } 6655 6656 /* 6657 * Get Extended Atribute vnode op into an mbuf list. 
 * The attribute "name" is read from the EXTATTR_NAMESPACE_USER namespace.
 * On success, *mpp and *mpendp are set to the first and last mbuf of the
 * list holding the data (both NULL when there is no data) and *lenp is set
 * to the number of bytes of attribute data.
 * Returns NFSERR_NOXATTR if the size query fails, NFSERR_XATTR2BIG if the
 * attribute is larger than maxresp - NFS_MAXXDR, or the error from the MAC
 * check or VOP_GETEXTATTR().  On those later errors the mbuf list is freed
 * and *lenp is set to 0.
 */
int
nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
    struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p,
    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
{
	struct iovec *iv;
	struct uio io, *uiop = &io;
	struct mbuf *m, *m2;
	int alen, error, len, tlen;
	size_t siz;

	/* First, find out the size of the extended attribute. */
	error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
	    &siz, cred, p);
	if (error != 0)
		return (NFSERR_NOXATTR);
	/*
	 * NOTE(review): maxresp is unsigned, so this subtraction would wrap
	 * if maxresp were ever smaller than NFS_MAXXDR - confirm that the
	 * callers guarantee a large enough maxresp.
	 */
	if (siz > maxresp - NFS_MAXXDR)
		return (NFSERR_XATTR2BIG);
	len = siz;
	/* tlen is the data length rounded up by NFSM_RNDUP(). */
	tlen = NFSM_RNDUP(len);
	if (tlen > 0) {
		/*
		 * If tlen > MCLBYTES and the reply will not be saved, use
		 * ext_pgs mbufs for TLS.
		 * For NFSv4.0, we do not know for sure if the reply will
		 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
		 * Always use ext_pgs mbufs if ND_EXTPG is set.
		 */
		if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES &&
		    (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
		    (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
			uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen,
			    maxextsiz, &m, &m2, &iv);
		else
			uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2,
			    &iv);
		uiop->uio_iov = iv;
	} else {
		/* Nothing to read, so there is no iovec and no mbuf list. */
		uiop->uio_iovcnt = 0;
		uiop->uio_iov = iv = NULL;
		m = m2 = NULL;
	}
	uiop->uio_offset = 0;
	uiop->uio_resid = tlen;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = p;
#ifdef MAC
	error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
	    name);
	if (error != 0)
		goto out;
#endif

	/* Now read the attribute data into the mbuf list. */
	if (tlen > 0)
		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
		    NULL, cred, p);
	if (error != 0)
		goto out;
	if (uiop->uio_resid > 0) {
		/*
		 * Fewer bytes than requested were read.  Recompute the
		 * lengths from what was actually read and either free the
		 * now unused list or adjust it via nfsrv_adj().
		 */
		alen = tlen;
		len = tlen - uiop->uio_resid;
		tlen = NFSM_RNDUP(len);
		if (alen != tlen)
			printf("nfsvno_getxattr: weird size read\n");
		if (tlen == 0) {
			m_freem(m);
			m = m2 = NULL;
		} else if (alen != tlen || tlen != len)
			m2 = nfsrv_adj(m, alen - tlen, tlen - len);
	}
	*lenp = len;
	*mpp = m;
	*mpendp = m2;

out:
	if (error != 0) {
		if (m != NULL)
			m_freem(m);
		*lenp = 0;
	}
	/* The iovec array is freed on success and failure alike. */
	free(iv, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set Extended attribute vnode op from an mbuf list.
 * Writes "len" bytes to the attribute "name" in the EXTATTR_NAMESPACE_USER
 * namespace, using an iovec built by nfsrv_createiovecw() from "m" and
 * "cp".  After a successful VOP_SETEXTATTR(), nfsvno_updateds() is called
 * for regular files when nfsrv_devidcnt is non-zero and the vnode is then
 * synced with VOP_FSYNC(), whose result becomes the return value.
 */
int
nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
    char *cp, struct ucred *cred, struct thread *p)
{
	struct iovec *iv;
	struct uio uio, *uiop = &uio;
	int cnt, error;

	error = 0;
#ifdef MAC
	error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
	    name);
#endif
	if (error != 0)
		goto out;

	uiop->uio_rw = UIO_WRITE;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = p;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	if (len > 0) {
		error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
		uiop->uio_iov = iv;
		uiop->uio_iovcnt = cnt;
	} else {
		/* No data, so the write is done with an empty uio. */
		uiop->uio_iov = iv = NULL;
		uiop->uio_iovcnt = 0;
	}
	if (error == 0) {
		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
		    cred, p);
		if (error == 0) {
			if (vp->v_type == VREG && nfsrv_devidcnt != 0)
				nfsvno_updateds(vp, cred, p);
			error = VOP_FSYNC(vp, MNT_WAIT, p);
		}
		/*
		 * NOTE(review): iv is only freed on this path; presumably
		 * nfsrv_createiovecw() does not leave an allocation behind
		 * when it fails - confirm.
		 */
		free(iv, M_TEMP);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * For a pNFS server, the DS file's ctime and
 * va_filerev (TimeMetadata and Change) needs to
 * be updated.  This is a hack, but works by
 * flipping the S_ISGID bit in va_mode and then
 * flipping it back.
6799 * It does result in two MDS->DS RPCs, but creating 6800 * a custom RPC just to do this seems overkill, since 6801 * Setxattr/Rmxattr will not be done that frequently. 6802 * If it fails part way through, that is not too 6803 * serious, since the DS file is never executed. 6804 */ 6805 static void 6806 nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p) 6807 { 6808 struct nfsvattr nva; 6809 int ret; 6810 u_short tmode; 6811 6812 ret = VOP_GETATTR(vp, &nva.na_vattr, cred); 6813 if (ret == 0) { 6814 tmode = nva.na_mode; 6815 NFSVNO_ATTRINIT(&nva); 6816 tmode ^= S_ISGID; 6817 NFSVNO_SETATTRVAL(&nva, mode, tmode); 6818 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 6819 NFSPROC_SETATTR, NULL, NULL, NULL, &nva, 6820 NULL, NULL, 0, NULL); 6821 if (ret == 0) { 6822 tmode ^= S_ISGID; 6823 NFSVNO_SETATTRVAL(&nva, mode, tmode); 6824 ret = nfsrv_proxyds(vp, 0, 0, cred, p, 6825 NFSPROC_SETATTR, NULL, NULL, NULL, 6826 &nva, NULL, NULL, 0, NULL); 6827 } 6828 } 6829 } 6830 6831 /* 6832 * Remove Extended attribute vnode op. 6833 */ 6834 int 6835 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, 6836 struct ucred *cred, struct thread *p) 6837 { 6838 int error; 6839 6840 /* 6841 * Get rid of any delegations. I am not sure why this is required, 6842 * but RFC-8276 says so. 
6843 */ 6844 error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); 6845 if (error != 0) 6846 goto out; 6847 #ifdef MAC 6848 error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, 6849 name); 6850 if (error != 0) 6851 goto out; 6852 #endif 6853 6854 error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); 6855 if (error == EOPNOTSUPP) 6856 error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, 6857 cred, p); 6858 if (error == 0) { 6859 if (vp->v_type == VREG && nfsrv_devidcnt != 0) 6860 nfsvno_updateds(vp, cred, p); 6861 error = VOP_FSYNC(vp, MNT_WAIT, p); 6862 } 6863 out: 6864 NFSEXITCODE(error); 6865 return (error); 6866 } 6867 6868 /* 6869 * List Extended Atribute vnode op into an mbuf list. 6870 */ 6871 int 6872 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, 6873 struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) 6874 { 6875 struct iovec iv; 6876 struct uio io; 6877 int error; 6878 size_t siz; 6879 6880 *bufp = NULL; 6881 /* First, find out the size of the extended attribute. */ 6882 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, 6883 p); 6884 if (error != 0) 6885 return (NFSERR_NOXATTR); 6886 if (siz <= cookie) { 6887 *lenp = 0; 6888 *eofp = true; 6889 goto out; 6890 } 6891 if (siz > cookie + *lenp) { 6892 siz = cookie + *lenp; 6893 *eofp = false; 6894 } else 6895 *eofp = true; 6896 /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). 
*/ 6897 if (siz > 10 * 1024 * 1024) { 6898 error = NFSERR_XATTR2BIG; 6899 goto out; 6900 } 6901 *bufp = malloc(siz, M_TEMP, M_WAITOK); 6902 iv.iov_base = *bufp; 6903 iv.iov_len = siz; 6904 io.uio_iovcnt = 1; 6905 io.uio_iov = &iv; 6906 io.uio_offset = 0; 6907 io.uio_resid = siz; 6908 io.uio_rw = UIO_READ; 6909 io.uio_segflg = UIO_SYSSPACE; 6910 io.uio_td = p; 6911 #ifdef MAC 6912 error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); 6913 if (error != 0) 6914 goto out; 6915 #endif 6916 6917 error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, 6918 p); 6919 if (error != 0) 6920 goto out; 6921 if (io.uio_resid > 0) 6922 siz -= io.uio_resid; 6923 *lenp = siz; 6924 6925 out: 6926 if (error != 0) { 6927 free(*bufp, M_TEMP); 6928 *bufp = NULL; 6929 } 6930 NFSEXITCODE(error); 6931 return (error); 6932 } 6933 6934 /* 6935 * Trim trailing data off the mbuf list being built. 6936 */ 6937 void 6938 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, 6939 int bextpg, int bextpgsiz) 6940 { 6941 vm_page_t pg; 6942 int fullpgsiz, i; 6943 6944 if (mb->m_next != NULL) { 6945 m_freem(mb->m_next); 6946 mb->m_next = NULL; 6947 } 6948 if ((mb->m_flags & M_EXTPG) != 0) { 6949 KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs, 6950 ("nfsm_trimtrailing: bextpg out of range")); 6951 KASSERT(bpos == (char *)(void *) 6952 PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz, 6953 ("nfsm_trimtrailing: bextpgsiz bad!")); 6954 6955 /* First, get rid of any pages after this position. 
*/ 6956 for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { 6957 pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); 6958 vm_page_unwire_noq(pg); 6959 vm_page_free(pg); 6960 } 6961 mb->m_epg_npgs = bextpg + 1; 6962 if (bextpg == 0) 6963 fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; 6964 else 6965 fullpgsiz = PAGE_SIZE; 6966 mb->m_epg_last_len = fullpgsiz - bextpgsiz; 6967 mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); 6968 for (i = 1; i < mb->m_epg_npgs; i++) 6969 mb->m_len += m_epg_pagelen(mb, i, 0); 6970 nd->nd_bextpgsiz = bextpgsiz; 6971 nd->nd_bextpg = bextpg; 6972 } else 6973 mb->m_len = bpos - mtod(mb, char *); 6974 nd->nd_mb = mb; 6975 nd->nd_bpos = bpos; 6976 } 6977 6978 6979 /* 6980 * Check to see if a put file handle operation should test for 6981 * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR. 6982 * When Open is the next operation, NFSERR_WRONGSEC cannot be 6983 * replied for the Open cases that use a component. This can 6984 * be identified by the fact that the file handle's type is VDIR. 6985 */ 6986 bool 6987 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, enum vtype vtyp) 6988 { 6989 6990 if ((nd->nd_flag & ND_NFSV4) == 0) 6991 return (true); 6992 6993 if ((nd->nd_flag & ND_LASTOP) != 0) 6994 return (false); 6995 6996 if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH || 6997 nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH || 6998 nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP || 6999 nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME) 7000 return (false); 7001 if (nextop == NFSV4OP_OPEN && vtyp == VDIR) 7002 return (false); 7003 return (true); 7004 } 7005 7006 /* 7007 * Check DSs marked no space. 
7008 */ 7009 void 7010 nfsrv_checknospc(void) 7011 { 7012 struct statfs *tsf; 7013 struct nfsdevice *ds; 7014 struct vnode **dvpp, **tdvpp, *dvp; 7015 char *devid, *tdevid; 7016 int cnt, error = 0, i; 7017 7018 if (nfsrv_devidcnt <= 0) 7019 return; 7020 dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); 7021 devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK); 7022 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); 7023 7024 /* Get an array of the dvps for the DSs. */ 7025 tdvpp = dvpp; 7026 tdevid = devid; 7027 i = 0; 7028 NFSDDSLOCK(); 7029 /* First, search for matches for same file system. */ 7030 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { 7031 if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) { 7032 if (++i > nfsrv_devidcnt) 7033 break; 7034 *tdvpp++ = ds->nfsdev_dvp; 7035 NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID); 7036 tdevid += NFSX_V4DEVICEID; 7037 } 7038 } 7039 NFSDDSUNLOCK(); 7040 7041 /* Do a VFS_STATFS() for each of the DSs and clear no space. */ 7042 cnt = i; 7043 tdvpp = dvpp; 7044 tdevid = devid; 7045 for (i = 0; i < cnt && error == 0; i++) { 7046 dvp = *tdvpp++; 7047 error = VFS_STATFS(dvp->v_mount, tsf); 7048 if (error == 0 && tsf->f_bavail > 0) { 7049 NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n"); 7050 nfsrv_marknospc(tdevid, false); 7051 } 7052 tdevid += NFSX_V4DEVICEID; 7053 } 7054 free(tsf, M_TEMP); 7055 free(dvpp, M_TEMP); 7056 free(devid, M_TEMP); 7057 } 7058 7059 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); 7060 7061 /* 7062 * Called once to initialize data structures... 
 * This is the module event handler for "nfsd".
 * MOD_LOAD initializes the locks, the reply cache and the server state and
 * then hooks up nfsd_call_nfsd.  It does nothing if already loaded.
 * MOD_UNLOAD fails with EBUSY while newnfs_numnfsd is non-zero; otherwise
 * it unhooks the server, throws away all state and destroys the locks.
 * Any other event type returns EOPNOTSUPP.
 */
static int
nfsd_modevent(module_t mod, int type, void *data)
{
	int error = 0, i;
	/* Set once MOD_LOAD has completed and cleared again by MOD_UNLOAD. */
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded)
			goto out;
		newnfs_portinit();
		/* One mutex per bucket of the two reply cache hash tables. */
		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
			mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
			    MTX_DEF);
			mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
			    MTX_DEF);
		}
		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
		mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
		mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
		mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
		callout_init(&nfsd_callout, 1);
		nfsrvd_initcache();
		nfsd_init();
		NFSD_LOCK();
		nfsrvd_init(0);
		NFSD_UNLOCK();
		nfsd_mntinit();
#ifdef VV_DISABLEDELEG
		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
#endif
		/* Setting the hook is the last step before marking loaded. */
		nfsd_call_nfsd = nfssvc_nfsd;
		loaded = 1;
		break;

	case MOD_UNLOAD:
		/* Refuse to unload while newnfs_numnfsd is non-zero. */
		if (newnfs_numnfsd != 0) {
			error = EBUSY;
			break;
		}

#ifdef VV_DISABLEDELEG
		vn_deleg_ops.vndeleg_recall = NULL;
		vn_deleg_ops.vndeleg_disable = NULL;
#endif
		/* Unhook the entry point and drain the callout first. */
		nfsd_call_nfsd = NULL;
		callout_drain(&nfsd_callout);

		/* Clean out all NFSv4 state. */
		nfsrv_throwawayallstate(curthread);

		/* Clean the NFS server reply cache */
		nfsrvd_cleancache();

		/* Free up the krpc server pool. */
		if (nfsrvd_pool != NULL)
			svcpool_destroy(nfsrvd_pool);

		/* and get rid of the locks */
		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
			mtx_destroy(&nfsrchash_table[i].mtx);
			mtx_destroy(&nfsrcahash_table[i].mtx);
		}
		mtx_destroy(&nfsrc_udpmtx);
		mtx_destroy(&nfs_v4root_mutex);
		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
		mtx_destroy(&nfsrv_dontlistlock_mtx);
		mtx_destroy(&nfsrv_recalllock_mtx);
		for (i = 0; i < nfsrv_sessionhashsize; i++)
			mtx_destroy(&nfssessionhash[i].mtx);
		/* The layout hash is the only table checked for NULL here. */
		if (nfslayouthash != NULL) {
			for (i = 0; i < nfsrv_layouthashsize; i++)
				mtx_destroy(&nfslayouthash[i].mtx);
			free(nfslayouthash, M_NFSDSESSION);
		}
		lockdestroy(&nfsv4root_mnt.mnt_explock);
		free(nfsclienthash, M_NFSDCLIENT);
		free(nfslockhash, M_NFSDLOCKFILE);
		free(nfssessionhash, M_NFSDSESSION);
		loaded = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

out:
	NFSEXITCODE(error);
	return (error);
}
/* Module description: the name and the event handler defined above. */
static moduledata_t nfsd_mod = {
	"nfsd",
	nfsd_modevent,
	NULL,
};
DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfsd, 1);
MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);