1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 /* 38 * Functions that need to be different for different versions of BSD 39 * kernel should be kept here, along with any global storage specific 40 * to this BSD variant. 41 */ 42 #include <fs/nfs/nfsport.h> 43 #include <sys/sysctl.h> 44 #include <vm/vm.h> 45 #include <vm/vm_object.h> 46 #include <vm/vm_page.h> 47 #include <vm/vm_param.h> 48 #include <vm/vm_map.h> 49 #include <vm/vm_kern.h> 50 #include <vm/vm_extern.h> 51 #include <vm/uma.h> 52 #include <vm/uma_int.h> 53 54 extern int nfscl_ticks; 55 extern int nfsrv_nfsuserd; 56 extern struct nfssockreq nfsrv_nfsuserdsock; 57 extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, 58 struct thread *); 59 extern int nfsrv_useacl; 60 struct mount nfsv4root_mnt; 61 int newnfs_numnfsd = 0; 62 struct nfsstats newnfsstats; 63 int nfs_numnfscbd = 0; 64 char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; 65 struct callout newnfsd_callout; 66 void (*nfsd_call_servertimer)(void) = NULL; 67 void (*ncl_call_invalcaches)(struct vnode *) = NULL; 68 69 static int nfs_realign_test; 70 static int nfs_realign_count; 71 72 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "New NFS filesystem"); 73 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 74 0, "Number of realign tests done"); 75 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 76 0, "Number of mbuf realignments done"); 77 SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, 78 nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), 79 "NFSv4 callback addr for server to use"); 80 81 /* 82 * Defines for malloc 83 * (Here for FreeBSD, since they allocate storage.) 84 */ 85 MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache"); 86 MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id"); 87 MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state", 88 "NFSD V4 State (Openowner, Open, Lockowner, Delegation"); 89 MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock"); 90 MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file"); 91 MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string"); 92 MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map"); 93 MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header"); 94 MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle"); 95 MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner"); 96 MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open"); 97 MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation"); 98 MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client"); 99 MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner"); 100 MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock"); 101 MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "New nfs vnode"); 102 MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "New nfs Direct IO buffer"); 103 MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff", 104 "New NFS directory offset data"); 105 MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", 106 "New NFS local lock rollback"); 107 108 /* 109 * Definition of mutex locks. 110 * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list 111 * and assorted other nfsd structures. 112 * Giant is used to protect the nfsd list and count, which is just 113 * updated when nfsd's start/stop and is grabbed for nfsrvd_dorpc() 114 * for the VFS ops. 115 */ 116 struct mtx newnfsd_mtx; 117 struct mtx nfs_sockl_mutex; 118 struct mtx nfs_state_mutex; 119 struct mtx nfs_nameid_mutex; 120 struct mtx nfs_req_mutex; 121 struct mtx nfs_slock_mutex; 122 123 /* local functions */ 124 static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *); 125 126 #ifdef __NO_STRICT_ALIGNMENT 127 /* 128 * These architectures don't need re-alignment, so just return. 129 */ 130 void 131 newnfs_realign(struct mbuf **pm) 132 { 133 134 return; 135 } 136 #else /* !__NO_STRICT_ALIGNMENT */ 137 /* 138 * newnfs_realign: 139 * 140 * Check for badly aligned mbuf data and realign by copying the unaligned 141 * portion of the data into a new mbuf chain and freeing the portions 142 * of the old chain that were replaced. 143 * 144 * We cannot simply realign the data within the existing mbuf chain 145 * because the underlying buffers may contain other rpc commands and 146 * we cannot afford to overwrite them. 147 * 148 * We would prefer to avoid this situation entirely. The situation does 149 * not occur with NFS/UDP and is supposed to only occassionally occur 150 * with TCP. Use vfs.nfs.realign_count and realign_test to check this. 151 * 152 */ 153 void 154 newnfs_realign(struct mbuf **pm) 155 { 156 struct mbuf *m, *n; 157 int off, space; 158 159 ++nfs_realign_test; 160 while ((m = *pm) != NULL) { 161 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { 162 /* 163 * NB: we can't depend on m_pkthdr.len to help us 164 * decide what to do here. May not be worth doing 165 * the m_length calculation as m_copyback will 166 * expand the mbuf chain below as needed. 167 */ 168 space = m_length(m, NULL); 169 if (space >= MINCLSIZE) { 170 /* NB: m_copyback handles space > MCLBYTES */ 171 n = m_getcl(M_WAITOK, MT_DATA, 0); 172 } else 173 n = m_get(M_WAITOK, MT_DATA); 174 if (n == NULL) 175 return; 176 /* 177 * Align the remainder of the mbuf chain. 178 */ 179 n->m_len = 0; 180 off = 0; 181 while (m != NULL) { 182 m_copyback(n, off, m->m_len, mtod(m, caddr_t)); 183 off += m->m_len; 184 m = m->m_next; 185 } 186 m_freem(*pm); 187 *pm = n; 188 ++nfs_realign_count; 189 break; 190 } 191 pm = &m->m_next; 192 } 193 } 194 #endif /* __NO_STRICT_ALIGNMENT */ 195 196 #ifdef notdef 197 static void 198 nfsrv_object_create(struct vnode *vp, struct thread *td) 199 { 200 201 if (vp == NULL || vp->v_type != VREG) 202 return; 203 (void) vfs_object_create(vp, td, td->td_ucred); 204 } 205 #endif 206 207 /* 208 * Look up a file name. Basically just initialize stuff and call namei(). 209 */ 210 int 211 nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p) 212 { 213 int error; 214 215 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, fname, 216 p); 217 error = namei(ndp); 218 if (!error) { 219 NDFREE(ndp, NDF_ONLY_PNBUF); 220 } 221 return (error); 222 } 223 224 /* 225 * Copy NFS uid, gids to the cred structure. 226 */ 227 void 228 newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) 229 { 230 231 KASSERT(nfscr->nfsc_ngroups >= 0, 232 ("newnfs_copycred: negative nfsc_ngroups")); 233 cr->cr_uid = nfscr->nfsc_uid; 234 crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); 235 } 236 237 /* 238 * Map args from nfsmsleep() to msleep(). 239 */ 240 int 241 nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg, 242 struct timespec *ts) 243 { 244 u_int64_t nsecval; 245 int error, timeo; 246 247 if (ts) { 248 timeo = hz * ts->tv_sec; 249 nsecval = (u_int64_t)ts->tv_nsec; 250 nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) / 251 1000000000; 252 timeo += (int)nsecval; 253 } else { 254 timeo = 0; 255 } 256 error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo); 257 return (error); 258 } 259 260 /* 261 * Get the file system info for the server. For now, just assume FFS. 262 */ 263 void 264 nfsvno_getfs(struct nfsfsinfo *sip, int isdgram) 265 { 266 int pref; 267 268 /* 269 * XXX 270 * There should be file system VFS OP(s) to get this information. 271 * For now, assume ufs. 272 */ 273 if (isdgram) 274 pref = NFS_MAXDGRAMDATA; 275 else 276 pref = NFS_MAXDATA; 277 sip->fs_rtmax = NFS_MAXDATA; 278 sip->fs_rtpref = pref; 279 sip->fs_rtmult = NFS_FABLKSIZE; 280 sip->fs_wtmax = NFS_MAXDATA; 281 sip->fs_wtpref = pref; 282 sip->fs_wtmult = NFS_FABLKSIZE; 283 sip->fs_dtpref = pref; 284 sip->fs_maxfilesize = 0xffffffffffffffffull; 285 sip->fs_timedelta.tv_sec = 0; 286 sip->fs_timedelta.tv_nsec = 1; 287 sip->fs_properties = (NFSV3FSINFO_LINK | 288 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | 289 NFSV3FSINFO_CANSETTIME); 290 } 291 292 /* Fake nfsrv_atroot. Just return 0 */ 293 int 294 nfsrv_atroot(struct vnode *vp, long *retp) 295 { 296 297 return (0); 298 } 299 300 /* 301 * Set the credentials to refer to root. 302 * If only the various BSDen could agree on whether cr_gid is a separate 303 * field or cr_groups[0]... 304 */ 305 void 306 newnfs_setroot(struct ucred *cred) 307 { 308 309 cred->cr_uid = 0; 310 cred->cr_groups[0] = 0; 311 cred->cr_ngroups = 1; 312 } 313 314 /* 315 * Get the client credential. Used for Renew and recovery. 316 */ 317 struct ucred * 318 newnfs_getcred(void) 319 { 320 struct ucred *cred; 321 struct thread *td = curthread; 322 323 cred = crdup(td->td_ucred); 324 newnfs_setroot(cred); 325 return (cred); 326 } 327 328 /* 329 * Nfs timer routine 330 * Call the nfsd's timer function once/sec. 331 */ 332 void 333 newnfs_timer(void *arg) 334 { 335 static time_t lasttime = 0; 336 /* 337 * Call the server timer, if set up. 338 * The argument indicates if it is the next second and therefore 339 * leases should be checked. 340 */ 341 if (lasttime != NFSD_MONOSEC) { 342 lasttime = NFSD_MONOSEC; 343 if (nfsd_call_servertimer != NULL) 344 (*nfsd_call_servertimer)(); 345 } 346 callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL); 347 } 348 349 350 /* 351 * Sleep for a short period of time unless errval == NFSERR_GRACE, where 352 * the sleep should be for 5 seconds. 353 * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on 354 * an event that never gets a wakeup. Only return EINTR or 0. 355 */ 356 int 357 nfs_catnap(int prio, int errval, const char *wmesg) 358 { 359 static int non_event; 360 int ret; 361 362 if (errval == NFSERR_GRACE) 363 ret = tsleep(&non_event, prio, wmesg, 5 * hz); 364 else 365 ret = tsleep(&non_event, prio, wmesg, 1); 366 if (ret != EINTR) 367 ret = 0; 368 return (ret); 369 } 370 371 /* 372 * Get referral. For now, just fail. 373 */ 374 struct nfsreferral * 375 nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno) 376 { 377 378 return (NULL); 379 } 380 381 static int 382 nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) 383 { 384 int error; 385 386 error = nfssvc_call(td, uap, td->td_ucred); 387 return (error); 388 } 389 390 static int 391 nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) 392 { 393 int error = EINVAL; 394 struct nfsd_idargs nid; 395 396 if (uap->flag & NFSSVC_IDNAME) { 397 error = copyin(uap->argp, (caddr_t)&nid, sizeof (nid)); 398 if (error) 399 return (error); 400 error = nfssvc_idname(&nid); 401 return (error); 402 } else if (uap->flag & NFSSVC_GETSTATS) { 403 error = copyout(&newnfsstats, 404 CAST_USER_ADDR_T(uap->argp), sizeof (newnfsstats)); 405 if (error == 0) { 406 if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { 407 newnfsstats.attrcache_hits = 0; 408 newnfsstats.attrcache_misses = 0; 409 newnfsstats.lookupcache_hits = 0; 410 newnfsstats.lookupcache_misses = 0; 411 newnfsstats.direofcache_hits = 0; 412 newnfsstats.direofcache_misses = 0; 413 newnfsstats.accesscache_hits = 0; 414 newnfsstats.accesscache_misses = 0; 415 newnfsstats.biocache_reads = 0; 416 newnfsstats.read_bios = 0; 417 newnfsstats.read_physios = 0; 418 newnfsstats.biocache_writes = 0; 419 newnfsstats.write_bios = 0; 420 newnfsstats.write_physios = 0; 421 newnfsstats.biocache_readlinks = 0; 422 newnfsstats.readlink_bios = 0; 423 newnfsstats.biocache_readdirs = 0; 424 newnfsstats.readdir_bios = 0; 425 newnfsstats.rpcretries = 0; 426 newnfsstats.rpcrequests = 0; 427 newnfsstats.rpctimeouts = 0; 428 newnfsstats.rpcunexpected = 0; 429 newnfsstats.rpcinvalid = 0; 430 bzero(newnfsstats.rpccnt, 431 sizeof(newnfsstats.rpccnt)); 432 } 433 if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { 434 newnfsstats.srvrpc_errs = 0; 435 newnfsstats.srv_errs = 0; 436 newnfsstats.srvcache_inproghits = 0; 437 newnfsstats.srvcache_idemdonehits = 0; 438 newnfsstats.srvcache_nonidemdonehits = 0; 439 newnfsstats.srvcache_misses = 0; 440 newnfsstats.srvcache_tcppeak = 0; 441 newnfsstats.srvcache_size = 0; 442 newnfsstats.srvclients = 0; 443 newnfsstats.srvopenowners = 0; 444 newnfsstats.srvopens = 0; 445 newnfsstats.srvlockowners = 0; 446 newnfsstats.srvlocks = 0; 447 newnfsstats.srvdelegates = 0; 448 newnfsstats.clopenowners = 0; 449 newnfsstats.clopens = 0; 450 newnfsstats.cllockowners = 0; 451 newnfsstats.cllocks = 0; 452 newnfsstats.cldelegates = 0; 453 newnfsstats.cllocalopenowners = 0; 454 newnfsstats.cllocalopens = 0; 455 newnfsstats.cllocallockowners = 0; 456 newnfsstats.cllocallocks = 0; 457 bzero(newnfsstats.srvrpccnt, 458 sizeof(newnfsstats.srvrpccnt)); 459 bzero(newnfsstats.cbrpccnt, 460 sizeof(newnfsstats.cbrpccnt)); 461 } 462 } 463 return (error); 464 } else if (uap->flag & NFSSVC_NFSUSERDPORT) { 465 u_short sockport; 466 467 error = copyin(uap->argp, (caddr_t)&sockport, 468 sizeof (u_short)); 469 if (!error) 470 error = nfsrv_nfsuserdport(sockport, p); 471 } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { 472 nfsrv_nfsuserddelport(); 473 error = 0; 474 } 475 return (error); 476 } 477 478 /* 479 * called by all three modevent routines, so that it gets things 480 * initialized soon enough. 481 */ 482 void 483 newnfs_portinit(void) 484 { 485 static int inited = 0; 486 487 if (inited) 488 return; 489 inited = 1; 490 /* Initialize SMP locks used by both client and server. */ 491 mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF); 492 mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF); 493 } 494 495 /* 496 * Determine if the file system supports NFSv4 ACLs. 497 * Return 1 if it does, 0 otherwise. 498 */ 499 int 500 nfs_supportsnfsv4acls(struct vnode *vp) 501 { 502 int error; 503 register_t retval; 504 505 ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); 506 507 if (nfsrv_useacl == 0) 508 return (0); 509 error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); 510 if (error == 0 && retval != 0) 511 return (1); 512 return (0); 513 } 514 515 extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); 516 517 /* 518 * Called once to initialize data structures... 519 */ 520 static int 521 nfscommon_modevent(module_t mod, int type, void *data) 522 { 523 int error = 0; 524 static int loaded = 0; 525 526 switch (type) { 527 case MOD_LOAD: 528 if (loaded) 529 return (0); 530 newnfs_portinit(); 531 mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF); 532 mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF); 533 mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF); 534 mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF); 535 mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL, 536 MTX_DEF); 537 callout_init(&newnfsd_callout, CALLOUT_MPSAFE); 538 newnfs_init(); 539 nfsd_call_nfscommon = nfssvc_nfscommon; 540 loaded = 1; 541 break; 542 543 case MOD_UNLOAD: 544 if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 || 545 nfs_numnfscbd != 0) { 546 error = EBUSY; 547 break; 548 } 549 550 nfsd_call_nfscommon = NULL; 551 callout_drain(&newnfsd_callout); 552 /* and get rid of the mutexes */ 553 mtx_destroy(&nfs_nameid_mutex); 554 mtx_destroy(&newnfsd_mtx); 555 mtx_destroy(&nfs_state_mutex); 556 mtx_destroy(&nfs_sockl_mutex); 557 mtx_destroy(&nfs_slock_mutex); 558 mtx_destroy(&nfs_req_mutex); 559 mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx); 560 loaded = 0; 561 break; 562 default: 563 error = EOPNOTSUPP; 564 break; 565 } 566 return error; 567 } 568 static moduledata_t nfscommon_mod = { 569 "nfscommon", 570 nfscommon_modevent, 571 NULL, 572 }; 573 DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY); 574 575 /* So that loader and kldload(2) can find us, wherever we are.. */ 576 MODULE_VERSION(nfscommon, 1); 577 MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1); 578 MODULE_DEPEND(nfscommon, krpc, 1, 1, 1); 579 580