1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Bayard G. Bell. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 28 * All rights reserved. 29 */ 30 31 #include <sys/errno.h> 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/user.h> 35 #include <sys/stat.h> 36 #include <sys/time.h> 37 #include <sys/utsname.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/vnode.h> 41 #include <sys/pathname.h> 42 #include <sys/bootconf.h> 43 #include <fs/fs_subr.h> 44 #include <rpc/types.h> 45 #include <nfs/nfs.h> 46 #include <nfs/nfs4.h> 47 #include <nfs/nfs_clnt.h> 48 #include <nfs/rnode.h> 49 #include <nfs/mount.h> 50 #include <nfs/nfssys.h> 51 #include <sys/debug.h> 52 #include <sys/cmn_err.h> 53 #include <sys/file.h> 54 #include <sys/fcntl.h> 55 #include <sys/zone.h> 56 57 /* 58 * This is the loadable module wrapper. 59 */ 60 #include <sys/systm.h> 61 #include <sys/modctl.h> 62 #include <sys/syscall.h> 63 #include <sys/ddi.h> 64 65 #include <rpc/types.h> 66 #include <rpc/auth.h> 67 #include <rpc/clnt.h> 68 #include <rpc/svc.h> 69 70 /* 71 * The pseudo NFS filesystem to allow diskless booting to dynamically 72 * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements 73 * the VFS_MOUNTROOT op and is only intended to be used by the 74 * diskless booting code until the real root filesystem is mounted. 75 * Nothing else should ever call this! 76 * 77 * The strategy is that if the initial rootfs type is set to "nfsdyn" 78 * by loadrootmodules() this filesystem is called to mount the 79 * root filesystem. It first attempts to mount a V4 filesystem, and if that 80 * fails due to an RPC version mismatch it tries V3 and finally V2. 81 * Once the real mount succeeds the vfsops and rootfs name are changed 82 * to reflect the real filesystem type. 83 */ 84 static int nfsdyninit(int, char *); 85 static int nfsdyn_mountroot(vfs_t *, whymountroot_t); 86 87 vfsops_t *nfsdyn_vfsops; 88 89 /* 90 * The following data structures are used to configure the NFS 91 * system call, the NFS Version 2 client VFS, and the NFS Version 92 * 3 client VFS into the system. The NFS Version 4 structures are defined in 93 * nfs4_common.c 94 */ 95 96 /* 97 * The NFS system call. 98 */ 99 static struct sysent nfssysent = { 100 2, 101 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 102 nfssys 103 }; 104 105 static struct modlsys modlsys = { 106 &mod_syscallops, 107 "NFS syscall, client, and common", 108 &nfssysent 109 }; 110 111 #ifdef _SYSCALL32_IMPL 112 static struct modlsys modlsys32 = { 113 &mod_syscallops32, 114 "NFS syscall, client, and common (32-bit)", 115 &nfssysent 116 }; 117 #endif /* _SYSCALL32_IMPL */ 118 119 /* 120 * The NFS Dynamic client VFS. 121 */ 122 static vfsdef_t vfw = { 123 VFSDEF_VERSION, 124 "nfsdyn", 125 nfsdyninit, 126 VSW_ZMOUNT, 127 NULL 128 }; 129 130 static struct modlfs modlfs = { 131 &mod_fsops, 132 "network filesystem", 133 &vfw 134 }; 135 136 /* 137 * The NFS Version 2 client VFS. 138 */ 139 static vfsdef_t vfw2 = { 140 VFSDEF_VERSION, 141 "nfs", 142 nfsinit, 143 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, 144 NULL 145 }; 146 147 static struct modlfs modlfs2 = { 148 &mod_fsops, 149 "network filesystem version 2", 150 &vfw2 151 }; 152 153 /* 154 * The NFS Version 3 client VFS. 155 */ 156 static vfsdef_t vfw3 = { 157 VFSDEF_VERSION, 158 "nfs3", 159 nfs3init, 160 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, 161 NULL 162 }; 163 164 static struct modlfs modlfs3 = { 165 &mod_fsops, 166 "network filesystem version 3", 167 &vfw3 168 }; 169 170 extern struct modlfs modlfs4; 171 172 /* 173 * We have too many linkage structures so we define our own XXX 174 */ 175 struct modlinkage_big { 176 int ml_rev; /* rev of loadable modules system */ 177 void *ml_linkage[7]; /* NULL terminated list of */ 178 /* linkage structures */ 179 }; 180 181 /* 182 * All of the module configuration linkages required to configure 183 * the system call and client VFS's into the system. 184 */ 185 static struct modlinkage_big modlinkage = { 186 MODREV_1, 187 &modlsys, 188 #ifdef _SYSCALL32_IMPL 189 &modlsys32, 190 #endif 191 &modlfs, 192 &modlfs2, 193 &modlfs3, 194 &modlfs4, 195 NULL 196 }; 197 198 /* 199 * This routine is invoked automatically when the kernel module 200 * containing this routine is loaded. This allows module specific 201 * initialization to be done when the module is loaded. 202 */ 203 int 204 _init(void) 205 { 206 int status; 207 208 if ((status = nfs_clntinit()) != 0) { 209 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 210 return (status); 211 } 212 213 /* 214 * Create the version specific kstats. 215 * 216 * PSARC 2001/697 Contract Private Interface 217 * All nfs kstats are under SunMC contract 218 * Please refer to the PSARC listed above and contact 219 * SunMC before making any changes! 220 * 221 * Changes must be reviewed by Solaris File Sharing 222 * Changes must be communicated to contract-2001-697@sun.com 223 * 224 */ 225 226 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 227 nfsstat_zone_fini); 228 status = mod_install((struct modlinkage *)&modlinkage); 229 230 if (status) { 231 (void) zone_key_delete(nfsstat_zone_key); 232 233 /* 234 * Failed to install module, cleanup previous 235 * initialization work. 236 */ 237 nfs_clntfini(); 238 239 /* 240 * Clean up work performed indirectly by mod_installfs() 241 * as a result of our call to mod_install(). 242 */ 243 nfs4fini(); 244 nfs3fini(); 245 nfsfini(); 246 } 247 return (status); 248 } 249 250 int 251 _fini(void) 252 { 253 /* Don't allow module to be unloaded */ 254 return (EBUSY); 255 } 256 257 int 258 _info(struct modinfo *modinfop) 259 { 260 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 261 } 262 263 /* 264 * General utilities 265 */ 266 267 /* 268 * Returns the preferred transfer size in bytes based on 269 * what network interfaces are available. 270 */ 271 int 272 nfstsize(void) 273 { 274 /* 275 * For the moment, just return NFS_MAXDATA until we can query the 276 * appropriate transport. 277 */ 278 return (NFS_MAXDATA); 279 } 280 281 /* 282 * Returns the preferred transfer size in bytes based on 283 * what network interfaces are available. 284 */ 285 286 /* this should reflect the largest transfer size possible */ 287 static int nfs3_max_transfer_size = 1024 * 1024; 288 289 int 290 nfs3tsize(void) 291 { 292 /* 293 * For the moment, just return nfs3_max_transfer_size until we 294 * can query the appropriate transport. 295 */ 296 return (nfs3_max_transfer_size); 297 } 298 299 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 300 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 301 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 302 303 uint_t 304 nfs3_tsize(struct knetconfig *knp) 305 { 306 307 if (knp->knc_semantics == NC_TPI_COTS_ORD || 308 knp->knc_semantics == NC_TPI_COTS) 309 return (nfs3_max_transfer_size_cots); 310 if (knp->knc_semantics == NC_TPI_RDMA) 311 return (nfs3_max_transfer_size_rdma); 312 return (nfs3_max_transfer_size_clts); 313 } 314 315 uint_t 316 rfs3_tsize(struct svc_req *req) 317 { 318 319 if (req->rq_xprt->xp_type == T_COTS_ORD || 320 req->rq_xprt->xp_type == T_COTS) 321 return (nfs3_max_transfer_size_cots); 322 if (req->rq_xprt->xp_type == T_RDMA) 323 return (nfs3_max_transfer_size_rdma); 324 return (nfs3_max_transfer_size_clts); 325 } 326 327 /* ARGSUSED */ 328 static int 329 nfsdyninit(int fstyp, char *name) 330 { 331 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 332 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot }, 333 NULL, NULL 334 }; 335 int error; 336 337 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 338 if (error != 0) 339 return (error); 340 341 return (0); 342 } 343 344 /* ARGSUSED */ 345 static int 346 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 347 { 348 char root_hostname[SYS_NMLN+1]; 349 struct servinfo *svp; 350 int error; 351 int vfsflags; 352 char *root_path; 353 struct pathname pn; 354 char *name; 355 static char token[10]; 356 struct nfs_args args; /* nfs mount arguments */ 357 358 bzero(&args, sizeof (args)); 359 360 /* do this BEFORE getfile which causes xid stamps to be initialized */ 361 clkset(-1L); /* hack for now - until we get time svc? */ 362 363 if (why == ROOT_REMOUNT) { 364 /* 365 * Shouldn't happen. 366 */ 367 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 368 } 369 370 if (why == ROOT_UNMOUNT) { 371 /* 372 * Nothing to do for NFS. 373 */ 374 return (0); 375 } 376 377 /* 378 * why == ROOT_INIT 379 */ 380 381 name = token; 382 *name = 0; 383 getfsname("root", name, sizeof (token)); 384 385 pn_alloc(&pn); 386 root_path = pn.pn_path; 387 388 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 389 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 390 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 391 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 392 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 393 394 /* 395 * First try version 4 396 */ 397 vfs_setops(vfsp, nfs4_vfsops); 398 args.addr = &svp->sv_addr; 399 args.fh = (char *)&svp->sv_fhandle; 400 args.knconf = svp->sv_knconf; 401 args.hostname = root_hostname; 402 vfsflags = 0; 403 404 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 405 &args, &vfsflags)) { 406 if (error != EPROTONOSUPPORT) { 407 nfs_cmn_err(error, CE_WARN, 408 "Unable to mount NFS root filesystem: %m"); 409 sv_free(svp); 410 pn_free(&pn); 411 vfs_setops(vfsp, nfsdyn_vfsops); 412 return (error); 413 } 414 415 /* 416 * Then try version 3 417 */ 418 bzero(&args, sizeof (args)); 419 vfs_setops(vfsp, nfs3_vfsops); 420 args.addr = &svp->sv_addr; 421 args.fh = (char *)&svp->sv_fhandle; 422 args.knconf = svp->sv_knconf; 423 args.hostname = root_hostname; 424 vfsflags = 0; 425 426 if (error = mount_root(*name ? name : "root", root_path, 427 NFS_V3, &args, &vfsflags)) { 428 if (error != EPROTONOSUPPORT) { 429 nfs_cmn_err(error, CE_WARN, 430 "Unable to mount NFS root filesystem: %m"); 431 sv_free(svp); 432 pn_free(&pn); 433 vfs_setops(vfsp, nfsdyn_vfsops); 434 return (error); 435 } 436 437 /* 438 * Finally, try version 2 439 */ 440 bzero(&args, sizeof (args)); 441 args.addr = &svp->sv_addr; 442 args.fh = (char *)&svp->sv_fhandle.fh_buf; 443 args.knconf = svp->sv_knconf; 444 args.hostname = root_hostname; 445 vfsflags = 0; 446 447 vfs_setops(vfsp, nfs_vfsops); 448 449 if (error = mount_root(*name ? name : "root", 450 root_path, NFS_VERSION, &args, &vfsflags)) { 451 nfs_cmn_err(error, CE_WARN, 452 "Unable to mount NFS root filesystem: %m"); 453 sv_free(svp); 454 pn_free(&pn); 455 vfs_setops(vfsp, nfsdyn_vfsops); 456 return (error); 457 } 458 } 459 } 460 461 sv_free(svp); 462 pn_free(&pn); 463 return (VFS_MOUNTROOT(vfsp, why)); 464 } 465 466 int 467 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 468 { 469 mntinfo_t *mi; /* mount info, pointed at by vfs */ 470 STRUCT_HANDLE(nfs_args, args); 471 int flags; 472 473 #ifdef lint 474 model = model; 475 #endif 476 477 STRUCT_SET_HANDLE(args, model, buf); 478 479 flags = STRUCT_FGET(args, flags); 480 481 /* 482 * Set option fields in mount info record 483 */ 484 mi = VTOMI(vp); 485 486 if (flags & NFSMNT_NOAC) { 487 mi->mi_flags |= MI_NOAC; 488 PURGE_ATTRCACHE(vp); 489 } 490 if (flags & NFSMNT_NOCTO) 491 mi->mi_flags |= MI_NOCTO; 492 if (flags & NFSMNT_LLOCK) 493 mi->mi_flags |= MI_LLOCK; 494 if (flags & NFSMNT_GRPID) 495 mi->mi_flags |= MI_GRPID; 496 if (flags & NFSMNT_RETRANS) { 497 if (STRUCT_FGET(args, retrans) < 0) 498 return (EINVAL); 499 mi->mi_retrans = STRUCT_FGET(args, retrans); 500 } 501 if (flags & NFSMNT_TIMEO) { 502 if (STRUCT_FGET(args, timeo) <= 0) 503 return (EINVAL); 504 mi->mi_timeo = STRUCT_FGET(args, timeo); 505 /* 506 * The following scales the standard deviation and 507 * and current retransmission timer to match the 508 * initial value for the timeout specified. 509 */ 510 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 511 (mi->mi_timeo * hz * 2) / 5; 512 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 513 mi->mi_timeo * hz / 10; 514 } 515 if (flags & NFSMNT_RSIZE) { 516 if (STRUCT_FGET(args, rsize) <= 0) 517 return (EINVAL); 518 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 519 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 520 } 521 if (flags & NFSMNT_WSIZE) { 522 if (STRUCT_FGET(args, wsize) <= 0) 523 return (EINVAL); 524 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 525 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 526 } 527 if (flags & NFSMNT_ACREGMIN) { 528 if (STRUCT_FGET(args, acregmin) < 0) 529 mi->mi_acregmin = ACMINMAX; 530 else 531 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 532 ACMINMAX); 533 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 534 } 535 if (flags & NFSMNT_ACREGMAX) { 536 if (STRUCT_FGET(args, acregmax) < 0) 537 mi->mi_acregmax = ACMAXMAX; 538 else 539 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 540 ACMAXMAX); 541 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 542 } 543 if (flags & NFSMNT_ACDIRMIN) { 544 if (STRUCT_FGET(args, acdirmin) < 0) 545 mi->mi_acdirmin = ACMINMAX; 546 else 547 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 548 ACMINMAX); 549 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 550 } 551 if (flags & NFSMNT_ACDIRMAX) { 552 if (STRUCT_FGET(args, acdirmax) < 0) 553 mi->mi_acdirmax = ACMAXMAX; 554 else 555 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 556 ACMAXMAX); 557 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 558 } 559 560 if (flags & NFSMNT_LOOPBACK) 561 mi->mi_flags |= MI_LOOPBACK; 562 563 return (0); 564 } 565 566 /* 567 * Set or Clear direct I/O flag 568 * VOP_RWLOCK() is held for write access to prevent a race condition 569 * which would occur if a process is in the middle of a write when 570 * directio flag gets set. It is possible that all pages may not get flushed. 571 */ 572 573 /* ARGSUSED */ 574 int 575 nfs_directio(vnode_t *vp, int cmd, cred_t *cr) 576 { 577 int error = 0; 578 rnode_t *rp; 579 580 rp = VTOR(vp); 581 582 if (cmd == DIRECTIO_ON) { 583 584 if (rp->r_flags & RDIRECTIO) 585 return (0); 586 587 /* 588 * Flush the page cache. 589 */ 590 591 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 592 593 if (rp->r_flags & RDIRECTIO) { 594 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 595 return (0); 596 } 597 598 if (vn_has_cached_data(vp) && 599 ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { 600 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 601 B_INVAL, cr, NULL); 602 if (error) { 603 if (error == ENOSPC || error == EDQUOT) { 604 mutex_enter(&rp->r_statelock); 605 if (!rp->r_error) 606 rp->r_error = error; 607 mutex_exit(&rp->r_statelock); 608 } 609 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 610 return (error); 611 } 612 } 613 614 mutex_enter(&rp->r_statelock); 615 rp->r_flags |= RDIRECTIO; 616 mutex_exit(&rp->r_statelock); 617 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 618 return (0); 619 } 620 621 if (cmd == DIRECTIO_OFF) { 622 mutex_enter(&rp->r_statelock); 623 rp->r_flags &= ~RDIRECTIO; /* disable direct mode */ 624 mutex_exit(&rp->r_statelock); 625 return (0); 626 } 627 628 return (EINVAL); 629 } 630