1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 28 * All rights reserved. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/errno.h> 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/user.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 #include <sys/utsname.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/pathname.h> 43 #include <sys/bootconf.h> 44 #include <fs/fs_subr.h> 45 #include <rpc/types.h> 46 #include <nfs/nfs.h> 47 #include <nfs/nfs4.h> 48 #include <nfs/nfs_clnt.h> 49 #include <nfs/rnode.h> 50 #include <nfs/mount.h> 51 #include <nfs/nfssys.h> 52 #include <sys/debug.h> 53 #include <sys/cmn_err.h> 54 #include <sys/file.h> 55 #include <sys/fcntl.h> 56 #include <sys/zone.h> 57 58 /* 59 * This is the loadable module wrapper. 
60 */ 61 #include <sys/systm.h> 62 #include <sys/modctl.h> 63 #include <sys/syscall.h> 64 #include <sys/ddi.h> 65 66 #include <rpc/types.h> 67 #include <rpc/auth.h> 68 #include <rpc/clnt.h> 69 #include <rpc/svc.h> 70 71 /* 72 * The psuedo NFS filesystem to allow diskless booting to dynamically 73 * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements 74 * the VFS_MOUNTROOT op and is only intended to be used by the 75 * diskless booting code until the real root filesystem is mounted. 76 * Nothing else should ever call this! 77 * 78 * The strategy is that if the initial rootfs type is set to "nfsdyn" 79 * by loadrootmodules() this filesystem is called to mount the 80 * root filesystem. It first attempts to mount a V4 filesystem, and if that 81 * fails due to an RPC version mismatch it tries V3 and finally V2. 82 * Once the real mount succeeds the vfsops and rootfs name are changed 83 * to reflect the real filesystem type. 84 */ 85 static int nfsdyninit(int, char *); 86 static int nfsdyn_mountroot(vfs_t *, whymountroot_t); 87 88 vfsops_t *nfsdyn_vfsops; 89 90 /* 91 * The following data structures are used to configure the NFS 92 * system call, the NFS Version 2 client VFS, and the NFS Version 93 * 3 client VFS into the system. The NFS Version 4 structures are defined in 94 * nfs4_common.c 95 */ 96 97 /* 98 * The NFS system call. 99 */ 100 static struct sysent nfssysent = { 101 2, 102 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 103 nfssys 104 }; 105 106 static struct modlsys modlsys = { 107 &mod_syscallops, 108 "NFS syscall, client, and common", 109 &nfssysent 110 }; 111 112 #ifdef _SYSCALL32_IMPL 113 static struct modlsys modlsys32 = { 114 &mod_syscallops32, 115 "NFS syscall, client, and common (32-bit)", 116 &nfssysent 117 }; 118 #endif /* _SYSCALL32_IMPL */ 119 120 /* 121 * The NFS Dynamic client VFS. 
122 */ 123 static vfsdef_t vfw = { 124 VFSDEF_VERSION, 125 "nfsdyn", 126 nfsdyninit, 127 0, 128 NULL 129 }; 130 131 static struct modlfs modlfs = { 132 &mod_fsops, 133 "network filesystem", 134 &vfw 135 }; 136 137 /* 138 * The NFS Version 2 client VFS. 139 */ 140 static vfsdef_t vfw2 = { 141 VFSDEF_VERSION, 142 "nfs", 143 nfsinit, 144 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 145 NULL 146 }; 147 148 static struct modlfs modlfs2 = { 149 &mod_fsops, 150 "network filesystem version 2", 151 &vfw2 152 }; 153 154 /* 155 * The NFS Version 3 client VFS. 156 */ 157 static vfsdef_t vfw3 = { 158 VFSDEF_VERSION, 159 "nfs3", 160 nfs3init, 161 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 162 NULL 163 }; 164 165 static struct modlfs modlfs3 = { 166 &mod_fsops, 167 "network filesystem version 3", 168 &vfw3 169 }; 170 171 extern struct modlfs modlfs4; 172 173 /* 174 * We have too many linkage structures so we define our own XXX 175 */ 176 struct modlinkage_big { 177 int ml_rev; /* rev of loadable modules system */ 178 void *ml_linkage[7]; /* NULL terminated list of */ 179 /* linkage structures */ 180 }; 181 182 /* 183 * All of the module configuration linkages required to configure 184 * the system call and client VFS's into the system. 185 */ 186 static struct modlinkage_big modlinkage = { 187 MODREV_1, 188 &modlsys, 189 #ifdef _SYSCALL32_IMPL 190 &modlsys32, 191 #endif 192 &modlfs, 193 &modlfs2, 194 &modlfs3, 195 &modlfs4, 196 NULL 197 }; 198 199 /* 200 * specfs - for getfsname only?? 201 * rpcmod - too many symbols to build stubs for them all 202 */ 203 char _depends_on[] = "fs/specfs strmod/rpcmod misc/rpcsec"; 204 205 /* 206 * This routine is invoked automatically when the kernel module 207 * containing this routine is loaded. This allows module specific 208 * initialization to be done when the module is loaded. 
209 */ 210 int 211 _init(void) 212 { 213 int status; 214 215 if ((status = nfs_clntinit()) != 0) { 216 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 217 return (status); 218 } 219 220 /* 221 * Create the version specific kstats. 222 * 223 * PSARC 2001/697 Contract Private Interface 224 * All nfs kstats are under SunMC contract 225 * Please refer to the PSARC listed above and contact 226 * SunMC before making any changes! 227 * 228 * Changes must be reviewed by Solaris File Sharing 229 * Changes must be communicated to contract-2001-697@sun.com 230 * 231 */ 232 233 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 234 nfsstat_zone_fini); 235 status = mod_install((struct modlinkage *)&modlinkage); 236 237 if (status) { 238 (void) zone_key_delete(nfsstat_zone_key); 239 240 /* 241 * Failed to install module, cleanup previous 242 * initialization work. 243 */ 244 nfs_clntfini(); 245 246 /* 247 * Clean up work performed indirectly by mod_installfs() 248 * as a result of our call to mod_install(). 249 */ 250 nfs4fini(); 251 nfs3fini(); 252 nfsfini(); 253 } 254 return (status); 255 } 256 257 int 258 _fini(void) 259 { 260 /* Don't allow module to be unloaded */ 261 return (EBUSY); 262 } 263 264 int 265 _info(struct modinfo *modinfop) 266 { 267 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 268 } 269 270 /* 271 * General utilities 272 */ 273 274 /* 275 * Returns the prefered transfer size in bytes based on 276 * what network interfaces are available. 277 */ 278 int 279 nfstsize(void) 280 { 281 /* 282 * For the moment, just return NFS_MAXDATA until we can query the 283 * appropriate transport. 284 */ 285 return (NFS_MAXDATA); 286 } 287 288 /* 289 * Returns the prefered transfer size in bytes based on 290 * what network interfaces are available. 
291 */ 292 293 /* this should reflect the largest transfer size possible */ 294 static int nfs3_max_transfer_size = 1024 * 1024; 295 296 int 297 nfs3tsize(void) 298 { 299 /* 300 * For the moment, just return nfs3_max_transfer_size until we 301 * can query the appropriate transport. 302 */ 303 return (nfs3_max_transfer_size); 304 } 305 306 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 307 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 308 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 309 310 uint_t 311 nfs3_tsize(struct knetconfig *knp) 312 { 313 314 if (knp->knc_semantics == NC_TPI_COTS_ORD || 315 knp->knc_semantics == NC_TPI_COTS) 316 return (nfs3_max_transfer_size_cots); 317 if (knp->knc_semantics == NC_TPI_RDMA) 318 return (nfs3_max_transfer_size_rdma); 319 return (nfs3_max_transfer_size_clts); 320 } 321 322 uint_t 323 rfs3_tsize(struct svc_req *req) 324 { 325 326 if (req->rq_xprt->xp_type == T_COTS_ORD || 327 req->rq_xprt->xp_type == T_COTS) 328 return (nfs3_max_transfer_size_cots); 329 if (req->rq_xprt->xp_type == T_RDMA) 330 return (nfs3_max_transfer_size_rdma); 331 return (nfs3_max_transfer_size_clts); 332 } 333 334 /* ARGSUSED */ 335 static int 336 nfsdyninit(int fstyp, char *name) 337 { 338 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 339 VFSNAME_MOUNTROOT, nfsdyn_mountroot, 340 NULL, NULL 341 }; 342 int error; 343 344 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 345 if (error != 0) 346 return (error); 347 348 return (0); 349 } 350 351 /* ARGSUSED */ 352 static int 353 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 354 { 355 char root_hostname[SYS_NMLN+1]; 356 struct servinfo *svp; 357 int error; 358 int vfsflags; 359 char *root_path; 360 struct pathname pn; 361 char *name; 362 static char token[10]; 363 struct nfs_args args; /* nfs mount arguments */ 364 365 bzero(&args, sizeof (args)); 366 367 /* do this BEFORE getfile which causes xid stamps to be initialized */ 368 clkset(-1L); /* 
hack for now - until we get time svc? */ 369 370 if (why == ROOT_REMOUNT) { 371 /* 372 * Shouldn't happen. 373 */ 374 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 375 } 376 377 if (why == ROOT_UNMOUNT) { 378 /* 379 * Nothing to do for NFS. 380 */ 381 return (0); 382 } 383 384 /* 385 * why == ROOT_INIT 386 */ 387 388 name = token; 389 *name = 0; 390 getfsname("root", name, sizeof (token)); 391 392 pn_alloc(&pn); 393 root_path = pn.pn_path; 394 395 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 396 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 397 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 398 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 399 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 400 401 /* 402 * First try version 4 403 */ 404 vfs_setops(vfsp, nfs4_vfsops); 405 args.addr = &svp->sv_addr; 406 args.fh = (char *)&svp->sv_fhandle; 407 args.knconf = svp->sv_knconf; 408 args.hostname = root_hostname; 409 vfsflags = 0; 410 411 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 412 &args, &vfsflags)) { 413 if (error != EPROTONOSUPPORT) { 414 nfs_cmn_err(error, CE_WARN, 415 "Unable to mount NFS root filesystem: %m"); 416 sv_free(svp); 417 pn_free(&pn); 418 vfs_setops(vfsp, nfsdyn_vfsops); 419 return (error); 420 } 421 422 /* 423 * Then try version 3 424 */ 425 bzero(&args, sizeof (args)); 426 vfs_setops(vfsp, nfs3_vfsops); 427 args.addr = &svp->sv_addr; 428 args.fh = (char *)&svp->sv_fhandle; 429 args.knconf = svp->sv_knconf; 430 args.hostname = root_hostname; 431 vfsflags = 0; 432 433 if (error = mount_root(*name ? 
name : "root", root_path, 434 NFS_V3, &args, &vfsflags)) { 435 if (error != EPROTONOSUPPORT) { 436 nfs_cmn_err(error, CE_WARN, 437 "Unable to mount NFS root filesystem: %m"); 438 sv_free(svp); 439 pn_free(&pn); 440 vfs_setops(vfsp, nfsdyn_vfsops); 441 return (error); 442 } 443 444 /* 445 * Finally, try version 2 446 */ 447 bzero(&args, sizeof (args)); 448 args.addr = &svp->sv_addr; 449 args.fh = (char *)&svp->sv_fhandle.fh_buf; 450 args.knconf = svp->sv_knconf; 451 args.hostname = root_hostname; 452 vfsflags = 0; 453 454 vfs_setops(vfsp, nfs_vfsops); 455 456 if (error = mount_root(*name ? name : "root", 457 root_path, NFS_VERSION, &args, 458 &vfsflags)) { 459 nfs_cmn_err(error, CE_WARN, 460 "Unable to mount NFS root filesystem: %m"); 461 sv_free(svp); 462 pn_free(&pn); 463 vfs_setops(vfsp, nfsdyn_vfsops); 464 return (error); 465 } 466 } 467 } 468 469 sv_free(svp); 470 pn_free(&pn); 471 return (VFS_MOUNTROOT(vfsp, why)); 472 } 473 474 int 475 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 476 { 477 mntinfo_t *mi; /* mount info, pointed at by vfs */ 478 STRUCT_HANDLE(nfs_args, args); 479 int flags; 480 481 #ifdef lint 482 model = model; 483 #endif 484 485 STRUCT_SET_HANDLE(args, model, buf); 486 487 flags = STRUCT_FGET(args, flags); 488 489 /* 490 * Set option fields in mount info record 491 */ 492 mi = VTOMI(vp); 493 494 if (flags & NFSMNT_NOAC) { 495 mi->mi_flags |= MI_NOAC; 496 PURGE_ATTRCACHE(vp); 497 } 498 if (flags & NFSMNT_NOCTO) 499 mi->mi_flags |= MI_NOCTO; 500 if (flags & NFSMNT_LLOCK) 501 mi->mi_flags |= MI_LLOCK; 502 if (flags & NFSMNT_GRPID) 503 mi->mi_flags |= MI_GRPID; 504 if (flags & NFSMNT_RETRANS) { 505 if (STRUCT_FGET(args, retrans) < 0) 506 return (EINVAL); 507 mi->mi_retrans = STRUCT_FGET(args, retrans); 508 } 509 if (flags & NFSMNT_TIMEO) { 510 if (STRUCT_FGET(args, timeo) <= 0) 511 return (EINVAL); 512 mi->mi_timeo = STRUCT_FGET(args, timeo); 513 /* 514 * The following scales the standard deviation and 515 * and current 
retransmission timer to match the 516 * initial value for the timeout specified. 517 */ 518 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 519 (mi->mi_timeo * hz * 2) / 5; 520 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 521 mi->mi_timeo * hz / 10; 522 } 523 if (flags & NFSMNT_RSIZE) { 524 if (STRUCT_FGET(args, rsize) <= 0) 525 return (EINVAL); 526 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 527 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 528 } 529 if (flags & NFSMNT_WSIZE) { 530 if (STRUCT_FGET(args, wsize) <= 0) 531 return (EINVAL); 532 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 533 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 534 } 535 if (flags & NFSMNT_ACREGMIN) { 536 if (STRUCT_FGET(args, acregmin) < 0) 537 mi->mi_acregmin = ACMINMAX; 538 else 539 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 540 ACMINMAX); 541 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 542 } 543 if (flags & NFSMNT_ACREGMAX) { 544 if (STRUCT_FGET(args, acregmax) < 0) 545 mi->mi_acregmax = ACMAXMAX; 546 else 547 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 548 ACMAXMAX); 549 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 550 } 551 if (flags & NFSMNT_ACDIRMIN) { 552 if (STRUCT_FGET(args, acdirmin) < 0) 553 mi->mi_acdirmin = ACMINMAX; 554 else 555 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 556 ACMINMAX); 557 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 558 } 559 if (flags & NFSMNT_ACDIRMAX) { 560 if (STRUCT_FGET(args, acdirmax) < 0) 561 mi->mi_acdirmax = ACMAXMAX; 562 else 563 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 564 ACMAXMAX); 565 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 566 } 567 568 if (flags & NFSMNT_LOOPBACK) 569 mi->mi_flags |= MI_LOOPBACK; 570 571 return (0); 572 } 573 574 /* 575 * Set or Clear direct I/O flag 576 * VOP_RWLOCK() is held for write access to prevent a race condition 577 * which would occur if a process is in the middle of a write when 578 * directio flag gets set. 
It is possible that all pages may not get flushed.
 *
 * cmd selects the operation: DIRECTIO_ON sets RDIRECTIO on the rnode
 * (after pushing out cached dirty pages, if any), DIRECTIO_OFF clears
 * it, and any other value is rejected with EINVAL.  Returns 0 on
 * success or the error from VOP_PUTPAGE() if the flush fails.
 */

/* ARGSUSED */
int
nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
{
	rnode_t *rp = VTOR(vp);
	int rc = 0;

	switch (cmd) {
	case DIRECTIO_ON:
		break;

	case DIRECTIO_OFF:
		mutex_enter(&rp->r_statelock);
		rp->r_flags &= ~RDIRECTIO;	/* disable direct mode */
		mutex_exit(&rp->r_statelock);
		return (0);

	default:
		return (EINVAL);
	}

	/* Already in direct mode: nothing to do, no need to lock. */
	if (rp->r_flags & RDIRECTIO)
		return (0);

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

	/* Look again now that the lock is held. */
	if (rp->r_flags & RDIRECTIO)
		goto unlock;

	/*
	 * Flush the page cache.
	 */
	if (vn_has_cached_data(vp) &&
	    ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
		rc = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_INVAL, cr);
		if (rc != 0) {
			/*
			 * Out-of-space and over-quota failures are
			 * recorded on the rnode unless an error is
			 * already recorded there.
			 */
			if (rc == ENOSPC || rc == EDQUOT) {
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = rc;
				mutex_exit(&rp->r_statelock);
			}
			goto unlock;
		}
	}

	mutex_enter(&rp->r_statelock);
	rp->r_flags |= RDIRECTIO;
	mutex_exit(&rp->r_statelock);

unlock:
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	return (rc);
}