1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 28 * All rights reserved. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/errno.h> 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/user.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 #include <sys/utsname.h> 40 #include <sys/vfs.h> 41 #include <sys/vfs_opreg.h> 42 #include <sys/vnode.h> 43 #include <sys/pathname.h> 44 #include <sys/bootconf.h> 45 #include <fs/fs_subr.h> 46 #include <rpc/types.h> 47 #include <nfs/nfs.h> 48 #include <nfs/nfs4.h> 49 #include <nfs/nfs_clnt.h> 50 #include <nfs/rnode.h> 51 #include <nfs/mount.h> 52 #include <nfs/nfssys.h> 53 #include <sys/debug.h> 54 #include <sys/cmn_err.h> 55 #include <sys/file.h> 56 #include <sys/fcntl.h> 57 #include <sys/zone.h> 58 59 /* 60 * This is the loadable module wrapper. 
61 */ 62 #include <sys/systm.h> 63 #include <sys/modctl.h> 64 #include <sys/syscall.h> 65 #include <sys/ddi.h> 66 67 #include <rpc/types.h> 68 #include <rpc/auth.h> 69 #include <rpc/clnt.h> 70 #include <rpc/svc.h> 71 72 /* 73 * The psuedo NFS filesystem to allow diskless booting to dynamically 74 * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements 75 * the VFS_MOUNTROOT op and is only intended to be used by the 76 * diskless booting code until the real root filesystem is mounted. 77 * Nothing else should ever call this! 78 * 79 * The strategy is that if the initial rootfs type is set to "nfsdyn" 80 * by loadrootmodules() this filesystem is called to mount the 81 * root filesystem. It first attempts to mount a V4 filesystem, and if that 82 * fails due to an RPC version mismatch it tries V3 and finally V2. 83 * Once the real mount succeeds the vfsops and rootfs name are changed 84 * to reflect the real filesystem type. 85 */ 86 static int nfsdyninit(int, char *); 87 static int nfsdyn_mountroot(vfs_t *, whymountroot_t); 88 89 vfsops_t *nfsdyn_vfsops; 90 91 /* 92 * The following data structures are used to configure the NFS 93 * system call, the NFS Version 2 client VFS, and the NFS Version 94 * 3 client VFS into the system. The NFS Version 4 structures are defined in 95 * nfs4_common.c 96 */ 97 98 /* 99 * The NFS system call. 100 */ 101 static struct sysent nfssysent = { 102 2, 103 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 104 nfssys 105 }; 106 107 static struct modlsys modlsys = { 108 &mod_syscallops, 109 "NFS syscall, client, and common", 110 &nfssysent 111 }; 112 113 #ifdef _SYSCALL32_IMPL 114 static struct modlsys modlsys32 = { 115 &mod_syscallops32, 116 "NFS syscall, client, and common (32-bit)", 117 &nfssysent 118 }; 119 #endif /* _SYSCALL32_IMPL */ 120 121 /* 122 * The NFS Dynamic client VFS. 
123 */ 124 static vfsdef_t vfw = { 125 VFSDEF_VERSION, 126 "nfsdyn", 127 nfsdyninit, 128 0, 129 NULL 130 }; 131 132 static struct modlfs modlfs = { 133 &mod_fsops, 134 "network filesystem", 135 &vfw 136 }; 137 138 /* 139 * The NFS Version 2 client VFS. 140 */ 141 static vfsdef_t vfw2 = { 142 VFSDEF_VERSION, 143 "nfs", 144 nfsinit, 145 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 146 NULL 147 }; 148 149 static struct modlfs modlfs2 = { 150 &mod_fsops, 151 "network filesystem version 2", 152 &vfw2 153 }; 154 155 /* 156 * The NFS Version 3 client VFS. 157 */ 158 static vfsdef_t vfw3 = { 159 VFSDEF_VERSION, 160 "nfs3", 161 nfs3init, 162 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, 163 NULL 164 }; 165 166 static struct modlfs modlfs3 = { 167 &mod_fsops, 168 "network filesystem version 3", 169 &vfw3 170 }; 171 172 extern struct modlfs modlfs4; 173 174 /* 175 * We have too many linkage structures so we define our own XXX 176 */ 177 struct modlinkage_big { 178 int ml_rev; /* rev of loadable modules system */ 179 void *ml_linkage[7]; /* NULL terminated list of */ 180 /* linkage structures */ 181 }; 182 183 /* 184 * All of the module configuration linkages required to configure 185 * the system call and client VFS's into the system. 186 */ 187 static struct modlinkage_big modlinkage = { 188 MODREV_1, 189 &modlsys, 190 #ifdef _SYSCALL32_IMPL 191 &modlsys32, 192 #endif 193 &modlfs, 194 &modlfs2, 195 &modlfs3, 196 &modlfs4, 197 NULL 198 }; 199 200 /* 201 * specfs - for getfsname only?? 202 * rpcmod - too many symbols to build stubs for them all 203 */ 204 char _depends_on[] = "fs/specfs strmod/rpcmod misc/rpcsec"; 205 206 /* 207 * This routine is invoked automatically when the kernel module 208 * containing this routine is loaded. This allows module specific 209 * initialization to be done when the module is loaded. 
210 */ 211 int 212 _init(void) 213 { 214 int status; 215 216 if ((status = nfs_clntinit()) != 0) { 217 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 218 return (status); 219 } 220 221 /* 222 * Create the version specific kstats. 223 * 224 * PSARC 2001/697 Contract Private Interface 225 * All nfs kstats are under SunMC contract 226 * Please refer to the PSARC listed above and contact 227 * SunMC before making any changes! 228 * 229 * Changes must be reviewed by Solaris File Sharing 230 * Changes must be communicated to contract-2001-697@sun.com 231 * 232 */ 233 234 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 235 nfsstat_zone_fini); 236 status = mod_install((struct modlinkage *)&modlinkage); 237 238 if (status) { 239 (void) zone_key_delete(nfsstat_zone_key); 240 241 /* 242 * Failed to install module, cleanup previous 243 * initialization work. 244 */ 245 nfs_clntfini(); 246 247 /* 248 * Clean up work performed indirectly by mod_installfs() 249 * as a result of our call to mod_install(). 250 */ 251 nfs4fini(); 252 nfs3fini(); 253 nfsfini(); 254 } 255 return (status); 256 } 257 258 int 259 _fini(void) 260 { 261 /* Don't allow module to be unloaded */ 262 return (EBUSY); 263 } 264 265 int 266 _info(struct modinfo *modinfop) 267 { 268 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 269 } 270 271 /* 272 * General utilities 273 */ 274 275 /* 276 * Returns the prefered transfer size in bytes based on 277 * what network interfaces are available. 278 */ 279 int 280 nfstsize(void) 281 { 282 /* 283 * For the moment, just return NFS_MAXDATA until we can query the 284 * appropriate transport. 285 */ 286 return (NFS_MAXDATA); 287 } 288 289 /* 290 * Returns the prefered transfer size in bytes based on 291 * what network interfaces are available. 
292 */ 293 294 /* this should reflect the largest transfer size possible */ 295 static int nfs3_max_transfer_size = 1024 * 1024; 296 297 int 298 nfs3tsize(void) 299 { 300 /* 301 * For the moment, just return nfs3_max_transfer_size until we 302 * can query the appropriate transport. 303 */ 304 return (nfs3_max_transfer_size); 305 } 306 307 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 308 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 309 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 310 311 uint_t 312 nfs3_tsize(struct knetconfig *knp) 313 { 314 315 if (knp->knc_semantics == NC_TPI_COTS_ORD || 316 knp->knc_semantics == NC_TPI_COTS) 317 return (nfs3_max_transfer_size_cots); 318 if (knp->knc_semantics == NC_TPI_RDMA) 319 return (nfs3_max_transfer_size_rdma); 320 return (nfs3_max_transfer_size_clts); 321 } 322 323 uint_t 324 rfs3_tsize(struct svc_req *req) 325 { 326 327 if (req->rq_xprt->xp_type == T_COTS_ORD || 328 req->rq_xprt->xp_type == T_COTS) 329 return (nfs3_max_transfer_size_cots); 330 if (req->rq_xprt->xp_type == T_RDMA) 331 return (nfs3_max_transfer_size_rdma); 332 return (nfs3_max_transfer_size_clts); 333 } 334 335 /* ARGSUSED */ 336 static int 337 nfsdyninit(int fstyp, char *name) 338 { 339 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 340 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot }, 341 NULL, NULL 342 }; 343 int error; 344 345 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 346 if (error != 0) 347 return (error); 348 349 return (0); 350 } 351 352 /* ARGSUSED */ 353 static int 354 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 355 { 356 char root_hostname[SYS_NMLN+1]; 357 struct servinfo *svp; 358 int error; 359 int vfsflags; 360 char *root_path; 361 struct pathname pn; 362 char *name; 363 static char token[10]; 364 struct nfs_args args; /* nfs mount arguments */ 365 366 bzero(&args, sizeof (args)); 367 368 /* do this BEFORE getfile which causes xid stamps to be initialized 
*/ 369 clkset(-1L); /* hack for now - until we get time svc? */ 370 371 if (why == ROOT_REMOUNT) { 372 /* 373 * Shouldn't happen. 374 */ 375 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 376 } 377 378 if (why == ROOT_UNMOUNT) { 379 /* 380 * Nothing to do for NFS. 381 */ 382 return (0); 383 } 384 385 /* 386 * why == ROOT_INIT 387 */ 388 389 name = token; 390 *name = 0; 391 getfsname("root", name, sizeof (token)); 392 393 pn_alloc(&pn); 394 root_path = pn.pn_path; 395 396 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 397 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 398 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 399 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 400 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 401 402 /* 403 * First try version 4 404 */ 405 vfs_setops(vfsp, nfs4_vfsops); 406 args.addr = &svp->sv_addr; 407 args.fh = (char *)&svp->sv_fhandle; 408 args.knconf = svp->sv_knconf; 409 args.hostname = root_hostname; 410 vfsflags = 0; 411 412 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 413 &args, &vfsflags)) { 414 if (error != EPROTONOSUPPORT) { 415 nfs_cmn_err(error, CE_WARN, 416 "Unable to mount NFS root filesystem: %m"); 417 sv_free(svp); 418 pn_free(&pn); 419 vfs_setops(vfsp, nfsdyn_vfsops); 420 return (error); 421 } 422 423 /* 424 * Then try version 3 425 */ 426 bzero(&args, sizeof (args)); 427 vfs_setops(vfsp, nfs3_vfsops); 428 args.addr = &svp->sv_addr; 429 args.fh = (char *)&svp->sv_fhandle; 430 args.knconf = svp->sv_knconf; 431 args.hostname = root_hostname; 432 vfsflags = 0; 433 434 if (error = mount_root(*name ? 
name : "root", root_path, 435 NFS_V3, &args, &vfsflags)) { 436 if (error != EPROTONOSUPPORT) { 437 nfs_cmn_err(error, CE_WARN, 438 "Unable to mount NFS root filesystem: %m"); 439 sv_free(svp); 440 pn_free(&pn); 441 vfs_setops(vfsp, nfsdyn_vfsops); 442 return (error); 443 } 444 445 /* 446 * Finally, try version 2 447 */ 448 bzero(&args, sizeof (args)); 449 args.addr = &svp->sv_addr; 450 args.fh = (char *)&svp->sv_fhandle.fh_buf; 451 args.knconf = svp->sv_knconf; 452 args.hostname = root_hostname; 453 vfsflags = 0; 454 455 vfs_setops(vfsp, nfs_vfsops); 456 457 if (error = mount_root(*name ? name : "root", 458 root_path, NFS_VERSION, &args, 459 &vfsflags)) { 460 nfs_cmn_err(error, CE_WARN, 461 "Unable to mount NFS root filesystem: %m"); 462 sv_free(svp); 463 pn_free(&pn); 464 vfs_setops(vfsp, nfsdyn_vfsops); 465 return (error); 466 } 467 } 468 } 469 470 sv_free(svp); 471 pn_free(&pn); 472 return (VFS_MOUNTROOT(vfsp, why)); 473 } 474 475 int 476 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 477 { 478 mntinfo_t *mi; /* mount info, pointed at by vfs */ 479 STRUCT_HANDLE(nfs_args, args); 480 int flags; 481 482 #ifdef lint 483 model = model; 484 #endif 485 486 STRUCT_SET_HANDLE(args, model, buf); 487 488 flags = STRUCT_FGET(args, flags); 489 490 /* 491 * Set option fields in mount info record 492 */ 493 mi = VTOMI(vp); 494 495 if (flags & NFSMNT_NOAC) { 496 mi->mi_flags |= MI_NOAC; 497 PURGE_ATTRCACHE(vp); 498 } 499 if (flags & NFSMNT_NOCTO) 500 mi->mi_flags |= MI_NOCTO; 501 if (flags & NFSMNT_LLOCK) 502 mi->mi_flags |= MI_LLOCK; 503 if (flags & NFSMNT_GRPID) 504 mi->mi_flags |= MI_GRPID; 505 if (flags & NFSMNT_RETRANS) { 506 if (STRUCT_FGET(args, retrans) < 0) 507 return (EINVAL); 508 mi->mi_retrans = STRUCT_FGET(args, retrans); 509 } 510 if (flags & NFSMNT_TIMEO) { 511 if (STRUCT_FGET(args, timeo) <= 0) 512 return (EINVAL); 513 mi->mi_timeo = STRUCT_FGET(args, timeo); 514 /* 515 * The following scales the standard deviation and 516 * and current 
retransmission timer to match the 517 * initial value for the timeout specified. 518 */ 519 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 520 (mi->mi_timeo * hz * 2) / 5; 521 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 522 mi->mi_timeo * hz / 10; 523 } 524 if (flags & NFSMNT_RSIZE) { 525 if (STRUCT_FGET(args, rsize) <= 0) 526 return (EINVAL); 527 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 528 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 529 } 530 if (flags & NFSMNT_WSIZE) { 531 if (STRUCT_FGET(args, wsize) <= 0) 532 return (EINVAL); 533 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 534 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 535 } 536 if (flags & NFSMNT_ACREGMIN) { 537 if (STRUCT_FGET(args, acregmin) < 0) 538 mi->mi_acregmin = ACMINMAX; 539 else 540 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 541 ACMINMAX); 542 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 543 } 544 if (flags & NFSMNT_ACREGMAX) { 545 if (STRUCT_FGET(args, acregmax) < 0) 546 mi->mi_acregmax = ACMAXMAX; 547 else 548 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 549 ACMAXMAX); 550 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 551 } 552 if (flags & NFSMNT_ACDIRMIN) { 553 if (STRUCT_FGET(args, acdirmin) < 0) 554 mi->mi_acdirmin = ACMINMAX; 555 else 556 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 557 ACMINMAX); 558 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 559 } 560 if (flags & NFSMNT_ACDIRMAX) { 561 if (STRUCT_FGET(args, acdirmax) < 0) 562 mi->mi_acdirmax = ACMAXMAX; 563 else 564 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 565 ACMAXMAX); 566 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 567 } 568 569 if (flags & NFSMNT_LOOPBACK) 570 mi->mi_flags |= MI_LOOPBACK; 571 572 return (0); 573 } 574 575 /* 576 * Set or Clear direct I/O flag 577 * VOP_RWLOCK() is held for write access to prevent a race condition 578 * which would occur if a process is in the middle of a write when 579 * directio flag gets set. 
It is possible that all pages may not get flushed.
 */

/* ARGSUSED */
int
nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
{
	rnode_t *rp = VTOR(vp);
	int result = 0;

	if (cmd != DIRECTIO_ON) {
		if (cmd != DIRECTIO_OFF)
			return (EINVAL);

		/* disable direct mode */
		mutex_enter(&rp->r_statelock);
		rp->r_flags &= ~RDIRECTIO;
		mutex_exit(&rp->r_statelock);
		return (0);
	}

	/* DIRECTIO_ON: nothing to do if direct mode is already enabled. */
	if (rp->r_flags & RDIRECTIO)
		return (0);

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

	/* Look again now that the write lock is held. */
	if (rp->r_flags & RDIRECTIO)
		goto unlock;

	/*
	 * Flush the page cache, but only when there is cached data that
	 * is dirty or has writes outstanding.
	 */
	if (vn_has_cached_data(vp) &&
	    ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
		result = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
		    B_INVAL, cr);
		if (result != 0) {
			/*
			 * Out-of-space and over-quota failures are
			 * recorded in the rnode, unless an error is
			 * already recorded there.
			 */
			if (result == ENOSPC || result == EDQUOT) {
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = result;
				mutex_exit(&rp->r_statelock);
			}
			goto unlock;
		}
	}

	mutex_enter(&rp->r_statelock);
	rp->r_flags |= RDIRECTIO;
	mutex_exit(&rp->r_statelock);

unlock:
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	return (result);
}