1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T 27 * All rights reserved. 28 */ 29 30 #include <sys/errno.h> 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/user.h> 34 #include <sys/stat.h> 35 #include <sys/time.h> 36 #include <sys/utsname.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/vnode.h> 40 #include <sys/pathname.h> 41 #include <sys/bootconf.h> 42 #include <fs/fs_subr.h> 43 #include <rpc/types.h> 44 #include <nfs/nfs.h> 45 #include <nfs/nfs4.h> 46 #include <nfs/nfs_clnt.h> 47 #include <nfs/rnode.h> 48 #include <nfs/mount.h> 49 #include <nfs/nfssys.h> 50 #include <sys/debug.h> 51 #include <sys/cmn_err.h> 52 #include <sys/file.h> 53 #include <sys/fcntl.h> 54 #include <sys/zone.h> 55 56 /* 57 * This is the loadable module wrapper. 58 */ 59 #include <sys/systm.h> 60 #include <sys/modctl.h> 61 #include <sys/syscall.h> 62 #include <sys/ddi.h> 63 64 #include <rpc/types.h> 65 #include <rpc/auth.h> 66 #include <rpc/clnt.h> 67 #include <rpc/svc.h> 68 69 /* 70 * The pseudo NFS filesystem to allow diskless booting to dynamically 71 * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements 72 * the VFS_MOUNTROOT op and is only intended to be used by the 73 * diskless booting code until the real root filesystem is mounted. 74 * Nothing else should ever call this! 75 * 76 * The strategy is that if the initial rootfs type is set to "nfsdyn" 77 * by loadrootmodules() this filesystem is called to mount the 78 * root filesystem. It first attempts to mount a V4 filesystem, and if that 79 * fails due to an RPC version mismatch it tries V3 and finally V2. 80 * Once the real mount succeeds the vfsops and rootfs name are changed 81 * to reflect the real filesystem type. 82 */ 83 static int nfsdyninit(int, char *); 84 static int nfsdyn_mountroot(vfs_t *, whymountroot_t); 85 86 vfsops_t *nfsdyn_vfsops; 87 88 /* 89 * The following data structures are used to configure the NFS 90 * system call, the NFS Version 2 client VFS, and the NFS Version 91 * 3 client VFS into the system. The NFS Version 4 structures are defined in 92 * nfs4_common.c 93 */ 94 95 /* 96 * The NFS system call. 97 */ 98 static struct sysent nfssysent = { 99 2, 100 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, 101 nfssys 102 }; 103 104 static struct modlsys modlsys = { 105 &mod_syscallops, 106 "NFS syscall, client, and common", 107 &nfssysent 108 }; 109 110 #ifdef _SYSCALL32_IMPL 111 static struct modlsys modlsys32 = { 112 &mod_syscallops32, 113 "NFS syscall, client, and common (32-bit)", 114 &nfssysent 115 }; 116 #endif /* _SYSCALL32_IMPL */ 117 118 /* 119 * The NFS Dynamic client VFS. 120 */ 121 static vfsdef_t vfw = { 122 VFSDEF_VERSION, 123 "nfsdyn", 124 nfsdyninit, 125 VSW_ZMOUNT, 126 NULL 127 }; 128 129 static struct modlfs modlfs = { 130 &mod_fsops, 131 "network filesystem", 132 &vfw 133 }; 134 135 /* 136 * The NFS Version 2 client VFS. 137 */ 138 static vfsdef_t vfw2 = { 139 VFSDEF_VERSION, 140 "nfs", 141 nfsinit, 142 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, 143 NULL 144 }; 145 146 static struct modlfs modlfs2 = { 147 &mod_fsops, 148 "network filesystem version 2", 149 &vfw2 150 }; 151 152 /* 153 * The NFS Version 3 client VFS. 154 */ 155 static vfsdef_t vfw3 = { 156 VFSDEF_VERSION, 157 "nfs3", 158 nfs3init, 159 VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, 160 NULL 161 }; 162 163 static struct modlfs modlfs3 = { 164 &mod_fsops, 165 "network filesystem version 3", 166 &vfw3 167 }; 168 169 extern struct modlfs modlfs4; 170 171 /* 172 * We have too many linkage structures so we define our own XXX 173 */ 174 struct modlinkage_big { 175 int ml_rev; /* rev of loadable modules system */ 176 void *ml_linkage[7]; /* NULL terminated list of */ 177 /* linkage structures */ 178 }; 179 180 /* 181 * All of the module configuration linkages required to configure 182 * the system call and client VFS's into the system. 183 */ 184 static struct modlinkage_big modlinkage = { 185 MODREV_1, 186 &modlsys, 187 #ifdef _SYSCALL32_IMPL 188 &modlsys32, 189 #endif 190 &modlfs, 191 &modlfs2, 192 &modlfs3, 193 &modlfs4, 194 NULL 195 }; 196 197 /* 198 * specfs - for getfsname only?? 199 * rpcmod - too many symbols to build stubs for them all 200 */ 201 char _depends_on[] = "fs/specfs strmod/rpcmod misc/rpcsec"; 202 203 /* 204 * This routine is invoked automatically when the kernel module 205 * containing this routine is loaded. This allows module specific 206 * initialization to be done when the module is loaded. 207 */ 208 int 209 _init(void) 210 { 211 int status; 212 213 if ((status = nfs_clntinit()) != 0) { 214 cmn_err(CE_WARN, "_init: nfs_clntinit failed"); 215 return (status); 216 } 217 218 /* 219 * Create the version specific kstats. 220 * 221 * PSARC 2001/697 Contract Private Interface 222 * All nfs kstats are under SunMC contract 223 * Please refer to the PSARC listed above and contact 224 * SunMC before making any changes! 225 * 226 * Changes must be reviewed by Solaris File Sharing 227 * Changes must be communicated to contract-2001-697@sun.com 228 * 229 */ 230 231 zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, 232 nfsstat_zone_fini); 233 status = mod_install((struct modlinkage *)&modlinkage); 234 235 if (status) { 236 (void) zone_key_delete(nfsstat_zone_key); 237 238 /* 239 * Failed to install module, cleanup previous 240 * initialization work. 241 */ 242 nfs_clntfini(); 243 244 /* 245 * Clean up work performed indirectly by mod_installfs() 246 * as a result of our call to mod_install(). 247 */ 248 nfs4fini(); 249 nfs3fini(); 250 nfsfini(); 251 } 252 return (status); 253 } 254 255 int 256 _fini(void) 257 { 258 /* Don't allow module to be unloaded */ 259 return (EBUSY); 260 } 261 262 int 263 _info(struct modinfo *modinfop) 264 { 265 return (mod_info((struct modlinkage *)&modlinkage, modinfop)); 266 } 267 268 /* 269 * General utilities 270 */ 271 272 /* 273 * Returns the preferred transfer size in bytes based on 274 * what network interfaces are available. 275 */ 276 int 277 nfstsize(void) 278 { 279 /* 280 * For the moment, just return NFS_MAXDATA until we can query the 281 * appropriate transport. 282 */ 283 return (NFS_MAXDATA); 284 } 285 286 /* 287 * Returns the preferred transfer size in bytes based on 288 * what network interfaces are available. 289 */ 290 291 /* this should reflect the largest transfer size possible */ 292 static int nfs3_max_transfer_size = 1024 * 1024; 293 294 int 295 nfs3tsize(void) 296 { 297 /* 298 * For the moment, just return nfs3_max_transfer_size until we 299 * can query the appropriate transport. 300 */ 301 return (nfs3_max_transfer_size); 302 } 303 304 static uint_t nfs3_max_transfer_size_clts = 32 * 1024; 305 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; 306 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; 307 308 uint_t 309 nfs3_tsize(struct knetconfig *knp) 310 { 311 312 if (knp->knc_semantics == NC_TPI_COTS_ORD || 313 knp->knc_semantics == NC_TPI_COTS) 314 return (nfs3_max_transfer_size_cots); 315 if (knp->knc_semantics == NC_TPI_RDMA) 316 return (nfs3_max_transfer_size_rdma); 317 return (nfs3_max_transfer_size_clts); 318 } 319 320 uint_t 321 rfs3_tsize(struct svc_req *req) 322 { 323 324 if (req->rq_xprt->xp_type == T_COTS_ORD || 325 req->rq_xprt->xp_type == T_COTS) 326 return (nfs3_max_transfer_size_cots); 327 if (req->rq_xprt->xp_type == T_RDMA) 328 return (nfs3_max_transfer_size_rdma); 329 return (nfs3_max_transfer_size_clts); 330 } 331 332 /* ARGSUSED */ 333 static int 334 nfsdyninit(int fstyp, char *name) 335 { 336 static const fs_operation_def_t nfsdyn_vfsops_template[] = { 337 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot }, 338 NULL, NULL 339 }; 340 int error; 341 342 error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); 343 if (error != 0) 344 return (error); 345 346 return (0); 347 } 348 349 /* ARGSUSED */ 350 static int 351 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) 352 { 353 char root_hostname[SYS_NMLN+1]; 354 struct servinfo *svp; 355 int error; 356 int vfsflags; 357 char *root_path; 358 struct pathname pn; 359 char *name; 360 static char token[10]; 361 struct nfs_args args; /* nfs mount arguments */ 362 363 bzero(&args, sizeof (args)); 364 365 /* do this BEFORE getfile which causes xid stamps to be initialized */ 366 clkset(-1L); /* hack for now - until we get time svc? */ 367 368 if (why == ROOT_REMOUNT) { 369 /* 370 * Shouldn't happen. 371 */ 372 panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); 373 } 374 375 if (why == ROOT_UNMOUNT) { 376 /* 377 * Nothing to do for NFS. 378 */ 379 return (0); 380 } 381 382 /* 383 * why == ROOT_INIT 384 */ 385 386 name = token; 387 *name = 0; 388 getfsname("root", name, sizeof (token)); 389 390 pn_alloc(&pn); 391 root_path = pn.pn_path; 392 393 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 394 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 395 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 396 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 397 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 398 399 /* 400 * First try version 4 401 */ 402 vfs_setops(vfsp, nfs4_vfsops); 403 args.addr = &svp->sv_addr; 404 args.fh = (char *)&svp->sv_fhandle; 405 args.knconf = svp->sv_knconf; 406 args.hostname = root_hostname; 407 vfsflags = 0; 408 409 if (error = mount_root(*name ? name : "root", root_path, NFS_V4, 410 &args, &vfsflags)) { 411 if (error != EPROTONOSUPPORT) { 412 nfs_cmn_err(error, CE_WARN, 413 "Unable to mount NFS root filesystem: %m"); 414 sv_free(svp); 415 pn_free(&pn); 416 vfs_setops(vfsp, nfsdyn_vfsops); 417 return (error); 418 } 419 420 /* 421 * Then try version 3 422 */ 423 bzero(&args, sizeof (args)); 424 vfs_setops(vfsp, nfs3_vfsops); 425 args.addr = &svp->sv_addr; 426 args.fh = (char *)&svp->sv_fhandle; 427 args.knconf = svp->sv_knconf; 428 args.hostname = root_hostname; 429 vfsflags = 0; 430 431 if (error = mount_root(*name ? name : "root", root_path, 432 NFS_V3, &args, &vfsflags)) { 433 if (error != EPROTONOSUPPORT) { 434 nfs_cmn_err(error, CE_WARN, 435 "Unable to mount NFS root filesystem: %m"); 436 sv_free(svp); 437 pn_free(&pn); 438 vfs_setops(vfsp, nfsdyn_vfsops); 439 return (error); 440 } 441 442 /* 443 * Finally, try version 2 444 */ 445 bzero(&args, sizeof (args)); 446 args.addr = &svp->sv_addr; 447 args.fh = (char *)&svp->sv_fhandle.fh_buf; 448 args.knconf = svp->sv_knconf; 449 args.hostname = root_hostname; 450 vfsflags = 0; 451 452 vfs_setops(vfsp, nfs_vfsops); 453 454 if (error = mount_root(*name ? name : "root", 455 root_path, NFS_VERSION, &args, &vfsflags)) { 456 nfs_cmn_err(error, CE_WARN, 457 "Unable to mount NFS root filesystem: %m"); 458 sv_free(svp); 459 pn_free(&pn); 460 vfs_setops(vfsp, nfsdyn_vfsops); 461 return (error); 462 } 463 } 464 } 465 466 sv_free(svp); 467 pn_free(&pn); 468 return (VFS_MOUNTROOT(vfsp, why)); 469 } 470 471 int 472 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) 473 { 474 mntinfo_t *mi; /* mount info, pointed at by vfs */ 475 STRUCT_HANDLE(nfs_args, args); 476 int flags; 477 478 #ifdef lint 479 model = model; 480 #endif 481 482 STRUCT_SET_HANDLE(args, model, buf); 483 484 flags = STRUCT_FGET(args, flags); 485 486 /* 487 * Set option fields in mount info record 488 */ 489 mi = VTOMI(vp); 490 491 if (flags & NFSMNT_NOAC) { 492 mi->mi_flags |= MI_NOAC; 493 PURGE_ATTRCACHE(vp); 494 } 495 if (flags & NFSMNT_NOCTO) 496 mi->mi_flags |= MI_NOCTO; 497 if (flags & NFSMNT_LLOCK) 498 mi->mi_flags |= MI_LLOCK; 499 if (flags & NFSMNT_GRPID) 500 mi->mi_flags |= MI_GRPID; 501 if (flags & NFSMNT_RETRANS) { 502 if (STRUCT_FGET(args, retrans) < 0) 503 return (EINVAL); 504 mi->mi_retrans = STRUCT_FGET(args, retrans); 505 } 506 if (flags & NFSMNT_TIMEO) { 507 if (STRUCT_FGET(args, timeo) <= 0) 508 return (EINVAL); 509 mi->mi_timeo = STRUCT_FGET(args, timeo); 510 /* 511 * The following scales the standard deviation and 512 * and current retransmission timer to match the 513 * initial value for the timeout specified. 514 */ 515 mi->mi_timers[NFS_CALLTYPES].rt_deviate = 516 (mi->mi_timeo * hz * 2) / 5; 517 mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = 518 mi->mi_timeo * hz / 10; 519 } 520 if (flags & NFSMNT_RSIZE) { 521 if (STRUCT_FGET(args, rsize) <= 0) 522 return (EINVAL); 523 mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); 524 mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); 525 } 526 if (flags & NFSMNT_WSIZE) { 527 if (STRUCT_FGET(args, wsize) <= 0) 528 return (EINVAL); 529 mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); 530 mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); 531 } 532 if (flags & NFSMNT_ACREGMIN) { 533 if (STRUCT_FGET(args, acregmin) < 0) 534 mi->mi_acregmin = ACMINMAX; 535 else 536 mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), 537 ACMINMAX); 538 mi->mi_acregmin = SEC2HR(mi->mi_acregmin); 539 } 540 if (flags & NFSMNT_ACREGMAX) { 541 if (STRUCT_FGET(args, acregmax) < 0) 542 mi->mi_acregmax = ACMAXMAX; 543 else 544 mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), 545 ACMAXMAX); 546 mi->mi_acregmax = SEC2HR(mi->mi_acregmax); 547 } 548 if (flags & NFSMNT_ACDIRMIN) { 549 if (STRUCT_FGET(args, acdirmin) < 0) 550 mi->mi_acdirmin = ACMINMAX; 551 else 552 mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), 553 ACMINMAX); 554 mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); 555 } 556 if (flags & NFSMNT_ACDIRMAX) { 557 if (STRUCT_FGET(args, acdirmax) < 0) 558 mi->mi_acdirmax = ACMAXMAX; 559 else 560 mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), 561 ACMAXMAX); 562 mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); 563 } 564 565 if (flags & NFSMNT_LOOPBACK) 566 mi->mi_flags |= MI_LOOPBACK; 567 568 return (0); 569 } 570 571 /* 572 * Set or Clear direct I/O flag 573 * VOP_RWLOCK() is held for write access to prevent a race condition 574 * which would occur if a process is in the middle of a write when 575 * directio flag gets set. It is possible that all pages may not get flushed. 576 */ 577 578 /* ARGSUSED */ 579 int 580 nfs_directio(vnode_t *vp, int cmd, cred_t *cr) 581 { 582 int error = 0; 583 rnode_t *rp; 584 585 rp = VTOR(vp); 586 587 if (cmd == DIRECTIO_ON) { 588 589 if (rp->r_flags & RDIRECTIO) 590 return (0); 591 592 /* 593 * Flush the page cache. 594 */ 595 596 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 597 598 if (rp->r_flags & RDIRECTIO) { 599 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 600 return (0); 601 } 602 603 if (vn_has_cached_data(vp) && 604 ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { 605 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, 606 B_INVAL, cr, NULL); 607 if (error) { 608 if (error == ENOSPC || error == EDQUOT) { 609 mutex_enter(&rp->r_statelock); 610 if (!rp->r_error) 611 rp->r_error = error; 612 mutex_exit(&rp->r_statelock); 613 } 614 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 615 return (error); 616 } 617 } 618 619 mutex_enter(&rp->r_statelock); 620 rp->r_flags |= RDIRECTIO; 621 mutex_exit(&rp->r_statelock); 622 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 623 return (0); 624 } 625 626 if (cmd == DIRECTIO_OFF) { 627 mutex_enter(&rp->r_statelock); 628 rp->r_flags &= ~RDIRECTIO; /* disable direct mode */ 629 mutex_exit(&rp->r_statelock); 630 return (0); 631 } 632 633 return (EINVAL); 634 } 635