/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/pathname.h> 39 #include <sys/sysmacros.h> 40 #include <sys/kmem.h> 41 #include <sys/mkdev.h> 42 #include <sys/mount.h> 43 #include <sys/mntent.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/policy.h> 54 #include <sys/zone.h> 55 #include <sys/class.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 59 #include <rpc/types.h> 60 #include <rpc/auth.h> 61 #include <rpc/clnt.h> 62 63 #include <nfs/nfs.h> 64 #include <nfs/nfs_clnt.h> 65 #include <nfs/rnode.h> 66 #include <nfs/mount.h> 67 #include <nfs/nfs_acl.h> 68 69 #include <fs/fs_subr.h> 70 71 /* 72 * From rpcsec module (common/rpcsec). 73 */ 74 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 75 extern void sec_clnt_freeinfo(struct sec_data *); 76 77 static int pathconf_get(struct mntinfo *, model_t, struct nfs_args *); 78 static void pathconf_rele(struct mntinfo *); 79 80 /* 81 * The order and contents of this structure must be kept in sync with that of 82 * rfsreqcnt_v2_tmpl in nfs_stats.c 83 */ 84 static char *rfsnames_v2[] = { 85 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 86 "unused", "write", "create", "remove", "rename", "link", "symlink", 87 "mkdir", "rmdir", "readdir", "fsstat" 88 }; 89 90 /* 91 * This table maps from NFS protocol number into call type. 92 * Zero means a "Lookup" type call 93 * One means a "Read" type call 94 * Two means a "Write" type call 95 * This is used to select a default time-out. 
96 */ 97 static uchar_t call_type_v2[] = { 98 0, 0, 1, 0, 0, 0, 1, 99 0, 2, 2, 2, 2, 2, 2, 100 2, 2, 1, 0 101 }; 102 103 /* 104 * Similar table, but to determine which timer to use 105 * (only real reads and writes!) 106 */ 107 static uchar_t timer_type_v2[] = { 108 0, 0, 0, 0, 0, 0, 1, 109 0, 2, 0, 0, 0, 0, 0, 110 0, 0, 1, 0 111 }; 112 113 /* 114 * This table maps from NFS protocol number into a call type 115 * for the semisoft mount option. 116 * Zero means do not repeat operation. 117 * One means repeat. 118 */ 119 static uchar_t ss_call_type_v2[] = { 120 0, 0, 1, 0, 0, 0, 0, 121 0, 1, 1, 1, 1, 1, 1, 122 1, 1, 0, 0 123 }; 124 125 /* 126 * nfs vfs operations. 127 */ 128 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 129 static int nfs_unmount(vfs_t *, int, cred_t *); 130 static int nfs_root(vfs_t *, vnode_t **); 131 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 132 static int nfs_sync(vfs_t *, short, cred_t *); 133 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 134 static int nfs_mountroot(vfs_t *, whymountroot_t); 135 static void nfs_freevfs(vfs_t *); 136 137 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 138 int, cred_t *, zone_t *); 139 140 /* 141 * Initialize the vfs structure 142 */ 143 144 int nfsfstyp; 145 vfsops_t *nfs_vfsops; 146 147 /* 148 * Debug variable to check for rdma based 149 * transport startup and cleanup. Controlled 150 * through /etc/system. Off by default. 
151 */ 152 int rdma_debug = 0; 153 154 int 155 nfsinit(int fstyp, char *name) 156 { 157 static const fs_operation_def_t nfs_vfsops_template[] = { 158 VFSNAME_MOUNT, nfs_mount, 159 VFSNAME_UNMOUNT, nfs_unmount, 160 VFSNAME_ROOT, nfs_root, 161 VFSNAME_STATVFS, nfs_statvfs, 162 VFSNAME_SYNC, (fs_generic_func_p) nfs_sync, 163 VFSNAME_VGET, nfs_vget, 164 VFSNAME_MOUNTROOT, nfs_mountroot, 165 VFSNAME_FREEVFS, (fs_generic_func_p)nfs_freevfs, 166 NULL, NULL 167 }; 168 int error; 169 170 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 171 if (error != 0) { 172 zcmn_err(GLOBAL_ZONEID, CE_WARN, 173 "nfsinit: bad vfs ops template"); 174 return (error); 175 } 176 177 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 178 if (error != 0) { 179 (void) vfs_freevfsops_by_type(fstyp); 180 zcmn_err(GLOBAL_ZONEID, CE_WARN, 181 "nfsinit: bad vnode ops template"); 182 return (error); 183 } 184 185 186 nfsfstyp = fstyp; 187 188 return (0); 189 } 190 191 void 192 nfsfini(void) 193 { 194 } 195 196 /* 197 * nfs mount vfsop 198 * Set up mount info record and attach it to vfs struct. 
199 */ 200 static int 201 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 202 { 203 char *data = uap->dataptr; 204 int error; 205 vnode_t *rtvp; /* the server's root */ 206 mntinfo_t *mi; /* mount info, pointed at by vfs */ 207 size_t hlen; /* length of hostname */ 208 size_t nlen; /* length of netname */ 209 char netname[SYS_NMLN]; /* server's netname */ 210 struct netbuf addr; /* server's address */ 211 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 212 struct knetconfig *knconf; /* transport knetconfig structure */ 213 struct knetconfig *rdma_knconf; /* rdma transport structure */ 214 rnode_t *rp; 215 struct servinfo *svp; /* nfs server info */ 216 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 217 struct servinfo *svp_head; /* first nfs server info */ 218 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 219 struct sec_data *secdata; /* security data */ 220 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 221 STRUCT_DECL(knetconfig, knconf_tmp); 222 STRUCT_DECL(netbuf, addr_tmp); 223 int flags, addr_type; 224 char *p, *pf; 225 zone_t *zone = nfs_zone(); 226 227 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 228 return (error); 229 230 if (mvp->v_type != VDIR) 231 return (ENOTDIR); 232 233 /* 234 * get arguments 235 * 236 * nfs_args is now versioned and is extensible, so 237 * uap->datalen might be different from sizeof (args) 238 * in a compatible situation. 
239 */ 240 more: 241 STRUCT_INIT(args, get_udatamodel()); 242 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 243 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 244 STRUCT_SIZE(args)))) 245 return (EFAULT); 246 247 flags = STRUCT_FGET(args, flags); 248 249 if (uap->flags & MS_REMOUNT) { 250 size_t n; 251 char name[FSTYPSZ]; 252 253 if (uap->flags & MS_SYSSPACE) 254 error = copystr(uap->fstype, name, FSTYPSZ, &n); 255 else 256 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 257 258 if (error) { 259 if (error == ENAMETOOLONG) 260 return (EINVAL); 261 return (error); 262 } 263 264 /* 265 * This check is to ensure that the request is a 266 * genuine nfs remount request. 267 */ 268 269 if (strncmp(name, "nfs", 3) != 0) 270 return (EINVAL); 271 272 /* 273 * If the request changes the locking type, disallow the 274 * remount, 275 * because it's questionable whether we can transfer the 276 * locking state correctly. 277 * 278 * Remounts need to save the pathconf information. 279 * Part of the infamous static kludge. 280 */ 281 282 if ((mi = VFTOMI(vfsp)) != NULL) { 283 uint_t new_mi_llock; 284 uint_t old_mi_llock; 285 286 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 287 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 288 if (old_mi_llock != new_mi_llock) 289 return (EBUSY); 290 } 291 return (pathconf_get((struct mntinfo *)vfsp->vfs_data, 292 get_udatamodel(), STRUCT_BUF(args))); 293 } 294 295 mutex_enter(&mvp->v_lock); 296 if (!(uap->flags & MS_OVERLAY) && 297 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 298 mutex_exit(&mvp->v_lock); 299 return (EBUSY); 300 } 301 mutex_exit(&mvp->v_lock); 302 303 /* make sure things are zeroed for errout: */ 304 rtvp = NULL; 305 mi = NULL; 306 addr.buf = NULL; 307 syncaddr.buf = NULL; 308 secdata = NULL; 309 310 /* 311 * A valid knetconfig structure is required. 312 */ 313 if (!(flags & NFSMNT_KNCONF)) 314 return (EINVAL); 315 316 /* 317 * Allocate a servinfo struct. 
318 */ 319 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 320 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 321 if (svp_tail) { 322 svp_2ndlast = svp_tail; 323 svp_tail->sv_next = svp; 324 } else { 325 svp_head = svp; 326 svp_2ndlast = svp; 327 } 328 329 svp_tail = svp; 330 331 /* 332 * Allocate space for a knetconfig structure and 333 * its strings and copy in from user-land. 334 */ 335 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 336 svp->sv_knconf = knconf; 337 STRUCT_INIT(knconf_tmp, get_udatamodel()); 338 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 339 STRUCT_SIZE(knconf_tmp))) { 340 sv_free(svp_head); 341 return (EFAULT); 342 } 343 344 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 345 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 346 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 347 if (get_udatamodel() != DATAMODEL_LP64) { 348 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 349 } else { 350 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 351 } 352 353 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 354 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 355 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 356 if (error) { 357 kmem_free(pf, KNC_STRSIZE); 358 kmem_free(p, KNC_STRSIZE); 359 sv_free(svp_head); 360 return (error); 361 } 362 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 363 if (error) { 364 kmem_free(pf, KNC_STRSIZE); 365 kmem_free(p, KNC_STRSIZE); 366 sv_free(svp_head); 367 return (error); 368 } 369 knconf->knc_protofmly = pf; 370 knconf->knc_proto = p; 371 372 /* 373 * Get server address 374 */ 375 STRUCT_INIT(addr_tmp, get_udatamodel()); 376 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 377 STRUCT_SIZE(addr_tmp))) { 378 addr.buf = NULL; 379 error = EFAULT; 380 } else { 381 char *userbufptr; 382 383 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 384 addr.len = STRUCT_FGET(addr_tmp, len); 385 addr.buf = 
kmem_alloc(addr.len, KM_SLEEP); 386 addr.maxlen = addr.len; 387 if (copyin(userbufptr, addr.buf, addr.len)) 388 error = EFAULT; 389 } 390 svp->sv_addr = addr; 391 if (error) 392 goto errout; 393 394 /* 395 * Get the root fhandle 396 */ 397 if (copyin(STRUCT_FGETP(args, fh), &(svp->sv_fhandle.fh_buf), 398 NFS_FHSIZE)) { 399 error = EFAULT; 400 goto errout; 401 } 402 svp->sv_fhandle.fh_len = NFS_FHSIZE; 403 404 /* 405 * Get server's hostname 406 */ 407 if (flags & NFSMNT_HOSTNAME) { 408 error = copyinstr(STRUCT_FGETP(args, hostname), 409 netname, sizeof (netname), &hlen); 410 if (error) 411 goto errout; 412 } else { 413 char *p = "unknown-host"; 414 hlen = strlen(p) + 1; 415 (void) strcpy(netname, p); 416 } 417 svp->sv_hostnamelen = hlen; 418 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 419 (void) strcpy(svp->sv_hostname, netname); 420 421 /* 422 * RDMA MOUNT SUPPORT FOR NFS v2: 423 * Establish, is it possible to use RDMA, if so overload the 424 * knconf with rdma specific knconf and free the orignal. 425 */ 426 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 427 /* 428 * Determine the addr type for RDMA, IPv4 or v6. 429 */ 430 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 431 addr_type = AF_INET; 432 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 433 addr_type = AF_INET6; 434 435 if (rdma_reachable(addr_type, &svp->sv_addr, 436 &rdma_knconf) == 0) { 437 /* 438 * If successful, hijack, the orignal knconf and 439 * replace with a new one, depending on the flags. 440 */ 441 svp->sv_origknconf = svp->sv_knconf; 442 svp->sv_knconf = rdma_knconf; 443 knconf = rdma_knconf; 444 } else { 445 if (flags & NFSMNT_TRYRDMA) { 446 #ifdef DEBUG 447 if (rdma_debug) 448 zcmn_err(getzoneid(), CE_WARN, 449 "no RDMA onboard, revert\n"); 450 #endif 451 } 452 453 if (flags & NFSMNT_DORDMA) { 454 /* 455 * If proto=rdma is specified and no RDMA 456 * path to this server is avialable then 457 * ditch this server. 
458 * This is not included in the mountable 459 * server list or the replica list. 460 * Check if more servers are specified; 461 * Failover case, otherwise bail out of mount. 462 */ 463 if (STRUCT_FGET(args, nfs_args_ext) == 464 NFS_ARGS_EXTB && STRUCT_FGETP(args, 465 nfs_ext_u.nfs_extB.next) != NULL) { 466 if (uap->flags & MS_RDONLY && 467 !(flags & NFSMNT_SOFT)) { 468 data = (char *) 469 STRUCT_FGETP(args, 470 nfs_ext_u.nfs_extB.next); 471 if (svp_head->sv_next == NULL) { 472 svp_tail = NULL; 473 svp_2ndlast = NULL; 474 sv_free(svp_head); 475 goto more; 476 } else { 477 svp_tail = svp_2ndlast; 478 svp_2ndlast->sv_next = 479 NULL; 480 sv_free(svp); 481 goto more; 482 } 483 } 484 } else { 485 /* 486 * This is the last server specified 487 * in the nfs_args list passed down 488 * and its not rdma capable. 489 */ 490 if (svp_head->sv_next == NULL) { 491 /* 492 * Is this the only one 493 */ 494 error = EINVAL; 495 #ifdef DEBUG 496 if (rdma_debug) 497 zcmn_err(getzoneid(), 498 CE_WARN, 499 "No RDMA srv"); 500 #endif 501 goto errout; 502 } else { 503 /* 504 * There is list, since some 505 * servers specified before 506 * this passed all requirements 507 */ 508 svp_tail = svp_2ndlast; 509 svp_2ndlast->sv_next = NULL; 510 sv_free(svp); 511 goto proceed; 512 } 513 } 514 } 515 } 516 } 517 518 /* 519 * Get the extention data which has the new security data structure. 520 */ 521 if (flags & NFSMNT_NEWARGS) { 522 switch (STRUCT_FGET(args, nfs_args_ext)) { 523 case NFS_ARGS_EXTA: 524 case NFS_ARGS_EXTB: 525 /* 526 * Indicating the application is using the new 527 * sec_data structure to pass in the security 528 * data. 
529 */ 530 if (STRUCT_FGETP(args, 531 nfs_ext_u.nfs_extA.secdata) == NULL) { 532 error = EINVAL; 533 } else { 534 error = sec_clnt_loadinfo( 535 (struct sec_data *)STRUCT_FGETP(args, 536 nfs_ext_u.nfs_extA.secdata), 537 &secdata, get_udatamodel()); 538 } 539 break; 540 541 default: 542 error = EINVAL; 543 break; 544 } 545 } else if (flags & NFSMNT_SECURE) { 546 /* 547 * Keep this for backward compatibility to support 548 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 549 */ 550 if (STRUCT_FGETP(args, syncaddr) == NULL) { 551 error = EINVAL; 552 } else { 553 /* 554 * get time sync address. 555 */ 556 if (copyin(STRUCT_FGETP(args, syncaddr), &addr_tmp, 557 STRUCT_SIZE(addr_tmp))) { 558 syncaddr.buf = NULL; 559 error = EFAULT; 560 } else { 561 char *userbufptr; 562 563 userbufptr = syncaddr.buf = 564 STRUCT_FGETP(addr_tmp, buf); 565 syncaddr.len = 566 STRUCT_FGET(addr_tmp, len); 567 syncaddr.buf = kmem_alloc(syncaddr.len, 568 KM_SLEEP); 569 syncaddr.maxlen = syncaddr.len; 570 571 if (copyin(userbufptr, syncaddr.buf, 572 syncaddr.len)) 573 error = EFAULT; 574 } 575 576 /* 577 * get server's netname 578 */ 579 if (!error) { 580 error = copyinstr(STRUCT_FGETP(args, netname), 581 netname, sizeof (netname), &nlen); 582 netname[nlen] = '\0'; 583 } 584 585 if (error && syncaddr.buf != NULL) { 586 kmem_free(syncaddr.buf, syncaddr.len); 587 syncaddr.buf = NULL; 588 } 589 } 590 591 /* 592 * Move security related data to the sec_data structure. 593 */ 594 if (!error) { 595 dh_k4_clntdata_t *data; 596 char *pf, *p; 597 598 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 599 if (flags & NFSMNT_RPCTIMESYNC) 600 secdata->flags |= AUTH_F_RPCTIMESYNC; 601 data = kmem_alloc(sizeof (*data), KM_SLEEP); 602 data->syncaddr = syncaddr; 603 604 /* 605 * duplicate the knconf information for the 606 * new opaque data. 
607 */ 608 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 609 *data->knconf = *knconf; 610 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 611 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 612 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 613 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 614 data->knconf->knc_protofmly = pf; 615 data->knconf->knc_proto = p; 616 617 /* move server netname to the sec_data structure */ 618 if (nlen != 0) { 619 data->netname = kmem_alloc(nlen, KM_SLEEP); 620 bcopy(netname, data->netname, nlen); 621 data->netnamelen = (int)nlen; 622 } 623 secdata->secmod = secdata->rpcflavor = AUTH_DES; 624 secdata->data = (caddr_t)data; 625 } 626 } else { 627 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 628 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 629 secdata->data = NULL; 630 } 631 svp->sv_secdata = secdata; 632 if (error) 633 goto errout; 634 635 /* 636 * See bug 1180236. 637 * If mount secure failed, we will fall back to AUTH_NONE 638 * and try again. nfs3rootvp() will turn this back off. 639 * 640 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 641 * The server does not care if these procedures have the proper 642 * authentication flavor, so if mount retries using AUTH_NONE 643 * that does not require a credential setup for root then the 644 * automounter would work without requiring root to be 645 * keylogged into AUTH_DES. 646 */ 647 if (secdata->rpcflavor != AUTH_UNIX && 648 secdata->rpcflavor != AUTH_LOOPBACK) 649 secdata->flags |= AUTH_F_TRYNONE; 650 651 /* 652 * Failover support: 653 * 654 * We may have a linked list of nfs_args structures, 655 * which means the user is looking for failover. If 656 * the mount is either not "read-only" or "soft", 657 * we want to bail out with EINVAL. 
658 */ 659 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 660 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 661 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 662 data = (char *)STRUCT_FGETP(args, 663 nfs_ext_u.nfs_extB.next); 664 goto more; 665 } 666 error = EINVAL; 667 goto errout; 668 } 669 670 /* 671 * Determine the zone we're being mounted into. 672 */ 673 if (getzoneid() == GLOBAL_ZONEID) { 674 zone_t *mntzone; 675 676 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 677 ASSERT(mntzone != NULL); 678 zone_rele(mntzone); 679 if (mntzone != zone) { 680 error = EBUSY; 681 goto errout; 682 } 683 } 684 685 /* 686 * Stop the mount from going any further if the zone is going away. 687 */ 688 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) { 689 error = EBUSY; 690 goto errout; 691 } 692 693 /* 694 * Get root vnode. 695 */ 696 proceed: 697 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, zone); 698 699 if (error) 700 goto errout; 701 702 /* 703 * Set option fields in the mount info record 704 */ 705 mi = VTOMI(rtvp); 706 707 if (svp_head->sv_next) 708 mi->mi_flags |= MI_LLOCK; 709 710 error = nfs_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 711 if (!error) { 712 /* static pathconf kludge */ 713 error = pathconf_get(mi, get_udatamodel(), STRUCT_BUF(args)); 714 } 715 716 errout: 717 if (error) { 718 if (rtvp != NULL) { 719 rp = VTOR(rtvp); 720 if (rp->r_flags & RHASHED) 721 rp_rmhash(rp); 722 } 723 sv_free(svp_head); 724 if (mi != NULL) { 725 nfs_async_stop(vfsp); 726 nfs_async_manager_stop(vfsp); 727 if (mi->mi_io_kstats) { 728 kstat_delete(mi->mi_io_kstats); 729 mi->mi_io_kstats = NULL; 730 } 731 if (mi->mi_ro_kstats) { 732 kstat_delete(mi->mi_ro_kstats); 733 mi->mi_ro_kstats = NULL; 734 } 735 nfs_free_mi(mi); 736 } 737 } 738 739 if (rtvp != NULL) 740 VN_RELE(rtvp); 741 742 return (error); 743 } 744 745 /* 746 * The pathconf information is kept on a linked list of kmem_alloc'ed 747 * structs. 
We search the list & add a new struct iff there is no other 748 * struct with the same information. 749 * See sys/pathconf.h for ``the rest of the story.'' 750 */ 751 static struct pathcnf *allpc = NULL; 752 753 static int 754 pathconf_get(struct mntinfo *mi, model_t model, struct nfs_args *args) 755 { 756 struct pathcnf *p; 757 struct pathcnf pc; 758 STRUCT_DECL(pathcnf, pc_tmp); 759 STRUCT_HANDLE(nfs_args, ap); 760 int i; 761 762 #ifdef lint 763 model = model; 764 #endif 765 766 STRUCT_INIT(pc_tmp, model); 767 STRUCT_SET_HANDLE(ap, model, args); 768 769 if (mi->mi_pathconf != NULL) { 770 pathconf_rele(mi); 771 mi->mi_pathconf = NULL; 772 } 773 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 774 STRUCT_FGETP(ap, pathconf) != NULL) { 775 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 776 STRUCT_SIZE(pc_tmp))) 777 return (EFAULT); 778 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 779 return (EINVAL); 780 781 pc.pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 782 pc.pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 783 pc.pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 784 pc.pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 785 pc.pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 786 pc.pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 787 pc.pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 788 pc.pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 789 for (i = 0; i < _PC_N; i++) 790 pc.pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 791 792 for (p = allpc; p != NULL; p = p->pc_next) { 793 if (PCCMP(p, &pc) == 0) 794 break; 795 } 796 if (p != NULL) { 797 mi->mi_pathconf = p; 798 p->pc_refcnt++; 799 } else { 800 p = kmem_alloc(sizeof (*p), KM_SLEEP); 801 *p = pc; 802 p->pc_next = allpc; 803 p->pc_refcnt = 1; 804 allpc = mi->mi_pathconf = p; 805 } 806 } 807 return (0); 808 } 809 810 /* 811 * release the static pathconf information 812 */ 813 static void 814 pathconf_rele(struct mntinfo *mi) 815 { 816 if (mi->mi_pathconf != NULL) { 817 if (--mi->mi_pathconf->pc_refcnt 
== 0) { 818 struct pathcnf *p; 819 struct pathcnf *p2; 820 821 p2 = p = allpc; 822 while (p != NULL && p != mi->mi_pathconf) { 823 p2 = p; 824 p = p->pc_next; 825 } 826 if (p == NULL) { 827 panic("mi->pathconf"); 828 /*NOTREACHED*/ 829 } 830 if (p == allpc) 831 allpc = p->pc_next; 832 else 833 p2->pc_next = p->pc_next; 834 kmem_free(p, sizeof (*p)); 835 mi->mi_pathconf = NULL; 836 } 837 } 838 } 839 840 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 841 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 842 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 843 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 844 845 static int 846 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 847 int flags, cred_t *cr, zone_t *zone) 848 { 849 vnode_t *rtvp; 850 mntinfo_t *mi; 851 dev_t nfs_dev; 852 struct vattr va; 853 int error; 854 rnode_t *rp; 855 int i; 856 struct nfs_stats *nfsstatsp; 857 cred_t *lcr = NULL, *tcr = cr; 858 859 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 860 ASSERT(nfsstatsp != NULL); 861 862 /* 863 * Create a mount record and link it to the vfs struct. 
864 */ 865 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 866 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 867 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 868 mi->mi_flags = MI_ACL | MI_EXTATTR; 869 if (!(flags & NFSMNT_SOFT)) 870 mi->mi_flags |= MI_HARD; 871 if ((flags & NFSMNT_SEMISOFT)) 872 mi->mi_flags |= MI_SEMISOFT; 873 if ((flags & NFSMNT_NOPRINT)) 874 mi->mi_flags |= MI_NOPRINT; 875 if (flags & NFSMNT_INT) 876 mi->mi_flags |= MI_INT; 877 mi->mi_retrans = NFS_RETRIES; 878 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 879 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 880 mi->mi_timeo = nfs_cots_timeo; 881 else 882 mi->mi_timeo = NFS_TIMEO; 883 mi->mi_prog = NFS_PROGRAM; 884 mi->mi_vers = NFS_VERSION; 885 mi->mi_rfsnames = rfsnames_v2; 886 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 887 mi->mi_call_type = call_type_v2; 888 mi->mi_ss_call_type = ss_call_type_v2; 889 mi->mi_timer_type = timer_type_v2; 890 mi->mi_aclnames = aclnames_v2; 891 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 892 mi->mi_acl_call_type = acl_call_type_v2; 893 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 894 mi->mi_acl_timer_type = acl_timer_type_v2; 895 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 896 mi->mi_servers = svp; 897 mi->mi_curr_serv = svp; 898 mi->mi_acregmin = SEC2HR(ACREGMIN); 899 mi->mi_acregmax = SEC2HR(ACREGMAX); 900 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 901 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 902 903 if (nfs_dynamic) 904 mi->mi_flags |= MI_DYNAMIC; 905 906 if (flags & NFSMNT_DIRECTIO) 907 mi->mi_flags |= MI_DIRECTIO; 908 909 /* 910 * Make a vfs struct for nfs. We do this here instead of below 911 * because rtvp needs a vfs before we can do a getattr on it. 
912 * 913 * Assign a unique device id to the mount 914 */ 915 mutex_enter(&nfs_minor_lock); 916 do { 917 nfs_minor = (nfs_minor + 1) & MAXMIN32; 918 nfs_dev = makedevice(nfs_major, nfs_minor); 919 } while (vfs_devismounted(nfs_dev)); 920 mutex_exit(&nfs_minor_lock); 921 922 vfsp->vfs_dev = nfs_dev; 923 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 924 vfsp->vfs_data = (caddr_t)mi; 925 vfsp->vfs_fstype = nfsfstyp; 926 vfsp->vfs_bsize = NFS_MAXDATA; 927 928 /* 929 * Initialize fields used to support async putpage operations. 930 */ 931 for (i = 0; i < NFS_ASYNC_TYPES; i++) 932 mi->mi_async_clusters[i] = nfs_async_clusters; 933 mi->mi_async_init_clusters = nfs_async_clusters; 934 mi->mi_async_curr = &mi->mi_async_reqs[0]; 935 mi->mi_max_threads = nfs_max_threads; 936 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 937 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 938 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 939 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 940 941 mi->mi_vfsp = vfsp; 942 zone_hold(mi->mi_zone = zone); 943 nfs_mi_zonelist_add(mi); 944 945 /* 946 * Make the root vnode, use it to get attributes, 947 * then remake it with the attributes. 948 */ 949 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 950 NULL, vfsp, gethrtime(), cr, NULL, NULL); 951 952 va.va_mask = AT_ALL; 953 954 /* 955 * If the uid is set then set the creds for secure mounts 956 * by proxy processes such as automountd. 957 */ 958 if (svp->sv_secdata->uid != 0 && 959 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 960 lcr = crdup(cr); 961 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 962 tcr = lcr; 963 } 964 965 error = nfsgetattr(rtvp, &va, tcr); 966 if (error) 967 goto bad; 968 rtvp->v_type = va.va_type; 969 970 /* 971 * Poll every server to get the filesystem stats; we're 972 * only interested in the server's transfer size, and we 973 * want the minimum. 
974 * 975 * While we're looping, we'll turn off AUTH_F_TRYNONE, 976 * which is only for the mount operation. 977 */ 978 979 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 980 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 981 982 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 983 struct nfsstatfs fs; 984 int douprintf; 985 986 douprintf = 1; 987 mi->mi_curr_serv = svp; 988 989 error = rfs2call(mi, RFS_STATFS, 990 xdr_fhandle, (caddr_t)svp->sv_fhandle.fh_buf, 991 xdr_statfs, (caddr_t)&fs, tcr, &douprintf, 992 &fs.fs_status, 0, NULL); 993 if (error) 994 goto bad; 995 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 996 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 997 } 998 mi->mi_curr_serv = mi->mi_servers; 999 mi->mi_curread = mi->mi_tsize; 1000 mi->mi_curwrite = mi->mi_stsize; 1001 1002 /* 1003 * Start the manager thread responsible for handling async worker 1004 * threads. 1005 */ 1006 VFS_HOLD(vfsp); /* add reference for thread */ 1007 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1008 vfsp, 0, minclsyspri); 1009 ASSERT(mi->mi_manager_thread != NULL); 1010 1011 /* 1012 * Initialize kstats 1013 */ 1014 nfs_mnt_kstat_init(vfsp); 1015 1016 mi->mi_type = rtvp->v_type; 1017 1018 *rtvpp = rtvp; 1019 if (lcr != NULL) 1020 crfree(lcr); 1021 1022 return (0); 1023 bad: 1024 /* 1025 * An error occurred somewhere, need to clean up... 1026 * We need to release our reference to the root vnode and 1027 * destroy the mntinfo struct that we just created. 
1028 */ 1029 if (lcr != NULL) 1030 crfree(lcr); 1031 rp = VTOR(rtvp); 1032 if (rp->r_flags & RHASHED) 1033 rp_rmhash(rp); 1034 VN_RELE(rtvp); 1035 nfs_async_stop(vfsp); 1036 nfs_async_manager_stop(vfsp); 1037 if (mi->mi_io_kstats) { 1038 kstat_delete(mi->mi_io_kstats); 1039 mi->mi_io_kstats = NULL; 1040 } 1041 if (mi->mi_ro_kstats) { 1042 kstat_delete(mi->mi_ro_kstats); 1043 mi->mi_ro_kstats = NULL; 1044 } 1045 nfs_free_mi(mi); 1046 *rtvpp = NULL; 1047 return (error); 1048 } 1049 1050 /* 1051 * vfs operations 1052 */ 1053 static int 1054 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1055 { 1056 mntinfo_t *mi; 1057 ushort_t omax; 1058 1059 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1060 return (EPERM); 1061 1062 mi = VFTOMI(vfsp); 1063 if (flag & MS_FORCE) { 1064 vfsp->vfs_flag |= VFS_UNMOUNTED; 1065 /* 1066 * We need to stop the manager thread explicitly; the worker 1067 * threads can time out and exit on their own. 1068 */ 1069 nfs_async_manager_stop(vfsp); 1070 destroy_rtable(vfsp, cr); 1071 if (mi->mi_io_kstats) { 1072 kstat_delete(mi->mi_io_kstats); 1073 mi->mi_io_kstats = NULL; 1074 } 1075 if (mi->mi_ro_kstats) { 1076 kstat_delete(mi->mi_ro_kstats); 1077 mi->mi_ro_kstats = NULL; 1078 } 1079 return (0); 1080 } 1081 /* 1082 * Wait until all asynchronous putpage operations on 1083 * this file system are complete before flushing rnodes 1084 * from the cache. 1085 */ 1086 omax = mi->mi_max_threads; 1087 if (nfs_async_stop_sig(vfsp)) { 1088 return (EINTR); 1089 } 1090 rflush(vfsp, cr); 1091 /* 1092 * If there are any active vnodes on this file system, 1093 * then the file system is busy and can't be umounted. 1094 */ 1095 if (check_rtable(vfsp)) { 1096 mutex_enter(&mi->mi_async_lock); 1097 mi->mi_max_threads = omax; 1098 mutex_exit(&mi->mi_async_lock); 1099 return (EBUSY); 1100 } 1101 /* 1102 * The unmount can't fail from now on; stop the manager thread. 
1103 */ 1104 nfs_async_manager_stop(vfsp); 1105 /* 1106 * Destroy all rnodes belonging to this file system from the 1107 * rnode hash queues and purge any resources allocated to 1108 * them. 1109 */ 1110 destroy_rtable(vfsp, cr); 1111 if (mi->mi_io_kstats) { 1112 kstat_delete(mi->mi_io_kstats); 1113 mi->mi_io_kstats = NULL; 1114 } 1115 if (mi->mi_ro_kstats) { 1116 kstat_delete(mi->mi_ro_kstats); 1117 mi->mi_ro_kstats = NULL; 1118 } 1119 return (0); 1120 } 1121 1122 /* 1123 * find root of nfs 1124 */ 1125 static int 1126 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1127 { 1128 mntinfo_t *mi; 1129 vnode_t *vp; 1130 servinfo_t *svp; 1131 1132 mi = VFTOMI(vfsp); 1133 1134 if (nfs_zone() != mi->mi_zone) 1135 return (EPERM); 1136 1137 svp = mi->mi_curr_serv; 1138 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1139 mutex_enter(&svp->sv_lock); 1140 svp->sv_flags &= ~SV_ROOT_STALE; 1141 mutex_exit(&svp->sv_lock); 1142 return (ENOENT); 1143 } 1144 1145 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1146 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1147 1148 if (VTOR(vp)->r_flags & RSTALE) { 1149 VN_RELE(vp); 1150 return (ENOENT); 1151 } 1152 1153 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1154 1155 vp->v_type = mi->mi_type; 1156 1157 *vpp = vp; 1158 1159 return (0); 1160 } 1161 1162 /* 1163 * Get file system statistics. 
1164 */ 1165 static int 1166 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1167 { 1168 int error; 1169 mntinfo_t *mi; 1170 struct nfsstatfs fs; 1171 int douprintf; 1172 failinfo_t fi; 1173 vnode_t *vp; 1174 1175 error = nfs_root(vfsp, &vp); 1176 if (error) 1177 return (error); 1178 1179 mi = VFTOMI(vfsp); 1180 douprintf = 1; 1181 fi.vp = vp; 1182 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1183 fi.copyproc = nfscopyfh; 1184 fi.lookupproc = nfslookup; 1185 fi.xattrdirproc = acl_getxattrdir2; 1186 1187 error = rfs2call(mi, RFS_STATFS, 1188 xdr_fhandle, (caddr_t)VTOFH(vp), 1189 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, 1190 &fs.fs_status, 0, &fi); 1191 1192 if (!error) { 1193 error = geterrno(fs.fs_status); 1194 if (!error) { 1195 mutex_enter(&mi->mi_lock); 1196 if (mi->mi_stsize) { 1197 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1198 } else { 1199 mi->mi_stsize = fs.fs_tsize; 1200 mi->mi_curwrite = mi->mi_stsize; 1201 } 1202 mutex_exit(&mi->mi_lock); 1203 sbp->f_bsize = fs.fs_bsize; 1204 sbp->f_frsize = fs.fs_bsize; 1205 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1206 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1207 /* 1208 * Some servers may return negative available 1209 * block counts. They may do this because they 1210 * calculate the number of available blocks by 1211 * subtracting the number of used blocks from 1212 * the total number of blocks modified by the 1213 * minimum free value. For example, if the 1214 * minumum free percentage is 10 and the file 1215 * system is greater than 90 percent full, then 1216 * 90 percent of the total blocks minus the 1217 * actual number of used blocks may be a 1218 * negative number. 1219 * 1220 * In this case, we need to sign extend the 1221 * negative number through the assignment from 1222 * the 32 bit bavail count to the 64 bit bavail 1223 * count. 
1224 * 1225 * We need to be able to discern between there 1226 * just being a lot of available blocks on the 1227 * file system and the case described above. 1228 * We are making the assumption that it does 1229 * not make sense to have more available blocks 1230 * than there are free blocks. So, if there 1231 * are, then we treat the number as if it were 1232 * a negative number and arrange to have it 1233 * sign extended when it is converted from 32 1234 * bits to 64 bits. 1235 */ 1236 if (fs.fs_bavail <= fs.fs_bfree) 1237 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1238 else { 1239 sbp->f_bavail = 1240 (fsblkcnt64_t)((long)fs.fs_bavail); 1241 } 1242 sbp->f_files = (fsfilcnt64_t)-1; 1243 sbp->f_ffree = (fsfilcnt64_t)-1; 1244 sbp->f_favail = (fsfilcnt64_t)-1; 1245 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1246 (void) strncpy(sbp->f_basetype, 1247 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1248 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1249 sbp->f_namemax = (uint32_t)-1; 1250 } else { 1251 PURGE_STALE_FH(error, vp, CRED()); 1252 } 1253 } 1254 1255 VN_RELE(vp); 1256 1257 return (error); 1258 } 1259 1260 static kmutex_t nfs_syncbusy; 1261 1262 /* 1263 * Flush dirty nfs files for file system vfsp. 1264 * If vfsp == NULL, all nfs files are flushed. 1265 */ 1266 /* ARGSUSED */ 1267 static int 1268 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1269 { 1270 /* 1271 * Cross-zone calls are OK here, since this translates to a 1272 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 
1273 */ 1274 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1275 rflush(vfsp, cr); 1276 mutex_exit(&nfs_syncbusy); 1277 } 1278 return (0); 1279 } 1280 1281 /* ARGSUSED */ 1282 static int 1283 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1284 { 1285 int error; 1286 vnode_t *vp; 1287 struct vattr va; 1288 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1289 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1290 1291 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1292 return (EPERM); 1293 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1294 #ifdef DEBUG 1295 zcmn_err(zoneid, CE_WARN, 1296 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1297 (int)(sizeof (*nfsfidp) - sizeof (short))); 1298 #endif 1299 *vpp = NULL; 1300 return (ESTALE); 1301 } 1302 1303 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1304 gethrtime(), CRED(), NULL, NULL); 1305 1306 if (VTOR(vp)->r_flags & RSTALE) { 1307 VN_RELE(vp); 1308 *vpp = NULL; 1309 return (ENOENT); 1310 } 1311 1312 if (vp->v_type == VNON) { 1313 va.va_mask = AT_ALL; 1314 error = nfsgetattr(vp, &va, CRED()); 1315 if (error) { 1316 VN_RELE(vp); 1317 *vpp = NULL; 1318 return (error); 1319 } 1320 vp->v_type = va.va_type; 1321 } 1322 1323 *vpp = vp; 1324 1325 return (0); 1326 } 1327 1328 /* ARGSUSED */ 1329 static int 1330 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1331 { 1332 vnode_t *rtvp; 1333 char root_hostname[SYS_NMLN+1]; 1334 struct servinfo *svp; 1335 int error; 1336 int vfsflags; 1337 size_t size; 1338 char *root_path; 1339 struct pathname pn; 1340 char *name; 1341 cred_t *cr; 1342 struct nfs_args args; /* nfs mount arguments */ 1343 static char token[10]; 1344 1345 bzero(&args, sizeof (args)); 1346 1347 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1348 clkset(-1L); /* hack for now - until we get time svc? */ 1349 1350 if (why == ROOT_REMOUNT) { 1351 /* 1352 * Shouldn't happen. 
1353 */ 1354 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1355 } 1356 1357 if (why == ROOT_UNMOUNT) { 1358 /* 1359 * Nothing to do for NFS. 1360 */ 1361 return (0); 1362 } 1363 1364 /* 1365 * why == ROOT_INIT 1366 */ 1367 1368 name = token; 1369 *name = 0; 1370 getfsname("root", name, sizeof (token)); 1371 1372 pn_alloc(&pn); 1373 root_path = pn.pn_path; 1374 1375 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1376 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1377 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1378 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1379 1380 /* 1381 * Get server address 1382 * Get the root fhandle 1383 * Get server's transport 1384 * Get server's hostname 1385 * Get options 1386 */ 1387 args.addr = &svp->sv_addr; 1388 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1389 args.knconf = svp->sv_knconf; 1390 args.hostname = root_hostname; 1391 vfsflags = 0; 1392 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1393 &args, &vfsflags)) { 1394 nfs_cmn_err(error, CE_WARN, 1395 "nfs_mountroot: mount_root failed: %m"); 1396 sv_free(svp); 1397 pn_free(&pn); 1398 return (error); 1399 } 1400 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1401 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1402 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1403 (void) strcpy(svp->sv_hostname, root_hostname); 1404 1405 /* 1406 * Force root partition to always be mounted with AUTH_UNIX for now 1407 */ 1408 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1409 svp->sv_secdata->secmod = AUTH_UNIX; 1410 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1411 svp->sv_secdata->data = NULL; 1412 1413 cr = crgetcred(); 1414 rtvp = NULL; 1415 1416 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1417 1418 crfree(cr); 1419 1420 if (error) { 1421 pn_free(&pn); 1422 goto errout; 1423 } 1424 1425 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1426 if (error) { 1427 
nfs_cmn_err(error, CE_WARN, 1428 "nfs_mountroot: invalid root mount options"); 1429 pn_free(&pn); 1430 goto errout; 1431 } 1432 1433 (void) vfs_lock_wait(vfsp); 1434 vfs_add(NULL, vfsp, vfsflags); 1435 vfs_unlock(vfsp); 1436 1437 size = strlen(svp->sv_hostname); 1438 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1439 rootfs.bo_name[size] = ':'; 1440 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1441 1442 pn_free(&pn); 1443 1444 errout: 1445 if (error) { 1446 sv_free(svp); 1447 nfs_async_stop(vfsp); 1448 nfs_async_manager_stop(vfsp); 1449 } 1450 1451 if (rtvp != NULL) 1452 VN_RELE(rtvp); 1453 1454 return (error); 1455 } 1456 1457 /* 1458 * Initialization routine for VFS routines. Should only be called once 1459 */ 1460 int 1461 nfs_vfsinit(void) 1462 { 1463 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1464 return (0); 1465 } 1466 1467 void 1468 nfs_vfsfini(void) 1469 { 1470 mutex_destroy(&nfs_syncbusy); 1471 } 1472 1473 void 1474 nfs_freevfs(vfs_t *vfsp) 1475 { 1476 mntinfo_t *mi; 1477 servinfo_t *svp; 1478 1479 /* free up the resources */ 1480 mi = VFTOMI(vfsp); 1481 pathconf_rele(mi); 1482 svp = mi->mi_servers; 1483 mi->mi_servers = mi->mi_curr_serv = NULL; 1484 sv_free(svp); 1485 1486 /* 1487 * By this time we should have already deleted the 1488 * mi kstats in the unmount code. If they are still around 1489 * somethings wrong 1490 */ 1491 ASSERT(mi->mi_io_kstats == NULL); 1492 nfs_free_mi(mi); 1493 } 1494