1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/mntent.h> 45 #include <sys/statvfs.h> 46 #include <sys/errno.h> 47 #include <sys/debug.h> 48 #include <sys/cmn_err.h> 49 #include <sys/utsname.h> 50 #include <sys/bootconf.h> 51 #include <sys/modctl.h> 52 #include <sys/acl.h> 53 #include <sys/flock.h> 54 #include <sys/policy.h> 55 #include <sys/zone.h> 56 #include <sys/class.h> 57 #include <sys/socket.h> 58 #include <sys/netconfig.h> 59 #include <sys/tsol/tnet.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/clnt.h> 64 65 #include <nfs/nfs.h> 66 #include <nfs/nfs_clnt.h> 67 #include <nfs/rnode.h> 68 #include <nfs/mount.h> 69 #include <nfs/nfs_acl.h> 70 71 #include <fs/fs_subr.h> 72 73 /* 74 * From rpcsec module (common/rpcsec). 75 */ 76 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 77 extern void sec_clnt_freeinfo(struct sec_data *); 78 79 /* 80 * The order and contents of this structure must be kept in sync with that of 81 * rfsreqcnt_v3_tmpl in nfs_stats.c 82 */ 83 static char *rfsnames_v3[] = { 84 "null", "getattr", "setattr", "lookup", "access", "readlink", "read", 85 "write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir", 86 "rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo", 87 "pathconf", "commit" 88 }; 89 90 /* 91 * This table maps from NFS protocol number into call type. 92 * Zero means a "Lookup" type call 93 * One means a "Read" type call 94 * Two means a "Write" type call 95 * This is used to select a default time-out. 96 */ 97 static uchar_t call_type_v3[] = { 98 0, 0, 1, 0, 0, 0, 1, 99 2, 2, 2, 2, 2, 2, 2, 100 2, 2, 1, 2, 0, 0, 0, 101 2 }; 102 103 /* 104 * Similar table, but to determine which timer to use 105 * (only real reads and writes!) 106 */ 107 static uchar_t timer_type_v3[] = { 108 0, 0, 0, 0, 0, 0, 1, 109 2, 0, 0, 0, 0, 0, 0, 110 0, 0, 1, 1, 0, 0, 0, 111 0 }; 112 113 /* 114 * This table maps from NFS protocol number into a call type 115 * for the semisoft mount option. 116 * Zero means do not repeat operation. 117 * One means repeat. 118 */ 119 static uchar_t ss_call_type_v3[] = { 120 0, 0, 1, 0, 0, 0, 0, 121 1, 1, 1, 1, 1, 1, 1, 122 1, 1, 0, 0, 0, 0, 0, 123 1 }; 124 125 /* 126 * nfs3 vfs operations. 127 */ 128 static int nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 129 static int nfs3_unmount(vfs_t *, int, cred_t *); 130 static int nfs3_root(vfs_t *, vnode_t **); 131 static int nfs3_statvfs(vfs_t *, struct statvfs64 *); 132 static int nfs3_sync(vfs_t *, short, cred_t *); 133 static int nfs3_vget(vfs_t *, vnode_t **, fid_t *); 134 static int nfs3_mountroot(vfs_t *, whymountroot_t); 135 static void nfs3_freevfs(vfs_t *); 136 137 static int nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *, 138 int, cred_t *, zone_t *); 139 140 /* 141 * Initialize the vfs structure 142 */ 143 144 static int nfs3fstyp; 145 vfsops_t *nfs3_vfsops; 146 147 /* 148 * Debug variable to check for rdma based 149 * transport startup and cleanup. Controlled 150 * through /etc/system. Off by default. 151 */ 152 extern int rdma_debug; 153 154 int 155 nfs3init(int fstyp, char *name) 156 { 157 static const fs_operation_def_t nfs3_vfsops_template[] = { 158 VFSNAME_MOUNT, nfs3_mount, 159 VFSNAME_UNMOUNT, nfs3_unmount, 160 VFSNAME_ROOT, nfs3_root, 161 VFSNAME_STATVFS, nfs3_statvfs, 162 VFSNAME_SYNC, (fs_generic_func_p) nfs3_sync, 163 VFSNAME_VGET, nfs3_vget, 164 VFSNAME_MOUNTROOT, nfs3_mountroot, 165 VFSNAME_FREEVFS, (fs_generic_func_p)nfs3_freevfs, 166 NULL, NULL 167 }; 168 int error; 169 170 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops); 171 if (error != 0) { 172 zcmn_err(GLOBAL_ZONEID, CE_WARN, 173 "nfs3init: bad vfs ops template"); 174 return (error); 175 } 176 177 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops); 178 if (error != 0) { 179 (void) vfs_freevfsops_by_type(fstyp); 180 zcmn_err(GLOBAL_ZONEID, CE_WARN, 181 "nfs3init: bad vnode ops template"); 182 return (error); 183 } 184 185 nfs3fstyp = fstyp; 186 187 return (0); 188 } 189 190 void 191 nfs3fini(void) 192 { 193 } 194 195 /* 196 * nfs mount vfsop 197 * Set up mount info record and attach it to vfs struct. 198 */ 199 static int 200 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 201 { 202 char *data = uap->dataptr; 203 int error; 204 vnode_t *rtvp; /* the server's root */ 205 mntinfo_t *mi; /* mount info, pointed at by vfs */ 206 size_t hlen; /* length of hostname */ 207 size_t nlen; /* length of netname */ 208 char netname[SYS_NMLN]; /* server's netname */ 209 struct netbuf addr; /* server's address */ 210 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 211 struct knetconfig *knconf; /* transport knetconfig structure */ 212 struct knetconfig *rdma_knconf; /* rdma transport structure */ 213 rnode_t *rp; 214 struct servinfo *svp; /* nfs server info */ 215 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 216 struct servinfo *svp_head; /* first nfs server info */ 217 struct servinfo *svp_2ndlast; /* 2nd last in server info list */ 218 struct sec_data *secdata; /* security data */ 219 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 220 STRUCT_DECL(knetconfig, knconf_tmp); 221 STRUCT_DECL(netbuf, addr_tmp); 222 int flags, addr_type; 223 char *p, *pf; 224 zone_t *zone = nfs_zone(); 225 zone_t *mntzone = NULL; 226 227 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 228 return (EPERM); 229 230 if (mvp->v_type != VDIR) 231 return (ENOTDIR); 232 233 /* 234 * get arguments 235 * 236 * nfs_args is now versioned and is extensible, so 237 * uap->datalen might be different from sizeof (args) 238 * in a compatible situation. 239 */ 240 more: 241 STRUCT_INIT(args, get_udatamodel()); 242 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 243 if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen, 244 STRUCT_SIZE(args)))) 245 return (EFAULT); 246 247 flags = STRUCT_FGET(args, flags); 248 249 if (uap->flags & MS_REMOUNT) { 250 size_t n; 251 char name[FSTYPSZ]; 252 253 if (uap->flags & MS_SYSSPACE) 254 error = copystr(uap->fstype, name, FSTYPSZ, &n); 255 else 256 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 257 258 if (error) { 259 if (error == ENAMETOOLONG) 260 return (EINVAL); 261 return (error); 262 } 263 264 /* 265 * This check is to ensure that the request is a 266 * genuine nfs remount request. 267 */ 268 269 if (strncmp(name, "nfs", 3) != 0) 270 return (EINVAL); 271 272 /* 273 * If the request changes the locking type, disallow the 274 * remount, 275 * because it's questionable whether we can transfer the 276 * locking state correctly. 277 */ 278 279 if ((mi = VFTOMI(vfsp)) != NULL) { 280 uint_t new_mi_llock; 281 uint_t old_mi_llock; 282 283 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 284 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 285 if (old_mi_llock != new_mi_llock) 286 return (EBUSY); 287 } 288 return (0); 289 } 290 291 mutex_enter(&mvp->v_lock); 292 if (!(uap->flags & MS_OVERLAY) && 293 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 294 mutex_exit(&mvp->v_lock); 295 return (EBUSY); 296 } 297 mutex_exit(&mvp->v_lock); 298 299 /* make sure things are zeroed for errout: */ 300 rtvp = NULL; 301 mi = NULL; 302 addr.buf = NULL; 303 syncaddr.buf = NULL; 304 secdata = NULL; 305 306 /* 307 * A valid knetconfig structure is required. 308 */ 309 if (!(flags & NFSMNT_KNCONF)) 310 return (EINVAL); 311 312 /* 313 * Allocate a servinfo struct. 314 */ 315 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 316 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 317 if (svp_tail) { 318 svp_2ndlast = svp_tail; 319 svp_tail->sv_next = svp; 320 } else { 321 svp_head = svp; 322 svp_2ndlast = svp; 323 } 324 325 svp_tail = svp; 326 327 /* 328 * Allocate space for a knetconfig structure and 329 * its strings and copy in from user-land. 330 */ 331 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 332 svp->sv_knconf = knconf; 333 STRUCT_INIT(knconf_tmp, get_udatamodel()); 334 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 335 STRUCT_SIZE(knconf_tmp))) { 336 sv_free(svp_head); 337 return (EFAULT); 338 } 339 340 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 341 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 342 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 343 if (get_udatamodel() != DATAMODEL_LP64) { 344 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 345 } else { 346 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 347 } 348 349 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 350 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 351 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 352 if (error) { 353 kmem_free(pf, KNC_STRSIZE); 354 kmem_free(p, KNC_STRSIZE); 355 sv_free(svp_head); 356 return (error); 357 } 358 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 359 if (error) { 360 kmem_free(pf, KNC_STRSIZE); 361 kmem_free(p, KNC_STRSIZE); 362 sv_free(svp_head); 363 return (error); 364 } 365 knconf->knc_protofmly = pf; 366 knconf->knc_proto = p; 367 368 /* 369 * Get server address 370 */ 371 STRUCT_INIT(addr_tmp, get_udatamodel()); 372 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 373 STRUCT_SIZE(addr_tmp))) { 374 addr.buf = NULL; 375 error = EFAULT; 376 } else { 377 char *userbufptr; 378 379 userbufptr = addr.buf = STRUCT_FGETP(addr_tmp, buf); 380 addr.len = STRUCT_FGET(addr_tmp, len); 381 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 382 addr.maxlen = addr.len; 383 if (copyin(userbufptr, addr.buf, addr.len)) 384 error = EFAULT; 385 } 386 svp->sv_addr = addr; 387 if (error) 388 goto errout; 389 390 /* 391 * Get the root fhandle 392 */ 393 if (copyin(STRUCT_FGETP(args, fh), &svp->sv_fhandle, 394 sizeof (svp->sv_fhandle))) { 395 error = EFAULT; 396 goto errout; 397 } 398 399 /* 400 * Check the root fhandle length 401 */ 402 if (svp->sv_fhandle.fh_len > NFS3_FHSIZE || 403 svp->sv_fhandle.fh_len <= 0) { 404 error = EINVAL; 405 #ifdef DEBUG 406 zcmn_err(getzoneid(), CE_WARN, 407 "nfs3_mount: got an invalid fhandle. fh_len = %d", 408 svp->sv_fhandle.fh_len); 409 svp->sv_fhandle.fh_len = NFS_FHANDLE_LEN; 410 nfs_printfhandle(&svp->sv_fhandle); 411 #endif 412 goto errout; 413 } 414 415 /* 416 * Get server's hostname 417 */ 418 if (flags & NFSMNT_HOSTNAME) { 419 error = copyinstr(STRUCT_FGETP(args, hostname), 420 netname, sizeof (netname), &hlen); 421 if (error) 422 goto errout; 423 } else { 424 char *p = "unknown-host"; 425 hlen = strlen(p) + 1; 426 (void) strcpy(netname, p); 427 } 428 svp->sv_hostnamelen = hlen; 429 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 430 (void) strcpy(svp->sv_hostname, netname); 431 432 /* 433 * RDMA MOUNT SUPPORT FOR NFS v3: 434 * Establish, is it possible to use RDMA, if so overload the 435 * knconf with rdma specific knconf and free the orignal. 436 */ 437 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 438 /* 439 * Determine the addr type for RDMA, IPv4 or v6. 440 */ 441 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 442 addr_type = AF_INET; 443 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 444 addr_type = AF_INET6; 445 446 if (rdma_reachable(addr_type, &svp->sv_addr, 447 &rdma_knconf) == 0) { 448 /* 449 * If successful, hijack the orignal knconf and 450 * replace with a new one, depending on the flags. 451 */ 452 svp->sv_origknconf = svp->sv_knconf; 453 svp->sv_knconf = rdma_knconf; 454 knconf = rdma_knconf; 455 } else { 456 if (flags & NFSMNT_TRYRDMA) { 457 #ifdef DEBUG 458 if (rdma_debug) 459 zcmn_err(getzoneid(), CE_WARN, 460 "no RDMA onboard, revert\n"); 461 #endif 462 } 463 464 if (flags & NFSMNT_DORDMA) { 465 /* 466 * If proto=rdma is specified and no RDMA 467 * path to this server is avialable then 468 * ditch this server. 469 * This is not included in the mountable 470 * server list or the replica list. 471 * Check if more servers are specified; 472 * Failover case, otherwise bail out of mount. 473 */ 474 if (STRUCT_FGET(args, nfs_args_ext) == 475 NFS_ARGS_EXTB && STRUCT_FGETP(args, 476 nfs_ext_u.nfs_extB.next) != NULL) { 477 if (uap->flags & MS_RDONLY && 478 !(flags & NFSMNT_SOFT)) { 479 data = (char *) 480 STRUCT_FGETP(args, 481 nfs_ext_u.nfs_extB.next); 482 if (svp_head->sv_next == NULL) { 483 svp_tail = NULL; 484 svp_2ndlast = NULL; 485 sv_free(svp_head); 486 goto more; 487 } else { 488 svp_tail = svp_2ndlast; 489 svp_2ndlast->sv_next = 490 NULL; 491 sv_free(svp); 492 goto more; 493 } 494 } 495 } else { 496 /* 497 * This is the last server specified 498 * in the nfs_args list passed down 499 * and its not rdma capable. 500 */ 501 if (svp_head->sv_next == NULL) { 502 /* 503 * Is this the only one 504 */ 505 error = EINVAL; 506 #ifdef DEBUG 507 if (rdma_debug) 508 zcmn_err(getzoneid(), 509 CE_WARN, 510 "No RDMA srv"); 511 #endif 512 goto errout; 513 } else { 514 /* 515 * There is list, since some 516 * servers specified before 517 * this passed all requirements 518 */ 519 svp_tail = svp_2ndlast; 520 svp_2ndlast->sv_next = NULL; 521 sv_free(svp); 522 goto proceed; 523 } 524 } 525 } 526 } 527 } 528 529 /* 530 * Get the extention data which has the new security data structure. 531 */ 532 if (flags & NFSMNT_NEWARGS) { 533 switch (STRUCT_FGET(args, nfs_args_ext)) { 534 case NFS_ARGS_EXTA: 535 case NFS_ARGS_EXTB: 536 /* 537 * Indicating the application is using the new 538 * sec_data structure to pass in the security 539 * data. 540 */ 541 if (STRUCT_FGETP(args, 542 nfs_ext_u.nfs_extA.secdata) == NULL) { 543 error = EINVAL; 544 } else { 545 error = sec_clnt_loadinfo( 546 (struct sec_data *)STRUCT_FGETP(args, 547 nfs_ext_u.nfs_extA.secdata), 548 &secdata, get_udatamodel()); 549 } 550 break; 551 552 default: 553 error = EINVAL; 554 break; 555 } 556 } else if (flags & NFSMNT_SECURE) { 557 /* 558 * Keep this for backward compatibility to support 559 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 560 */ 561 if (STRUCT_FGETP(args, syncaddr) == NULL) { 562 error = EINVAL; 563 } else { 564 /* 565 * get time sync address. 566 */ 567 if (copyin(STRUCT_FGETP(args, syncaddr), &addr_tmp, 568 STRUCT_SIZE(addr_tmp))) { 569 syncaddr.buf = NULL; 570 error = EFAULT; 571 } else { 572 char *userbufptr; 573 574 userbufptr = syncaddr.buf = 575 STRUCT_FGETP(addr_tmp, buf); 576 syncaddr.len = 577 STRUCT_FGET(addr_tmp, len); 578 syncaddr.buf = kmem_alloc(syncaddr.len, 579 KM_SLEEP); 580 syncaddr.maxlen = syncaddr.len; 581 582 if (copyin(userbufptr, syncaddr.buf, 583 syncaddr.len)) 584 error = EFAULT; 585 } 586 587 /* 588 * get server's netname 589 */ 590 if (!error) { 591 error = copyinstr(STRUCT_FGETP(args, netname), 592 netname, sizeof (netname), &nlen); 593 netname[nlen] = '\0'; 594 } 595 596 if (error && syncaddr.buf != NULL) { 597 kmem_free(syncaddr.buf, syncaddr.len); 598 syncaddr.buf = NULL; 599 } 600 } 601 602 /* 603 * Move security related data to the sec_data structure. 604 */ 605 if (!error) { 606 dh_k4_clntdata_t *data; 607 char *pf, *p; 608 609 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 610 if (flags & NFSMNT_RPCTIMESYNC) 611 secdata->flags |= AUTH_F_RPCTIMESYNC; 612 data = kmem_alloc(sizeof (*data), KM_SLEEP); 613 data->syncaddr = syncaddr; 614 615 /* 616 * duplicate the knconf information for the 617 * new opaque data. 618 */ 619 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 620 *data->knconf = *knconf; 621 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 622 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 623 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 624 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 625 data->knconf->knc_protofmly = pf; 626 data->knconf->knc_proto = p; 627 628 /* move server netname to the sec_data structure */ 629 if (nlen != 0) { 630 data->netname = kmem_alloc(nlen, KM_SLEEP); 631 bcopy(netname, data->netname, nlen); 632 data->netnamelen = (int)nlen; 633 } 634 secdata->secmod = secdata->rpcflavor = AUTH_DES; 635 secdata->data = (caddr_t)data; 636 } 637 } else { 638 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 639 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 640 secdata->data = NULL; 641 } 642 svp->sv_secdata = secdata; 643 if (error) 644 goto errout; 645 646 /* 647 * See bug 1180236. 648 * If mount secure failed, we will fall back to AUTH_NONE 649 * and try again. nfs3rootvp() will turn this back off. 650 * 651 * The NFS Version 3 mount uses the FSINFO and GETATTR 652 * procedures. The server should not care if these procedures 653 * have the proper security flavor, so if mount retries using 654 * AUTH_NONE that does not require a credential setup for root 655 * then the automounter would work without requiring root to be 656 * keylogged into AUTH_DES. 657 */ 658 if (secdata->rpcflavor != AUTH_UNIX && 659 secdata->rpcflavor != AUTH_LOOPBACK) 660 secdata->flags |= AUTH_F_TRYNONE; 661 662 /* 663 * Failover support: 664 * 665 * We may have a linked list of nfs_args structures, 666 * which means the user is looking for failover. If 667 * the mount is either not "read-only" or "soft", 668 * we want to bail out with EINVAL. 669 */ 670 if (STRUCT_FGET(args, nfs_args_ext) == NFS_ARGS_EXTB && 671 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next) != NULL) { 672 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 673 data = (char *)STRUCT_FGETP(args, 674 nfs_ext_u.nfs_extB.next); 675 goto more; 676 } 677 error = EINVAL; 678 goto errout; 679 } 680 681 /* 682 * Determine the zone we're being mounted into. 683 */ 684 zone_hold(mntzone = zone); /* start with this assumption */ 685 if (getzoneid() == GLOBAL_ZONEID) { 686 zone_rele(mntzone); 687 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 688 ASSERT(mntzone != NULL); 689 if (mntzone != zone) { 690 error = EBUSY; 691 goto errout; 692 } 693 } 694 695 if (is_system_labeled()) { 696 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 697 svp->sv_knconf, cr); 698 699 if (error > 0) 700 goto errout; 701 702 if (error == -1) { 703 /* change mount to read-only to prevent write-down */ 704 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 705 } 706 } 707 708 /* 709 * Stop the mount from going any further if the zone is going away. 710 */ 711 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 712 error = EBUSY; 713 goto errout; 714 } 715 716 /* 717 * Get root vnode. 718 */ 719 proceed: 720 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 721 722 if (error) 723 goto errout; 724 725 /* 726 * Set option fields in the mount info record 727 */ 728 mi = VTOMI(rtvp); 729 730 if (svp_head->sv_next) 731 mi->mi_flags |= MI_LLOCK; 732 733 error = nfs_setopts(rtvp, get_udatamodel(), STRUCT_BUF(args)); 734 735 errout: 736 if (error) { 737 if (rtvp != NULL) { 738 rp = VTOR(rtvp); 739 if (rp->r_flags & RHASHED) 740 rp_rmhash(rp); 741 } 742 sv_free(svp_head); 743 if (mi != NULL) { 744 nfs_async_stop(vfsp); 745 nfs_async_manager_stop(vfsp); 746 if (mi->mi_io_kstats) { 747 kstat_delete(mi->mi_io_kstats); 748 mi->mi_io_kstats = NULL; 749 } 750 if (mi->mi_ro_kstats) { 751 kstat_delete(mi->mi_ro_kstats); 752 mi->mi_ro_kstats = NULL; 753 } 754 nfs_free_mi(mi); 755 } 756 } 757 758 if (rtvp != NULL) 759 VN_RELE(rtvp); 760 761 if (mntzone != NULL) 762 zone_rele(mntzone); 763 764 return (error); 765 } 766 767 static int nfs3_dynamic = 0; /* global variable to enable dynamic retrans. */ 768 static ushort_t nfs3_max_threads = 8; /* max number of active async threads */ 769 static uint_t nfs3_bsize = 32 * 1024; /* client `block' size */ 770 static uint_t nfs3_async_clusters = 1; /* # of reqs from each async queue */ 771 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; 772 773 static int 774 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 775 int flags, cred_t *cr, zone_t *zone) 776 { 777 vnode_t *rtvp; 778 mntinfo_t *mi; 779 dev_t nfs_dev; 780 struct vattr va; 781 struct FSINFO3args args; 782 struct FSINFO3res res; 783 int error; 784 int douprintf; 785 rnode_t *rp; 786 int i; 787 uint_t max_transfer_size; 788 struct nfs_stats *nfsstatsp; 789 cred_t *lcr = NULL, *tcr = cr; 790 791 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 792 ASSERT(nfsstatsp != NULL); 793 794 ASSERT(nfs_zone() == zone); 795 /* 796 * Create a mount record and link it to the vfs struct. 797 */ 798 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 799 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 800 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 801 mi->mi_flags = MI_ACL | MI_EXTATTR; 802 if (!(flags & NFSMNT_SOFT)) 803 mi->mi_flags |= MI_HARD; 804 if ((flags & NFSMNT_SEMISOFT)) 805 mi->mi_flags |= MI_SEMISOFT; 806 if ((flags & NFSMNT_NOPRINT)) 807 mi->mi_flags |= MI_NOPRINT; 808 if (flags & NFSMNT_INT) 809 mi->mi_flags |= MI_INT; 810 mi->mi_retrans = NFS_RETRIES; 811 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 812 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 813 mi->mi_timeo = nfs3_cots_timeo; 814 else 815 mi->mi_timeo = NFS_TIMEO; 816 mi->mi_prog = NFS_PROGRAM; 817 mi->mi_vers = NFS_V3; 818 mi->mi_rfsnames = rfsnames_v3; 819 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr; 820 mi->mi_call_type = call_type_v3; 821 mi->mi_ss_call_type = ss_call_type_v3; 822 mi->mi_timer_type = timer_type_v3; 823 mi->mi_aclnames = aclnames_v3; 824 mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr; 825 mi->mi_acl_call_type = acl_call_type_v3; 826 mi->mi_acl_ss_call_type = acl_ss_call_type_v3; 827 mi->mi_acl_timer_type = acl_timer_type_v3; 828 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 829 mi->mi_servers = svp; 830 mi->mi_curr_serv = svp; 831 mi->mi_acregmin = SEC2HR(ACREGMIN); 832 mi->mi_acregmax = SEC2HR(ACREGMAX); 833 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 834 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 835 836 if (nfs3_dynamic) 837 mi->mi_flags |= MI_DYNAMIC; 838 839 if (flags & NFSMNT_DIRECTIO) 840 mi->mi_flags |= MI_DIRECTIO; 841 842 /* 843 * Make a vfs struct for nfs. We do this here instead of below 844 * because rtvp needs a vfs before we can do a getattr on it. 845 * 846 * Assign a unique device id to the mount 847 */ 848 mutex_enter(&nfs_minor_lock); 849 do { 850 nfs_minor = (nfs_minor + 1) & MAXMIN32; 851 nfs_dev = makedevice(nfs_major, nfs_minor); 852 } while (vfs_devismounted(nfs_dev)); 853 mutex_exit(&nfs_minor_lock); 854 855 vfsp->vfs_dev = nfs_dev; 856 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp); 857 vfsp->vfs_data = (caddr_t)mi; 858 vfsp->vfs_fstype = nfsfstyp; 859 860 /* 861 * Verify that nfs3_bsize tuneable is set to an 862 * acceptable value. It be a multiple of PAGESIZE or 863 * file corruption can occur. 864 */ 865 if (nfs3_bsize & PAGEOFFSET) 866 nfs3_bsize &= PAGEMASK; 867 if (nfs3_bsize < PAGESIZE) 868 nfs3_bsize = PAGESIZE; 869 vfsp->vfs_bsize = nfs3_bsize; 870 871 /* 872 * Initialize fields used to support async putpage operations. 873 */ 874 for (i = 0; i < NFS_ASYNC_TYPES; i++) 875 mi->mi_async_clusters[i] = nfs3_async_clusters; 876 mi->mi_async_init_clusters = nfs3_async_clusters; 877 mi->mi_async_curr = &mi->mi_async_reqs[0]; 878 mi->mi_max_threads = nfs3_max_threads; 879 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 880 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 881 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 882 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 883 884 mi->mi_vfsp = vfsp; 885 zone_hold(mi->mi_zone = zone); 886 nfs_mi_zonelist_add(mi); 887 888 /* 889 * Make the root vnode, use it to get attributes, 890 * then remake it with the attributes. 891 */ 892 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle, 893 NULL, vfsp, gethrtime(), cr, NULL, NULL); 894 895 /* 896 * Make the FSINFO calls, primarily at this point to 897 * determine the transfer size. For client failover, 898 * we'll want this to be the minimum bid from any 899 * server, so that we don't overrun stated limits. 900 * 901 * While we're looping, we'll turn off AUTH_F_TRYNONE, 902 * which is only for the mount operation. 903 */ 904 905 mi->mi_tsize = nfs3_tsize(svp->sv_knconf); 906 mi->mi_stsize = mi->mi_tsize; 907 908 mi->mi_curread = nfs3_bsize; 909 mi->mi_curwrite = mi->mi_curread; 910 911 /* 912 * If the uid is set then set the creds for secure mounts 913 * by proxy processes such as automountd. 914 */ 915 if (svp->sv_secdata->uid != 0 && 916 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 917 lcr = crdup(cr); 918 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 919 tcr = lcr; 920 } 921 922 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 923 douprintf = 1; 924 mi->mi_curr_serv = svp; 925 max_transfer_size = nfs3_tsize(svp->sv_knconf); 926 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize); 927 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize); 928 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread); 929 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite); 930 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle; 931 932 error = rfs3call(mi, NFSPROC3_FSINFO, 933 xdr_nfs_fh3, (caddr_t)&args, 934 xdr_FSINFO3res, (caddr_t)&res, tcr, 935 &douprintf, &res.status, 0, NULL); 936 if (error) 937 goto bad; 938 error = geterrno3(res.status); 939 if (error) 940 goto bad; 941 942 /* get type of root node */ 943 if (res.resok.obj_attributes.attributes) { 944 if (res.resok.obj_attributes.attr.type < NF3REG || 945 res.resok.obj_attributes.attr.type > NF3FIFO) { 946 #ifdef DEBUG 947 zcmn_err(getzoneid(), CE_WARN, 948 "NFS3 server %s returned a bad file type for root", 949 svp->sv_hostname); 950 #else 951 zcmn_err(getzoneid(), CE_WARN, 952 "NFS server %s returned a bad file type for root", 953 svp->sv_hostname); 954 #endif 955 error = EINVAL; 956 goto bad; 957 } else { 958 if (rtvp->v_type != VNON && 959 rtvp->v_type != nf3_to_vt[res.resok.obj_attributes.attr.type]) { 960 #ifdef DEBUG 961 zcmn_err(getzoneid(), CE_WARN, 962 "NFS3 server %s returned a different file type for root", 963 svp->sv_hostname); 964 #else 965 zcmn_err(getzoneid(), CE_WARN, 966 "NFS server %s returned a different file type for root", 967 svp->sv_hostname); 968 #endif 969 error = EINVAL; 970 goto bad; 971 } 972 rtvp->v_type = 973 nf3_to_vt[res.resok.obj_attributes.attr.type]; 974 } 975 } 976 977 if (res.resok.rtmax != 0) { 978 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize); 979 if (res.resok.rtpref != 0) { 980 mi->mi_curread = MIN(res.resok.rtpref, 981 mi->mi_curread); 982 } else { 983 mi->mi_curread = MIN(res.resok.rtmax, 984 mi->mi_curread); 985 } 986 } else if (res.resok.rtpref != 0) { 987 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize); 988 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread); 989 } else { 990 #ifdef DEBUG 991 zcmn_err(getzoneid(), CE_WARN, 992 "NFS3 server %s returned 0 for read transfer sizes", 993 svp->sv_hostname); 994 #else 995 zcmn_err(getzoneid(), CE_WARN, 996 "NFS server %s returned 0 for read transfer sizes", 997 svp->sv_hostname); 998 #endif 999 error = EIO; 1000 goto bad; 1001 } 1002 if (res.resok.wtmax != 0) { 1003 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize); 1004 if (res.resok.wtpref != 0) { 1005 mi->mi_curwrite = MIN(res.resok.wtpref, 1006 mi->mi_curwrite); 1007 } else { 1008 mi->mi_curwrite = MIN(res.resok.wtmax, 1009 mi->mi_curwrite); 1010 } 1011 } else if (res.resok.wtpref != 0) { 1012 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize); 1013 mi->mi_curwrite = MIN(res.resok.wtpref, 1014 mi->mi_curwrite); 1015 } else { 1016 #ifdef DEBUG 1017 zcmn_err(getzoneid(), CE_WARN, 1018 "NFS3 server %s returned 0 for write transfer sizes", 1019 svp->sv_hostname); 1020 #else 1021 zcmn_err(getzoneid(), CE_WARN, 1022 "NFS server %s returned 0 for write transfer sizes", 1023 svp->sv_hostname); 1024 #endif 1025 error = EIO; 1026 goto bad; 1027 } 1028 1029 /* 1030 * These signal the ability of the server to create 1031 * hard links and symbolic links, so they really 1032 * aren't relevant if there is more than one server. 1033 * We'll set them here, though it probably looks odd. 1034 */ 1035 if (res.resok.properties & FSF3_LINK) 1036 mi->mi_flags |= MI_LINK; 1037 if (res.resok.properties & FSF3_SYMLINK) 1038 mi->mi_flags |= MI_SYMLINK; 1039 1040 /* Pick up smallest non-zero maxfilesize value */ 1041 if (res.resok.maxfilesize) { 1042 if (mi->mi_maxfilesize) { 1043 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize, 1044 res.resok.maxfilesize); 1045 } else 1046 mi->mi_maxfilesize = res.resok.maxfilesize; 1047 } 1048 1049 /* 1050 * AUTH_F_TRYNONE is only for the mount operation, 1051 * so turn it back off. 1052 */ 1053 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1054 } 1055 mi->mi_curr_serv = mi->mi_servers; 1056 1057 /* 1058 * Start the thread responsible for handling async worker threads. 1059 */ 1060 VFS_HOLD(vfsp); /* add reference for thread */ 1061 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1062 vfsp, 0, minclsyspri); 1063 ASSERT(mi->mi_manager_thread != NULL); 1064 1065 /* 1066 * Initialize kstats 1067 */ 1068 nfs_mnt_kstat_init(vfsp); 1069 1070 /* If we didn't get a type, get one now */ 1071 if (rtvp->v_type == VNON) { 1072 va.va_mask = AT_ALL; 1073 1074 error = nfs3getattr(rtvp, &va, tcr); 1075 if (error) 1076 goto bad; 1077 rtvp->v_type = va.va_type; 1078 } 1079 1080 mi->mi_type = rtvp->v_type; 1081 1082 *rtvpp = rtvp; 1083 if (lcr != NULL) 1084 crfree(lcr); 1085 1086 return (0); 1087 bad: 1088 /* 1089 * An error occurred somewhere, need to clean up... 1090 * We need to release our reference to the root vnode and 1091 * destroy the mntinfo struct that we just created. 1092 */ 1093 if (lcr != NULL) 1094 crfree(lcr); 1095 rp = VTOR(rtvp); 1096 if (rp->r_flags & RHASHED) 1097 rp_rmhash(rp); 1098 VN_RELE(rtvp); 1099 nfs_async_stop(vfsp); 1100 nfs_async_manager_stop(vfsp); 1101 if (mi->mi_io_kstats) { 1102 kstat_delete(mi->mi_io_kstats); 1103 mi->mi_io_kstats = NULL; 1104 } 1105 if (mi->mi_ro_kstats) { 1106 kstat_delete(mi->mi_ro_kstats); 1107 mi->mi_ro_kstats = NULL; 1108 } 1109 nfs_free_mi(mi); 1110 *rtvpp = NULL; 1111 return (error); 1112 } 1113 1114 /* 1115 * vfs operations 1116 */ 1117 static int 1118 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1119 { 1120 mntinfo_t *mi; 1121 ushort_t omax; 1122 1123 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1124 return (EPERM); 1125 1126 mi = VFTOMI(vfsp); 1127 if (flag & MS_FORCE) { 1128 vfsp->vfs_flag |= VFS_UNMOUNTED; 1129 /* 1130 * We need to stop the manager thread explicitly; the worker 1131 * threads can time out and exit on their own. 1132 */ 1133 nfs_async_manager_stop(vfsp); 1134 destroy_rtable(vfsp, cr); 1135 if (mi->mi_io_kstats) { 1136 kstat_delete(mi->mi_io_kstats); 1137 mi->mi_io_kstats = NULL; 1138 } 1139 if (mi->mi_ro_kstats) { 1140 kstat_delete(mi->mi_ro_kstats); 1141 mi->mi_ro_kstats = NULL; 1142 } 1143 return (0); 1144 } 1145 /* 1146 * Wait until all asynchronous putpage operations on 1147 * this file system are complete before flushing rnodes 1148 * from the cache. 1149 */ 1150 omax = mi->mi_max_threads; 1151 if (nfs_async_stop_sig(vfsp)) { 1152 return (EINTR); 1153 } 1154 rflush(vfsp, cr); 1155 /* 1156 * If there are any active vnodes on this file system, 1157 * then the file system is busy and can't be umounted. 1158 */ 1159 if (check_rtable(vfsp)) { 1160 mutex_enter(&mi->mi_async_lock); 1161 mi->mi_max_threads = omax; 1162 mutex_exit(&mi->mi_async_lock); 1163 return (EBUSY); 1164 } 1165 /* 1166 * The unmount can't fail from now on; stop the worker thread manager. 1167 */ 1168 nfs_async_manager_stop(vfsp); 1169 /* 1170 * Destroy all rnodes belonging to this file system from the 1171 * rnode hash queues and purge any resources allocated to 1172 * them. 1173 */ 1174 destroy_rtable(vfsp, cr); 1175 if (mi->mi_io_kstats) { 1176 kstat_delete(mi->mi_io_kstats); 1177 mi->mi_io_kstats = NULL; 1178 } 1179 if (mi->mi_ro_kstats) { 1180 kstat_delete(mi->mi_ro_kstats); 1181 mi->mi_ro_kstats = NULL; 1182 } 1183 return (0); 1184 } 1185 1186 /* 1187 * find root of nfs 1188 */ 1189 static int 1190 nfs3_root(vfs_t *vfsp, vnode_t **vpp) 1191 { 1192 mntinfo_t *mi; 1193 vnode_t *vp; 1194 servinfo_t *svp; 1195 1196 mi = VFTOMI(vfsp); 1197 1198 if (nfs_zone() != mi->mi_zone) 1199 return (EPERM); 1200 1201 svp = mi->mi_curr_serv; 1202 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1203 mutex_enter(&svp->sv_lock); 1204 svp->sv_flags &= ~SV_ROOT_STALE; 1205 mutex_exit(&svp->sv_lock); 1206 return (ENOENT); 1207 } 1208 1209 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle, 1210 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1211 1212 if (VTOR(vp)->r_flags & RSTALE) { 1213 VN_RELE(vp); 1214 return (ENOENT); 1215 } 1216 1217 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1218 1219 vp->v_type = mi->mi_type; 1220 1221 *vpp = vp; 1222 1223 return (0); 1224 } 1225 1226 /* 1227 * Get file system statistics. 1228 */ 1229 static int 1230 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1231 { 1232 int error; 1233 struct mntinfo *mi; 1234 struct FSSTAT3args args; 1235 struct FSSTAT3res res; 1236 int douprintf; 1237 failinfo_t fi; 1238 vnode_t *vp; 1239 cred_t *cr; 1240 hrtime_t t; 1241 1242 mi = VFTOMI(vfsp); 1243 if (nfs_zone() != mi->mi_zone) 1244 return (EPERM); 1245 error = nfs3_root(vfsp, &vp); 1246 if (error) 1247 return (error); 1248 1249 cr = CRED(); 1250 1251 args.fsroot = *VTOFH3(vp); 1252 fi.vp = vp; 1253 fi.fhp = (caddr_t)&args.fsroot; 1254 fi.copyproc = nfs3copyfh; 1255 fi.lookupproc = nfs3lookup; 1256 fi.xattrdirproc = acl_getxattrdir3; 1257 1258 douprintf = 1; 1259 1260 t = gethrtime(); 1261 1262 error = rfs3call(mi, NFSPROC3_FSSTAT, 1263 xdr_nfs_fh3, (caddr_t)&args, 1264 xdr_FSSTAT3res, (caddr_t)&res, cr, 1265 &douprintf, &res.status, 0, &fi); 1266 1267 if (error) { 1268 VN_RELE(vp); 1269 return (error); 1270 } 1271 1272 error = geterrno3(res.status); 1273 if (!error) { 1274 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1275 sbp->f_bsize = MAXBSIZE; 1276 sbp->f_frsize = DEV_BSIZE; 1277 /* 1278 * Allow -1 fields to pass through unconverted. These 1279 * indicate "don't know" fields. 1280 */ 1281 if (res.resok.tbytes == (size3)-1) 1282 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes; 1283 else { 1284 sbp->f_blocks = (fsblkcnt64_t) 1285 (res.resok.tbytes / DEV_BSIZE); 1286 } 1287 if (res.resok.fbytes == (size3)-1) 1288 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes; 1289 else { 1290 sbp->f_bfree = (fsblkcnt64_t) 1291 (res.resok.fbytes / DEV_BSIZE); 1292 } 1293 if (res.resok.abytes == (size3)-1) 1294 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes; 1295 else { 1296 sbp->f_bavail = (fsblkcnt64_t) 1297 (res.resok.abytes / DEV_BSIZE); 1298 } 1299 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles; 1300 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles; 1301 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles; 1302 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1303 (void) strncpy(sbp->f_basetype, 1304 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1305 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1306 sbp->f_namemax = (ulong_t)-1; 1307 } else { 1308 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 1309 PURGE_STALE_FH(error, vp, cr); 1310 } 1311 1312 VN_RELE(vp); 1313 1314 return (error); 1315 } 1316 1317 static kmutex_t nfs3_syncbusy; 1318 1319 /* 1320 * Flush dirty nfs files for file system vfsp. 1321 * If vfsp == NULL, all nfs files are flushed. 1322 */ 1323 /* ARGSUSED */ 1324 static int 1325 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr) 1326 { 1327 /* 1328 * Cross-zone calls are OK here, since this translates to a 1329 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1330 */ 1331 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) { 1332 rflush(vfsp, cr); 1333 mutex_exit(&nfs3_syncbusy); 1334 } 1335 return (0); 1336 } 1337 1338 /* ARGSUSED */ 1339 static int 1340 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1341 { 1342 int error; 1343 nfs_fh3 fh; 1344 vnode_t *vp; 1345 struct vattr va; 1346 1347 if (fidp->fid_len > NFS3_FHSIZE) { 1348 *vpp = NULL; 1349 return (ESTALE); 1350 } 1351 1352 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1353 return (EPERM); 1354 fh.fh3_length = fidp->fid_len; 1355 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length); 1356 1357 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1358 1359 if (VTOR(vp)->r_flags & RSTALE) { 1360 VN_RELE(vp); 1361 *vpp = NULL; 1362 return (ENOENT); 1363 } 1364 1365 if (vp->v_type == VNON) { 1366 va.va_mask = AT_ALL; 1367 error = nfs3getattr(vp, &va, CRED()); 1368 if (error) { 1369 VN_RELE(vp); 1370 *vpp = NULL; 1371 return (error); 1372 } 1373 vp->v_type = va.va_type; 1374 } 1375 1376 *vpp = vp; 1377 1378 return (0); 1379 } 1380 1381 /* ARGSUSED */ 1382 static int 1383 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why) 1384 { 1385 vnode_t *rtvp; 1386 char root_hostname[SYS_NMLN+1]; 1387 struct servinfo *svp; 1388 int error; 1389 int vfsflags; 1390 size_t size; 1391 char *root_path; 1392 struct pathname pn; 1393 char *name; 1394 cred_t *cr; 1395 struct nfs_args args; /* nfs mount arguments */ 1396 static char token[10]; 1397 1398 bzero(&args, sizeof (args)); 1399 1400 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1401 clkset(-1L); /* hack for now - until we get time svc? */ 1402 1403 if (why == ROOT_REMOUNT) { 1404 /* 1405 * Shouldn't happen. 1406 */ 1407 panic("nfs3_mountroot: why == ROOT_REMOUNT"); 1408 } 1409 1410 if (why == ROOT_UNMOUNT) { 1411 /* 1412 * Nothing to do for NFS. 1413 */ 1414 return (0); 1415 } 1416 1417 /* 1418 * why == ROOT_INIT 1419 */ 1420 1421 name = token; 1422 *name = 0; 1423 getfsname("root", name, sizeof (token)); 1424 1425 pn_alloc(&pn); 1426 root_path = pn.pn_path; 1427 1428 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1429 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1430 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1431 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1432 1433 /* 1434 * Get server address 1435 * Get the root fhandle 1436 * Get server's transport 1437 * Get server's hostname 1438 * Get options 1439 */ 1440 args.addr = &svp->sv_addr; 1441 args.fh = (char *)&svp->sv_fhandle; 1442 args.knconf = svp->sv_knconf; 1443 args.hostname = root_hostname; 1444 vfsflags = 0; 1445 if (error = mount_root(*name ? name : "root", root_path, NFS_V3, 1446 &args, &vfsflags)) { 1447 if (error == EPROTONOSUPPORT) 1448 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: " 1449 "mount_root failed: server doesn't support NFS V3"); 1450 else 1451 nfs_cmn_err(error, CE_WARN, 1452 "nfs3_mountroot: mount_root failed: %m"); 1453 sv_free(svp); 1454 pn_free(&pn); 1455 return (error); 1456 } 1457 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1458 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1459 (void) strcpy(svp->sv_hostname, root_hostname); 1460 1461 /* 1462 * Force root partition to always be mounted with AUTH_UNIX for now 1463 */ 1464 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1465 svp->sv_secdata->secmod = AUTH_UNIX; 1466 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1467 svp->sv_secdata->data = NULL; 1468 1469 cr = crgetcred(); 1470 rtvp = NULL; 1471 1472 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1473 1474 crfree(cr); 1475 1476 if (error) { 1477 pn_free(&pn); 1478 goto errout; 1479 } 1480 1481 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1482 if (error) { 1483 nfs_cmn_err(error, CE_WARN, 1484 "nfs3_mountroot: invalid root mount options"); 1485 pn_free(&pn); 1486 goto errout; 1487 } 1488 1489 (void) vfs_lock_wait(vfsp); 1490 vfs_add(NULL, vfsp, vfsflags); 1491 vfs_unlock(vfsp); 1492 1493 size = strlen(svp->sv_hostname); 1494 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1495 rootfs.bo_name[size] = ':'; 1496 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1497 1498 pn_free(&pn); 1499 1500 errout: 1501 if (error) { 1502 sv_free(svp); 1503 nfs_async_stop(vfsp); 1504 nfs_async_manager_stop(vfsp); 1505 } 1506 1507 if (rtvp != NULL) 1508 VN_RELE(rtvp); 1509 1510 return (error); 1511 } 1512 1513 /* 1514 * Initialization routine for VFS routines. Should only be called once 1515 */ 1516 int 1517 nfs3_vfsinit(void) 1518 { 1519 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1520 return (0); 1521 } 1522 1523 void 1524 nfs3_vfsfini(void) 1525 { 1526 mutex_destroy(&nfs3_syncbusy); 1527 } 1528 1529 void 1530 nfs3_freevfs(vfs_t *vfsp) 1531 { 1532 mntinfo_t *mi; 1533 servinfo_t *svp; 1534 1535 /* free up the resources */ 1536 mi = VFTOMI(vfsp); 1537 svp = mi->mi_servers; 1538 mi->mi_servers = mi->mi_curr_serv = NULL; 1539 sv_free(svp); 1540 1541 /* 1542 * By this time we should have already deleted the 1543 * mi kstats in the unmount code. If they are still around 1544 * somethings wrong 1545 */ 1546 ASSERT(mi->mi_io_kstats == NULL); 1547 nfs_free_mi(mi); 1548 } 1549