1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/pathname.h> 40 #include <sys/sysmacros.h> 41 #include <sys/kmem.h> 42 #include <sys/mkdev.h> 43 #include <sys/mount.h> 44 #include <sys/mntent.h> 45 #include <sys/statvfs.h> 46 #include <sys/errno.h> 47 #include <sys/debug.h> 48 #include <sys/cmn_err.h> 49 #include <sys/utsname.h> 50 #include <sys/bootconf.h> 51 #include <sys/modctl.h> 52 #include <sys/acl.h> 53 #include <sys/flock.h> 54 #include <sys/policy.h> 55 #include <sys/zone.h> 56 #include <sys/class.h> 57 #include <sys/socket.h> 58 #include <sys/netconfig.h> 59 #include <sys/tsol/tnet.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/clnt.h> 64 65 #include <nfs/nfs.h> 66 #include <nfs/nfs_clnt.h> 67 #include <nfs/rnode.h> 68 #include <nfs/mount.h> 69 #include <nfs/nfs_acl.h> 70 71 #include <fs/fs_subr.h> 72 73 /* 74 * From rpcsec module (common/rpcsec). 75 */ 76 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 77 extern void sec_clnt_freeinfo(struct sec_data *); 78 79 /* 80 * The order and contents of this structure must be kept in sync with that of 81 * rfsreqcnt_v3_tmpl in nfs_stats.c 82 */ 83 static char *rfsnames_v3[] = { 84 "null", "getattr", "setattr", "lookup", "access", "readlink", "read", 85 "write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir", 86 "rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo", 87 "pathconf", "commit" 88 }; 89 90 /* 91 * This table maps from NFS protocol number into call type. 92 * Zero means a "Lookup" type call 93 * One means a "Read" type call 94 * Two means a "Write" type call 95 * This is used to select a default time-out. 96 */ 97 static uchar_t call_type_v3[] = { 98 0, 0, 1, 0, 0, 0, 1, 99 2, 2, 2, 2, 2, 2, 2, 100 2, 2, 1, 2, 0, 0, 0, 101 2 }; 102 103 /* 104 * Similar table, but to determine which timer to use 105 * (only real reads and writes!) 106 */ 107 static uchar_t timer_type_v3[] = { 108 0, 0, 0, 0, 0, 0, 1, 109 2, 0, 0, 0, 0, 0, 0, 110 0, 0, 1, 1, 0, 0, 0, 111 0 }; 112 113 /* 114 * This table maps from NFS protocol number into a call type 115 * for the semisoft mount option. 116 * Zero means do not repeat operation. 117 * One means repeat. 118 */ 119 static uchar_t ss_call_type_v3[] = { 120 0, 0, 1, 0, 0, 0, 0, 121 1, 1, 1, 1, 1, 1, 1, 122 1, 1, 0, 0, 0, 0, 0, 123 1 }; 124 125 /* 126 * nfs3 vfs operations. 127 */ 128 static int nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 129 static int nfs3_unmount(vfs_t *, int, cred_t *); 130 static int nfs3_root(vfs_t *, vnode_t **); 131 static int nfs3_statvfs(vfs_t *, struct statvfs64 *); 132 static int nfs3_sync(vfs_t *, short, cred_t *); 133 static int nfs3_vget(vfs_t *, vnode_t **, fid_t *); 134 static int nfs3_mountroot(vfs_t *, whymountroot_t); 135 static void nfs3_freevfs(vfs_t *); 136 137 static int nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *, 138 int, cred_t *, zone_t *); 139 140 /* 141 * Initialize the vfs structure 142 */ 143 144 static int nfs3fstyp; 145 vfsops_t *nfs3_vfsops; 146 147 /* 148 * Debug variable to check for rdma based 149 * transport startup and cleanup. Controlled 150 * through /etc/system. Off by default. 151 */ 152 extern int rdma_debug; 153 154 int 155 nfs3init(int fstyp, char *name) 156 { 157 static const fs_operation_def_t nfs3_vfsops_template[] = { 158 VFSNAME_MOUNT, nfs3_mount, 159 VFSNAME_UNMOUNT, nfs3_unmount, 160 VFSNAME_ROOT, nfs3_root, 161 VFSNAME_STATVFS, nfs3_statvfs, 162 VFSNAME_SYNC, (fs_generic_func_p) nfs3_sync, 163 VFSNAME_VGET, nfs3_vget, 164 VFSNAME_MOUNTROOT, nfs3_mountroot, 165 VFSNAME_FREEVFS, (fs_generic_func_p)nfs3_freevfs, 166 NULL, NULL 167 }; 168 int error; 169 170 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops); 171 if (error != 0) { 172 zcmn_err(GLOBAL_ZONEID, CE_WARN, 173 "nfs3init: bad vfs ops template"); 174 return (error); 175 } 176 177 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops); 178 if (error != 0) { 179 (void) vfs_freevfsops_by_type(fstyp); 180 zcmn_err(GLOBAL_ZONEID, CE_WARN, 181 "nfs3init: bad vnode ops template"); 182 return (error); 183 } 184 185 nfs3fstyp = fstyp; 186 187 return (0); 188 } 189 190 void 191 nfs3fini(void) 192 { 193 } 194 195 static void 196 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 197 { 198 199 if (fh) 200 kmem_free(fh, sizeof (*fh)); 201 202 if (nargs->knconf) { 203 if (nargs->knconf->knc_protofmly) 204 kmem_free(nargs->knconf->knc_protofmly, 205 KNC_STRSIZE); 206 if (nargs->knconf->knc_proto) 207 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 208 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 209 nargs->knconf = NULL; 210 } 211 212 if (nargs->fh) { 213 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 214 nargs->fh = NULL; 215 } 216 217 if (nargs->hostname) { 218 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 219 nargs->hostname = NULL; 220 } 221 222 if (nargs->addr) { 223 if (nargs->addr->buf) { 224 ASSERT(nargs->addr->len); 225 kmem_free(nargs->addr->buf, nargs->addr->len); 226 } 227 kmem_free(nargs->addr, sizeof (struct netbuf)); 228 nargs->addr = NULL; 229 } 230 231 if (nargs->syncaddr) { 232 ASSERT(nargs->syncaddr->len); 233 if (nargs->syncaddr->buf) { 234 ASSERT(nargs->syncaddr->len); 235 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 236 } 237 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 238 nargs->syncaddr = NULL; 239 } 240 241 if (nargs->netname) { 242 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 243 nargs->netname = NULL; 244 } 245 246 if (nargs->nfs_ext_u.nfs_extA.secdata) { 247 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 248 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 249 } 250 } 251 252 static int 253 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 254 { 255 256 int error; 257 size_t nlen; /* length of netname */ 258 size_t hlen; /* length of hostname */ 259 char netname[MAXNETNAMELEN+1]; /* server's netname */ 260 struct netbuf addr; /* server's address */ 261 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 262 struct knetconfig *knconf; /* transport knetconfig structure */ 263 struct sec_data *secdata = NULL; /* security data */ 264 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 265 STRUCT_DECL(knetconfig, knconf_tmp); 266 STRUCT_DECL(netbuf, addr_tmp); 267 int flags; 268 char *p, *pf; 269 char *userbufptr; 270 271 272 bzero(nargs, sizeof (*nargs)); 273 274 STRUCT_INIT(args, get_udatamodel()); 275 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 276 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 277 return (EFAULT); 278 279 nargs->wsize = STRUCT_FGET(args, wsize); 280 nargs->rsize = STRUCT_FGET(args, rsize); 281 nargs->timeo = STRUCT_FGET(args, timeo); 282 nargs->retrans = STRUCT_FGET(args, retrans); 283 nargs->acregmin = STRUCT_FGET(args, acregmin); 284 nargs->acregmax = STRUCT_FGET(args, acregmax); 285 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 286 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 287 288 flags = STRUCT_FGET(args, flags); 289 nargs->flags = flags; 290 291 addr.buf = NULL; 292 syncaddr.buf = NULL; 293 294 /* 295 * Allocate space for a knetconfig structure and 296 * its strings and copy in from user-land. 297 */ 298 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 299 STRUCT_INIT(knconf_tmp, get_udatamodel()); 300 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 301 STRUCT_SIZE(knconf_tmp))) { 302 kmem_free(knconf, sizeof (*knconf)); 303 return (EFAULT); 304 } 305 306 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 307 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 308 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 309 if (get_udatamodel() != DATAMODEL_LP64) { 310 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 311 } else { 312 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 313 } 314 315 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 316 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 317 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 318 if (error) { 319 kmem_free(pf, KNC_STRSIZE); 320 kmem_free(p, KNC_STRSIZE); 321 kmem_free(knconf, sizeof (*knconf)); 322 return (error); 323 } 324 325 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 326 if (error) { 327 kmem_free(pf, KNC_STRSIZE); 328 kmem_free(p, KNC_STRSIZE); 329 kmem_free(knconf, sizeof (*knconf)); 330 return (error); 331 } 332 333 334 knconf->knc_protofmly = pf; 335 knconf->knc_proto = p; 336 337 nargs->knconf = knconf; 338 /* 339 * Get server address 340 */ 341 STRUCT_INIT(addr_tmp, get_udatamodel()); 342 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 343 STRUCT_SIZE(addr_tmp))) { 344 error = EFAULT; 345 goto errout; 346 } 347 348 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 349 userbufptr = STRUCT_FGETP(addr_tmp, buf); 350 addr.len = STRUCT_FGET(addr_tmp, len); 351 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 352 addr.maxlen = addr.len; 353 if (copyin(userbufptr, addr.buf, addr.len)) { 354 kmem_free(addr.buf, addr.len); 355 error = EFAULT; 356 goto errout; 357 } 358 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 359 360 /* 361 * Get the root fhandle 362 */ 363 364 if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) { 365 error = EFAULT; 366 goto errout; 367 } 368 369 370 /* 371 * Get server's hostname 372 */ 373 if (flags & NFSMNT_HOSTNAME) { 374 error = copyinstr(STRUCT_FGETP(args, hostname), 375 netname, sizeof (netname), &hlen); 376 if (error) 377 goto errout; 378 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 379 (void) strcpy(nargs->hostname, netname); 380 } else { 381 nargs->hostname = NULL; 382 } 383 384 385 /* 386 * If there are syncaddr and netname data, load them in. This is 387 * to support data needed for NFSV4 when AUTH_DH is the negotiated 388 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 389 */ 390 netname[0] = '\0'; 391 if (flags & NFSMNT_SECURE) { 392 if (STRUCT_FGETP(args, syncaddr) == NULL) { 393 error = EINVAL; 394 goto errout; 395 } 396 /* get syncaddr */ 397 STRUCT_INIT(addr_tmp, get_udatamodel()); 398 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 399 STRUCT_SIZE(addr_tmp))) { 400 error = EINVAL; 401 goto errout; 402 } 403 userbufptr = STRUCT_FGETP(addr_tmp, buf); 404 syncaddr.len = STRUCT_FGET(addr_tmp, len); 405 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 406 syncaddr.maxlen = syncaddr.len; 407 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 408 kmem_free(syncaddr.buf, syncaddr.len); 409 error = EFAULT; 410 goto errout; 411 } 412 413 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 414 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 415 416 ASSERT(STRUCT_FGETP(args, netname)); 417 418 if (copyinstr(STRUCT_FGETP(args, netname), netname, 419 sizeof (netname), &nlen)) { 420 error = EFAULT; 421 goto errout; 422 } 423 424 netname[nlen] = '\0'; 425 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 426 (void) strcpy(nargs->netname, netname); 427 } 428 429 /* 430 * Get the extention data which has the security data structure. 431 * This includes data for AUTH_SYS as well. 432 */ 433 if (flags & NFSMNT_NEWARGS) { 434 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 435 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 436 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 437 /* 438 * Indicating the application is using the new 439 * sec_data structure to pass in the security 440 * data. 441 */ 442 if (STRUCT_FGETP(args, 443 nfs_ext_u.nfs_extA.secdata) != NULL) { 444 error = sec_clnt_loadinfo( 445 (struct sec_data *)STRUCT_FGETP(args, 446 nfs_ext_u.nfs_extA.secdata), 447 &secdata, get_udatamodel()); 448 } 449 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 450 } 451 } 452 453 if (error) 454 goto errout; 455 456 /* 457 * Failover support: 458 * 459 * We may have a linked list of nfs_args structures, 460 * which means the user is looking for failover. If 461 * the mount is either not "read-only" or "soft", 462 * we want to bail out with EINVAL. 463 */ 464 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 465 nargs->nfs_ext_u.nfs_extB.next = 466 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 467 468 errout: 469 if (error) 470 nfs3_free_args(nargs, fh); 471 472 return (error); 473 } 474 475 476 /* 477 * nfs mount vfsop 478 * Set up mount info record and attach it to vfs struct. 479 */ 480 static int 481 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 482 { 483 struct nfs_args *args = NULL; 484 nfs_fhandle *fhandle = NULL; 485 char *data = uap->dataptr; 486 int error; 487 vnode_t *rtvp; /* the server's root */ 488 mntinfo_t *mi; /* mount info, pointed at by vfs */ 489 size_t nlen; /* length of netname */ 490 struct knetconfig *knconf; /* transport knetconfig structure */ 491 struct knetconfig *rdma_knconf; /* rdma transport structure */ 492 rnode_t *rp; 493 struct servinfo *svp; /* nfs server info */ 494 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 495 struct servinfo *svp_head; /* first nfs server info */ 496 struct servinfo *svp_2ndlast; /* 2nd last in server info list */ 497 struct sec_data *secdata; /* security data */ 498 int flags, addr_type; 499 zone_t *zone = nfs_zone(); 500 zone_t *mntzone = NULL; 501 502 503 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 504 return (EPERM); 505 506 if (mvp->v_type != VDIR) 507 return (ENOTDIR); 508 509 /* 510 * get arguments 511 * 512 * nfs_args is now versioned and is extensible, so 513 * uap->datalen might be different from sizeof (args) 514 * in a compatible situation. 515 */ 516 517 more: 518 519 if (!(uap->flags & MS_SYSSPACE)) { 520 if (args == NULL) 521 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 522 else { 523 nfs3_free_args(args, fhandle); 524 fhandle = NULL; 525 } 526 if (fhandle == NULL) 527 fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP); 528 error = nfs3_copyin(data, uap->datalen, args, fhandle); 529 if (error) { 530 if (args) 531 kmem_free(args, sizeof (*args)); 532 return (error); 533 } 534 } else { 535 args = (struct nfs_args *)data; 536 fhandle = (nfs_fhandle *)args->fh; 537 } 538 539 540 flags = args->flags; 541 542 if (uap->flags & MS_REMOUNT) { 543 size_t n; 544 char name[FSTYPSZ]; 545 546 if (uap->flags & MS_SYSSPACE) { 547 error = copystr(uap->fstype, name, FSTYPSZ, &n); 548 } else { 549 nfs3_free_args(args, fhandle); 550 kmem_free(args, sizeof (*args)); 551 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 552 } 553 if (error) { 554 if (error == ENAMETOOLONG) 555 return (EINVAL); 556 return (error); 557 } 558 559 /* 560 * This check is to ensure that the request is a 561 * genuine nfs remount request. 562 */ 563 564 if (strncmp(name, "nfs", 3) != 0) 565 return (EINVAL); 566 567 /* 568 * If the request changes the locking type, disallow the 569 * remount, 570 * because it's questionable whether we can transfer the 571 * locking state correctly. 572 */ 573 574 if ((mi = VFTOMI(vfsp)) != NULL) { 575 uint_t new_mi_llock; 576 uint_t old_mi_llock; 577 578 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 579 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 580 if (old_mi_llock != new_mi_llock) 581 return (EBUSY); 582 } 583 return (0); 584 } 585 586 mutex_enter(&mvp->v_lock); 587 if (!(uap->flags & MS_OVERLAY) && 588 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 589 mutex_exit(&mvp->v_lock); 590 if (!(uap->flags & MS_SYSSPACE)) { 591 nfs3_free_args(args, fhandle); 592 kmem_free(args, sizeof (*args)); 593 } 594 return (EBUSY); 595 } 596 mutex_exit(&mvp->v_lock); 597 598 /* make sure things are zeroed for errout: */ 599 rtvp = NULL; 600 mi = NULL; 601 secdata = NULL; 602 603 /* 604 * A valid knetconfig structure is required. 605 */ 606 if (!(flags & NFSMNT_KNCONF)) { 607 if (!(uap->flags & MS_SYSSPACE)) { 608 nfs3_free_args(args, fhandle); 609 kmem_free(args, sizeof (*args)); 610 } 611 return (EINVAL); 612 } 613 614 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 615 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 616 if (!(uap->flags & MS_SYSSPACE)) { 617 nfs3_free_args(args, fhandle); 618 kmem_free(args, sizeof (*args)); 619 } 620 return (EINVAL); 621 } 622 623 /* 624 * Allocate a servinfo struct. 625 */ 626 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 627 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 628 if (svp_tail) { 629 svp_2ndlast = svp_tail; 630 svp_tail->sv_next = svp; 631 } else { 632 svp_head = svp; 633 svp_2ndlast = svp; 634 } 635 636 svp_tail = svp; 637 638 svp->sv_knconf = args->knconf; 639 args->knconf = NULL; 640 641 if (args->addr == NULL || args->addr->buf == NULL) { 642 error = EINVAL; 643 goto errout; 644 } 645 646 svp->sv_addr.maxlen = args->addr->maxlen; 647 svp->sv_addr.len = args->addr->len; 648 svp->sv_addr.buf = args->addr->buf; 649 args->addr->buf = NULL; 650 651 /* 652 * Check the root fhandle length 653 */ 654 ASSERT(fhandle); 655 if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) { 656 error = EINVAL; 657 #ifdef DEBUG 658 zcmn_err(getzoneid(), CE_WARN, 659 "nfs3_mount: got an invalid fhandle. fh_len = %d", 660 fhandle->fh_len); 661 fhandle->fh_len = NFS_FHANDLE_LEN; 662 nfs_printfhandle(fhandle); 663 #endif 664 goto errout; 665 } 666 667 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 668 svp->sv_fhandle.fh_len = fhandle->fh_len; 669 670 /* 671 * Get server's hostname 672 */ 673 if (flags & NFSMNT_HOSTNAME) { 674 if (args->hostname == NULL) { 675 error = EINVAL; 676 goto errout; 677 } 678 svp->sv_hostnamelen = strlen(args->hostname) + 1; 679 svp->sv_hostname = args->hostname; 680 args->hostname = NULL; 681 } else { 682 char *p = "unknown-host"; 683 svp->sv_hostnamelen = strlen(p) + 1; 684 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 685 (void) strcpy(svp->sv_hostname, p); 686 } 687 688 689 /* 690 * RDMA MOUNT SUPPORT FOR NFS v3: 691 * Establish, is it possible to use RDMA, if so overload the 692 * knconf with rdma specific knconf and free the orignal. 693 */ 694 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 695 /* 696 * Determine the addr type for RDMA, IPv4 or v6. 697 */ 698 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 699 addr_type = AF_INET; 700 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 701 addr_type = AF_INET6; 702 703 if (rdma_reachable(addr_type, &svp->sv_addr, 704 &rdma_knconf) == 0) { 705 /* 706 * If successful, hijack the orignal knconf and 707 * replace with a new one, depending on the flags. 708 */ 709 svp->sv_origknconf = svp->sv_knconf; 710 svp->sv_knconf = rdma_knconf; 711 knconf = rdma_knconf; 712 } else { 713 if (flags & NFSMNT_TRYRDMA) { 714 #ifdef DEBUG 715 if (rdma_debug) 716 zcmn_err(getzoneid(), CE_WARN, 717 "no RDMA onboard, revert\n"); 718 #endif 719 } 720 721 if (flags & NFSMNT_DORDMA) { 722 /* 723 * If proto=rdma is specified and no RDMA 724 * path to this server is avialable then 725 * ditch this server. 726 * This is not included in the mountable 727 * server list or the replica list. 728 * Check if more servers are specified; 729 * Failover case, otherwise bail out of mount. 730 */ 731 if (args->nfs_args_ext == 732 NFS_ARGS_EXTB && 733 args->nfs_ext_u.nfs_extB.next 734 != NULL) { 735 data = (char *) 736 args->nfs_ext_u.nfs_extB.next; 737 if (uap->flags & MS_RDONLY && 738 !(flags & NFSMNT_SOFT)) { 739 if (svp_head->sv_next == NULL) { 740 svp_tail = NULL; 741 svp_2ndlast = NULL; 742 sv_free(svp_head); 743 goto more; 744 } else { 745 svp_tail = svp_2ndlast; 746 svp_2ndlast->sv_next = 747 NULL; 748 sv_free(svp); 749 goto more; 750 } 751 } 752 } else { 753 /* 754 * This is the last server specified 755 * in the nfs_args list passed down 756 * and its not rdma capable. 757 */ 758 if (svp_head->sv_next == NULL) { 759 /* 760 * Is this the only one 761 */ 762 error = EINVAL; 763 #ifdef DEBUG 764 if (rdma_debug) 765 zcmn_err(getzoneid(), 766 CE_WARN, 767 "No RDMA srv"); 768 #endif 769 goto errout; 770 } else { 771 /* 772 * There is list, since some 773 * servers specified before 774 * this passed all requirements 775 */ 776 svp_tail = svp_2ndlast; 777 svp_2ndlast->sv_next = NULL; 778 sv_free(svp); 779 goto proceed; 780 } 781 } 782 } 783 } 784 } 785 786 /* 787 * Get the extention data which has the new security data structure. 788 */ 789 if (flags & NFSMNT_NEWARGS) { 790 switch (args->nfs_args_ext) { 791 case NFS_ARGS_EXTA: 792 case NFS_ARGS_EXTB: 793 /* 794 * Indicating the application is using the new 795 * sec_data structure to pass in the security 796 * data. 797 */ 798 secdata = args->nfs_ext_u.nfs_extA.secdata; 799 if (args->nfs_ext_u.nfs_extA.secdata == NULL) { 800 error = EINVAL; 801 } else { 802 /* 803 * Need to validate the flavor here if 804 * sysspace, userspace was already 805 * validate from the nfs_copyin function. 806 */ 807 switch (secdata->rpcflavor) { 808 case AUTH_NONE: 809 case AUTH_UNIX: 810 case AUTH_LOOPBACK: 811 case AUTH_DES: 812 case RPCSEC_GSS: 813 args->nfs_ext_u.nfs_extA.secdata = 814 NULL; 815 break; 816 default: 817 error = EINVAL; 818 goto errout; 819 } 820 } 821 break; 822 823 default: 824 error = EINVAL; 825 break; 826 } 827 } else if (flags & NFSMNT_SECURE) { 828 /* 829 * Keep this for backward compatibility to support 830 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 831 */ 832 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 833 error = EINVAL; 834 goto errout; 835 } 836 /* 837 * Move security related data to the sec_data structure. 838 */ 839 { 840 dh_k4_clntdata_t *data; 841 char *pf, *p; 842 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 843 if (flags & NFSMNT_RPCTIMESYNC) 844 secdata->flags |= AUTH_F_RPCTIMESYNC; 845 data = kmem_alloc(sizeof (*data), KM_SLEEP); 846 bcopy(args->syncaddr, &data->syncaddr, 847 sizeof (*args->syncaddr)); 848 849 /* 850 * duplicate the knconf information for the 851 * new opaque data. 852 */ 853 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 854 *data->knconf = *knconf; 855 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 856 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 857 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 858 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 859 data->knconf->knc_protofmly = pf; 860 data->knconf->knc_proto = p; 861 862 nlen = strlen(args->hostname) + 1; 863 /* move server netname to the sec_data structure */ 864 if (nlen != 0) { 865 data->netname = kmem_alloc(nlen, KM_SLEEP); 866 bcopy(args->hostname, data->netname, nlen); 867 data->netnamelen = nlen; 868 } 869 secdata->secmod = secdata->rpcflavor = AUTH_DES; 870 secdata->data = (caddr_t)data; 871 } 872 } else { 873 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 874 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 875 secdata->data = NULL; 876 } 877 878 svp->sv_secdata = secdata; 879 if (error) 880 goto errout; 881 882 /* 883 * See bug 1180236. 884 * If mount secure failed, we will fall back to AUTH_NONE 885 * and try again. nfs3rootvp() will turn this back off. 886 * 887 * The NFS Version 3 mount uses the FSINFO and GETATTR 888 * procedures. The server should not care if these procedures 889 * have the proper security flavor, so if mount retries using 890 * AUTH_NONE that does not require a credential setup for root 891 * then the automounter would work without requiring root to be 892 * keylogged into AUTH_DES. 893 */ 894 if (secdata->rpcflavor != AUTH_UNIX && 895 secdata->rpcflavor != AUTH_LOOPBACK) 896 secdata->flags |= AUTH_F_TRYNONE; 897 898 /* 899 * Failover support: 900 * 901 * We may have a linked list of nfs_args structures, 902 * which means the user is looking for failover. If 903 * the mount is either not "read-only" or "soft", 904 * we want to bail out with EINVAL. 905 */ 906 if (args->nfs_args_ext == NFS_ARGS_EXTB && 907 args->nfs_ext_u.nfs_extB.next != NULL) { 908 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 909 data = (char *)args->nfs_ext_u.nfs_extB.next; 910 goto more; 911 } 912 error = EINVAL; 913 goto errout; 914 } 915 916 /* 917 * Determine the zone we're being mounted into. 918 */ 919 zone_hold(mntzone = zone); /* start with this assumption */ 920 if (getzoneid() == GLOBAL_ZONEID) { 921 zone_rele(mntzone); 922 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 923 ASSERT(mntzone != NULL); 924 if (mntzone != zone) { 925 error = EBUSY; 926 goto errout; 927 } 928 } 929 930 if (is_system_labeled()) { 931 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 932 svp->sv_knconf, cr); 933 934 if (error > 0) 935 goto errout; 936 937 if (error == -1) { 938 /* change mount to read-only to prevent write-down */ 939 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 940 } 941 } 942 943 /* 944 * Stop the mount from going any further if the zone is going away. 945 */ 946 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 947 error = EBUSY; 948 goto errout; 949 } 950 951 /* 952 * Get root vnode. 953 */ 954 proceed: 955 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 956 957 if (error) 958 goto errout; 959 960 /* 961 * Set option fields in the mount info record 962 */ 963 mi = VTOMI(rtvp); 964 965 if (svp_head->sv_next) 966 mi->mi_flags |= MI_LLOCK; 967 968 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 969 970 errout: 971 if (error) { 972 if (rtvp != NULL) { 973 rp = VTOR(rtvp); 974 if (rp->r_flags & RHASHED) 975 rp_rmhash(rp); 976 } 977 sv_free(svp_head); 978 if (mi != NULL) { 979 nfs_async_stop(vfsp); 980 nfs_async_manager_stop(vfsp); 981 if (mi->mi_io_kstats) { 982 kstat_delete(mi->mi_io_kstats); 983 mi->mi_io_kstats = NULL; 984 } 985 if (mi->mi_ro_kstats) { 986 kstat_delete(mi->mi_ro_kstats); 987 mi->mi_ro_kstats = NULL; 988 } 989 nfs_free_mi(mi); 990 } 991 } 992 993 994 if (!(uap->flags & MS_SYSSPACE)) { 995 nfs3_free_args(args, fhandle); 996 kmem_free(args, sizeof (*args)); 997 } 998 999 if (rtvp != NULL) 1000 VN_RELE(rtvp); 1001 1002 if (mntzone != NULL) 1003 zone_rele(mntzone); 1004 1005 return (error); 1006 } 1007 1008 static int nfs3_dynamic = 0; /* global variable to enable dynamic retrans. */ 1009 static ushort_t nfs3_max_threads = 8; /* max number of active async threads */ 1010 static uint_t nfs3_bsize = 32 * 1024; /* client `block' size */ 1011 static uint_t nfs3_async_clusters = 1; /* # of reqs from each async queue */ 1012 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; 1013 1014 static int 1015 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1016 int flags, cred_t *cr, zone_t *zone) 1017 { 1018 vnode_t *rtvp; 1019 mntinfo_t *mi; 1020 dev_t nfs_dev; 1021 struct vattr va; 1022 struct FSINFO3args args; 1023 struct FSINFO3res res; 1024 int error; 1025 int douprintf; 1026 rnode_t *rp; 1027 int i; 1028 uint_t max_transfer_size; 1029 struct nfs_stats *nfsstatsp; 1030 cred_t *lcr = NULL, *tcr = cr; 1031 1032 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1033 ASSERT(nfsstatsp != NULL); 1034 1035 ASSERT(nfs_zone() == zone); 1036 /* 1037 * Create a mount record and link it to the vfs struct. 1038 */ 1039 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1040 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1041 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1042 mi->mi_flags = MI_ACL | MI_EXTATTR; 1043 if (!(flags & NFSMNT_SOFT)) 1044 mi->mi_flags |= MI_HARD; 1045 if ((flags & NFSMNT_SEMISOFT)) 1046 mi->mi_flags |= MI_SEMISOFT; 1047 if ((flags & NFSMNT_NOPRINT)) 1048 mi->mi_flags |= MI_NOPRINT; 1049 if (flags & NFSMNT_INT) 1050 mi->mi_flags |= MI_INT; 1051 mi->mi_retrans = NFS_RETRIES; 1052 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1053 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1054 mi->mi_timeo = nfs3_cots_timeo; 1055 else 1056 mi->mi_timeo = NFS_TIMEO; 1057 mi->mi_prog = NFS_PROGRAM; 1058 mi->mi_vers = NFS_V3; 1059 mi->mi_rfsnames = rfsnames_v3; 1060 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr; 1061 mi->mi_call_type = call_type_v3; 1062 mi->mi_ss_call_type = ss_call_type_v3; 1063 mi->mi_timer_type = timer_type_v3; 1064 mi->mi_aclnames = aclnames_v3; 1065 mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr; 1066 mi->mi_acl_call_type = acl_call_type_v3; 1067 mi->mi_acl_ss_call_type = acl_ss_call_type_v3; 1068 mi->mi_acl_timer_type = acl_timer_type_v3; 1069 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1070 mi->mi_servers = svp; 1071 mi->mi_curr_serv = svp; 1072 mi->mi_acregmin = SEC2HR(ACREGMIN); 1073 mi->mi_acregmax = SEC2HR(ACREGMAX); 1074 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1075 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1076 1077 if (nfs3_dynamic) 1078 mi->mi_flags |= MI_DYNAMIC; 1079 1080 if (flags & NFSMNT_DIRECTIO) 1081 mi->mi_flags |= MI_DIRECTIO; 1082 1083 /* 1084 * Make a vfs struct for nfs. We do this here instead of below 1085 * because rtvp needs a vfs before we can do a getattr on it. 1086 * 1087 * Assign a unique device id to the mount 1088 */ 1089 mutex_enter(&nfs_minor_lock); 1090 do { 1091 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1092 nfs_dev = makedevice(nfs_major, nfs_minor); 1093 } while (vfs_devismounted(nfs_dev)); 1094 mutex_exit(&nfs_minor_lock); 1095 1096 vfsp->vfs_dev = nfs_dev; 1097 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp); 1098 vfsp->vfs_data = (caddr_t)mi; 1099 vfsp->vfs_fstype = nfsfstyp; 1100 1101 /* 1102 * Verify that nfs3_bsize tuneable is set to an 1103 * acceptable value. It be a multiple of PAGESIZE or 1104 * file corruption can occur. 1105 */ 1106 if (nfs3_bsize & PAGEOFFSET) 1107 nfs3_bsize &= PAGEMASK; 1108 if (nfs3_bsize < PAGESIZE) 1109 nfs3_bsize = PAGESIZE; 1110 vfsp->vfs_bsize = nfs3_bsize; 1111 1112 /* 1113 * Initialize fields used to support async putpage operations. 1114 */ 1115 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1116 mi->mi_async_clusters[i] = nfs3_async_clusters; 1117 mi->mi_async_init_clusters = nfs3_async_clusters; 1118 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1119 mi->mi_max_threads = nfs3_max_threads; 1120 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1121 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1122 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1123 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1124 1125 mi->mi_vfsp = vfsp; 1126 zone_hold(mi->mi_zone = zone); 1127 nfs_mi_zonelist_add(mi); 1128 1129 /* 1130 * Make the root vnode, use it to get attributes, 1131 * then remake it with the attributes. 1132 */ 1133 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle, 1134 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1135 1136 /* 1137 * Make the FSINFO calls, primarily at this point to 1138 * determine the transfer size. For client failover, 1139 * we'll want this to be the minimum bid from any 1140 * server, so that we don't overrun stated limits. 1141 * 1142 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1143 * which is only for the mount operation. 1144 */ 1145 1146 mi->mi_tsize = nfs3_tsize(svp->sv_knconf); 1147 mi->mi_stsize = mi->mi_tsize; 1148 1149 mi->mi_curread = nfs3_bsize; 1150 mi->mi_curwrite = mi->mi_curread; 1151 1152 /* 1153 * If the uid is set then set the creds for secure mounts 1154 * by proxy processes such as automountd. 1155 */ 1156 if (svp->sv_secdata->uid != 0 && 1157 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1158 lcr = crdup(cr); 1159 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1160 tcr = lcr; 1161 } 1162 1163 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1164 douprintf = 1; 1165 mi->mi_curr_serv = svp; 1166 max_transfer_size = nfs3_tsize(svp->sv_knconf); 1167 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize); 1168 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize); 1169 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread); 1170 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite); 1171 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle; 1172 1173 error = rfs3call(mi, NFSPROC3_FSINFO, 1174 xdr_nfs_fh3, (caddr_t)&args, 1175 xdr_FSINFO3res, (caddr_t)&res, tcr, 1176 &douprintf, &res.status, 0, NULL); 1177 if (error) 1178 goto bad; 1179 error = geterrno3(res.status); 1180 if (error) 1181 goto bad; 1182 1183 /* get type of root node */ 1184 if (res.resok.obj_attributes.attributes) { 1185 if (res.resok.obj_attributes.attr.type < NF3REG || 1186 res.resok.obj_attributes.attr.type > NF3FIFO) { 1187 #ifdef DEBUG 1188 zcmn_err(getzoneid(), CE_WARN, 1189 "NFS3 server %s returned a bad file type for root", 1190 svp->sv_hostname); 1191 #else 1192 zcmn_err(getzoneid(), CE_WARN, 1193 "NFS server %s returned a bad file type for root", 1194 svp->sv_hostname); 1195 #endif 1196 error = EINVAL; 1197 goto bad; 1198 } else { 1199 if (rtvp->v_type != VNON && 1200 rtvp->v_type != nf3_to_vt[res.resok.obj_attributes.attr.type]) { 1201 #ifdef DEBUG 1202 zcmn_err(getzoneid(), CE_WARN, 1203 "NFS3 server %s returned a different file type for root", 1204 svp->sv_hostname); 1205 #else 1206 zcmn_err(getzoneid(), CE_WARN, 1207 "NFS server %s returned a different file type for root", 1208 svp->sv_hostname); 1209 #endif 1210 error = EINVAL; 1211 goto bad; 1212 } 1213 rtvp->v_type = 1214 nf3_to_vt[res.resok.obj_attributes.attr.type]; 1215 } 1216 } 1217 1218 if (res.resok.rtmax != 0) { 1219 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize); 1220 if (res.resok.rtpref != 0) { 1221 mi->mi_curread = MIN(res.resok.rtpref, 1222 mi->mi_curread); 1223 } else { 1224 mi->mi_curread = MIN(res.resok.rtmax, 1225 mi->mi_curread); 1226 } 1227 } else if (res.resok.rtpref != 0) { 1228 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize); 1229 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread); 1230 } else { 1231 #ifdef DEBUG 1232 zcmn_err(getzoneid(), CE_WARN, 1233 "NFS3 server %s returned 0 for read transfer sizes", 1234 svp->sv_hostname); 1235 #else 1236 zcmn_err(getzoneid(), CE_WARN, 1237 "NFS server %s returned 0 for read transfer sizes", 1238 svp->sv_hostname); 1239 #endif 1240 error = EIO; 1241 goto bad; 1242 } 1243 if (res.resok.wtmax != 0) { 1244 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize); 1245 if (res.resok.wtpref != 0) { 1246 mi->mi_curwrite = MIN(res.resok.wtpref, 1247 mi->mi_curwrite); 1248 } else { 1249 mi->mi_curwrite = MIN(res.resok.wtmax, 1250 mi->mi_curwrite); 1251 } 1252 } else if (res.resok.wtpref != 0) { 1253 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize); 1254 mi->mi_curwrite = MIN(res.resok.wtpref, 1255 mi->mi_curwrite); 1256 } else { 1257 #ifdef DEBUG 1258 zcmn_err(getzoneid(), CE_WARN, 1259 "NFS3 server %s returned 0 for write transfer sizes", 1260 svp->sv_hostname); 1261 #else 1262 zcmn_err(getzoneid(), CE_WARN, 1263 "NFS server %s returned 0 for write transfer sizes", 1264 svp->sv_hostname); 1265 #endif 1266 error = EIO; 1267 goto bad; 1268 } 1269 1270 /* 1271 * These signal the ability of the server to create 1272 * hard links and symbolic links, so they really 1273 * aren't relevant if there is more than one server. 1274 * We'll set them here, though it probably looks odd. 1275 */ 1276 if (res.resok.properties & FSF3_LINK) 1277 mi->mi_flags |= MI_LINK; 1278 if (res.resok.properties & FSF3_SYMLINK) 1279 mi->mi_flags |= MI_SYMLINK; 1280 1281 /* Pick up smallest non-zero maxfilesize value */ 1282 if (res.resok.maxfilesize) { 1283 if (mi->mi_maxfilesize) { 1284 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize, 1285 res.resok.maxfilesize); 1286 } else 1287 mi->mi_maxfilesize = res.resok.maxfilesize; 1288 } 1289 1290 /* 1291 * AUTH_F_TRYNONE is only for the mount operation, 1292 * so turn it back off. 1293 */ 1294 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1295 } 1296 mi->mi_curr_serv = mi->mi_servers; 1297 1298 /* 1299 * Start the thread responsible for handling async worker threads. 1300 */ 1301 VFS_HOLD(vfsp); /* add reference for thread */ 1302 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1303 vfsp, 0, minclsyspri); 1304 ASSERT(mi->mi_manager_thread != NULL); 1305 1306 /* 1307 * Initialize kstats 1308 */ 1309 nfs_mnt_kstat_init(vfsp); 1310 1311 /* If we didn't get a type, get one now */ 1312 if (rtvp->v_type == VNON) { 1313 va.va_mask = AT_ALL; 1314 1315 error = nfs3getattr(rtvp, &va, tcr); 1316 if (error) 1317 goto bad; 1318 rtvp->v_type = va.va_type; 1319 } 1320 1321 mi->mi_type = rtvp->v_type; 1322 1323 *rtvpp = rtvp; 1324 if (lcr != NULL) 1325 crfree(lcr); 1326 1327 return (0); 1328 bad: 1329 /* 1330 * An error occurred somewhere, need to clean up... 1331 * We need to release our reference to the root vnode and 1332 * destroy the mntinfo struct that we just created. 1333 */ 1334 if (lcr != NULL) 1335 crfree(lcr); 1336 rp = VTOR(rtvp); 1337 if (rp->r_flags & RHASHED) 1338 rp_rmhash(rp); 1339 VN_RELE(rtvp); 1340 nfs_async_stop(vfsp); 1341 nfs_async_manager_stop(vfsp); 1342 if (mi->mi_io_kstats) { 1343 kstat_delete(mi->mi_io_kstats); 1344 mi->mi_io_kstats = NULL; 1345 } 1346 if (mi->mi_ro_kstats) { 1347 kstat_delete(mi->mi_ro_kstats); 1348 mi->mi_ro_kstats = NULL; 1349 } 1350 nfs_free_mi(mi); 1351 *rtvpp = NULL; 1352 return (error); 1353 } 1354 1355 /* 1356 * vfs operations 1357 */ 1358 static int 1359 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1360 { 1361 mntinfo_t *mi; 1362 ushort_t omax; 1363 1364 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1365 return (EPERM); 1366 1367 mi = VFTOMI(vfsp); 1368 if (flag & MS_FORCE) { 1369 1370 vfsp->vfs_flag |= VFS_UNMOUNTED; 1371 1372 /* 1373 * We are about to stop the async manager. 1374 * Let every one know not to schedule any 1375 * more async requests 1376 */ 1377 mutex_enter(&mi->mi_async_lock); 1378 mi->mi_max_threads = 0; 1379 cv_broadcast(&mi->mi_async_work_cv); 1380 mutex_exit(&mi->mi_async_lock); 1381 1382 /* 1383 * We need to stop the manager thread explicitly; the worker 1384 * threads can time out and exit on their own. 1385 */ 1386 nfs_async_manager_stop(vfsp); 1387 destroy_rtable(vfsp, cr); 1388 if (mi->mi_io_kstats) { 1389 kstat_delete(mi->mi_io_kstats); 1390 mi->mi_io_kstats = NULL; 1391 } 1392 if (mi->mi_ro_kstats) { 1393 kstat_delete(mi->mi_ro_kstats); 1394 mi->mi_ro_kstats = NULL; 1395 } 1396 return (0); 1397 } 1398 /* 1399 * Wait until all asynchronous putpage operations on 1400 * this file system are complete before flushing rnodes 1401 * from the cache. 1402 */ 1403 omax = mi->mi_max_threads; 1404 if (nfs_async_stop_sig(vfsp)) { 1405 return (EINTR); 1406 } 1407 rflush(vfsp, cr); 1408 /* 1409 * If there are any active vnodes on this file system, 1410 * then the file system is busy and can't be umounted. 1411 */ 1412 if (check_rtable(vfsp)) { 1413 mutex_enter(&mi->mi_async_lock); 1414 mi->mi_max_threads = omax; 1415 mutex_exit(&mi->mi_async_lock); 1416 return (EBUSY); 1417 } 1418 /* 1419 * The unmount can't fail from now on; stop the worker thread manager. 1420 */ 1421 nfs_async_manager_stop(vfsp); 1422 /* 1423 * Destroy all rnodes belonging to this file system from the 1424 * rnode hash queues and purge any resources allocated to 1425 * them. 1426 */ 1427 destroy_rtable(vfsp, cr); 1428 if (mi->mi_io_kstats) { 1429 kstat_delete(mi->mi_io_kstats); 1430 mi->mi_io_kstats = NULL; 1431 } 1432 if (mi->mi_ro_kstats) { 1433 kstat_delete(mi->mi_ro_kstats); 1434 mi->mi_ro_kstats = NULL; 1435 } 1436 return (0); 1437 } 1438 1439 /* 1440 * find root of nfs 1441 */ 1442 static int 1443 nfs3_root(vfs_t *vfsp, vnode_t **vpp) 1444 { 1445 mntinfo_t *mi; 1446 vnode_t *vp; 1447 servinfo_t *svp; 1448 1449 mi = VFTOMI(vfsp); 1450 1451 if (nfs_zone() != mi->mi_zone) 1452 return (EPERM); 1453 1454 svp = mi->mi_curr_serv; 1455 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1456 mutex_enter(&svp->sv_lock); 1457 svp->sv_flags &= ~SV_ROOT_STALE; 1458 mutex_exit(&svp->sv_lock); 1459 return (ENOENT); 1460 } 1461 1462 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle, 1463 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1464 1465 if (VTOR(vp)->r_flags & RSTALE) { 1466 VN_RELE(vp); 1467 return (ENOENT); 1468 } 1469 1470 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1471 1472 vp->v_type = mi->mi_type; 1473 1474 *vpp = vp; 1475 1476 return (0); 1477 } 1478 1479 /* 1480 * Get file system statistics. 1481 */ 1482 static int 1483 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1484 { 1485 int error; 1486 struct mntinfo *mi; 1487 struct FSSTAT3args args; 1488 struct FSSTAT3res res; 1489 int douprintf; 1490 failinfo_t fi; 1491 vnode_t *vp; 1492 cred_t *cr; 1493 hrtime_t t; 1494 1495 mi = VFTOMI(vfsp); 1496 if (nfs_zone() != mi->mi_zone) 1497 return (EPERM); 1498 error = nfs3_root(vfsp, &vp); 1499 if (error) 1500 return (error); 1501 1502 cr = CRED(); 1503 1504 args.fsroot = *VTOFH3(vp); 1505 fi.vp = vp; 1506 fi.fhp = (caddr_t)&args.fsroot; 1507 fi.copyproc = nfs3copyfh; 1508 fi.lookupproc = nfs3lookup; 1509 fi.xattrdirproc = acl_getxattrdir3; 1510 1511 douprintf = 1; 1512 1513 t = gethrtime(); 1514 1515 error = rfs3call(mi, NFSPROC3_FSSTAT, 1516 xdr_nfs_fh3, (caddr_t)&args, 1517 xdr_FSSTAT3res, (caddr_t)&res, cr, 1518 &douprintf, &res.status, 0, &fi); 1519 1520 if (error) { 1521 VN_RELE(vp); 1522 return (error); 1523 } 1524 1525 error = geterrno3(res.status); 1526 if (!error) { 1527 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1528 sbp->f_bsize = MAXBSIZE; 1529 sbp->f_frsize = DEV_BSIZE; 1530 /* 1531 * Allow -1 fields to pass through unconverted. These 1532 * indicate "don't know" fields. 1533 */ 1534 if (res.resok.tbytes == (size3)-1) 1535 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes; 1536 else { 1537 sbp->f_blocks = (fsblkcnt64_t) 1538 (res.resok.tbytes / DEV_BSIZE); 1539 } 1540 if (res.resok.fbytes == (size3)-1) 1541 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes; 1542 else { 1543 sbp->f_bfree = (fsblkcnt64_t) 1544 (res.resok.fbytes / DEV_BSIZE); 1545 } 1546 if (res.resok.abytes == (size3)-1) 1547 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes; 1548 else { 1549 sbp->f_bavail = (fsblkcnt64_t) 1550 (res.resok.abytes / DEV_BSIZE); 1551 } 1552 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles; 1553 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles; 1554 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles; 1555 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1556 (void) strncpy(sbp->f_basetype, 1557 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1558 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1559 sbp->f_namemax = (ulong_t)-1; 1560 } else { 1561 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 1562 PURGE_STALE_FH(error, vp, cr); 1563 } 1564 1565 VN_RELE(vp); 1566 1567 return (error); 1568 } 1569 1570 static kmutex_t nfs3_syncbusy; 1571 1572 /* 1573 * Flush dirty nfs files for file system vfsp. 1574 * If vfsp == NULL, all nfs files are flushed. 1575 */ 1576 /* ARGSUSED */ 1577 static int 1578 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr) 1579 { 1580 /* 1581 * Cross-zone calls are OK here, since this translates to a 1582 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1583 */ 1584 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) { 1585 rflush(vfsp, cr); 1586 mutex_exit(&nfs3_syncbusy); 1587 } 1588 return (0); 1589 } 1590 1591 /* ARGSUSED */ 1592 static int 1593 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1594 { 1595 int error; 1596 nfs_fh3 fh; 1597 vnode_t *vp; 1598 struct vattr va; 1599 1600 if (fidp->fid_len > NFS3_FHSIZE) { 1601 *vpp = NULL; 1602 return (ESTALE); 1603 } 1604 1605 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1606 return (EPERM); 1607 fh.fh3_length = fidp->fid_len; 1608 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length); 1609 1610 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1611 1612 if (VTOR(vp)->r_flags & RSTALE) { 1613 VN_RELE(vp); 1614 *vpp = NULL; 1615 return (ENOENT); 1616 } 1617 1618 if (vp->v_type == VNON) { 1619 va.va_mask = AT_ALL; 1620 error = nfs3getattr(vp, &va, CRED()); 1621 if (error) { 1622 VN_RELE(vp); 1623 *vpp = NULL; 1624 return (error); 1625 } 1626 vp->v_type = va.va_type; 1627 } 1628 1629 *vpp = vp; 1630 1631 return (0); 1632 } 1633 1634 /* ARGSUSED */ 1635 static int 1636 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why) 1637 { 1638 vnode_t *rtvp; 1639 char root_hostname[SYS_NMLN+1]; 1640 struct servinfo *svp; 1641 int error; 1642 int vfsflags; 1643 size_t size; 1644 char *root_path; 1645 struct pathname pn; 1646 char *name; 1647 cred_t *cr; 1648 struct nfs_args args; /* nfs mount arguments */ 1649 static char token[10]; 1650 1651 bzero(&args, sizeof (args)); 1652 1653 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1654 clkset(-1L); /* hack for now - until we get time svc? */ 1655 1656 if (why == ROOT_REMOUNT) { 1657 /* 1658 * Shouldn't happen. 1659 */ 1660 panic("nfs3_mountroot: why == ROOT_REMOUNT"); 1661 } 1662 1663 if (why == ROOT_UNMOUNT) { 1664 /* 1665 * Nothing to do for NFS. 1666 */ 1667 return (0); 1668 } 1669 1670 /* 1671 * why == ROOT_INIT 1672 */ 1673 1674 name = token; 1675 *name = 0; 1676 getfsname("root", name, sizeof (token)); 1677 1678 pn_alloc(&pn); 1679 root_path = pn.pn_path; 1680 1681 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1682 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1683 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1684 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1685 1686 /* 1687 * Get server address 1688 * Get the root fhandle 1689 * Get server's transport 1690 * Get server's hostname 1691 * Get options 1692 */ 1693 args.addr = &svp->sv_addr; 1694 args.fh = (char *)&svp->sv_fhandle; 1695 args.knconf = svp->sv_knconf; 1696 args.hostname = root_hostname; 1697 vfsflags = 0; 1698 if (error = mount_root(*name ? name : "root", root_path, NFS_V3, 1699 &args, &vfsflags)) { 1700 if (error == EPROTONOSUPPORT) 1701 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: " 1702 "mount_root failed: server doesn't support NFS V3"); 1703 else 1704 nfs_cmn_err(error, CE_WARN, 1705 "nfs3_mountroot: mount_root failed: %m"); 1706 sv_free(svp); 1707 pn_free(&pn); 1708 return (error); 1709 } 1710 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1711 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1712 (void) strcpy(svp->sv_hostname, root_hostname); 1713 1714 /* 1715 * Force root partition to always be mounted with AUTH_UNIX for now 1716 */ 1717 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1718 svp->sv_secdata->secmod = AUTH_UNIX; 1719 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1720 svp->sv_secdata->data = NULL; 1721 1722 cr = crgetcred(); 1723 rtvp = NULL; 1724 1725 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1726 1727 crfree(cr); 1728 1729 if (error) { 1730 pn_free(&pn); 1731 goto errout; 1732 } 1733 1734 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1735 if (error) { 1736 nfs_cmn_err(error, CE_WARN, 1737 "nfs3_mountroot: invalid root mount options"); 1738 pn_free(&pn); 1739 goto errout; 1740 } 1741 1742 (void) vfs_lock_wait(vfsp); 1743 vfs_add(NULL, vfsp, vfsflags); 1744 vfs_unlock(vfsp); 1745 1746 size = strlen(svp->sv_hostname); 1747 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1748 rootfs.bo_name[size] = ':'; 1749 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1750 1751 pn_free(&pn); 1752 1753 errout: 1754 if (error) { 1755 sv_free(svp); 1756 nfs_async_stop(vfsp); 1757 nfs_async_manager_stop(vfsp); 1758 } 1759 1760 if (rtvp != NULL) 1761 VN_RELE(rtvp); 1762 1763 return (error); 1764 } 1765 1766 /* 1767 * Initialization routine for VFS routines. Should only be called once 1768 */ 1769 int 1770 nfs3_vfsinit(void) 1771 { 1772 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1773 return (0); 1774 } 1775 1776 void 1777 nfs3_vfsfini(void) 1778 { 1779 mutex_destroy(&nfs3_syncbusy); 1780 } 1781 1782 void 1783 nfs3_freevfs(vfs_t *vfsp) 1784 { 1785 mntinfo_t *mi; 1786 servinfo_t *svp; 1787 1788 /* free up the resources */ 1789 mi = VFTOMI(vfsp); 1790 svp = mi->mi_servers; 1791 mi->mi_servers = mi->mi_curr_serv = NULL; 1792 sv_free(svp); 1793 1794 /* 1795 * By this time we should have already deleted the 1796 * mi kstats in the unmount code. If they are still around 1797 * somethings wrong 1798 */ 1799 ASSERT(mi->mi_io_kstats == NULL); 1800 nfs_free_mi(mi); 1801 } 1802