1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/mntent.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/class.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/tsol/tnet.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/rnode.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

/*
 * From rpcsec module (common/rpcsec).
 * sec_clnt_loadinfo() is used below to copy in the caller's sec_data and
 * sec_clnt_freeinfo() to release it again.
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * Names of the NFS Version 3 procedures; installed as mi_rfsnames for
 * every version 3 mount.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v3_tmpl in nfs_stats.c
 */
static char *rfsnames_v3[] = {
	"null", "getattr", "setattr", "lookup", "access", "readlink", "read",
	"write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir",
	"rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo",
	"pathconf", "commit"
};

/*
 * This table maps from NFS procedure number into call type.
 *	Zero means a "Lookup" type call
 *	One means a "Read" type call
 *	Two means a "Write" type call
 * This is used to select a default time-out.
 *
 * The row comments below assume the entries are laid out in the same
 * order as rfsnames_v3 (22 entries each) -- TODO confirm against the
 * NFSPROC3_* procedure numbers.
 */
static uchar_t call_type_v3[] = {
	0, 0, 1, 0, 0, 0, 1,	/* null ... read */
	2, 2, 2, 2, 2, 2, 2,	/* write ... rmdir */
	2, 2, 1, 2, 0, 0, 0,	/* rename ... pathconf */
	2 };			/* commit */

/*
 * Similar table, but to determine which timer to use
 * (only real reads and writes!)
 * Same layout as call_type_v3.
 */
static uchar_t timer_type_v3[] = {
	0, 0, 0, 0, 0, 0, 1,	/* null ... read */
	2, 0, 0, 0, 0, 0, 0,	/* write ... rmdir */
	0, 0, 1, 1, 0, 0, 0,	/* rename ... pathconf */
	0 };			/* commit */

/*
 * This table maps from NFS procedure number into a call type
 * for the semisoft mount option.
 *	Zero means do not repeat operation.
 *	One means repeat.
 * Same layout as call_type_v3.
 */
static uchar_t ss_call_type_v3[] = {
	0, 0, 1, 0, 0, 0, 0,	/* null ... read */
	1, 1, 1, 1, 1, 1, 1,	/* write ... rmdir */
	1, 1, 0, 0, 0, 0, 0,	/* rename ... pathconf */
	1 };			/* commit */

/*
 * nfs3 vfs operations.
 * These are the handlers that nfs3init() registers in the vfsops template.
 */
static int	nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	nfs3_unmount(vfs_t *, int, cred_t *);
static int	nfs3_root(vfs_t *, vnode_t **);
static int	nfs3_statvfs(vfs_t *, struct statvfs64 *);
static int	nfs3_sync(vfs_t *, short, cred_t *);
static int	nfs3_vget(vfs_t *, vnode_t **, fid_t *);
static int	nfs3_mountroot(vfs_t *, whymountroot_t);
static void	nfs3_freevfs(vfs_t *);

/*
 * Helper used by nfs3_mount(): builds the mntinfo for a list of servers
 * and returns the root vnode of the new mount.
 */
static int	nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *,
		    int, cred_t *, zone_t *);

/*
 * Initialize the vfs structure
 */

static int nfs3fstyp;		/* file system type index, set by nfs3init() */
vfsops_t *nfs3_vfsops;		/* vfs operations vector built by nfs3init() */

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
152 */ 153 extern int rdma_debug; 154 155 int 156 nfs3init(int fstyp, char *name) 157 { 158 static const fs_operation_def_t nfs3_vfsops_template[] = { 159 VFSNAME_MOUNT, { .vfs_mount = nfs3_mount }, 160 VFSNAME_UNMOUNT, { .vfs_unmount = nfs3_unmount }, 161 VFSNAME_ROOT, { .vfs_root = nfs3_root }, 162 VFSNAME_STATVFS, { .vfs_statvfs = nfs3_statvfs }, 163 VFSNAME_SYNC, { .vfs_sync = nfs3_sync }, 164 VFSNAME_VGET, { .vfs_vget = nfs3_vget }, 165 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs3_mountroot }, 166 VFSNAME_FREEVFS, { .vfs_freevfs = nfs3_freevfs }, 167 NULL, NULL 168 }; 169 int error; 170 171 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops); 172 if (error != 0) { 173 zcmn_err(GLOBAL_ZONEID, CE_WARN, 174 "nfs3init: bad vfs ops template"); 175 return (error); 176 } 177 178 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops); 179 if (error != 0) { 180 (void) vfs_freevfsops_by_type(fstyp); 181 zcmn_err(GLOBAL_ZONEID, CE_WARN, 182 "nfs3init: bad vnode ops template"); 183 return (error); 184 } 185 186 nfs3fstyp = fstyp; 187 188 return (0); 189 } 190 191 void 192 nfs3fini(void) 193 { 194 } 195 196 static void 197 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 198 { 199 200 if (fh) 201 kmem_free(fh, sizeof (*fh)); 202 203 if (nargs->knconf) { 204 if (nargs->knconf->knc_protofmly) 205 kmem_free(nargs->knconf->knc_protofmly, 206 KNC_STRSIZE); 207 if (nargs->knconf->knc_proto) 208 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 209 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 210 nargs->knconf = NULL; 211 } 212 213 if (nargs->fh) { 214 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 215 nargs->fh = NULL; 216 } 217 218 if (nargs->hostname) { 219 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 220 nargs->hostname = NULL; 221 } 222 223 if (nargs->addr) { 224 if (nargs->addr->buf) { 225 ASSERT(nargs->addr->len); 226 kmem_free(nargs->addr->buf, nargs->addr->len); 227 } 228 kmem_free(nargs->addr, sizeof (struct netbuf)); 229 
nargs->addr = NULL; 230 } 231 232 if (nargs->syncaddr) { 233 ASSERT(nargs->syncaddr->len); 234 if (nargs->syncaddr->buf) { 235 ASSERT(nargs->syncaddr->len); 236 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 237 } 238 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 239 nargs->syncaddr = NULL; 240 } 241 242 if (nargs->netname) { 243 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 244 nargs->netname = NULL; 245 } 246 247 if (nargs->nfs_ext_u.nfs_extA.secdata) { 248 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 249 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 250 } 251 } 252 253 static int 254 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 255 { 256 257 int error; 258 size_t nlen; /* length of netname */ 259 size_t hlen; /* length of hostname */ 260 char netname[MAXNETNAMELEN+1]; /* server's netname */ 261 struct netbuf addr; /* server's address */ 262 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 263 struct knetconfig *knconf; /* transport knetconfig structure */ 264 struct sec_data *secdata = NULL; /* security data */ 265 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 266 STRUCT_DECL(knetconfig, knconf_tmp); 267 STRUCT_DECL(netbuf, addr_tmp); 268 int flags; 269 char *p, *pf; 270 char *userbufptr; 271 272 273 bzero(nargs, sizeof (*nargs)); 274 275 STRUCT_INIT(args, get_udatamodel()); 276 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 277 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 278 return (EFAULT); 279 280 nargs->wsize = STRUCT_FGET(args, wsize); 281 nargs->rsize = STRUCT_FGET(args, rsize); 282 nargs->timeo = STRUCT_FGET(args, timeo); 283 nargs->retrans = STRUCT_FGET(args, retrans); 284 nargs->acregmin = STRUCT_FGET(args, acregmin); 285 nargs->acregmax = STRUCT_FGET(args, acregmax); 286 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 287 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 288 289 flags = STRUCT_FGET(args, flags); 290 nargs->flags = flags; 
291 292 addr.buf = NULL; 293 syncaddr.buf = NULL; 294 295 /* 296 * Allocate space for a knetconfig structure and 297 * its strings and copy in from user-land. 298 */ 299 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 300 STRUCT_INIT(knconf_tmp, get_udatamodel()); 301 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 302 STRUCT_SIZE(knconf_tmp))) { 303 kmem_free(knconf, sizeof (*knconf)); 304 return (EFAULT); 305 } 306 307 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 308 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 309 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 310 if (get_udatamodel() != DATAMODEL_LP64) { 311 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 312 } else { 313 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 314 } 315 316 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 317 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 318 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 319 if (error) { 320 kmem_free(pf, KNC_STRSIZE); 321 kmem_free(p, KNC_STRSIZE); 322 kmem_free(knconf, sizeof (*knconf)); 323 return (error); 324 } 325 326 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 327 if (error) { 328 kmem_free(pf, KNC_STRSIZE); 329 kmem_free(p, KNC_STRSIZE); 330 kmem_free(knconf, sizeof (*knconf)); 331 return (error); 332 } 333 334 335 knconf->knc_protofmly = pf; 336 knconf->knc_proto = p; 337 338 nargs->knconf = knconf; 339 /* 340 * Get server address 341 */ 342 STRUCT_INIT(addr_tmp, get_udatamodel()); 343 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 344 STRUCT_SIZE(addr_tmp))) { 345 error = EFAULT; 346 goto errout; 347 } 348 349 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 350 userbufptr = STRUCT_FGETP(addr_tmp, buf); 351 addr.len = STRUCT_FGET(addr_tmp, len); 352 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 353 addr.maxlen = addr.len; 354 if (copyin(userbufptr, addr.buf, addr.len)) { 355 kmem_free(addr.buf, addr.len); 356 error = 
EFAULT; 357 goto errout; 358 } 359 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 360 361 /* 362 * Get the root fhandle 363 */ 364 365 if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) { 366 error = EFAULT; 367 goto errout; 368 } 369 370 371 /* 372 * Get server's hostname 373 */ 374 if (flags & NFSMNT_HOSTNAME) { 375 error = copyinstr(STRUCT_FGETP(args, hostname), 376 netname, sizeof (netname), &hlen); 377 if (error) 378 goto errout; 379 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 380 (void) strcpy(nargs->hostname, netname); 381 } else { 382 nargs->hostname = NULL; 383 } 384 385 386 /* 387 * If there are syncaddr and netname data, load them in. This is 388 * to support data needed for NFSV4 when AUTH_DH is the negotiated 389 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 390 */ 391 netname[0] = '\0'; 392 if (flags & NFSMNT_SECURE) { 393 if (STRUCT_FGETP(args, syncaddr) == NULL) { 394 error = EINVAL; 395 goto errout; 396 } 397 /* get syncaddr */ 398 STRUCT_INIT(addr_tmp, get_udatamodel()); 399 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 400 STRUCT_SIZE(addr_tmp))) { 401 error = EINVAL; 402 goto errout; 403 } 404 userbufptr = STRUCT_FGETP(addr_tmp, buf); 405 syncaddr.len = STRUCT_FGET(addr_tmp, len); 406 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 407 syncaddr.maxlen = syncaddr.len; 408 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 409 kmem_free(syncaddr.buf, syncaddr.len); 410 error = EFAULT; 411 goto errout; 412 } 413 414 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 415 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 416 417 ASSERT(STRUCT_FGETP(args, netname)); 418 419 if (copyinstr(STRUCT_FGETP(args, netname), netname, 420 sizeof (netname), &nlen)) { 421 error = EFAULT; 422 goto errout; 423 } 424 425 netname[nlen] = '\0'; 426 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 427 (void) strcpy(nargs->netname, netname); 428 } 429 430 /* 431 * Get the extention data which 
has the security data structure. 432 * This includes data for AUTH_SYS as well. 433 */ 434 if (flags & NFSMNT_NEWARGS) { 435 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 436 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 437 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 438 /* 439 * Indicating the application is using the new 440 * sec_data structure to pass in the security 441 * data. 442 */ 443 if (STRUCT_FGETP(args, 444 nfs_ext_u.nfs_extA.secdata) != NULL) { 445 error = sec_clnt_loadinfo( 446 (struct sec_data *)STRUCT_FGETP(args, 447 nfs_ext_u.nfs_extA.secdata), 448 &secdata, get_udatamodel()); 449 } 450 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 451 } 452 } 453 454 if (error) 455 goto errout; 456 457 /* 458 * Failover support: 459 * 460 * We may have a linked list of nfs_args structures, 461 * which means the user is looking for failover. If 462 * the mount is either not "read-only" or "soft", 463 * we want to bail out with EINVAL. 464 */ 465 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 466 nargs->nfs_ext_u.nfs_extB.next = 467 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 468 469 errout: 470 if (error) 471 nfs3_free_args(nargs, fh); 472 473 return (error); 474 } 475 476 477 /* 478 * nfs mount vfsop 479 * Set up mount info record and attach it to vfs struct. 
480 */ 481 static int 482 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 483 { 484 struct nfs_args *args = NULL; 485 nfs_fhandle *fhandle = NULL; 486 char *data = uap->dataptr; 487 int error; 488 vnode_t *rtvp; /* the server's root */ 489 mntinfo_t *mi; /* mount info, pointed at by vfs */ 490 size_t nlen; /* length of netname */ 491 struct knetconfig *knconf; /* transport knetconfig structure */ 492 struct knetconfig *rdma_knconf; /* rdma transport structure */ 493 rnode_t *rp; 494 struct servinfo *svp; /* nfs server info */ 495 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 496 struct servinfo *svp_head; /* first nfs server info */ 497 struct servinfo *svp_2ndlast; /* 2nd last in server info list */ 498 struct sec_data *secdata; /* security data */ 499 int flags, addr_type; 500 zone_t *zone = nfs_zone(); 501 zone_t *mntzone = NULL; 502 503 504 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 505 return (EPERM); 506 507 if (mvp->v_type != VDIR) 508 return (ENOTDIR); 509 510 /* 511 * get arguments 512 * 513 * nfs_args is now versioned and is extensible, so 514 * uap->datalen might be different from sizeof (args) 515 * in a compatible situation. 
516 */ 517 518 more: 519 520 if (!(uap->flags & MS_SYSSPACE)) { 521 if (args == NULL) 522 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 523 else { 524 nfs3_free_args(args, fhandle); 525 fhandle = NULL; 526 } 527 if (fhandle == NULL) 528 fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP); 529 error = nfs3_copyin(data, uap->datalen, args, fhandle); 530 if (error) { 531 if (args) 532 kmem_free(args, sizeof (*args)); 533 return (error); 534 } 535 } else { 536 args = (struct nfs_args *)data; 537 fhandle = (nfs_fhandle *)args->fh; 538 } 539 540 541 flags = args->flags; 542 543 if (uap->flags & MS_REMOUNT) { 544 size_t n; 545 char name[FSTYPSZ]; 546 547 if (uap->flags & MS_SYSSPACE) { 548 error = copystr(uap->fstype, name, FSTYPSZ, &n); 549 } else { 550 nfs3_free_args(args, fhandle); 551 kmem_free(args, sizeof (*args)); 552 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 553 } 554 if (error) { 555 if (error == ENAMETOOLONG) 556 return (EINVAL); 557 return (error); 558 } 559 560 /* 561 * This check is to ensure that the request is a 562 * genuine nfs remount request. 563 */ 564 565 if (strncmp(name, "nfs", 3) != 0) 566 return (EINVAL); 567 568 /* 569 * If the request changes the locking type, disallow the 570 * remount, 571 * because it's questionable whether we can transfer the 572 * locking state correctly. 573 */ 574 575 if ((mi = VFTOMI(vfsp)) != NULL) { 576 uint_t new_mi_llock; 577 uint_t old_mi_llock; 578 579 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 580 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 
1 : 0; 581 if (old_mi_llock != new_mi_llock) 582 return (EBUSY); 583 } 584 return (0); 585 } 586 587 mutex_enter(&mvp->v_lock); 588 if (!(uap->flags & MS_OVERLAY) && 589 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 590 mutex_exit(&mvp->v_lock); 591 if (!(uap->flags & MS_SYSSPACE)) { 592 nfs3_free_args(args, fhandle); 593 kmem_free(args, sizeof (*args)); 594 } 595 return (EBUSY); 596 } 597 mutex_exit(&mvp->v_lock); 598 599 /* make sure things are zeroed for errout: */ 600 rtvp = NULL; 601 mi = NULL; 602 secdata = NULL; 603 604 /* 605 * A valid knetconfig structure is required. 606 */ 607 if (!(flags & NFSMNT_KNCONF)) { 608 if (!(uap->flags & MS_SYSSPACE)) { 609 nfs3_free_args(args, fhandle); 610 kmem_free(args, sizeof (*args)); 611 } 612 return (EINVAL); 613 } 614 615 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 616 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 617 if (!(uap->flags & MS_SYSSPACE)) { 618 nfs3_free_args(args, fhandle); 619 kmem_free(args, sizeof (*args)); 620 } 621 return (EINVAL); 622 } 623 624 /* 625 * Allocate a servinfo struct. 626 */ 627 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 628 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 629 if (svp_tail) { 630 svp_2ndlast = svp_tail; 631 svp_tail->sv_next = svp; 632 } else { 633 svp_head = svp; 634 svp_2ndlast = svp; 635 } 636 637 svp_tail = svp; 638 639 svp->sv_knconf = args->knconf; 640 args->knconf = NULL; 641 642 if (args->addr == NULL || args->addr->buf == NULL) { 643 error = EINVAL; 644 goto errout; 645 } 646 647 svp->sv_addr.maxlen = args->addr->maxlen; 648 svp->sv_addr.len = args->addr->len; 649 svp->sv_addr.buf = args->addr->buf; 650 args->addr->buf = NULL; 651 652 /* 653 * Check the root fhandle length 654 */ 655 ASSERT(fhandle); 656 if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) { 657 error = EINVAL; 658 #ifdef DEBUG 659 zcmn_err(getzoneid(), CE_WARN, 660 "nfs3_mount: got an invalid fhandle. 
fh_len = %d", 661 fhandle->fh_len); 662 fhandle->fh_len = NFS_FHANDLE_LEN; 663 nfs_printfhandle(fhandle); 664 #endif 665 goto errout; 666 } 667 668 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 669 svp->sv_fhandle.fh_len = fhandle->fh_len; 670 671 /* 672 * Get server's hostname 673 */ 674 if (flags & NFSMNT_HOSTNAME) { 675 if (args->hostname == NULL) { 676 error = EINVAL; 677 goto errout; 678 } 679 svp->sv_hostnamelen = strlen(args->hostname) + 1; 680 svp->sv_hostname = args->hostname; 681 args->hostname = NULL; 682 } else { 683 char *p = "unknown-host"; 684 svp->sv_hostnamelen = strlen(p) + 1; 685 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 686 (void) strcpy(svp->sv_hostname, p); 687 } 688 689 690 /* 691 * RDMA MOUNT SUPPORT FOR NFS v3: 692 * Establish, is it possible to use RDMA, if so overload the 693 * knconf with rdma specific knconf and free the orignal. 694 */ 695 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 696 /* 697 * Determine the addr type for RDMA, IPv4 or v6. 698 */ 699 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 700 addr_type = AF_INET; 701 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 702 addr_type = AF_INET6; 703 704 if (rdma_reachable(addr_type, &svp->sv_addr, 705 &rdma_knconf) == 0) { 706 /* 707 * If successful, hijack the orignal knconf and 708 * replace with a new one, depending on the flags. 709 */ 710 svp->sv_origknconf = svp->sv_knconf; 711 svp->sv_knconf = rdma_knconf; 712 knconf = rdma_knconf; 713 } else { 714 if (flags & NFSMNT_TRYRDMA) { 715 #ifdef DEBUG 716 if (rdma_debug) 717 zcmn_err(getzoneid(), CE_WARN, 718 "no RDMA onboard, revert\n"); 719 #endif 720 } 721 722 if (flags & NFSMNT_DORDMA) { 723 /* 724 * If proto=rdma is specified and no RDMA 725 * path to this server is avialable then 726 * ditch this server. 727 * This is not included in the mountable 728 * server list or the replica list. 
729 * Check if more servers are specified; 730 * Failover case, otherwise bail out of mount. 731 */ 732 if (args->nfs_args_ext == 733 NFS_ARGS_EXTB && 734 args->nfs_ext_u.nfs_extB.next 735 != NULL) { 736 data = (char *) 737 args->nfs_ext_u.nfs_extB.next; 738 if (uap->flags & MS_RDONLY && 739 !(flags & NFSMNT_SOFT)) { 740 if (svp_head->sv_next == NULL) { 741 svp_tail = NULL; 742 svp_2ndlast = NULL; 743 sv_free(svp_head); 744 goto more; 745 } else { 746 svp_tail = svp_2ndlast; 747 svp_2ndlast->sv_next = 748 NULL; 749 sv_free(svp); 750 goto more; 751 } 752 } 753 } else { 754 /* 755 * This is the last server specified 756 * in the nfs_args list passed down 757 * and its not rdma capable. 758 */ 759 if (svp_head->sv_next == NULL) { 760 /* 761 * Is this the only one 762 */ 763 error = EINVAL; 764 #ifdef DEBUG 765 if (rdma_debug) 766 zcmn_err(getzoneid(), 767 CE_WARN, 768 "No RDMA srv"); 769 #endif 770 goto errout; 771 } else { 772 /* 773 * There is list, since some 774 * servers specified before 775 * this passed all requirements 776 */ 777 svp_tail = svp_2ndlast; 778 svp_2ndlast->sv_next = NULL; 779 sv_free(svp); 780 goto proceed; 781 } 782 } 783 } 784 } 785 } 786 787 /* 788 * Get the extention data which has the new security data structure. 789 */ 790 if (flags & NFSMNT_NEWARGS) { 791 switch (args->nfs_args_ext) { 792 case NFS_ARGS_EXTA: 793 case NFS_ARGS_EXTB: 794 /* 795 * Indicating the application is using the new 796 * sec_data structure to pass in the security 797 * data. 798 */ 799 secdata = args->nfs_ext_u.nfs_extA.secdata; 800 if (args->nfs_ext_u.nfs_extA.secdata == NULL) { 801 error = EINVAL; 802 } else { 803 /* 804 * Need to validate the flavor here if 805 * sysspace, userspace was already 806 * validate from the nfs_copyin function. 
807 */ 808 switch (secdata->rpcflavor) { 809 case AUTH_NONE: 810 case AUTH_UNIX: 811 case AUTH_LOOPBACK: 812 case AUTH_DES: 813 case RPCSEC_GSS: 814 args->nfs_ext_u.nfs_extA.secdata = 815 NULL; 816 break; 817 default: 818 error = EINVAL; 819 goto errout; 820 } 821 } 822 break; 823 824 default: 825 error = EINVAL; 826 break; 827 } 828 } else if (flags & NFSMNT_SECURE) { 829 /* 830 * Keep this for backward compatibility to support 831 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 832 */ 833 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 834 error = EINVAL; 835 goto errout; 836 } 837 /* 838 * Move security related data to the sec_data structure. 839 */ 840 { 841 dh_k4_clntdata_t *data; 842 char *pf, *p; 843 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 844 if (flags & NFSMNT_RPCTIMESYNC) 845 secdata->flags |= AUTH_F_RPCTIMESYNC; 846 data = kmem_alloc(sizeof (*data), KM_SLEEP); 847 bcopy(args->syncaddr, &data->syncaddr, 848 sizeof (*args->syncaddr)); 849 850 /* 851 * duplicate the knconf information for the 852 * new opaque data. 
853 */ 854 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 855 *data->knconf = *knconf; 856 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 857 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 858 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 859 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 860 data->knconf->knc_protofmly = pf; 861 data->knconf->knc_proto = p; 862 863 nlen = strlen(args->hostname) + 1; 864 /* move server netname to the sec_data structure */ 865 if (nlen != 0) { 866 data->netname = kmem_alloc(nlen, KM_SLEEP); 867 bcopy(args->hostname, data->netname, nlen); 868 data->netnamelen = nlen; 869 } 870 secdata->secmod = secdata->rpcflavor = AUTH_DES; 871 secdata->data = (caddr_t)data; 872 } 873 } else { 874 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 875 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 876 secdata->data = NULL; 877 } 878 879 svp->sv_secdata = secdata; 880 if (error) 881 goto errout; 882 883 /* 884 * See bug 1180236. 885 * If mount secure failed, we will fall back to AUTH_NONE 886 * and try again. nfs3rootvp() will turn this back off. 887 * 888 * The NFS Version 3 mount uses the FSINFO and GETATTR 889 * procedures. The server should not care if these procedures 890 * have the proper security flavor, so if mount retries using 891 * AUTH_NONE that does not require a credential setup for root 892 * then the automounter would work without requiring root to be 893 * keylogged into AUTH_DES. 894 */ 895 if (secdata->rpcflavor != AUTH_UNIX && 896 secdata->rpcflavor != AUTH_LOOPBACK) 897 secdata->flags |= AUTH_F_TRYNONE; 898 899 /* 900 * Failover support: 901 * 902 * We may have a linked list of nfs_args structures, 903 * which means the user is looking for failover. If 904 * the mount is either not "read-only" or "soft", 905 * we want to bail out with EINVAL. 
906 */ 907 if (args->nfs_args_ext == NFS_ARGS_EXTB && 908 args->nfs_ext_u.nfs_extB.next != NULL) { 909 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 910 data = (char *)args->nfs_ext_u.nfs_extB.next; 911 goto more; 912 } 913 error = EINVAL; 914 goto errout; 915 } 916 917 /* 918 * Determine the zone we're being mounted into. 919 */ 920 zone_hold(mntzone = zone); /* start with this assumption */ 921 if (getzoneid() == GLOBAL_ZONEID) { 922 zone_rele(mntzone); 923 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 924 ASSERT(mntzone != NULL); 925 if (mntzone != zone) { 926 error = EBUSY; 927 goto errout; 928 } 929 } 930 931 if (is_system_labeled()) { 932 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 933 svp->sv_knconf, cr); 934 935 if (error > 0) 936 goto errout; 937 938 if (error == -1) { 939 /* change mount to read-only to prevent write-down */ 940 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 941 } 942 } 943 944 /* 945 * Stop the mount from going any further if the zone is going away. 946 */ 947 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 948 error = EBUSY; 949 goto errout; 950 } 951 952 /* 953 * Get root vnode. 
954 */ 955 proceed: 956 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 957 958 if (error) 959 goto errout; 960 961 /* 962 * Set option fields in the mount info record 963 */ 964 mi = VTOMI(rtvp); 965 966 if (svp_head->sv_next) 967 mi->mi_flags |= MI_LLOCK; 968 969 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 970 971 errout: 972 if (error) { 973 if (rtvp != NULL) { 974 rp = VTOR(rtvp); 975 if (rp->r_flags & RHASHED) 976 rp_rmhash(rp); 977 } 978 sv_free(svp_head); 979 if (mi != NULL) { 980 nfs_async_stop(vfsp); 981 nfs_async_manager_stop(vfsp); 982 if (mi->mi_io_kstats) { 983 kstat_delete(mi->mi_io_kstats); 984 mi->mi_io_kstats = NULL; 985 } 986 if (mi->mi_ro_kstats) { 987 kstat_delete(mi->mi_ro_kstats); 988 mi->mi_ro_kstats = NULL; 989 } 990 nfs_free_mi(mi); 991 } 992 } 993 994 995 if (!(uap->flags & MS_SYSSPACE)) { 996 nfs3_free_args(args, fhandle); 997 kmem_free(args, sizeof (*args)); 998 } 999 1000 if (rtvp != NULL) 1001 VN_RELE(rtvp); 1002 1003 if (mntzone != NULL) 1004 zone_rele(mntzone); 1005 1006 return (error); 1007 } 1008 1009 static int nfs3_dynamic = 0; /* global variable to enable dynamic retrans. 
*/ 1010 static ushort_t nfs3_max_threads = 8; /* max number of active async threads */ 1011 static uint_t nfs3_bsize = 32 * 1024; /* client `block' size */ 1012 static uint_t nfs3_async_clusters = 1; /* # of reqs from each async queue */ 1013 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; 1014 1015 static int 1016 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1017 int flags, cred_t *cr, zone_t *zone) 1018 { 1019 vnode_t *rtvp; 1020 mntinfo_t *mi; 1021 dev_t nfs_dev; 1022 struct vattr va; 1023 struct FSINFO3args args; 1024 struct FSINFO3res res; 1025 int error; 1026 int douprintf; 1027 rnode_t *rp; 1028 int i; 1029 uint_t max_transfer_size; 1030 struct nfs_stats *nfsstatsp; 1031 cred_t *lcr = NULL, *tcr = cr; 1032 1033 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1034 ASSERT(nfsstatsp != NULL); 1035 1036 ASSERT(nfs_zone() == zone); 1037 /* 1038 * Create a mount record and link it to the vfs struct. 1039 */ 1040 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1041 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1042 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1043 mi->mi_flags = MI_ACL | MI_EXTATTR; 1044 if (!(flags & NFSMNT_SOFT)) 1045 mi->mi_flags |= MI_HARD; 1046 if ((flags & NFSMNT_SEMISOFT)) 1047 mi->mi_flags |= MI_SEMISOFT; 1048 if ((flags & NFSMNT_NOPRINT)) 1049 mi->mi_flags |= MI_NOPRINT; 1050 if (flags & NFSMNT_INT) 1051 mi->mi_flags |= MI_INT; 1052 mi->mi_retrans = NFS_RETRIES; 1053 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1054 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1055 mi->mi_timeo = nfs3_cots_timeo; 1056 else 1057 mi->mi_timeo = NFS_TIMEO; 1058 mi->mi_prog = NFS_PROGRAM; 1059 mi->mi_vers = NFS_V3; 1060 mi->mi_rfsnames = rfsnames_v3; 1061 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr; 1062 mi->mi_call_type = call_type_v3; 1063 mi->mi_ss_call_type = ss_call_type_v3; 1064 mi->mi_timer_type = timer_type_v3; 1065 mi->mi_aclnames = aclnames_v3; 1066 mi->mi_aclreqs = 
nfsstatsp->nfs_stats_v3.aclreqcnt_ptr; 1067 mi->mi_acl_call_type = acl_call_type_v3; 1068 mi->mi_acl_ss_call_type = acl_ss_call_type_v3; 1069 mi->mi_acl_timer_type = acl_timer_type_v3; 1070 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1071 mi->mi_servers = svp; 1072 mi->mi_curr_serv = svp; 1073 mi->mi_acregmin = SEC2HR(ACREGMIN); 1074 mi->mi_acregmax = SEC2HR(ACREGMAX); 1075 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1076 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1077 1078 if (nfs3_dynamic) 1079 mi->mi_flags |= MI_DYNAMIC; 1080 1081 if (flags & NFSMNT_DIRECTIO) 1082 mi->mi_flags |= MI_DIRECTIO; 1083 1084 /* 1085 * Make a vfs struct for nfs. We do this here instead of below 1086 * because rtvp needs a vfs before we can do a getattr on it. 1087 * 1088 * Assign a unique device id to the mount 1089 */ 1090 mutex_enter(&nfs_minor_lock); 1091 do { 1092 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1093 nfs_dev = makedevice(nfs_major, nfs_minor); 1094 } while (vfs_devismounted(nfs_dev)); 1095 mutex_exit(&nfs_minor_lock); 1096 1097 vfsp->vfs_dev = nfs_dev; 1098 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp); 1099 vfsp->vfs_data = (caddr_t)mi; 1100 vfsp->vfs_fstype = nfsfstyp; 1101 1102 /* 1103 * Verify that nfs3_bsize tuneable is set to an 1104 * acceptable value. It be a multiple of PAGESIZE or 1105 * file corruption can occur. 1106 */ 1107 if (nfs3_bsize & PAGEOFFSET) 1108 nfs3_bsize &= PAGEMASK; 1109 if (nfs3_bsize < PAGESIZE) 1110 nfs3_bsize = PAGESIZE; 1111 vfsp->vfs_bsize = nfs3_bsize; 1112 1113 /* 1114 * Initialize fields used to support async putpage operations. 
1115 */ 1116 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1117 mi->mi_async_clusters[i] = nfs3_async_clusters; 1118 mi->mi_async_init_clusters = nfs3_async_clusters; 1119 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1120 mi->mi_max_threads = nfs3_max_threads; 1121 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1122 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1123 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1124 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1125 1126 mi->mi_vfsp = vfsp; 1127 zone_hold(mi->mi_zone = zone); 1128 nfs_mi_zonelist_add(mi); 1129 1130 /* 1131 * Make the root vnode, use it to get attributes, 1132 * then remake it with the attributes. 1133 */ 1134 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle, 1135 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1136 1137 /* 1138 * Make the FSINFO calls, primarily at this point to 1139 * determine the transfer size. For client failover, 1140 * we'll want this to be the minimum bid from any 1141 * server, so that we don't overrun stated limits. 1142 * 1143 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1144 * which is only for the mount operation. 1145 */ 1146 1147 mi->mi_tsize = nfs3_tsize(svp->sv_knconf); 1148 mi->mi_stsize = mi->mi_tsize; 1149 1150 mi->mi_curread = nfs3_bsize; 1151 mi->mi_curwrite = mi->mi_curread; 1152 1153 /* 1154 * If the uid is set then set the creds for secure mounts 1155 * by proxy processes such as automountd. 
1156 */ 1157 if (svp->sv_secdata->uid != 0 && 1158 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1159 lcr = crdup(cr); 1160 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1161 tcr = lcr; 1162 } 1163 1164 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1165 douprintf = 1; 1166 mi->mi_curr_serv = svp; 1167 max_transfer_size = nfs3_tsize(svp->sv_knconf); 1168 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize); 1169 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize); 1170 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread); 1171 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite); 1172 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle; 1173 1174 error = rfs3call(mi, NFSPROC3_FSINFO, 1175 xdr_nfs_fh3, (caddr_t)&args, 1176 xdr_FSINFO3res, (caddr_t)&res, tcr, 1177 &douprintf, &res.status, 0, NULL); 1178 if (error) 1179 goto bad; 1180 error = geterrno3(res.status); 1181 if (error) 1182 goto bad; 1183 1184 /* get type of root node */ 1185 if (res.resok.obj_attributes.attributes) { 1186 if (res.resok.obj_attributes.attr.type < NF3REG || 1187 res.resok.obj_attributes.attr.type > NF3FIFO) { 1188 #ifdef DEBUG 1189 zcmn_err(getzoneid(), CE_WARN, 1190 "NFS3 server %s returned a bad file type for root", 1191 svp->sv_hostname); 1192 #else 1193 zcmn_err(getzoneid(), CE_WARN, 1194 "NFS server %s returned a bad file type for root", 1195 svp->sv_hostname); 1196 #endif 1197 error = EINVAL; 1198 goto bad; 1199 } else { 1200 if (rtvp->v_type != VNON && 1201 rtvp->v_type != nf3_to_vt[res.resok.obj_attributes.attr.type]) { 1202 #ifdef DEBUG 1203 zcmn_err(getzoneid(), CE_WARN, 1204 "NFS3 server %s returned a different file type for root", 1205 svp->sv_hostname); 1206 #else 1207 zcmn_err(getzoneid(), CE_WARN, 1208 "NFS server %s returned a different file type for root", 1209 svp->sv_hostname); 1210 #endif 1211 error = EINVAL; 1212 goto bad; 1213 } 1214 rtvp->v_type = 1215 nf3_to_vt[res.resok.obj_attributes.attr.type]; 1216 } 1217 } 1218 1219 if (res.resok.rtmax 
!= 0) { 1220 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize); 1221 if (res.resok.rtpref != 0) { 1222 mi->mi_curread = MIN(res.resok.rtpref, 1223 mi->mi_curread); 1224 } else { 1225 mi->mi_curread = MIN(res.resok.rtmax, 1226 mi->mi_curread); 1227 } 1228 } else if (res.resok.rtpref != 0) { 1229 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize); 1230 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread); 1231 } else { 1232 #ifdef DEBUG 1233 zcmn_err(getzoneid(), CE_WARN, 1234 "NFS3 server %s returned 0 for read transfer sizes", 1235 svp->sv_hostname); 1236 #else 1237 zcmn_err(getzoneid(), CE_WARN, 1238 "NFS server %s returned 0 for read transfer sizes", 1239 svp->sv_hostname); 1240 #endif 1241 error = EIO; 1242 goto bad; 1243 } 1244 if (res.resok.wtmax != 0) { 1245 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize); 1246 if (res.resok.wtpref != 0) { 1247 mi->mi_curwrite = MIN(res.resok.wtpref, 1248 mi->mi_curwrite); 1249 } else { 1250 mi->mi_curwrite = MIN(res.resok.wtmax, 1251 mi->mi_curwrite); 1252 } 1253 } else if (res.resok.wtpref != 0) { 1254 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize); 1255 mi->mi_curwrite = MIN(res.resok.wtpref, 1256 mi->mi_curwrite); 1257 } else { 1258 #ifdef DEBUG 1259 zcmn_err(getzoneid(), CE_WARN, 1260 "NFS3 server %s returned 0 for write transfer sizes", 1261 svp->sv_hostname); 1262 #else 1263 zcmn_err(getzoneid(), CE_WARN, 1264 "NFS server %s returned 0 for write transfer sizes", 1265 svp->sv_hostname); 1266 #endif 1267 error = EIO; 1268 goto bad; 1269 } 1270 1271 /* 1272 * These signal the ability of the server to create 1273 * hard links and symbolic links, so they really 1274 * aren't relevant if there is more than one server. 1275 * We'll set them here, though it probably looks odd. 
1276 */ 1277 if (res.resok.properties & FSF3_LINK) 1278 mi->mi_flags |= MI_LINK; 1279 if (res.resok.properties & FSF3_SYMLINK) 1280 mi->mi_flags |= MI_SYMLINK; 1281 1282 /* Pick up smallest non-zero maxfilesize value */ 1283 if (res.resok.maxfilesize) { 1284 if (mi->mi_maxfilesize) { 1285 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize, 1286 res.resok.maxfilesize); 1287 } else 1288 mi->mi_maxfilesize = res.resok.maxfilesize; 1289 } 1290 1291 /* 1292 * AUTH_F_TRYNONE is only for the mount operation, 1293 * so turn it back off. 1294 */ 1295 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1296 } 1297 mi->mi_curr_serv = mi->mi_servers; 1298 1299 /* 1300 * Start the thread responsible for handling async worker threads. 1301 */ 1302 VFS_HOLD(vfsp); /* add reference for thread */ 1303 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1304 vfsp, 0, minclsyspri); 1305 ASSERT(mi->mi_manager_thread != NULL); 1306 1307 /* 1308 * Initialize kstats 1309 */ 1310 nfs_mnt_kstat_init(vfsp); 1311 1312 /* If we didn't get a type, get one now */ 1313 if (rtvp->v_type == VNON) { 1314 va.va_mask = AT_ALL; 1315 1316 error = nfs3getattr(rtvp, &va, tcr); 1317 if (error) 1318 goto bad; 1319 rtvp->v_type = va.va_type; 1320 } 1321 1322 mi->mi_type = rtvp->v_type; 1323 1324 *rtvpp = rtvp; 1325 if (lcr != NULL) 1326 crfree(lcr); 1327 1328 return (0); 1329 bad: 1330 /* 1331 * An error occurred somewhere, need to clean up... 1332 * We need to release our reference to the root vnode and 1333 * destroy the mntinfo struct that we just created. 
1334 */ 1335 if (lcr != NULL) 1336 crfree(lcr); 1337 rp = VTOR(rtvp); 1338 if (rp->r_flags & RHASHED) 1339 rp_rmhash(rp); 1340 VN_RELE(rtvp); 1341 nfs_async_stop(vfsp); 1342 nfs_async_manager_stop(vfsp); 1343 if (mi->mi_io_kstats) { 1344 kstat_delete(mi->mi_io_kstats); 1345 mi->mi_io_kstats = NULL; 1346 } 1347 if (mi->mi_ro_kstats) { 1348 kstat_delete(mi->mi_ro_kstats); 1349 mi->mi_ro_kstats = NULL; 1350 } 1351 nfs_free_mi(mi); 1352 *rtvpp = NULL; 1353 return (error); 1354 } 1355 1356 /* 1357 * vfs operations 1358 */ 1359 static int 1360 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1361 { 1362 mntinfo_t *mi; 1363 ushort_t omax; 1364 1365 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1366 return (EPERM); 1367 1368 mi = VFTOMI(vfsp); 1369 if (flag & MS_FORCE) { 1370 1371 vfsp->vfs_flag |= VFS_UNMOUNTED; 1372 1373 /* 1374 * We are about to stop the async manager. 1375 * Let every one know not to schedule any 1376 * more async requests 1377 */ 1378 mutex_enter(&mi->mi_async_lock); 1379 mi->mi_max_threads = 0; 1380 cv_broadcast(&mi->mi_async_work_cv); 1381 mutex_exit(&mi->mi_async_lock); 1382 1383 /* 1384 * We need to stop the manager thread explicitly; the worker 1385 * threads can time out and exit on their own. 1386 */ 1387 nfs_async_manager_stop(vfsp); 1388 destroy_rtable(vfsp, cr); 1389 if (mi->mi_io_kstats) { 1390 kstat_delete(mi->mi_io_kstats); 1391 mi->mi_io_kstats = NULL; 1392 } 1393 if (mi->mi_ro_kstats) { 1394 kstat_delete(mi->mi_ro_kstats); 1395 mi->mi_ro_kstats = NULL; 1396 } 1397 return (0); 1398 } 1399 /* 1400 * Wait until all asynchronous putpage operations on 1401 * this file system are complete before flushing rnodes 1402 * from the cache. 1403 */ 1404 omax = mi->mi_max_threads; 1405 if (nfs_async_stop_sig(vfsp)) { 1406 return (EINTR); 1407 } 1408 rflush(vfsp, cr); 1409 /* 1410 * If there are any active vnodes on this file system, 1411 * then the file system is busy and can't be umounted. 
1412 */ 1413 if (check_rtable(vfsp)) { 1414 mutex_enter(&mi->mi_async_lock); 1415 mi->mi_max_threads = omax; 1416 mutex_exit(&mi->mi_async_lock); 1417 return (EBUSY); 1418 } 1419 /* 1420 * The unmount can't fail from now on; stop the worker thread manager. 1421 */ 1422 nfs_async_manager_stop(vfsp); 1423 /* 1424 * Destroy all rnodes belonging to this file system from the 1425 * rnode hash queues and purge any resources allocated to 1426 * them. 1427 */ 1428 destroy_rtable(vfsp, cr); 1429 if (mi->mi_io_kstats) { 1430 kstat_delete(mi->mi_io_kstats); 1431 mi->mi_io_kstats = NULL; 1432 } 1433 if (mi->mi_ro_kstats) { 1434 kstat_delete(mi->mi_ro_kstats); 1435 mi->mi_ro_kstats = NULL; 1436 } 1437 return (0); 1438 } 1439 1440 /* 1441 * find root of nfs 1442 */ 1443 static int 1444 nfs3_root(vfs_t *vfsp, vnode_t **vpp) 1445 { 1446 mntinfo_t *mi; 1447 vnode_t *vp; 1448 servinfo_t *svp; 1449 rnode_t *rp; 1450 int error = 0; 1451 1452 mi = VFTOMI(vfsp); 1453 1454 if (nfs_zone() != mi->mi_zone) 1455 return (EPERM); 1456 1457 svp = mi->mi_curr_serv; 1458 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1459 mutex_enter(&svp->sv_lock); 1460 svp->sv_flags &= ~SV_ROOT_STALE; 1461 mutex_exit(&svp->sv_lock); 1462 error = ENOENT; 1463 } 1464 1465 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle, 1466 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1467 1468 /* 1469 * if the SV_ROOT_STALE flag was reset above, reset the 1470 * RSTALE flag if needed and return an error 1471 */ 1472 if (error == ENOENT) { 1473 rp = VTOR(vp); 1474 if (svp && rp->r_flags & RSTALE) { 1475 mutex_enter(&rp->r_statelock); 1476 rp->r_flags &= ~RSTALE; 1477 mutex_exit(&rp->r_statelock); 1478 } 1479 VN_RELE(vp); 1480 return (error); 1481 } 1482 1483 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1484 1485 vp->v_type = mi->mi_type; 1486 1487 *vpp = vp; 1488 1489 return (0); 1490 } 1491 1492 /* 1493 * Get file system statistics. 
1494 */ 1495 static int 1496 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1497 { 1498 int error; 1499 struct mntinfo *mi; 1500 struct FSSTAT3args args; 1501 struct FSSTAT3res res; 1502 int douprintf; 1503 failinfo_t fi; 1504 vnode_t *vp; 1505 cred_t *cr; 1506 hrtime_t t; 1507 1508 mi = VFTOMI(vfsp); 1509 if (nfs_zone() != mi->mi_zone) 1510 return (EPERM); 1511 error = nfs3_root(vfsp, &vp); 1512 if (error) 1513 return (error); 1514 1515 cr = CRED(); 1516 1517 args.fsroot = *VTOFH3(vp); 1518 fi.vp = vp; 1519 fi.fhp = (caddr_t)&args.fsroot; 1520 fi.copyproc = nfs3copyfh; 1521 fi.lookupproc = nfs3lookup; 1522 fi.xattrdirproc = acl_getxattrdir3; 1523 1524 douprintf = 1; 1525 1526 t = gethrtime(); 1527 1528 error = rfs3call(mi, NFSPROC3_FSSTAT, 1529 xdr_nfs_fh3, (caddr_t)&args, 1530 xdr_FSSTAT3res, (caddr_t)&res, cr, 1531 &douprintf, &res.status, 0, &fi); 1532 1533 if (error) { 1534 VN_RELE(vp); 1535 return (error); 1536 } 1537 1538 error = geterrno3(res.status); 1539 if (!error) { 1540 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1541 sbp->f_bsize = MAXBSIZE; 1542 sbp->f_frsize = DEV_BSIZE; 1543 /* 1544 * Allow -1 fields to pass through unconverted. These 1545 * indicate "don't know" fields. 
1546 */ 1547 if (res.resok.tbytes == (size3)-1) 1548 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes; 1549 else { 1550 sbp->f_blocks = (fsblkcnt64_t) 1551 (res.resok.tbytes / DEV_BSIZE); 1552 } 1553 if (res.resok.fbytes == (size3)-1) 1554 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes; 1555 else { 1556 sbp->f_bfree = (fsblkcnt64_t) 1557 (res.resok.fbytes / DEV_BSIZE); 1558 } 1559 if (res.resok.abytes == (size3)-1) 1560 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes; 1561 else { 1562 sbp->f_bavail = (fsblkcnt64_t) 1563 (res.resok.abytes / DEV_BSIZE); 1564 } 1565 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles; 1566 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles; 1567 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles; 1568 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1569 (void) strncpy(sbp->f_basetype, 1570 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1571 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1572 sbp->f_namemax = (ulong_t)-1; 1573 } else { 1574 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 1575 PURGE_STALE_FH(error, vp, cr); 1576 } 1577 1578 VN_RELE(vp); 1579 1580 return (error); 1581 } 1582 1583 static kmutex_t nfs3_syncbusy; 1584 1585 /* 1586 * Flush dirty nfs files for file system vfsp. 1587 * If vfsp == NULL, all nfs files are flushed. 1588 */ 1589 /* ARGSUSED */ 1590 static int 1591 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr) 1592 { 1593 /* 1594 * Cross-zone calls are OK here, since this translates to a 1595 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 
1596 */ 1597 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) { 1598 rflush(vfsp, cr); 1599 mutex_exit(&nfs3_syncbusy); 1600 } 1601 return (0); 1602 } 1603 1604 /* ARGSUSED */ 1605 static int 1606 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1607 { 1608 int error; 1609 nfs_fh3 fh; 1610 vnode_t *vp; 1611 struct vattr va; 1612 1613 if (fidp->fid_len > NFS3_FHSIZE) { 1614 *vpp = NULL; 1615 return (ESTALE); 1616 } 1617 1618 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1619 return (EPERM); 1620 fh.fh3_length = fidp->fid_len; 1621 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length); 1622 1623 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1624 1625 if (VTOR(vp)->r_flags & RSTALE) { 1626 VN_RELE(vp); 1627 *vpp = NULL; 1628 return (ENOENT); 1629 } 1630 1631 if (vp->v_type == VNON) { 1632 va.va_mask = AT_ALL; 1633 error = nfs3getattr(vp, &va, CRED()); 1634 if (error) { 1635 VN_RELE(vp); 1636 *vpp = NULL; 1637 return (error); 1638 } 1639 vp->v_type = va.va_type; 1640 } 1641 1642 *vpp = vp; 1643 1644 return (0); 1645 } 1646 1647 /* ARGSUSED */ 1648 static int 1649 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why) 1650 { 1651 vnode_t *rtvp; 1652 char root_hostname[SYS_NMLN+1]; 1653 struct servinfo *svp; 1654 int error; 1655 int vfsflags; 1656 size_t size; 1657 char *root_path; 1658 struct pathname pn; 1659 char *name; 1660 cred_t *cr; 1661 struct nfs_args args; /* nfs mount arguments */ 1662 static char token[10]; 1663 1664 bzero(&args, sizeof (args)); 1665 1666 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1667 clkset(-1L); /* hack for now - until we get time svc? */ 1668 1669 if (why == ROOT_REMOUNT) { 1670 /* 1671 * Shouldn't happen. 1672 */ 1673 panic("nfs3_mountroot: why == ROOT_REMOUNT"); 1674 } 1675 1676 if (why == ROOT_UNMOUNT) { 1677 /* 1678 * Nothing to do for NFS. 
1679 */ 1680 return (0); 1681 } 1682 1683 /* 1684 * why == ROOT_INIT 1685 */ 1686 1687 name = token; 1688 *name = 0; 1689 getfsname("root", name, sizeof (token)); 1690 1691 pn_alloc(&pn); 1692 root_path = pn.pn_path; 1693 1694 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1695 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1696 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1697 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1698 1699 /* 1700 * Get server address 1701 * Get the root fhandle 1702 * Get server's transport 1703 * Get server's hostname 1704 * Get options 1705 */ 1706 args.addr = &svp->sv_addr; 1707 args.fh = (char *)&svp->sv_fhandle; 1708 args.knconf = svp->sv_knconf; 1709 args.hostname = root_hostname; 1710 vfsflags = 0; 1711 if (error = mount_root(*name ? name : "root", root_path, NFS_V3, 1712 &args, &vfsflags)) { 1713 if (error == EPROTONOSUPPORT) 1714 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: " 1715 "mount_root failed: server doesn't support NFS V3"); 1716 else 1717 nfs_cmn_err(error, CE_WARN, 1718 "nfs3_mountroot: mount_root failed: %m"); 1719 sv_free(svp); 1720 pn_free(&pn); 1721 return (error); 1722 } 1723 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1724 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1725 (void) strcpy(svp->sv_hostname, root_hostname); 1726 1727 /* 1728 * Force root partition to always be mounted with AUTH_UNIX for now 1729 */ 1730 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1731 svp->sv_secdata->secmod = AUTH_UNIX; 1732 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1733 svp->sv_secdata->data = NULL; 1734 1735 cr = crgetcred(); 1736 rtvp = NULL; 1737 1738 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1739 1740 crfree(cr); 1741 1742 if (error) { 1743 pn_free(&pn); 1744 sv_free(svp); 1745 return (error); 1746 } 1747 1748 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1749 if (error) { 1750 
nfs_cmn_err(error, CE_WARN, 1751 "nfs3_mountroot: invalid root mount options"); 1752 pn_free(&pn); 1753 goto errout; 1754 } 1755 1756 (void) vfs_lock_wait(vfsp); 1757 vfs_add(NULL, vfsp, vfsflags); 1758 vfs_unlock(vfsp); 1759 1760 size = strlen(svp->sv_hostname); 1761 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1762 rootfs.bo_name[size] = ':'; 1763 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1764 1765 pn_free(&pn); 1766 1767 errout: 1768 if (error) { 1769 sv_free(svp); 1770 nfs_async_stop(vfsp); 1771 nfs_async_manager_stop(vfsp); 1772 } 1773 1774 if (rtvp != NULL) 1775 VN_RELE(rtvp); 1776 1777 return (error); 1778 } 1779 1780 /* 1781 * Initialization routine for VFS routines. Should only be called once 1782 */ 1783 int 1784 nfs3_vfsinit(void) 1785 { 1786 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1787 return (0); 1788 } 1789 1790 void 1791 nfs3_vfsfini(void) 1792 { 1793 mutex_destroy(&nfs3_syncbusy); 1794 } 1795 1796 void 1797 nfs3_freevfs(vfs_t *vfsp) 1798 { 1799 mntinfo_t *mi; 1800 servinfo_t *svp; 1801 1802 /* free up the resources */ 1803 mi = VFTOMI(vfsp); 1804 svp = mi->mi_servers; 1805 mi->mi_servers = mi->mi_curr_serv = NULL; 1806 sv_free(svp); 1807 1808 /* 1809 * By this time we should have already deleted the 1810 * mi kstats in the unmount code. If they are still around 1811 * somethings wrong 1812 */ 1813 ASSERT(mi->mi_io_kstats == NULL); 1814 nfs_free_mi(mi); 1815 } 1816