1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/vnode.h> 38 #include <sys/pathname.h> 39 #include <sys/sysmacros.h> 40 #include <sys/kmem.h> 41 #include <sys/mkdev.h> 42 #include <sys/mount.h> 43 #include <sys/mntent.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/policy.h> 54 #include <sys/zone.h> 55 #include <sys/class.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/tsol/tnet.h> 59 60 #include <rpc/types.h> 61 #include <rpc/auth.h> 62 #include <rpc/clnt.h> 63 64 #include <nfs/nfs.h> 65 #include <nfs/nfs_clnt.h> 66 #include <nfs/rnode.h> 67 #include <nfs/mount.h> 68 #include <nfs/nfs_acl.h> 69 70 #include <fs/fs_subr.h> 71 72 /* 73 * From rpcsec module (common/rpcsec). 74 */ 75 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 76 extern void sec_clnt_freeinfo(struct sec_data *); 77 78 /* 79 * The order and contents of this structure must be kept in sync with that of 80 * rfsreqcnt_v3_tmpl in nfs_stats.c 81 */ 82 static char *rfsnames_v3[] = { 83 "null", "getattr", "setattr", "lookup", "access", "readlink", "read", 84 "write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir", 85 "rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo", 86 "pathconf", "commit" 87 }; 88 89 /* 90 * This table maps from NFS protocol number into call type. 91 * Zero means a "Lookup" type call 92 * One means a "Read" type call 93 * Two means a "Write" type call 94 * This is used to select a default time-out. 95 */ 96 static uchar_t call_type_v3[] = { 97 0, 0, 1, 0, 0, 0, 1, 98 2, 2, 2, 2, 2, 2, 2, 99 2, 2, 1, 2, 0, 0, 0, 100 2 }; 101 102 /* 103 * Similar table, but to determine which timer to use 104 * (only real reads and writes!) 105 */ 106 static uchar_t timer_type_v3[] = { 107 0, 0, 0, 0, 0, 0, 1, 108 2, 0, 0, 0, 0, 0, 0, 109 0, 0, 1, 1, 0, 0, 0, 110 0 }; 111 112 /* 113 * This table maps from NFS protocol number into a call type 114 * for the semisoft mount option. 115 * Zero means do not repeat operation. 116 * One means repeat. 117 */ 118 static uchar_t ss_call_type_v3[] = { 119 0, 0, 1, 0, 0, 0, 0, 120 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 0, 0, 0, 0, 0, 122 1 }; 123 124 /* 125 * nfs3 vfs operations. 126 */ 127 static int nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 128 static int nfs3_unmount(vfs_t *, int, cred_t *); 129 static int nfs3_root(vfs_t *, vnode_t **); 130 static int nfs3_statvfs(vfs_t *, struct statvfs64 *); 131 static int nfs3_sync(vfs_t *, short, cred_t *); 132 static int nfs3_vget(vfs_t *, vnode_t **, fid_t *); 133 static int nfs3_mountroot(vfs_t *, whymountroot_t); 134 static void nfs3_freevfs(vfs_t *); 135 136 static int nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *, 137 int, cred_t *, zone_t *); 138 139 /* 140 * Initialize the vfs structure 141 */ 142 143 static int nfs3fstyp; 144 vfsops_t *nfs3_vfsops; 145 146 /* 147 * Debug variable to check for rdma based 148 * transport startup and cleanup. Controlled 149 * through /etc/system. Off by default. 150 */ 151 extern int rdma_debug; 152 153 int 154 nfs3init(int fstyp, char *name) 155 { 156 static const fs_operation_def_t nfs3_vfsops_template[] = { 157 VFSNAME_MOUNT, { .vfs_mount = nfs3_mount }, 158 VFSNAME_UNMOUNT, { .vfs_unmount = nfs3_unmount }, 159 VFSNAME_ROOT, { .vfs_root = nfs3_root }, 160 VFSNAME_STATVFS, { .vfs_statvfs = nfs3_statvfs }, 161 VFSNAME_SYNC, { .vfs_sync = nfs3_sync }, 162 VFSNAME_VGET, { .vfs_vget = nfs3_vget }, 163 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs3_mountroot }, 164 VFSNAME_FREEVFS, { .vfs_freevfs = nfs3_freevfs }, 165 NULL, NULL 166 }; 167 int error; 168 169 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops); 170 if (error != 0) { 171 zcmn_err(GLOBAL_ZONEID, CE_WARN, 172 "nfs3init: bad vfs ops template"); 173 return (error); 174 } 175 176 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops); 177 if (error != 0) { 178 (void) vfs_freevfsops_by_type(fstyp); 179 zcmn_err(GLOBAL_ZONEID, CE_WARN, 180 "nfs3init: bad vnode ops template"); 181 return (error); 182 } 183 184 nfs3fstyp = fstyp; 185 186 return (0); 187 } 188 189 void 190 nfs3fini(void) 191 { 192 } 193 194 static void 195 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 196 { 197 198 if (fh) 199 kmem_free(fh, sizeof (*fh)); 200 201 if (nargs->knconf) { 202 if (nargs->knconf->knc_protofmly) 203 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE); 204 if (nargs->knconf->knc_proto) 205 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 206 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 207 nargs->knconf = NULL; 208 } 209 210 if (nargs->fh) { 211 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 212 nargs->fh = NULL; 213 } 214 215 if (nargs->hostname) { 216 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 217 nargs->hostname = NULL; 218 } 219 220 if (nargs->addr) { 221 if (nargs->addr->buf) { 222 ASSERT(nargs->addr->len); 223 kmem_free(nargs->addr->buf, nargs->addr->len); 224 } 225 kmem_free(nargs->addr, sizeof (struct netbuf)); 226 nargs->addr = NULL; 227 } 228 229 if (nargs->syncaddr) { 230 ASSERT(nargs->syncaddr->len); 231 if (nargs->syncaddr->buf) { 232 ASSERT(nargs->syncaddr->len); 233 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 234 } 235 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 236 nargs->syncaddr = NULL; 237 } 238 239 if (nargs->netname) { 240 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 241 nargs->netname = NULL; 242 } 243 244 if (nargs->nfs_ext_u.nfs_extA.secdata) { 245 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 246 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 247 } 248 } 249 250 static int 251 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 252 { 253 254 int error; 255 size_t nlen; /* length of netname */ 256 size_t hlen; /* length of hostname */ 257 char netname[MAXNETNAMELEN+1]; /* server's netname */ 258 struct netbuf addr; /* server's address */ 259 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 260 struct knetconfig *knconf; /* transport knetconfig structure */ 261 struct sec_data *secdata = NULL; /* security data */ 262 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 263 STRUCT_DECL(knetconfig, knconf_tmp); 264 STRUCT_DECL(netbuf, addr_tmp); 265 int flags; 266 char *p, *pf; 267 char *userbufptr; 268 269 270 bzero(nargs, sizeof (*nargs)); 271 272 STRUCT_INIT(args, get_udatamodel()); 273 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 274 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 275 return (EFAULT); 276 277 nargs->wsize = STRUCT_FGET(args, wsize); 278 nargs->rsize = STRUCT_FGET(args, rsize); 279 nargs->timeo = STRUCT_FGET(args, timeo); 280 nargs->retrans = STRUCT_FGET(args, retrans); 281 nargs->acregmin = STRUCT_FGET(args, acregmin); 282 nargs->acregmax = STRUCT_FGET(args, acregmax); 283 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 284 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 285 286 flags = STRUCT_FGET(args, flags); 287 nargs->flags = flags; 288 289 addr.buf = NULL; 290 syncaddr.buf = NULL; 291 292 /* 293 * Allocate space for a knetconfig structure and 294 * its strings and copy in from user-land. 295 */ 296 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 297 STRUCT_INIT(knconf_tmp, get_udatamodel()); 298 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 299 STRUCT_SIZE(knconf_tmp))) { 300 kmem_free(knconf, sizeof (*knconf)); 301 return (EFAULT); 302 } 303 304 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 305 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 306 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 307 if (get_udatamodel() != DATAMODEL_LP64) { 308 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 309 } else { 310 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 311 } 312 313 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 314 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 315 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 316 if (error) { 317 kmem_free(pf, KNC_STRSIZE); 318 kmem_free(p, KNC_STRSIZE); 319 kmem_free(knconf, sizeof (*knconf)); 320 return (error); 321 } 322 323 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 324 if (error) { 325 kmem_free(pf, KNC_STRSIZE); 326 kmem_free(p, KNC_STRSIZE); 327 kmem_free(knconf, sizeof (*knconf)); 328 return (error); 329 } 330 331 332 knconf->knc_protofmly = pf; 333 knconf->knc_proto = p; 334 335 nargs->knconf = knconf; 336 /* 337 * Get server address 338 */ 339 STRUCT_INIT(addr_tmp, get_udatamodel()); 340 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 341 STRUCT_SIZE(addr_tmp))) { 342 error = EFAULT; 343 goto errout; 344 } 345 346 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 347 userbufptr = STRUCT_FGETP(addr_tmp, buf); 348 addr.len = STRUCT_FGET(addr_tmp, len); 349 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 350 addr.maxlen = addr.len; 351 if (copyin(userbufptr, addr.buf, addr.len)) { 352 kmem_free(addr.buf, addr.len); 353 error = EFAULT; 354 goto errout; 355 } 356 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 357 358 /* 359 * Get the root fhandle 360 */ 361 362 if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) { 363 error = EFAULT; 364 goto errout; 365 } 366 367 368 /* 369 * Get server's hostname 370 */ 371 if (flags & NFSMNT_HOSTNAME) { 372 error = copyinstr(STRUCT_FGETP(args, hostname), netname, 373 sizeof (netname), &hlen); 374 if (error) 375 goto errout; 376 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 377 (void) strcpy(nargs->hostname, netname); 378 } else { 379 nargs->hostname = NULL; 380 } 381 382 383 /* 384 * If there are syncaddr and netname data, load them in. This is 385 * to support data needed for NFSV4 when AUTH_DH is the negotiated 386 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 387 */ 388 netname[0] = '\0'; 389 if (flags & NFSMNT_SECURE) { 390 if (STRUCT_FGETP(args, syncaddr) == NULL) { 391 error = EINVAL; 392 goto errout; 393 } 394 /* get syncaddr */ 395 STRUCT_INIT(addr_tmp, get_udatamodel()); 396 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 397 STRUCT_SIZE(addr_tmp))) { 398 error = EINVAL; 399 goto errout; 400 } 401 userbufptr = STRUCT_FGETP(addr_tmp, buf); 402 syncaddr.len = STRUCT_FGET(addr_tmp, len); 403 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 404 syncaddr.maxlen = syncaddr.len; 405 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 406 kmem_free(syncaddr.buf, syncaddr.len); 407 error = EFAULT; 408 goto errout; 409 } 410 411 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 412 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 413 414 ASSERT(STRUCT_FGETP(args, netname)); 415 416 if (copyinstr(STRUCT_FGETP(args, netname), netname, 417 sizeof (netname), &nlen)) { 418 error = EFAULT; 419 goto errout; 420 } 421 422 netname[nlen] = '\0'; 423 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 424 (void) strcpy(nargs->netname, netname); 425 } 426 427 /* 428 * Get the extention data which has the security data structure. 429 * This includes data for AUTH_SYS as well. 430 */ 431 if (flags & NFSMNT_NEWARGS) { 432 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 433 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 434 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 435 /* 436 * Indicating the application is using the new 437 * sec_data structure to pass in the security 438 * data. 439 */ 440 if (STRUCT_FGETP(args, 441 nfs_ext_u.nfs_extA.secdata) != NULL) { 442 error = sec_clnt_loadinfo( 443 (struct sec_data *)STRUCT_FGETP(args, 444 nfs_ext_u.nfs_extA.secdata), &secdata, 445 get_udatamodel()); 446 } 447 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 448 } 449 } 450 451 if (error) 452 goto errout; 453 454 /* 455 * Failover support: 456 * 457 * We may have a linked list of nfs_args structures, 458 * which means the user is looking for failover. If 459 * the mount is either not "read-only" or "soft", 460 * we want to bail out with EINVAL. 461 */ 462 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 463 nargs->nfs_ext_u.nfs_extB.next = 464 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 465 466 errout: 467 if (error) 468 nfs3_free_args(nargs, fh); 469 470 return (error); 471 } 472 473 474 /* 475 * nfs mount vfsop 476 * Set up mount info record and attach it to vfs struct. 477 */ 478 static int 479 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 480 { 481 struct nfs_args *args = NULL; 482 nfs_fhandle *fhandle = NULL; 483 char *data = uap->dataptr; 484 int error; 485 vnode_t *rtvp; /* the server's root */ 486 mntinfo_t *mi; /* mount info, pointed at by vfs */ 487 size_t nlen; /* length of netname */ 488 struct knetconfig *knconf; /* transport knetconfig structure */ 489 struct knetconfig *rdma_knconf; /* rdma transport structure */ 490 rnode_t *rp; 491 struct servinfo *svp; /* nfs server info */ 492 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 493 struct servinfo *svp_head; /* first nfs server info */ 494 struct servinfo *svp_2ndlast; /* 2nd last in server info list */ 495 struct sec_data *secdata; /* security data */ 496 int flags, addr_type; 497 zone_t *zone = nfs_zone(); 498 zone_t *mntzone = NULL; 499 500 501 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 502 return (EPERM); 503 504 if (mvp->v_type != VDIR) 505 return (ENOTDIR); 506 507 /* 508 * get arguments 509 * 510 * nfs_args is now versioned and is extensible, so 511 * uap->datalen might be different from sizeof (args) 512 * in a compatible situation. 513 */ 514 515 more: 516 517 if (!(uap->flags & MS_SYSSPACE)) { 518 if (args == NULL) 519 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 520 else { 521 nfs3_free_args(args, fhandle); 522 fhandle = NULL; 523 } 524 if (fhandle == NULL) 525 fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP); 526 error = nfs3_copyin(data, uap->datalen, args, fhandle); 527 if (error) { 528 if (args) 529 kmem_free(args, sizeof (*args)); 530 return (error); 531 } 532 } else { 533 args = (struct nfs_args *)data; 534 fhandle = (nfs_fhandle *)args->fh; 535 } 536 537 538 flags = args->flags; 539 540 if (uap->flags & MS_REMOUNT) { 541 size_t n; 542 char name[FSTYPSZ]; 543 544 if (uap->flags & MS_SYSSPACE) { 545 error = copystr(uap->fstype, name, FSTYPSZ, &n); 546 } else { 547 nfs3_free_args(args, fhandle); 548 kmem_free(args, sizeof (*args)); 549 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 550 } 551 if (error) { 552 if (error == ENAMETOOLONG) 553 return (EINVAL); 554 return (error); 555 } 556 557 /* 558 * This check is to ensure that the request is a 559 * genuine nfs remount request. 560 */ 561 562 if (strncmp(name, "nfs", 3) != 0) 563 return (EINVAL); 564 565 /* 566 * If the request changes the locking type, disallow the 567 * remount, 568 * because it's questionable whether we can transfer the 569 * locking state correctly. 570 */ 571 572 if ((mi = VFTOMI(vfsp)) != NULL) { 573 uint_t new_mi_llock; 574 uint_t old_mi_llock; 575 576 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 577 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 578 if (old_mi_llock != new_mi_llock) 579 return (EBUSY); 580 } 581 return (0); 582 } 583 584 mutex_enter(&mvp->v_lock); 585 if (!(uap->flags & MS_OVERLAY) && 586 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 587 mutex_exit(&mvp->v_lock); 588 if (!(uap->flags & MS_SYSSPACE)) { 589 nfs3_free_args(args, fhandle); 590 kmem_free(args, sizeof (*args)); 591 } 592 return (EBUSY); 593 } 594 mutex_exit(&mvp->v_lock); 595 596 /* make sure things are zeroed for errout: */ 597 rtvp = NULL; 598 mi = NULL; 599 secdata = NULL; 600 601 /* 602 * A valid knetconfig structure is required. 603 */ 604 if (!(flags & NFSMNT_KNCONF)) { 605 if (!(uap->flags & MS_SYSSPACE)) { 606 nfs3_free_args(args, fhandle); 607 kmem_free(args, sizeof (*args)); 608 } 609 return (EINVAL); 610 } 611 612 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 613 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 614 if (!(uap->flags & MS_SYSSPACE)) { 615 nfs3_free_args(args, fhandle); 616 kmem_free(args, sizeof (*args)); 617 } 618 return (EINVAL); 619 } 620 621 /* 622 * Allocate a servinfo struct. 623 */ 624 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 625 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 626 if (svp_tail) { 627 svp_2ndlast = svp_tail; 628 svp_tail->sv_next = svp; 629 } else { 630 svp_head = svp; 631 svp_2ndlast = svp; 632 } 633 634 svp_tail = svp; 635 636 svp->sv_knconf = args->knconf; 637 args->knconf = NULL; 638 639 if (args->addr == NULL || args->addr->buf == NULL) { 640 error = EINVAL; 641 goto errout; 642 } 643 644 svp->sv_addr.maxlen = args->addr->maxlen; 645 svp->sv_addr.len = args->addr->len; 646 svp->sv_addr.buf = args->addr->buf; 647 args->addr->buf = NULL; 648 649 /* 650 * Check the root fhandle length 651 */ 652 ASSERT(fhandle); 653 if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) { 654 error = EINVAL; 655 #ifdef DEBUG 656 zcmn_err(getzoneid(), CE_WARN, 657 "nfs3_mount: got an invalid fhandle. fh_len = %d", 658 fhandle->fh_len); 659 fhandle->fh_len = NFS_FHANDLE_LEN; 660 nfs_printfhandle(fhandle); 661 #endif 662 goto errout; 663 } 664 665 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 666 svp->sv_fhandle.fh_len = fhandle->fh_len; 667 668 /* 669 * Get server's hostname 670 */ 671 if (flags & NFSMNT_HOSTNAME) { 672 if (args->hostname == NULL) { 673 error = EINVAL; 674 goto errout; 675 } 676 svp->sv_hostnamelen = strlen(args->hostname) + 1; 677 svp->sv_hostname = args->hostname; 678 args->hostname = NULL; 679 } else { 680 char *p = "unknown-host"; 681 svp->sv_hostnamelen = strlen(p) + 1; 682 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 683 (void) strcpy(svp->sv_hostname, p); 684 } 685 686 687 /* 688 * RDMA MOUNT SUPPORT FOR NFS v3: 689 * Establish, is it possible to use RDMA, if so overload the 690 * knconf with rdma specific knconf and free the orignal. 691 */ 692 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 693 /* 694 * Determine the addr type for RDMA, IPv4 or v6. 695 */ 696 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 697 addr_type = AF_INET; 698 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 699 addr_type = AF_INET6; 700 701 if (rdma_reachable(addr_type, &svp->sv_addr, 702 &rdma_knconf) == 0) { 703 /* 704 * If successful, hijack the orignal knconf and 705 * replace with a new one, depending on the flags. 706 */ 707 svp->sv_origknconf = svp->sv_knconf; 708 svp->sv_knconf = rdma_knconf; 709 knconf = rdma_knconf; 710 } else { 711 if (flags & NFSMNT_TRYRDMA) { 712 #ifdef DEBUG 713 if (rdma_debug) 714 zcmn_err(getzoneid(), CE_WARN, 715 "no RDMA onboard, revert\n"); 716 #endif 717 } 718 719 if (flags & NFSMNT_DORDMA) { 720 /* 721 * If proto=rdma is specified and no RDMA 722 * path to this server is avialable then 723 * ditch this server. 724 * This is not included in the mountable 725 * server list or the replica list. 726 * Check if more servers are specified; 727 * Failover case, otherwise bail out of mount. 728 */ 729 if (args->nfs_args_ext == NFS_ARGS_EXTB && 730 args->nfs_ext_u.nfs_extB.next != NULL) { 731 data = (char *) 732 args->nfs_ext_u.nfs_extB.next; 733 if (uap->flags & MS_RDONLY && 734 !(flags & NFSMNT_SOFT)) { 735 if (svp_head->sv_next == NULL) { 736 svp_tail = NULL; 737 svp_2ndlast = NULL; 738 sv_free(svp_head); 739 goto more; 740 } else { 741 svp_tail = svp_2ndlast; 742 svp_2ndlast->sv_next = 743 NULL; 744 sv_free(svp); 745 goto more; 746 } 747 } 748 } else { 749 /* 750 * This is the last server specified 751 * in the nfs_args list passed down 752 * and its not rdma capable. 753 */ 754 if (svp_head->sv_next == NULL) { 755 /* 756 * Is this the only one 757 */ 758 error = EINVAL; 759 #ifdef DEBUG 760 if (rdma_debug) 761 zcmn_err(getzoneid(), 762 CE_WARN, 763 "No RDMA srv"); 764 #endif 765 goto errout; 766 } else { 767 /* 768 * There is list, since some 769 * servers specified before 770 * this passed all requirements 771 */ 772 svp_tail = svp_2ndlast; 773 svp_2ndlast->sv_next = NULL; 774 sv_free(svp); 775 goto proceed; 776 } 777 } 778 } 779 } 780 } 781 782 /* 783 * Get the extention data which has the new security data structure. 784 */ 785 if (flags & NFSMNT_NEWARGS) { 786 switch (args->nfs_args_ext) { 787 case NFS_ARGS_EXTA: 788 case NFS_ARGS_EXTB: 789 /* 790 * Indicating the application is using the new 791 * sec_data structure to pass in the security 792 * data. 793 */ 794 secdata = args->nfs_ext_u.nfs_extA.secdata; 795 if (args->nfs_ext_u.nfs_extA.secdata == NULL) { 796 error = EINVAL; 797 } else { 798 /* 799 * Need to validate the flavor here if 800 * sysspace, userspace was already 801 * validate from the nfs_copyin function. 802 */ 803 switch (secdata->rpcflavor) { 804 case AUTH_NONE: 805 case AUTH_UNIX: 806 case AUTH_LOOPBACK: 807 case AUTH_DES: 808 case RPCSEC_GSS: 809 args->nfs_ext_u.nfs_extA.secdata = NULL; 810 break; 811 default: 812 error = EINVAL; 813 goto errout; 814 } 815 } 816 break; 817 818 default: 819 error = EINVAL; 820 break; 821 } 822 } else if (flags & NFSMNT_SECURE) { 823 /* 824 * Keep this for backward compatibility to support 825 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 826 */ 827 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 828 error = EINVAL; 829 goto errout; 830 } 831 /* 832 * Move security related data to the sec_data structure. 833 */ 834 { 835 dh_k4_clntdata_t *data; 836 char *pf, *p; 837 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 838 if (flags & NFSMNT_RPCTIMESYNC) 839 secdata->flags |= AUTH_F_RPCTIMESYNC; 840 data = kmem_alloc(sizeof (*data), KM_SLEEP); 841 bcopy(args->syncaddr, &data->syncaddr, 842 sizeof (*args->syncaddr)); 843 844 /* 845 * duplicate the knconf information for the 846 * new opaque data. 847 */ 848 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 849 *data->knconf = *knconf; 850 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 851 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 852 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 853 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 854 data->knconf->knc_protofmly = pf; 855 data->knconf->knc_proto = p; 856 857 nlen = strlen(args->hostname) + 1; 858 /* move server netname to the sec_data structure */ 859 if (nlen != 0) { 860 data->netname = kmem_alloc(nlen, KM_SLEEP); 861 bcopy(args->hostname, data->netname, nlen); 862 data->netnamelen = nlen; 863 } 864 secdata->secmod = secdata->rpcflavor = AUTH_DES; 865 secdata->data = (caddr_t)data; 866 } 867 } else { 868 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 869 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 870 secdata->data = NULL; 871 } 872 873 svp->sv_secdata = secdata; 874 if (error) 875 goto errout; 876 877 /* 878 * See bug 1180236. 879 * If mount secure failed, we will fall back to AUTH_NONE 880 * and try again. nfs3rootvp() will turn this back off. 881 * 882 * The NFS Version 3 mount uses the FSINFO and GETATTR 883 * procedures. The server should not care if these procedures 884 * have the proper security flavor, so if mount retries using 885 * AUTH_NONE that does not require a credential setup for root 886 * then the automounter would work without requiring root to be 887 * keylogged into AUTH_DES. 888 */ 889 if (secdata->rpcflavor != AUTH_UNIX && 890 secdata->rpcflavor != AUTH_LOOPBACK) 891 secdata->flags |= AUTH_F_TRYNONE; 892 893 /* 894 * Failover support: 895 * 896 * We may have a linked list of nfs_args structures, 897 * which means the user is looking for failover. If 898 * the mount is either not "read-only" or "soft", 899 * we want to bail out with EINVAL. 900 */ 901 if (args->nfs_args_ext == NFS_ARGS_EXTB && 902 args->nfs_ext_u.nfs_extB.next != NULL) { 903 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 904 data = (char *)args->nfs_ext_u.nfs_extB.next; 905 goto more; 906 } 907 error = EINVAL; 908 goto errout; 909 } 910 911 /* 912 * Determine the zone we're being mounted into. 913 */ 914 zone_hold(mntzone = zone); /* start with this assumption */ 915 if (getzoneid() == GLOBAL_ZONEID) { 916 zone_rele(mntzone); 917 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 918 ASSERT(mntzone != NULL); 919 if (mntzone != zone) { 920 error = EBUSY; 921 goto errout; 922 } 923 } 924 925 if (is_system_labeled()) { 926 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 927 svp->sv_knconf, cr); 928 929 if (error > 0) 930 goto errout; 931 932 if (error == -1) { 933 /* change mount to read-only to prevent write-down */ 934 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 935 } 936 } 937 938 /* 939 * Stop the mount from going any further if the zone is going away. 940 */ 941 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 942 error = EBUSY; 943 goto errout; 944 } 945 946 /* 947 * Get root vnode. 948 */ 949 proceed: 950 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 951 952 if (error) 953 goto errout; 954 955 /* 956 * Set option fields in the mount info record 957 */ 958 mi = VTOMI(rtvp); 959 960 if (svp_head->sv_next) 961 mi->mi_flags |= MI_LLOCK; 962 963 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 964 965 errout: 966 if (rtvp != NULL) { 967 if (error) { 968 rp = VTOR(rtvp); 969 if (rp->r_flags & RHASHED) 970 rp_rmhash(rp); 971 } 972 VN_RELE(rtvp); 973 } 974 975 if (error) { 976 sv_free(svp_head); 977 if (mi != NULL) { 978 nfs_async_stop(vfsp); 979 nfs_async_manager_stop(vfsp); 980 if (mi->mi_io_kstats) { 981 kstat_delete(mi->mi_io_kstats); 982 mi->mi_io_kstats = NULL; 983 } 984 if (mi->mi_ro_kstats) { 985 kstat_delete(mi->mi_ro_kstats); 986 mi->mi_ro_kstats = NULL; 987 } 988 nfs_free_mi(mi); 989 } 990 } 991 992 993 if (!(uap->flags & MS_SYSSPACE)) { 994 nfs3_free_args(args, fhandle); 995 kmem_free(args, sizeof (*args)); 996 } 997 998 if (mntzone != NULL) 999 zone_rele(mntzone); 1000 1001 return (error); 1002 } 1003 1004 static int nfs3_dynamic = 0; /* global variable to enable dynamic retrans. */ 1005 static ushort_t nfs3_max_threads = 8; /* max number of active async threads */ 1006 static uint_t nfs3_bsize = 32 * 1024; /* client `block' size */ 1007 static uint_t nfs3_async_clusters = 1; /* # of reqs from each async queue */ 1008 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; 1009 1010 static int 1011 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1012 int flags, cred_t *cr, zone_t *zone) 1013 { 1014 vnode_t *rtvp; 1015 mntinfo_t *mi; 1016 dev_t nfs_dev; 1017 struct vattr va; 1018 struct FSINFO3args args; 1019 struct FSINFO3res res; 1020 int error; 1021 int douprintf; 1022 rnode_t *rp; 1023 int i; 1024 uint_t max_transfer_size; 1025 struct nfs_stats *nfsstatsp; 1026 cred_t *lcr = NULL, *tcr = cr; 1027 1028 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1029 ASSERT(nfsstatsp != NULL); 1030 1031 ASSERT(nfs_zone() == zone); 1032 /* 1033 * Create a mount record and link it to the vfs struct. 1034 */ 1035 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1036 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1037 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1038 mi->mi_flags = MI_ACL | MI_EXTATTR; 1039 if (!(flags & NFSMNT_SOFT)) 1040 mi->mi_flags |= MI_HARD; 1041 if ((flags & NFSMNT_SEMISOFT)) 1042 mi->mi_flags |= MI_SEMISOFT; 1043 if ((flags & NFSMNT_NOPRINT)) 1044 mi->mi_flags |= MI_NOPRINT; 1045 if (flags & NFSMNT_INT) 1046 mi->mi_flags |= MI_INT; 1047 mi->mi_retrans = NFS_RETRIES; 1048 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1049 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1050 mi->mi_timeo = nfs3_cots_timeo; 1051 else 1052 mi->mi_timeo = NFS_TIMEO; 1053 mi->mi_prog = NFS_PROGRAM; 1054 mi->mi_vers = NFS_V3; 1055 mi->mi_rfsnames = rfsnames_v3; 1056 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr; 1057 mi->mi_call_type = call_type_v3; 1058 mi->mi_ss_call_type = ss_call_type_v3; 1059 mi->mi_timer_type = timer_type_v3; 1060 mi->mi_aclnames = aclnames_v3; 1061 mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr; 1062 mi->mi_acl_call_type = acl_call_type_v3; 1063 mi->mi_acl_ss_call_type = acl_ss_call_type_v3; 1064 mi->mi_acl_timer_type = acl_timer_type_v3; 1065 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1066 mi->mi_servers = svp; 1067 mi->mi_curr_serv = svp; 1068 mi->mi_acregmin = SEC2HR(ACREGMIN); 1069 mi->mi_acregmax = SEC2HR(ACREGMAX); 1070 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1071 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1072 1073 if (nfs3_dynamic) 1074 mi->mi_flags |= MI_DYNAMIC; 1075 1076 if (flags & NFSMNT_DIRECTIO) 1077 mi->mi_flags |= MI_DIRECTIO; 1078 1079 /* 1080 * Make a vfs struct for nfs. We do this here instead of below 1081 * because rtvp needs a vfs before we can do a getattr on it. 1082 * 1083 * Assign a unique device id to the mount 1084 */ 1085 mutex_enter(&nfs_minor_lock); 1086 do { 1087 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1088 nfs_dev = makedevice(nfs_major, nfs_minor); 1089 } while (vfs_devismounted(nfs_dev)); 1090 mutex_exit(&nfs_minor_lock); 1091 1092 vfsp->vfs_dev = nfs_dev; 1093 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp); 1094 vfsp->vfs_data = (caddr_t)mi; 1095 vfsp->vfs_fstype = nfsfstyp; 1096 1097 /* 1098 * Verify that nfs3_bsize tuneable is set to an 1099 * acceptable value. It be a multiple of PAGESIZE or 1100 * file corruption can occur. 1101 */ 1102 if (nfs3_bsize & PAGEOFFSET) 1103 nfs3_bsize &= PAGEMASK; 1104 if (nfs3_bsize < PAGESIZE) 1105 nfs3_bsize = PAGESIZE; 1106 vfsp->vfs_bsize = nfs3_bsize; 1107 1108 /* 1109 * Initialize fields used to support async putpage operations. 1110 */ 1111 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1112 mi->mi_async_clusters[i] = nfs3_async_clusters; 1113 mi->mi_async_init_clusters = nfs3_async_clusters; 1114 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1115 mi->mi_max_threads = nfs3_max_threads; 1116 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1117 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1118 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1119 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1120 1121 mi->mi_vfsp = vfsp; 1122 zone_hold(mi->mi_zone = zone); 1123 nfs_mi_zonelist_add(mi); 1124 1125 /* 1126 * Make the root vnode, use it to get attributes, 1127 * then remake it with the attributes. 1128 */ 1129 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle, 1130 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1131 1132 /* 1133 * Make the FSINFO calls, primarily at this point to 1134 * determine the transfer size. For client failover, 1135 * we'll want this to be the minimum bid from any 1136 * server, so that we don't overrun stated limits. 1137 * 1138 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1139 * which is only for the mount operation. 1140 */ 1141 1142 mi->mi_tsize = nfs3_tsize(svp->sv_knconf); 1143 mi->mi_stsize = mi->mi_tsize; 1144 1145 mi->mi_curread = nfs3_bsize; 1146 mi->mi_curwrite = mi->mi_curread; 1147 1148 /* 1149 * If the uid is set then set the creds for secure mounts 1150 * by proxy processes such as automountd. 1151 */ 1152 if (svp->sv_secdata->uid != 0 && 1153 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1154 lcr = crdup(cr); 1155 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1156 tcr = lcr; 1157 } 1158 1159 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1160 douprintf = 1; 1161 mi->mi_curr_serv = svp; 1162 max_transfer_size = nfs3_tsize(svp->sv_knconf); 1163 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize); 1164 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize); 1165 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread); 1166 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite); 1167 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle; 1168 1169 error = rfs3call(mi, NFSPROC3_FSINFO, 1170 xdr_nfs_fh3, (caddr_t)&args, 1171 xdr_FSINFO3res, (caddr_t)&res, tcr, 1172 &douprintf, &res.status, 0, NULL); 1173 if (error) 1174 goto bad; 1175 error = geterrno3(res.status); 1176 if (error) 1177 goto bad; 1178 1179 /* get type of root node */ 1180 if (res.resok.obj_attributes.attributes) { 1181 if (res.resok.obj_attributes.attr.type < NF3REG || 1182 res.resok.obj_attributes.attr.type > NF3FIFO) { 1183 #ifdef DEBUG 1184 zcmn_err(getzoneid(), CE_WARN, 1185 "NFS3 server %s returned a bad file type for root", 1186 svp->sv_hostname); 1187 #else 1188 zcmn_err(getzoneid(), CE_WARN, 1189 "NFS server %s returned a bad file type for root", 1190 svp->sv_hostname); 1191 #endif 1192 error = EINVAL; 1193 goto bad; 1194 } else { 1195 if (rtvp->v_type != VNON && rtvp->v_type != 1196 nf3_to_vt[res.resok.obj_attributes.attr. 1197 type]) { 1198 #ifdef DEBUG 1199 zcmn_err(getzoneid(), CE_WARN, 1200 "NFS3 server %s returned a different file type for root", 1201 svp->sv_hostname); 1202 #else 1203 zcmn_err(getzoneid(), CE_WARN, 1204 "NFS server %s returned a different file type for root", 1205 svp->sv_hostname); 1206 #endif 1207 error = EINVAL; 1208 goto bad; 1209 } 1210 rtvp->v_type = 1211 nf3_to_vt[res.resok.obj_attributes.attr. 1212 type]; 1213 } 1214 } 1215 1216 if (res.resok.rtmax != 0) { 1217 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize); 1218 if (res.resok.rtpref != 0) { 1219 mi->mi_curread = MIN(res.resok.rtpref, 1220 mi->mi_curread); 1221 } else { 1222 mi->mi_curread = MIN(res.resok.rtmax, 1223 mi->mi_curread); 1224 } 1225 } else if (res.resok.rtpref != 0) { 1226 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize); 1227 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread); 1228 } else { 1229 #ifdef DEBUG 1230 zcmn_err(getzoneid(), CE_WARN, 1231 "NFS3 server %s returned 0 for read transfer sizes", 1232 svp->sv_hostname); 1233 #else 1234 zcmn_err(getzoneid(), CE_WARN, 1235 "NFS server %s returned 0 for read transfer sizes", 1236 svp->sv_hostname); 1237 #endif 1238 error = EIO; 1239 goto bad; 1240 } 1241 if (res.resok.wtmax != 0) { 1242 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize); 1243 if (res.resok.wtpref != 0) { 1244 mi->mi_curwrite = MIN(res.resok.wtpref, 1245 mi->mi_curwrite); 1246 } else { 1247 mi->mi_curwrite = MIN(res.resok.wtmax, 1248 mi->mi_curwrite); 1249 } 1250 } else if (res.resok.wtpref != 0) { 1251 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize); 1252 mi->mi_curwrite = MIN(res.resok.wtpref, 1253 mi->mi_curwrite); 1254 } else { 1255 #ifdef DEBUG 1256 zcmn_err(getzoneid(), CE_WARN, 1257 "NFS3 server %s returned 0 for write transfer sizes", 1258 svp->sv_hostname); 1259 #else 1260 zcmn_err(getzoneid(), CE_WARN, 1261 "NFS server %s returned 0 for write transfer sizes", 1262 svp->sv_hostname); 1263 #endif 1264 error = EIO; 1265 goto bad; 1266 } 1267 1268 /* 1269 * These signal the ability of the server to create 1270 * hard links and symbolic links, so they really 1271 * aren't relevant if there is more than one server. 1272 * We'll set them here, though it probably looks odd. 1273 */ 1274 if (res.resok.properties & FSF3_LINK) 1275 mi->mi_flags |= MI_LINK; 1276 if (res.resok.properties & FSF3_SYMLINK) 1277 mi->mi_flags |= MI_SYMLINK; 1278 1279 /* Pick up smallest non-zero maxfilesize value */ 1280 if (res.resok.maxfilesize) { 1281 if (mi->mi_maxfilesize) { 1282 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize, 1283 res.resok.maxfilesize); 1284 } else 1285 mi->mi_maxfilesize = res.resok.maxfilesize; 1286 } 1287 1288 /* 1289 * AUTH_F_TRYNONE is only for the mount operation, 1290 * so turn it back off. 1291 */ 1292 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1293 } 1294 mi->mi_curr_serv = mi->mi_servers; 1295 1296 /* 1297 * Start the thread responsible for handling async worker threads. 1298 */ 1299 VFS_HOLD(vfsp); /* add reference for thread */ 1300 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1301 vfsp, 0, minclsyspri); 1302 ASSERT(mi->mi_manager_thread != NULL); 1303 1304 /* 1305 * Initialize kstats 1306 */ 1307 nfs_mnt_kstat_init(vfsp); 1308 1309 /* If we didn't get a type, get one now */ 1310 if (rtvp->v_type == VNON) { 1311 va.va_mask = AT_ALL; 1312 1313 error = nfs3getattr(rtvp, &va, tcr); 1314 if (error) 1315 goto bad; 1316 rtvp->v_type = va.va_type; 1317 } 1318 1319 mi->mi_type = rtvp->v_type; 1320 1321 *rtvpp = rtvp; 1322 if (lcr != NULL) 1323 crfree(lcr); 1324 1325 return (0); 1326 bad: 1327 /* 1328 * An error occurred somewhere, need to clean up... 1329 * We need to release our reference to the root vnode and 1330 * destroy the mntinfo struct that we just created. 1331 */ 1332 if (lcr != NULL) 1333 crfree(lcr); 1334 rp = VTOR(rtvp); 1335 if (rp->r_flags & RHASHED) 1336 rp_rmhash(rp); 1337 VN_RELE(rtvp); 1338 nfs_async_stop(vfsp); 1339 nfs_async_manager_stop(vfsp); 1340 if (mi->mi_io_kstats) { 1341 kstat_delete(mi->mi_io_kstats); 1342 mi->mi_io_kstats = NULL; 1343 } 1344 if (mi->mi_ro_kstats) { 1345 kstat_delete(mi->mi_ro_kstats); 1346 mi->mi_ro_kstats = NULL; 1347 } 1348 nfs_free_mi(mi); 1349 *rtvpp = NULL; 1350 return (error); 1351 } 1352 1353 /* 1354 * vfs operations 1355 */ 1356 static int 1357 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1358 { 1359 mntinfo_t *mi; 1360 ushort_t omax; 1361 1362 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1363 return (EPERM); 1364 1365 mi = VFTOMI(vfsp); 1366 if (flag & MS_FORCE) { 1367 1368 vfsp->vfs_flag |= VFS_UNMOUNTED; 1369 1370 /* 1371 * We are about to stop the async manager. 1372 * Let every one know not to schedule any 1373 * more async requests 1374 */ 1375 mutex_enter(&mi->mi_async_lock); 1376 mi->mi_max_threads = 0; 1377 cv_broadcast(&mi->mi_async_work_cv); 1378 mutex_exit(&mi->mi_async_lock); 1379 1380 /* 1381 * We need to stop the manager thread explicitly; the worker 1382 * threads can time out and exit on their own. 1383 */ 1384 nfs_async_manager_stop(vfsp); 1385 destroy_rtable(vfsp, cr); 1386 if (mi->mi_io_kstats) { 1387 kstat_delete(mi->mi_io_kstats); 1388 mi->mi_io_kstats = NULL; 1389 } 1390 if (mi->mi_ro_kstats) { 1391 kstat_delete(mi->mi_ro_kstats); 1392 mi->mi_ro_kstats = NULL; 1393 } 1394 return (0); 1395 } 1396 /* 1397 * Wait until all asynchronous putpage operations on 1398 * this file system are complete before flushing rnodes 1399 * from the cache. 1400 */ 1401 omax = mi->mi_max_threads; 1402 if (nfs_async_stop_sig(vfsp)) { 1403 return (EINTR); 1404 } 1405 rflush(vfsp, cr); 1406 /* 1407 * If there are any active vnodes on this file system, 1408 * then the file system is busy and can't be umounted. 1409 */ 1410 if (check_rtable(vfsp)) { 1411 mutex_enter(&mi->mi_async_lock); 1412 mi->mi_max_threads = omax; 1413 mutex_exit(&mi->mi_async_lock); 1414 return (EBUSY); 1415 } 1416 /* 1417 * The unmount can't fail from now on; stop the worker thread manager. 1418 */ 1419 nfs_async_manager_stop(vfsp); 1420 /* 1421 * Destroy all rnodes belonging to this file system from the 1422 * rnode hash queues and purge any resources allocated to 1423 * them. 1424 */ 1425 destroy_rtable(vfsp, cr); 1426 if (mi->mi_io_kstats) { 1427 kstat_delete(mi->mi_io_kstats); 1428 mi->mi_io_kstats = NULL; 1429 } 1430 if (mi->mi_ro_kstats) { 1431 kstat_delete(mi->mi_ro_kstats); 1432 mi->mi_ro_kstats = NULL; 1433 } 1434 return (0); 1435 } 1436 1437 /* 1438 * find root of nfs 1439 */ 1440 static int 1441 nfs3_root(vfs_t *vfsp, vnode_t **vpp) 1442 { 1443 mntinfo_t *mi; 1444 vnode_t *vp; 1445 servinfo_t *svp; 1446 rnode_t *rp; 1447 int error = 0; 1448 1449 mi = VFTOMI(vfsp); 1450 1451 if (nfs_zone() != mi->mi_zone) 1452 return (EPERM); 1453 1454 svp = mi->mi_curr_serv; 1455 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1456 mutex_enter(&svp->sv_lock); 1457 svp->sv_flags &= ~SV_ROOT_STALE; 1458 mutex_exit(&svp->sv_lock); 1459 error = ENOENT; 1460 } 1461 1462 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle, 1463 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1464 1465 /* 1466 * if the SV_ROOT_STALE flag was reset above, reset the 1467 * RSTALE flag if needed and return an error 1468 */ 1469 if (error == ENOENT) { 1470 rp = VTOR(vp); 1471 if (svp && rp->r_flags & RSTALE) { 1472 mutex_enter(&rp->r_statelock); 1473 rp->r_flags &= ~RSTALE; 1474 mutex_exit(&rp->r_statelock); 1475 } 1476 VN_RELE(vp); 1477 return (error); 1478 } 1479 1480 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1481 1482 vp->v_type = mi->mi_type; 1483 1484 *vpp = vp; 1485 1486 return (0); 1487 } 1488 1489 /* 1490 * Get file system statistics. 1491 */ 1492 static int 1493 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1494 { 1495 int error; 1496 struct mntinfo *mi; 1497 struct FSSTAT3args args; 1498 struct FSSTAT3res res; 1499 int douprintf; 1500 failinfo_t fi; 1501 vnode_t *vp; 1502 cred_t *cr; 1503 hrtime_t t; 1504 1505 mi = VFTOMI(vfsp); 1506 if (nfs_zone() != mi->mi_zone) 1507 return (EPERM); 1508 error = nfs3_root(vfsp, &vp); 1509 if (error) 1510 return (error); 1511 1512 cr = CRED(); 1513 1514 args.fsroot = *VTOFH3(vp); 1515 fi.vp = vp; 1516 fi.fhp = (caddr_t)&args.fsroot; 1517 fi.copyproc = nfs3copyfh; 1518 fi.lookupproc = nfs3lookup; 1519 fi.xattrdirproc = acl_getxattrdir3; 1520 1521 douprintf = 1; 1522 1523 t = gethrtime(); 1524 1525 error = rfs3call(mi, NFSPROC3_FSSTAT, 1526 xdr_nfs_fh3, (caddr_t)&args, 1527 xdr_FSSTAT3res, (caddr_t)&res, cr, 1528 &douprintf, &res.status, 0, &fi); 1529 1530 if (error) { 1531 VN_RELE(vp); 1532 return (error); 1533 } 1534 1535 error = geterrno3(res.status); 1536 if (!error) { 1537 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1538 sbp->f_bsize = MAXBSIZE; 1539 sbp->f_frsize = DEV_BSIZE; 1540 /* 1541 * Allow -1 fields to pass through unconverted. These 1542 * indicate "don't know" fields. 1543 */ 1544 if (res.resok.tbytes == (size3)-1) 1545 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes; 1546 else { 1547 sbp->f_blocks = (fsblkcnt64_t) 1548 (res.resok.tbytes / DEV_BSIZE); 1549 } 1550 if (res.resok.fbytes == (size3)-1) 1551 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes; 1552 else { 1553 sbp->f_bfree = (fsblkcnt64_t) 1554 (res.resok.fbytes / DEV_BSIZE); 1555 } 1556 if (res.resok.abytes == (size3)-1) 1557 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes; 1558 else { 1559 sbp->f_bavail = (fsblkcnt64_t) 1560 (res.resok.abytes / DEV_BSIZE); 1561 } 1562 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles; 1563 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles; 1564 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles; 1565 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1566 (void) strncpy(sbp->f_basetype, 1567 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1568 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1569 sbp->f_namemax = (ulong_t)-1; 1570 } else { 1571 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 1572 PURGE_STALE_FH(error, vp, cr); 1573 } 1574 1575 VN_RELE(vp); 1576 1577 return (error); 1578 } 1579 1580 static kmutex_t nfs3_syncbusy; 1581 1582 /* 1583 * Flush dirty nfs files for file system vfsp. 1584 * If vfsp == NULL, all nfs files are flushed. 1585 */ 1586 /* ARGSUSED */ 1587 static int 1588 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr) 1589 { 1590 /* 1591 * Cross-zone calls are OK here, since this translates to a 1592 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1593 */ 1594 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) { 1595 rflush(vfsp, cr); 1596 mutex_exit(&nfs3_syncbusy); 1597 } 1598 return (0); 1599 } 1600 1601 /* ARGSUSED */ 1602 static int 1603 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1604 { 1605 int error; 1606 nfs_fh3 fh; 1607 vnode_t *vp; 1608 struct vattr va; 1609 1610 if (fidp->fid_len > NFS3_FHSIZE) { 1611 *vpp = NULL; 1612 return (ESTALE); 1613 } 1614 1615 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1616 return (EPERM); 1617 fh.fh3_length = fidp->fid_len; 1618 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length); 1619 1620 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1621 1622 if (VTOR(vp)->r_flags & RSTALE) { 1623 VN_RELE(vp); 1624 *vpp = NULL; 1625 return (ENOENT); 1626 } 1627 1628 if (vp->v_type == VNON) { 1629 va.va_mask = AT_ALL; 1630 error = nfs3getattr(vp, &va, CRED()); 1631 if (error) { 1632 VN_RELE(vp); 1633 *vpp = NULL; 1634 return (error); 1635 } 1636 vp->v_type = va.va_type; 1637 } 1638 1639 *vpp = vp; 1640 1641 return (0); 1642 } 1643 1644 /* ARGSUSED */ 1645 static int 1646 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why) 1647 { 1648 vnode_t *rtvp; 1649 char root_hostname[SYS_NMLN+1]; 1650 struct servinfo *svp; 1651 int error; 1652 int vfsflags; 1653 size_t size; 1654 char *root_path; 1655 struct pathname pn; 1656 char *name; 1657 cred_t *cr; 1658 struct nfs_args args; /* nfs mount arguments */ 1659 static char token[10]; 1660 1661 bzero(&args, sizeof (args)); 1662 1663 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1664 clkset(-1L); /* hack for now - until we get time svc? */ 1665 1666 if (why == ROOT_REMOUNT) { 1667 /* 1668 * Shouldn't happen. 1669 */ 1670 panic("nfs3_mountroot: why == ROOT_REMOUNT"); 1671 } 1672 1673 if (why == ROOT_UNMOUNT) { 1674 /* 1675 * Nothing to do for NFS. 1676 */ 1677 return (0); 1678 } 1679 1680 /* 1681 * why == ROOT_INIT 1682 */ 1683 1684 name = token; 1685 *name = 0; 1686 getfsname("root", name, sizeof (token)); 1687 1688 pn_alloc(&pn); 1689 root_path = pn.pn_path; 1690 1691 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1692 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1693 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1694 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1695 1696 /* 1697 * Get server address 1698 * Get the root fhandle 1699 * Get server's transport 1700 * Get server's hostname 1701 * Get options 1702 */ 1703 args.addr = &svp->sv_addr; 1704 args.fh = (char *)&svp->sv_fhandle; 1705 args.knconf = svp->sv_knconf; 1706 args.hostname = root_hostname; 1707 vfsflags = 0; 1708 if (error = mount_root(*name ? name : "root", root_path, NFS_V3, 1709 &args, &vfsflags)) { 1710 if (error == EPROTONOSUPPORT) 1711 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: " 1712 "mount_root failed: server doesn't support NFS V3"); 1713 else 1714 nfs_cmn_err(error, CE_WARN, 1715 "nfs3_mountroot: mount_root failed: %m"); 1716 sv_free(svp); 1717 pn_free(&pn); 1718 return (error); 1719 } 1720 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1721 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1722 (void) strcpy(svp->sv_hostname, root_hostname); 1723 1724 /* 1725 * Force root partition to always be mounted with AUTH_UNIX for now 1726 */ 1727 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1728 svp->sv_secdata->secmod = AUTH_UNIX; 1729 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1730 svp->sv_secdata->data = NULL; 1731 1732 cr = crgetcred(); 1733 rtvp = NULL; 1734 1735 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1736 1737 crfree(cr); 1738 1739 if (error) { 1740 pn_free(&pn); 1741 sv_free(svp); 1742 return (error); 1743 } 1744 1745 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1746 if (error) { 1747 nfs_cmn_err(error, CE_WARN, 1748 "nfs3_mountroot: invalid root mount options"); 1749 pn_free(&pn); 1750 goto errout; 1751 } 1752 1753 (void) vfs_lock_wait(vfsp); 1754 vfs_add(NULL, vfsp, vfsflags); 1755 vfs_unlock(vfsp); 1756 1757 size = strlen(svp->sv_hostname); 1758 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1759 rootfs.bo_name[size] = ':'; 1760 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1761 1762 pn_free(&pn); 1763 1764 errout: 1765 if (error) { 1766 sv_free(svp); 1767 nfs_async_stop(vfsp); 1768 nfs_async_manager_stop(vfsp); 1769 } 1770 1771 if (rtvp != NULL) 1772 VN_RELE(rtvp); 1773 1774 return (error); 1775 } 1776 1777 /* 1778 * Initialization routine for VFS routines. Should only be called once 1779 */ 1780 int 1781 nfs3_vfsinit(void) 1782 { 1783 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1784 return (0); 1785 } 1786 1787 void 1788 nfs3_vfsfini(void) 1789 { 1790 mutex_destroy(&nfs3_syncbusy); 1791 } 1792 1793 void 1794 nfs3_freevfs(vfs_t *vfsp) 1795 { 1796 mntinfo_t *mi; 1797 servinfo_t *svp; 1798 1799 /* free up the resources */ 1800 mi = VFTOMI(vfsp); 1801 svp = mi->mi_servers; 1802 mi->mi_servers = mi->mi_curr_serv = NULL; 1803 sv_free(svp); 1804 1805 /* 1806 * By this time we should have already deleted the 1807 * mi kstats in the unmount code. If they are still around 1808 * somethings wrong 1809 */ 1810 ASSERT(mi->mi_io_kstats == NULL); 1811 nfs_free_mi(mi); 1812 } 1813