1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 27 * All rights reserved. 
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/mntent.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/class.h>
#include <sys/socket.h>
#include <sys/netconfig.h>
#include <sys/tsol/tnet.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/rnode.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

/*
 * From rpcsec module (common/rpcsec).
 *
 * sec_clnt_loadinfo() is used by nfs3_copyin() to build a kernel sec_data
 * from the user-supplied one; sec_clnt_freeinfo() is used by
 * nfs3_free_args() to release it again.
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * Names of the NFS Version 3 procedures.  nfs3rootvp() hangs this table
 * off the mount info record as mi_rfsnames.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v3_tmpl in nfs_stats.c
 */
static char *rfsnames_v3[] = {
	"null", "getattr", "setattr", "lookup", "access", "readlink", "read",
	"write", "create", "mkdir", "symlink", "mknod", "remove", "rmdir",
	"rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo",
	"pathconf", "commit"
};

/*
 * This table maps from NFS protocol number into call type.
 * Zero means a "Lookup" type call
 * One means a "Read" type call
 * Two means a "Write" type call
 * This is used to select a default time-out.
 *
 * NOTE(review): the 7/7/7/1 row layout mirrors rfsnames_v3 above, which
 * suggests the entries follow the same procedure order -- confirm before
 * adding or reordering entries.
 */
static uchar_t call_type_v3[] = {
	0, 0, 1, 0, 0, 0, 1,
	2, 2, 2, 2, 2, 2, 2,
	2, 2, 1, 2, 0, 0, 0,
	2 };

/*
 * Similar table, but to determine which timer to use
 * (only real reads and writes!)
 */
static uchar_t timer_type_v3[] = {
	0, 0, 0, 0, 0, 0, 1,
	2, 0, 0, 0, 0, 0, 0,
	0, 0, 1, 1, 0, 0, 0,
	0 };

/*
 * This table maps from NFS protocol number into a call type
 * for the semisoft mount option.
 * Zero means do not repeat operation.
 * One means repeat.
 */
static uchar_t ss_call_type_v3[] = {
	0, 0, 1, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1,
	1, 1, 0, 0, 0, 0, 0,
	1 };

/*
 * nfs3 vfs operations.
 *
 * These are the entry points that nfs3init() installs in the vfs
 * operations template.
 */
static int	nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	nfs3_unmount(vfs_t *, int, cred_t *);
static int	nfs3_root(vfs_t *, vnode_t **);
static int	nfs3_statvfs(vfs_t *, struct statvfs64 *);
static int	nfs3_sync(vfs_t *, short, cred_t *);
static int	nfs3_vget(vfs_t *, vnode_t **, fid_t *);
static int	nfs3_mountroot(vfs_t *, whymountroot_t);
static void	nfs3_freevfs(vfs_t *);

/*
 * Builds the mount info record and the root vnode for a list of servers;
 * called from nfs3_mount().
 */
static int	nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *,
		    int, cred_t *, zone_t *);

/*
 * Initialize the vfs structure
 */

static int nfs3fstyp;		/* file system type index, set by nfs3init() */
vfsops_t *nfs3_vfsops;		/* vfs operations vector, set by nfs3init() */

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
149 */ 150 extern int rdma_debug; 151 152 int 153 nfs3init(int fstyp, char *name) 154 { 155 static const fs_operation_def_t nfs3_vfsops_template[] = { 156 VFSNAME_MOUNT, { .vfs_mount = nfs3_mount }, 157 VFSNAME_UNMOUNT, { .vfs_unmount = nfs3_unmount }, 158 VFSNAME_ROOT, { .vfs_root = nfs3_root }, 159 VFSNAME_STATVFS, { .vfs_statvfs = nfs3_statvfs }, 160 VFSNAME_SYNC, { .vfs_sync = nfs3_sync }, 161 VFSNAME_VGET, { .vfs_vget = nfs3_vget }, 162 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs3_mountroot }, 163 VFSNAME_FREEVFS, { .vfs_freevfs = nfs3_freevfs }, 164 NULL, NULL 165 }; 166 int error; 167 168 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops); 169 if (error != 0) { 170 zcmn_err(GLOBAL_ZONEID, CE_WARN, 171 "nfs3init: bad vfs ops template"); 172 return (error); 173 } 174 175 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops); 176 if (error != 0) { 177 (void) vfs_freevfsops_by_type(fstyp); 178 zcmn_err(GLOBAL_ZONEID, CE_WARN, 179 "nfs3init: bad vnode ops template"); 180 return (error); 181 } 182 183 nfs3fstyp = fstyp; 184 185 return (0); 186 } 187 188 void 189 nfs3fini(void) 190 { 191 } 192 193 static void 194 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 195 { 196 197 if (fh) 198 kmem_free(fh, sizeof (*fh)); 199 200 if (nargs->knconf) { 201 if (nargs->knconf->knc_protofmly) 202 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE); 203 if (nargs->knconf->knc_proto) 204 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 205 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 206 nargs->knconf = NULL; 207 } 208 209 if (nargs->fh) { 210 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 211 nargs->fh = NULL; 212 } 213 214 if (nargs->hostname) { 215 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 216 nargs->hostname = NULL; 217 } 218 219 if (nargs->addr) { 220 if (nargs->addr->buf) { 221 ASSERT(nargs->addr->len); 222 kmem_free(nargs->addr->buf, nargs->addr->len); 223 } 224 kmem_free(nargs->addr, sizeof (struct netbuf)); 225 
nargs->addr = NULL; 226 } 227 228 if (nargs->syncaddr) { 229 ASSERT(nargs->syncaddr->len); 230 if (nargs->syncaddr->buf) { 231 ASSERT(nargs->syncaddr->len); 232 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 233 } 234 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 235 nargs->syncaddr = NULL; 236 } 237 238 if (nargs->netname) { 239 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 240 nargs->netname = NULL; 241 } 242 243 if (nargs->nfs_ext_u.nfs_extA.secdata) { 244 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 245 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 246 } 247 } 248 249 static int 250 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 251 { 252 253 int error; 254 size_t nlen; /* length of netname */ 255 size_t hlen; /* length of hostname */ 256 char netname[MAXNETNAMELEN+1]; /* server's netname */ 257 struct netbuf addr; /* server's address */ 258 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 259 struct knetconfig *knconf; /* transport knetconfig structure */ 260 struct sec_data *secdata = NULL; /* security data */ 261 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 262 STRUCT_DECL(knetconfig, knconf_tmp); 263 STRUCT_DECL(netbuf, addr_tmp); 264 int flags; 265 char *p, *pf; 266 char *userbufptr; 267 268 269 bzero(nargs, sizeof (*nargs)); 270 271 STRUCT_INIT(args, get_udatamodel()); 272 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 273 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 274 return (EFAULT); 275 276 nargs->wsize = STRUCT_FGET(args, wsize); 277 nargs->rsize = STRUCT_FGET(args, rsize); 278 nargs->timeo = STRUCT_FGET(args, timeo); 279 nargs->retrans = STRUCT_FGET(args, retrans); 280 nargs->acregmin = STRUCT_FGET(args, acregmin); 281 nargs->acregmax = STRUCT_FGET(args, acregmax); 282 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 283 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 284 285 flags = STRUCT_FGET(args, flags); 286 nargs->flags = flags; 
287 288 addr.buf = NULL; 289 syncaddr.buf = NULL; 290 291 /* 292 * Allocate space for a knetconfig structure and 293 * its strings and copy in from user-land. 294 */ 295 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 296 STRUCT_INIT(knconf_tmp, get_udatamodel()); 297 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 298 STRUCT_SIZE(knconf_tmp))) { 299 kmem_free(knconf, sizeof (*knconf)); 300 return (EFAULT); 301 } 302 303 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 304 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 305 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 306 if (get_udatamodel() != DATAMODEL_LP64) { 307 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 308 } else { 309 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 310 } 311 312 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 313 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 314 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 315 if (error) { 316 kmem_free(pf, KNC_STRSIZE); 317 kmem_free(p, KNC_STRSIZE); 318 kmem_free(knconf, sizeof (*knconf)); 319 return (error); 320 } 321 322 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 323 if (error) { 324 kmem_free(pf, KNC_STRSIZE); 325 kmem_free(p, KNC_STRSIZE); 326 kmem_free(knconf, sizeof (*knconf)); 327 return (error); 328 } 329 330 331 knconf->knc_protofmly = pf; 332 knconf->knc_proto = p; 333 334 nargs->knconf = knconf; 335 /* 336 * Get server address 337 */ 338 STRUCT_INIT(addr_tmp, get_udatamodel()); 339 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 340 STRUCT_SIZE(addr_tmp))) { 341 error = EFAULT; 342 goto errout; 343 } 344 345 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 346 userbufptr = STRUCT_FGETP(addr_tmp, buf); 347 addr.len = STRUCT_FGET(addr_tmp, len); 348 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 349 addr.maxlen = addr.len; 350 if (copyin(userbufptr, addr.buf, addr.len)) { 351 kmem_free(addr.buf, addr.len); 352 error = 
EFAULT; 353 goto errout; 354 } 355 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 356 357 /* 358 * Get the root fhandle 359 */ 360 361 if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) { 362 error = EFAULT; 363 goto errout; 364 } 365 366 367 /* 368 * Get server's hostname 369 */ 370 if (flags & NFSMNT_HOSTNAME) { 371 error = copyinstr(STRUCT_FGETP(args, hostname), netname, 372 sizeof (netname), &hlen); 373 if (error) 374 goto errout; 375 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 376 (void) strcpy(nargs->hostname, netname); 377 } else { 378 nargs->hostname = NULL; 379 } 380 381 382 /* 383 * If there are syncaddr and netname data, load them in. This is 384 * to support data needed for NFSV4 when AUTH_DH is the negotiated 385 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 386 */ 387 netname[0] = '\0'; 388 if (flags & NFSMNT_SECURE) { 389 if (STRUCT_FGETP(args, syncaddr) == NULL) { 390 error = EINVAL; 391 goto errout; 392 } 393 /* get syncaddr */ 394 STRUCT_INIT(addr_tmp, get_udatamodel()); 395 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 396 STRUCT_SIZE(addr_tmp))) { 397 error = EINVAL; 398 goto errout; 399 } 400 userbufptr = STRUCT_FGETP(addr_tmp, buf); 401 syncaddr.len = STRUCT_FGET(addr_tmp, len); 402 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 403 syncaddr.maxlen = syncaddr.len; 404 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 405 kmem_free(syncaddr.buf, syncaddr.len); 406 error = EFAULT; 407 goto errout; 408 } 409 410 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 411 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 412 413 ASSERT(STRUCT_FGETP(args, netname)); 414 415 if (copyinstr(STRUCT_FGETP(args, netname), netname, 416 sizeof (netname), &nlen)) { 417 error = EFAULT; 418 goto errout; 419 } 420 421 netname[nlen] = '\0'; 422 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 423 (void) strcpy(nargs->netname, netname); 424 } 425 426 /* 427 * Get the extention data which 
has the security data structure. 428 * This includes data for AUTH_SYS as well. 429 */ 430 if (flags & NFSMNT_NEWARGS) { 431 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 432 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 433 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 434 /* 435 * Indicating the application is using the new 436 * sec_data structure to pass in the security 437 * data. 438 */ 439 if (STRUCT_FGETP(args, 440 nfs_ext_u.nfs_extA.secdata) != NULL) { 441 error = sec_clnt_loadinfo( 442 (struct sec_data *)STRUCT_FGETP(args, 443 nfs_ext_u.nfs_extA.secdata), &secdata, 444 get_udatamodel()); 445 } 446 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 447 } 448 } 449 450 if (error) 451 goto errout; 452 453 /* 454 * Failover support: 455 * 456 * We may have a linked list of nfs_args structures, 457 * which means the user is looking for failover. If 458 * the mount is either not "read-only" or "soft", 459 * we want to bail out with EINVAL. 460 */ 461 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 462 nargs->nfs_ext_u.nfs_extB.next = 463 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 464 465 errout: 466 if (error) 467 nfs3_free_args(nargs, fh); 468 469 return (error); 470 } 471 472 473 /* 474 * nfs mount vfsop 475 * Set up mount info record and attach it to vfs struct. 
476 */ 477 static int 478 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 479 { 480 struct nfs_args *args = NULL; 481 nfs_fhandle *fhandle = NULL; 482 char *data = uap->dataptr; 483 int error; 484 vnode_t *rtvp; /* the server's root */ 485 mntinfo_t *mi; /* mount info, pointed at by vfs */ 486 size_t nlen; /* length of netname */ 487 struct knetconfig *knconf; /* transport knetconfig structure */ 488 struct knetconfig *rdma_knconf; /* rdma transport structure */ 489 rnode_t *rp; 490 struct servinfo *svp; /* nfs server info */ 491 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 492 struct servinfo *svp_head; /* first nfs server info */ 493 struct servinfo *svp_2ndlast; /* 2nd last in server info list */ 494 struct sec_data *secdata; /* security data */ 495 int flags, addr_type; 496 zone_t *zone = nfs_zone(); 497 zone_t *mntzone = NULL; 498 499 500 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 501 return (EPERM); 502 503 if (mvp->v_type != VDIR) 504 return (ENOTDIR); 505 506 /* 507 * get arguments 508 * 509 * nfs_args is now versioned and is extensible, so 510 * uap->datalen might be different from sizeof (args) 511 * in a compatible situation. 
512 */ 513 514 more: 515 516 if (!(uap->flags & MS_SYSSPACE)) { 517 if (args == NULL) 518 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 519 else { 520 nfs3_free_args(args, fhandle); 521 fhandle = NULL; 522 } 523 if (fhandle == NULL) 524 fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP); 525 error = nfs3_copyin(data, uap->datalen, args, fhandle); 526 if (error) { 527 if (args) 528 kmem_free(args, sizeof (*args)); 529 return (error); 530 } 531 } else { 532 args = (struct nfs_args *)data; 533 fhandle = (nfs_fhandle *)args->fh; 534 } 535 536 537 flags = args->flags; 538 539 if (uap->flags & MS_REMOUNT) { 540 size_t n; 541 char name[FSTYPSZ]; 542 543 if (uap->flags & MS_SYSSPACE) { 544 error = copystr(uap->fstype, name, FSTYPSZ, &n); 545 } else { 546 nfs3_free_args(args, fhandle); 547 kmem_free(args, sizeof (*args)); 548 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 549 } 550 if (error) { 551 if (error == ENAMETOOLONG) 552 return (EINVAL); 553 return (error); 554 } 555 556 /* 557 * This check is to ensure that the request is a 558 * genuine nfs remount request. 559 */ 560 561 if (strncmp(name, "nfs", 3) != 0) 562 return (EINVAL); 563 564 /* 565 * If the request changes the locking type, disallow the 566 * remount, 567 * because it's questionable whether we can transfer the 568 * locking state correctly. 569 */ 570 571 if ((mi = VFTOMI(vfsp)) != NULL) { 572 uint_t new_mi_llock; 573 uint_t old_mi_llock; 574 575 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 576 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 
1 : 0; 577 if (old_mi_llock != new_mi_llock) 578 return (EBUSY); 579 } 580 return (0); 581 } 582 583 mutex_enter(&mvp->v_lock); 584 if (!(uap->flags & MS_OVERLAY) && 585 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 586 mutex_exit(&mvp->v_lock); 587 if (!(uap->flags & MS_SYSSPACE)) { 588 nfs3_free_args(args, fhandle); 589 kmem_free(args, sizeof (*args)); 590 } 591 return (EBUSY); 592 } 593 mutex_exit(&mvp->v_lock); 594 595 /* make sure things are zeroed for errout: */ 596 rtvp = NULL; 597 mi = NULL; 598 secdata = NULL; 599 600 /* 601 * A valid knetconfig structure is required. 602 */ 603 if (!(flags & NFSMNT_KNCONF)) { 604 if (!(uap->flags & MS_SYSSPACE)) { 605 nfs3_free_args(args, fhandle); 606 kmem_free(args, sizeof (*args)); 607 } 608 return (EINVAL); 609 } 610 611 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 612 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 613 if (!(uap->flags & MS_SYSSPACE)) { 614 nfs3_free_args(args, fhandle); 615 kmem_free(args, sizeof (*args)); 616 } 617 return (EINVAL); 618 } 619 620 /* 621 * Allocate a servinfo struct. 622 */ 623 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 624 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 625 if (svp_tail) { 626 svp_2ndlast = svp_tail; 627 svp_tail->sv_next = svp; 628 } else { 629 svp_head = svp; 630 svp_2ndlast = svp; 631 } 632 633 svp_tail = svp; 634 635 svp->sv_knconf = args->knconf; 636 args->knconf = NULL; 637 638 if (args->addr == NULL || args->addr->buf == NULL) { 639 error = EINVAL; 640 goto errout; 641 } 642 643 svp->sv_addr.maxlen = args->addr->maxlen; 644 svp->sv_addr.len = args->addr->len; 645 svp->sv_addr.buf = args->addr->buf; 646 args->addr->buf = NULL; 647 648 /* 649 * Check the root fhandle length 650 */ 651 ASSERT(fhandle); 652 if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) { 653 error = EINVAL; 654 #ifdef DEBUG 655 zcmn_err(getzoneid(), CE_WARN, 656 "nfs3_mount: got an invalid fhandle. 
fh_len = %d", 657 fhandle->fh_len); 658 fhandle->fh_len = NFS_FHANDLE_LEN; 659 nfs_printfhandle(fhandle); 660 #endif 661 goto errout; 662 } 663 664 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 665 svp->sv_fhandle.fh_len = fhandle->fh_len; 666 667 /* 668 * Get server's hostname 669 */ 670 if (flags & NFSMNT_HOSTNAME) { 671 if (args->hostname == NULL) { 672 error = EINVAL; 673 goto errout; 674 } 675 svp->sv_hostnamelen = strlen(args->hostname) + 1; 676 svp->sv_hostname = args->hostname; 677 args->hostname = NULL; 678 } else { 679 char *p = "unknown-host"; 680 svp->sv_hostnamelen = strlen(p) + 1; 681 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 682 (void) strcpy(svp->sv_hostname, p); 683 } 684 685 686 /* 687 * RDMA MOUNT SUPPORT FOR NFS v3: 688 * Establish, is it possible to use RDMA, if so overload the 689 * knconf with rdma specific knconf and free the orignal. 690 */ 691 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 692 /* 693 * Determine the addr type for RDMA, IPv4 or v6. 694 */ 695 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 696 addr_type = AF_INET; 697 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 698 addr_type = AF_INET6; 699 700 if (rdma_reachable(addr_type, &svp->sv_addr, 701 &rdma_knconf) == 0) { 702 /* 703 * If successful, hijack the orignal knconf and 704 * replace with a new one, depending on the flags. 705 */ 706 svp->sv_origknconf = svp->sv_knconf; 707 svp->sv_knconf = rdma_knconf; 708 knconf = rdma_knconf; 709 } else { 710 if (flags & NFSMNT_TRYRDMA) { 711 #ifdef DEBUG 712 if (rdma_debug) 713 zcmn_err(getzoneid(), CE_WARN, 714 "no RDMA onboard, revert\n"); 715 #endif 716 } 717 718 if (flags & NFSMNT_DORDMA) { 719 /* 720 * If proto=rdma is specified and no RDMA 721 * path to this server is avialable then 722 * ditch this server. 723 * This is not included in the mountable 724 * server list or the replica list. 
725 * Check if more servers are specified; 726 * Failover case, otherwise bail out of mount. 727 */ 728 if (args->nfs_args_ext == NFS_ARGS_EXTB && 729 args->nfs_ext_u.nfs_extB.next != NULL) { 730 data = (char *) 731 args->nfs_ext_u.nfs_extB.next; 732 if (uap->flags & MS_RDONLY && 733 !(flags & NFSMNT_SOFT)) { 734 if (svp_head->sv_next == NULL) { 735 svp_tail = NULL; 736 svp_2ndlast = NULL; 737 sv_free(svp_head); 738 goto more; 739 } else { 740 svp_tail = svp_2ndlast; 741 svp_2ndlast->sv_next = 742 NULL; 743 sv_free(svp); 744 goto more; 745 } 746 } 747 } else { 748 /* 749 * This is the last server specified 750 * in the nfs_args list passed down 751 * and its not rdma capable. 752 */ 753 if (svp_head->sv_next == NULL) { 754 /* 755 * Is this the only one 756 */ 757 error = EINVAL; 758 #ifdef DEBUG 759 if (rdma_debug) 760 zcmn_err(getzoneid(), 761 CE_WARN, 762 "No RDMA srv"); 763 #endif 764 goto errout; 765 } else { 766 /* 767 * There is list, since some 768 * servers specified before 769 * this passed all requirements 770 */ 771 svp_tail = svp_2ndlast; 772 svp_2ndlast->sv_next = NULL; 773 sv_free(svp); 774 goto proceed; 775 } 776 } 777 } 778 } 779 } 780 781 /* 782 * Get the extention data which has the new security data structure. 783 */ 784 if (flags & NFSMNT_NEWARGS) { 785 switch (args->nfs_args_ext) { 786 case NFS_ARGS_EXTA: 787 case NFS_ARGS_EXTB: 788 /* 789 * Indicating the application is using the new 790 * sec_data structure to pass in the security 791 * data. 792 */ 793 secdata = args->nfs_ext_u.nfs_extA.secdata; 794 if (args->nfs_ext_u.nfs_extA.secdata == NULL) { 795 error = EINVAL; 796 } else { 797 /* 798 * Need to validate the flavor here if 799 * sysspace, userspace was already 800 * validate from the nfs_copyin function. 
801 */ 802 switch (secdata->rpcflavor) { 803 case AUTH_NONE: 804 case AUTH_UNIX: 805 case AUTH_LOOPBACK: 806 case AUTH_DES: 807 case RPCSEC_GSS: 808 args->nfs_ext_u.nfs_extA.secdata = NULL; 809 break; 810 default: 811 error = EINVAL; 812 goto errout; 813 } 814 } 815 break; 816 817 default: 818 error = EINVAL; 819 break; 820 } 821 } else if (flags & NFSMNT_SECURE) { 822 /* 823 * Keep this for backward compatibility to support 824 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 825 */ 826 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 827 error = EINVAL; 828 goto errout; 829 } 830 /* 831 * Move security related data to the sec_data structure. 832 */ 833 { 834 dh_k4_clntdata_t *data; 835 char *pf, *p; 836 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 837 if (flags & NFSMNT_RPCTIMESYNC) 838 secdata->flags |= AUTH_F_RPCTIMESYNC; 839 data = kmem_alloc(sizeof (*data), KM_SLEEP); 840 bcopy(args->syncaddr, &data->syncaddr, 841 sizeof (*args->syncaddr)); 842 843 /* 844 * duplicate the knconf information for the 845 * new opaque data. 
846 */ 847 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 848 *data->knconf = *knconf; 849 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 850 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 851 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 852 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 853 data->knconf->knc_protofmly = pf; 854 data->knconf->knc_proto = p; 855 856 nlen = strlen(args->hostname) + 1; 857 /* move server netname to the sec_data structure */ 858 if (nlen != 0) { 859 data->netname = kmem_alloc(nlen, KM_SLEEP); 860 bcopy(args->hostname, data->netname, nlen); 861 data->netnamelen = nlen; 862 } 863 secdata->secmod = secdata->rpcflavor = AUTH_DES; 864 secdata->data = (caddr_t)data; 865 } 866 } else { 867 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 868 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 869 secdata->data = NULL; 870 } 871 872 svp->sv_secdata = secdata; 873 if (error) 874 goto errout; 875 876 /* 877 * See bug 1180236. 878 * If mount secure failed, we will fall back to AUTH_NONE 879 * and try again. nfs3rootvp() will turn this back off. 880 * 881 * The NFS Version 3 mount uses the FSINFO and GETATTR 882 * procedures. The server should not care if these procedures 883 * have the proper security flavor, so if mount retries using 884 * AUTH_NONE that does not require a credential setup for root 885 * then the automounter would work without requiring root to be 886 * keylogged into AUTH_DES. 887 */ 888 if (secdata->rpcflavor != AUTH_UNIX && 889 secdata->rpcflavor != AUTH_LOOPBACK) 890 secdata->flags |= AUTH_F_TRYNONE; 891 892 /* 893 * Failover support: 894 * 895 * We may have a linked list of nfs_args structures, 896 * which means the user is looking for failover. If 897 * the mount is either not "read-only" or "soft", 898 * we want to bail out with EINVAL. 
899 */ 900 if (args->nfs_args_ext == NFS_ARGS_EXTB && 901 args->nfs_ext_u.nfs_extB.next != NULL) { 902 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 903 data = (char *)args->nfs_ext_u.nfs_extB.next; 904 goto more; 905 } 906 error = EINVAL; 907 goto errout; 908 } 909 910 /* 911 * Determine the zone we're being mounted into. 912 */ 913 zone_hold(mntzone = zone); /* start with this assumption */ 914 if (getzoneid() == GLOBAL_ZONEID) { 915 zone_rele(mntzone); 916 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 917 ASSERT(mntzone != NULL); 918 if (mntzone != zone) { 919 error = EBUSY; 920 goto errout; 921 } 922 } 923 924 if (is_system_labeled()) { 925 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 926 svp->sv_knconf, cr); 927 928 if (error > 0) 929 goto errout; 930 931 if (error == -1) { 932 /* change mount to read-only to prevent write-down */ 933 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 934 } 935 } 936 937 /* 938 * Stop the mount from going any further if the zone is going away. 939 */ 940 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 941 error = EBUSY; 942 goto errout; 943 } 944 945 /* 946 * Get root vnode. 
947 */ 948 proceed: 949 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 950 951 if (error) 952 goto errout; 953 954 /* 955 * Set option fields in the mount info record 956 */ 957 mi = VTOMI(rtvp); 958 959 if (svp_head->sv_next) 960 mi->mi_flags |= MI_LLOCK; 961 962 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 963 964 errout: 965 if (rtvp != NULL) { 966 if (error) { 967 rp = VTOR(rtvp); 968 if (rp->r_flags & RHASHED) 969 rp_rmhash(rp); 970 } 971 VN_RELE(rtvp); 972 } 973 974 if (error) { 975 sv_free(svp_head); 976 if (mi != NULL) { 977 nfs_async_stop(vfsp); 978 nfs_async_manager_stop(vfsp); 979 if (mi->mi_io_kstats) { 980 kstat_delete(mi->mi_io_kstats); 981 mi->mi_io_kstats = NULL; 982 } 983 if (mi->mi_ro_kstats) { 984 kstat_delete(mi->mi_ro_kstats); 985 mi->mi_ro_kstats = NULL; 986 } 987 nfs_free_mi(mi); 988 } 989 } 990 991 992 if (!(uap->flags & MS_SYSSPACE)) { 993 nfs3_free_args(args, fhandle); 994 kmem_free(args, sizeof (*args)); 995 } 996 997 if (mntzone != NULL) 998 zone_rele(mntzone); 999 1000 return (error); 1001 } 1002 1003 static int nfs3_dynamic = 0; /* global variable to enable dynamic retrans. 
*/ 1004 static ushort_t nfs3_max_threads = 8; /* max number of active async threads */ 1005 uint_t nfs3_bsize = 32 * 1024; /* client `block' size */ 1006 static uint_t nfs3_async_clusters = 1; /* # of reqs from each async queue */ 1007 static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; 1008 1009 static int 1010 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1011 int flags, cred_t *cr, zone_t *zone) 1012 { 1013 vnode_t *rtvp; 1014 mntinfo_t *mi; 1015 dev_t nfs_dev; 1016 struct vattr va; 1017 struct FSINFO3args args; 1018 struct FSINFO3res res; 1019 int error; 1020 int douprintf; 1021 rnode_t *rp; 1022 int i; 1023 uint_t max_transfer_size; 1024 struct nfs_stats *nfsstatsp; 1025 cred_t *lcr = NULL, *tcr = cr; 1026 1027 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1028 ASSERT(nfsstatsp != NULL); 1029 1030 ASSERT(nfs_zone() == zone); 1031 /* 1032 * Create a mount record and link it to the vfs struct. 1033 */ 1034 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1035 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1036 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1037 mi->mi_flags = MI_ACL | MI_EXTATTR; 1038 if (!(flags & NFSMNT_SOFT)) 1039 mi->mi_flags |= MI_HARD; 1040 if ((flags & NFSMNT_SEMISOFT)) 1041 mi->mi_flags |= MI_SEMISOFT; 1042 if ((flags & NFSMNT_NOPRINT)) 1043 mi->mi_flags |= MI_NOPRINT; 1044 if (flags & NFSMNT_INT) 1045 mi->mi_flags |= MI_INT; 1046 mi->mi_retrans = NFS_RETRIES; 1047 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1048 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1049 mi->mi_timeo = nfs3_cots_timeo; 1050 else 1051 mi->mi_timeo = NFS_TIMEO; 1052 mi->mi_prog = NFS_PROGRAM; 1053 mi->mi_vers = NFS_V3; 1054 mi->mi_rfsnames = rfsnames_v3; 1055 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr; 1056 mi->mi_call_type = call_type_v3; 1057 mi->mi_ss_call_type = ss_call_type_v3; 1058 mi->mi_timer_type = timer_type_v3; 1059 mi->mi_aclnames = aclnames_v3; 1060 mi->mi_aclreqs = 
nfsstatsp->nfs_stats_v3.aclreqcnt_ptr; 1061 mi->mi_acl_call_type = acl_call_type_v3; 1062 mi->mi_acl_ss_call_type = acl_ss_call_type_v3; 1063 mi->mi_acl_timer_type = acl_timer_type_v3; 1064 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1065 mi->mi_servers = svp; 1066 mi->mi_curr_serv = svp; 1067 mi->mi_acregmin = SEC2HR(ACREGMIN); 1068 mi->mi_acregmax = SEC2HR(ACREGMAX); 1069 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1070 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1071 1072 if (nfs3_dynamic) 1073 mi->mi_flags |= MI_DYNAMIC; 1074 1075 if (flags & NFSMNT_DIRECTIO) 1076 mi->mi_flags |= MI_DIRECTIO; 1077 1078 mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL); 1079 list_create(&mi->mi_rnodes, sizeof (rnode_t), 1080 offsetof(rnode_t, r_mi_link)); 1081 1082 /* 1083 * Make a vfs struct for nfs. We do this here instead of below 1084 * because rtvp needs a vfs before we can do a getattr on it. 1085 * 1086 * Assign a unique device id to the mount 1087 */ 1088 mutex_enter(&nfs_minor_lock); 1089 do { 1090 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1091 nfs_dev = makedevice(nfs_major, nfs_minor); 1092 } while (vfs_devismounted(nfs_dev)); 1093 mutex_exit(&nfs_minor_lock); 1094 1095 vfsp->vfs_dev = nfs_dev; 1096 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp); 1097 vfsp->vfs_data = (caddr_t)mi; 1098 vfsp->vfs_fstype = nfsfstyp; 1099 1100 /* 1101 * Verify that nfs3_bsize tuneable is set to an 1102 * acceptable value. It be a multiple of PAGESIZE or 1103 * file corruption can occur. 1104 */ 1105 if (nfs3_bsize & PAGEOFFSET) 1106 nfs3_bsize &= PAGEMASK; 1107 if (nfs3_bsize < PAGESIZE) 1108 nfs3_bsize = PAGESIZE; 1109 vfsp->vfs_bsize = nfs3_bsize; 1110 1111 /* 1112 * Initialize fields used to support async putpage operations. 
1113 */ 1114 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1115 mi->mi_async_clusters[i] = nfs3_async_clusters; 1116 mi->mi_async_init_clusters = nfs3_async_clusters; 1117 mi->mi_async_curr[NFS_ASYNC_QUEUE] = 1118 mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0]; 1119 mi->mi_max_threads = nfs3_max_threads; 1120 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1121 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1122 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL); 1123 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL, 1124 CV_DEFAULT, NULL); 1125 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1126 1127 mi->mi_vfsp = vfsp; 1128 mi->mi_zone = zone; 1129 zone_init_ref(&mi->mi_zone_ref); 1130 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS); 1131 nfs_mi_zonelist_add(mi); 1132 1133 /* 1134 * Make the root vnode, use it to get attributes, 1135 * then remake it with the attributes. 1136 */ 1137 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle, 1138 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1139 1140 /* 1141 * Make the FSINFO calls, primarily at this point to 1142 * determine the transfer size. For client failover, 1143 * we'll want this to be the minimum bid from any 1144 * server, so that we don't overrun stated limits. 1145 * 1146 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1147 * which is only for the mount operation. 1148 */ 1149 1150 mi->mi_tsize = nfs3_tsize(svp->sv_knconf); 1151 mi->mi_stsize = mi->mi_tsize; 1152 1153 mi->mi_curread = nfs3_bsize; 1154 mi->mi_curwrite = mi->mi_curread; 1155 1156 /* 1157 * If the uid is set then set the creds for secure mounts 1158 * by proxy processes such as automountd. 
1159 */ 1160 if (svp->sv_secdata->uid != 0 && 1161 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1162 lcr = crdup(cr); 1163 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1164 tcr = lcr; 1165 } 1166 1167 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1168 douprintf = 1; 1169 mi->mi_curr_serv = svp; 1170 max_transfer_size = nfs3_tsize(svp->sv_knconf); 1171 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize); 1172 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize); 1173 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread); 1174 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite); 1175 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle; 1176 1177 error = rfs3call(mi, NFSPROC3_FSINFO, 1178 xdr_nfs_fh3, (caddr_t)&args, 1179 xdr_FSINFO3res, (caddr_t)&res, tcr, 1180 &douprintf, &res.status, 0, NULL); 1181 if (error) 1182 goto bad; 1183 error = geterrno3(res.status); 1184 if (error) 1185 goto bad; 1186 1187 /* get type of root node */ 1188 if (res.resok.obj_attributes.attributes) { 1189 if (res.resok.obj_attributes.attr.type < NF3REG || 1190 res.resok.obj_attributes.attr.type > NF3FIFO) { 1191 #ifdef DEBUG 1192 zcmn_err(getzoneid(), CE_WARN, 1193 "NFS3 server %s returned a bad file type for root", 1194 svp->sv_hostname); 1195 #else 1196 zcmn_err(getzoneid(), CE_WARN, 1197 "NFS server %s returned a bad file type for root", 1198 svp->sv_hostname); 1199 #endif 1200 error = EINVAL; 1201 goto bad; 1202 } else { 1203 if (rtvp->v_type != VNON && rtvp->v_type != 1204 nf3_to_vt[res.resok.obj_attributes.attr. 1205 type]) { 1206 #ifdef DEBUG 1207 zcmn_err(getzoneid(), CE_WARN, 1208 "NFS3 server %s returned a different file type for root", 1209 svp->sv_hostname); 1210 #else 1211 zcmn_err(getzoneid(), CE_WARN, 1212 "NFS server %s returned a different file type for root", 1213 svp->sv_hostname); 1214 #endif 1215 error = EINVAL; 1216 goto bad; 1217 } 1218 rtvp->v_type = 1219 nf3_to_vt[res.resok.obj_attributes.attr. 
1220 type]; 1221 } 1222 } 1223 1224 if (res.resok.rtmax != 0) { 1225 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize); 1226 if (res.resok.rtpref != 0) { 1227 mi->mi_curread = MIN(res.resok.rtpref, 1228 mi->mi_curread); 1229 } else { 1230 mi->mi_curread = MIN(res.resok.rtmax, 1231 mi->mi_curread); 1232 } 1233 } else if (res.resok.rtpref != 0) { 1234 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize); 1235 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread); 1236 } else { 1237 #ifdef DEBUG 1238 zcmn_err(getzoneid(), CE_WARN, 1239 "NFS3 server %s returned 0 for read transfer sizes", 1240 svp->sv_hostname); 1241 #else 1242 zcmn_err(getzoneid(), CE_WARN, 1243 "NFS server %s returned 0 for read transfer sizes", 1244 svp->sv_hostname); 1245 #endif 1246 error = EIO; 1247 goto bad; 1248 } 1249 if (res.resok.wtmax != 0) { 1250 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize); 1251 if (res.resok.wtpref != 0) { 1252 mi->mi_curwrite = MIN(res.resok.wtpref, 1253 mi->mi_curwrite); 1254 } else { 1255 mi->mi_curwrite = MIN(res.resok.wtmax, 1256 mi->mi_curwrite); 1257 } 1258 } else if (res.resok.wtpref != 0) { 1259 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize); 1260 mi->mi_curwrite = MIN(res.resok.wtpref, 1261 mi->mi_curwrite); 1262 } else { 1263 #ifdef DEBUG 1264 zcmn_err(getzoneid(), CE_WARN, 1265 "NFS3 server %s returned 0 for write transfer sizes", 1266 svp->sv_hostname); 1267 #else 1268 zcmn_err(getzoneid(), CE_WARN, 1269 "NFS server %s returned 0 for write transfer sizes", 1270 svp->sv_hostname); 1271 #endif 1272 error = EIO; 1273 goto bad; 1274 } 1275 1276 /* 1277 * These signal the ability of the server to create 1278 * hard links and symbolic links, so they really 1279 * aren't relevant if there is more than one server. 1280 * We'll set them here, though it probably looks odd. 
1281 */ 1282 if (res.resok.properties & FSF3_LINK) 1283 mi->mi_flags |= MI_LINK; 1284 if (res.resok.properties & FSF3_SYMLINK) 1285 mi->mi_flags |= MI_SYMLINK; 1286 1287 /* Pick up smallest non-zero maxfilesize value */ 1288 if (res.resok.maxfilesize) { 1289 if (mi->mi_maxfilesize) { 1290 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize, 1291 res.resok.maxfilesize); 1292 } else 1293 mi->mi_maxfilesize = res.resok.maxfilesize; 1294 } 1295 1296 /* 1297 * AUTH_F_TRYNONE is only for the mount operation, 1298 * so turn it back off. 1299 */ 1300 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1301 } 1302 mi->mi_curr_serv = mi->mi_servers; 1303 1304 /* 1305 * Start the thread responsible for handling async worker threads. 1306 */ 1307 VFS_HOLD(vfsp); /* add reference for thread */ 1308 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1309 vfsp, 0, minclsyspri); 1310 ASSERT(mi->mi_manager_thread != NULL); 1311 1312 /* 1313 * Initialize kstats 1314 */ 1315 nfs_mnt_kstat_init(vfsp); 1316 1317 /* If we didn't get a type, get one now */ 1318 if (rtvp->v_type == VNON) { 1319 va.va_mask = AT_ALL; 1320 1321 error = nfs3getattr(rtvp, &va, tcr); 1322 if (error) 1323 goto bad; 1324 rtvp->v_type = va.va_type; 1325 } 1326 1327 mi->mi_type = rtvp->v_type; 1328 1329 *rtvpp = rtvp; 1330 if (lcr != NULL) 1331 crfree(lcr); 1332 1333 return (0); 1334 bad: 1335 /* 1336 * An error occurred somewhere, need to clean up... 1337 * We need to release our reference to the root vnode and 1338 * destroy the mntinfo struct that we just created. 
1339 */ 1340 if (lcr != NULL) 1341 crfree(lcr); 1342 rp = VTOR(rtvp); 1343 if (rp->r_flags & RHASHED) 1344 rp_rmhash(rp); 1345 VN_RELE(rtvp); 1346 nfs_async_stop(vfsp); 1347 nfs_async_manager_stop(vfsp); 1348 if (mi->mi_io_kstats) { 1349 kstat_delete(mi->mi_io_kstats); 1350 mi->mi_io_kstats = NULL; 1351 } 1352 if (mi->mi_ro_kstats) { 1353 kstat_delete(mi->mi_ro_kstats); 1354 mi->mi_ro_kstats = NULL; 1355 } 1356 nfs_free_mi(mi); 1357 *rtvpp = NULL; 1358 return (error); 1359 } 1360 1361 /* 1362 * vfs operations 1363 */ 1364 static int 1365 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1366 { 1367 mntinfo_t *mi; 1368 ushort_t omax; 1369 1370 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1371 return (EPERM); 1372 1373 mi = VFTOMI(vfsp); 1374 if (flag & MS_FORCE) { 1375 1376 vfsp->vfs_flag |= VFS_UNMOUNTED; 1377 1378 /* 1379 * We are about to stop the async manager. 1380 * Let every one know not to schedule any 1381 * more async requests 1382 */ 1383 mutex_enter(&mi->mi_async_lock); 1384 mi->mi_max_threads = 0; 1385 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 1386 mutex_exit(&mi->mi_async_lock); 1387 1388 /* 1389 * We need to stop the manager thread explicitly; the worker 1390 * threads can time out and exit on their own. 1391 */ 1392 nfs_async_manager_stop(vfsp); 1393 destroy_rtable(vfsp, cr); 1394 if (mi->mi_io_kstats) { 1395 kstat_delete(mi->mi_io_kstats); 1396 mi->mi_io_kstats = NULL; 1397 } 1398 if (mi->mi_ro_kstats) { 1399 kstat_delete(mi->mi_ro_kstats); 1400 mi->mi_ro_kstats = NULL; 1401 } 1402 return (0); 1403 } 1404 /* 1405 * Wait until all asynchronous putpage operations on 1406 * this file system are complete before flushing rnodes 1407 * from the cache. 1408 */ 1409 omax = mi->mi_max_threads; 1410 if (nfs_async_stop_sig(vfsp)) { 1411 return (EINTR); 1412 } 1413 rflush(vfsp, cr); 1414 /* 1415 * If there are any active vnodes on this file system, 1416 * then the file system is busy and can't be umounted. 
1417 */ 1418 if (check_rtable(vfsp)) { 1419 mutex_enter(&mi->mi_async_lock); 1420 mi->mi_max_threads = omax; 1421 mutex_exit(&mi->mi_async_lock); 1422 return (EBUSY); 1423 } 1424 /* 1425 * The unmount can't fail from now on; stop the worker thread manager. 1426 */ 1427 nfs_async_manager_stop(vfsp); 1428 /* 1429 * Destroy all rnodes belonging to this file system from the 1430 * rnode hash queues and purge any resources allocated to 1431 * them. 1432 */ 1433 destroy_rtable(vfsp, cr); 1434 if (mi->mi_io_kstats) { 1435 kstat_delete(mi->mi_io_kstats); 1436 mi->mi_io_kstats = NULL; 1437 } 1438 if (mi->mi_ro_kstats) { 1439 kstat_delete(mi->mi_ro_kstats); 1440 mi->mi_ro_kstats = NULL; 1441 } 1442 return (0); 1443 } 1444 1445 /* 1446 * find root of nfs 1447 */ 1448 static int 1449 nfs3_root(vfs_t *vfsp, vnode_t **vpp) 1450 { 1451 mntinfo_t *mi; 1452 vnode_t *vp; 1453 servinfo_t *svp; 1454 rnode_t *rp; 1455 int error = 0; 1456 1457 mi = VFTOMI(vfsp); 1458 1459 if (nfs_zone() != mi->mi_zone) 1460 return (EPERM); 1461 1462 svp = mi->mi_curr_serv; 1463 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1464 mutex_enter(&svp->sv_lock); 1465 svp->sv_flags &= ~SV_ROOT_STALE; 1466 mutex_exit(&svp->sv_lock); 1467 error = ENOENT; 1468 } 1469 1470 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle, 1471 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1472 1473 /* 1474 * if the SV_ROOT_STALE flag was reset above, reset the 1475 * RSTALE flag if needed and return an error 1476 */ 1477 if (error == ENOENT) { 1478 rp = VTOR(vp); 1479 if (svp && rp->r_flags & RSTALE) { 1480 mutex_enter(&rp->r_statelock); 1481 rp->r_flags &= ~RSTALE; 1482 mutex_exit(&rp->r_statelock); 1483 } 1484 VN_RELE(vp); 1485 return (error); 1486 } 1487 1488 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1489 1490 vp->v_type = mi->mi_type; 1491 1492 *vpp = vp; 1493 1494 return (0); 1495 } 1496 1497 /* 1498 * Get file system statistics. 
1499 */ 1500 static int 1501 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1502 { 1503 int error; 1504 struct mntinfo *mi; 1505 struct FSSTAT3args args; 1506 struct FSSTAT3res res; 1507 int douprintf; 1508 failinfo_t fi; 1509 vnode_t *vp; 1510 cred_t *cr; 1511 hrtime_t t; 1512 1513 mi = VFTOMI(vfsp); 1514 if (nfs_zone() != mi->mi_zone) 1515 return (EPERM); 1516 error = nfs3_root(vfsp, &vp); 1517 if (error) 1518 return (error); 1519 1520 cr = CRED(); 1521 1522 args.fsroot = *VTOFH3(vp); 1523 fi.vp = vp; 1524 fi.fhp = (caddr_t)&args.fsroot; 1525 fi.copyproc = nfs3copyfh; 1526 fi.lookupproc = nfs3lookup; 1527 fi.xattrdirproc = acl_getxattrdir3; 1528 1529 douprintf = 1; 1530 1531 t = gethrtime(); 1532 1533 error = rfs3call(mi, NFSPROC3_FSSTAT, 1534 xdr_nfs_fh3, (caddr_t)&args, 1535 xdr_FSSTAT3res, (caddr_t)&res, cr, 1536 &douprintf, &res.status, 0, &fi); 1537 1538 if (error) { 1539 VN_RELE(vp); 1540 return (error); 1541 } 1542 1543 error = geterrno3(res.status); 1544 if (!error) { 1545 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1546 sbp->f_bsize = MAXBSIZE; 1547 sbp->f_frsize = DEV_BSIZE; 1548 /* 1549 * Allow -1 fields to pass through unconverted. These 1550 * indicate "don't know" fields. 
1551 */ 1552 if (res.resok.tbytes == (size3)-1) 1553 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes; 1554 else { 1555 sbp->f_blocks = (fsblkcnt64_t) 1556 (res.resok.tbytes / DEV_BSIZE); 1557 } 1558 if (res.resok.fbytes == (size3)-1) 1559 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes; 1560 else { 1561 sbp->f_bfree = (fsblkcnt64_t) 1562 (res.resok.fbytes / DEV_BSIZE); 1563 } 1564 if (res.resok.abytes == (size3)-1) 1565 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes; 1566 else { 1567 sbp->f_bavail = (fsblkcnt64_t) 1568 (res.resok.abytes / DEV_BSIZE); 1569 } 1570 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles; 1571 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles; 1572 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles; 1573 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1574 (void) strncpy(sbp->f_basetype, 1575 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1576 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1577 sbp->f_namemax = (ulong_t)-1; 1578 } else { 1579 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 1580 PURGE_STALE_FH(error, vp, cr); 1581 } 1582 1583 VN_RELE(vp); 1584 1585 return (error); 1586 } 1587 1588 static kmutex_t nfs3_syncbusy; 1589 1590 /* 1591 * Flush dirty nfs files for file system vfsp. 1592 * If vfsp == NULL, all nfs files are flushed. 1593 */ 1594 /* ARGSUSED */ 1595 static int 1596 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr) 1597 { 1598 /* 1599 * Cross-zone calls are OK here, since this translates to a 1600 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 
1601 */ 1602 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) { 1603 rflush(vfsp, cr); 1604 mutex_exit(&nfs3_syncbusy); 1605 } 1606 return (0); 1607 } 1608 1609 /* ARGSUSED */ 1610 static int 1611 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1612 { 1613 int error; 1614 nfs_fh3 fh; 1615 vnode_t *vp; 1616 struct vattr va; 1617 1618 if (fidp->fid_len > NFS3_FHSIZE) { 1619 *vpp = NULL; 1620 return (ESTALE); 1621 } 1622 1623 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1624 return (EPERM); 1625 fh.fh3_length = fidp->fid_len; 1626 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length); 1627 1628 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1629 1630 if (VTOR(vp)->r_flags & RSTALE) { 1631 VN_RELE(vp); 1632 *vpp = NULL; 1633 return (ENOENT); 1634 } 1635 1636 if (vp->v_type == VNON) { 1637 va.va_mask = AT_ALL; 1638 error = nfs3getattr(vp, &va, CRED()); 1639 if (error) { 1640 VN_RELE(vp); 1641 *vpp = NULL; 1642 return (error); 1643 } 1644 vp->v_type = va.va_type; 1645 } 1646 1647 *vpp = vp; 1648 1649 return (0); 1650 } 1651 1652 /* ARGSUSED */ 1653 static int 1654 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why) 1655 { 1656 vnode_t *rtvp; 1657 char root_hostname[SYS_NMLN+1]; 1658 struct servinfo *svp; 1659 int error; 1660 int vfsflags; 1661 size_t size; 1662 char *root_path; 1663 struct pathname pn; 1664 char *name; 1665 cred_t *cr; 1666 struct nfs_args args; /* nfs mount arguments */ 1667 static char token[10]; 1668 1669 bzero(&args, sizeof (args)); 1670 1671 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1672 clkset(-1L); /* hack for now - until we get time svc? */ 1673 1674 if (why == ROOT_REMOUNT) { 1675 /* 1676 * Shouldn't happen. 1677 */ 1678 panic("nfs3_mountroot: why == ROOT_REMOUNT"); 1679 } 1680 1681 if (why == ROOT_UNMOUNT) { 1682 /* 1683 * Nothing to do for NFS. 
1684 */ 1685 return (0); 1686 } 1687 1688 /* 1689 * why == ROOT_INIT 1690 */ 1691 1692 name = token; 1693 *name = 0; 1694 getfsname("root", name, sizeof (token)); 1695 1696 pn_alloc(&pn); 1697 root_path = pn.pn_path; 1698 1699 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1700 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1701 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1702 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1703 1704 /* 1705 * Get server address 1706 * Get the root fhandle 1707 * Get server's transport 1708 * Get server's hostname 1709 * Get options 1710 */ 1711 args.addr = &svp->sv_addr; 1712 args.fh = (char *)&svp->sv_fhandle; 1713 args.knconf = svp->sv_knconf; 1714 args.hostname = root_hostname; 1715 vfsflags = 0; 1716 if (error = mount_root(*name ? name : "root", root_path, NFS_V3, 1717 &args, &vfsflags)) { 1718 if (error == EPROTONOSUPPORT) 1719 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: " 1720 "mount_root failed: server doesn't support NFS V3"); 1721 else 1722 nfs_cmn_err(error, CE_WARN, 1723 "nfs3_mountroot: mount_root failed: %m"); 1724 sv_free(svp); 1725 pn_free(&pn); 1726 return (error); 1727 } 1728 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1729 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1730 (void) strcpy(svp->sv_hostname, root_hostname); 1731 1732 /* 1733 * Force root partition to always be mounted with AUTH_UNIX for now 1734 */ 1735 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1736 svp->sv_secdata->secmod = AUTH_UNIX; 1737 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1738 svp->sv_secdata->data = NULL; 1739 1740 cr = crgetcred(); 1741 rtvp = NULL; 1742 1743 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1744 1745 crfree(cr); 1746 1747 if (error) { 1748 pn_free(&pn); 1749 sv_free(svp); 1750 return (error); 1751 } 1752 1753 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1754 if (error) { 1755 
nfs_cmn_err(error, CE_WARN, 1756 "nfs3_mountroot: invalid root mount options"); 1757 pn_free(&pn); 1758 goto errout; 1759 } 1760 1761 (void) vfs_lock_wait(vfsp); 1762 vfs_add(NULL, vfsp, vfsflags); 1763 vfs_unlock(vfsp); 1764 1765 size = strlen(svp->sv_hostname); 1766 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1767 rootfs.bo_name[size] = ':'; 1768 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1769 1770 pn_free(&pn); 1771 1772 errout: 1773 if (error) { 1774 sv_free(svp); 1775 nfs_async_stop(vfsp); 1776 nfs_async_manager_stop(vfsp); 1777 } 1778 1779 if (rtvp != NULL) 1780 VN_RELE(rtvp); 1781 1782 return (error); 1783 } 1784 1785 /* 1786 * Initialization routine for VFS routines. Should only be called once 1787 */ 1788 int 1789 nfs3_vfsinit(void) 1790 { 1791 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1792 return (0); 1793 } 1794 1795 void 1796 nfs3_vfsfini(void) 1797 { 1798 mutex_destroy(&nfs3_syncbusy); 1799 } 1800 1801 void 1802 nfs3_freevfs(vfs_t *vfsp) 1803 { 1804 mntinfo_t *mi; 1805 servinfo_t *svp; 1806 1807 /* free up the resources */ 1808 mi = VFTOMI(vfsp); 1809 svp = mi->mi_servers; 1810 mi->mi_servers = mi->mi_curr_serv = NULL; 1811 sv_free(svp); 1812 1813 /* 1814 * By this time we should have already deleted the 1815 * mi kstats in the unmount code. If they are still around 1816 * somethings wrong 1817 */ 1818 ASSERT(mi->mi_io_kstats == NULL); 1819 nfs_free_mi(mi); 1820 } 1821