1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 26 * All rights reserved. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/vfs.h> 36 #include <sys/vnode.h> 37 #include <sys/pathname.h> 38 #include <sys/sysmacros.h> 39 #include <sys/kmem.h> 40 #include <sys/mkdev.h> 41 #include <sys/mount.h> 42 #include <sys/mntent.h> 43 #include <sys/statvfs.h> 44 #include <sys/errno.h> 45 #include <sys/debug.h> 46 #include <sys/cmn_err.h> 47 #include <sys/utsname.h> 48 #include <sys/bootconf.h> 49 #include <sys/modctl.h> 50 #include <sys/acl.h> 51 #include <sys/flock.h> 52 #include <sys/policy.h> 53 #include <sys/zone.h> 54 #include <sys/class.h> 55 #include <sys/socket.h> 56 #include <sys/netconfig.h> 57 #include <sys/mntent.h> 58 #include <sys/tsol/label.h> 59 60 #include <rpc/types.h> 61 #include <rpc/auth.h> 62 #include <rpc/clnt.h> 63 64 #include <nfs/nfs.h> 65 #include <nfs/nfs_clnt.h> 66 #include <nfs/rnode.h> 67 #include <nfs/mount.h> 68 #include <nfs/nfs_acl.h> 69 70 #include <fs/fs_subr.h> 71 72 /* 73 * From rpcsec module (common/rpcsec). 74 */ 75 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 76 extern void sec_clnt_freeinfo(struct sec_data *); 77 78 static int pathconf_copyin(struct nfs_args *, struct pathcnf *); 79 static int pathconf_get(struct mntinfo *, struct nfs_args *); 80 static void pathconf_rele(struct mntinfo *); 81 82 /* 83 * The order and contents of this structure must be kept in sync with that of 84 * rfsreqcnt_v2_tmpl in nfs_stats.c 85 */ 86 static char *rfsnames_v2[] = { 87 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 88 "unused", "write", "create", "remove", "rename", "link", "symlink", 89 "mkdir", "rmdir", "readdir", "fsstat" 90 }; 91 92 /* 93 * This table maps from NFS protocol number into call type. 94 * Zero means a "Lookup" type call 95 * One means a "Read" type call 96 * Two means a "Write" type call 97 * This is used to select a default time-out. 98 */ 99 static uchar_t call_type_v2[] = { 100 0, 0, 1, 0, 0, 0, 1, 101 0, 2, 2, 2, 2, 2, 2, 102 2, 2, 1, 0 103 }; 104 105 /* 106 * Similar table, but to determine which timer to use 107 * (only real reads and writes!) 108 */ 109 static uchar_t timer_type_v2[] = { 110 0, 0, 0, 0, 0, 0, 1, 111 0, 2, 0, 0, 0, 0, 0, 112 0, 0, 1, 0 113 }; 114 115 /* 116 * This table maps from NFS protocol number into a call type 117 * for the semisoft mount option. 118 * Zero means do not repeat operation. 119 * One means repeat. 120 */ 121 static uchar_t ss_call_type_v2[] = { 122 0, 0, 1, 0, 0, 0, 0, 123 0, 1, 1, 1, 1, 1, 1, 124 1, 1, 0, 0 125 }; 126 127 /* 128 * nfs vfs operations. 129 */ 130 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 131 static int nfs_unmount(vfs_t *, int, cred_t *); 132 static int nfs_root(vfs_t *, vnode_t **); 133 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 134 static int nfs_sync(vfs_t *, short, cred_t *); 135 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 136 static int nfs_mountroot(vfs_t *, whymountroot_t); 137 static void nfs_freevfs(vfs_t *); 138 139 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 140 int, cred_t *, zone_t *); 141 142 /* 143 * Initialize the vfs structure 144 */ 145 146 int nfsfstyp; 147 vfsops_t *nfs_vfsops; 148 149 /* 150 * Debug variable to check for rdma based 151 * transport startup and cleanup. Controlled 152 * through /etc/system. Off by default. 153 */ 154 int rdma_debug = 0; 155 156 int 157 nfsinit(int fstyp, char *name) 158 { 159 static const fs_operation_def_t nfs_vfsops_template[] = { 160 VFSNAME_MOUNT, nfs_mount, 161 VFSNAME_UNMOUNT, nfs_unmount, 162 VFSNAME_ROOT, nfs_root, 163 VFSNAME_STATVFS, nfs_statvfs, 164 VFSNAME_SYNC, (fs_generic_func_p) nfs_sync, 165 VFSNAME_VGET, nfs_vget, 166 VFSNAME_MOUNTROOT, nfs_mountroot, 167 VFSNAME_FREEVFS, (fs_generic_func_p)nfs_freevfs, 168 NULL, NULL 169 }; 170 int error; 171 172 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 173 if (error != 0) { 174 zcmn_err(GLOBAL_ZONEID, CE_WARN, 175 "nfsinit: bad vfs ops template"); 176 return (error); 177 } 178 179 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 180 if (error != 0) { 181 (void) vfs_freevfsops_by_type(fstyp); 182 zcmn_err(GLOBAL_ZONEID, CE_WARN, 183 "nfsinit: bad vnode ops template"); 184 return (error); 185 } 186 187 188 nfsfstyp = fstyp; 189 190 return (0); 191 } 192 193 void 194 nfsfini(void) 195 { 196 } 197 198 static void 199 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 200 { 201 202 if (fh) 203 kmem_free(fh, sizeof (*fh)); 204 205 if (nargs->pathconf) { 206 kmem_free(nargs->pathconf, sizeof (struct pathcnf)); 207 nargs->pathconf = NULL; 208 } 209 210 if (nargs->knconf) { 211 if (nargs->knconf->knc_protofmly) 212 kmem_free(nargs->knconf->knc_protofmly, 213 KNC_STRSIZE); 214 if (nargs->knconf->knc_proto) 215 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 216 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 217 nargs->knconf = NULL; 218 } 219 220 if (nargs->fh) { 221 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 222 nargs->fh = NULL; 223 } 224 225 if (nargs->hostname) { 226 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 227 nargs->hostname = NULL; 228 } 229 230 if (nargs->addr) { 231 if (nargs->addr->buf) { 232 ASSERT(nargs->addr->len); 233 kmem_free(nargs->addr->buf, nargs->addr->len); 234 } 235 kmem_free(nargs->addr, sizeof (struct netbuf)); 236 nargs->addr = NULL; 237 } 238 239 if (nargs->syncaddr) { 240 ASSERT(nargs->syncaddr->len); 241 if (nargs->syncaddr->buf) { 242 ASSERT(nargs->syncaddr->len); 243 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 244 } 245 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 246 nargs->syncaddr = NULL; 247 } 248 249 if (nargs->netname) { 250 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 251 nargs->netname = NULL; 252 } 253 254 if (nargs->nfs_ext_u.nfs_extA.secdata) { 255 sec_clnt_freeinfo( 256 nargs->nfs_ext_u.nfs_extA.secdata); 257 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 258 } 259 } 260 261 static int 262 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 263 { 264 265 int error; 266 size_t nlen; /* length of netname */ 267 size_t hlen; /* length of hostname */ 268 char netname[MAXNETNAMELEN+1]; /* server's netname */ 269 struct netbuf addr; /* server's address */ 270 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 271 struct knetconfig *knconf; /* transport knetconfig structure */ 272 struct sec_data *secdata = NULL; /* security data */ 273 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 274 STRUCT_DECL(knetconfig, knconf_tmp); 275 STRUCT_DECL(netbuf, addr_tmp); 276 int flags; 277 struct pathcnf *pc; /* Pathconf */ 278 char *p, *pf; 279 char *userbufptr; 280 281 282 bzero(nargs, sizeof (*nargs)); 283 284 STRUCT_INIT(args, get_udatamodel()); 285 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 286 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 287 STRUCT_SIZE(args)))) 288 return (EFAULT); 289 290 nargs->wsize = STRUCT_FGET(args, wsize); 291 nargs->rsize = STRUCT_FGET(args, rsize); 292 nargs->timeo = STRUCT_FGET(args, timeo); 293 nargs->retrans = STRUCT_FGET(args, retrans); 294 nargs->acregmin = STRUCT_FGET(args, acregmin); 295 nargs->acregmax = STRUCT_FGET(args, acregmax); 296 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 297 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 298 299 flags = STRUCT_FGET(args, flags); 300 nargs->flags = flags; 301 302 303 addr.buf = NULL; 304 syncaddr.buf = NULL; 305 306 /* 307 * Allocate space for a knetconfig structure and 308 * its strings and copy in from user-land. 309 */ 310 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 311 STRUCT_INIT(knconf_tmp, get_udatamodel()); 312 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 313 STRUCT_SIZE(knconf_tmp))) { 314 kmem_free(knconf, sizeof (*knconf)); 315 return (EFAULT); 316 } 317 318 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 319 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 320 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 321 if (get_udatamodel() != DATAMODEL_LP64) { 322 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 323 } else { 324 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 325 } 326 327 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 328 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 329 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 330 if (error) { 331 kmem_free(pf, KNC_STRSIZE); 332 kmem_free(p, KNC_STRSIZE); 333 kmem_free(knconf, sizeof (*knconf)); 334 return (error); 335 } 336 337 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 338 if (error) { 339 kmem_free(pf, KNC_STRSIZE); 340 kmem_free(p, KNC_STRSIZE); 341 kmem_free(knconf, sizeof (*knconf)); 342 return (error); 343 } 344 345 346 knconf->knc_protofmly = pf; 347 knconf->knc_proto = p; 348 349 nargs->knconf = knconf; 350 351 /* Copyin pathconf if there is one */ 352 if (STRUCT_FGETP(args, pathconf) != NULL) { 353 pc = kmem_alloc(sizeof (*pc), KM_SLEEP); 354 error = pathconf_copyin(STRUCT_BUF(args), pc); 355 nargs->pathconf = pc; 356 if (error) 357 goto errout; 358 } 359 360 /* 361 * Get server address 362 */ 363 STRUCT_INIT(addr_tmp, get_udatamodel()); 364 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 365 STRUCT_SIZE(addr_tmp))) { 366 error = EFAULT; 367 goto errout; 368 } 369 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 370 userbufptr = STRUCT_FGETP(addr_tmp, buf); 371 addr.len = STRUCT_FGET(addr_tmp, len); 372 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 373 addr.maxlen = addr.len; 374 if (copyin(userbufptr, addr.buf, addr.len)) { 375 kmem_free(addr.buf, addr.len); 376 error = EFAULT; 377 goto errout; 378 } 379 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 380 381 /* 382 * Get the root fhandle 383 */ 384 385 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) { 386 error = EFAULT; 387 goto errout; 388 } 389 fh->fh_len = NFS_FHSIZE; 390 391 /* 392 * Get server's hostname 393 */ 394 if (flags & NFSMNT_HOSTNAME) { 395 error = copyinstr(STRUCT_FGETP(args, hostname), 396 netname, sizeof (netname), &hlen); 397 if (error) 398 goto errout; 399 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 400 (void) strcpy(nargs->hostname, netname); 401 402 } else { 403 nargs->hostname = NULL; 404 } 405 406 407 /* 408 * If there are syncaddr and netname data, load them in. This is 409 * to support data needed for NFSV4 when AUTH_DH is the negotiated 410 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 411 */ 412 netname[0] = '\0'; 413 if (flags & NFSMNT_SECURE) { 414 if (STRUCT_FGETP(args, syncaddr) == NULL) { 415 error = EINVAL; 416 goto errout; 417 } 418 /* get syncaddr */ 419 STRUCT_INIT(addr_tmp, get_udatamodel()); 420 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 421 STRUCT_SIZE(addr_tmp))) { 422 error = EINVAL; 423 goto errout; 424 } 425 userbufptr = STRUCT_FGETP(addr_tmp, buf); 426 syncaddr.len = STRUCT_FGET(addr_tmp, len); 427 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 428 syncaddr.maxlen = syncaddr.len; 429 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 430 kmem_free(syncaddr.buf, syncaddr.len); 431 error = EFAULT; 432 goto errout; 433 } 434 435 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 436 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 437 438 ASSERT(STRUCT_FGETP(args, netname)); 439 if (copyinstr(STRUCT_FGETP(args, netname), netname, 440 sizeof (netname), &nlen)) { 441 error = EFAULT; 442 goto errout; 443 } 444 445 netname[nlen] = '\0'; 446 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 447 (void) strcpy(nargs->netname, netname); 448 } 449 450 /* 451 * Get the extention data which has the security data structure. 452 * This includes data for AUTH_SYS as well. 453 */ 454 if (flags & NFSMNT_NEWARGS) { 455 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 456 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 457 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 458 /* 459 * Indicating the application is using the new 460 * sec_data structure to pass in the security 461 * data. 462 */ 463 if (STRUCT_FGETP(args, 464 nfs_ext_u.nfs_extA.secdata) != NULL) { 465 error = sec_clnt_loadinfo( 466 (struct sec_data *)STRUCT_FGETP(args, 467 nfs_ext_u.nfs_extA.secdata), 468 &secdata, get_udatamodel()); 469 } 470 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 471 } 472 } 473 474 if (error) 475 goto errout; 476 477 /* 478 * Failover support: 479 * 480 * We may have a linked list of nfs_args structures, 481 * which means the user is looking for failover. If 482 * the mount is either not "read-only" or "soft", 483 * we want to bail out with EINVAL. 484 */ 485 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 486 nargs->nfs_ext_u.nfs_extB.next = 487 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 488 489 errout: 490 if (error) 491 nfs_free_args(nargs, fh); 492 493 return (error); 494 } 495 496 497 /* 498 * nfs mount vfsop 499 * Set up mount info record and attach it to vfs struct. 500 */ 501 static int 502 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 503 { 504 char *data = uap->dataptr; 505 int error; 506 vnode_t *rtvp; /* the server's root */ 507 mntinfo_t *mi; /* mount info, pointed at by vfs */ 508 size_t nlen; /* length of netname */ 509 struct knetconfig *knconf; /* transport knetconfig structure */ 510 struct knetconfig *rdma_knconf; /* rdma transport structure */ 511 rnode_t *rp; 512 struct servinfo *svp; /* nfs server info */ 513 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 514 struct servinfo *svp_head; /* first nfs server info */ 515 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 516 struct sec_data *secdata; /* security data */ 517 struct nfs_args *args = NULL; 518 int flags, addr_type; 519 zone_t *zone = nfs_zone(); 520 zone_t *mntzone = NULL; 521 nfs_fhandle *fhandle = NULL; 522 523 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 524 return (error); 525 526 if (mvp->v_type != VDIR) 527 return (ENOTDIR); 528 529 /* 530 * get arguments 531 * 532 * nfs_args is now versioned and is extensible, so 533 * uap->datalen might be different from sizeof (args) 534 * in a compatible situation. 535 */ 536 more: 537 538 if (!(uap->flags & MS_SYSSPACE)) { 539 if (args == NULL) 540 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 541 else { 542 nfs_free_args(args, fhandle); 543 fhandle = NULL; 544 } 545 if (fhandle == NULL) 546 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP); 547 error = nfs_copyin(data, uap->datalen, args, fhandle); 548 if (error) { 549 if (args) 550 kmem_free(args, sizeof (*args)); 551 return (error); 552 } 553 } else { 554 args = (struct nfs_args *)data; 555 fhandle = (nfs_fhandle *)args->fh; 556 } 557 558 559 flags = args->flags; 560 561 if (uap->flags & MS_REMOUNT) { 562 size_t n; 563 char name[FSTYPSZ]; 564 565 if (uap->flags & MS_SYSSPACE) 566 error = copystr(uap->fstype, name, FSTYPSZ, &n); 567 else 568 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 569 570 if (error) { 571 if (error == ENAMETOOLONG) 572 return (EINVAL); 573 return (error); 574 } 575 576 577 /* 578 * This check is to ensure that the request is a 579 * genuine nfs remount request. 580 */ 581 582 if (strncmp(name, "nfs", 3) != 0) 583 return (EINVAL); 584 585 /* 586 * If the request changes the locking type, disallow the 587 * remount, 588 * because it's questionable whether we can transfer the 589 * locking state correctly. 590 * 591 * Remounts need to save the pathconf information. 592 * Part of the infamous static kludge. 593 */ 594 595 if ((mi = VFTOMI(vfsp)) != NULL) { 596 uint_t new_mi_llock; 597 uint_t old_mi_llock; 598 599 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 600 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 601 if (old_mi_llock != new_mi_llock) 602 return (EBUSY); 603 } 604 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args); 605 606 if (!(uap->flags & MS_SYSSPACE)) { 607 nfs_free_args(args, fhandle); 608 kmem_free(args, sizeof (*args)); 609 } 610 611 return (error); 612 } 613 614 mutex_enter(&mvp->v_lock); 615 if (!(uap->flags & MS_OVERLAY) && 616 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 617 mutex_exit(&mvp->v_lock); 618 if (!(uap->flags & MS_SYSSPACE)) { 619 nfs_free_args(args, fhandle); 620 kmem_free(args, sizeof (*args)); 621 } 622 return (EBUSY); 623 } 624 mutex_exit(&mvp->v_lock); 625 626 /* make sure things are zeroed for errout: */ 627 rtvp = NULL; 628 mi = NULL; 629 secdata = NULL; 630 631 /* 632 * A valid knetconfig structure is required. 633 */ 634 if (!(flags & NFSMNT_KNCONF)) { 635 if (!(uap->flags & MS_SYSSPACE)) { 636 nfs_free_args(args, fhandle); 637 kmem_free(args, sizeof (*args)); 638 } 639 return (EINVAL); 640 } 641 642 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 643 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 644 if (!(uap->flags & MS_SYSSPACE)) { 645 nfs_free_args(args, fhandle); 646 kmem_free(args, sizeof (*args)); 647 } 648 return (EINVAL); 649 } 650 651 652 /* 653 * Allocate a servinfo struct. 654 */ 655 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 656 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 657 if (svp_tail) { 658 svp_2ndlast = svp_tail; 659 svp_tail->sv_next = svp; 660 } else { 661 svp_head = svp; 662 svp_2ndlast = svp; 663 } 664 665 svp_tail = svp; 666 667 /* 668 * Get knetconfig and server address 669 */ 670 svp->sv_knconf = args->knconf; 671 args->knconf = NULL; 672 673 if (args->addr == NULL || args->addr->buf == NULL) { 674 error = EINVAL; 675 goto errout; 676 } 677 678 svp->sv_addr.maxlen = args->addr->maxlen; 679 svp->sv_addr.len = args->addr->len; 680 svp->sv_addr.buf = args->addr->buf; 681 args->addr->buf = NULL; 682 683 /* 684 * Get the root fhandle 685 */ 686 ASSERT(fhandle); 687 688 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 689 svp->sv_fhandle.fh_len = fhandle->fh_len; 690 691 /* 692 * Get server's hostname 693 */ 694 if (flags & NFSMNT_HOSTNAME) { 695 if (args->hostname == NULL) { 696 error = EINVAL; 697 goto errout; 698 } 699 svp->sv_hostnamelen = strlen(args->hostname) + 1; 700 svp->sv_hostname = args->hostname; 701 args->hostname = NULL; 702 } else { 703 char *p = "unknown-host"; 704 svp->sv_hostnamelen = strlen(p) + 1; 705 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 706 (void) strcpy(svp->sv_hostname, p); 707 } 708 709 710 /* 711 * RDMA MOUNT SUPPORT FOR NFS v2: 712 * Establish, is it possible to use RDMA, if so overload the 713 * knconf with rdma specific knconf and free the orignal. 714 */ 715 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 716 /* 717 * Determine the addr type for RDMA, IPv4 or v6. 718 */ 719 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 720 addr_type = AF_INET; 721 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 722 addr_type = AF_INET6; 723 724 if (rdma_reachable(addr_type, &svp->sv_addr, 725 &rdma_knconf) == 0) { 726 /* 727 * If successful, hijack, the orignal knconf and 728 * replace with a new one, depending on the flags. 729 */ 730 svp->sv_origknconf = svp->sv_knconf; 731 svp->sv_knconf = rdma_knconf; 732 knconf = rdma_knconf; 733 } else { 734 if (flags & NFSMNT_TRYRDMA) { 735 #ifdef DEBUG 736 if (rdma_debug) 737 zcmn_err(getzoneid(), CE_WARN, 738 "no RDMA onboard, revert\n"); 739 #endif 740 } 741 742 if (flags & NFSMNT_DORDMA) { 743 /* 744 * If proto=rdma is specified and no RDMA 745 * path to this server is avialable then 746 * ditch this server. 747 * This is not included in the mountable 748 * server list or the replica list. 749 * Check if more servers are specified; 750 * Failover case, otherwise bail out of mount. 751 */ 752 if (args->nfs_args_ext == 753 NFS_ARGS_EXTB && 754 args->nfs_ext_u.nfs_extB.next 755 != NULL) { 756 data = (char *) 757 args->nfs_ext_u.nfs_extB.next; 758 if (uap->flags & MS_RDONLY && 759 !(flags & NFSMNT_SOFT)) { 760 if (svp_head->sv_next == NULL) { 761 svp_tail = NULL; 762 svp_2ndlast = NULL; 763 sv_free(svp_head); 764 goto more; 765 } else { 766 svp_tail = svp_2ndlast; 767 svp_2ndlast->sv_next = 768 NULL; 769 sv_free(svp); 770 goto more; 771 } 772 } 773 } else { 774 /* 775 * This is the last server specified 776 * in the nfs_args list passed down 777 * and its not rdma capable. 778 */ 779 if (svp_head->sv_next == NULL) { 780 /* 781 * Is this the only one 782 */ 783 error = EINVAL; 784 #ifdef DEBUG 785 if (rdma_debug) 786 zcmn_err(getzoneid(), 787 CE_WARN, 788 "No RDMA srv"); 789 #endif 790 goto errout; 791 } else { 792 /* 793 * There is list, since some 794 * servers specified before 795 * this passed all requirements 796 */ 797 svp_tail = svp_2ndlast; 798 svp_2ndlast->sv_next = NULL; 799 sv_free(svp); 800 goto proceed; 801 } 802 } 803 } 804 } 805 } 806 807 /* 808 * Get the extention data which has the new security data structure. 809 */ 810 if (flags & NFSMNT_NEWARGS) { 811 switch (args->nfs_args_ext) { 812 case NFS_ARGS_EXTA: 813 case NFS_ARGS_EXTB: 814 /* 815 * Indicating the application is using the new 816 * sec_data structure to pass in the security 817 * data. 818 */ 819 secdata = args->nfs_ext_u.nfs_extA.secdata; 820 if (secdata == NULL) { 821 error = EINVAL; 822 } else { 823 /* 824 * Need to validate the flavor here if 825 * sysspace, userspace was already 826 * validate from the nfs_copyin function. 827 */ 828 switch (secdata->rpcflavor) { 829 case AUTH_NONE: 830 case AUTH_UNIX: 831 case AUTH_LOOPBACK: 832 case AUTH_DES: 833 case RPCSEC_GSS: 834 break; 835 default: 836 error = EINVAL; 837 goto errout; 838 } 839 } 840 args->nfs_ext_u.nfs_extA.secdata = NULL; 841 break; 842 843 default: 844 error = EINVAL; 845 break; 846 } 847 } else if (flags & NFSMNT_SECURE) { 848 /* 849 * Keep this for backward compatibility to support 850 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 851 */ 852 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 853 error = EINVAL; 854 goto errout; 855 } 856 857 /* 858 * get time sync address. 859 */ 860 if (args->syncaddr == NULL) { 861 error = EFAULT; 862 goto errout; 863 } 864 865 /* 866 * Move security related data to the sec_data structure. 867 */ 868 { 869 dh_k4_clntdata_t *data; 870 char *pf, *p; 871 872 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 873 if (flags & NFSMNT_RPCTIMESYNC) 874 secdata->flags |= AUTH_F_RPCTIMESYNC; 875 data = kmem_alloc(sizeof (*data), KM_SLEEP); 876 bcopy(args->syncaddr, &data->syncaddr, 877 sizeof (*args->syncaddr)); 878 879 880 /* 881 * duplicate the knconf information for the 882 * new opaque data. 883 */ 884 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 885 *data->knconf = *knconf; 886 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 887 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 888 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 889 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 890 data->knconf->knc_protofmly = pf; 891 data->knconf->knc_proto = p; 892 893 /* move server netname to the sec_data structure */ 894 nlen = strlen(args->hostname) + 1; 895 if (nlen != 0) { 896 data->netname = kmem_alloc(nlen, KM_SLEEP); 897 bcopy(args->hostname, data->netname, nlen); 898 data->netnamelen = (int)nlen; 899 } 900 secdata->secmod = secdata->rpcflavor = AUTH_DES; 901 secdata->data = (caddr_t)data; 902 } 903 } else { 904 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 905 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 906 secdata->data = NULL; 907 } 908 svp->sv_secdata = secdata; 909 910 /* 911 * See bug 1180236. 912 * If mount secure failed, we will fall back to AUTH_NONE 913 * and try again. nfs3rootvp() will turn this back off. 914 * 915 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 916 * The server does not care if these procedures have the proper 917 * authentication flavor, so if mount retries using AUTH_NONE 918 * that does not require a credential setup for root then the 919 * automounter would work without requiring root to be 920 * keylogged into AUTH_DES. 921 */ 922 if (secdata->rpcflavor != AUTH_UNIX && 923 secdata->rpcflavor != AUTH_LOOPBACK) 924 secdata->flags |= AUTH_F_TRYNONE; 925 926 /* 927 * Failover support: 928 * 929 * We may have a linked list of nfs_args structures, 930 * which means the user is looking for failover. If 931 * the mount is either not "read-only" or "soft", 932 * we want to bail out with EINVAL. 933 */ 934 if (args->nfs_args_ext == NFS_ARGS_EXTB && 935 args->nfs_ext_u.nfs_extB.next != NULL) { 936 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 937 data = (char *)args->nfs_ext_u.nfs_extB.next; 938 goto more; 939 } 940 error = EINVAL; 941 goto errout; 942 } 943 944 /* 945 * Determine the zone we're being mounted into. 946 */ 947 zone_hold(mntzone = zone); /* start with this assumption */ 948 if (getzoneid() == GLOBAL_ZONEID) { 949 zone_rele(mntzone); 950 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 951 ASSERT(mntzone != NULL); 952 if (mntzone != zone) { 953 error = EBUSY; 954 goto errout; 955 } 956 } 957 958 if (is_system_labeled()) { 959 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 960 svp->sv_knconf, cr); 961 962 if (error > 0) 963 goto errout; 964 965 if (error == -1) { 966 /* change mount to read-only to prevent write-down */ 967 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 968 } 969 } 970 971 /* 972 * Stop the mount from going any further if the zone is going away. 973 */ 974 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 975 error = EBUSY; 976 goto errout; 977 } 978 979 /* 980 * Get root vnode. 981 */ 982 proceed: 983 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 984 985 if (error) 986 goto errout; 987 988 /* 989 * Set option fields in the mount info record 990 */ 991 mi = VTOMI(rtvp); 992 993 if (svp_head->sv_next) 994 mi->mi_flags |= MI_LLOCK; 995 996 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 997 if (!error) { 998 /* static pathconf kludge */ 999 error = pathconf_get(mi, args); 1000 } 1001 1002 errout: 1003 if (error) { 1004 if (rtvp != NULL) { 1005 rp = VTOR(rtvp); 1006 if (rp->r_flags & RHASHED) 1007 rp_rmhash(rp); 1008 } 1009 sv_free(svp_head); 1010 if (mi != NULL) { 1011 nfs_async_stop(vfsp); 1012 nfs_async_manager_stop(vfsp); 1013 if (mi->mi_io_kstats) { 1014 kstat_delete(mi->mi_io_kstats); 1015 mi->mi_io_kstats = NULL; 1016 } 1017 if (mi->mi_ro_kstats) { 1018 kstat_delete(mi->mi_ro_kstats); 1019 mi->mi_ro_kstats = NULL; 1020 } 1021 nfs_free_mi(mi); 1022 } 1023 } 1024 1025 if (!(uap->flags & MS_SYSSPACE)) { 1026 nfs_free_args(args, fhandle); 1027 kmem_free(args, sizeof (*args)); 1028 } 1029 1030 if (rtvp != NULL) 1031 VN_RELE(rtvp); 1032 1033 if (mntzone != NULL) 1034 zone_rele(mntzone); 1035 1036 return (error); 1037 } 1038 1039 /* 1040 * The pathconf information is kept on a linked list of kmem_alloc'ed 1041 * structs. We search the list & add a new struct iff there is no other 1042 * struct with the same information. 1043 * See sys/pathconf.h for ``the rest of the story.'' 1044 */ 1045 static struct pathcnf *allpc = NULL; 1046 1047 static int 1048 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc) 1049 { 1050 STRUCT_DECL(pathcnf, pc_tmp); 1051 STRUCT_HANDLE(nfs_args, ap); 1052 int i; 1053 model_t model; 1054 1055 model = get_udatamodel(); 1056 STRUCT_INIT(pc_tmp, model); 1057 STRUCT_SET_HANDLE(ap, model, args); 1058 1059 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 1060 STRUCT_FGETP(ap, pathconf) != NULL) { 1061 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 1062 STRUCT_SIZE(pc_tmp))) 1063 return (EFAULT); 1064 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 1065 return (EINVAL); 1066 1067 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 1068 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 1069 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 1070 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 1071 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 1072 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 1073 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 1074 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 1075 for (i = 0; i < _PC_N; i++) 1076 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 1077 } 1078 return (0); 1079 } 1080 1081 static int 1082 pathconf_get(struct mntinfo *mi, struct nfs_args *args) 1083 { 1084 struct pathcnf *p, *pc; 1085 1086 pc = args->pathconf; 1087 if (mi->mi_pathconf != NULL) { 1088 pathconf_rele(mi); 1089 mi->mi_pathconf = NULL; 1090 } 1091 if (args->flags & NFSMNT_POSIX && 1092 args->pathconf != NULL) { 1093 1094 if (_PC_ISSET(_PC_ERROR, pc->pc_mask)) 1095 return (EINVAL); 1096 1097 for (p = allpc; p != NULL; p = p->pc_next) { 1098 if (PCCMP(p, pc) == 0) 1099 break; 1100 } 1101 if (p != NULL) { 1102 mi->mi_pathconf = p; 1103 p->pc_refcnt++; 1104 } else { 1105 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1106 bcopy(pc, p, sizeof (struct pathcnf)); 1107 p->pc_next = allpc; 1108 p->pc_refcnt = 1; 1109 allpc = mi->mi_pathconf = p; 1110 } 1111 } 1112 return (0); 1113 } 1114 1115 /* 1116 * release the static pathconf information 1117 */ 1118 static void 1119 pathconf_rele(struct mntinfo *mi) 1120 { 1121 if (mi->mi_pathconf != NULL) { 1122 if (--mi->mi_pathconf->pc_refcnt == 0) { 1123 struct pathcnf *p; 1124 struct pathcnf *p2; 1125 1126 p2 = p = allpc; 1127 while (p != NULL && p != mi->mi_pathconf) { 1128 p2 = p; 1129 p = p->pc_next; 1130 } 1131 if (p == NULL) { 1132 panic("mi->pathconf"); 1133 /*NOTREACHED*/ 1134 } 1135 if (p == allpc) 1136 allpc = p->pc_next; 1137 else 1138 p2->pc_next = p->pc_next; 1139 kmem_free(p, sizeof (*p)); 1140 mi->mi_pathconf = NULL; 1141 } 1142 } 1143 } 1144 1145 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 1146 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 1147 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 1148 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 1149 1150 static int 1151 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1152 int flags, cred_t *cr, zone_t *zone) 1153 { 1154 vnode_t *rtvp; 1155 mntinfo_t *mi; 1156 dev_t nfs_dev; 1157 struct vattr va; 1158 int error; 1159 rnode_t *rp; 1160 int i; 1161 struct nfs_stats *nfsstatsp; 1162 cred_t *lcr = NULL, *tcr = cr; 1163 1164 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1165 ASSERT(nfsstatsp != NULL); 1166 1167 /* 1168 * Create a mount record and link it to the vfs struct. 1169 */ 1170 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1171 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1172 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1173 mi->mi_flags = MI_ACL | MI_EXTATTR; 1174 if (!(flags & NFSMNT_SOFT)) 1175 mi->mi_flags |= MI_HARD; 1176 if ((flags & NFSMNT_SEMISOFT)) 1177 mi->mi_flags |= MI_SEMISOFT; 1178 if ((flags & NFSMNT_NOPRINT)) 1179 mi->mi_flags |= MI_NOPRINT; 1180 if (flags & NFSMNT_INT) 1181 mi->mi_flags |= MI_INT; 1182 mi->mi_retrans = NFS_RETRIES; 1183 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1184 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1185 mi->mi_timeo = nfs_cots_timeo; 1186 else 1187 mi->mi_timeo = NFS_TIMEO; 1188 mi->mi_prog = NFS_PROGRAM; 1189 mi->mi_vers = NFS_VERSION; 1190 mi->mi_rfsnames = rfsnames_v2; 1191 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 1192 mi->mi_call_type = call_type_v2; 1193 mi->mi_ss_call_type = ss_call_type_v2; 1194 mi->mi_timer_type = timer_type_v2; 1195 mi->mi_aclnames = aclnames_v2; 1196 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 1197 mi->mi_acl_call_type = acl_call_type_v2; 1198 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 1199 mi->mi_acl_timer_type = acl_timer_type_v2; 1200 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1201 mi->mi_servers = svp; 1202 mi->mi_curr_serv = svp; 1203 mi->mi_acregmin = SEC2HR(ACREGMIN); 1204 mi->mi_acregmax = SEC2HR(ACREGMAX); 1205 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1206 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1207 1208 if (nfs_dynamic) 1209 mi->mi_flags |= MI_DYNAMIC; 1210 1211 if (flags & NFSMNT_DIRECTIO) 1212 mi->mi_flags |= MI_DIRECTIO; 1213 1214 /* 1215 * Make a vfs struct for nfs. We do this here instead of below 1216 * because rtvp needs a vfs before we can do a getattr on it. 1217 * 1218 * Assign a unique device id to the mount 1219 */ 1220 mutex_enter(&nfs_minor_lock); 1221 do { 1222 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1223 nfs_dev = makedevice(nfs_major, nfs_minor); 1224 } while (vfs_devismounted(nfs_dev)); 1225 mutex_exit(&nfs_minor_lock); 1226 1227 vfsp->vfs_dev = nfs_dev; 1228 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 1229 vfsp->vfs_data = (caddr_t)mi; 1230 vfsp->vfs_fstype = nfsfstyp; 1231 vfsp->vfs_bsize = NFS_MAXDATA; 1232 1233 /* 1234 * Initialize fields used to support async putpage operations. 1235 */ 1236 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1237 mi->mi_async_clusters[i] = nfs_async_clusters; 1238 mi->mi_async_init_clusters = nfs_async_clusters; 1239 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1240 mi->mi_max_threads = nfs_max_threads; 1241 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1242 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1243 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1244 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1245 1246 mi->mi_vfsp = vfsp; 1247 zone_hold(mi->mi_zone = zone); 1248 nfs_mi_zonelist_add(mi); 1249 1250 /* 1251 * Make the root vnode, use it to get attributes, 1252 * then remake it with the attributes. 1253 */ 1254 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 1255 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1256 1257 va.va_mask = AT_ALL; 1258 1259 /* 1260 * If the uid is set then set the creds for secure mounts 1261 * by proxy processes such as automountd. 1262 */ 1263 if (svp->sv_secdata->uid != 0 && 1264 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1265 lcr = crdup(cr); 1266 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1267 tcr = lcr; 1268 } 1269 1270 error = nfsgetattr(rtvp, &va, tcr); 1271 if (error) 1272 goto bad; 1273 rtvp->v_type = va.va_type; 1274 1275 /* 1276 * Poll every server to get the filesystem stats; we're 1277 * only interested in the server's transfer size, and we 1278 * want the minimum. 1279 * 1280 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1281 * which is only for the mount operation. 1282 */ 1283 1284 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 1285 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 1286 1287 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1288 struct nfsstatfs fs; 1289 int douprintf; 1290 1291 douprintf = 1; 1292 mi->mi_curr_serv = svp; 1293 1294 error = rfs2call(mi, RFS_STATFS, 1295 xdr_fhandle, (caddr_t)svp->sv_fhandle.fh_buf, 1296 xdr_statfs, (caddr_t)&fs, tcr, &douprintf, 1297 &fs.fs_status, 0, NULL); 1298 if (error) 1299 goto bad; 1300 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1301 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1302 } 1303 mi->mi_curr_serv = mi->mi_servers; 1304 mi->mi_curread = mi->mi_tsize; 1305 mi->mi_curwrite = mi->mi_stsize; 1306 1307 /* 1308 * Start the manager thread responsible for handling async worker 1309 * threads. 1310 */ 1311 VFS_HOLD(vfsp); /* add reference for thread */ 1312 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1313 vfsp, 0, minclsyspri); 1314 ASSERT(mi->mi_manager_thread != NULL); 1315 1316 /* 1317 * Initialize kstats 1318 */ 1319 nfs_mnt_kstat_init(vfsp); 1320 1321 mi->mi_type = rtvp->v_type; 1322 1323 *rtvpp = rtvp; 1324 if (lcr != NULL) 1325 crfree(lcr); 1326 1327 return (0); 1328 bad: 1329 /* 1330 * An error occurred somewhere, need to clean up... 1331 * We need to release our reference to the root vnode and 1332 * destroy the mntinfo struct that we just created. 1333 */ 1334 if (lcr != NULL) 1335 crfree(lcr); 1336 rp = VTOR(rtvp); 1337 if (rp->r_flags & RHASHED) 1338 rp_rmhash(rp); 1339 VN_RELE(rtvp); 1340 nfs_async_stop(vfsp); 1341 nfs_async_manager_stop(vfsp); 1342 if (mi->mi_io_kstats) { 1343 kstat_delete(mi->mi_io_kstats); 1344 mi->mi_io_kstats = NULL; 1345 } 1346 if (mi->mi_ro_kstats) { 1347 kstat_delete(mi->mi_ro_kstats); 1348 mi->mi_ro_kstats = NULL; 1349 } 1350 nfs_free_mi(mi); 1351 *rtvpp = NULL; 1352 return (error); 1353 } 1354 1355 /* 1356 * vfs operations 1357 */ 1358 static int 1359 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1360 { 1361 mntinfo_t *mi; 1362 ushort_t omax; 1363 1364 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1365 return (EPERM); 1366 1367 mi = VFTOMI(vfsp); 1368 if (flag & MS_FORCE) { 1369 1370 vfsp->vfs_flag |= VFS_UNMOUNTED; 1371 1372 /* 1373 * We are about to stop the async manager. 1374 * Let every one know not to schedule any 1375 * more async requests. 1376 */ 1377 mutex_enter(&mi->mi_async_lock); 1378 mi->mi_max_threads = 0; 1379 cv_broadcast(&mi->mi_async_work_cv); 1380 mutex_exit(&mi->mi_async_lock); 1381 1382 /* 1383 * We need to stop the manager thread explicitly; the worker 1384 * threads can time out and exit on their own. 1385 */ 1386 nfs_async_manager_stop(vfsp); 1387 destroy_rtable(vfsp, cr); 1388 if (mi->mi_io_kstats) { 1389 kstat_delete(mi->mi_io_kstats); 1390 mi->mi_io_kstats = NULL; 1391 } 1392 if (mi->mi_ro_kstats) { 1393 kstat_delete(mi->mi_ro_kstats); 1394 mi->mi_ro_kstats = NULL; 1395 } 1396 return (0); 1397 } 1398 /* 1399 * Wait until all asynchronous putpage operations on 1400 * this file system are complete before flushing rnodes 1401 * from the cache. 1402 */ 1403 omax = mi->mi_max_threads; 1404 if (nfs_async_stop_sig(vfsp)) { 1405 return (EINTR); 1406 } 1407 rflush(vfsp, cr); 1408 /* 1409 * If there are any active vnodes on this file system, 1410 * then the file system is busy and can't be umounted. 1411 */ 1412 if (check_rtable(vfsp)) { 1413 mutex_enter(&mi->mi_async_lock); 1414 mi->mi_max_threads = omax; 1415 mutex_exit(&mi->mi_async_lock); 1416 return (EBUSY); 1417 } 1418 /* 1419 * The unmount can't fail from now on; stop the manager thread. 1420 */ 1421 nfs_async_manager_stop(vfsp); 1422 /* 1423 * Destroy all rnodes belonging to this file system from the 1424 * rnode hash queues and purge any resources allocated to 1425 * them. 1426 */ 1427 destroy_rtable(vfsp, cr); 1428 if (mi->mi_io_kstats) { 1429 kstat_delete(mi->mi_io_kstats); 1430 mi->mi_io_kstats = NULL; 1431 } 1432 if (mi->mi_ro_kstats) { 1433 kstat_delete(mi->mi_ro_kstats); 1434 mi->mi_ro_kstats = NULL; 1435 } 1436 return (0); 1437 } 1438 1439 /* 1440 * find root of nfs 1441 */ 1442 static int 1443 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1444 { 1445 mntinfo_t *mi; 1446 vnode_t *vp; 1447 servinfo_t *svp; 1448 rnode_t *rp; 1449 int error = 0; 1450 1451 mi = VFTOMI(vfsp); 1452 1453 if (nfs_zone() != mi->mi_zone) 1454 return (EPERM); 1455 1456 svp = mi->mi_curr_serv; 1457 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1458 mutex_enter(&svp->sv_lock); 1459 svp->sv_flags &= ~SV_ROOT_STALE; 1460 mutex_exit(&svp->sv_lock); 1461 error = ENOENT; 1462 } 1463 1464 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1465 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1466 1467 /* 1468 * if the SV_ROOT_STALE flag was reset above, reset the 1469 * RSTALE flag if needed and return an error 1470 */ 1471 if (error == ENOENT) { 1472 rp = VTOR(vp); 1473 if (svp && rp->r_flags & RSTALE) { 1474 mutex_enter(&rp->r_statelock); 1475 rp->r_flags &= ~RSTALE; 1476 mutex_exit(&rp->r_statelock); 1477 } 1478 VN_RELE(vp); 1479 return (error); 1480 } 1481 1482 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1483 1484 vp->v_type = mi->mi_type; 1485 1486 *vpp = vp; 1487 1488 return (0); 1489 } 1490 1491 /* 1492 * Get file system statistics. 1493 */ 1494 static int 1495 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1496 { 1497 int error; 1498 mntinfo_t *mi; 1499 struct nfsstatfs fs; 1500 int douprintf; 1501 failinfo_t fi; 1502 vnode_t *vp; 1503 1504 error = nfs_root(vfsp, &vp); 1505 if (error) 1506 return (error); 1507 1508 mi = VFTOMI(vfsp); 1509 douprintf = 1; 1510 fi.vp = vp; 1511 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1512 fi.copyproc = nfscopyfh; 1513 fi.lookupproc = nfslookup; 1514 fi.xattrdirproc = acl_getxattrdir2; 1515 1516 error = rfs2call(mi, RFS_STATFS, 1517 xdr_fhandle, (caddr_t)VTOFH(vp), 1518 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, 1519 &fs.fs_status, 0, &fi); 1520 1521 if (!error) { 1522 error = geterrno(fs.fs_status); 1523 if (!error) { 1524 mutex_enter(&mi->mi_lock); 1525 if (mi->mi_stsize) { 1526 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1527 } else { 1528 mi->mi_stsize = fs.fs_tsize; 1529 mi->mi_curwrite = mi->mi_stsize; 1530 } 1531 mutex_exit(&mi->mi_lock); 1532 sbp->f_bsize = fs.fs_bsize; 1533 sbp->f_frsize = fs.fs_bsize; 1534 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1535 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1536 /* 1537 * Some servers may return negative available 1538 * block counts. They may do this because they 1539 * calculate the number of available blocks by 1540 * subtracting the number of used blocks from 1541 * the total number of blocks modified by the 1542 * minimum free value. For example, if the 1543 * minumum free percentage is 10 and the file 1544 * system is greater than 90 percent full, then 1545 * 90 percent of the total blocks minus the 1546 * actual number of used blocks may be a 1547 * negative number. 1548 * 1549 * In this case, we need to sign extend the 1550 * negative number through the assignment from 1551 * the 32 bit bavail count to the 64 bit bavail 1552 * count. 1553 * 1554 * We need to be able to discern between there 1555 * just being a lot of available blocks on the 1556 * file system and the case described above. 1557 * We are making the assumption that it does 1558 * not make sense to have more available blocks 1559 * than there are free blocks. So, if there 1560 * are, then we treat the number as if it were 1561 * a negative number and arrange to have it 1562 * sign extended when it is converted from 32 1563 * bits to 64 bits. 1564 */ 1565 if (fs.fs_bavail <= fs.fs_bfree) 1566 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1567 else { 1568 sbp->f_bavail = 1569 (fsblkcnt64_t)((long)fs.fs_bavail); 1570 } 1571 sbp->f_files = (fsfilcnt64_t)-1; 1572 sbp->f_ffree = (fsfilcnt64_t)-1; 1573 sbp->f_favail = (fsfilcnt64_t)-1; 1574 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1575 (void) strncpy(sbp->f_basetype, 1576 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1577 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1578 sbp->f_namemax = (uint32_t)-1; 1579 } else { 1580 PURGE_STALE_FH(error, vp, CRED()); 1581 } 1582 } 1583 1584 VN_RELE(vp); 1585 1586 return (error); 1587 } 1588 1589 static kmutex_t nfs_syncbusy; 1590 1591 /* 1592 * Flush dirty nfs files for file system vfsp. 1593 * If vfsp == NULL, all nfs files are flushed. 1594 */ 1595 /* ARGSUSED */ 1596 static int 1597 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1598 { 1599 /* 1600 * Cross-zone calls are OK here, since this translates to a 1601 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1602 */ 1603 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1604 rflush(vfsp, cr); 1605 mutex_exit(&nfs_syncbusy); 1606 } 1607 return (0); 1608 } 1609 1610 /* ARGSUSED */ 1611 static int 1612 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1613 { 1614 int error; 1615 vnode_t *vp; 1616 struct vattr va; 1617 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1618 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1619 1620 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1621 return (EPERM); 1622 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1623 #ifdef DEBUG 1624 zcmn_err(zoneid, CE_WARN, 1625 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1626 (int)(sizeof (*nfsfidp) - sizeof (short))); 1627 #endif 1628 *vpp = NULL; 1629 return (ESTALE); 1630 } 1631 1632 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1633 gethrtime(), CRED(), NULL, NULL); 1634 1635 if (VTOR(vp)->r_flags & RSTALE) { 1636 VN_RELE(vp); 1637 *vpp = NULL; 1638 return (ENOENT); 1639 } 1640 1641 if (vp->v_type == VNON) { 1642 va.va_mask = AT_ALL; 1643 error = nfsgetattr(vp, &va, CRED()); 1644 if (error) { 1645 VN_RELE(vp); 1646 *vpp = NULL; 1647 return (error); 1648 } 1649 vp->v_type = va.va_type; 1650 } 1651 1652 *vpp = vp; 1653 1654 return (0); 1655 } 1656 1657 /* ARGSUSED */ 1658 static int 1659 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1660 { 1661 vnode_t *rtvp; 1662 char root_hostname[SYS_NMLN+1]; 1663 struct servinfo *svp; 1664 int error; 1665 int vfsflags; 1666 size_t size; 1667 char *root_path; 1668 struct pathname pn; 1669 char *name; 1670 cred_t *cr; 1671 struct nfs_args args; /* nfs mount arguments */ 1672 static char token[10]; 1673 1674 bzero(&args, sizeof (args)); 1675 1676 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1677 clkset(-1L); /* hack for now - until we get time svc? */ 1678 1679 if (why == ROOT_REMOUNT) { 1680 /* 1681 * Shouldn't happen. 1682 */ 1683 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1684 } 1685 1686 if (why == ROOT_UNMOUNT) { 1687 /* 1688 * Nothing to do for NFS. 1689 */ 1690 return (0); 1691 } 1692 1693 /* 1694 * why == ROOT_INIT 1695 */ 1696 1697 name = token; 1698 *name = 0; 1699 getfsname("root", name, sizeof (token)); 1700 1701 pn_alloc(&pn); 1702 root_path = pn.pn_path; 1703 1704 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1705 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1706 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1707 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1708 1709 /* 1710 * Get server address 1711 * Get the root fhandle 1712 * Get server's transport 1713 * Get server's hostname 1714 * Get options 1715 */ 1716 args.addr = &svp->sv_addr; 1717 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1718 args.knconf = svp->sv_knconf; 1719 args.hostname = root_hostname; 1720 vfsflags = 0; 1721 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1722 &args, &vfsflags)) { 1723 nfs_cmn_err(error, CE_WARN, 1724 "nfs_mountroot: mount_root failed: %m"); 1725 sv_free(svp); 1726 pn_free(&pn); 1727 return (error); 1728 } 1729 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1730 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1731 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1732 (void) strcpy(svp->sv_hostname, root_hostname); 1733 1734 /* 1735 * Force root partition to always be mounted with AUTH_UNIX for now 1736 */ 1737 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1738 svp->sv_secdata->secmod = AUTH_UNIX; 1739 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1740 svp->sv_secdata->data = NULL; 1741 1742 cr = crgetcred(); 1743 rtvp = NULL; 1744 1745 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1746 1747 crfree(cr); 1748 1749 if (error) { 1750 pn_free(&pn); 1751 sv_free(svp); 1752 return (error); 1753 } 1754 1755 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1756 if (error) { 1757 nfs_cmn_err(error, CE_WARN, 1758 "nfs_mountroot: invalid root mount options"); 1759 pn_free(&pn); 1760 goto errout; 1761 } 1762 1763 (void) vfs_lock_wait(vfsp); 1764 vfs_add(NULL, vfsp, vfsflags); 1765 vfs_unlock(vfsp); 1766 1767 size = strlen(svp->sv_hostname); 1768 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1769 rootfs.bo_name[size] = ':'; 1770 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1771 1772 pn_free(&pn); 1773 1774 errout: 1775 if (error) { 1776 sv_free(svp); 1777 nfs_async_stop(vfsp); 1778 nfs_async_manager_stop(vfsp); 1779 } 1780 1781 if (rtvp != NULL) 1782 VN_RELE(rtvp); 1783 1784 return (error); 1785 } 1786 1787 /* 1788 * Initialization routine for VFS routines. Should only be called once 1789 */ 1790 int 1791 nfs_vfsinit(void) 1792 { 1793 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1794 return (0); 1795 } 1796 1797 void 1798 nfs_vfsfini(void) 1799 { 1800 mutex_destroy(&nfs_syncbusy); 1801 } 1802 1803 void 1804 nfs_freevfs(vfs_t *vfsp) 1805 { 1806 mntinfo_t *mi; 1807 servinfo_t *svp; 1808 1809 /* free up the resources */ 1810 mi = VFTOMI(vfsp); 1811 pathconf_rele(mi); 1812 svp = mi->mi_servers; 1813 mi->mi_servers = mi->mi_curr_serv = NULL; 1814 sv_free(svp); 1815 1816 /* 1817 * By this time we should have already deleted the 1818 * mi kstats in the unmount code. If they are still around 1819 * somethings wrong 1820 */ 1821 ASSERT(mi->mi_io_kstats == NULL); 1822 nfs_free_mi(mi); 1823 } 1824