1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 26 * All rights reserved. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/types.h> 31 #include <sys/systm.h> 32 #include <sys/cred.h> 33 #include <sys/vfs.h> 34 #include <sys/vfs_opreg.h> 35 #include <sys/vnode.h> 36 #include <sys/pathname.h> 37 #include <sys/sysmacros.h> 38 #include <sys/kmem.h> 39 #include <sys/mkdev.h> 40 #include <sys/mount.h> 41 #include <sys/mntent.h> 42 #include <sys/statvfs.h> 43 #include <sys/errno.h> 44 #include <sys/debug.h> 45 #include <sys/cmn_err.h> 46 #include <sys/utsname.h> 47 #include <sys/bootconf.h> 48 #include <sys/modctl.h> 49 #include <sys/acl.h> 50 #include <sys/flock.h> 51 #include <sys/policy.h> 52 #include <sys/zone.h> 53 #include <sys/class.h> 54 #include <sys/socket.h> 55 #include <sys/netconfig.h> 56 #include <sys/mntent.h> 57 #include <sys/tsol/label.h> 58 59 #include <rpc/types.h> 60 #include <rpc/auth.h> 61 #include <rpc/clnt.h> 62 63 #include <nfs/nfs.h> 64 #include <nfs/nfs_clnt.h> 65 #include <nfs/rnode.h> 66 #include <nfs/mount.h> 67 #include <nfs/nfs_acl.h> 68 69 #include <fs/fs_subr.h> 70 71 /* 72 * From rpcsec module (common/rpcsec). 73 */ 74 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 75 extern void sec_clnt_freeinfo(struct sec_data *); 76 77 static int pathconf_copyin(struct nfs_args *, struct pathcnf *); 78 static int pathconf_get(struct mntinfo *, struct nfs_args *); 79 static void pathconf_rele(struct mntinfo *); 80 81 /* 82 * The order and contents of this structure must be kept in sync with that of 83 * rfsreqcnt_v2_tmpl in nfs_stats.c 84 */ 85 static char *rfsnames_v2[] = { 86 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 87 "unused", "write", "create", "remove", "rename", "link", "symlink", 88 "mkdir", "rmdir", "readdir", "fsstat" 89 }; 90 91 /* 92 * This table maps from NFS protocol number into call type. 93 * Zero means a "Lookup" type call 94 * One means a "Read" type call 95 * Two means a "Write" type call 96 * This is used to select a default time-out. 97 */ 98 static uchar_t call_type_v2[] = { 99 0, 0, 1, 0, 0, 0, 1, 100 0, 2, 2, 2, 2, 2, 2, 101 2, 2, 1, 0 102 }; 103 104 /* 105 * Similar table, but to determine which timer to use 106 * (only real reads and writes!) 107 */ 108 static uchar_t timer_type_v2[] = { 109 0, 0, 0, 0, 0, 0, 1, 110 0, 2, 0, 0, 0, 0, 0, 111 0, 0, 1, 0 112 }; 113 114 /* 115 * This table maps from NFS protocol number into a call type 116 * for the semisoft mount option. 117 * Zero means do not repeat operation. 118 * One means repeat. 119 */ 120 static uchar_t ss_call_type_v2[] = { 121 0, 0, 1, 0, 0, 0, 0, 122 0, 1, 1, 1, 1, 1, 1, 123 1, 1, 0, 0 124 }; 125 126 /* 127 * nfs vfs operations. 128 */ 129 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 130 static int nfs_unmount(vfs_t *, int, cred_t *); 131 static int nfs_root(vfs_t *, vnode_t **); 132 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 133 static int nfs_sync(vfs_t *, short, cred_t *); 134 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 135 static int nfs_mountroot(vfs_t *, whymountroot_t); 136 static void nfs_freevfs(vfs_t *); 137 138 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 139 int, cred_t *, zone_t *); 140 141 /* 142 * Initialize the vfs structure 143 */ 144 145 int nfsfstyp; 146 vfsops_t *nfs_vfsops; 147 148 /* 149 * Debug variable to check for rdma based 150 * transport startup and cleanup. Controlled 151 * through /etc/system. Off by default. 152 */ 153 int rdma_debug = 0; 154 155 int 156 nfsinit(int fstyp, char *name) 157 { 158 static const fs_operation_def_t nfs_vfsops_template[] = { 159 VFSNAME_MOUNT, { .vfs_mount = nfs_mount }, 160 VFSNAME_UNMOUNT, { .vfs_unmount = nfs_unmount }, 161 VFSNAME_ROOT, { .vfs_root = nfs_root }, 162 VFSNAME_STATVFS, { .vfs_statvfs = nfs_statvfs }, 163 VFSNAME_SYNC, { .vfs_sync = nfs_sync }, 164 VFSNAME_VGET, { .vfs_vget = nfs_vget }, 165 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs_mountroot }, 166 VFSNAME_FREEVFS, { .vfs_freevfs = nfs_freevfs }, 167 NULL, NULL 168 }; 169 int error; 170 171 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 172 if (error != 0) { 173 zcmn_err(GLOBAL_ZONEID, CE_WARN, 174 "nfsinit: bad vfs ops template"); 175 return (error); 176 } 177 178 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 179 if (error != 0) { 180 (void) vfs_freevfsops_by_type(fstyp); 181 zcmn_err(GLOBAL_ZONEID, CE_WARN, 182 "nfsinit: bad vnode ops template"); 183 return (error); 184 } 185 186 187 nfsfstyp = fstyp; 188 189 return (0); 190 } 191 192 void 193 nfsfini(void) 194 { 195 } 196 197 static void 198 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 199 { 200 201 if (fh) 202 kmem_free(fh, sizeof (*fh)); 203 204 if (nargs->pathconf) { 205 kmem_free(nargs->pathconf, sizeof (struct pathcnf)); 206 nargs->pathconf = NULL; 207 } 208 209 if (nargs->knconf) { 210 if (nargs->knconf->knc_protofmly) 211 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE); 212 if (nargs->knconf->knc_proto) 213 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 214 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 215 nargs->knconf = NULL; 216 } 217 218 if (nargs->fh) { 219 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 220 nargs->fh = NULL; 221 } 222 223 if (nargs->hostname) { 224 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 225 nargs->hostname = NULL; 226 } 227 228 if (nargs->addr) { 229 if (nargs->addr->buf) { 230 ASSERT(nargs->addr->len); 231 kmem_free(nargs->addr->buf, nargs->addr->len); 232 } 233 kmem_free(nargs->addr, sizeof (struct netbuf)); 234 nargs->addr = NULL; 235 } 236 237 if (nargs->syncaddr) { 238 ASSERT(nargs->syncaddr->len); 239 if (nargs->syncaddr->buf) { 240 ASSERT(nargs->syncaddr->len); 241 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 242 } 243 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 244 nargs->syncaddr = NULL; 245 } 246 247 if (nargs->netname) { 248 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 249 nargs->netname = NULL; 250 } 251 252 if (nargs->nfs_ext_u.nfs_extA.secdata) { 253 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 254 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 255 } 256 } 257 258 static int 259 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 260 { 261 262 int error; 263 size_t nlen; /* length of netname */ 264 size_t hlen; /* length of hostname */ 265 char netname[MAXNETNAMELEN+1]; /* server's netname */ 266 struct netbuf addr; /* server's address */ 267 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 268 struct knetconfig *knconf; /* transport knetconfig structure */ 269 struct sec_data *secdata = NULL; /* security data */ 270 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 271 STRUCT_DECL(knetconfig, knconf_tmp); 272 STRUCT_DECL(netbuf, addr_tmp); 273 int flags; 274 struct pathcnf *pc; /* Pathconf */ 275 char *p, *pf; 276 char *userbufptr; 277 278 279 bzero(nargs, sizeof (*nargs)); 280 281 STRUCT_INIT(args, get_udatamodel()); 282 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 283 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 284 return (EFAULT); 285 286 nargs->wsize = STRUCT_FGET(args, wsize); 287 nargs->rsize = STRUCT_FGET(args, rsize); 288 nargs->timeo = STRUCT_FGET(args, timeo); 289 nargs->retrans = STRUCT_FGET(args, retrans); 290 nargs->acregmin = STRUCT_FGET(args, acregmin); 291 nargs->acregmax = STRUCT_FGET(args, acregmax); 292 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 293 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 294 295 flags = STRUCT_FGET(args, flags); 296 nargs->flags = flags; 297 298 299 addr.buf = NULL; 300 syncaddr.buf = NULL; 301 302 /* 303 * Allocate space for a knetconfig structure and 304 * its strings and copy in from user-land. 305 */ 306 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 307 STRUCT_INIT(knconf_tmp, get_udatamodel()); 308 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 309 STRUCT_SIZE(knconf_tmp))) { 310 kmem_free(knconf, sizeof (*knconf)); 311 return (EFAULT); 312 } 313 314 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 315 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 316 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 317 if (get_udatamodel() != DATAMODEL_LP64) { 318 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 319 } else { 320 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 321 } 322 323 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 324 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 325 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 326 if (error) { 327 kmem_free(pf, KNC_STRSIZE); 328 kmem_free(p, KNC_STRSIZE); 329 kmem_free(knconf, sizeof (*knconf)); 330 return (error); 331 } 332 333 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 334 if (error) { 335 kmem_free(pf, KNC_STRSIZE); 336 kmem_free(p, KNC_STRSIZE); 337 kmem_free(knconf, sizeof (*knconf)); 338 return (error); 339 } 340 341 342 knconf->knc_protofmly = pf; 343 knconf->knc_proto = p; 344 345 nargs->knconf = knconf; 346 347 /* Copyin pathconf if there is one */ 348 if (STRUCT_FGETP(args, pathconf) != NULL) { 349 pc = kmem_alloc(sizeof (*pc), KM_SLEEP); 350 error = pathconf_copyin(STRUCT_BUF(args), pc); 351 nargs->pathconf = pc; 352 if (error) 353 goto errout; 354 } 355 356 /* 357 * Get server address 358 */ 359 STRUCT_INIT(addr_tmp, get_udatamodel()); 360 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 361 STRUCT_SIZE(addr_tmp))) { 362 error = EFAULT; 363 goto errout; 364 } 365 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 366 userbufptr = STRUCT_FGETP(addr_tmp, buf); 367 addr.len = STRUCT_FGET(addr_tmp, len); 368 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 369 addr.maxlen = addr.len; 370 if (copyin(userbufptr, addr.buf, addr.len)) { 371 kmem_free(addr.buf, addr.len); 372 error = EFAULT; 373 goto errout; 374 } 375 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 376 377 /* 378 * Get the root fhandle 379 */ 380 381 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) { 382 error = EFAULT; 383 goto errout; 384 } 385 fh->fh_len = NFS_FHSIZE; 386 387 /* 388 * Get server's hostname 389 */ 390 if (flags & NFSMNT_HOSTNAME) { 391 error = copyinstr(STRUCT_FGETP(args, hostname), netname, 392 sizeof (netname), &hlen); 393 if (error) 394 goto errout; 395 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 396 (void) strcpy(nargs->hostname, netname); 397 398 } else { 399 nargs->hostname = NULL; 400 } 401 402 403 /* 404 * If there are syncaddr and netname data, load them in. This is 405 * to support data needed for NFSV4 when AUTH_DH is the negotiated 406 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 407 */ 408 netname[0] = '\0'; 409 if (flags & NFSMNT_SECURE) { 410 if (STRUCT_FGETP(args, syncaddr) == NULL) { 411 error = EINVAL; 412 goto errout; 413 } 414 /* get syncaddr */ 415 STRUCT_INIT(addr_tmp, get_udatamodel()); 416 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 417 STRUCT_SIZE(addr_tmp))) { 418 error = EINVAL; 419 goto errout; 420 } 421 userbufptr = STRUCT_FGETP(addr_tmp, buf); 422 syncaddr.len = STRUCT_FGET(addr_tmp, len); 423 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 424 syncaddr.maxlen = syncaddr.len; 425 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 426 kmem_free(syncaddr.buf, syncaddr.len); 427 error = EFAULT; 428 goto errout; 429 } 430 431 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 432 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 433 434 ASSERT(STRUCT_FGETP(args, netname)); 435 if (copyinstr(STRUCT_FGETP(args, netname), netname, 436 sizeof (netname), &nlen)) { 437 error = EFAULT; 438 goto errout; 439 } 440 441 netname[nlen] = '\0'; 442 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 443 (void) strcpy(nargs->netname, netname); 444 } 445 446 /* 447 * Get the extention data which has the security data structure. 448 * This includes data for AUTH_SYS as well. 449 */ 450 if (flags & NFSMNT_NEWARGS) { 451 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 452 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 453 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 454 /* 455 * Indicating the application is using the new 456 * sec_data structure to pass in the security 457 * data. 458 */ 459 if (STRUCT_FGETP(args, 460 nfs_ext_u.nfs_extA.secdata) != NULL) { 461 error = sec_clnt_loadinfo( 462 (struct sec_data *)STRUCT_FGETP(args, 463 nfs_ext_u.nfs_extA.secdata), &secdata, 464 get_udatamodel()); 465 } 466 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 467 } 468 } 469 470 if (error) 471 goto errout; 472 473 /* 474 * Failover support: 475 * 476 * We may have a linked list of nfs_args structures, 477 * which means the user is looking for failover. If 478 * the mount is either not "read-only" or "soft", 479 * we want to bail out with EINVAL. 480 */ 481 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 482 nargs->nfs_ext_u.nfs_extB.next = 483 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 484 485 errout: 486 if (error) 487 nfs_free_args(nargs, fh); 488 489 return (error); 490 } 491 492 493 /* 494 * nfs mount vfsop 495 * Set up mount info record and attach it to vfs struct. 496 */ 497 static int 498 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 499 { 500 char *data = uap->dataptr; 501 int error; 502 vnode_t *rtvp; /* the server's root */ 503 mntinfo_t *mi; /* mount info, pointed at by vfs */ 504 size_t nlen; /* length of netname */ 505 struct knetconfig *knconf; /* transport knetconfig structure */ 506 struct knetconfig *rdma_knconf; /* rdma transport structure */ 507 rnode_t *rp; 508 struct servinfo *svp; /* nfs server info */ 509 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 510 struct servinfo *svp_head; /* first nfs server info */ 511 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 512 struct sec_data *secdata; /* security data */ 513 struct nfs_args *args = NULL; 514 int flags, addr_type; 515 zone_t *zone = nfs_zone(); 516 zone_t *mntzone = NULL; 517 nfs_fhandle *fhandle = NULL; 518 519 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 520 return (error); 521 522 if (mvp->v_type != VDIR) 523 return (ENOTDIR); 524 525 /* 526 * get arguments 527 * 528 * nfs_args is now versioned and is extensible, so 529 * uap->datalen might be different from sizeof (args) 530 * in a compatible situation. 531 */ 532 more: 533 534 if (!(uap->flags & MS_SYSSPACE)) { 535 if (args == NULL) 536 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 537 else { 538 nfs_free_args(args, fhandle); 539 fhandle = NULL; 540 } 541 if (fhandle == NULL) 542 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP); 543 error = nfs_copyin(data, uap->datalen, args, fhandle); 544 if (error) { 545 if (args) 546 kmem_free(args, sizeof (*args)); 547 return (error); 548 } 549 } else { 550 args = (struct nfs_args *)data; 551 fhandle = (nfs_fhandle *)args->fh; 552 } 553 554 555 flags = args->flags; 556 557 if (uap->flags & MS_REMOUNT) { 558 size_t n; 559 char name[FSTYPSZ]; 560 561 if (uap->flags & MS_SYSSPACE) 562 error = copystr(uap->fstype, name, FSTYPSZ, &n); 563 else 564 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 565 566 if (error) { 567 if (error == ENAMETOOLONG) 568 return (EINVAL); 569 return (error); 570 } 571 572 573 /* 574 * This check is to ensure that the request is a 575 * genuine nfs remount request. 576 */ 577 578 if (strncmp(name, "nfs", 3) != 0) 579 return (EINVAL); 580 581 /* 582 * If the request changes the locking type, disallow the 583 * remount, 584 * because it's questionable whether we can transfer the 585 * locking state correctly. 586 * 587 * Remounts need to save the pathconf information. 588 * Part of the infamous static kludge. 589 */ 590 591 if ((mi = VFTOMI(vfsp)) != NULL) { 592 uint_t new_mi_llock; 593 uint_t old_mi_llock; 594 595 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 596 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 597 if (old_mi_llock != new_mi_llock) 598 return (EBUSY); 599 } 600 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args); 601 602 if (!(uap->flags & MS_SYSSPACE)) { 603 nfs_free_args(args, fhandle); 604 kmem_free(args, sizeof (*args)); 605 } 606 607 return (error); 608 } 609 610 mutex_enter(&mvp->v_lock); 611 if (!(uap->flags & MS_OVERLAY) && 612 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 613 mutex_exit(&mvp->v_lock); 614 if (!(uap->flags & MS_SYSSPACE)) { 615 nfs_free_args(args, fhandle); 616 kmem_free(args, sizeof (*args)); 617 } 618 return (EBUSY); 619 } 620 mutex_exit(&mvp->v_lock); 621 622 /* make sure things are zeroed for errout: */ 623 rtvp = NULL; 624 mi = NULL; 625 secdata = NULL; 626 627 /* 628 * A valid knetconfig structure is required. 629 */ 630 if (!(flags & NFSMNT_KNCONF)) { 631 if (!(uap->flags & MS_SYSSPACE)) { 632 nfs_free_args(args, fhandle); 633 kmem_free(args, sizeof (*args)); 634 } 635 return (EINVAL); 636 } 637 638 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 639 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 640 if (!(uap->flags & MS_SYSSPACE)) { 641 nfs_free_args(args, fhandle); 642 kmem_free(args, sizeof (*args)); 643 } 644 return (EINVAL); 645 } 646 647 648 /* 649 * Allocate a servinfo struct. 650 */ 651 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 652 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 653 if (svp_tail) { 654 svp_2ndlast = svp_tail; 655 svp_tail->sv_next = svp; 656 } else { 657 svp_head = svp; 658 svp_2ndlast = svp; 659 } 660 661 svp_tail = svp; 662 663 /* 664 * Get knetconfig and server address 665 */ 666 svp->sv_knconf = args->knconf; 667 args->knconf = NULL; 668 669 if (args->addr == NULL || args->addr->buf == NULL) { 670 error = EINVAL; 671 goto errout; 672 } 673 674 svp->sv_addr.maxlen = args->addr->maxlen; 675 svp->sv_addr.len = args->addr->len; 676 svp->sv_addr.buf = args->addr->buf; 677 args->addr->buf = NULL; 678 679 /* 680 * Get the root fhandle 681 */ 682 ASSERT(fhandle); 683 684 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 685 svp->sv_fhandle.fh_len = fhandle->fh_len; 686 687 /* 688 * Get server's hostname 689 */ 690 if (flags & NFSMNT_HOSTNAME) { 691 if (args->hostname == NULL) { 692 error = EINVAL; 693 goto errout; 694 } 695 svp->sv_hostnamelen = strlen(args->hostname) + 1; 696 svp->sv_hostname = args->hostname; 697 args->hostname = NULL; 698 } else { 699 char *p = "unknown-host"; 700 svp->sv_hostnamelen = strlen(p) + 1; 701 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 702 (void) strcpy(svp->sv_hostname, p); 703 } 704 705 706 /* 707 * RDMA MOUNT SUPPORT FOR NFS v2: 708 * Establish, is it possible to use RDMA, if so overload the 709 * knconf with rdma specific knconf and free the orignal. 710 */ 711 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 712 /* 713 * Determine the addr type for RDMA, IPv4 or v6. 714 */ 715 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 716 addr_type = AF_INET; 717 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 718 addr_type = AF_INET6; 719 720 if (rdma_reachable(addr_type, &svp->sv_addr, 721 &rdma_knconf) == 0) { 722 /* 723 * If successful, hijack, the orignal knconf and 724 * replace with a new one, depending on the flags. 725 */ 726 svp->sv_origknconf = svp->sv_knconf; 727 svp->sv_knconf = rdma_knconf; 728 knconf = rdma_knconf; 729 } else { 730 if (flags & NFSMNT_TRYRDMA) { 731 #ifdef DEBUG 732 if (rdma_debug) 733 zcmn_err(getzoneid(), CE_WARN, 734 "no RDMA onboard, revert\n"); 735 #endif 736 } 737 738 if (flags & NFSMNT_DORDMA) { 739 /* 740 * If proto=rdma is specified and no RDMA 741 * path to this server is avialable then 742 * ditch this server. 743 * This is not included in the mountable 744 * server list or the replica list. 745 * Check if more servers are specified; 746 * Failover case, otherwise bail out of mount. 747 */ 748 if (args->nfs_args_ext == NFS_ARGS_EXTB && 749 args->nfs_ext_u.nfs_extB.next != NULL) { 750 data = (char *) 751 args->nfs_ext_u.nfs_extB.next; 752 if (uap->flags & MS_RDONLY && 753 !(flags & NFSMNT_SOFT)) { 754 if (svp_head->sv_next == NULL) { 755 svp_tail = NULL; 756 svp_2ndlast = NULL; 757 sv_free(svp_head); 758 goto more; 759 } else { 760 svp_tail = svp_2ndlast; 761 svp_2ndlast->sv_next = 762 NULL; 763 sv_free(svp); 764 goto more; 765 } 766 } 767 } else { 768 /* 769 * This is the last server specified 770 * in the nfs_args list passed down 771 * and its not rdma capable. 772 */ 773 if (svp_head->sv_next == NULL) { 774 /* 775 * Is this the only one 776 */ 777 error = EINVAL; 778 #ifdef DEBUG 779 if (rdma_debug) 780 zcmn_err(getzoneid(), 781 CE_WARN, 782 "No RDMA srv"); 783 #endif 784 goto errout; 785 } else { 786 /* 787 * There is list, since some 788 * servers specified before 789 * this passed all requirements 790 */ 791 svp_tail = svp_2ndlast; 792 svp_2ndlast->sv_next = NULL; 793 sv_free(svp); 794 goto proceed; 795 } 796 } 797 } 798 } 799 } 800 801 /* 802 * Get the extention data which has the new security data structure. 803 */ 804 if (flags & NFSMNT_NEWARGS) { 805 switch (args->nfs_args_ext) { 806 case NFS_ARGS_EXTA: 807 case NFS_ARGS_EXTB: 808 /* 809 * Indicating the application is using the new 810 * sec_data structure to pass in the security 811 * data. 812 */ 813 secdata = args->nfs_ext_u.nfs_extA.secdata; 814 if (secdata == NULL) { 815 error = EINVAL; 816 } else { 817 /* 818 * Need to validate the flavor here if 819 * sysspace, userspace was already 820 * validate from the nfs_copyin function. 821 */ 822 switch (secdata->rpcflavor) { 823 case AUTH_NONE: 824 case AUTH_UNIX: 825 case AUTH_LOOPBACK: 826 case AUTH_DES: 827 case RPCSEC_GSS: 828 break; 829 default: 830 error = EINVAL; 831 goto errout; 832 } 833 } 834 args->nfs_ext_u.nfs_extA.secdata = NULL; 835 break; 836 837 default: 838 error = EINVAL; 839 break; 840 } 841 } else if (flags & NFSMNT_SECURE) { 842 /* 843 * Keep this for backward compatibility to support 844 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 845 */ 846 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 847 error = EINVAL; 848 goto errout; 849 } 850 851 /* 852 * get time sync address. 853 */ 854 if (args->syncaddr == NULL) { 855 error = EFAULT; 856 goto errout; 857 } 858 859 /* 860 * Move security related data to the sec_data structure. 861 */ 862 { 863 dh_k4_clntdata_t *data; 864 char *pf, *p; 865 866 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 867 if (flags & NFSMNT_RPCTIMESYNC) 868 secdata->flags |= AUTH_F_RPCTIMESYNC; 869 data = kmem_alloc(sizeof (*data), KM_SLEEP); 870 bcopy(args->syncaddr, &data->syncaddr, 871 sizeof (*args->syncaddr)); 872 873 874 /* 875 * duplicate the knconf information for the 876 * new opaque data. 877 */ 878 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 879 *data->knconf = *knconf; 880 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 881 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 882 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 883 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 884 data->knconf->knc_protofmly = pf; 885 data->knconf->knc_proto = p; 886 887 /* move server netname to the sec_data structure */ 888 nlen = strlen(args->hostname) + 1; 889 if (nlen != 0) { 890 data->netname = kmem_alloc(nlen, KM_SLEEP); 891 bcopy(args->hostname, data->netname, nlen); 892 data->netnamelen = (int)nlen; 893 } 894 secdata->secmod = secdata->rpcflavor = AUTH_DES; 895 secdata->data = (caddr_t)data; 896 } 897 } else { 898 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 899 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 900 secdata->data = NULL; 901 } 902 svp->sv_secdata = secdata; 903 904 /* 905 * See bug 1180236. 906 * If mount secure failed, we will fall back to AUTH_NONE 907 * and try again. nfs3rootvp() will turn this back off. 908 * 909 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 910 * The server does not care if these procedures have the proper 911 * authentication flavor, so if mount retries using AUTH_NONE 912 * that does not require a credential setup for root then the 913 * automounter would work without requiring root to be 914 * keylogged into AUTH_DES. 915 */ 916 if (secdata->rpcflavor != AUTH_UNIX && 917 secdata->rpcflavor != AUTH_LOOPBACK) 918 secdata->flags |= AUTH_F_TRYNONE; 919 920 /* 921 * Failover support: 922 * 923 * We may have a linked list of nfs_args structures, 924 * which means the user is looking for failover. If 925 * the mount is either not "read-only" or "soft", 926 * we want to bail out with EINVAL. 927 */ 928 if (args->nfs_args_ext == NFS_ARGS_EXTB && 929 args->nfs_ext_u.nfs_extB.next != NULL) { 930 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 931 data = (char *)args->nfs_ext_u.nfs_extB.next; 932 goto more; 933 } 934 error = EINVAL; 935 goto errout; 936 } 937 938 /* 939 * Determine the zone we're being mounted into. 940 */ 941 zone_hold(mntzone = zone); /* start with this assumption */ 942 if (getzoneid() == GLOBAL_ZONEID) { 943 zone_rele(mntzone); 944 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 945 ASSERT(mntzone != NULL); 946 if (mntzone != zone) { 947 error = EBUSY; 948 goto errout; 949 } 950 } 951 952 if (is_system_labeled()) { 953 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 954 svp->sv_knconf, cr); 955 956 if (error > 0) 957 goto errout; 958 959 if (error == -1) { 960 /* change mount to read-only to prevent write-down */ 961 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 962 } 963 } 964 965 /* 966 * Stop the mount from going any further if the zone is going away. 967 */ 968 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 969 error = EBUSY; 970 goto errout; 971 } 972 973 /* 974 * Get root vnode. 975 */ 976 proceed: 977 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 978 979 if (error) 980 goto errout; 981 982 /* 983 * Set option fields in the mount info record 984 */ 985 mi = VTOMI(rtvp); 986 987 if (svp_head->sv_next) 988 mi->mi_flags |= MI_LLOCK; 989 990 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 991 if (!error) { 992 /* static pathconf kludge */ 993 error = pathconf_get(mi, args); 994 } 995 996 errout: 997 if (rtvp != NULL) { 998 if (error) { 999 rp = VTOR(rtvp); 1000 if (rp->r_flags & RHASHED) 1001 rp_rmhash(rp); 1002 } 1003 VN_RELE(rtvp); 1004 } 1005 1006 if (error) { 1007 sv_free(svp_head); 1008 if (mi != NULL) { 1009 nfs_async_stop(vfsp); 1010 nfs_async_manager_stop(vfsp); 1011 if (mi->mi_io_kstats) { 1012 kstat_delete(mi->mi_io_kstats); 1013 mi->mi_io_kstats = NULL; 1014 } 1015 if (mi->mi_ro_kstats) { 1016 kstat_delete(mi->mi_ro_kstats); 1017 mi->mi_ro_kstats = NULL; 1018 } 1019 nfs_free_mi(mi); 1020 } 1021 } 1022 1023 if (!(uap->flags & MS_SYSSPACE)) { 1024 nfs_free_args(args, fhandle); 1025 kmem_free(args, sizeof (*args)); 1026 } 1027 1028 if (mntzone != NULL) 1029 zone_rele(mntzone); 1030 1031 return (error); 1032 } 1033 1034 /* 1035 * The pathconf information is kept on a linked list of kmem_alloc'ed 1036 * structs. We search the list & add a new struct iff there is no other 1037 * struct with the same information. 1038 * See sys/pathconf.h for ``the rest of the story.'' 1039 */ 1040 static struct pathcnf *allpc = NULL; 1041 1042 static int 1043 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc) 1044 { 1045 STRUCT_DECL(pathcnf, pc_tmp); 1046 STRUCT_HANDLE(nfs_args, ap); 1047 int i; 1048 model_t model; 1049 1050 model = get_udatamodel(); 1051 STRUCT_INIT(pc_tmp, model); 1052 STRUCT_SET_HANDLE(ap, model, args); 1053 1054 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 1055 STRUCT_FGETP(ap, pathconf) != NULL) { 1056 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 1057 STRUCT_SIZE(pc_tmp))) 1058 return (EFAULT); 1059 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 1060 return (EINVAL); 1061 1062 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 1063 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 1064 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 1065 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 1066 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 1067 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 1068 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 1069 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 1070 for (i = 0; i < _PC_N; i++) 1071 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 1072 } 1073 return (0); 1074 } 1075 1076 static int 1077 pathconf_get(struct mntinfo *mi, struct nfs_args *args) 1078 { 1079 struct pathcnf *p, *pc; 1080 1081 pc = args->pathconf; 1082 if (mi->mi_pathconf != NULL) { 1083 pathconf_rele(mi); 1084 mi->mi_pathconf = NULL; 1085 } 1086 1087 if (args->flags & NFSMNT_POSIX && args->pathconf != NULL) { 1088 if (_PC_ISSET(_PC_ERROR, pc->pc_mask)) 1089 return (EINVAL); 1090 1091 for (p = allpc; p != NULL; p = p->pc_next) { 1092 if (PCCMP(p, pc) == 0) 1093 break; 1094 } 1095 if (p != NULL) { 1096 mi->mi_pathconf = p; 1097 p->pc_refcnt++; 1098 } else { 1099 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1100 bcopy(pc, p, sizeof (struct pathcnf)); 1101 p->pc_next = allpc; 1102 p->pc_refcnt = 1; 1103 allpc = mi->mi_pathconf = p; 1104 } 1105 } 1106 return (0); 1107 } 1108 1109 /* 1110 * release the static pathconf information 1111 */ 1112 static void 1113 pathconf_rele(struct mntinfo *mi) 1114 { 1115 if (mi->mi_pathconf != NULL) { 1116 if (--mi->mi_pathconf->pc_refcnt == 0) { 1117 struct pathcnf *p; 1118 struct pathcnf *p2; 1119 1120 p2 = p = allpc; 1121 while (p != NULL && p != mi->mi_pathconf) { 1122 p2 = p; 1123 p = p->pc_next; 1124 } 1125 if (p == NULL) { 1126 panic("mi->pathconf"); 1127 /*NOTREACHED*/ 1128 } 1129 if (p == allpc) 1130 allpc = p->pc_next; 1131 else 1132 p2->pc_next = p->pc_next; 1133 kmem_free(p, sizeof (*p)); 1134 mi->mi_pathconf = NULL; 1135 } 1136 } 1137 } 1138 1139 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 1140 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 1141 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 1142 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 1143 1144 static int 1145 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1146 int flags, cred_t *cr, zone_t *zone) 1147 { 1148 vnode_t *rtvp; 1149 mntinfo_t *mi; 1150 dev_t nfs_dev; 1151 struct vattr va; 1152 int error; 1153 rnode_t *rp; 1154 int i; 1155 struct nfs_stats *nfsstatsp; 1156 cred_t *lcr = NULL, *tcr = cr; 1157 1158 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1159 ASSERT(nfsstatsp != NULL); 1160 1161 /* 1162 * Create a mount record and link it to the vfs struct. 1163 */ 1164 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1165 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1166 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1167 mi->mi_flags = MI_ACL | MI_EXTATTR; 1168 if (!(flags & NFSMNT_SOFT)) 1169 mi->mi_flags |= MI_HARD; 1170 if ((flags & NFSMNT_SEMISOFT)) 1171 mi->mi_flags |= MI_SEMISOFT; 1172 if ((flags & NFSMNT_NOPRINT)) 1173 mi->mi_flags |= MI_NOPRINT; 1174 if (flags & NFSMNT_INT) 1175 mi->mi_flags |= MI_INT; 1176 mi->mi_retrans = NFS_RETRIES; 1177 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1178 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1179 mi->mi_timeo = nfs_cots_timeo; 1180 else 1181 mi->mi_timeo = NFS_TIMEO; 1182 mi->mi_prog = NFS_PROGRAM; 1183 mi->mi_vers = NFS_VERSION; 1184 mi->mi_rfsnames = rfsnames_v2; 1185 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 1186 mi->mi_call_type = call_type_v2; 1187 mi->mi_ss_call_type = ss_call_type_v2; 1188 mi->mi_timer_type = timer_type_v2; 1189 mi->mi_aclnames = aclnames_v2; 1190 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 1191 mi->mi_acl_call_type = acl_call_type_v2; 1192 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 1193 mi->mi_acl_timer_type = acl_timer_type_v2; 1194 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1195 mi->mi_servers = svp; 1196 mi->mi_curr_serv = svp; 1197 mi->mi_acregmin = SEC2HR(ACREGMIN); 1198 mi->mi_acregmax = SEC2HR(ACREGMAX); 1199 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1200 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1201 1202 if (nfs_dynamic) 1203 mi->mi_flags |= MI_DYNAMIC; 1204 1205 if (flags & NFSMNT_DIRECTIO) 1206 mi->mi_flags |= MI_DIRECTIO; 1207 1208 /* 1209 * Make a vfs struct for nfs. We do this here instead of below 1210 * because rtvp needs a vfs before we can do a getattr on it. 1211 * 1212 * Assign a unique device id to the mount 1213 */ 1214 mutex_enter(&nfs_minor_lock); 1215 do { 1216 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1217 nfs_dev = makedevice(nfs_major, nfs_minor); 1218 } while (vfs_devismounted(nfs_dev)); 1219 mutex_exit(&nfs_minor_lock); 1220 1221 vfsp->vfs_dev = nfs_dev; 1222 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 1223 vfsp->vfs_data = (caddr_t)mi; 1224 vfsp->vfs_fstype = nfsfstyp; 1225 vfsp->vfs_bsize = NFS_MAXDATA; 1226 1227 /* 1228 * Initialize fields used to support async putpage operations. 1229 */ 1230 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1231 mi->mi_async_clusters[i] = nfs_async_clusters; 1232 mi->mi_async_init_clusters = nfs_async_clusters; 1233 mi->mi_async_curr[NFS_ASYNC_QUEUE] = 1234 mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0]; 1235 mi->mi_max_threads = nfs_max_threads; 1236 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1237 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1238 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL); 1239 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL, 1240 CV_DEFAULT, NULL); 1241 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1242 1243 mi->mi_vfsp = vfsp; 1244 zone_hold(mi->mi_zone = zone); 1245 nfs_mi_zonelist_add(mi); 1246 1247 /* 1248 * Make the root vnode, use it to get attributes, 1249 * then remake it with the attributes. 1250 */ 1251 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 1252 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1253 1254 va.va_mask = AT_ALL; 1255 1256 /* 1257 * If the uid is set then set the creds for secure mounts 1258 * by proxy processes such as automountd. 1259 */ 1260 if (svp->sv_secdata->uid != 0 && 1261 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1262 lcr = crdup(cr); 1263 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1264 tcr = lcr; 1265 } 1266 1267 error = nfsgetattr(rtvp, &va, tcr); 1268 if (error) 1269 goto bad; 1270 rtvp->v_type = va.va_type; 1271 1272 /* 1273 * Poll every server to get the filesystem stats; we're 1274 * only interested in the server's transfer size, and we 1275 * want the minimum. 1276 * 1277 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1278 * which is only for the mount operation. 1279 */ 1280 1281 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 1282 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 1283 1284 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1285 struct nfsstatfs fs; 1286 int douprintf; 1287 1288 douprintf = 1; 1289 mi->mi_curr_serv = svp; 1290 1291 error = rfs2call(mi, RFS_STATFS, xdr_fhandle, 1292 (caddr_t)svp->sv_fhandle.fh_buf, xdr_statfs, (caddr_t)&fs, 1293 tcr, &douprintf, &fs.fs_status, 0, NULL); 1294 if (error) 1295 goto bad; 1296 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1297 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1298 } 1299 mi->mi_curr_serv = mi->mi_servers; 1300 mi->mi_curread = mi->mi_tsize; 1301 mi->mi_curwrite = mi->mi_stsize; 1302 1303 /* 1304 * Start the manager thread responsible for handling async worker 1305 * threads. 1306 */ 1307 VFS_HOLD(vfsp); /* add reference for thread */ 1308 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1309 vfsp, 0, minclsyspri); 1310 ASSERT(mi->mi_manager_thread != NULL); 1311 1312 /* 1313 * Initialize kstats 1314 */ 1315 nfs_mnt_kstat_init(vfsp); 1316 1317 mi->mi_type = rtvp->v_type; 1318 1319 *rtvpp = rtvp; 1320 if (lcr != NULL) 1321 crfree(lcr); 1322 1323 return (0); 1324 bad: 1325 /* 1326 * An error occurred somewhere, need to clean up... 1327 * We need to release our reference to the root vnode and 1328 * destroy the mntinfo struct that we just created. 1329 */ 1330 if (lcr != NULL) 1331 crfree(lcr); 1332 rp = VTOR(rtvp); 1333 if (rp->r_flags & RHASHED) 1334 rp_rmhash(rp); 1335 VN_RELE(rtvp); 1336 nfs_async_stop(vfsp); 1337 nfs_async_manager_stop(vfsp); 1338 if (mi->mi_io_kstats) { 1339 kstat_delete(mi->mi_io_kstats); 1340 mi->mi_io_kstats = NULL; 1341 } 1342 if (mi->mi_ro_kstats) { 1343 kstat_delete(mi->mi_ro_kstats); 1344 mi->mi_ro_kstats = NULL; 1345 } 1346 nfs_free_mi(mi); 1347 *rtvpp = NULL; 1348 return (error); 1349 } 1350 1351 /* 1352 * vfs operations 1353 */ 1354 static int 1355 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1356 { 1357 mntinfo_t *mi; 1358 ushort_t omax; 1359 1360 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1361 return (EPERM); 1362 1363 mi = VFTOMI(vfsp); 1364 if (flag & MS_FORCE) { 1365 1366 vfsp->vfs_flag |= VFS_UNMOUNTED; 1367 1368 /* 1369 * We are about to stop the async manager. 1370 * Let every one know not to schedule any 1371 * more async requests. 1372 */ 1373 mutex_enter(&mi->mi_async_lock); 1374 mi->mi_max_threads = 0; 1375 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 1376 mutex_exit(&mi->mi_async_lock); 1377 1378 /* 1379 * We need to stop the manager thread explicitly; the worker 1380 * threads can time out and exit on their own. 1381 */ 1382 nfs_async_manager_stop(vfsp); 1383 destroy_rtable(vfsp, cr); 1384 if (mi->mi_io_kstats) { 1385 kstat_delete(mi->mi_io_kstats); 1386 mi->mi_io_kstats = NULL; 1387 } 1388 if (mi->mi_ro_kstats) { 1389 kstat_delete(mi->mi_ro_kstats); 1390 mi->mi_ro_kstats = NULL; 1391 } 1392 return (0); 1393 } 1394 /* 1395 * Wait until all asynchronous putpage operations on 1396 * this file system are complete before flushing rnodes 1397 * from the cache. 1398 */ 1399 omax = mi->mi_max_threads; 1400 if (nfs_async_stop_sig(vfsp)) { 1401 return (EINTR); 1402 } 1403 rflush(vfsp, cr); 1404 /* 1405 * If there are any active vnodes on this file system, 1406 * then the file system is busy and can't be umounted. 1407 */ 1408 if (check_rtable(vfsp)) { 1409 mutex_enter(&mi->mi_async_lock); 1410 mi->mi_max_threads = omax; 1411 mutex_exit(&mi->mi_async_lock); 1412 return (EBUSY); 1413 } 1414 /* 1415 * The unmount can't fail from now on; stop the manager thread. 1416 */ 1417 nfs_async_manager_stop(vfsp); 1418 /* 1419 * Destroy all rnodes belonging to this file system from the 1420 * rnode hash queues and purge any resources allocated to 1421 * them. 1422 */ 1423 destroy_rtable(vfsp, cr); 1424 if (mi->mi_io_kstats) { 1425 kstat_delete(mi->mi_io_kstats); 1426 mi->mi_io_kstats = NULL; 1427 } 1428 if (mi->mi_ro_kstats) { 1429 kstat_delete(mi->mi_ro_kstats); 1430 mi->mi_ro_kstats = NULL; 1431 } 1432 return (0); 1433 } 1434 1435 /* 1436 * find root of nfs 1437 */ 1438 static int 1439 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1440 { 1441 mntinfo_t *mi; 1442 vnode_t *vp; 1443 servinfo_t *svp; 1444 rnode_t *rp; 1445 int error = 0; 1446 1447 mi = VFTOMI(vfsp); 1448 1449 if (nfs_zone() != mi->mi_zone) 1450 return (EPERM); 1451 1452 svp = mi->mi_curr_serv; 1453 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1454 mutex_enter(&svp->sv_lock); 1455 svp->sv_flags &= ~SV_ROOT_STALE; 1456 mutex_exit(&svp->sv_lock); 1457 error = ENOENT; 1458 } 1459 1460 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1461 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1462 1463 /* 1464 * if the SV_ROOT_STALE flag was reset above, reset the 1465 * RSTALE flag if needed and return an error 1466 */ 1467 if (error == ENOENT) { 1468 rp = VTOR(vp); 1469 if (svp && rp->r_flags & RSTALE) { 1470 mutex_enter(&rp->r_statelock); 1471 rp->r_flags &= ~RSTALE; 1472 mutex_exit(&rp->r_statelock); 1473 } 1474 VN_RELE(vp); 1475 return (error); 1476 } 1477 1478 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1479 1480 vp->v_type = mi->mi_type; 1481 1482 *vpp = vp; 1483 1484 return (0); 1485 } 1486 1487 /* 1488 * Get file system statistics. 1489 */ 1490 static int 1491 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1492 { 1493 int error; 1494 mntinfo_t *mi; 1495 struct nfsstatfs fs; 1496 int douprintf; 1497 failinfo_t fi; 1498 vnode_t *vp; 1499 1500 error = nfs_root(vfsp, &vp); 1501 if (error) 1502 return (error); 1503 1504 mi = VFTOMI(vfsp); 1505 douprintf = 1; 1506 fi.vp = vp; 1507 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1508 fi.copyproc = nfscopyfh; 1509 fi.lookupproc = nfslookup; 1510 fi.xattrdirproc = acl_getxattrdir2; 1511 1512 error = rfs2call(mi, RFS_STATFS, xdr_fhandle, (caddr_t)VTOFH(vp), 1513 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, &fs.fs_status, 0, 1514 &fi); 1515 1516 if (!error) { 1517 error = geterrno(fs.fs_status); 1518 if (!error) { 1519 mutex_enter(&mi->mi_lock); 1520 if (mi->mi_stsize) { 1521 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1522 } else { 1523 mi->mi_stsize = fs.fs_tsize; 1524 mi->mi_curwrite = mi->mi_stsize; 1525 } 1526 mutex_exit(&mi->mi_lock); 1527 sbp->f_bsize = fs.fs_bsize; 1528 sbp->f_frsize = fs.fs_bsize; 1529 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1530 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1531 /* 1532 * Some servers may return negative available 1533 * block counts. They may do this because they 1534 * calculate the number of available blocks by 1535 * subtracting the number of used blocks from 1536 * the total number of blocks modified by the 1537 * minimum free value. For example, if the 1538 * minumum free percentage is 10 and the file 1539 * system is greater than 90 percent full, then 1540 * 90 percent of the total blocks minus the 1541 * actual number of used blocks may be a 1542 * negative number. 1543 * 1544 * In this case, we need to sign extend the 1545 * negative number through the assignment from 1546 * the 32 bit bavail count to the 64 bit bavail 1547 * count. 1548 * 1549 * We need to be able to discern between there 1550 * just being a lot of available blocks on the 1551 * file system and the case described above. 1552 * We are making the assumption that it does 1553 * not make sense to have more available blocks 1554 * than there are free blocks. So, if there 1555 * are, then we treat the number as if it were 1556 * a negative number and arrange to have it 1557 * sign extended when it is converted from 32 1558 * bits to 64 bits. 1559 */ 1560 if (fs.fs_bavail <= fs.fs_bfree) 1561 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1562 else { 1563 sbp->f_bavail = 1564 (fsblkcnt64_t)((long)fs.fs_bavail); 1565 } 1566 sbp->f_files = (fsfilcnt64_t)-1; 1567 sbp->f_ffree = (fsfilcnt64_t)-1; 1568 sbp->f_favail = (fsfilcnt64_t)-1; 1569 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1570 (void) strncpy(sbp->f_basetype, 1571 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1572 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1573 sbp->f_namemax = (uint32_t)-1; 1574 } else { 1575 PURGE_STALE_FH(error, vp, CRED()); 1576 } 1577 } 1578 1579 VN_RELE(vp); 1580 1581 return (error); 1582 } 1583 1584 static kmutex_t nfs_syncbusy; 1585 1586 /* 1587 * Flush dirty nfs files for file system vfsp. 1588 * If vfsp == NULL, all nfs files are flushed. 1589 */ 1590 /* ARGSUSED */ 1591 static int 1592 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1593 { 1594 /* 1595 * Cross-zone calls are OK here, since this translates to a 1596 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1597 */ 1598 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1599 rflush(vfsp, cr); 1600 mutex_exit(&nfs_syncbusy); 1601 } 1602 return (0); 1603 } 1604 1605 /* ARGSUSED */ 1606 static int 1607 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1608 { 1609 int error; 1610 vnode_t *vp; 1611 struct vattr va; 1612 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1613 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1614 1615 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1616 return (EPERM); 1617 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1618 #ifdef DEBUG 1619 zcmn_err(zoneid, CE_WARN, 1620 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1621 (int)(sizeof (*nfsfidp) - sizeof (short))); 1622 #endif 1623 *vpp = NULL; 1624 return (ESTALE); 1625 } 1626 1627 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1628 gethrtime(), CRED(), NULL, NULL); 1629 1630 if (VTOR(vp)->r_flags & RSTALE) { 1631 VN_RELE(vp); 1632 *vpp = NULL; 1633 return (ENOENT); 1634 } 1635 1636 if (vp->v_type == VNON) { 1637 va.va_mask = AT_ALL; 1638 error = nfsgetattr(vp, &va, CRED()); 1639 if (error) { 1640 VN_RELE(vp); 1641 *vpp = NULL; 1642 return (error); 1643 } 1644 vp->v_type = va.va_type; 1645 } 1646 1647 *vpp = vp; 1648 1649 return (0); 1650 } 1651 1652 /* ARGSUSED */ 1653 static int 1654 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1655 { 1656 vnode_t *rtvp; 1657 char root_hostname[SYS_NMLN+1]; 1658 struct servinfo *svp; 1659 int error; 1660 int vfsflags; 1661 size_t size; 1662 char *root_path; 1663 struct pathname pn; 1664 char *name; 1665 cred_t *cr; 1666 struct nfs_args args; /* nfs mount arguments */ 1667 static char token[10]; 1668 1669 bzero(&args, sizeof (args)); 1670 1671 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1672 clkset(-1L); /* hack for now - until we get time svc? */ 1673 1674 if (why == ROOT_REMOUNT) { 1675 /* 1676 * Shouldn't happen. 1677 */ 1678 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1679 } 1680 1681 if (why == ROOT_UNMOUNT) { 1682 /* 1683 * Nothing to do for NFS. 1684 */ 1685 return (0); 1686 } 1687 1688 /* 1689 * why == ROOT_INIT 1690 */ 1691 1692 name = token; 1693 *name = 0; 1694 getfsname("root", name, sizeof (token)); 1695 1696 pn_alloc(&pn); 1697 root_path = pn.pn_path; 1698 1699 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1700 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1701 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1702 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1703 1704 /* 1705 * Get server address 1706 * Get the root fhandle 1707 * Get server's transport 1708 * Get server's hostname 1709 * Get options 1710 */ 1711 args.addr = &svp->sv_addr; 1712 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1713 args.knconf = svp->sv_knconf; 1714 args.hostname = root_hostname; 1715 vfsflags = 0; 1716 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1717 &args, &vfsflags)) { 1718 nfs_cmn_err(error, CE_WARN, 1719 "nfs_mountroot: mount_root failed: %m"); 1720 sv_free(svp); 1721 pn_free(&pn); 1722 return (error); 1723 } 1724 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1725 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1726 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1727 (void) strcpy(svp->sv_hostname, root_hostname); 1728 1729 /* 1730 * Force root partition to always be mounted with AUTH_UNIX for now 1731 */ 1732 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1733 svp->sv_secdata->secmod = AUTH_UNIX; 1734 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1735 svp->sv_secdata->data = NULL; 1736 1737 cr = crgetcred(); 1738 rtvp = NULL; 1739 1740 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1741 1742 crfree(cr); 1743 1744 if (error) { 1745 pn_free(&pn); 1746 sv_free(svp); 1747 return (error); 1748 } 1749 1750 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1751 if (error) { 1752 nfs_cmn_err(error, CE_WARN, 1753 "nfs_mountroot: invalid root mount options"); 1754 pn_free(&pn); 1755 goto errout; 1756 } 1757 1758 (void) vfs_lock_wait(vfsp); 1759 vfs_add(NULL, vfsp, vfsflags); 1760 vfs_unlock(vfsp); 1761 1762 size = strlen(svp->sv_hostname); 1763 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1764 rootfs.bo_name[size] = ':'; 1765 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1766 1767 pn_free(&pn); 1768 1769 errout: 1770 if (error) { 1771 sv_free(svp); 1772 nfs_async_stop(vfsp); 1773 nfs_async_manager_stop(vfsp); 1774 } 1775 1776 if (rtvp != NULL) 1777 VN_RELE(rtvp); 1778 1779 return (error); 1780 } 1781 1782 /* 1783 * Initialization routine for VFS routines. Should only be called once 1784 */ 1785 int 1786 nfs_vfsinit(void) 1787 { 1788 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1789 return (0); 1790 } 1791 1792 void 1793 nfs_vfsfini(void) 1794 { 1795 mutex_destroy(&nfs_syncbusy); 1796 } 1797 1798 void 1799 nfs_freevfs(vfs_t *vfsp) 1800 { 1801 mntinfo_t *mi; 1802 servinfo_t *svp; 1803 1804 /* free up the resources */ 1805 mi = VFTOMI(vfsp); 1806 pathconf_rele(mi); 1807 svp = mi->mi_servers; 1808 mi->mi_servers = mi->mi_curr_serv = NULL; 1809 sv_free(svp); 1810 1811 /* 1812 * By this time we should have already deleted the 1813 * mi kstats in the unmount code. If they are still around 1814 * somethings wrong 1815 */ 1816 ASSERT(mi->mi_io_kstats == NULL); 1817 nfs_free_mi(mi); 1818 } 1819