1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 26 * All rights reserved. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/vfs.h> 36 #include <sys/vnode.h> 37 #include <sys/pathname.h> 38 #include <sys/sysmacros.h> 39 #include <sys/kmem.h> 40 #include <sys/mkdev.h> 41 #include <sys/mount.h> 42 #include <sys/mntent.h> 43 #include <sys/statvfs.h> 44 #include <sys/errno.h> 45 #include <sys/debug.h> 46 #include <sys/cmn_err.h> 47 #include <sys/utsname.h> 48 #include <sys/bootconf.h> 49 #include <sys/modctl.h> 50 #include <sys/acl.h> 51 #include <sys/flock.h> 52 #include <sys/policy.h> 53 #include <sys/zone.h> 54 #include <sys/class.h> 55 #include <sys/socket.h> 56 #include <sys/netconfig.h> 57 #include <sys/mntent.h> 58 #include <sys/tsol/label.h> 59 60 #include <rpc/types.h> 61 #include <rpc/auth.h> 62 #include <rpc/clnt.h> 63 64 #include <nfs/nfs.h> 65 #include <nfs/nfs_clnt.h> 66 #include <nfs/rnode.h> 67 #include <nfs/mount.h> 68 #include <nfs/nfs_acl.h> 69 70 #include <fs/fs_subr.h> 71 72 /* 73 * From rpcsec module (common/rpcsec). 74 */ 75 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 76 extern void sec_clnt_freeinfo(struct sec_data *); 77 78 static int pathconf_copyin(struct nfs_args *, struct pathcnf *); 79 static int pathconf_get(struct mntinfo *, struct nfs_args *); 80 static void pathconf_rele(struct mntinfo *); 81 82 /* 83 * The order and contents of this structure must be kept in sync with that of 84 * rfsreqcnt_v2_tmpl in nfs_stats.c 85 */ 86 static char *rfsnames_v2[] = { 87 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 88 "unused", "write", "create", "remove", "rename", "link", "symlink", 89 "mkdir", "rmdir", "readdir", "fsstat" 90 }; 91 92 /* 93 * This table maps from NFS protocol number into call type. 94 * Zero means a "Lookup" type call 95 * One means a "Read" type call 96 * Two means a "Write" type call 97 * This is used to select a default time-out. 98 */ 99 static uchar_t call_type_v2[] = { 100 0, 0, 1, 0, 0, 0, 1, 101 0, 2, 2, 2, 2, 2, 2, 102 2, 2, 1, 0 103 }; 104 105 /* 106 * Similar table, but to determine which timer to use 107 * (only real reads and writes!) 108 */ 109 static uchar_t timer_type_v2[] = { 110 0, 0, 0, 0, 0, 0, 1, 111 0, 2, 0, 0, 0, 0, 0, 112 0, 0, 1, 0 113 }; 114 115 /* 116 * This table maps from NFS protocol number into a call type 117 * for the semisoft mount option. 118 * Zero means do not repeat operation. 119 * One means repeat. 120 */ 121 static uchar_t ss_call_type_v2[] = { 122 0, 0, 1, 0, 0, 0, 0, 123 0, 1, 1, 1, 1, 1, 1, 124 1, 1, 0, 0 125 }; 126 127 /* 128 * nfs vfs operations. 129 */ 130 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 131 static int nfs_unmount(vfs_t *, int, cred_t *); 132 static int nfs_root(vfs_t *, vnode_t **); 133 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 134 static int nfs_sync(vfs_t *, short, cred_t *); 135 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 136 static int nfs_mountroot(vfs_t *, whymountroot_t); 137 static void nfs_freevfs(vfs_t *); 138 139 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 140 int, cred_t *, zone_t *); 141 142 /* 143 * Initialize the vfs structure 144 */ 145 146 int nfsfstyp; 147 vfsops_t *nfs_vfsops; 148 149 /* 150 * Debug variable to check for rdma based 151 * transport startup and cleanup. Controlled 152 * through /etc/system. Off by default. 153 */ 154 int rdma_debug = 0; 155 156 int 157 nfsinit(int fstyp, char *name) 158 { 159 static const fs_operation_def_t nfs_vfsops_template[] = { 160 VFSNAME_MOUNT, nfs_mount, 161 VFSNAME_UNMOUNT, nfs_unmount, 162 VFSNAME_ROOT, nfs_root, 163 VFSNAME_STATVFS, nfs_statvfs, 164 VFSNAME_SYNC, (fs_generic_func_p) nfs_sync, 165 VFSNAME_VGET, nfs_vget, 166 VFSNAME_MOUNTROOT, nfs_mountroot, 167 VFSNAME_FREEVFS, (fs_generic_func_p)nfs_freevfs, 168 NULL, NULL 169 }; 170 int error; 171 172 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 173 if (error != 0) { 174 zcmn_err(GLOBAL_ZONEID, CE_WARN, 175 "nfsinit: bad vfs ops template"); 176 return (error); 177 } 178 179 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 180 if (error != 0) { 181 (void) vfs_freevfsops_by_type(fstyp); 182 zcmn_err(GLOBAL_ZONEID, CE_WARN, 183 "nfsinit: bad vnode ops template"); 184 return (error); 185 } 186 187 188 nfsfstyp = fstyp; 189 190 return (0); 191 } 192 193 void 194 nfsfini(void) 195 { 196 } 197 198 static void 199 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 200 { 201 202 if (fh) 203 kmem_free(fh, sizeof (*fh)); 204 205 if (nargs->pathconf) { 206 kmem_free(nargs->pathconf, sizeof (struct pathcnf)); 207 nargs->pathconf = NULL; 208 } 209 210 if (nargs->knconf) { 211 if (nargs->knconf->knc_protofmly) 212 kmem_free(nargs->knconf->knc_protofmly, 213 KNC_STRSIZE); 214 if (nargs->knconf->knc_proto) 215 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 216 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 217 nargs->knconf = NULL; 218 } 219 220 if (nargs->fh) { 221 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 222 nargs->fh = NULL; 223 } 224 225 if (nargs->hostname) { 226 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 227 nargs->hostname = NULL; 228 } 229 230 if (nargs->addr) { 231 if (nargs->addr->buf) { 232 ASSERT(nargs->addr->len); 233 kmem_free(nargs->addr->buf, nargs->addr->len); 234 } 235 kmem_free(nargs->addr, sizeof (struct netbuf)); 236 nargs->addr = NULL; 237 } 238 239 if (nargs->syncaddr) { 240 ASSERT(nargs->syncaddr->len); 241 if (nargs->syncaddr->buf) { 242 ASSERT(nargs->syncaddr->len); 243 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 244 } 245 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 246 nargs->syncaddr = NULL; 247 } 248 249 if (nargs->netname) { 250 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 251 nargs->netname = NULL; 252 } 253 254 if (nargs->nfs_ext_u.nfs_extA.secdata) { 255 sec_clnt_freeinfo( 256 nargs->nfs_ext_u.nfs_extA.secdata); 257 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 258 } 259 } 260 261 static int 262 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 263 { 264 265 int error; 266 size_t nlen; /* length of netname */ 267 size_t hlen; /* length of hostname */ 268 char netname[MAXNETNAMELEN+1]; /* server's netname */ 269 struct netbuf addr; /* server's address */ 270 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 271 struct knetconfig *knconf; /* transport knetconfig structure */ 272 struct sec_data *secdata = NULL; /* security data */ 273 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 274 STRUCT_DECL(knetconfig, knconf_tmp); 275 STRUCT_DECL(netbuf, addr_tmp); 276 int flags; 277 struct pathcnf *pc; /* Pathconf */ 278 char *p, *pf; 279 char *userbufptr; 280 281 282 bzero(nargs, sizeof (*nargs)); 283 284 STRUCT_INIT(args, get_udatamodel()); 285 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 286 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 287 STRUCT_SIZE(args)))) 288 return (EFAULT); 289 290 nargs->wsize = STRUCT_FGET(args, wsize); 291 nargs->rsize = STRUCT_FGET(args, rsize); 292 nargs->timeo = STRUCT_FGET(args, timeo); 293 nargs->retrans = STRUCT_FGET(args, retrans); 294 nargs->acregmin = STRUCT_FGET(args, acregmin); 295 nargs->acregmax = STRUCT_FGET(args, acregmax); 296 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 297 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 298 299 flags = STRUCT_FGET(args, flags); 300 nargs->flags = flags; 301 302 303 addr.buf = NULL; 304 syncaddr.buf = NULL; 305 306 /* 307 * Allocate space for a knetconfig structure and 308 * its strings and copy in from user-land. 309 */ 310 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 311 STRUCT_INIT(knconf_tmp, get_udatamodel()); 312 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 313 STRUCT_SIZE(knconf_tmp))) { 314 kmem_free(knconf, sizeof (*knconf)); 315 return (EFAULT); 316 } 317 318 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 319 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 320 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 321 if (get_udatamodel() != DATAMODEL_LP64) { 322 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 323 } else { 324 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 325 } 326 327 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 328 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 329 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 330 if (error) { 331 kmem_free(pf, KNC_STRSIZE); 332 kmem_free(p, KNC_STRSIZE); 333 kmem_free(knconf, sizeof (*knconf)); 334 return (error); 335 } 336 337 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 338 if (error) { 339 kmem_free(pf, KNC_STRSIZE); 340 kmem_free(p, KNC_STRSIZE); 341 kmem_free(knconf, sizeof (*knconf)); 342 return (error); 343 } 344 345 346 knconf->knc_protofmly = pf; 347 knconf->knc_proto = p; 348 349 nargs->knconf = knconf; 350 351 /* Copyin pathconf if there is one */ 352 if (STRUCT_FGETP(args, pathconf) != NULL) { 353 pc = kmem_alloc(sizeof (*pc), KM_SLEEP); 354 error = pathconf_copyin(STRUCT_BUF(args), pc); 355 nargs->pathconf = pc; 356 if (error) 357 goto errout; 358 } 359 360 /* 361 * Get server address 362 */ 363 STRUCT_INIT(addr_tmp, get_udatamodel()); 364 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 365 STRUCT_SIZE(addr_tmp))) { 366 error = EFAULT; 367 goto errout; 368 } 369 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 370 userbufptr = STRUCT_FGETP(addr_tmp, buf); 371 addr.len = STRUCT_FGET(addr_tmp, len); 372 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 373 addr.maxlen = addr.len; 374 if (copyin(userbufptr, addr.buf, addr.len)) { 375 kmem_free(addr.buf, addr.len); 376 error = EFAULT; 377 goto errout; 378 } 379 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 380 381 /* 382 * Get the root fhandle 383 */ 384 385 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) { 386 error = EFAULT; 387 goto errout; 388 } 389 fh->fh_len = NFS_FHSIZE; 390 391 /* 392 * Get server's hostname 393 */ 394 if (flags & NFSMNT_HOSTNAME) { 395 error = copyinstr(STRUCT_FGETP(args, hostname), 396 netname, sizeof (netname), &hlen); 397 if (error) 398 goto errout; 399 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 400 (void) strcpy(nargs->hostname, netname); 401 402 } else { 403 nargs->hostname = NULL; 404 } 405 406 407 /* 408 * If there are syncaddr and netname data, load them in. This is 409 * to support data needed for NFSV4 when AUTH_DH is the negotiated 410 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 411 */ 412 netname[0] = '\0'; 413 if (flags & NFSMNT_SECURE) { 414 if (STRUCT_FGETP(args, syncaddr) == NULL) { 415 error = EINVAL; 416 goto errout; 417 } 418 /* get syncaddr */ 419 STRUCT_INIT(addr_tmp, get_udatamodel()); 420 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 421 STRUCT_SIZE(addr_tmp))) { 422 error = EINVAL; 423 goto errout; 424 } 425 userbufptr = STRUCT_FGETP(addr_tmp, buf); 426 syncaddr.len = STRUCT_FGET(addr_tmp, len); 427 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 428 syncaddr.maxlen = syncaddr.len; 429 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 430 kmem_free(syncaddr.buf, syncaddr.len); 431 error = EFAULT; 432 goto errout; 433 } 434 435 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 436 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 437 438 ASSERT(STRUCT_FGETP(args, netname)); 439 if (copyinstr(STRUCT_FGETP(args, netname), netname, 440 sizeof (netname), &nlen)) { 441 error = EFAULT; 442 goto errout; 443 } 444 445 netname[nlen] = '\0'; 446 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 447 (void) strcpy(nargs->netname, netname); 448 } 449 450 /* 451 * Get the extention data which has the security data structure. 452 * This includes data for AUTH_SYS as well. 453 */ 454 if (flags & NFSMNT_NEWARGS) { 455 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 456 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 457 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 458 /* 459 * Indicating the application is using the new 460 * sec_data structure to pass in the security 461 * data. 462 */ 463 if (STRUCT_FGETP(args, 464 nfs_ext_u.nfs_extA.secdata) != NULL) { 465 error = sec_clnt_loadinfo( 466 (struct sec_data *)STRUCT_FGETP(args, 467 nfs_ext_u.nfs_extA.secdata), 468 &secdata, get_udatamodel()); 469 } 470 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 471 } 472 } 473 474 if (error) 475 goto errout; 476 477 /* 478 * Failover support: 479 * 480 * We may have a linked list of nfs_args structures, 481 * which means the user is looking for failover. If 482 * the mount is either not "read-only" or "soft", 483 * we want to bail out with EINVAL. 484 */ 485 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 486 nargs->nfs_ext_u.nfs_extB.next = 487 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 488 489 errout: 490 if (error) 491 nfs_free_args(nargs, fh); 492 493 return (error); 494 } 495 496 497 /* 498 * nfs mount vfsop 499 * Set up mount info record and attach it to vfs struct. 500 */ 501 static int 502 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 503 { 504 char *data = uap->dataptr; 505 int error; 506 vnode_t *rtvp; /* the server's root */ 507 mntinfo_t *mi; /* mount info, pointed at by vfs */ 508 size_t nlen; /* length of netname */ 509 struct knetconfig *knconf; /* transport knetconfig structure */ 510 struct knetconfig *rdma_knconf; /* rdma transport structure */ 511 rnode_t *rp; 512 struct servinfo *svp; /* nfs server info */ 513 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 514 struct servinfo *svp_head; /* first nfs server info */ 515 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 516 struct sec_data *secdata; /* security data */ 517 struct nfs_args *args = NULL; 518 int flags, addr_type; 519 zone_t *zone = nfs_zone(); 520 zone_t *mntzone = NULL; 521 nfs_fhandle *fhandle = NULL; 522 523 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 524 return (error); 525 526 if (mvp->v_type != VDIR) 527 return (ENOTDIR); 528 529 /* 530 * get arguments 531 * 532 * nfs_args is now versioned and is extensible, so 533 * uap->datalen might be different from sizeof (args) 534 * in a compatible situation. 535 */ 536 more: 537 538 if (!(uap->flags & MS_SYSSPACE)) { 539 if (args == NULL) 540 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 541 else { 542 nfs_free_args(args, fhandle); 543 fhandle = NULL; 544 } 545 if (fhandle == NULL) 546 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP); 547 error = nfs_copyin(data, uap->datalen, args, fhandle); 548 if (error) { 549 if (args) 550 kmem_free(args, sizeof (*args)); 551 return (error); 552 } 553 } else { 554 args = (struct nfs_args *)data; 555 fhandle = (nfs_fhandle *)args->fh; 556 } 557 558 559 flags = args->flags; 560 561 if (uap->flags & MS_REMOUNT) { 562 size_t n; 563 char name[FSTYPSZ]; 564 565 566 if (uap->flags & MS_SYSSPACE) { 567 error = copystr(uap->fstype, name, FSTYPSZ, &n); 568 } else { 569 nfs_free_args(args, fhandle); 570 kmem_free(args, sizeof (*args)); 571 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 572 } 573 574 if (error) { 575 if (error == ENAMETOOLONG) 576 return (EINVAL); 577 return (error); 578 } 579 580 581 /* 582 * This check is to ensure that the request is a 583 * genuine nfs remount request. 584 */ 585 586 if (strncmp(name, "nfs", 3) != 0) 587 return (EINVAL); 588 589 /* 590 * If the request changes the locking type, disallow the 591 * remount, 592 * because it's questionable whether we can transfer the 593 * locking state correctly. 594 * 595 * Remounts need to save the pathconf information. 596 * Part of the infamous static kludge. 597 */ 598 599 if ((mi = VFTOMI(vfsp)) != NULL) { 600 uint_t new_mi_llock; 601 uint_t old_mi_llock; 602 603 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 604 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 605 if (old_mi_llock != new_mi_llock) 606 return (EBUSY); 607 } 608 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args); 609 610 return (error); 611 } 612 613 mutex_enter(&mvp->v_lock); 614 if (!(uap->flags & MS_OVERLAY) && 615 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 616 mutex_exit(&mvp->v_lock); 617 if (!(uap->flags) & MS_SYSSPACE) { 618 nfs_free_args(args, fhandle); 619 kmem_free(args, sizeof (*args)); 620 } 621 return (EBUSY); 622 } 623 mutex_exit(&mvp->v_lock); 624 625 /* make sure things are zeroed for errout: */ 626 rtvp = NULL; 627 mi = NULL; 628 secdata = NULL; 629 630 /* 631 * A valid knetconfig structure is required. 632 */ 633 if (!(flags & NFSMNT_KNCONF)) { 634 if (!(uap->flags & MS_SYSSPACE)) { 635 nfs_free_args(args, fhandle); 636 kmem_free(args, sizeof (*args)); 637 } 638 return (EINVAL); 639 } 640 641 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 642 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 643 if (!(uap->flags & MS_SYSSPACE)) { 644 nfs_free_args(args, fhandle); 645 kmem_free(args, sizeof (*args)); 646 } 647 return (EINVAL); 648 } 649 650 651 /* 652 * Allocate a servinfo struct. 653 */ 654 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 655 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 656 if (svp_tail) { 657 svp_2ndlast = svp_tail; 658 svp_tail->sv_next = svp; 659 } else { 660 svp_head = svp; 661 svp_2ndlast = svp; 662 } 663 664 svp_tail = svp; 665 666 /* 667 * Get knetconfig and server address 668 */ 669 svp->sv_knconf = args->knconf; 670 args->knconf = NULL; 671 672 if (args->addr == NULL || args->addr->buf == NULL) { 673 error = EINVAL; 674 goto errout; 675 } 676 677 svp->sv_addr.maxlen = args->addr->maxlen; 678 svp->sv_addr.len = args->addr->len; 679 svp->sv_addr.buf = args->addr->buf; 680 args->addr->buf = NULL; 681 682 /* 683 * Get the root fhandle 684 */ 685 ASSERT(fhandle); 686 687 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 688 svp->sv_fhandle.fh_len = fhandle->fh_len; 689 690 /* 691 * Get server's hostname 692 */ 693 if (flags & NFSMNT_HOSTNAME) { 694 if (args->hostname == NULL) { 695 error = EINVAL; 696 goto errout; 697 } 698 svp->sv_hostnamelen = strlen(args->hostname) + 1; 699 svp->sv_hostname = args->hostname; 700 args->hostname = NULL; 701 } else { 702 char *p = "unknown-host"; 703 svp->sv_hostnamelen = strlen(p) + 1; 704 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 705 (void) strcpy(svp->sv_hostname, p); 706 } 707 708 709 /* 710 * RDMA MOUNT SUPPORT FOR NFS v2: 711 * Establish, is it possible to use RDMA, if so overload the 712 * knconf with rdma specific knconf and free the orignal. 713 */ 714 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 715 /* 716 * Determine the addr type for RDMA, IPv4 or v6. 717 */ 718 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 719 addr_type = AF_INET; 720 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 721 addr_type = AF_INET6; 722 723 if (rdma_reachable(addr_type, &svp->sv_addr, 724 &rdma_knconf) == 0) { 725 /* 726 * If successful, hijack, the orignal knconf and 727 * replace with a new one, depending on the flags. 728 */ 729 svp->sv_origknconf = svp->sv_knconf; 730 svp->sv_knconf = rdma_knconf; 731 knconf = rdma_knconf; 732 } else { 733 if (flags & NFSMNT_TRYRDMA) { 734 #ifdef DEBUG 735 if (rdma_debug) 736 zcmn_err(getzoneid(), CE_WARN, 737 "no RDMA onboard, revert\n"); 738 #endif 739 } 740 741 if (flags & NFSMNT_DORDMA) { 742 /* 743 * If proto=rdma is specified and no RDMA 744 * path to this server is avialable then 745 * ditch this server. 746 * This is not included in the mountable 747 * server list or the replica list. 748 * Check if more servers are specified; 749 * Failover case, otherwise bail out of mount. 750 */ 751 if (args->nfs_args_ext == 752 NFS_ARGS_EXTB && 753 args->nfs_ext_u.nfs_extB.next 754 != NULL) { 755 data = (char *) 756 args->nfs_ext_u.nfs_extB.next; 757 if (uap->flags & MS_RDONLY && 758 !(flags & NFSMNT_SOFT)) { 759 if (svp_head->sv_next == NULL) { 760 svp_tail = NULL; 761 svp_2ndlast = NULL; 762 sv_free(svp_head); 763 goto more; 764 } else { 765 svp_tail = svp_2ndlast; 766 svp_2ndlast->sv_next = 767 NULL; 768 sv_free(svp); 769 goto more; 770 } 771 } 772 } else { 773 /* 774 * This is the last server specified 775 * in the nfs_args list passed down 776 * and its not rdma capable. 777 */ 778 if (svp_head->sv_next == NULL) { 779 /* 780 * Is this the only one 781 */ 782 error = EINVAL; 783 #ifdef DEBUG 784 if (rdma_debug) 785 zcmn_err(getzoneid(), 786 CE_WARN, 787 "No RDMA srv"); 788 #endif 789 goto errout; 790 } else { 791 /* 792 * There is list, since some 793 * servers specified before 794 * this passed all requirements 795 */ 796 svp_tail = svp_2ndlast; 797 svp_2ndlast->sv_next = NULL; 798 sv_free(svp); 799 goto proceed; 800 } 801 } 802 } 803 } 804 } 805 806 /* 807 * Get the extention data which has the new security data structure. 808 */ 809 if (flags & NFSMNT_NEWARGS) { 810 switch (args->nfs_args_ext) { 811 case NFS_ARGS_EXTA: 812 case NFS_ARGS_EXTB: 813 /* 814 * Indicating the application is using the new 815 * sec_data structure to pass in the security 816 * data. 817 */ 818 secdata = args->nfs_ext_u.nfs_extA.secdata; 819 if (secdata == NULL) { 820 error = EINVAL; 821 } else { 822 /* 823 * Need to validate the flavor here if 824 * sysspace, userspace was already 825 * validate from the nfs_copyin function. 826 */ 827 switch (secdata->rpcflavor) { 828 case AUTH_NONE: 829 case AUTH_UNIX: 830 case AUTH_LOOPBACK: 831 case AUTH_DES: 832 case RPCSEC_GSS: 833 break; 834 default: 835 error = EINVAL; 836 goto errout; 837 } 838 } 839 args->nfs_ext_u.nfs_extA.secdata = NULL; 840 break; 841 842 default: 843 error = EINVAL; 844 break; 845 } 846 } else if (flags & NFSMNT_SECURE) { 847 /* 848 * Keep this for backward compatibility to support 849 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 850 */ 851 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 852 error = EINVAL; 853 goto errout; 854 } 855 856 /* 857 * get time sync address. 858 */ 859 if (args->syncaddr == NULL) { 860 error = EFAULT; 861 goto errout; 862 } 863 864 /* 865 * Move security related data to the sec_data structure. 866 */ 867 { 868 dh_k4_clntdata_t *data; 869 char *pf, *p; 870 871 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 872 if (flags & NFSMNT_RPCTIMESYNC) 873 secdata->flags |= AUTH_F_RPCTIMESYNC; 874 data = kmem_alloc(sizeof (*data), KM_SLEEP); 875 bcopy(args->syncaddr, &data->syncaddr, 876 sizeof (*args->syncaddr)); 877 878 879 /* 880 * duplicate the knconf information for the 881 * new opaque data. 882 */ 883 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 884 *data->knconf = *knconf; 885 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 886 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 887 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 888 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 889 data->knconf->knc_protofmly = pf; 890 data->knconf->knc_proto = p; 891 892 /* move server netname to the sec_data structure */ 893 nlen = strlen(args->hostname) + 1; 894 if (nlen != 0) { 895 data->netname = kmem_alloc(nlen, KM_SLEEP); 896 bcopy(args->hostname, data->netname, nlen); 897 data->netnamelen = (int)nlen; 898 } 899 secdata->secmod = secdata->rpcflavor = AUTH_DES; 900 secdata->data = (caddr_t)data; 901 } 902 } else { 903 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 904 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 905 secdata->data = NULL; 906 } 907 svp->sv_secdata = secdata; 908 909 /* 910 * See bug 1180236. 911 * If mount secure failed, we will fall back to AUTH_NONE 912 * and try again. nfs3rootvp() will turn this back off. 913 * 914 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 915 * The server does not care if these procedures have the proper 916 * authentication flavor, so if mount retries using AUTH_NONE 917 * that does not require a credential setup for root then the 918 * automounter would work without requiring root to be 919 * keylogged into AUTH_DES. 920 */ 921 if (secdata->rpcflavor != AUTH_UNIX && 922 secdata->rpcflavor != AUTH_LOOPBACK) 923 secdata->flags |= AUTH_F_TRYNONE; 924 925 /* 926 * Failover support: 927 * 928 * We may have a linked list of nfs_args structures, 929 * which means the user is looking for failover. If 930 * the mount is either not "read-only" or "soft", 931 * we want to bail out with EINVAL. 932 */ 933 if (args->nfs_args_ext == NFS_ARGS_EXTB && 934 args->nfs_ext_u.nfs_extB.next != NULL) { 935 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 936 data = (char *)args->nfs_ext_u.nfs_extB.next; 937 goto more; 938 } 939 error = EINVAL; 940 goto errout; 941 } 942 943 /* 944 * Determine the zone we're being mounted into. 945 */ 946 zone_hold(mntzone = zone); /* start with this assumption */ 947 if (getzoneid() == GLOBAL_ZONEID) { 948 zone_rele(mntzone); 949 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 950 ASSERT(mntzone != NULL); 951 if (mntzone != zone) { 952 error = EBUSY; 953 goto errout; 954 } 955 } 956 957 if (is_system_labeled()) { 958 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 959 svp->sv_knconf, cr); 960 961 if (error > 0) 962 goto errout; 963 964 if (error == -1) { 965 /* change mount to read-only to prevent write-down */ 966 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 967 } 968 } 969 970 /* 971 * Stop the mount from going any further if the zone is going away. 972 */ 973 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 974 error = EBUSY; 975 goto errout; 976 } 977 978 /* 979 * Get root vnode. 980 */ 981 proceed: 982 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 983 984 if (error) 985 goto errout; 986 987 /* 988 * Set option fields in the mount info record 989 */ 990 mi = VTOMI(rtvp); 991 992 if (svp_head->sv_next) 993 mi->mi_flags |= MI_LLOCK; 994 995 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 996 if (!error) { 997 /* static pathconf kludge */ 998 error = pathconf_get(mi, args); 999 } 1000 1001 errout: 1002 if (error) { 1003 if (rtvp != NULL) { 1004 rp = VTOR(rtvp); 1005 if (rp->r_flags & RHASHED) 1006 rp_rmhash(rp); 1007 } 1008 sv_free(svp_head); 1009 if (mi != NULL) { 1010 nfs_async_stop(vfsp); 1011 nfs_async_manager_stop(vfsp); 1012 if (mi->mi_io_kstats) { 1013 kstat_delete(mi->mi_io_kstats); 1014 mi->mi_io_kstats = NULL; 1015 } 1016 if (mi->mi_ro_kstats) { 1017 kstat_delete(mi->mi_ro_kstats); 1018 mi->mi_ro_kstats = NULL; 1019 } 1020 nfs_free_mi(mi); 1021 } 1022 } 1023 1024 if (!(uap->flags & MS_SYSSPACE) && args) { 1025 nfs_free_args(args, fhandle); 1026 kmem_free(args, sizeof (*args)); 1027 } 1028 1029 if (rtvp != NULL) 1030 VN_RELE(rtvp); 1031 1032 if (mntzone != NULL) 1033 zone_rele(mntzone); 1034 1035 return (error); 1036 } 1037 1038 /* 1039 * The pathconf information is kept on a linked list of kmem_alloc'ed 1040 * structs. We search the list & add a new struct iff there is no other 1041 * struct with the same information. 1042 * See sys/pathconf.h for ``the rest of the story.'' 1043 */ 1044 static struct pathcnf *allpc = NULL; 1045 1046 static int 1047 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc) 1048 { 1049 STRUCT_DECL(pathcnf, pc_tmp); 1050 STRUCT_HANDLE(nfs_args, ap); 1051 int i; 1052 model_t model; 1053 1054 model = get_udatamodel(); 1055 STRUCT_INIT(pc_tmp, model); 1056 STRUCT_SET_HANDLE(ap, model, args); 1057 1058 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 1059 STRUCT_FGETP(ap, pathconf) != NULL) { 1060 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 1061 STRUCT_SIZE(pc_tmp))) 1062 return (EFAULT); 1063 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 1064 return (EINVAL); 1065 1066 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 1067 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 1068 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 1069 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 1070 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 1071 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 1072 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 1073 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 1074 for (i = 0; i < _PC_N; i++) 1075 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 1076 } 1077 return (0); 1078 } 1079 1080 static int 1081 pathconf_get(struct mntinfo *mi, struct nfs_args *args) 1082 { 1083 struct pathcnf *p, *pc; 1084 1085 pc = args->pathconf; 1086 if (mi->mi_pathconf != NULL) { 1087 pathconf_rele(mi); 1088 mi->mi_pathconf = NULL; 1089 } 1090 if (args->flags & NFSMNT_POSIX && 1091 args->pathconf != NULL) { 1092 1093 if (_PC_ISSET(_PC_ERROR, pc->pc_mask)) 1094 return (EINVAL); 1095 1096 for (p = allpc; p != NULL; p = p->pc_next) { 1097 if (PCCMP(p, pc) == 0) 1098 break; 1099 } 1100 if (p != NULL) { 1101 mi->mi_pathconf = p; 1102 p->pc_refcnt++; 1103 } else { 1104 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1105 bcopy(pc, p, sizeof (struct pathcnf)); 1106 p->pc_next = allpc; 1107 p->pc_refcnt = 1; 1108 allpc = mi->mi_pathconf = p; 1109 } 1110 } 1111 return (0); 1112 } 1113 1114 /* 1115 * release the static pathconf information 1116 */ 1117 static void 1118 pathconf_rele(struct mntinfo *mi) 1119 { 1120 if (mi->mi_pathconf != NULL) { 1121 if (--mi->mi_pathconf->pc_refcnt == 0) { 1122 struct pathcnf *p; 1123 struct pathcnf *p2; 1124 1125 p2 = p = allpc; 1126 while (p != NULL && p != mi->mi_pathconf) { 1127 p2 = p; 1128 p = p->pc_next; 1129 } 1130 if (p == NULL) { 1131 panic("mi->pathconf"); 1132 /*NOTREACHED*/ 1133 } 1134 if (p == allpc) 1135 allpc = p->pc_next; 1136 else 1137 p2->pc_next = p->pc_next; 1138 kmem_free(p, sizeof (*p)); 1139 mi->mi_pathconf = NULL; 1140 } 1141 } 1142 } 1143 1144 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 1145 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 1146 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 1147 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 1148 1149 static int 1150 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1151 int flags, cred_t *cr, zone_t *zone) 1152 { 1153 vnode_t *rtvp; 1154 mntinfo_t *mi; 1155 dev_t nfs_dev; 1156 struct vattr va; 1157 int error; 1158 rnode_t *rp; 1159 int i; 1160 struct nfs_stats *nfsstatsp; 1161 cred_t *lcr = NULL, *tcr = cr; 1162 1163 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1164 ASSERT(nfsstatsp != NULL); 1165 1166 /* 1167 * Create a mount record and link it to the vfs struct. 1168 */ 1169 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1170 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1171 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1172 mi->mi_flags = MI_ACL | MI_EXTATTR; 1173 if (!(flags & NFSMNT_SOFT)) 1174 mi->mi_flags |= MI_HARD; 1175 if ((flags & NFSMNT_SEMISOFT)) 1176 mi->mi_flags |= MI_SEMISOFT; 1177 if ((flags & NFSMNT_NOPRINT)) 1178 mi->mi_flags |= MI_NOPRINT; 1179 if (flags & NFSMNT_INT) 1180 mi->mi_flags |= MI_INT; 1181 mi->mi_retrans = NFS_RETRIES; 1182 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1183 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1184 mi->mi_timeo = nfs_cots_timeo; 1185 else 1186 mi->mi_timeo = NFS_TIMEO; 1187 mi->mi_prog = NFS_PROGRAM; 1188 mi->mi_vers = NFS_VERSION; 1189 mi->mi_rfsnames = rfsnames_v2; 1190 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 1191 mi->mi_call_type = call_type_v2; 1192 mi->mi_ss_call_type = ss_call_type_v2; 1193 mi->mi_timer_type = timer_type_v2; 1194 mi->mi_aclnames = aclnames_v2; 1195 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 1196 mi->mi_acl_call_type = acl_call_type_v2; 1197 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 1198 mi->mi_acl_timer_type = acl_timer_type_v2; 1199 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1200 mi->mi_servers = svp; 1201 mi->mi_curr_serv = svp; 1202 mi->mi_acregmin = SEC2HR(ACREGMIN); 1203 mi->mi_acregmax = SEC2HR(ACREGMAX); 1204 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1205 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1206 1207 if (nfs_dynamic) 1208 mi->mi_flags |= MI_DYNAMIC; 1209 1210 if (flags & NFSMNT_DIRECTIO) 1211 mi->mi_flags |= MI_DIRECTIO; 1212 1213 /* 1214 * Make a vfs struct for nfs. We do this here instead of below 1215 * because rtvp needs a vfs before we can do a getattr on it. 1216 * 1217 * Assign a unique device id to the mount 1218 */ 1219 mutex_enter(&nfs_minor_lock); 1220 do { 1221 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1222 nfs_dev = makedevice(nfs_major, nfs_minor); 1223 } while (vfs_devismounted(nfs_dev)); 1224 mutex_exit(&nfs_minor_lock); 1225 1226 vfsp->vfs_dev = nfs_dev; 1227 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 1228 vfsp->vfs_data = (caddr_t)mi; 1229 vfsp->vfs_fstype = nfsfstyp; 1230 vfsp->vfs_bsize = NFS_MAXDATA; 1231 1232 /* 1233 * Initialize fields used to support async putpage operations. 1234 */ 1235 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1236 mi->mi_async_clusters[i] = nfs_async_clusters; 1237 mi->mi_async_init_clusters = nfs_async_clusters; 1238 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1239 mi->mi_max_threads = nfs_max_threads; 1240 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1241 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1242 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1243 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1244 1245 mi->mi_vfsp = vfsp; 1246 zone_hold(mi->mi_zone = zone); 1247 nfs_mi_zonelist_add(mi); 1248 1249 /* 1250 * Make the root vnode, use it to get attributes, 1251 * then remake it with the attributes. 1252 */ 1253 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 1254 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1255 1256 va.va_mask = AT_ALL; 1257 1258 /* 1259 * If the uid is set then set the creds for secure mounts 1260 * by proxy processes such as automountd. 1261 */ 1262 if (svp->sv_secdata->uid != 0 && 1263 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1264 lcr = crdup(cr); 1265 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1266 tcr = lcr; 1267 } 1268 1269 error = nfsgetattr(rtvp, &va, tcr); 1270 if (error) 1271 goto bad; 1272 rtvp->v_type = va.va_type; 1273 1274 /* 1275 * Poll every server to get the filesystem stats; we're 1276 * only interested in the server's transfer size, and we 1277 * want the minimum. 1278 * 1279 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1280 * which is only for the mount operation. 1281 */ 1282 1283 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 1284 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 1285 1286 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1287 struct nfsstatfs fs; 1288 int douprintf; 1289 1290 douprintf = 1; 1291 mi->mi_curr_serv = svp; 1292 1293 error = rfs2call(mi, RFS_STATFS, 1294 xdr_fhandle, (caddr_t)svp->sv_fhandle.fh_buf, 1295 xdr_statfs, (caddr_t)&fs, tcr, &douprintf, 1296 &fs.fs_status, 0, NULL); 1297 if (error) 1298 goto bad; 1299 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1300 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1301 } 1302 mi->mi_curr_serv = mi->mi_servers; 1303 mi->mi_curread = mi->mi_tsize; 1304 mi->mi_curwrite = mi->mi_stsize; 1305 1306 /* 1307 * Start the manager thread responsible for handling async worker 1308 * threads. 1309 */ 1310 VFS_HOLD(vfsp); /* add reference for thread */ 1311 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1312 vfsp, 0, minclsyspri); 1313 ASSERT(mi->mi_manager_thread != NULL); 1314 1315 /* 1316 * Initialize kstats 1317 */ 1318 nfs_mnt_kstat_init(vfsp); 1319 1320 mi->mi_type = rtvp->v_type; 1321 1322 *rtvpp = rtvp; 1323 if (lcr != NULL) 1324 crfree(lcr); 1325 1326 return (0); 1327 bad: 1328 /* 1329 * An error occurred somewhere, need to clean up... 1330 * We need to release our reference to the root vnode and 1331 * destroy the mntinfo struct that we just created. 1332 */ 1333 if (lcr != NULL) 1334 crfree(lcr); 1335 rp = VTOR(rtvp); 1336 if (rp->r_flags & RHASHED) 1337 rp_rmhash(rp); 1338 VN_RELE(rtvp); 1339 nfs_async_stop(vfsp); 1340 nfs_async_manager_stop(vfsp); 1341 if (mi->mi_io_kstats) { 1342 kstat_delete(mi->mi_io_kstats); 1343 mi->mi_io_kstats = NULL; 1344 } 1345 if (mi->mi_ro_kstats) { 1346 kstat_delete(mi->mi_ro_kstats); 1347 mi->mi_ro_kstats = NULL; 1348 } 1349 nfs_free_mi(mi); 1350 *rtvpp = NULL; 1351 return (error); 1352 } 1353 1354 /* 1355 * vfs operations 1356 */ 1357 static int 1358 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1359 { 1360 mntinfo_t *mi; 1361 ushort_t omax; 1362 1363 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1364 return (EPERM); 1365 1366 mi = VFTOMI(vfsp); 1367 if (flag & MS_FORCE) { 1368 1369 vfsp->vfs_flag |= VFS_UNMOUNTED; 1370 1371 /* 1372 * We are about to stop the async manager. 1373 * Let every one know not to schedule any 1374 * more async requests. 1375 */ 1376 mutex_enter(&mi->mi_async_lock); 1377 mi->mi_max_threads = 0; 1378 cv_broadcast(&mi->mi_async_work_cv); 1379 mutex_exit(&mi->mi_async_lock); 1380 1381 /* 1382 * We need to stop the manager thread explicitly; the worker 1383 * threads can time out and exit on their own. 1384 */ 1385 nfs_async_manager_stop(vfsp); 1386 destroy_rtable(vfsp, cr); 1387 if (mi->mi_io_kstats) { 1388 kstat_delete(mi->mi_io_kstats); 1389 mi->mi_io_kstats = NULL; 1390 } 1391 if (mi->mi_ro_kstats) { 1392 kstat_delete(mi->mi_ro_kstats); 1393 mi->mi_ro_kstats = NULL; 1394 } 1395 return (0); 1396 } 1397 /* 1398 * Wait until all asynchronous putpage operations on 1399 * this file system are complete before flushing rnodes 1400 * from the cache. 1401 */ 1402 omax = mi->mi_max_threads; 1403 if (nfs_async_stop_sig(vfsp)) { 1404 return (EINTR); 1405 } 1406 rflush(vfsp, cr); 1407 /* 1408 * If there are any active vnodes on this file system, 1409 * then the file system is busy and can't be umounted. 1410 */ 1411 if (check_rtable(vfsp)) { 1412 mutex_enter(&mi->mi_async_lock); 1413 mi->mi_max_threads = omax; 1414 mutex_exit(&mi->mi_async_lock); 1415 return (EBUSY); 1416 } 1417 /* 1418 * The unmount can't fail from now on; stop the manager thread. 1419 */ 1420 nfs_async_manager_stop(vfsp); 1421 /* 1422 * Destroy all rnodes belonging to this file system from the 1423 * rnode hash queues and purge any resources allocated to 1424 * them. 1425 */ 1426 destroy_rtable(vfsp, cr); 1427 if (mi->mi_io_kstats) { 1428 kstat_delete(mi->mi_io_kstats); 1429 mi->mi_io_kstats = NULL; 1430 } 1431 if (mi->mi_ro_kstats) { 1432 kstat_delete(mi->mi_ro_kstats); 1433 mi->mi_ro_kstats = NULL; 1434 } 1435 return (0); 1436 } 1437 1438 /* 1439 * find root of nfs 1440 */ 1441 static int 1442 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1443 { 1444 mntinfo_t *mi; 1445 vnode_t *vp; 1446 servinfo_t *svp; 1447 1448 mi = VFTOMI(vfsp); 1449 1450 if (nfs_zone() != mi->mi_zone) 1451 return (EPERM); 1452 1453 svp = mi->mi_curr_serv; 1454 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1455 mutex_enter(&svp->sv_lock); 1456 svp->sv_flags &= ~SV_ROOT_STALE; 1457 mutex_exit(&svp->sv_lock); 1458 return (ENOENT); 1459 } 1460 1461 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1462 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1463 1464 if (VTOR(vp)->r_flags & RSTALE) { 1465 VN_RELE(vp); 1466 return (ENOENT); 1467 } 1468 1469 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1470 1471 vp->v_type = mi->mi_type; 1472 1473 *vpp = vp; 1474 1475 return (0); 1476 } 1477 1478 /* 1479 * Get file system statistics. 1480 */ 1481 static int 1482 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1483 { 1484 int error; 1485 mntinfo_t *mi; 1486 struct nfsstatfs fs; 1487 int douprintf; 1488 failinfo_t fi; 1489 vnode_t *vp; 1490 1491 error = nfs_root(vfsp, &vp); 1492 if (error) 1493 return (error); 1494 1495 mi = VFTOMI(vfsp); 1496 douprintf = 1; 1497 fi.vp = vp; 1498 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1499 fi.copyproc = nfscopyfh; 1500 fi.lookupproc = nfslookup; 1501 fi.xattrdirproc = acl_getxattrdir2; 1502 1503 error = rfs2call(mi, RFS_STATFS, 1504 xdr_fhandle, (caddr_t)VTOFH(vp), 1505 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, 1506 &fs.fs_status, 0, &fi); 1507 1508 if (!error) { 1509 error = geterrno(fs.fs_status); 1510 if (!error) { 1511 mutex_enter(&mi->mi_lock); 1512 if (mi->mi_stsize) { 1513 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1514 } else { 1515 mi->mi_stsize = fs.fs_tsize; 1516 mi->mi_curwrite = mi->mi_stsize; 1517 } 1518 mutex_exit(&mi->mi_lock); 1519 sbp->f_bsize = fs.fs_bsize; 1520 sbp->f_frsize = fs.fs_bsize; 1521 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1522 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1523 /* 1524 * Some servers may return negative available 1525 * block counts. They may do this because they 1526 * calculate the number of available blocks by 1527 * subtracting the number of used blocks from 1528 * the total number of blocks modified by the 1529 * minimum free value. For example, if the 1530 * minumum free percentage is 10 and the file 1531 * system is greater than 90 percent full, then 1532 * 90 percent of the total blocks minus the 1533 * actual number of used blocks may be a 1534 * negative number. 1535 * 1536 * In this case, we need to sign extend the 1537 * negative number through the assignment from 1538 * the 32 bit bavail count to the 64 bit bavail 1539 * count. 1540 * 1541 * We need to be able to discern between there 1542 * just being a lot of available blocks on the 1543 * file system and the case described above. 1544 * We are making the assumption that it does 1545 * not make sense to have more available blocks 1546 * than there are free blocks. So, if there 1547 * are, then we treat the number as if it were 1548 * a negative number and arrange to have it 1549 * sign extended when it is converted from 32 1550 * bits to 64 bits. 1551 */ 1552 if (fs.fs_bavail <= fs.fs_bfree) 1553 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1554 else { 1555 sbp->f_bavail = 1556 (fsblkcnt64_t)((long)fs.fs_bavail); 1557 } 1558 sbp->f_files = (fsfilcnt64_t)-1; 1559 sbp->f_ffree = (fsfilcnt64_t)-1; 1560 sbp->f_favail = (fsfilcnt64_t)-1; 1561 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1562 (void) strncpy(sbp->f_basetype, 1563 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1564 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1565 sbp->f_namemax = (uint32_t)-1; 1566 } else { 1567 PURGE_STALE_FH(error, vp, CRED()); 1568 } 1569 } 1570 1571 VN_RELE(vp); 1572 1573 return (error); 1574 } 1575 1576 static kmutex_t nfs_syncbusy; 1577 1578 /* 1579 * Flush dirty nfs files for file system vfsp. 1580 * If vfsp == NULL, all nfs files are flushed. 1581 */ 1582 /* ARGSUSED */ 1583 static int 1584 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1585 { 1586 /* 1587 * Cross-zone calls are OK here, since this translates to a 1588 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1589 */ 1590 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1591 rflush(vfsp, cr); 1592 mutex_exit(&nfs_syncbusy); 1593 } 1594 return (0); 1595 } 1596 1597 /* ARGSUSED */ 1598 static int 1599 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1600 { 1601 int error; 1602 vnode_t *vp; 1603 struct vattr va; 1604 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1605 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1606 1607 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1608 return (EPERM); 1609 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1610 #ifdef DEBUG 1611 zcmn_err(zoneid, CE_WARN, 1612 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1613 (int)(sizeof (*nfsfidp) - sizeof (short))); 1614 #endif 1615 *vpp = NULL; 1616 return (ESTALE); 1617 } 1618 1619 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1620 gethrtime(), CRED(), NULL, NULL); 1621 1622 if (VTOR(vp)->r_flags & RSTALE) { 1623 VN_RELE(vp); 1624 *vpp = NULL; 1625 return (ENOENT); 1626 } 1627 1628 if (vp->v_type == VNON) { 1629 va.va_mask = AT_ALL; 1630 error = nfsgetattr(vp, &va, CRED()); 1631 if (error) { 1632 VN_RELE(vp); 1633 *vpp = NULL; 1634 return (error); 1635 } 1636 vp->v_type = va.va_type; 1637 } 1638 1639 *vpp = vp; 1640 1641 return (0); 1642 } 1643 1644 /* ARGSUSED */ 1645 static int 1646 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1647 { 1648 vnode_t *rtvp; 1649 char root_hostname[SYS_NMLN+1]; 1650 struct servinfo *svp; 1651 int error; 1652 int vfsflags; 1653 size_t size; 1654 char *root_path; 1655 struct pathname pn; 1656 char *name; 1657 cred_t *cr; 1658 struct nfs_args args; /* nfs mount arguments */ 1659 static char token[10]; 1660 1661 bzero(&args, sizeof (args)); 1662 1663 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1664 clkset(-1L); /* hack for now - until we get time svc? */ 1665 1666 if (why == ROOT_REMOUNT) { 1667 /* 1668 * Shouldn't happen. 1669 */ 1670 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1671 } 1672 1673 if (why == ROOT_UNMOUNT) { 1674 /* 1675 * Nothing to do for NFS. 1676 */ 1677 return (0); 1678 } 1679 1680 /* 1681 * why == ROOT_INIT 1682 */ 1683 1684 name = token; 1685 *name = 0; 1686 getfsname("root", name, sizeof (token)); 1687 1688 pn_alloc(&pn); 1689 root_path = pn.pn_path; 1690 1691 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1692 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1693 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1694 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1695 1696 /* 1697 * Get server address 1698 * Get the root fhandle 1699 * Get server's transport 1700 * Get server's hostname 1701 * Get options 1702 */ 1703 args.addr = &svp->sv_addr; 1704 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1705 args.knconf = svp->sv_knconf; 1706 args.hostname = root_hostname; 1707 vfsflags = 0; 1708 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1709 &args, &vfsflags)) { 1710 nfs_cmn_err(error, CE_WARN, 1711 "nfs_mountroot: mount_root failed: %m"); 1712 sv_free(svp); 1713 pn_free(&pn); 1714 return (error); 1715 } 1716 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1717 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1718 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1719 (void) strcpy(svp->sv_hostname, root_hostname); 1720 1721 /* 1722 * Force root partition to always be mounted with AUTH_UNIX for now 1723 */ 1724 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1725 svp->sv_secdata->secmod = AUTH_UNIX; 1726 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1727 svp->sv_secdata->data = NULL; 1728 1729 cr = crgetcred(); 1730 rtvp = NULL; 1731 1732 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1733 1734 crfree(cr); 1735 1736 if (error) { 1737 pn_free(&pn); 1738 goto errout; 1739 } 1740 1741 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1742 if (error) { 1743 nfs_cmn_err(error, CE_WARN, 1744 "nfs_mountroot: invalid root mount options"); 1745 pn_free(&pn); 1746 goto errout; 1747 } 1748 1749 (void) vfs_lock_wait(vfsp); 1750 vfs_add(NULL, vfsp, vfsflags); 1751 vfs_unlock(vfsp); 1752 1753 size = strlen(svp->sv_hostname); 1754 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1755 rootfs.bo_name[size] = ':'; 1756 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1757 1758 pn_free(&pn); 1759 1760 errout: 1761 if (error) { 1762 sv_free(svp); 1763 nfs_async_stop(vfsp); 1764 nfs_async_manager_stop(vfsp); 1765 } 1766 1767 if (rtvp != NULL) 1768 VN_RELE(rtvp); 1769 1770 return (error); 1771 } 1772 1773 /* 1774 * Initialization routine for VFS routines. Should only be called once 1775 */ 1776 int 1777 nfs_vfsinit(void) 1778 { 1779 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1780 return (0); 1781 } 1782 1783 void 1784 nfs_vfsfini(void) 1785 { 1786 mutex_destroy(&nfs_syncbusy); 1787 } 1788 1789 void 1790 nfs_freevfs(vfs_t *vfsp) 1791 { 1792 mntinfo_t *mi; 1793 servinfo_t *svp; 1794 1795 /* free up the resources */ 1796 mi = VFTOMI(vfsp); 1797 pathconf_rele(mi); 1798 svp = mi->mi_servers; 1799 mi->mi_servers = mi->mi_curr_serv = NULL; 1800 sv_free(svp); 1801 1802 /* 1803 * By this time we should have already deleted the 1804 * mi kstats in the unmount code. If they are still around 1805 * somethings wrong 1806 */ 1807 ASSERT(mi->mi_io_kstats == NULL); 1808 nfs_free_mi(mi); 1809 } 1810