1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 26 * All rights reserved. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/vnode.h> 38 #include <sys/pathname.h> 39 #include <sys/sysmacros.h> 40 #include <sys/kmem.h> 41 #include <sys/mkdev.h> 42 #include <sys/mount.h> 43 #include <sys/mntent.h> 44 #include <sys/statvfs.h> 45 #include <sys/errno.h> 46 #include <sys/debug.h> 47 #include <sys/cmn_err.h> 48 #include <sys/utsname.h> 49 #include <sys/bootconf.h> 50 #include <sys/modctl.h> 51 #include <sys/acl.h> 52 #include <sys/flock.h> 53 #include <sys/policy.h> 54 #include <sys/zone.h> 55 #include <sys/class.h> 56 #include <sys/socket.h> 57 #include <sys/netconfig.h> 58 #include <sys/mntent.h> 59 #include <sys/tsol/label.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/clnt.h> 64 65 #include <nfs/nfs.h> 66 #include <nfs/nfs_clnt.h> 67 #include <nfs/rnode.h> 68 #include <nfs/mount.h> 69 #include <nfs/nfs_acl.h> 70 71 #include <fs/fs_subr.h> 72 73 /* 74 * From rpcsec module (common/rpcsec). 75 */ 76 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 77 extern void sec_clnt_freeinfo(struct sec_data *); 78 79 static int pathconf_copyin(struct nfs_args *, struct pathcnf *); 80 static int pathconf_get(struct mntinfo *, struct nfs_args *); 81 static void pathconf_rele(struct mntinfo *); 82 83 /* 84 * The order and contents of this structure must be kept in sync with that of 85 * rfsreqcnt_v2_tmpl in nfs_stats.c 86 */ 87 static char *rfsnames_v2[] = { 88 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 89 "unused", "write", "create", "remove", "rename", "link", "symlink", 90 "mkdir", "rmdir", "readdir", "fsstat" 91 }; 92 93 /* 94 * This table maps from NFS protocol number into call type. 95 * Zero means a "Lookup" type call 96 * One means a "Read" type call 97 * Two means a "Write" type call 98 * This is used to select a default time-out. 99 */ 100 static uchar_t call_type_v2[] = { 101 0, 0, 1, 0, 0, 0, 1, 102 0, 2, 2, 2, 2, 2, 2, 103 2, 2, 1, 0 104 }; 105 106 /* 107 * Similar table, but to determine which timer to use 108 * (only real reads and writes!) 109 */ 110 static uchar_t timer_type_v2[] = { 111 0, 0, 0, 0, 0, 0, 1, 112 0, 2, 0, 0, 0, 0, 0, 113 0, 0, 1, 0 114 }; 115 116 /* 117 * This table maps from NFS protocol number into a call type 118 * for the semisoft mount option. 119 * Zero means do not repeat operation. 120 * One means repeat. 121 */ 122 static uchar_t ss_call_type_v2[] = { 123 0, 0, 1, 0, 0, 0, 0, 124 0, 1, 1, 1, 1, 1, 1, 125 1, 1, 0, 0 126 }; 127 128 /* 129 * nfs vfs operations. 130 */ 131 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 132 static int nfs_unmount(vfs_t *, int, cred_t *); 133 static int nfs_root(vfs_t *, vnode_t **); 134 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 135 static int nfs_sync(vfs_t *, short, cred_t *); 136 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 137 static int nfs_mountroot(vfs_t *, whymountroot_t); 138 static void nfs_freevfs(vfs_t *); 139 140 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 141 int, cred_t *, zone_t *); 142 143 /* 144 * Initialize the vfs structure 145 */ 146 147 int nfsfstyp; 148 vfsops_t *nfs_vfsops; 149 150 /* 151 * Debug variable to check for rdma based 152 * transport startup and cleanup. Controlled 153 * through /etc/system. Off by default. 154 */ 155 int rdma_debug = 0; 156 157 int 158 nfsinit(int fstyp, char *name) 159 { 160 static const fs_operation_def_t nfs_vfsops_template[] = { 161 VFSNAME_MOUNT, { .vfs_mount = nfs_mount }, 162 VFSNAME_UNMOUNT, { .vfs_unmount = nfs_unmount }, 163 VFSNAME_ROOT, { .vfs_root = nfs_root }, 164 VFSNAME_STATVFS, { .vfs_statvfs = nfs_statvfs }, 165 VFSNAME_SYNC, { .vfs_sync = nfs_sync }, 166 VFSNAME_VGET, { .vfs_vget = nfs_vget }, 167 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs_mountroot }, 168 VFSNAME_FREEVFS, { .vfs_freevfs = nfs_freevfs }, 169 NULL, NULL 170 }; 171 int error; 172 173 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 174 if (error != 0) { 175 zcmn_err(GLOBAL_ZONEID, CE_WARN, 176 "nfsinit: bad vfs ops template"); 177 return (error); 178 } 179 180 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 181 if (error != 0) { 182 (void) vfs_freevfsops_by_type(fstyp); 183 zcmn_err(GLOBAL_ZONEID, CE_WARN, 184 "nfsinit: bad vnode ops template"); 185 return (error); 186 } 187 188 189 nfsfstyp = fstyp; 190 191 return (0); 192 } 193 194 void 195 nfsfini(void) 196 { 197 } 198 199 static void 200 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 201 { 202 203 if (fh) 204 kmem_free(fh, sizeof (*fh)); 205 206 if (nargs->pathconf) { 207 kmem_free(nargs->pathconf, sizeof (struct pathcnf)); 208 nargs->pathconf = NULL; 209 } 210 211 if (nargs->knconf) { 212 if (nargs->knconf->knc_protofmly) 213 kmem_free(nargs->knconf->knc_protofmly, 214 KNC_STRSIZE); 215 if (nargs->knconf->knc_proto) 216 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 217 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 218 nargs->knconf = NULL; 219 } 220 221 if (nargs->fh) { 222 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 223 nargs->fh = NULL; 224 } 225 226 if (nargs->hostname) { 227 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 228 nargs->hostname = NULL; 229 } 230 231 if (nargs->addr) { 232 if (nargs->addr->buf) { 233 ASSERT(nargs->addr->len); 234 kmem_free(nargs->addr->buf, nargs->addr->len); 235 } 236 kmem_free(nargs->addr, sizeof (struct netbuf)); 237 nargs->addr = NULL; 238 } 239 240 if (nargs->syncaddr) { 241 ASSERT(nargs->syncaddr->len); 242 if (nargs->syncaddr->buf) { 243 ASSERT(nargs->syncaddr->len); 244 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 245 } 246 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 247 nargs->syncaddr = NULL; 248 } 249 250 if (nargs->netname) { 251 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 252 nargs->netname = NULL; 253 } 254 255 if (nargs->nfs_ext_u.nfs_extA.secdata) { 256 sec_clnt_freeinfo( 257 nargs->nfs_ext_u.nfs_extA.secdata); 258 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 259 } 260 } 261 262 static int 263 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 264 { 265 266 int error; 267 size_t nlen; /* length of netname */ 268 size_t hlen; /* length of hostname */ 269 char netname[MAXNETNAMELEN+1]; /* server's netname */ 270 struct netbuf addr; /* server's address */ 271 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 272 struct knetconfig *knconf; /* transport knetconfig structure */ 273 struct sec_data *secdata = NULL; /* security data */ 274 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 275 STRUCT_DECL(knetconfig, knconf_tmp); 276 STRUCT_DECL(netbuf, addr_tmp); 277 int flags; 278 struct pathcnf *pc; /* Pathconf */ 279 char *p, *pf; 280 char *userbufptr; 281 282 283 bzero(nargs, sizeof (*nargs)); 284 285 STRUCT_INIT(args, get_udatamodel()); 286 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 287 if (copyin(data, STRUCT_BUF(args), MIN(datalen, 288 STRUCT_SIZE(args)))) 289 return (EFAULT); 290 291 nargs->wsize = STRUCT_FGET(args, wsize); 292 nargs->rsize = STRUCT_FGET(args, rsize); 293 nargs->timeo = STRUCT_FGET(args, timeo); 294 nargs->retrans = STRUCT_FGET(args, retrans); 295 nargs->acregmin = STRUCT_FGET(args, acregmin); 296 nargs->acregmax = STRUCT_FGET(args, acregmax); 297 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 298 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 299 300 flags = STRUCT_FGET(args, flags); 301 nargs->flags = flags; 302 303 304 addr.buf = NULL; 305 syncaddr.buf = NULL; 306 307 /* 308 * Allocate space for a knetconfig structure and 309 * its strings and copy in from user-land. 310 */ 311 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 312 STRUCT_INIT(knconf_tmp, get_udatamodel()); 313 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 314 STRUCT_SIZE(knconf_tmp))) { 315 kmem_free(knconf, sizeof (*knconf)); 316 return (EFAULT); 317 } 318 319 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 320 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 321 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 322 if (get_udatamodel() != DATAMODEL_LP64) { 323 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 324 } else { 325 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 326 } 327 328 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 329 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 330 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 331 if (error) { 332 kmem_free(pf, KNC_STRSIZE); 333 kmem_free(p, KNC_STRSIZE); 334 kmem_free(knconf, sizeof (*knconf)); 335 return (error); 336 } 337 338 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 339 if (error) { 340 kmem_free(pf, KNC_STRSIZE); 341 kmem_free(p, KNC_STRSIZE); 342 kmem_free(knconf, sizeof (*knconf)); 343 return (error); 344 } 345 346 347 knconf->knc_protofmly = pf; 348 knconf->knc_proto = p; 349 350 nargs->knconf = knconf; 351 352 /* Copyin pathconf if there is one */ 353 if (STRUCT_FGETP(args, pathconf) != NULL) { 354 pc = kmem_alloc(sizeof (*pc), KM_SLEEP); 355 error = pathconf_copyin(STRUCT_BUF(args), pc); 356 nargs->pathconf = pc; 357 if (error) 358 goto errout; 359 } 360 361 /* 362 * Get server address 363 */ 364 STRUCT_INIT(addr_tmp, get_udatamodel()); 365 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 366 STRUCT_SIZE(addr_tmp))) { 367 error = EFAULT; 368 goto errout; 369 } 370 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 371 userbufptr = STRUCT_FGETP(addr_tmp, buf); 372 addr.len = STRUCT_FGET(addr_tmp, len); 373 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 374 addr.maxlen = addr.len; 375 if (copyin(userbufptr, addr.buf, addr.len)) { 376 kmem_free(addr.buf, addr.len); 377 error = EFAULT; 378 goto errout; 379 } 380 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 381 382 /* 383 * Get the root fhandle 384 */ 385 386 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) { 387 error = EFAULT; 388 goto errout; 389 } 390 fh->fh_len = NFS_FHSIZE; 391 392 /* 393 * Get server's hostname 394 */ 395 if (flags & NFSMNT_HOSTNAME) { 396 error = copyinstr(STRUCT_FGETP(args, hostname), 397 netname, sizeof (netname), &hlen); 398 if (error) 399 goto errout; 400 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 401 (void) strcpy(nargs->hostname, netname); 402 403 } else { 404 nargs->hostname = NULL; 405 } 406 407 408 /* 409 * If there are syncaddr and netname data, load them in. This is 410 * to support data needed for NFSV4 when AUTH_DH is the negotiated 411 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 412 */ 413 netname[0] = '\0'; 414 if (flags & NFSMNT_SECURE) { 415 if (STRUCT_FGETP(args, syncaddr) == NULL) { 416 error = EINVAL; 417 goto errout; 418 } 419 /* get syncaddr */ 420 STRUCT_INIT(addr_tmp, get_udatamodel()); 421 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 422 STRUCT_SIZE(addr_tmp))) { 423 error = EINVAL; 424 goto errout; 425 } 426 userbufptr = STRUCT_FGETP(addr_tmp, buf); 427 syncaddr.len = STRUCT_FGET(addr_tmp, len); 428 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 429 syncaddr.maxlen = syncaddr.len; 430 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 431 kmem_free(syncaddr.buf, syncaddr.len); 432 error = EFAULT; 433 goto errout; 434 } 435 436 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 437 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 438 439 ASSERT(STRUCT_FGETP(args, netname)); 440 if (copyinstr(STRUCT_FGETP(args, netname), netname, 441 sizeof (netname), &nlen)) { 442 error = EFAULT; 443 goto errout; 444 } 445 446 netname[nlen] = '\0'; 447 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 448 (void) strcpy(nargs->netname, netname); 449 } 450 451 /* 452 * Get the extention data which has the security data structure. 453 * This includes data for AUTH_SYS as well. 454 */ 455 if (flags & NFSMNT_NEWARGS) { 456 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 457 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 458 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 459 /* 460 * Indicating the application is using the new 461 * sec_data structure to pass in the security 462 * data. 463 */ 464 if (STRUCT_FGETP(args, 465 nfs_ext_u.nfs_extA.secdata) != NULL) { 466 error = sec_clnt_loadinfo( 467 (struct sec_data *)STRUCT_FGETP(args, 468 nfs_ext_u.nfs_extA.secdata), 469 &secdata, get_udatamodel()); 470 } 471 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 472 } 473 } 474 475 if (error) 476 goto errout; 477 478 /* 479 * Failover support: 480 * 481 * We may have a linked list of nfs_args structures, 482 * which means the user is looking for failover. If 483 * the mount is either not "read-only" or "soft", 484 * we want to bail out with EINVAL. 485 */ 486 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 487 nargs->nfs_ext_u.nfs_extB.next = 488 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 489 490 errout: 491 if (error) 492 nfs_free_args(nargs, fh); 493 494 return (error); 495 } 496 497 498 /* 499 * nfs mount vfsop 500 * Set up mount info record and attach it to vfs struct. 501 */ 502 static int 503 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 504 { 505 char *data = uap->dataptr; 506 int error; 507 vnode_t *rtvp; /* the server's root */ 508 mntinfo_t *mi; /* mount info, pointed at by vfs */ 509 size_t nlen; /* length of netname */ 510 struct knetconfig *knconf; /* transport knetconfig structure */ 511 struct knetconfig *rdma_knconf; /* rdma transport structure */ 512 rnode_t *rp; 513 struct servinfo *svp; /* nfs server info */ 514 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 515 struct servinfo *svp_head; /* first nfs server info */ 516 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 517 struct sec_data *secdata; /* security data */ 518 struct nfs_args *args = NULL; 519 int flags, addr_type; 520 zone_t *zone = nfs_zone(); 521 zone_t *mntzone = NULL; 522 nfs_fhandle *fhandle = NULL; 523 524 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 525 return (error); 526 527 if (mvp->v_type != VDIR) 528 return (ENOTDIR); 529 530 /* 531 * get arguments 532 * 533 * nfs_args is now versioned and is extensible, so 534 * uap->datalen might be different from sizeof (args) 535 * in a compatible situation. 536 */ 537 more: 538 539 if (!(uap->flags & MS_SYSSPACE)) { 540 if (args == NULL) 541 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 542 else { 543 nfs_free_args(args, fhandle); 544 fhandle = NULL; 545 } 546 if (fhandle == NULL) 547 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP); 548 error = nfs_copyin(data, uap->datalen, args, fhandle); 549 if (error) { 550 if (args) 551 kmem_free(args, sizeof (*args)); 552 return (error); 553 } 554 } else { 555 args = (struct nfs_args *)data; 556 fhandle = (nfs_fhandle *)args->fh; 557 } 558 559 560 flags = args->flags; 561 562 if (uap->flags & MS_REMOUNT) { 563 size_t n; 564 char name[FSTYPSZ]; 565 566 if (uap->flags & MS_SYSSPACE) 567 error = copystr(uap->fstype, name, FSTYPSZ, &n); 568 else 569 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 570 571 if (error) { 572 if (error == ENAMETOOLONG) 573 return (EINVAL); 574 return (error); 575 } 576 577 578 /* 579 * This check is to ensure that the request is a 580 * genuine nfs remount request. 581 */ 582 583 if (strncmp(name, "nfs", 3) != 0) 584 return (EINVAL); 585 586 /* 587 * If the request changes the locking type, disallow the 588 * remount, 589 * because it's questionable whether we can transfer the 590 * locking state correctly. 591 * 592 * Remounts need to save the pathconf information. 593 * Part of the infamous static kludge. 594 */ 595 596 if ((mi = VFTOMI(vfsp)) != NULL) { 597 uint_t new_mi_llock; 598 uint_t old_mi_llock; 599 600 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 601 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 602 if (old_mi_llock != new_mi_llock) 603 return (EBUSY); 604 } 605 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args); 606 607 if (!(uap->flags & MS_SYSSPACE)) { 608 nfs_free_args(args, fhandle); 609 kmem_free(args, sizeof (*args)); 610 } 611 612 return (error); 613 } 614 615 mutex_enter(&mvp->v_lock); 616 if (!(uap->flags & MS_OVERLAY) && 617 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 618 mutex_exit(&mvp->v_lock); 619 if (!(uap->flags & MS_SYSSPACE)) { 620 nfs_free_args(args, fhandle); 621 kmem_free(args, sizeof (*args)); 622 } 623 return (EBUSY); 624 } 625 mutex_exit(&mvp->v_lock); 626 627 /* make sure things are zeroed for errout: */ 628 rtvp = NULL; 629 mi = NULL; 630 secdata = NULL; 631 632 /* 633 * A valid knetconfig structure is required. 634 */ 635 if (!(flags & NFSMNT_KNCONF)) { 636 if (!(uap->flags & MS_SYSSPACE)) { 637 nfs_free_args(args, fhandle); 638 kmem_free(args, sizeof (*args)); 639 } 640 return (EINVAL); 641 } 642 643 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 644 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 645 if (!(uap->flags & MS_SYSSPACE)) { 646 nfs_free_args(args, fhandle); 647 kmem_free(args, sizeof (*args)); 648 } 649 return (EINVAL); 650 } 651 652 653 /* 654 * Allocate a servinfo struct. 655 */ 656 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 657 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 658 if (svp_tail) { 659 svp_2ndlast = svp_tail; 660 svp_tail->sv_next = svp; 661 } else { 662 svp_head = svp; 663 svp_2ndlast = svp; 664 } 665 666 svp_tail = svp; 667 668 /* 669 * Get knetconfig and server address 670 */ 671 svp->sv_knconf = args->knconf; 672 args->knconf = NULL; 673 674 if (args->addr == NULL || args->addr->buf == NULL) { 675 error = EINVAL; 676 goto errout; 677 } 678 679 svp->sv_addr.maxlen = args->addr->maxlen; 680 svp->sv_addr.len = args->addr->len; 681 svp->sv_addr.buf = args->addr->buf; 682 args->addr->buf = NULL; 683 684 /* 685 * Get the root fhandle 686 */ 687 ASSERT(fhandle); 688 689 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 690 svp->sv_fhandle.fh_len = fhandle->fh_len; 691 692 /* 693 * Get server's hostname 694 */ 695 if (flags & NFSMNT_HOSTNAME) { 696 if (args->hostname == NULL) { 697 error = EINVAL; 698 goto errout; 699 } 700 svp->sv_hostnamelen = strlen(args->hostname) + 1; 701 svp->sv_hostname = args->hostname; 702 args->hostname = NULL; 703 } else { 704 char *p = "unknown-host"; 705 svp->sv_hostnamelen = strlen(p) + 1; 706 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 707 (void) strcpy(svp->sv_hostname, p); 708 } 709 710 711 /* 712 * RDMA MOUNT SUPPORT FOR NFS v2: 713 * Establish, is it possible to use RDMA, if so overload the 714 * knconf with rdma specific knconf and free the orignal. 715 */ 716 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 717 /* 718 * Determine the addr type for RDMA, IPv4 or v6. 719 */ 720 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 721 addr_type = AF_INET; 722 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 723 addr_type = AF_INET6; 724 725 if (rdma_reachable(addr_type, &svp->sv_addr, 726 &rdma_knconf) == 0) { 727 /* 728 * If successful, hijack, the orignal knconf and 729 * replace with a new one, depending on the flags. 730 */ 731 svp->sv_origknconf = svp->sv_knconf; 732 svp->sv_knconf = rdma_knconf; 733 knconf = rdma_knconf; 734 } else { 735 if (flags & NFSMNT_TRYRDMA) { 736 #ifdef DEBUG 737 if (rdma_debug) 738 zcmn_err(getzoneid(), CE_WARN, 739 "no RDMA onboard, revert\n"); 740 #endif 741 } 742 743 if (flags & NFSMNT_DORDMA) { 744 /* 745 * If proto=rdma is specified and no RDMA 746 * path to this server is avialable then 747 * ditch this server. 748 * This is not included in the mountable 749 * server list or the replica list. 750 * Check if more servers are specified; 751 * Failover case, otherwise bail out of mount. 752 */ 753 if (args->nfs_args_ext == 754 NFS_ARGS_EXTB && 755 args->nfs_ext_u.nfs_extB.next 756 != NULL) { 757 data = (char *) 758 args->nfs_ext_u.nfs_extB.next; 759 if (uap->flags & MS_RDONLY && 760 !(flags & NFSMNT_SOFT)) { 761 if (svp_head->sv_next == NULL) { 762 svp_tail = NULL; 763 svp_2ndlast = NULL; 764 sv_free(svp_head); 765 goto more; 766 } else { 767 svp_tail = svp_2ndlast; 768 svp_2ndlast->sv_next = 769 NULL; 770 sv_free(svp); 771 goto more; 772 } 773 } 774 } else { 775 /* 776 * This is the last server specified 777 * in the nfs_args list passed down 778 * and its not rdma capable. 779 */ 780 if (svp_head->sv_next == NULL) { 781 /* 782 * Is this the only one 783 */ 784 error = EINVAL; 785 #ifdef DEBUG 786 if (rdma_debug) 787 zcmn_err(getzoneid(), 788 CE_WARN, 789 "No RDMA srv"); 790 #endif 791 goto errout; 792 } else { 793 /* 794 * There is list, since some 795 * servers specified before 796 * this passed all requirements 797 */ 798 svp_tail = svp_2ndlast; 799 svp_2ndlast->sv_next = NULL; 800 sv_free(svp); 801 goto proceed; 802 } 803 } 804 } 805 } 806 } 807 808 /* 809 * Get the extention data which has the new security data structure. 810 */ 811 if (flags & NFSMNT_NEWARGS) { 812 switch (args->nfs_args_ext) { 813 case NFS_ARGS_EXTA: 814 case NFS_ARGS_EXTB: 815 /* 816 * Indicating the application is using the new 817 * sec_data structure to pass in the security 818 * data. 819 */ 820 secdata = args->nfs_ext_u.nfs_extA.secdata; 821 if (secdata == NULL) { 822 error = EINVAL; 823 } else { 824 /* 825 * Need to validate the flavor here if 826 * sysspace, userspace was already 827 * validate from the nfs_copyin function. 828 */ 829 switch (secdata->rpcflavor) { 830 case AUTH_NONE: 831 case AUTH_UNIX: 832 case AUTH_LOOPBACK: 833 case AUTH_DES: 834 case RPCSEC_GSS: 835 break; 836 default: 837 error = EINVAL; 838 goto errout; 839 } 840 } 841 args->nfs_ext_u.nfs_extA.secdata = NULL; 842 break; 843 844 default: 845 error = EINVAL; 846 break; 847 } 848 } else if (flags & NFSMNT_SECURE) { 849 /* 850 * Keep this for backward compatibility to support 851 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 852 */ 853 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 854 error = EINVAL; 855 goto errout; 856 } 857 858 /* 859 * get time sync address. 860 */ 861 if (args->syncaddr == NULL) { 862 error = EFAULT; 863 goto errout; 864 } 865 866 /* 867 * Move security related data to the sec_data structure. 868 */ 869 { 870 dh_k4_clntdata_t *data; 871 char *pf, *p; 872 873 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 874 if (flags & NFSMNT_RPCTIMESYNC) 875 secdata->flags |= AUTH_F_RPCTIMESYNC; 876 data = kmem_alloc(sizeof (*data), KM_SLEEP); 877 bcopy(args->syncaddr, &data->syncaddr, 878 sizeof (*args->syncaddr)); 879 880 881 /* 882 * duplicate the knconf information for the 883 * new opaque data. 884 */ 885 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 886 *data->knconf = *knconf; 887 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 888 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 889 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 890 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 891 data->knconf->knc_protofmly = pf; 892 data->knconf->knc_proto = p; 893 894 /* move server netname to the sec_data structure */ 895 nlen = strlen(args->hostname) + 1; 896 if (nlen != 0) { 897 data->netname = kmem_alloc(nlen, KM_SLEEP); 898 bcopy(args->hostname, data->netname, nlen); 899 data->netnamelen = (int)nlen; 900 } 901 secdata->secmod = secdata->rpcflavor = AUTH_DES; 902 secdata->data = (caddr_t)data; 903 } 904 } else { 905 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 906 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 907 secdata->data = NULL; 908 } 909 svp->sv_secdata = secdata; 910 911 /* 912 * See bug 1180236. 913 * If mount secure failed, we will fall back to AUTH_NONE 914 * and try again. nfs3rootvp() will turn this back off. 915 * 916 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 917 * The server does not care if these procedures have the proper 918 * authentication flavor, so if mount retries using AUTH_NONE 919 * that does not require a credential setup for root then the 920 * automounter would work without requiring root to be 921 * keylogged into AUTH_DES. 922 */ 923 if (secdata->rpcflavor != AUTH_UNIX && 924 secdata->rpcflavor != AUTH_LOOPBACK) 925 secdata->flags |= AUTH_F_TRYNONE; 926 927 /* 928 * Failover support: 929 * 930 * We may have a linked list of nfs_args structures, 931 * which means the user is looking for failover. If 932 * the mount is either not "read-only" or "soft", 933 * we want to bail out with EINVAL. 934 */ 935 if (args->nfs_args_ext == NFS_ARGS_EXTB && 936 args->nfs_ext_u.nfs_extB.next != NULL) { 937 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 938 data = (char *)args->nfs_ext_u.nfs_extB.next; 939 goto more; 940 } 941 error = EINVAL; 942 goto errout; 943 } 944 945 /* 946 * Determine the zone we're being mounted into. 947 */ 948 zone_hold(mntzone = zone); /* start with this assumption */ 949 if (getzoneid() == GLOBAL_ZONEID) { 950 zone_rele(mntzone); 951 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 952 ASSERT(mntzone != NULL); 953 if (mntzone != zone) { 954 error = EBUSY; 955 goto errout; 956 } 957 } 958 959 if (is_system_labeled()) { 960 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 961 svp->sv_knconf, cr); 962 963 if (error > 0) 964 goto errout; 965 966 if (error == -1) { 967 /* change mount to read-only to prevent write-down */ 968 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 969 } 970 } 971 972 /* 973 * Stop the mount from going any further if the zone is going away. 974 */ 975 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 976 error = EBUSY; 977 goto errout; 978 } 979 980 /* 981 * Get root vnode. 982 */ 983 proceed: 984 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 985 986 if (error) 987 goto errout; 988 989 /* 990 * Set option fields in the mount info record 991 */ 992 mi = VTOMI(rtvp); 993 994 if (svp_head->sv_next) 995 mi->mi_flags |= MI_LLOCK; 996 997 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 998 if (!error) { 999 /* static pathconf kludge */ 1000 error = pathconf_get(mi, args); 1001 } 1002 1003 errout: 1004 if (error) { 1005 if (rtvp != NULL) { 1006 rp = VTOR(rtvp); 1007 if (rp->r_flags & RHASHED) 1008 rp_rmhash(rp); 1009 } 1010 sv_free(svp_head); 1011 if (mi != NULL) { 1012 nfs_async_stop(vfsp); 1013 nfs_async_manager_stop(vfsp); 1014 if (mi->mi_io_kstats) { 1015 kstat_delete(mi->mi_io_kstats); 1016 mi->mi_io_kstats = NULL; 1017 } 1018 if (mi->mi_ro_kstats) { 1019 kstat_delete(mi->mi_ro_kstats); 1020 mi->mi_ro_kstats = NULL; 1021 } 1022 nfs_free_mi(mi); 1023 } 1024 } 1025 1026 if (!(uap->flags & MS_SYSSPACE)) { 1027 nfs_free_args(args, fhandle); 1028 kmem_free(args, sizeof (*args)); 1029 } 1030 1031 if (rtvp != NULL) 1032 VN_RELE(rtvp); 1033 1034 if (mntzone != NULL) 1035 zone_rele(mntzone); 1036 1037 return (error); 1038 } 1039 1040 /* 1041 * The pathconf information is kept on a linked list of kmem_alloc'ed 1042 * structs. We search the list & add a new struct iff there is no other 1043 * struct with the same information. 1044 * See sys/pathconf.h for ``the rest of the story.'' 1045 */ 1046 static struct pathcnf *allpc = NULL; 1047 1048 static int 1049 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc) 1050 { 1051 STRUCT_DECL(pathcnf, pc_tmp); 1052 STRUCT_HANDLE(nfs_args, ap); 1053 int i; 1054 model_t model; 1055 1056 model = get_udatamodel(); 1057 STRUCT_INIT(pc_tmp, model); 1058 STRUCT_SET_HANDLE(ap, model, args); 1059 1060 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 1061 STRUCT_FGETP(ap, pathconf) != NULL) { 1062 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 1063 STRUCT_SIZE(pc_tmp))) 1064 return (EFAULT); 1065 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 1066 return (EINVAL); 1067 1068 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 1069 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 1070 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 1071 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 1072 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 1073 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 1074 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 1075 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 1076 for (i = 0; i < _PC_N; i++) 1077 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 1078 } 1079 return (0); 1080 } 1081 1082 static int 1083 pathconf_get(struct mntinfo *mi, struct nfs_args *args) 1084 { 1085 struct pathcnf *p, *pc; 1086 1087 pc = args->pathconf; 1088 if (mi->mi_pathconf != NULL) { 1089 pathconf_rele(mi); 1090 mi->mi_pathconf = NULL; 1091 } 1092 if (args->flags & NFSMNT_POSIX && 1093 args->pathconf != NULL) { 1094 1095 if (_PC_ISSET(_PC_ERROR, pc->pc_mask)) 1096 return (EINVAL); 1097 1098 for (p = allpc; p != NULL; p = p->pc_next) { 1099 if (PCCMP(p, pc) == 0) 1100 break; 1101 } 1102 if (p != NULL) { 1103 mi->mi_pathconf = p; 1104 p->pc_refcnt++; 1105 } else { 1106 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1107 bcopy(pc, p, sizeof (struct pathcnf)); 1108 p->pc_next = allpc; 1109 p->pc_refcnt = 1; 1110 allpc = mi->mi_pathconf = p; 1111 } 1112 } 1113 return (0); 1114 } 1115 1116 /* 1117 * release the static pathconf information 1118 */ 1119 static void 1120 pathconf_rele(struct mntinfo *mi) 1121 { 1122 if (mi->mi_pathconf != NULL) { 1123 if (--mi->mi_pathconf->pc_refcnt == 0) { 1124 struct pathcnf *p; 1125 struct pathcnf *p2; 1126 1127 p2 = p = allpc; 1128 while (p != NULL && p != mi->mi_pathconf) { 1129 p2 = p; 1130 p = p->pc_next; 1131 } 1132 if (p == NULL) { 1133 panic("mi->pathconf"); 1134 /*NOTREACHED*/ 1135 } 1136 if (p == allpc) 1137 allpc = p->pc_next; 1138 else 1139 p2->pc_next = p->pc_next; 1140 kmem_free(p, sizeof (*p)); 1141 mi->mi_pathconf = NULL; 1142 } 1143 } 1144 } 1145 1146 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 1147 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 1148 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 1149 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 1150 1151 static int 1152 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1153 int flags, cred_t *cr, zone_t *zone) 1154 { 1155 vnode_t *rtvp; 1156 mntinfo_t *mi; 1157 dev_t nfs_dev; 1158 struct vattr va; 1159 int error; 1160 rnode_t *rp; 1161 int i; 1162 struct nfs_stats *nfsstatsp; 1163 cred_t *lcr = NULL, *tcr = cr; 1164 1165 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1166 ASSERT(nfsstatsp != NULL); 1167 1168 /* 1169 * Create a mount record and link it to the vfs struct. 1170 */ 1171 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1172 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1173 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1174 mi->mi_flags = MI_ACL | MI_EXTATTR; 1175 if (!(flags & NFSMNT_SOFT)) 1176 mi->mi_flags |= MI_HARD; 1177 if ((flags & NFSMNT_SEMISOFT)) 1178 mi->mi_flags |= MI_SEMISOFT; 1179 if ((flags & NFSMNT_NOPRINT)) 1180 mi->mi_flags |= MI_NOPRINT; 1181 if (flags & NFSMNT_INT) 1182 mi->mi_flags |= MI_INT; 1183 mi->mi_retrans = NFS_RETRIES; 1184 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1185 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1186 mi->mi_timeo = nfs_cots_timeo; 1187 else 1188 mi->mi_timeo = NFS_TIMEO; 1189 mi->mi_prog = NFS_PROGRAM; 1190 mi->mi_vers = NFS_VERSION; 1191 mi->mi_rfsnames = rfsnames_v2; 1192 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 1193 mi->mi_call_type = call_type_v2; 1194 mi->mi_ss_call_type = ss_call_type_v2; 1195 mi->mi_timer_type = timer_type_v2; 1196 mi->mi_aclnames = aclnames_v2; 1197 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 1198 mi->mi_acl_call_type = acl_call_type_v2; 1199 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 1200 mi->mi_acl_timer_type = acl_timer_type_v2; 1201 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1202 mi->mi_servers = svp; 1203 mi->mi_curr_serv = svp; 1204 mi->mi_acregmin = SEC2HR(ACREGMIN); 1205 mi->mi_acregmax = SEC2HR(ACREGMAX); 1206 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1207 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1208 1209 if (nfs_dynamic) 1210 mi->mi_flags |= MI_DYNAMIC; 1211 1212 if (flags & NFSMNT_DIRECTIO) 1213 mi->mi_flags |= MI_DIRECTIO; 1214 1215 /* 1216 * Make a vfs struct for nfs. We do this here instead of below 1217 * because rtvp needs a vfs before we can do a getattr on it. 1218 * 1219 * Assign a unique device id to the mount 1220 */ 1221 mutex_enter(&nfs_minor_lock); 1222 do { 1223 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1224 nfs_dev = makedevice(nfs_major, nfs_minor); 1225 } while (vfs_devismounted(nfs_dev)); 1226 mutex_exit(&nfs_minor_lock); 1227 1228 vfsp->vfs_dev = nfs_dev; 1229 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 1230 vfsp->vfs_data = (caddr_t)mi; 1231 vfsp->vfs_fstype = nfsfstyp; 1232 vfsp->vfs_bsize = NFS_MAXDATA; 1233 1234 /* 1235 * Initialize fields used to support async putpage operations. 1236 */ 1237 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1238 mi->mi_async_clusters[i] = nfs_async_clusters; 1239 mi->mi_async_init_clusters = nfs_async_clusters; 1240 mi->mi_async_curr = &mi->mi_async_reqs[0]; 1241 mi->mi_max_threads = nfs_max_threads; 1242 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1243 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1244 cv_init(&mi->mi_async_work_cv, NULL, CV_DEFAULT, NULL); 1245 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1246 1247 mi->mi_vfsp = vfsp; 1248 zone_hold(mi->mi_zone = zone); 1249 nfs_mi_zonelist_add(mi); 1250 1251 /* 1252 * Make the root vnode, use it to get attributes, 1253 * then remake it with the attributes. 1254 */ 1255 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 1256 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1257 1258 va.va_mask = AT_ALL; 1259 1260 /* 1261 * If the uid is set then set the creds for secure mounts 1262 * by proxy processes such as automountd. 1263 */ 1264 if (svp->sv_secdata->uid != 0 && 1265 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1266 lcr = crdup(cr); 1267 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1268 tcr = lcr; 1269 } 1270 1271 error = nfsgetattr(rtvp, &va, tcr); 1272 if (error) 1273 goto bad; 1274 rtvp->v_type = va.va_type; 1275 1276 /* 1277 * Poll every server to get the filesystem stats; we're 1278 * only interested in the server's transfer size, and we 1279 * want the minimum. 1280 * 1281 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1282 * which is only for the mount operation. 1283 */ 1284 1285 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 1286 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 1287 1288 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1289 struct nfsstatfs fs; 1290 int douprintf; 1291 1292 douprintf = 1; 1293 mi->mi_curr_serv = svp; 1294 1295 error = rfs2call(mi, RFS_STATFS, 1296 xdr_fhandle, (caddr_t)svp->sv_fhandle.fh_buf, 1297 xdr_statfs, (caddr_t)&fs, tcr, &douprintf, 1298 &fs.fs_status, 0, NULL); 1299 if (error) 1300 goto bad; 1301 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1302 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1303 } 1304 mi->mi_curr_serv = mi->mi_servers; 1305 mi->mi_curread = mi->mi_tsize; 1306 mi->mi_curwrite = mi->mi_stsize; 1307 1308 /* 1309 * Start the manager thread responsible for handling async worker 1310 * threads. 1311 */ 1312 VFS_HOLD(vfsp); /* add reference for thread */ 1313 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1314 vfsp, 0, minclsyspri); 1315 ASSERT(mi->mi_manager_thread != NULL); 1316 1317 /* 1318 * Initialize kstats 1319 */ 1320 nfs_mnt_kstat_init(vfsp); 1321 1322 mi->mi_type = rtvp->v_type; 1323 1324 *rtvpp = rtvp; 1325 if (lcr != NULL) 1326 crfree(lcr); 1327 1328 return (0); 1329 bad: 1330 /* 1331 * An error occurred somewhere, need to clean up... 1332 * We need to release our reference to the root vnode and 1333 * destroy the mntinfo struct that we just created. 1334 */ 1335 if (lcr != NULL) 1336 crfree(lcr); 1337 rp = VTOR(rtvp); 1338 if (rp->r_flags & RHASHED) 1339 rp_rmhash(rp); 1340 VN_RELE(rtvp); 1341 nfs_async_stop(vfsp); 1342 nfs_async_manager_stop(vfsp); 1343 if (mi->mi_io_kstats) { 1344 kstat_delete(mi->mi_io_kstats); 1345 mi->mi_io_kstats = NULL; 1346 } 1347 if (mi->mi_ro_kstats) { 1348 kstat_delete(mi->mi_ro_kstats); 1349 mi->mi_ro_kstats = NULL; 1350 } 1351 nfs_free_mi(mi); 1352 *rtvpp = NULL; 1353 return (error); 1354 } 1355 1356 /* 1357 * vfs operations 1358 */ 1359 static int 1360 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1361 { 1362 mntinfo_t *mi; 1363 ushort_t omax; 1364 1365 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1366 return (EPERM); 1367 1368 mi = VFTOMI(vfsp); 1369 if (flag & MS_FORCE) { 1370 1371 vfsp->vfs_flag |= VFS_UNMOUNTED; 1372 1373 /* 1374 * We are about to stop the async manager. 1375 * Let every one know not to schedule any 1376 * more async requests. 1377 */ 1378 mutex_enter(&mi->mi_async_lock); 1379 mi->mi_max_threads = 0; 1380 cv_broadcast(&mi->mi_async_work_cv); 1381 mutex_exit(&mi->mi_async_lock); 1382 1383 /* 1384 * We need to stop the manager thread explicitly; the worker 1385 * threads can time out and exit on their own. 1386 */ 1387 nfs_async_manager_stop(vfsp); 1388 destroy_rtable(vfsp, cr); 1389 if (mi->mi_io_kstats) { 1390 kstat_delete(mi->mi_io_kstats); 1391 mi->mi_io_kstats = NULL; 1392 } 1393 if (mi->mi_ro_kstats) { 1394 kstat_delete(mi->mi_ro_kstats); 1395 mi->mi_ro_kstats = NULL; 1396 } 1397 return (0); 1398 } 1399 /* 1400 * Wait until all asynchronous putpage operations on 1401 * this file system are complete before flushing rnodes 1402 * from the cache. 1403 */ 1404 omax = mi->mi_max_threads; 1405 if (nfs_async_stop_sig(vfsp)) { 1406 return (EINTR); 1407 } 1408 rflush(vfsp, cr); 1409 /* 1410 * If there are any active vnodes on this file system, 1411 * then the file system is busy and can't be umounted. 1412 */ 1413 if (check_rtable(vfsp)) { 1414 mutex_enter(&mi->mi_async_lock); 1415 mi->mi_max_threads = omax; 1416 mutex_exit(&mi->mi_async_lock); 1417 return (EBUSY); 1418 } 1419 /* 1420 * The unmount can't fail from now on; stop the manager thread. 1421 */ 1422 nfs_async_manager_stop(vfsp); 1423 /* 1424 * Destroy all rnodes belonging to this file system from the 1425 * rnode hash queues and purge any resources allocated to 1426 * them. 1427 */ 1428 destroy_rtable(vfsp, cr); 1429 if (mi->mi_io_kstats) { 1430 kstat_delete(mi->mi_io_kstats); 1431 mi->mi_io_kstats = NULL; 1432 } 1433 if (mi->mi_ro_kstats) { 1434 kstat_delete(mi->mi_ro_kstats); 1435 mi->mi_ro_kstats = NULL; 1436 } 1437 return (0); 1438 } 1439 1440 /* 1441 * find root of nfs 1442 */ 1443 static int 1444 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1445 { 1446 mntinfo_t *mi; 1447 vnode_t *vp; 1448 servinfo_t *svp; 1449 rnode_t *rp; 1450 int error = 0; 1451 1452 mi = VFTOMI(vfsp); 1453 1454 if (nfs_zone() != mi->mi_zone) 1455 return (EPERM); 1456 1457 svp = mi->mi_curr_serv; 1458 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1459 mutex_enter(&svp->sv_lock); 1460 svp->sv_flags &= ~SV_ROOT_STALE; 1461 mutex_exit(&svp->sv_lock); 1462 error = ENOENT; 1463 } 1464 1465 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1466 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1467 1468 /* 1469 * if the SV_ROOT_STALE flag was reset above, reset the 1470 * RSTALE flag if needed and return an error 1471 */ 1472 if (error == ENOENT) { 1473 rp = VTOR(vp); 1474 if (svp && rp->r_flags & RSTALE) { 1475 mutex_enter(&rp->r_statelock); 1476 rp->r_flags &= ~RSTALE; 1477 mutex_exit(&rp->r_statelock); 1478 } 1479 VN_RELE(vp); 1480 return (error); 1481 } 1482 1483 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1484 1485 vp->v_type = mi->mi_type; 1486 1487 *vpp = vp; 1488 1489 return (0); 1490 } 1491 1492 /* 1493 * Get file system statistics. 1494 */ 1495 static int 1496 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1497 { 1498 int error; 1499 mntinfo_t *mi; 1500 struct nfsstatfs fs; 1501 int douprintf; 1502 failinfo_t fi; 1503 vnode_t *vp; 1504 1505 error = nfs_root(vfsp, &vp); 1506 if (error) 1507 return (error); 1508 1509 mi = VFTOMI(vfsp); 1510 douprintf = 1; 1511 fi.vp = vp; 1512 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1513 fi.copyproc = nfscopyfh; 1514 fi.lookupproc = nfslookup; 1515 fi.xattrdirproc = acl_getxattrdir2; 1516 1517 error = rfs2call(mi, RFS_STATFS, 1518 xdr_fhandle, (caddr_t)VTOFH(vp), 1519 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, 1520 &fs.fs_status, 0, &fi); 1521 1522 if (!error) { 1523 error = geterrno(fs.fs_status); 1524 if (!error) { 1525 mutex_enter(&mi->mi_lock); 1526 if (mi->mi_stsize) { 1527 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1528 } else { 1529 mi->mi_stsize = fs.fs_tsize; 1530 mi->mi_curwrite = mi->mi_stsize; 1531 } 1532 mutex_exit(&mi->mi_lock); 1533 sbp->f_bsize = fs.fs_bsize; 1534 sbp->f_frsize = fs.fs_bsize; 1535 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1536 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1537 /* 1538 * Some servers may return negative available 1539 * block counts. They may do this because they 1540 * calculate the number of available blocks by 1541 * subtracting the number of used blocks from 1542 * the total number of blocks modified by the 1543 * minimum free value. For example, if the 1544 * minumum free percentage is 10 and the file 1545 * system is greater than 90 percent full, then 1546 * 90 percent of the total blocks minus the 1547 * actual number of used blocks may be a 1548 * negative number. 1549 * 1550 * In this case, we need to sign extend the 1551 * negative number through the assignment from 1552 * the 32 bit bavail count to the 64 bit bavail 1553 * count. 1554 * 1555 * We need to be able to discern between there 1556 * just being a lot of available blocks on the 1557 * file system and the case described above. 1558 * We are making the assumption that it does 1559 * not make sense to have more available blocks 1560 * than there are free blocks. So, if there 1561 * are, then we treat the number as if it were 1562 * a negative number and arrange to have it 1563 * sign extended when it is converted from 32 1564 * bits to 64 bits. 1565 */ 1566 if (fs.fs_bavail <= fs.fs_bfree) 1567 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1568 else { 1569 sbp->f_bavail = 1570 (fsblkcnt64_t)((long)fs.fs_bavail); 1571 } 1572 sbp->f_files = (fsfilcnt64_t)-1; 1573 sbp->f_ffree = (fsfilcnt64_t)-1; 1574 sbp->f_favail = (fsfilcnt64_t)-1; 1575 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1576 (void) strncpy(sbp->f_basetype, 1577 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1578 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1579 sbp->f_namemax = (uint32_t)-1; 1580 } else { 1581 PURGE_STALE_FH(error, vp, CRED()); 1582 } 1583 } 1584 1585 VN_RELE(vp); 1586 1587 return (error); 1588 } 1589 1590 static kmutex_t nfs_syncbusy; 1591 1592 /* 1593 * Flush dirty nfs files for file system vfsp. 1594 * If vfsp == NULL, all nfs files are flushed. 1595 */ 1596 /* ARGSUSED */ 1597 static int 1598 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1599 { 1600 /* 1601 * Cross-zone calls are OK here, since this translates to a 1602 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1603 */ 1604 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1605 rflush(vfsp, cr); 1606 mutex_exit(&nfs_syncbusy); 1607 } 1608 return (0); 1609 } 1610 1611 /* ARGSUSED */ 1612 static int 1613 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1614 { 1615 int error; 1616 vnode_t *vp; 1617 struct vattr va; 1618 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1619 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1620 1621 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1622 return (EPERM); 1623 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1624 #ifdef DEBUG 1625 zcmn_err(zoneid, CE_WARN, 1626 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1627 (int)(sizeof (*nfsfidp) - sizeof (short))); 1628 #endif 1629 *vpp = NULL; 1630 return (ESTALE); 1631 } 1632 1633 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1634 gethrtime(), CRED(), NULL, NULL); 1635 1636 if (VTOR(vp)->r_flags & RSTALE) { 1637 VN_RELE(vp); 1638 *vpp = NULL; 1639 return (ENOENT); 1640 } 1641 1642 if (vp->v_type == VNON) { 1643 va.va_mask = AT_ALL; 1644 error = nfsgetattr(vp, &va, CRED()); 1645 if (error) { 1646 VN_RELE(vp); 1647 *vpp = NULL; 1648 return (error); 1649 } 1650 vp->v_type = va.va_type; 1651 } 1652 1653 *vpp = vp; 1654 1655 return (0); 1656 } 1657 1658 /* ARGSUSED */ 1659 static int 1660 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1661 { 1662 vnode_t *rtvp; 1663 char root_hostname[SYS_NMLN+1]; 1664 struct servinfo *svp; 1665 int error; 1666 int vfsflags; 1667 size_t size; 1668 char *root_path; 1669 struct pathname pn; 1670 char *name; 1671 cred_t *cr; 1672 struct nfs_args args; /* nfs mount arguments */ 1673 static char token[10]; 1674 1675 bzero(&args, sizeof (args)); 1676 1677 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1678 clkset(-1L); /* hack for now - until we get time svc? */ 1679 1680 if (why == ROOT_REMOUNT) { 1681 /* 1682 * Shouldn't happen. 1683 */ 1684 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1685 } 1686 1687 if (why == ROOT_UNMOUNT) { 1688 /* 1689 * Nothing to do for NFS. 1690 */ 1691 return (0); 1692 } 1693 1694 /* 1695 * why == ROOT_INIT 1696 */ 1697 1698 name = token; 1699 *name = 0; 1700 getfsname("root", name, sizeof (token)); 1701 1702 pn_alloc(&pn); 1703 root_path = pn.pn_path; 1704 1705 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1706 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1707 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1708 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1709 1710 /* 1711 * Get server address 1712 * Get the root fhandle 1713 * Get server's transport 1714 * Get server's hostname 1715 * Get options 1716 */ 1717 args.addr = &svp->sv_addr; 1718 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1719 args.knconf = svp->sv_knconf; 1720 args.hostname = root_hostname; 1721 vfsflags = 0; 1722 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1723 &args, &vfsflags)) { 1724 nfs_cmn_err(error, CE_WARN, 1725 "nfs_mountroot: mount_root failed: %m"); 1726 sv_free(svp); 1727 pn_free(&pn); 1728 return (error); 1729 } 1730 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1731 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1732 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1733 (void) strcpy(svp->sv_hostname, root_hostname); 1734 1735 /* 1736 * Force root partition to always be mounted with AUTH_UNIX for now 1737 */ 1738 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1739 svp->sv_secdata->secmod = AUTH_UNIX; 1740 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1741 svp->sv_secdata->data = NULL; 1742 1743 cr = crgetcred(); 1744 rtvp = NULL; 1745 1746 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1747 1748 crfree(cr); 1749 1750 if (error) { 1751 pn_free(&pn); 1752 sv_free(svp); 1753 return (error); 1754 } 1755 1756 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1757 if (error) { 1758 nfs_cmn_err(error, CE_WARN, 1759 "nfs_mountroot: invalid root mount options"); 1760 pn_free(&pn); 1761 goto errout; 1762 } 1763 1764 (void) vfs_lock_wait(vfsp); 1765 vfs_add(NULL, vfsp, vfsflags); 1766 vfs_unlock(vfsp); 1767 1768 size = strlen(svp->sv_hostname); 1769 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1770 rootfs.bo_name[size] = ':'; 1771 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1772 1773 pn_free(&pn); 1774 1775 errout: 1776 if (error) { 1777 sv_free(svp); 1778 nfs_async_stop(vfsp); 1779 nfs_async_manager_stop(vfsp); 1780 } 1781 1782 if (rtvp != NULL) 1783 VN_RELE(rtvp); 1784 1785 return (error); 1786 } 1787 1788 /* 1789 * Initialization routine for VFS routines. Should only be called once 1790 */ 1791 int 1792 nfs_vfsinit(void) 1793 { 1794 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1795 return (0); 1796 } 1797 1798 void 1799 nfs_vfsfini(void) 1800 { 1801 mutex_destroy(&nfs_syncbusy); 1802 } 1803 1804 void 1805 nfs_freevfs(vfs_t *vfsp) 1806 { 1807 mntinfo_t *mi; 1808 servinfo_t *svp; 1809 1810 /* free up the resources */ 1811 mi = VFTOMI(vfsp); 1812 pathconf_rele(mi); 1813 svp = mi->mi_servers; 1814 mi->mi_servers = mi->mi_curr_serv = NULL; 1815 sv_free(svp); 1816 1817 /* 1818 * By this time we should have already deleted the 1819 * mi kstats in the unmount code. If they are still around 1820 * somethings wrong 1821 */ 1822 ASSERT(mi->mi_io_kstats == NULL); 1823 nfs_free_mi(mi); 1824 } 1825