1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. 23 * 24 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 25 * All rights reserved. 26 */ 27 28 #include <sys/param.h> 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/cred.h> 32 #include <sys/vfs.h> 33 #include <sys/vfs_opreg.h> 34 #include <sys/vnode.h> 35 #include <sys/pathname.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/mkdev.h> 39 #include <sys/mount.h> 40 #include <sys/mntent.h> 41 #include <sys/statvfs.h> 42 #include <sys/errno.h> 43 #include <sys/debug.h> 44 #include <sys/cmn_err.h> 45 #include <sys/utsname.h> 46 #include <sys/bootconf.h> 47 #include <sys/modctl.h> 48 #include <sys/acl.h> 49 #include <sys/flock.h> 50 #include <sys/policy.h> 51 #include <sys/zone.h> 52 #include <sys/class.h> 53 #include <sys/socket.h> 54 #include <sys/netconfig.h> 55 #include <sys/mntent.h> 56 #include <sys/tsol/label.h> 57 58 #include <rpc/types.h> 59 #include <rpc/auth.h> 60 #include <rpc/clnt.h> 61 62 #include <nfs/nfs.h> 63 #include <nfs/nfs_clnt.h> 64 #include <nfs/rnode.h> 65 #include <nfs/mount.h> 66 #include <nfs/nfs_acl.h> 67 68 #include <fs/fs_subr.h> 69 70 /* 71 * From rpcsec module (common/rpcsec). 72 */ 73 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t); 74 extern void sec_clnt_freeinfo(struct sec_data *); 75 76 static int pathconf_copyin(struct nfs_args *, struct pathcnf *); 77 static int pathconf_get(struct mntinfo *, struct nfs_args *); 78 static void pathconf_rele(struct mntinfo *); 79 80 /* 81 * The order and contents of this structure must be kept in sync with that of 82 * rfsreqcnt_v2_tmpl in nfs_stats.c 83 */ 84 static char *rfsnames_v2[] = { 85 "null", "getattr", "setattr", "unused", "lookup", "readlink", "read", 86 "unused", "write", "create", "remove", "rename", "link", "symlink", 87 "mkdir", "rmdir", "readdir", "fsstat" 88 }; 89 90 /* 91 * This table maps from NFS protocol number into call type. 92 * Zero means a "Lookup" type call 93 * One means a "Read" type call 94 * Two means a "Write" type call 95 * This is used to select a default time-out. 96 */ 97 static uchar_t call_type_v2[] = { 98 0, 0, 1, 0, 0, 0, 1, 99 0, 2, 2, 2, 2, 2, 2, 100 2, 2, 1, 0 101 }; 102 103 /* 104 * Similar table, but to determine which timer to use 105 * (only real reads and writes!) 106 */ 107 static uchar_t timer_type_v2[] = { 108 0, 0, 0, 0, 0, 0, 1, 109 0, 2, 0, 0, 0, 0, 0, 110 0, 0, 1, 0 111 }; 112 113 /* 114 * This table maps from NFS protocol number into a call type 115 * for the semisoft mount option. 116 * Zero means do not repeat operation. 117 * One means repeat. 118 */ 119 static uchar_t ss_call_type_v2[] = { 120 0, 0, 1, 0, 0, 0, 0, 121 0, 1, 1, 1, 1, 1, 1, 122 1, 1, 0, 0 123 }; 124 125 /* 126 * nfs vfs operations. 127 */ 128 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *); 129 static int nfs_unmount(vfs_t *, int, cred_t *); 130 static int nfs_root(vfs_t *, vnode_t **); 131 static int nfs_statvfs(vfs_t *, struct statvfs64 *); 132 static int nfs_sync(vfs_t *, short, cred_t *); 133 static int nfs_vget(vfs_t *, vnode_t **, fid_t *); 134 static int nfs_mountroot(vfs_t *, whymountroot_t); 135 static void nfs_freevfs(vfs_t *); 136 137 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *, 138 int, cred_t *, zone_t *); 139 140 /* 141 * Initialize the vfs structure 142 */ 143 144 int nfsfstyp; 145 vfsops_t *nfs_vfsops; 146 147 /* 148 * Debug variable to check for rdma based 149 * transport startup and cleanup. Controlled 150 * through /etc/system. Off by default. 151 */ 152 int rdma_debug = 0; 153 154 int 155 nfsinit(int fstyp, char *name) 156 { 157 static const fs_operation_def_t nfs_vfsops_template[] = { 158 VFSNAME_MOUNT, { .vfs_mount = nfs_mount }, 159 VFSNAME_UNMOUNT, { .vfs_unmount = nfs_unmount }, 160 VFSNAME_ROOT, { .vfs_root = nfs_root }, 161 VFSNAME_STATVFS, { .vfs_statvfs = nfs_statvfs }, 162 VFSNAME_SYNC, { .vfs_sync = nfs_sync }, 163 VFSNAME_VGET, { .vfs_vget = nfs_vget }, 164 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs_mountroot }, 165 VFSNAME_FREEVFS, { .vfs_freevfs = nfs_freevfs }, 166 NULL, NULL 167 }; 168 int error; 169 170 error = vfs_setfsops(fstyp, nfs_vfsops_template, &nfs_vfsops); 171 if (error != 0) { 172 zcmn_err(GLOBAL_ZONEID, CE_WARN, 173 "nfsinit: bad vfs ops template"); 174 return (error); 175 } 176 177 error = vn_make_ops(name, nfs_vnodeops_template, &nfs_vnodeops); 178 if (error != 0) { 179 (void) vfs_freevfsops_by_type(fstyp); 180 zcmn_err(GLOBAL_ZONEID, CE_WARN, 181 "nfsinit: bad vnode ops template"); 182 return (error); 183 } 184 185 186 nfsfstyp = fstyp; 187 188 return (0); 189 } 190 191 void 192 nfsfini(void) 193 { 194 } 195 196 static void 197 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh) 198 { 199 200 if (fh) 201 kmem_free(fh, sizeof (*fh)); 202 203 if (nargs->pathconf) { 204 kmem_free(nargs->pathconf, sizeof (struct pathcnf)); 205 nargs->pathconf = NULL; 206 } 207 208 if (nargs->knconf) { 209 if (nargs->knconf->knc_protofmly) 210 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE); 211 if (nargs->knconf->knc_proto) 212 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE); 213 kmem_free(nargs->knconf, sizeof (*nargs->knconf)); 214 nargs->knconf = NULL; 215 } 216 217 if (nargs->fh) { 218 kmem_free(nargs->fh, strlen(nargs->fh) + 1); 219 nargs->fh = NULL; 220 } 221 222 if (nargs->hostname) { 223 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1); 224 nargs->hostname = NULL; 225 } 226 227 if (nargs->addr) { 228 if (nargs->addr->buf) { 229 ASSERT(nargs->addr->len); 230 kmem_free(nargs->addr->buf, nargs->addr->len); 231 } 232 kmem_free(nargs->addr, sizeof (struct netbuf)); 233 nargs->addr = NULL; 234 } 235 236 if (nargs->syncaddr) { 237 ASSERT(nargs->syncaddr->len); 238 if (nargs->syncaddr->buf) { 239 ASSERT(nargs->syncaddr->len); 240 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len); 241 } 242 kmem_free(nargs->syncaddr, sizeof (struct netbuf)); 243 nargs->syncaddr = NULL; 244 } 245 246 if (nargs->netname) { 247 kmem_free(nargs->netname, strlen(nargs->netname) + 1); 248 nargs->netname = NULL; 249 } 250 251 if (nargs->nfs_ext_u.nfs_extA.secdata) { 252 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata); 253 nargs->nfs_ext_u.nfs_extA.secdata = NULL; 254 } 255 } 256 257 static int 258 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh) 259 { 260 261 int error; 262 size_t nlen; /* length of netname */ 263 size_t hlen; /* length of hostname */ 264 char netname[MAXNETNAMELEN+1]; /* server's netname */ 265 struct netbuf addr; /* server's address */ 266 struct netbuf syncaddr; /* AUTH_DES time sync addr */ 267 struct knetconfig *knconf; /* transport knetconfig structure */ 268 struct sec_data *secdata = NULL; /* security data */ 269 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */ 270 STRUCT_DECL(knetconfig, knconf_tmp); 271 STRUCT_DECL(netbuf, addr_tmp); 272 int flags; 273 struct pathcnf *pc; /* Pathconf */ 274 char *p, *pf; 275 char *userbufptr; 276 277 278 bzero(nargs, sizeof (*nargs)); 279 280 STRUCT_INIT(args, get_udatamodel()); 281 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE)); 282 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args)))) 283 return (EFAULT); 284 285 nargs->wsize = STRUCT_FGET(args, wsize); 286 nargs->rsize = STRUCT_FGET(args, rsize); 287 nargs->timeo = STRUCT_FGET(args, timeo); 288 nargs->retrans = STRUCT_FGET(args, retrans); 289 nargs->acregmin = STRUCT_FGET(args, acregmin); 290 nargs->acregmax = STRUCT_FGET(args, acregmax); 291 nargs->acdirmin = STRUCT_FGET(args, acdirmin); 292 nargs->acdirmax = STRUCT_FGET(args, acdirmax); 293 294 flags = STRUCT_FGET(args, flags); 295 nargs->flags = flags; 296 297 298 addr.buf = NULL; 299 syncaddr.buf = NULL; 300 301 /* 302 * Allocate space for a knetconfig structure and 303 * its strings and copy in from user-land. 304 */ 305 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP); 306 STRUCT_INIT(knconf_tmp, get_udatamodel()); 307 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp), 308 STRUCT_SIZE(knconf_tmp))) { 309 kmem_free(knconf, sizeof (*knconf)); 310 return (EFAULT); 311 } 312 313 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics); 314 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly); 315 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto); 316 if (get_udatamodel() != DATAMODEL_LP64) { 317 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev)); 318 } else { 319 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev); 320 } 321 322 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 323 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 324 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL); 325 if (error) { 326 kmem_free(pf, KNC_STRSIZE); 327 kmem_free(p, KNC_STRSIZE); 328 kmem_free(knconf, sizeof (*knconf)); 329 return (error); 330 } 331 332 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL); 333 if (error) { 334 kmem_free(pf, KNC_STRSIZE); 335 kmem_free(p, KNC_STRSIZE); 336 kmem_free(knconf, sizeof (*knconf)); 337 return (error); 338 } 339 340 341 knconf->knc_protofmly = pf; 342 knconf->knc_proto = p; 343 344 nargs->knconf = knconf; 345 346 /* Copyin pathconf if there is one */ 347 if (STRUCT_FGETP(args, pathconf) != NULL) { 348 pc = kmem_alloc(sizeof (*pc), KM_SLEEP); 349 error = pathconf_copyin(STRUCT_BUF(args), pc); 350 nargs->pathconf = pc; 351 if (error) 352 goto errout; 353 } 354 355 /* 356 * Get server address 357 */ 358 STRUCT_INIT(addr_tmp, get_udatamodel()); 359 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp), 360 STRUCT_SIZE(addr_tmp))) { 361 error = EFAULT; 362 goto errout; 363 } 364 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 365 userbufptr = STRUCT_FGETP(addr_tmp, buf); 366 addr.len = STRUCT_FGET(addr_tmp, len); 367 addr.buf = kmem_alloc(addr.len, KM_SLEEP); 368 addr.maxlen = addr.len; 369 if (copyin(userbufptr, addr.buf, addr.len)) { 370 kmem_free(addr.buf, addr.len); 371 error = EFAULT; 372 goto errout; 373 } 374 bcopy(&addr, nargs->addr, sizeof (struct netbuf)); 375 376 /* 377 * Get the root fhandle 378 */ 379 380 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) { 381 error = EFAULT; 382 goto errout; 383 } 384 fh->fh_len = NFS_FHSIZE; 385 386 /* 387 * Get server's hostname 388 */ 389 if (flags & NFSMNT_HOSTNAME) { 390 error = copyinstr(STRUCT_FGETP(args, hostname), netname, 391 sizeof (netname), &hlen); 392 if (error) 393 goto errout; 394 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP); 395 (void) strcpy(nargs->hostname, netname); 396 397 } else { 398 nargs->hostname = NULL; 399 } 400 401 402 /* 403 * If there are syncaddr and netname data, load them in. This is 404 * to support data needed for NFSV4 when AUTH_DH is the negotiated 405 * flavor via SECINFO. (instead of using MOUNT protocol in V3). 406 */ 407 netname[0] = '\0'; 408 if (flags & NFSMNT_SECURE) { 409 if (STRUCT_FGETP(args, syncaddr) == NULL) { 410 error = EINVAL; 411 goto errout; 412 } 413 /* get syncaddr */ 414 STRUCT_INIT(addr_tmp, get_udatamodel()); 415 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp), 416 STRUCT_SIZE(addr_tmp))) { 417 error = EINVAL; 418 goto errout; 419 } 420 userbufptr = STRUCT_FGETP(addr_tmp, buf); 421 syncaddr.len = STRUCT_FGET(addr_tmp, len); 422 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP); 423 syncaddr.maxlen = syncaddr.len; 424 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) { 425 kmem_free(syncaddr.buf, syncaddr.len); 426 error = EFAULT; 427 goto errout; 428 } 429 430 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); 431 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf)); 432 433 ASSERT(STRUCT_FGETP(args, netname)); 434 if (copyinstr(STRUCT_FGETP(args, netname), netname, 435 sizeof (netname), &nlen)) { 436 error = EFAULT; 437 goto errout; 438 } 439 440 netname[nlen] = '\0'; 441 nargs->netname = kmem_zalloc(nlen, KM_SLEEP); 442 (void) strcpy(nargs->netname, netname); 443 } 444 445 /* 446 * Get the extention data which has the security data structure. 447 * This includes data for AUTH_SYS as well. 448 */ 449 if (flags & NFSMNT_NEWARGS) { 450 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext); 451 if (nargs->nfs_args_ext == NFS_ARGS_EXTA || 452 nargs->nfs_args_ext == NFS_ARGS_EXTB) { 453 /* 454 * Indicating the application is using the new 455 * sec_data structure to pass in the security 456 * data. 457 */ 458 if (STRUCT_FGETP(args, 459 nfs_ext_u.nfs_extA.secdata) != NULL) { 460 error = sec_clnt_loadinfo( 461 (struct sec_data *)STRUCT_FGETP(args, 462 nfs_ext_u.nfs_extA.secdata), &secdata, 463 get_udatamodel()); 464 } 465 nargs->nfs_ext_u.nfs_extA.secdata = secdata; 466 } 467 } 468 469 if (error) 470 goto errout; 471 472 /* 473 * Failover support: 474 * 475 * We may have a linked list of nfs_args structures, 476 * which means the user is looking for failover. If 477 * the mount is either not "read-only" or "soft", 478 * we want to bail out with EINVAL. 479 */ 480 if (nargs->nfs_args_ext == NFS_ARGS_EXTB) 481 nargs->nfs_ext_u.nfs_extB.next = 482 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next); 483 484 errout: 485 if (error) 486 nfs_free_args(nargs, fh); 487 488 return (error); 489 } 490 491 492 /* 493 * nfs mount vfsop 494 * Set up mount info record and attach it to vfs struct. 495 */ 496 static int 497 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 498 { 499 char *data = uap->dataptr; 500 int error; 501 vnode_t *rtvp; /* the server's root */ 502 mntinfo_t *mi; /* mount info, pointed at by vfs */ 503 size_t nlen; /* length of netname */ 504 struct knetconfig *knconf; /* transport knetconfig structure */ 505 struct knetconfig *rdma_knconf; /* rdma transport structure */ 506 rnode_t *rp; 507 struct servinfo *svp; /* nfs server info */ 508 struct servinfo *svp_tail = NULL; /* previous nfs server info */ 509 struct servinfo *svp_head; /* first nfs server info */ 510 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */ 511 struct sec_data *secdata; /* security data */ 512 struct nfs_args *args = NULL; 513 int flags, addr_type; 514 zone_t *zone = nfs_zone(); 515 zone_t *mntzone = NULL; 516 nfs_fhandle *fhandle = NULL; 517 518 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 519 return (error); 520 521 if (mvp->v_type != VDIR) 522 return (ENOTDIR); 523 524 /* 525 * get arguments 526 * 527 * nfs_args is now versioned and is extensible, so 528 * uap->datalen might be different from sizeof (args) 529 * in a compatible situation. 530 */ 531 more: 532 533 if (!(uap->flags & MS_SYSSPACE)) { 534 if (args == NULL) 535 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP); 536 else { 537 nfs_free_args(args, fhandle); 538 fhandle = NULL; 539 } 540 if (fhandle == NULL) 541 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP); 542 error = nfs_copyin(data, uap->datalen, args, fhandle); 543 if (error) { 544 if (args) 545 kmem_free(args, sizeof (*args)); 546 return (error); 547 } 548 } else { 549 args = (struct nfs_args *)data; 550 fhandle = (nfs_fhandle *)args->fh; 551 } 552 553 554 flags = args->flags; 555 556 if (uap->flags & MS_REMOUNT) { 557 size_t n; 558 char name[FSTYPSZ]; 559 560 if (uap->flags & MS_SYSSPACE) 561 error = copystr(uap->fstype, name, FSTYPSZ, &n); 562 else 563 error = copyinstr(uap->fstype, name, FSTYPSZ, &n); 564 565 if (error) { 566 if (error == ENAMETOOLONG) 567 return (EINVAL); 568 return (error); 569 } 570 571 572 /* 573 * This check is to ensure that the request is a 574 * genuine nfs remount request. 575 */ 576 577 if (strncmp(name, "nfs", 3) != 0) 578 return (EINVAL); 579 580 /* 581 * If the request changes the locking type, disallow the 582 * remount, 583 * because it's questionable whether we can transfer the 584 * locking state correctly. 585 * 586 * Remounts need to save the pathconf information. 587 * Part of the infamous static kludge. 588 */ 589 590 if ((mi = VFTOMI(vfsp)) != NULL) { 591 uint_t new_mi_llock; 592 uint_t old_mi_llock; 593 594 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0; 595 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0; 596 if (old_mi_llock != new_mi_llock) 597 return (EBUSY); 598 } 599 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args); 600 601 if (!(uap->flags & MS_SYSSPACE)) { 602 nfs_free_args(args, fhandle); 603 kmem_free(args, sizeof (*args)); 604 } 605 606 return (error); 607 } 608 609 mutex_enter(&mvp->v_lock); 610 if (!(uap->flags & MS_OVERLAY) && 611 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 612 mutex_exit(&mvp->v_lock); 613 if (!(uap->flags & MS_SYSSPACE)) { 614 nfs_free_args(args, fhandle); 615 kmem_free(args, sizeof (*args)); 616 } 617 return (EBUSY); 618 } 619 mutex_exit(&mvp->v_lock); 620 621 /* make sure things are zeroed for errout: */ 622 rtvp = NULL; 623 mi = NULL; 624 secdata = NULL; 625 626 /* 627 * A valid knetconfig structure is required. 628 */ 629 if (!(flags & NFSMNT_KNCONF)) { 630 if (!(uap->flags & MS_SYSSPACE)) { 631 nfs_free_args(args, fhandle); 632 kmem_free(args, sizeof (*args)); 633 } 634 return (EINVAL); 635 } 636 637 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) || 638 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) { 639 if (!(uap->flags & MS_SYSSPACE)) { 640 nfs_free_args(args, fhandle); 641 kmem_free(args, sizeof (*args)); 642 } 643 return (EINVAL); 644 } 645 646 647 /* 648 * Allocate a servinfo struct. 649 */ 650 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 651 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); 652 if (svp_tail) { 653 svp_2ndlast = svp_tail; 654 svp_tail->sv_next = svp; 655 } else { 656 svp_head = svp; 657 svp_2ndlast = svp; 658 } 659 660 svp_tail = svp; 661 662 /* 663 * Get knetconfig and server address 664 */ 665 svp->sv_knconf = args->knconf; 666 args->knconf = NULL; 667 668 if (args->addr == NULL || args->addr->buf == NULL) { 669 error = EINVAL; 670 goto errout; 671 } 672 673 svp->sv_addr.maxlen = args->addr->maxlen; 674 svp->sv_addr.len = args->addr->len; 675 svp->sv_addr.buf = args->addr->buf; 676 args->addr->buf = NULL; 677 678 /* 679 * Get the root fhandle 680 */ 681 ASSERT(fhandle); 682 683 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len); 684 svp->sv_fhandle.fh_len = fhandle->fh_len; 685 686 /* 687 * Get server's hostname 688 */ 689 if (flags & NFSMNT_HOSTNAME) { 690 if (args->hostname == NULL) { 691 error = EINVAL; 692 goto errout; 693 } 694 svp->sv_hostnamelen = strlen(args->hostname) + 1; 695 svp->sv_hostname = args->hostname; 696 args->hostname = NULL; 697 } else { 698 char *p = "unknown-host"; 699 svp->sv_hostnamelen = strlen(p) + 1; 700 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP); 701 (void) strcpy(svp->sv_hostname, p); 702 } 703 704 705 /* 706 * RDMA MOUNT SUPPORT FOR NFS v2: 707 * Establish, is it possible to use RDMA, if so overload the 708 * knconf with rdma specific knconf and free the orignal. 709 */ 710 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) { 711 /* 712 * Determine the addr type for RDMA, IPv4 or v6. 713 */ 714 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0) 715 addr_type = AF_INET; 716 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0) 717 addr_type = AF_INET6; 718 719 if (rdma_reachable(addr_type, &svp->sv_addr, 720 &rdma_knconf) == 0) { 721 /* 722 * If successful, hijack, the orignal knconf and 723 * replace with a new one, depending on the flags. 724 */ 725 svp->sv_origknconf = svp->sv_knconf; 726 svp->sv_knconf = rdma_knconf; 727 knconf = rdma_knconf; 728 } else { 729 if (flags & NFSMNT_TRYRDMA) { 730 #ifdef DEBUG 731 if (rdma_debug) 732 zcmn_err(getzoneid(), CE_WARN, 733 "no RDMA onboard, revert\n"); 734 #endif 735 } 736 737 if (flags & NFSMNT_DORDMA) { 738 /* 739 * If proto=rdma is specified and no RDMA 740 * path to this server is avialable then 741 * ditch this server. 742 * This is not included in the mountable 743 * server list or the replica list. 744 * Check if more servers are specified; 745 * Failover case, otherwise bail out of mount. 746 */ 747 if (args->nfs_args_ext == NFS_ARGS_EXTB && 748 args->nfs_ext_u.nfs_extB.next != NULL) { 749 data = (char *) 750 args->nfs_ext_u.nfs_extB.next; 751 if (uap->flags & MS_RDONLY && 752 !(flags & NFSMNT_SOFT)) { 753 if (svp_head->sv_next == NULL) { 754 svp_tail = NULL; 755 svp_2ndlast = NULL; 756 sv_free(svp_head); 757 goto more; 758 } else { 759 svp_tail = svp_2ndlast; 760 svp_2ndlast->sv_next = 761 NULL; 762 sv_free(svp); 763 goto more; 764 } 765 } 766 } else { 767 /* 768 * This is the last server specified 769 * in the nfs_args list passed down 770 * and its not rdma capable. 771 */ 772 if (svp_head->sv_next == NULL) { 773 /* 774 * Is this the only one 775 */ 776 error = EINVAL; 777 #ifdef DEBUG 778 if (rdma_debug) 779 zcmn_err(getzoneid(), 780 CE_WARN, 781 "No RDMA srv"); 782 #endif 783 goto errout; 784 } else { 785 /* 786 * There is list, since some 787 * servers specified before 788 * this passed all requirements 789 */ 790 svp_tail = svp_2ndlast; 791 svp_2ndlast->sv_next = NULL; 792 sv_free(svp); 793 goto proceed; 794 } 795 } 796 } 797 } 798 } 799 800 /* 801 * Get the extention data which has the new security data structure. 802 */ 803 if (flags & NFSMNT_NEWARGS) { 804 switch (args->nfs_args_ext) { 805 case NFS_ARGS_EXTA: 806 case NFS_ARGS_EXTB: 807 /* 808 * Indicating the application is using the new 809 * sec_data structure to pass in the security 810 * data. 811 */ 812 secdata = args->nfs_ext_u.nfs_extA.secdata; 813 if (secdata == NULL) { 814 error = EINVAL; 815 } else { 816 /* 817 * Need to validate the flavor here if 818 * sysspace, userspace was already 819 * validate from the nfs_copyin function. 820 */ 821 switch (secdata->rpcflavor) { 822 case AUTH_NONE: 823 case AUTH_UNIX: 824 case AUTH_LOOPBACK: 825 case AUTH_DES: 826 case RPCSEC_GSS: 827 break; 828 default: 829 error = EINVAL; 830 goto errout; 831 } 832 } 833 args->nfs_ext_u.nfs_extA.secdata = NULL; 834 break; 835 836 default: 837 error = EINVAL; 838 break; 839 } 840 } else if (flags & NFSMNT_SECURE) { 841 /* 842 * Keep this for backward compatibility to support 843 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags. 844 */ 845 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) { 846 error = EINVAL; 847 goto errout; 848 } 849 850 /* 851 * get time sync address. 852 */ 853 if (args->syncaddr == NULL) { 854 error = EFAULT; 855 goto errout; 856 } 857 858 /* 859 * Move security related data to the sec_data structure. 860 */ 861 { 862 dh_k4_clntdata_t *data; 863 char *pf, *p; 864 865 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 866 if (flags & NFSMNT_RPCTIMESYNC) 867 secdata->flags |= AUTH_F_RPCTIMESYNC; 868 data = kmem_alloc(sizeof (*data), KM_SLEEP); 869 bcopy(args->syncaddr, &data->syncaddr, 870 sizeof (*args->syncaddr)); 871 872 873 /* 874 * duplicate the knconf information for the 875 * new opaque data. 876 */ 877 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP); 878 *data->knconf = *knconf; 879 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 880 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 881 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE); 882 bcopy(knconf->knc_proto, pf, KNC_STRSIZE); 883 data->knconf->knc_protofmly = pf; 884 data->knconf->knc_proto = p; 885 886 /* move server netname to the sec_data structure */ 887 nlen = strlen(args->hostname) + 1; 888 if (nlen != 0) { 889 data->netname = kmem_alloc(nlen, KM_SLEEP); 890 bcopy(args->hostname, data->netname, nlen); 891 data->netnamelen = (int)nlen; 892 } 893 secdata->secmod = secdata->rpcflavor = AUTH_DES; 894 secdata->data = (caddr_t)data; 895 } 896 } else { 897 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); 898 secdata->secmod = secdata->rpcflavor = AUTH_UNIX; 899 secdata->data = NULL; 900 } 901 svp->sv_secdata = secdata; 902 903 /* 904 * See bug 1180236. 905 * If mount secure failed, we will fall back to AUTH_NONE 906 * and try again. nfs3rootvp() will turn this back off. 907 * 908 * The NFS Version 2 mount uses GETATTR and STATFS procedures. 909 * The server does not care if these procedures have the proper 910 * authentication flavor, so if mount retries using AUTH_NONE 911 * that does not require a credential setup for root then the 912 * automounter would work without requiring root to be 913 * keylogged into AUTH_DES. 914 */ 915 if (secdata->rpcflavor != AUTH_UNIX && 916 secdata->rpcflavor != AUTH_LOOPBACK) 917 secdata->flags |= AUTH_F_TRYNONE; 918 919 /* 920 * Failover support: 921 * 922 * We may have a linked list of nfs_args structures, 923 * which means the user is looking for failover. If 924 * the mount is either not "read-only" or "soft", 925 * we want to bail out with EINVAL. 926 */ 927 if (args->nfs_args_ext == NFS_ARGS_EXTB && 928 args->nfs_ext_u.nfs_extB.next != NULL) { 929 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) { 930 data = (char *)args->nfs_ext_u.nfs_extB.next; 931 goto more; 932 } 933 error = EINVAL; 934 goto errout; 935 } 936 937 /* 938 * Determine the zone we're being mounted into. 939 */ 940 zone_hold(mntzone = zone); /* start with this assumption */ 941 if (getzoneid() == GLOBAL_ZONEID) { 942 zone_rele(mntzone); 943 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 944 ASSERT(mntzone != NULL); 945 if (mntzone != zone) { 946 error = EBUSY; 947 goto errout; 948 } 949 } 950 951 if (is_system_labeled()) { 952 error = nfs_mount_label_policy(vfsp, &svp->sv_addr, 953 svp->sv_knconf, cr); 954 955 if (error > 0) 956 goto errout; 957 958 if (error == -1) { 959 /* change mount to read-only to prevent write-down */ 960 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 961 } 962 } 963 964 /* 965 * Stop the mount from going any further if the zone is going away. 966 */ 967 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) { 968 error = EBUSY; 969 goto errout; 970 } 971 972 /* 973 * Get root vnode. 974 */ 975 proceed: 976 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone); 977 978 if (error) 979 goto errout; 980 981 /* 982 * Set option fields in the mount info record 983 */ 984 mi = VTOMI(rtvp); 985 986 if (svp_head->sv_next) 987 mi->mi_flags |= MI_LLOCK; 988 989 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args); 990 if (!error) { 991 /* static pathconf kludge */ 992 error = pathconf_get(mi, args); 993 } 994 995 errout: 996 if (rtvp != NULL) { 997 if (error) { 998 rp = VTOR(rtvp); 999 if (rp->r_flags & RHASHED) 1000 rp_rmhash(rp); 1001 } 1002 VN_RELE(rtvp); 1003 } 1004 1005 if (error) { 1006 sv_free(svp_head); 1007 if (mi != NULL) { 1008 nfs_async_stop(vfsp); 1009 nfs_async_manager_stop(vfsp); 1010 if (mi->mi_io_kstats) { 1011 kstat_delete(mi->mi_io_kstats); 1012 mi->mi_io_kstats = NULL; 1013 } 1014 if (mi->mi_ro_kstats) { 1015 kstat_delete(mi->mi_ro_kstats); 1016 mi->mi_ro_kstats = NULL; 1017 } 1018 nfs_free_mi(mi); 1019 } 1020 } 1021 1022 if (!(uap->flags & MS_SYSSPACE)) { 1023 nfs_free_args(args, fhandle); 1024 kmem_free(args, sizeof (*args)); 1025 } 1026 1027 if (mntzone != NULL) 1028 zone_rele(mntzone); 1029 1030 return (error); 1031 } 1032 1033 /* 1034 * The pathconf information is kept on a linked list of kmem_alloc'ed 1035 * structs. We search the list & add a new struct iff there is no other 1036 * struct with the same information. 1037 * See sys/pathconf.h for ``the rest of the story.'' 1038 */ 1039 static struct pathcnf *allpc = NULL; 1040 1041 static int 1042 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc) 1043 { 1044 STRUCT_DECL(pathcnf, pc_tmp); 1045 STRUCT_HANDLE(nfs_args, ap); 1046 int i; 1047 model_t model; 1048 1049 model = get_udatamodel(); 1050 STRUCT_INIT(pc_tmp, model); 1051 STRUCT_SET_HANDLE(ap, model, args); 1052 1053 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) && 1054 STRUCT_FGETP(ap, pathconf) != NULL) { 1055 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp), 1056 STRUCT_SIZE(pc_tmp))) 1057 return (EFAULT); 1058 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask))) 1059 return (EINVAL); 1060 1061 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max); 1062 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon); 1063 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input); 1064 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max); 1065 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max); 1066 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf); 1067 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable); 1068 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx); 1069 for (i = 0; i < _PC_N; i++) 1070 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]); 1071 } 1072 return (0); 1073 } 1074 1075 static int 1076 pathconf_get(struct mntinfo *mi, struct nfs_args *args) 1077 { 1078 struct pathcnf *p, *pc; 1079 1080 pc = args->pathconf; 1081 if (mi->mi_pathconf != NULL) { 1082 pathconf_rele(mi); 1083 mi->mi_pathconf = NULL; 1084 } 1085 1086 if (args->flags & NFSMNT_POSIX && args->pathconf != NULL) { 1087 if (_PC_ISSET(_PC_ERROR, pc->pc_mask)) 1088 return (EINVAL); 1089 1090 for (p = allpc; p != NULL; p = p->pc_next) { 1091 if (PCCMP(p, pc) == 0) 1092 break; 1093 } 1094 if (p != NULL) { 1095 mi->mi_pathconf = p; 1096 p->pc_refcnt++; 1097 } else { 1098 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1099 bcopy(pc, p, sizeof (struct pathcnf)); 1100 p->pc_next = allpc; 1101 p->pc_refcnt = 1; 1102 allpc = mi->mi_pathconf = p; 1103 } 1104 } 1105 return (0); 1106 } 1107 1108 /* 1109 * release the static pathconf information 1110 */ 1111 static void 1112 pathconf_rele(struct mntinfo *mi) 1113 { 1114 if (mi->mi_pathconf != NULL) { 1115 if (--mi->mi_pathconf->pc_refcnt == 0) { 1116 struct pathcnf *p; 1117 struct pathcnf *p2; 1118 1119 p2 = p = allpc; 1120 while (p != NULL && p != mi->mi_pathconf) { 1121 p2 = p; 1122 p = p->pc_next; 1123 } 1124 if (p == NULL) { 1125 panic("mi->pathconf"); 1126 /*NOTREACHED*/ 1127 } 1128 if (p == allpc) 1129 allpc = p->pc_next; 1130 else 1131 p2->pc_next = p->pc_next; 1132 kmem_free(p, sizeof (*p)); 1133 mi->mi_pathconf = NULL; 1134 } 1135 } 1136 } 1137 1138 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */ 1139 static ushort_t nfs_max_threads = 8; /* max number of active async threads */ 1140 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */ 1141 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; 1142 1143 static int 1144 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, 1145 int flags, cred_t *cr, zone_t *zone) 1146 { 1147 vnode_t *rtvp; 1148 mntinfo_t *mi; 1149 dev_t nfs_dev; 1150 struct vattr va; 1151 int error; 1152 rnode_t *rp; 1153 int i; 1154 struct nfs_stats *nfsstatsp; 1155 cred_t *lcr = NULL, *tcr = cr; 1156 1157 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); 1158 ASSERT(nfsstatsp != NULL); 1159 1160 /* 1161 * Create a mount record and link it to the vfs struct. 1162 */ 1163 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP); 1164 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL); 1165 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL); 1166 mi->mi_flags = MI_ACL | MI_EXTATTR; 1167 if (!(flags & NFSMNT_SOFT)) 1168 mi->mi_flags |= MI_HARD; 1169 if ((flags & NFSMNT_SEMISOFT)) 1170 mi->mi_flags |= MI_SEMISOFT; 1171 if ((flags & NFSMNT_NOPRINT)) 1172 mi->mi_flags |= MI_NOPRINT; 1173 if (flags & NFSMNT_INT) 1174 mi->mi_flags |= MI_INT; 1175 mi->mi_retrans = NFS_RETRIES; 1176 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 1177 svp->sv_knconf->knc_semantics == NC_TPI_COTS) 1178 mi->mi_timeo = nfs_cots_timeo; 1179 else 1180 mi->mi_timeo = NFS_TIMEO; 1181 mi->mi_prog = NFS_PROGRAM; 1182 mi->mi_vers = NFS_VERSION; 1183 mi->mi_rfsnames = rfsnames_v2; 1184 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr; 1185 mi->mi_call_type = call_type_v2; 1186 mi->mi_ss_call_type = ss_call_type_v2; 1187 mi->mi_timer_type = timer_type_v2; 1188 mi->mi_aclnames = aclnames_v2; 1189 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr; 1190 mi->mi_acl_call_type = acl_call_type_v2; 1191 mi->mi_acl_ss_call_type = acl_ss_call_type_v2; 1192 mi->mi_acl_timer_type = acl_timer_type_v2; 1193 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL); 1194 mi->mi_servers = svp; 1195 mi->mi_curr_serv = svp; 1196 mi->mi_acregmin = SEC2HR(ACREGMIN); 1197 mi->mi_acregmax = SEC2HR(ACREGMAX); 1198 mi->mi_acdirmin = SEC2HR(ACDIRMIN); 1199 mi->mi_acdirmax = SEC2HR(ACDIRMAX); 1200 1201 if (nfs_dynamic) 1202 mi->mi_flags |= MI_DYNAMIC; 1203 1204 if (flags & NFSMNT_DIRECTIO) 1205 mi->mi_flags |= MI_DIRECTIO; 1206 1207 /* 1208 * Make a vfs struct for nfs. We do this here instead of below 1209 * because rtvp needs a vfs before we can do a getattr on it. 1210 * 1211 * Assign a unique device id to the mount 1212 */ 1213 mutex_enter(&nfs_minor_lock); 1214 do { 1215 nfs_minor = (nfs_minor + 1) & MAXMIN32; 1216 nfs_dev = makedevice(nfs_major, nfs_minor); 1217 } while (vfs_devismounted(nfs_dev)); 1218 mutex_exit(&nfs_minor_lock); 1219 1220 vfsp->vfs_dev = nfs_dev; 1221 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp); 1222 vfsp->vfs_data = (caddr_t)mi; 1223 vfsp->vfs_fstype = nfsfstyp; 1224 vfsp->vfs_bsize = NFS_MAXDATA; 1225 1226 /* 1227 * Initialize fields used to support async putpage operations. 1228 */ 1229 for (i = 0; i < NFS_ASYNC_TYPES; i++) 1230 mi->mi_async_clusters[i] = nfs_async_clusters; 1231 mi->mi_async_init_clusters = nfs_async_clusters; 1232 mi->mi_async_curr[NFS_ASYNC_QUEUE] = 1233 mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0]; 1234 mi->mi_max_threads = nfs_max_threads; 1235 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL); 1236 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL); 1237 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL); 1238 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL, 1239 CV_DEFAULT, NULL); 1240 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL); 1241 1242 mi->mi_vfsp = vfsp; 1243 mi->mi_zone = zone; 1244 zone_init_ref(&mi->mi_zone_ref); 1245 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS); 1246 nfs_mi_zonelist_add(mi); 1247 1248 /* 1249 * Make the root vnode, use it to get attributes, 1250 * then remake it with the attributes. 1251 */ 1252 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf, 1253 NULL, vfsp, gethrtime(), cr, NULL, NULL); 1254 1255 va.va_mask = AT_ALL; 1256 1257 /* 1258 * If the uid is set then set the creds for secure mounts 1259 * by proxy processes such as automountd. 1260 */ 1261 if (svp->sv_secdata->uid != 0 && 1262 svp->sv_secdata->rpcflavor == RPCSEC_GSS) { 1263 lcr = crdup(cr); 1264 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr)); 1265 tcr = lcr; 1266 } 1267 1268 error = nfsgetattr(rtvp, &va, tcr); 1269 if (error) 1270 goto bad; 1271 rtvp->v_type = va.va_type; 1272 1273 /* 1274 * Poll every server to get the filesystem stats; we're 1275 * only interested in the server's transfer size, and we 1276 * want the minimum. 1277 * 1278 * While we're looping, we'll turn off AUTH_F_TRYNONE, 1279 * which is only for the mount operation. 1280 */ 1281 1282 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize()); 1283 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize()); 1284 1285 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 1286 struct nfsstatfs fs; 1287 int douprintf; 1288 1289 douprintf = 1; 1290 mi->mi_curr_serv = svp; 1291 1292 error = rfs2call(mi, RFS_STATFS, xdr_fhandle, 1293 (caddr_t)svp->sv_fhandle.fh_buf, xdr_statfs, (caddr_t)&fs, 1294 tcr, &douprintf, &fs.fs_status, 0, NULL); 1295 if (error) 1296 goto bad; 1297 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1298 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE; 1299 } 1300 mi->mi_curr_serv = mi->mi_servers; 1301 mi->mi_curread = mi->mi_tsize; 1302 mi->mi_curwrite = mi->mi_stsize; 1303 1304 /* 1305 * Start the manager thread responsible for handling async worker 1306 * threads. 1307 */ 1308 VFS_HOLD(vfsp); /* add reference for thread */ 1309 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager, 1310 vfsp, 0, minclsyspri); 1311 ASSERT(mi->mi_manager_thread != NULL); 1312 1313 /* 1314 * Initialize kstats 1315 */ 1316 nfs_mnt_kstat_init(vfsp); 1317 1318 mi->mi_type = rtvp->v_type; 1319 1320 *rtvpp = rtvp; 1321 if (lcr != NULL) 1322 crfree(lcr); 1323 1324 return (0); 1325 bad: 1326 /* 1327 * An error occurred somewhere, need to clean up... 1328 * We need to release our reference to the root vnode and 1329 * destroy the mntinfo struct that we just created. 1330 */ 1331 if (lcr != NULL) 1332 crfree(lcr); 1333 rp = VTOR(rtvp); 1334 if (rp->r_flags & RHASHED) 1335 rp_rmhash(rp); 1336 VN_RELE(rtvp); 1337 nfs_async_stop(vfsp); 1338 nfs_async_manager_stop(vfsp); 1339 if (mi->mi_io_kstats) { 1340 kstat_delete(mi->mi_io_kstats); 1341 mi->mi_io_kstats = NULL; 1342 } 1343 if (mi->mi_ro_kstats) { 1344 kstat_delete(mi->mi_ro_kstats); 1345 mi->mi_ro_kstats = NULL; 1346 } 1347 nfs_free_mi(mi); 1348 *rtvpp = NULL; 1349 return (error); 1350 } 1351 1352 /* 1353 * vfs operations 1354 */ 1355 static int 1356 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr) 1357 { 1358 mntinfo_t *mi; 1359 ushort_t omax; 1360 1361 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1362 return (EPERM); 1363 1364 mi = VFTOMI(vfsp); 1365 if (flag & MS_FORCE) { 1366 1367 vfsp->vfs_flag |= VFS_UNMOUNTED; 1368 1369 /* 1370 * We are about to stop the async manager. 1371 * Let every one know not to schedule any 1372 * more async requests. 1373 */ 1374 mutex_enter(&mi->mi_async_lock); 1375 mi->mi_max_threads = 0; 1376 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 1377 mutex_exit(&mi->mi_async_lock); 1378 1379 /* 1380 * We need to stop the manager thread explicitly; the worker 1381 * threads can time out and exit on their own. 1382 */ 1383 nfs_async_manager_stop(vfsp); 1384 destroy_rtable(vfsp, cr); 1385 if (mi->mi_io_kstats) { 1386 kstat_delete(mi->mi_io_kstats); 1387 mi->mi_io_kstats = NULL; 1388 } 1389 if (mi->mi_ro_kstats) { 1390 kstat_delete(mi->mi_ro_kstats); 1391 mi->mi_ro_kstats = NULL; 1392 } 1393 return (0); 1394 } 1395 /* 1396 * Wait until all asynchronous putpage operations on 1397 * this file system are complete before flushing rnodes 1398 * from the cache. 1399 */ 1400 omax = mi->mi_max_threads; 1401 if (nfs_async_stop_sig(vfsp)) { 1402 return (EINTR); 1403 } 1404 rflush(vfsp, cr); 1405 /* 1406 * If there are any active vnodes on this file system, 1407 * then the file system is busy and can't be umounted. 1408 */ 1409 if (check_rtable(vfsp)) { 1410 mutex_enter(&mi->mi_async_lock); 1411 mi->mi_max_threads = omax; 1412 mutex_exit(&mi->mi_async_lock); 1413 return (EBUSY); 1414 } 1415 /* 1416 * The unmount can't fail from now on; stop the manager thread. 1417 */ 1418 nfs_async_manager_stop(vfsp); 1419 /* 1420 * Destroy all rnodes belonging to this file system from the 1421 * rnode hash queues and purge any resources allocated to 1422 * them. 1423 */ 1424 destroy_rtable(vfsp, cr); 1425 if (mi->mi_io_kstats) { 1426 kstat_delete(mi->mi_io_kstats); 1427 mi->mi_io_kstats = NULL; 1428 } 1429 if (mi->mi_ro_kstats) { 1430 kstat_delete(mi->mi_ro_kstats); 1431 mi->mi_ro_kstats = NULL; 1432 } 1433 return (0); 1434 } 1435 1436 /* 1437 * find root of nfs 1438 */ 1439 static int 1440 nfs_root(vfs_t *vfsp, vnode_t **vpp) 1441 { 1442 mntinfo_t *mi; 1443 vnode_t *vp; 1444 servinfo_t *svp; 1445 rnode_t *rp; 1446 int error = 0; 1447 1448 mi = VFTOMI(vfsp); 1449 1450 if (nfs_zone() != mi->mi_zone) 1451 return (EPERM); 1452 1453 svp = mi->mi_curr_serv; 1454 if (svp && (svp->sv_flags & SV_ROOT_STALE)) { 1455 mutex_enter(&svp->sv_lock); 1456 svp->sv_flags &= ~SV_ROOT_STALE; 1457 mutex_exit(&svp->sv_lock); 1458 error = ENOENT; 1459 } 1460 1461 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf, 1462 NULL, vfsp, gethrtime(), CRED(), NULL, NULL); 1463 1464 /* 1465 * if the SV_ROOT_STALE flag was reset above, reset the 1466 * RSTALE flag if needed and return an error 1467 */ 1468 if (error == ENOENT) { 1469 rp = VTOR(vp); 1470 if (svp && rp->r_flags & RSTALE) { 1471 mutex_enter(&rp->r_statelock); 1472 rp->r_flags &= ~RSTALE; 1473 mutex_exit(&rp->r_statelock); 1474 } 1475 VN_RELE(vp); 1476 return (error); 1477 } 1478 1479 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type); 1480 1481 vp->v_type = mi->mi_type; 1482 1483 *vpp = vp; 1484 1485 return (0); 1486 } 1487 1488 /* 1489 * Get file system statistics. 1490 */ 1491 static int 1492 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp) 1493 { 1494 int error; 1495 mntinfo_t *mi; 1496 struct nfsstatfs fs; 1497 int douprintf; 1498 failinfo_t fi; 1499 vnode_t *vp; 1500 1501 error = nfs_root(vfsp, &vp); 1502 if (error) 1503 return (error); 1504 1505 mi = VFTOMI(vfsp); 1506 douprintf = 1; 1507 fi.vp = vp; 1508 fi.fhp = NULL; /* no need to update, filehandle not copied */ 1509 fi.copyproc = nfscopyfh; 1510 fi.lookupproc = nfslookup; 1511 fi.xattrdirproc = acl_getxattrdir2; 1512 1513 error = rfs2call(mi, RFS_STATFS, xdr_fhandle, (caddr_t)VTOFH(vp), 1514 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, &fs.fs_status, 0, 1515 &fi); 1516 1517 if (!error) { 1518 error = geterrno(fs.fs_status); 1519 if (!error) { 1520 mutex_enter(&mi->mi_lock); 1521 if (mi->mi_stsize) { 1522 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize); 1523 } else { 1524 mi->mi_stsize = fs.fs_tsize; 1525 mi->mi_curwrite = mi->mi_stsize; 1526 } 1527 mutex_exit(&mi->mi_lock); 1528 sbp->f_bsize = fs.fs_bsize; 1529 sbp->f_frsize = fs.fs_bsize; 1530 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks; 1531 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree; 1532 /* 1533 * Some servers may return negative available 1534 * block counts. They may do this because they 1535 * calculate the number of available blocks by 1536 * subtracting the number of used blocks from 1537 * the total number of blocks modified by the 1538 * minimum free value. For example, if the 1539 * minumum free percentage is 10 and the file 1540 * system is greater than 90 percent full, then 1541 * 90 percent of the total blocks minus the 1542 * actual number of used blocks may be a 1543 * negative number. 1544 * 1545 * In this case, we need to sign extend the 1546 * negative number through the assignment from 1547 * the 32 bit bavail count to the 64 bit bavail 1548 * count. 1549 * 1550 * We need to be able to discern between there 1551 * just being a lot of available blocks on the 1552 * file system and the case described above. 1553 * We are making the assumption that it does 1554 * not make sense to have more available blocks 1555 * than there are free blocks. So, if there 1556 * are, then we treat the number as if it were 1557 * a negative number and arrange to have it 1558 * sign extended when it is converted from 32 1559 * bits to 64 bits. 1560 */ 1561 if (fs.fs_bavail <= fs.fs_bfree) 1562 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail; 1563 else { 1564 sbp->f_bavail = 1565 (fsblkcnt64_t)((long)fs.fs_bavail); 1566 } 1567 sbp->f_files = (fsfilcnt64_t)-1; 1568 sbp->f_ffree = (fsfilcnt64_t)-1; 1569 sbp->f_favail = (fsfilcnt64_t)-1; 1570 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0]; 1571 (void) strncpy(sbp->f_basetype, 1572 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ); 1573 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 1574 sbp->f_namemax = (uint32_t)-1; 1575 } else { 1576 PURGE_STALE_FH(error, vp, CRED()); 1577 } 1578 } 1579 1580 VN_RELE(vp); 1581 1582 return (error); 1583 } 1584 1585 static kmutex_t nfs_syncbusy; 1586 1587 /* 1588 * Flush dirty nfs files for file system vfsp. 1589 * If vfsp == NULL, all nfs files are flushed. 1590 */ 1591 /* ARGSUSED */ 1592 static int 1593 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 1594 { 1595 /* 1596 * Cross-zone calls are OK here, since this translates to a 1597 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone. 1598 */ 1599 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) { 1600 rflush(vfsp, cr); 1601 mutex_exit(&nfs_syncbusy); 1602 } 1603 return (0); 1604 } 1605 1606 /* ARGSUSED */ 1607 static int 1608 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1609 { 1610 int error; 1611 vnode_t *vp; 1612 struct vattr va; 1613 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp; 1614 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id; 1615 1616 if (nfs_zone() != VFTOMI(vfsp)->mi_zone) 1617 return (EPERM); 1618 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) { 1619 #ifdef DEBUG 1620 zcmn_err(zoneid, CE_WARN, 1621 "nfs_vget: bad fid len, %d/%d", fidp->fid_len, 1622 (int)(sizeof (*nfsfidp) - sizeof (short))); 1623 #endif 1624 *vpp = NULL; 1625 return (ESTALE); 1626 } 1627 1628 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp, 1629 gethrtime(), CRED(), NULL, NULL); 1630 1631 if (VTOR(vp)->r_flags & RSTALE) { 1632 VN_RELE(vp); 1633 *vpp = NULL; 1634 return (ENOENT); 1635 } 1636 1637 if (vp->v_type == VNON) { 1638 va.va_mask = AT_ALL; 1639 error = nfsgetattr(vp, &va, CRED()); 1640 if (error) { 1641 VN_RELE(vp); 1642 *vpp = NULL; 1643 return (error); 1644 } 1645 vp->v_type = va.va_type; 1646 } 1647 1648 *vpp = vp; 1649 1650 return (0); 1651 } 1652 1653 /* ARGSUSED */ 1654 static int 1655 nfs_mountroot(vfs_t *vfsp, whymountroot_t why) 1656 { 1657 vnode_t *rtvp; 1658 char root_hostname[SYS_NMLN+1]; 1659 struct servinfo *svp; 1660 int error; 1661 int vfsflags; 1662 size_t size; 1663 char *root_path; 1664 struct pathname pn; 1665 char *name; 1666 cred_t *cr; 1667 struct nfs_args args; /* nfs mount arguments */ 1668 static char token[10]; 1669 1670 bzero(&args, sizeof (args)); 1671 1672 /* do this BEFORE getfile which causes xid stamps to be initialized */ 1673 clkset(-1L); /* hack for now - until we get time svc? */ 1674 1675 if (why == ROOT_REMOUNT) { 1676 /* 1677 * Shouldn't happen. 1678 */ 1679 panic("nfs_mountroot: why == ROOT_REMOUNT"); 1680 } 1681 1682 if (why == ROOT_UNMOUNT) { 1683 /* 1684 * Nothing to do for NFS. 1685 */ 1686 return (0); 1687 } 1688 1689 /* 1690 * why == ROOT_INIT 1691 */ 1692 1693 name = token; 1694 *name = 0; 1695 getfsname("root", name, sizeof (token)); 1696 1697 pn_alloc(&pn); 1698 root_path = pn.pn_path; 1699 1700 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); 1701 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); 1702 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1703 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1704 1705 /* 1706 * Get server address 1707 * Get the root fhandle 1708 * Get server's transport 1709 * Get server's hostname 1710 * Get options 1711 */ 1712 args.addr = &svp->sv_addr; 1713 args.fh = (char *)&svp->sv_fhandle.fh_buf; 1714 args.knconf = svp->sv_knconf; 1715 args.hostname = root_hostname; 1716 vfsflags = 0; 1717 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, 1718 &args, &vfsflags)) { 1719 nfs_cmn_err(error, CE_WARN, 1720 "nfs_mountroot: mount_root failed: %m"); 1721 sv_free(svp); 1722 pn_free(&pn); 1723 return (error); 1724 } 1725 svp->sv_fhandle.fh_len = NFS_FHSIZE; 1726 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1); 1727 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); 1728 (void) strcpy(svp->sv_hostname, root_hostname); 1729 1730 /* 1731 * Force root partition to always be mounted with AUTH_UNIX for now 1732 */ 1733 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP); 1734 svp->sv_secdata->secmod = AUTH_UNIX; 1735 svp->sv_secdata->rpcflavor = AUTH_UNIX; 1736 svp->sv_secdata->data = NULL; 1737 1738 cr = crgetcred(); 1739 rtvp = NULL; 1740 1741 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone); 1742 1743 crfree(cr); 1744 1745 if (error) { 1746 pn_free(&pn); 1747 sv_free(svp); 1748 return (error); 1749 } 1750 1751 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args); 1752 if (error) { 1753 nfs_cmn_err(error, CE_WARN, 1754 "nfs_mountroot: invalid root mount options"); 1755 pn_free(&pn); 1756 goto errout; 1757 } 1758 1759 (void) vfs_lock_wait(vfsp); 1760 vfs_add(NULL, vfsp, vfsflags); 1761 vfs_unlock(vfsp); 1762 1763 size = strlen(svp->sv_hostname); 1764 (void) strcpy(rootfs.bo_name, svp->sv_hostname); 1765 rootfs.bo_name[size] = ':'; 1766 (void) strcpy(&rootfs.bo_name[size + 1], root_path); 1767 1768 pn_free(&pn); 1769 1770 errout: 1771 if (error) { 1772 sv_free(svp); 1773 nfs_async_stop(vfsp); 1774 nfs_async_manager_stop(vfsp); 1775 } 1776 1777 if (rtvp != NULL) 1778 VN_RELE(rtvp); 1779 1780 return (error); 1781 } 1782 1783 /* 1784 * Initialization routine for VFS routines. Should only be called once 1785 */ 1786 int 1787 nfs_vfsinit(void) 1788 { 1789 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL); 1790 return (0); 1791 } 1792 1793 void 1794 nfs_vfsfini(void) 1795 { 1796 mutex_destroy(&nfs_syncbusy); 1797 } 1798 1799 void 1800 nfs_freevfs(vfs_t *vfsp) 1801 { 1802 mntinfo_t *mi; 1803 servinfo_t *svp; 1804 1805 /* free up the resources */ 1806 mi = VFTOMI(vfsp); 1807 pathconf_rele(mi); 1808 svp = mi->mi_servers; 1809 mi->mi_servers = mi->mi_curr_serv = NULL; 1810 sv_free(svp); 1811 1812 /* 1813 * By this time we should have already deleted the 1814 * mi kstats in the unmount code. If they are still around 1815 * somethings wrong 1816 */ 1817 ASSERT(mi->mi_io_kstats == NULL); 1818 nfs_free_mi(mi); 1819 } 1820