1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All rights reserved.
28 */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/vfs.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/vnode.h>
37 #include <sys/pathname.h>
38 #include <sys/sysmacros.h>
39 #include <sys/kmem.h>
40 #include <sys/mkdev.h>
41 #include <sys/mount.h>
42 #include <sys/mntent.h>
43 #include <sys/statvfs.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/cmn_err.h>
47 #include <sys/utsname.h>
48 #include <sys/bootconf.h>
49 #include <sys/modctl.h>
50 #include <sys/acl.h>
51 #include <sys/flock.h>
52 #include <sys/policy.h>
53 #include <sys/zone.h>
54 #include <sys/class.h>
55 #include <sys/socket.h>
56 #include <sys/netconfig.h>
57 #include <sys/tsol/tnet.h>
58
59 #include <rpc/types.h>
60 #include <rpc/auth.h>
61 #include <rpc/clnt.h>
62
63 #include <nfs/nfs.h>
64 #include <nfs/nfs_clnt.h>
65 #include <nfs/rnode.h>
66 #include <nfs/mount.h>
67 #include <nfs/nfs_acl.h>
68
69 #include <fs/fs_subr.h>
70
/*
 * From rpcsec module (common/rpcsec).
 *
 * In this file sec_clnt_loadinfo() is handed the user-supplied sec_data
 * pointer, the address of a kernel sec_data pointer to fill in, and the
 * caller's data model; sec_clnt_freeinfo() is used to release a sec_data
 * obtained that way.
 */
extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
extern void sec_clnt_freeinfo(struct sec_data *);
76
/*
 * Printable names of the NFS Version 3 procedures, one per entry.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v3_tmpl in nfs_stats.c
 */
static char *rfsnames_v3[] = {
	"null",
	"getattr",
	"setattr",
	"lookup",
	"access",
	"readlink",
	"read",
	"write",
	"create",
	"mkdir",
	"symlink",
	"mknod",
	"remove",
	"rmdir",
	"rename",
	"link",
	"readdir",
	"readdirplus",
	"fsstat",
	"fsinfo",
	"pathconf",
	"commit"
};
87
88 /*
89 * This table maps from NFS protocol number into call type.
90 * Zero means a "Lookup" type call
91 * One means a "Read" type call
92 * Two means a "Write" type call
93 * This is used to select a default time-out.
94 */
95 static uchar_t call_type_v3[] = {
96 0, 0, 1, 0, 0, 0, 1,
97 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 1, 2, 0, 0, 0,
99 2 };
100
101 /*
102 * Similar table, but to determine which timer to use
103 * (only real reads and writes!)
104 */
105 static uchar_t timer_type_v3[] = {
106 0, 0, 0, 0, 0, 0, 1,
107 2, 0, 0, 0, 0, 0, 0,
108 0, 0, 1, 1, 0, 0, 0,
109 0 };
110
111 /*
112 * This table maps from NFS protocol number into a call type
113 * for the semisoft mount option.
114 * Zero means do not repeat operation.
115 * One means repeat.
116 */
117 static uchar_t ss_call_type_v3[] = {
118 0, 0, 1, 0, 0, 0, 0,
119 1, 1, 1, 1, 1, 1, 1,
120 1, 1, 0, 0, 0, 0, 0,
121 1 };
122
/*
 * nfs3 vfs operations.
 *
 * nfs3init() places these entry points into the vfs operations template
 * that it registers for this file system type.
 */
static int nfs3_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int nfs3_unmount(vfs_t *, int, cred_t *);
static int nfs3_root(vfs_t *, vnode_t **);
static int nfs3_statvfs(vfs_t *, struct statvfs64 *);
static int nfs3_sync(vfs_t *, short, cred_t *);
static int nfs3_vget(vfs_t *, vnode_t **, fid_t *);
static int nfs3_mountroot(vfs_t *, whymountroot_t);
static void nfs3_freevfs(vfs_t *);

/*
 * Helper called by nfs3_mount() with the server list, the mount flags,
 * the caller's credentials and the target zone; it returns the root
 * vnode through its first argument.
 */
static int nfs3rootvp(vnode_t **, vfs_t *, struct servinfo *,
    int, cred_t *, zone_t *);
137
/*
 * Initialize the vfs structure
 */

/* File system type number handed to nfs3init(); saved there on success. */
static int nfs3fstyp;
/* vfs operations vector; nfs3init() has vfs_setfsops() fill it in. */
vfsops_t *nfs3_vfsops;

/*
 * Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system. Off by default.
 */
extern int rdma_debug;
151
152 int
nfs3init(int fstyp,char * name)153 nfs3init(int fstyp, char *name)
154 {
155 static const fs_operation_def_t nfs3_vfsops_template[] = {
156 VFSNAME_MOUNT, { .vfs_mount = nfs3_mount },
157 VFSNAME_UNMOUNT, { .vfs_unmount = nfs3_unmount },
158 VFSNAME_ROOT, { .vfs_root = nfs3_root },
159 VFSNAME_STATVFS, { .vfs_statvfs = nfs3_statvfs },
160 VFSNAME_SYNC, { .vfs_sync = nfs3_sync },
161 VFSNAME_VGET, { .vfs_vget = nfs3_vget },
162 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs3_mountroot },
163 VFSNAME_FREEVFS, { .vfs_freevfs = nfs3_freevfs },
164 NULL, NULL
165 };
166 int error;
167
168 error = vfs_setfsops(fstyp, nfs3_vfsops_template, &nfs3_vfsops);
169 if (error != 0) {
170 zcmn_err(GLOBAL_ZONEID, CE_WARN,
171 "nfs3init: bad vfs ops template");
172 return (error);
173 }
174
175 error = vn_make_ops(name, nfs3_vnodeops_template, &nfs3_vnodeops);
176 if (error != 0) {
177 (void) vfs_freevfsops_by_type(fstyp);
178 zcmn_err(GLOBAL_ZONEID, CE_WARN,
179 "nfs3init: bad vnode ops template");
180 return (error);
181 }
182
183 nfs3fstyp = fstyp;
184
185 return (0);
186 }
187
/*
 * Counterpart to nfs3init().  Intentionally empty: nothing is torn
 * down here.
 */
void
nfs3fini(void)
{
}
192
193 static void
nfs3_free_args(struct nfs_args * nargs,nfs_fhandle * fh)194 nfs3_free_args(struct nfs_args *nargs, nfs_fhandle *fh)
195 {
196
197 if (fh)
198 kmem_free(fh, sizeof (*fh));
199
200 if (nargs->knconf) {
201 if (nargs->knconf->knc_protofmly)
202 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE);
203 if (nargs->knconf->knc_proto)
204 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
205 kmem_free(nargs->knconf, sizeof (*nargs->knconf));
206 nargs->knconf = NULL;
207 }
208
209 if (nargs->fh) {
210 kmem_free(nargs->fh, strlen(nargs->fh) + 1);
211 nargs->fh = NULL;
212 }
213
214 if (nargs->hostname) {
215 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
216 nargs->hostname = NULL;
217 }
218
219 if (nargs->addr) {
220 if (nargs->addr->buf) {
221 ASSERT(nargs->addr->len);
222 kmem_free(nargs->addr->buf, nargs->addr->len);
223 }
224 kmem_free(nargs->addr, sizeof (struct netbuf));
225 nargs->addr = NULL;
226 }
227
228 if (nargs->syncaddr) {
229 ASSERT(nargs->syncaddr->len);
230 if (nargs->syncaddr->buf) {
231 ASSERT(nargs->syncaddr->len);
232 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
233 }
234 kmem_free(nargs->syncaddr, sizeof (struct netbuf));
235 nargs->syncaddr = NULL;
236 }
237
238 if (nargs->netname) {
239 kmem_free(nargs->netname, strlen(nargs->netname) + 1);
240 nargs->netname = NULL;
241 }
242
243 if (nargs->nfs_ext_u.nfs_extA.secdata) {
244 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata);
245 nargs->nfs_ext_u.nfs_extA.secdata = NULL;
246 }
247 }
248
249 static int
nfs3_copyin(char * data,int datalen,struct nfs_args * nargs,nfs_fhandle * fh)250 nfs3_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh)
251 {
252
253 int error;
254 size_t nlen; /* length of netname */
255 size_t hlen; /* length of hostname */
256 char netname[MAXNETNAMELEN+1]; /* server's netname */
257 struct netbuf addr; /* server's address */
258 struct netbuf syncaddr; /* AUTH_DES time sync addr */
259 struct knetconfig *knconf; /* transport knetconfig structure */
260 struct sec_data *secdata = NULL; /* security data */
261 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */
262 STRUCT_DECL(knetconfig, knconf_tmp);
263 STRUCT_DECL(netbuf, addr_tmp);
264 int flags;
265 char *p, *pf;
266 char *userbufptr;
267
268
269 bzero(nargs, sizeof (*nargs));
270
271 STRUCT_INIT(args, get_udatamodel());
272 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
273 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args))))
274 return (EFAULT);
275
276 nargs->wsize = STRUCT_FGET(args, wsize);
277 nargs->rsize = STRUCT_FGET(args, rsize);
278 nargs->timeo = STRUCT_FGET(args, timeo);
279 nargs->retrans = STRUCT_FGET(args, retrans);
280 nargs->acregmin = STRUCT_FGET(args, acregmin);
281 nargs->acregmax = STRUCT_FGET(args, acregmax);
282 nargs->acdirmin = STRUCT_FGET(args, acdirmin);
283 nargs->acdirmax = STRUCT_FGET(args, acdirmax);
284
285 flags = STRUCT_FGET(args, flags);
286 nargs->flags = flags;
287
288 addr.buf = NULL;
289 syncaddr.buf = NULL;
290
291 /*
292 * Allocate space for a knetconfig structure and
293 * its strings and copy in from user-land.
294 */
295 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
296 STRUCT_INIT(knconf_tmp, get_udatamodel());
297 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
298 STRUCT_SIZE(knconf_tmp))) {
299 kmem_free(knconf, sizeof (*knconf));
300 return (EFAULT);
301 }
302
303 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
304 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
305 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
306 if (get_udatamodel() != DATAMODEL_LP64) {
307 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
308 } else {
309 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
310 }
311
312 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
313 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
314 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
315 if (error) {
316 kmem_free(pf, KNC_STRSIZE);
317 kmem_free(p, KNC_STRSIZE);
318 kmem_free(knconf, sizeof (*knconf));
319 return (error);
320 }
321
322 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
323 if (error) {
324 kmem_free(pf, KNC_STRSIZE);
325 kmem_free(p, KNC_STRSIZE);
326 kmem_free(knconf, sizeof (*knconf));
327 return (error);
328 }
329
330
331 knconf->knc_protofmly = pf;
332 knconf->knc_proto = p;
333
334 nargs->knconf = knconf;
335 /*
336 * Get server address
337 */
338 STRUCT_INIT(addr_tmp, get_udatamodel());
339 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
340 STRUCT_SIZE(addr_tmp))) {
341 error = EFAULT;
342 goto errout;
343 }
344
345 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
346 userbufptr = STRUCT_FGETP(addr_tmp, buf);
347 addr.len = STRUCT_FGET(addr_tmp, len);
348 addr.buf = kmem_alloc(addr.len, KM_SLEEP);
349 addr.maxlen = addr.len;
350 if (copyin(userbufptr, addr.buf, addr.len)) {
351 kmem_free(addr.buf, addr.len);
352 error = EFAULT;
353 goto errout;
354 }
355 bcopy(&addr, nargs->addr, sizeof (struct netbuf));
356
357 /*
358 * Get the root fhandle
359 */
360
361 if (copyin(STRUCT_FGETP(args, fh), fh, sizeof (nfs_fhandle))) {
362 error = EFAULT;
363 goto errout;
364 }
365
366
367 /*
368 * Get server's hostname
369 */
370 if (flags & NFSMNT_HOSTNAME) {
371 error = copyinstr(STRUCT_FGETP(args, hostname), netname,
372 sizeof (netname), &hlen);
373 if (error)
374 goto errout;
375 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
376 (void) strcpy(nargs->hostname, netname);
377 } else {
378 nargs->hostname = NULL;
379 }
380
381
382 /*
383 * If there are syncaddr and netname data, load them in. This is
384 * to support data needed for NFSV4 when AUTH_DH is the negotiated
385 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
386 */
387 netname[0] = '\0';
388 if (flags & NFSMNT_SECURE) {
389 if (STRUCT_FGETP(args, syncaddr) == NULL) {
390 error = EINVAL;
391 goto errout;
392 }
393 /* get syncaddr */
394 STRUCT_INIT(addr_tmp, get_udatamodel());
395 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
396 STRUCT_SIZE(addr_tmp))) {
397 error = EINVAL;
398 goto errout;
399 }
400 userbufptr = STRUCT_FGETP(addr_tmp, buf);
401 syncaddr.len = STRUCT_FGET(addr_tmp, len);
402 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
403 syncaddr.maxlen = syncaddr.len;
404 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
405 kmem_free(syncaddr.buf, syncaddr.len);
406 error = EFAULT;
407 goto errout;
408 }
409
410 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
411 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
412
413 ASSERT(STRUCT_FGETP(args, netname));
414
415 if (copyinstr(STRUCT_FGETP(args, netname), netname,
416 sizeof (netname), &nlen)) {
417 error = EFAULT;
418 goto errout;
419 }
420
421 netname[nlen] = '\0';
422 nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
423 (void) strcpy(nargs->netname, netname);
424 }
425
426 /*
427 * Get the extention data which has the security data structure.
428 * This includes data for AUTH_SYS as well.
429 */
430 if (flags & NFSMNT_NEWARGS) {
431 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
432 if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
433 nargs->nfs_args_ext == NFS_ARGS_EXTB) {
434 /*
435 * Indicating the application is using the new
436 * sec_data structure to pass in the security
437 * data.
438 */
439 if (STRUCT_FGETP(args,
440 nfs_ext_u.nfs_extA.secdata) != NULL) {
441 error = sec_clnt_loadinfo(
442 (struct sec_data *)STRUCT_FGETP(args,
443 nfs_ext_u.nfs_extA.secdata), &secdata,
444 get_udatamodel());
445 }
446 nargs->nfs_ext_u.nfs_extA.secdata = secdata;
447 }
448 }
449
450 if (error)
451 goto errout;
452
453 /*
454 * Failover support:
455 *
456 * We may have a linked list of nfs_args structures,
457 * which means the user is looking for failover. If
458 * the mount is either not "read-only" or "soft",
459 * we want to bail out with EINVAL.
460 */
461 if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
462 nargs->nfs_ext_u.nfs_extB.next =
463 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
464
465 errout:
466 if (error)
467 nfs3_free_args(nargs, fh);
468
469 return (error);
470 }
471
472
473 /*
474 * nfs mount vfsop
475 * Set up mount info record and attach it to vfs struct.
476 */
477 static int
nfs3_mount(vfs_t * vfsp,vnode_t * mvp,struct mounta * uap,cred_t * cr)478 nfs3_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
479 {
480 struct nfs_args *args = NULL;
481 nfs_fhandle *fhandle = NULL;
482 char *data = uap->dataptr;
483 int error;
484 vnode_t *rtvp; /* the server's root */
485 mntinfo_t *mi; /* mount info, pointed at by vfs */
486 size_t nlen; /* length of netname */
487 struct knetconfig *knconf; /* transport knetconfig structure */
488 struct knetconfig *rdma_knconf; /* rdma transport structure */
489 rnode_t *rp;
490 struct servinfo *svp; /* nfs server info */
491 struct servinfo *svp_tail = NULL; /* previous nfs server info */
492 struct servinfo *svp_head; /* first nfs server info */
493 struct servinfo *svp_2ndlast; /* 2nd last in server info list */
494 struct sec_data *secdata; /* security data */
495 int flags, addr_type;
496 zone_t *zone = nfs_zone();
497 zone_t *mntzone = NULL;
498
499
500 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
501 return (EPERM);
502
503 if (mvp->v_type != VDIR)
504 return (ENOTDIR);
505
506 /*
507 * get arguments
508 *
509 * nfs_args is now versioned and is extensible, so
510 * uap->datalen might be different from sizeof (args)
511 * in a compatible situation.
512 */
513
514 more:
515
516 if (!(uap->flags & MS_SYSSPACE)) {
517 if (args == NULL)
518 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP);
519 else {
520 nfs3_free_args(args, fhandle);
521 fhandle = NULL;
522 }
523 if (fhandle == NULL)
524 fhandle = kmem_alloc(sizeof (nfs_fhandle), KM_SLEEP);
525 error = nfs3_copyin(data, uap->datalen, args, fhandle);
526 if (error) {
527 if (args)
528 kmem_free(args, sizeof (*args));
529 return (error);
530 }
531 } else {
532 args = (struct nfs_args *)data;
533 fhandle = (nfs_fhandle *)args->fh;
534 }
535
536
537 flags = args->flags;
538
539 if (uap->flags & MS_REMOUNT) {
540 size_t n;
541 char name[FSTYPSZ];
542
543 if (uap->flags & MS_SYSSPACE) {
544 error = copystr(uap->fstype, name, FSTYPSZ, &n);
545 } else {
546 nfs3_free_args(args, fhandle);
547 kmem_free(args, sizeof (*args));
548 error = copyinstr(uap->fstype, name, FSTYPSZ, &n);
549 }
550 if (error) {
551 if (error == ENAMETOOLONG)
552 return (EINVAL);
553 return (error);
554 }
555
556 /*
557 * This check is to ensure that the request is a
558 * genuine nfs remount request.
559 */
560
561 if (strncmp(name, "nfs", 3) != 0)
562 return (EINVAL);
563
564 /*
565 * If the request changes the locking type, disallow the
566 * remount,
567 * because it's questionable whether we can transfer the
568 * locking state correctly.
569 */
570
571 if ((mi = VFTOMI(vfsp)) != NULL) {
572 uint_t new_mi_llock;
573 uint_t old_mi_llock;
574
575 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
576 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0;
577 if (old_mi_llock != new_mi_llock)
578 return (EBUSY);
579 }
580 return (0);
581 }
582
583 mutex_enter(&mvp->v_lock);
584 if (!(uap->flags & MS_OVERLAY) &&
585 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
586 mutex_exit(&mvp->v_lock);
587 if (!(uap->flags & MS_SYSSPACE)) {
588 nfs3_free_args(args, fhandle);
589 kmem_free(args, sizeof (*args));
590 }
591 return (EBUSY);
592 }
593 mutex_exit(&mvp->v_lock);
594
595 /* make sure things are zeroed for errout: */
596 rtvp = NULL;
597 mi = NULL;
598 secdata = NULL;
599
600 /*
601 * A valid knetconfig structure is required.
602 */
603 if (!(flags & NFSMNT_KNCONF)) {
604 if (!(uap->flags & MS_SYSSPACE)) {
605 nfs3_free_args(args, fhandle);
606 kmem_free(args, sizeof (*args));
607 }
608 return (EINVAL);
609 }
610
611 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
612 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
613 if (!(uap->flags & MS_SYSSPACE)) {
614 nfs3_free_args(args, fhandle);
615 kmem_free(args, sizeof (*args));
616 }
617 return (EINVAL);
618 }
619
620 /*
621 * Allocate a servinfo struct.
622 */
623 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
624 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
625 if (svp_tail) {
626 svp_2ndlast = svp_tail;
627 svp_tail->sv_next = svp;
628 } else {
629 svp_head = svp;
630 svp_2ndlast = svp;
631 }
632
633 svp_tail = svp;
634
635 svp->sv_knconf = args->knconf;
636 args->knconf = NULL;
637
638 if (args->addr == NULL || args->addr->buf == NULL) {
639 error = EINVAL;
640 goto errout;
641 }
642
643 svp->sv_addr.maxlen = args->addr->maxlen;
644 svp->sv_addr.len = args->addr->len;
645 svp->sv_addr.buf = args->addr->buf;
646 args->addr->buf = NULL;
647
648 /*
649 * Check the root fhandle length
650 */
651 ASSERT(fhandle);
652 if (fhandle->fh_len > NFS3_FHSIZE || fhandle->fh_len == 0) {
653 error = EINVAL;
654 #ifdef DEBUG
655 zcmn_err(getzoneid(), CE_WARN,
656 "nfs3_mount: got an invalid fhandle. fh_len = %d",
657 fhandle->fh_len);
658 fhandle->fh_len = NFS_FHANDLE_LEN;
659 nfs_printfhandle(fhandle);
660 #endif
661 goto errout;
662 }
663
664 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len);
665 svp->sv_fhandle.fh_len = fhandle->fh_len;
666
667 /*
668 * Get server's hostname
669 */
670 if (flags & NFSMNT_HOSTNAME) {
671 if (args->hostname == NULL) {
672 error = EINVAL;
673 goto errout;
674 }
675 svp->sv_hostnamelen = strlen(args->hostname) + 1;
676 svp->sv_hostname = args->hostname;
677 args->hostname = NULL;
678 } else {
679 char *p = "unknown-host";
680 svp->sv_hostnamelen = strlen(p) + 1;
681 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
682 (void) strcpy(svp->sv_hostname, p);
683 }
684
685
686 /*
687 * RDMA MOUNT SUPPORT FOR NFS v3:
688 * Establish, is it possible to use RDMA, if so overload the
689 * knconf with rdma specific knconf and free the orignal.
690 */
691 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
692 /*
693 * Determine the addr type for RDMA, IPv4 or v6.
694 */
695 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
696 addr_type = AF_INET;
697 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
698 addr_type = AF_INET6;
699
700 if (rdma_reachable(addr_type, &svp->sv_addr,
701 &rdma_knconf) == 0) {
702 /*
703 * If successful, hijack the orignal knconf and
704 * replace with a new one, depending on the flags.
705 */
706 svp->sv_origknconf = svp->sv_knconf;
707 svp->sv_knconf = rdma_knconf;
708 knconf = rdma_knconf;
709 } else {
710 if (flags & NFSMNT_TRYRDMA) {
711 #ifdef DEBUG
712 if (rdma_debug)
713 zcmn_err(getzoneid(), CE_WARN,
714 "no RDMA onboard, revert\n");
715 #endif
716 }
717
718 if (flags & NFSMNT_DORDMA) {
719 /*
720 * If proto=rdma is specified and no RDMA
721 * path to this server is avialable then
722 * ditch this server.
723 * This is not included in the mountable
724 * server list or the replica list.
725 * Check if more servers are specified;
726 * Failover case, otherwise bail out of mount.
727 */
728 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
729 args->nfs_ext_u.nfs_extB.next != NULL) {
730 data = (char *)
731 args->nfs_ext_u.nfs_extB.next;
732 if (uap->flags & MS_RDONLY &&
733 !(flags & NFSMNT_SOFT)) {
734 if (svp_head->sv_next == NULL) {
735 svp_tail = NULL;
736 svp_2ndlast = NULL;
737 sv_free(svp_head);
738 goto more;
739 } else {
740 svp_tail = svp_2ndlast;
741 svp_2ndlast->sv_next =
742 NULL;
743 sv_free(svp);
744 goto more;
745 }
746 }
747 } else {
748 /*
749 * This is the last server specified
750 * in the nfs_args list passed down
751 * and its not rdma capable.
752 */
753 if (svp_head->sv_next == NULL) {
754 /*
755 * Is this the only one
756 */
757 error = EINVAL;
758 #ifdef DEBUG
759 if (rdma_debug)
760 zcmn_err(getzoneid(),
761 CE_WARN,
762 "No RDMA srv");
763 #endif
764 goto errout;
765 } else {
766 /*
767 * There is list, since some
768 * servers specified before
769 * this passed all requirements
770 */
771 svp_tail = svp_2ndlast;
772 svp_2ndlast->sv_next = NULL;
773 sv_free(svp);
774 goto proceed;
775 }
776 }
777 }
778 }
779 }
780
781 /*
782 * Get the extention data which has the new security data structure.
783 */
784 if (flags & NFSMNT_NEWARGS) {
785 switch (args->nfs_args_ext) {
786 case NFS_ARGS_EXTA:
787 case NFS_ARGS_EXTB:
788 /*
789 * Indicating the application is using the new
790 * sec_data structure to pass in the security
791 * data.
792 */
793 secdata = args->nfs_ext_u.nfs_extA.secdata;
794 if (args->nfs_ext_u.nfs_extA.secdata == NULL) {
795 error = EINVAL;
796 } else {
797 /*
798 * Need to validate the flavor here if
799 * sysspace, userspace was already
800 * validate from the nfs_copyin function.
801 */
802 switch (secdata->rpcflavor) {
803 case AUTH_NONE:
804 case AUTH_UNIX:
805 case AUTH_LOOPBACK:
806 case AUTH_DES:
807 case RPCSEC_GSS:
808 args->nfs_ext_u.nfs_extA.secdata = NULL;
809 break;
810 default:
811 error = EINVAL;
812 goto errout;
813 }
814 }
815 break;
816
817 default:
818 error = EINVAL;
819 break;
820 }
821 } else if (flags & NFSMNT_SECURE) {
822 /*
823 * Keep this for backward compatibility to support
824 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags.
825 */
826 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) {
827 error = EINVAL;
828 goto errout;
829 }
830 /*
831 * Move security related data to the sec_data structure.
832 */
833 {
834 dh_k4_clntdata_t *data;
835 char *pf, *p;
836 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
837 if (flags & NFSMNT_RPCTIMESYNC)
838 secdata->flags |= AUTH_F_RPCTIMESYNC;
839 data = kmem_alloc(sizeof (*data), KM_SLEEP);
840 bcopy(args->syncaddr, &data->syncaddr,
841 sizeof (*args->syncaddr));
842
843 /*
844 * duplicate the knconf information for the
845 * new opaque data.
846 */
847 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
848 *data->knconf = *knconf;
849 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
850 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
851 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
852 bcopy(knconf->knc_proto, pf, KNC_STRSIZE);
853 data->knconf->knc_protofmly = pf;
854 data->knconf->knc_proto = p;
855
856 nlen = strlen(args->hostname) + 1;
857 /* move server netname to the sec_data structure */
858 if (nlen != 0) {
859 data->netname = kmem_alloc(nlen, KM_SLEEP);
860 bcopy(args->hostname, data->netname, nlen);
861 data->netnamelen = nlen;
862 }
863 secdata->secmod = secdata->rpcflavor = AUTH_DES;
864 secdata->data = (caddr_t)data;
865 }
866 } else {
867 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
868 secdata->secmod = secdata->rpcflavor = AUTH_UNIX;
869 secdata->data = NULL;
870 }
871
872 svp->sv_secdata = secdata;
873 if (error)
874 goto errout;
875
876 /*
877 * See bug 1180236.
878 * If mount secure failed, we will fall back to AUTH_NONE
879 * and try again. nfs3rootvp() will turn this back off.
880 *
881 * The NFS Version 3 mount uses the FSINFO and GETATTR
882 * procedures. The server should not care if these procedures
883 * have the proper security flavor, so if mount retries using
884 * AUTH_NONE that does not require a credential setup for root
885 * then the automounter would work without requiring root to be
886 * keylogged into AUTH_DES.
887 */
888 if (secdata->rpcflavor != AUTH_UNIX &&
889 secdata->rpcflavor != AUTH_LOOPBACK)
890 secdata->flags |= AUTH_F_TRYNONE;
891
892 /*
893 * Failover support:
894 *
895 * We may have a linked list of nfs_args structures,
896 * which means the user is looking for failover. If
897 * the mount is either not "read-only" or "soft",
898 * we want to bail out with EINVAL.
899 */
900 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
901 args->nfs_ext_u.nfs_extB.next != NULL) {
902 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
903 data = (char *)args->nfs_ext_u.nfs_extB.next;
904 goto more;
905 }
906 error = EINVAL;
907 goto errout;
908 }
909
910 /*
911 * Determine the zone we're being mounted into.
912 */
913 zone_hold(mntzone = zone); /* start with this assumption */
914 if (getzoneid() == GLOBAL_ZONEID) {
915 zone_rele(mntzone);
916 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
917 ASSERT(mntzone != NULL);
918 if (mntzone != zone) {
919 error = EBUSY;
920 goto errout;
921 }
922 }
923
924 if (is_system_labeled()) {
925 error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
926 svp->sv_knconf, cr);
927
928 if (error > 0)
929 goto errout;
930
931 if (error == -1) {
932 /* change mount to read-only to prevent write-down */
933 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
934 }
935 }
936
937 /*
938 * Stop the mount from going any further if the zone is going away.
939 */
940 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
941 error = EBUSY;
942 goto errout;
943 }
944
945 /*
946 * Get root vnode.
947 */
948 proceed:
949 error = nfs3rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
950
951 if (error)
952 goto errout;
953
954 /*
955 * Set option fields in the mount info record
956 */
957 mi = VTOMI(rtvp);
958
959 if (svp_head->sv_next)
960 mi->mi_flags |= MI_LLOCK;
961
962 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args);
963
964 errout:
965 if (rtvp != NULL) {
966 if (error) {
967 rp = VTOR(rtvp);
968 if (rp->r_flags & RHASHED)
969 rp_rmhash(rp);
970 }
971 VN_RELE(rtvp);
972 }
973
974 if (error) {
975 sv_free(svp_head);
976 if (mi != NULL) {
977 nfs_async_stop(vfsp);
978 nfs_async_manager_stop(vfsp);
979 if (mi->mi_io_kstats) {
980 kstat_delete(mi->mi_io_kstats);
981 mi->mi_io_kstats = NULL;
982 }
983 if (mi->mi_ro_kstats) {
984 kstat_delete(mi->mi_ro_kstats);
985 mi->mi_ro_kstats = NULL;
986 }
987 nfs_free_mi(mi);
988 }
989 }
990
991
992 if (!(uap->flags & MS_SYSSPACE)) {
993 nfs3_free_args(args, fhandle);
994 kmem_free(args, sizeof (*args));
995 }
996
997 if (mntzone != NULL)
998 zone_rele(mntzone);
999
1000 return (error);
1001 }
1002
/*
 * Defaults that nfs3rootvp() applies to each new mount:
 * nfs3_dynamic sets MI_DYNAMIC, nfs3_max_threads seeds mi_max_threads,
 * nfs3_bsize becomes vfs_bsize (after being forced to a multiple of
 * PAGESIZE), nfs3_async_clusters seeds the async cluster counts and
 * nfs3_cots_timeo is the time-out used for connection-oriented
 * transports.
 */
static int nfs3_dynamic = 0;	/* global variable to enable dynamic retrans. */
static ushort_t nfs3_max_threads = 8;	/* max number of active async threads */
uint_t nfs3_bsize = 32 * 1024;	/* client `block' size */
static uint_t nfs3_async_clusters = 1;	/* # of reqs from each async queue */
static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO;
1008
1009 static int
nfs3rootvp(vnode_t ** rtvpp,vfs_t * vfsp,struct servinfo * svp,int flags,cred_t * cr,zone_t * zone)1010 nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
1011 int flags, cred_t *cr, zone_t *zone)
1012 {
1013 vnode_t *rtvp;
1014 mntinfo_t *mi;
1015 dev_t nfs_dev;
1016 struct vattr va;
1017 struct FSINFO3args args;
1018 struct FSINFO3res res;
1019 int error;
1020 int douprintf;
1021 rnode_t *rp;
1022 int i;
1023 uint_t max_transfer_size;
1024 struct nfs_stats *nfsstatsp;
1025 cred_t *lcr = NULL, *tcr = cr;
1026
1027 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
1028 ASSERT(nfsstatsp != NULL);
1029
1030 ASSERT(nfs_zone() == zone);
1031 /*
1032 * Create a mount record and link it to the vfs struct.
1033 */
1034 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
1035 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
1036 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL);
1037 mi->mi_flags = MI_ACL | MI_EXTATTR;
1038 if (!(flags & NFSMNT_SOFT))
1039 mi->mi_flags |= MI_HARD;
1040 if ((flags & NFSMNT_SEMISOFT))
1041 mi->mi_flags |= MI_SEMISOFT;
1042 if ((flags & NFSMNT_NOPRINT))
1043 mi->mi_flags |= MI_NOPRINT;
1044 if (flags & NFSMNT_INT)
1045 mi->mi_flags |= MI_INT;
1046 mi->mi_retrans = NFS_RETRIES;
1047 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
1048 svp->sv_knconf->knc_semantics == NC_TPI_COTS)
1049 mi->mi_timeo = nfs3_cots_timeo;
1050 else
1051 mi->mi_timeo = NFS_TIMEO;
1052 mi->mi_prog = NFS_PROGRAM;
1053 mi->mi_vers = NFS_V3;
1054 mi->mi_rfsnames = rfsnames_v3;
1055 mi->mi_reqs = nfsstatsp->nfs_stats_v3.rfsreqcnt_ptr;
1056 mi->mi_call_type = call_type_v3;
1057 mi->mi_ss_call_type = ss_call_type_v3;
1058 mi->mi_timer_type = timer_type_v3;
1059 mi->mi_aclnames = aclnames_v3;
1060 mi->mi_aclreqs = nfsstatsp->nfs_stats_v3.aclreqcnt_ptr;
1061 mi->mi_acl_call_type = acl_call_type_v3;
1062 mi->mi_acl_ss_call_type = acl_ss_call_type_v3;
1063 mi->mi_acl_timer_type = acl_timer_type_v3;
1064 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
1065 mi->mi_servers = svp;
1066 mi->mi_curr_serv = svp;
1067 mi->mi_acregmin = SEC2HR(ACREGMIN);
1068 mi->mi_acregmax = SEC2HR(ACREGMAX);
1069 mi->mi_acdirmin = SEC2HR(ACDIRMIN);
1070 mi->mi_acdirmax = SEC2HR(ACDIRMAX);
1071
1072 if (nfs3_dynamic)
1073 mi->mi_flags |= MI_DYNAMIC;
1074
1075 if (flags & NFSMNT_DIRECTIO)
1076 mi->mi_flags |= MI_DIRECTIO;
1077
1078 mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
1079 list_create(&mi->mi_rnodes, sizeof (rnode_t),
1080 offsetof(rnode_t, r_mi_link));
1081
1082 /*
1083 * Make a vfs struct for nfs. We do this here instead of below
1084 * because rtvp needs a vfs before we can do a getattr on it.
1085 *
1086 * Assign a unique device id to the mount
1087 */
1088 mutex_enter(&nfs_minor_lock);
1089 do {
1090 nfs_minor = (nfs_minor + 1) & MAXMIN32;
1091 nfs_dev = makedevice(nfs_major, nfs_minor);
1092 } while (vfs_devismounted(nfs_dev));
1093 mutex_exit(&nfs_minor_lock);
1094
1095 vfsp->vfs_dev = nfs_dev;
1096 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs3fstyp);
1097 vfsp->vfs_data = (caddr_t)mi;
1098 vfsp->vfs_fstype = nfsfstyp;
1099
	/*
	 * Verify that nfs3_bsize tuneable is set to an
	 * acceptable value.  It must be a multiple of PAGESIZE or
	 * file corruption can occur.
	 */
1105 if (nfs3_bsize & PAGEOFFSET)
1106 nfs3_bsize &= PAGEMASK;
1107 if (nfs3_bsize < PAGESIZE)
1108 nfs3_bsize = PAGESIZE;
1109 vfsp->vfs_bsize = nfs3_bsize;
1110
1111 /*
1112 * Initialize fields used to support async putpage operations.
1113 */
1114 for (i = 0; i < NFS_ASYNC_TYPES; i++)
1115 mi->mi_async_clusters[i] = nfs3_async_clusters;
1116 mi->mi_async_init_clusters = nfs3_async_clusters;
1117 mi->mi_async_curr[NFS_ASYNC_QUEUE] =
1118 mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
1119 mi->mi_max_threads = nfs3_max_threads;
1120 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
1121 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
1122 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL);
1123 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL,
1124 CV_DEFAULT, NULL);
1125 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
1126
1127 mi->mi_vfsp = vfsp;
1128 mi->mi_zone = zone;
1129 zone_init_ref(&mi->mi_zone_ref);
1130 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS);
1131 nfs_mi_zonelist_add(mi);
1132
1133 /*
1134 * Make the root vnode, use it to get attributes,
1135 * then remake it with the attributes.
1136 */
1137 rtvp = makenfs3node((nfs_fh3 *)&svp->sv_fhandle,
1138 NULL, vfsp, gethrtime(), cr, NULL, NULL);
1139
1140 /*
1141 * Make the FSINFO calls, primarily at this point to
1142 * determine the transfer size. For client failover,
1143 * we'll want this to be the minimum bid from any
1144 * server, so that we don't overrun stated limits.
1145 *
1146 * While we're looping, we'll turn off AUTH_F_TRYNONE,
1147 * which is only for the mount operation.
1148 */
1149
1150 mi->mi_tsize = nfs3_tsize(svp->sv_knconf);
1151 mi->mi_stsize = mi->mi_tsize;
1152
1153 mi->mi_curread = nfs3_bsize;
1154 mi->mi_curwrite = mi->mi_curread;
1155
1156 /*
1157 * If the uid is set then set the creds for secure mounts
1158 * by proxy processes such as automountd.
1159 */
1160 if (svp->sv_secdata->uid != 0 &&
1161 svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
1162 lcr = crdup(cr);
1163 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
1164 tcr = lcr;
1165 }
1166
1167 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
1168 douprintf = 1;
1169 mi->mi_curr_serv = svp;
1170 max_transfer_size = nfs3_tsize(svp->sv_knconf);
1171 mi->mi_tsize = MIN(max_transfer_size, mi->mi_tsize);
1172 mi->mi_stsize = MIN(max_transfer_size, mi->mi_stsize);
1173 mi->mi_curread = MIN(max_transfer_size, mi->mi_curread);
1174 mi->mi_curwrite = MIN(max_transfer_size, mi->mi_curwrite);
1175 args.fsroot = *(nfs_fh3 *)&svp->sv_fhandle;
1176
1177 error = rfs3call(mi, NFSPROC3_FSINFO,
1178 xdr_nfs_fh3, (caddr_t)&args,
1179 xdr_FSINFO3res, (caddr_t)&res, tcr,
1180 &douprintf, &res.status, 0, NULL);
1181 if (error)
1182 goto bad;
1183 error = geterrno3(res.status);
1184 if (error)
1185 goto bad;
1186
1187 /* get type of root node */
1188 if (res.resok.obj_attributes.attributes) {
1189 if (res.resok.obj_attributes.attr.type < NF3REG ||
1190 res.resok.obj_attributes.attr.type > NF3FIFO) {
1191 #ifdef DEBUG
1192 zcmn_err(getzoneid(), CE_WARN,
1193 "NFS3 server %s returned a bad file type for root",
1194 svp->sv_hostname);
1195 #else
1196 zcmn_err(getzoneid(), CE_WARN,
1197 "NFS server %s returned a bad file type for root",
1198 svp->sv_hostname);
1199 #endif
1200 error = EINVAL;
1201 goto bad;
1202 } else {
1203 if (rtvp->v_type != VNON && rtvp->v_type !=
1204 nf3_to_vt[res.resok.obj_attributes.attr.
1205 type]) {
1206 #ifdef DEBUG
1207 zcmn_err(getzoneid(), CE_WARN,
1208 "NFS3 server %s returned a different file type for root",
1209 svp->sv_hostname);
1210 #else
1211 zcmn_err(getzoneid(), CE_WARN,
1212 "NFS server %s returned a different file type for root",
1213 svp->sv_hostname);
1214 #endif
1215 error = EINVAL;
1216 goto bad;
1217 }
1218 rtvp->v_type =
1219 nf3_to_vt[res.resok.obj_attributes.attr.
1220 type];
1221 }
1222 }
1223
1224 if (res.resok.rtmax != 0) {
1225 mi->mi_tsize = MIN(res.resok.rtmax, mi->mi_tsize);
1226 if (res.resok.rtpref != 0) {
1227 mi->mi_curread = MIN(res.resok.rtpref,
1228 mi->mi_curread);
1229 } else {
1230 mi->mi_curread = MIN(res.resok.rtmax,
1231 mi->mi_curread);
1232 }
1233 } else if (res.resok.rtpref != 0) {
1234 mi->mi_tsize = MIN(res.resok.rtpref, mi->mi_tsize);
1235 mi->mi_curread = MIN(res.resok.rtpref, mi->mi_curread);
1236 } else {
1237 #ifdef DEBUG
1238 zcmn_err(getzoneid(), CE_WARN,
1239 "NFS3 server %s returned 0 for read transfer sizes",
1240 svp->sv_hostname);
1241 #else
1242 zcmn_err(getzoneid(), CE_WARN,
1243 "NFS server %s returned 0 for read transfer sizes",
1244 svp->sv_hostname);
1245 #endif
1246 error = EIO;
1247 goto bad;
1248 }
1249 if (res.resok.wtmax != 0) {
1250 mi->mi_stsize = MIN(res.resok.wtmax, mi->mi_stsize);
1251 if (res.resok.wtpref != 0) {
1252 mi->mi_curwrite = MIN(res.resok.wtpref,
1253 mi->mi_curwrite);
1254 } else {
1255 mi->mi_curwrite = MIN(res.resok.wtmax,
1256 mi->mi_curwrite);
1257 }
1258 } else if (res.resok.wtpref != 0) {
1259 mi->mi_stsize = MIN(res.resok.wtpref, mi->mi_stsize);
1260 mi->mi_curwrite = MIN(res.resok.wtpref,
1261 mi->mi_curwrite);
1262 } else {
1263 #ifdef DEBUG
1264 zcmn_err(getzoneid(), CE_WARN,
1265 "NFS3 server %s returned 0 for write transfer sizes",
1266 svp->sv_hostname);
1267 #else
1268 zcmn_err(getzoneid(), CE_WARN,
1269 "NFS server %s returned 0 for write transfer sizes",
1270 svp->sv_hostname);
1271 #endif
1272 error = EIO;
1273 goto bad;
1274 }
1275
1276 /*
1277 * These signal the ability of the server to create
1278 * hard links and symbolic links, so they really
1279 * aren't relevant if there is more than one server.
1280 * We'll set them here, though it probably looks odd.
1281 */
1282 if (res.resok.properties & FSF3_LINK)
1283 mi->mi_flags |= MI_LINK;
1284 if (res.resok.properties & FSF3_SYMLINK)
1285 mi->mi_flags |= MI_SYMLINK;
1286
1287 /* Pick up smallest non-zero maxfilesize value */
1288 if (res.resok.maxfilesize) {
1289 if (mi->mi_maxfilesize) {
1290 mi->mi_maxfilesize = MIN(mi->mi_maxfilesize,
1291 res.resok.maxfilesize);
1292 } else
1293 mi->mi_maxfilesize = res.resok.maxfilesize;
1294 }
1295
1296 /*
1297 * AUTH_F_TRYNONE is only for the mount operation,
1298 * so turn it back off.
1299 */
1300 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE;
1301 }
1302 mi->mi_curr_serv = mi->mi_servers;
1303
1304 /*
1305 * Start the thread responsible for handling async worker threads.
1306 */
1307 VFS_HOLD(vfsp); /* add reference for thread */
1308 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager,
1309 vfsp, 0, minclsyspri);
1310 ASSERT(mi->mi_manager_thread != NULL);
1311
1312 /*
1313 * Initialize kstats
1314 */
1315 nfs_mnt_kstat_init(vfsp);
1316
1317 /* If we didn't get a type, get one now */
1318 if (rtvp->v_type == VNON) {
1319 va.va_mask = AT_ALL;
1320
1321 error = nfs3getattr(rtvp, &va, tcr);
1322 if (error)
1323 goto bad;
1324 rtvp->v_type = va.va_type;
1325 }
1326
1327 mi->mi_type = rtvp->v_type;
1328
1329 *rtvpp = rtvp;
1330 if (lcr != NULL)
1331 crfree(lcr);
1332
1333 return (0);
1334 bad:
1335 /*
1336 * An error occurred somewhere, need to clean up...
1337 * We need to release our reference to the root vnode and
1338 * destroy the mntinfo struct that we just created.
1339 */
1340 if (lcr != NULL)
1341 crfree(lcr);
1342 rp = VTOR(rtvp);
1343 if (rp->r_flags & RHASHED)
1344 rp_rmhash(rp);
1345 VN_RELE(rtvp);
1346 nfs_async_stop(vfsp);
1347 nfs_async_manager_stop(vfsp);
1348 if (mi->mi_io_kstats) {
1349 kstat_delete(mi->mi_io_kstats);
1350 mi->mi_io_kstats = NULL;
1351 }
1352 if (mi->mi_ro_kstats) {
1353 kstat_delete(mi->mi_ro_kstats);
1354 mi->mi_ro_kstats = NULL;
1355 }
1356 nfs_free_mi(mi);
1357 *rtvpp = NULL;
1358 return (error);
1359 }
1360
1361 /*
1362 * vfs operations
1363 */
1364 static int
nfs3_unmount(vfs_t * vfsp,int flag,cred_t * cr)1365 nfs3_unmount(vfs_t *vfsp, int flag, cred_t *cr)
1366 {
1367 mntinfo_t *mi;
1368 ushort_t omax;
1369
1370 if (secpolicy_fs_unmount(cr, vfsp) != 0)
1371 return (EPERM);
1372
1373 mi = VFTOMI(vfsp);
1374 if (flag & MS_FORCE) {
1375
1376 vfsp->vfs_flag |= VFS_UNMOUNTED;
1377
1378 /*
1379 * We are about to stop the async manager.
1380 * Let every one know not to schedule any
1381 * more async requests
1382 */
1383 mutex_enter(&mi->mi_async_lock);
1384 mi->mi_max_threads = 0;
1385 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
1386 mutex_exit(&mi->mi_async_lock);
1387
1388 /*
1389 * We need to stop the manager thread explicitly; the worker
1390 * threads can time out and exit on their own.
1391 */
1392 nfs_async_manager_stop(vfsp);
1393 destroy_rtable(vfsp, cr);
1394 if (mi->mi_io_kstats) {
1395 kstat_delete(mi->mi_io_kstats);
1396 mi->mi_io_kstats = NULL;
1397 }
1398 if (mi->mi_ro_kstats) {
1399 kstat_delete(mi->mi_ro_kstats);
1400 mi->mi_ro_kstats = NULL;
1401 }
1402 return (0);
1403 }
1404 /*
1405 * Wait until all asynchronous putpage operations on
1406 * this file system are complete before flushing rnodes
1407 * from the cache.
1408 */
1409 omax = mi->mi_max_threads;
1410 if (nfs_async_stop_sig(vfsp)) {
1411 return (EINTR);
1412 }
1413 rflush(vfsp, cr);
1414 /*
1415 * If there are any active vnodes on this file system,
1416 * then the file system is busy and can't be umounted.
1417 */
1418 if (check_rtable(vfsp)) {
1419 mutex_enter(&mi->mi_async_lock);
1420 mi->mi_max_threads = omax;
1421 mutex_exit(&mi->mi_async_lock);
1422 return (EBUSY);
1423 }
1424 /*
1425 * The unmount can't fail from now on; stop the worker thread manager.
1426 */
1427 nfs_async_manager_stop(vfsp);
1428 /*
1429 * Destroy all rnodes belonging to this file system from the
1430 * rnode hash queues and purge any resources allocated to
1431 * them.
1432 */
1433 destroy_rtable(vfsp, cr);
1434 if (mi->mi_io_kstats) {
1435 kstat_delete(mi->mi_io_kstats);
1436 mi->mi_io_kstats = NULL;
1437 }
1438 if (mi->mi_ro_kstats) {
1439 kstat_delete(mi->mi_ro_kstats);
1440 mi->mi_ro_kstats = NULL;
1441 }
1442 return (0);
1443 }
1444
1445 /*
1446 * find root of nfs
1447 */
1448 static int
nfs3_root(vfs_t * vfsp,vnode_t ** vpp)1449 nfs3_root(vfs_t *vfsp, vnode_t **vpp)
1450 {
1451 mntinfo_t *mi;
1452 vnode_t *vp;
1453 servinfo_t *svp;
1454 rnode_t *rp;
1455 int error = 0;
1456
1457 mi = VFTOMI(vfsp);
1458
1459 if (nfs_zone() != mi->mi_zone)
1460 return (EPERM);
1461
1462 svp = mi->mi_curr_serv;
1463 if (svp && (svp->sv_flags & SV_ROOT_STALE)) {
1464 mutex_enter(&svp->sv_lock);
1465 svp->sv_flags &= ~SV_ROOT_STALE;
1466 mutex_exit(&svp->sv_lock);
1467 error = ENOENT;
1468 }
1469
1470 vp = makenfs3node((nfs_fh3 *)&mi->mi_curr_serv->sv_fhandle,
1471 NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1472
1473 /*
1474 * if the SV_ROOT_STALE flag was reset above, reset the
1475 * RSTALE flag if needed and return an error
1476 */
1477 if (error == ENOENT) {
1478 rp = VTOR(vp);
1479 if (svp && rp->r_flags & RSTALE) {
1480 mutex_enter(&rp->r_statelock);
1481 rp->r_flags &= ~RSTALE;
1482 mutex_exit(&rp->r_statelock);
1483 }
1484 VN_RELE(vp);
1485 return (error);
1486 }
1487
1488 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
1489
1490 vp->v_type = mi->mi_type;
1491
1492 *vpp = vp;
1493
1494 return (0);
1495 }
1496
1497 /*
1498 * Get file system statistics.
1499 */
1500 static int
nfs3_statvfs(vfs_t * vfsp,struct statvfs64 * sbp)1501 nfs3_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
1502 {
1503 int error;
1504 struct mntinfo *mi;
1505 struct FSSTAT3args args;
1506 struct FSSTAT3res res;
1507 int douprintf;
1508 failinfo_t fi;
1509 vnode_t *vp;
1510 cred_t *cr;
1511 hrtime_t t;
1512
1513 mi = VFTOMI(vfsp);
1514 if (nfs_zone() != mi->mi_zone)
1515 return (EPERM);
1516 error = nfs3_root(vfsp, &vp);
1517 if (error)
1518 return (error);
1519
1520 cr = CRED();
1521
1522 args.fsroot = *VTOFH3(vp);
1523 fi.vp = vp;
1524 fi.fhp = (caddr_t)&args.fsroot;
1525 fi.copyproc = nfs3copyfh;
1526 fi.lookupproc = nfs3lookup;
1527 fi.xattrdirproc = acl_getxattrdir3;
1528
1529 douprintf = 1;
1530
1531 t = gethrtime();
1532
1533 error = rfs3call(mi, NFSPROC3_FSSTAT,
1534 xdr_nfs_fh3, (caddr_t)&args,
1535 xdr_FSSTAT3res, (caddr_t)&res, cr,
1536 &douprintf, &res.status, 0, &fi);
1537
1538 if (error) {
1539 VN_RELE(vp);
1540 return (error);
1541 }
1542
1543 error = geterrno3(res.status);
1544 if (!error) {
1545 nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr);
1546 sbp->f_bsize = MAXBSIZE;
1547 sbp->f_frsize = DEV_BSIZE;
1548 /*
1549 * Allow -1 fields to pass through unconverted. These
1550 * indicate "don't know" fields.
1551 */
1552 if (res.resok.tbytes == (size3)-1)
1553 sbp->f_blocks = (fsblkcnt64_t)res.resok.tbytes;
1554 else {
1555 sbp->f_blocks = (fsblkcnt64_t)
1556 (res.resok.tbytes / DEV_BSIZE);
1557 }
1558 if (res.resok.fbytes == (size3)-1)
1559 sbp->f_bfree = (fsblkcnt64_t)res.resok.fbytes;
1560 else {
1561 sbp->f_bfree = (fsblkcnt64_t)
1562 (res.resok.fbytes / DEV_BSIZE);
1563 }
1564 if (res.resok.abytes == (size3)-1)
1565 sbp->f_bavail = (fsblkcnt64_t)res.resok.abytes;
1566 else {
1567 sbp->f_bavail = (fsblkcnt64_t)
1568 (res.resok.abytes / DEV_BSIZE);
1569 }
1570 sbp->f_files = (fsfilcnt64_t)res.resok.tfiles;
1571 sbp->f_ffree = (fsfilcnt64_t)res.resok.ffiles;
1572 sbp->f_favail = (fsfilcnt64_t)res.resok.afiles;
1573 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
1574 (void) strncpy(sbp->f_basetype,
1575 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
1576 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
1577 sbp->f_namemax = (ulong_t)-1;
1578 } else {
1579 nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr);
1580 PURGE_STALE_FH(error, vp, cr);
1581 }
1582
1583 VN_RELE(vp);
1584
1585 return (error);
1586 }
1587
/*
 * Taken with mutex_tryenter() by nfs3_sync() around its rflush() call, so
 * a sync request that finds the mutex already held skips the flush
 * instead of blocking.  Initialized in nfs3_vfsinit() and destroyed in
 * nfs3_vfsfini().
 */
static kmutex_t nfs3_syncbusy;
1589
1590 /*
1591 * Flush dirty nfs files for file system vfsp.
1592 * If vfsp == NULL, all nfs files are flushed.
1593 */
1594 /* ARGSUSED */
1595 static int
nfs3_sync(vfs_t * vfsp,short flag,cred_t * cr)1596 nfs3_sync(vfs_t *vfsp, short flag, cred_t *cr)
1597 {
1598 /*
1599 * Cross-zone calls are OK here, since this translates to a
1600 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
1601 */
1602 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs3_syncbusy) != 0) {
1603 rflush(vfsp, cr);
1604 mutex_exit(&nfs3_syncbusy);
1605 }
1606 return (0);
1607 }
1608
1609 /* ARGSUSED */
1610 static int
nfs3_vget(vfs_t * vfsp,vnode_t ** vpp,fid_t * fidp)1611 nfs3_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1612 {
1613 int error;
1614 nfs_fh3 fh;
1615 vnode_t *vp;
1616 struct vattr va;
1617
1618 if (fidp->fid_len > NFS3_FHSIZE) {
1619 *vpp = NULL;
1620 return (ESTALE);
1621 }
1622
1623 if (nfs_zone() != VFTOMI(vfsp)->mi_zone)
1624 return (EPERM);
1625 fh.fh3_length = fidp->fid_len;
1626 bcopy(fidp->fid_data, fh.fh3_u.data, fh.fh3_length);
1627
1628 vp = makenfs3node(&fh, NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1629
1630 if (VTOR(vp)->r_flags & RSTALE) {
1631 VN_RELE(vp);
1632 *vpp = NULL;
1633 return (ENOENT);
1634 }
1635
1636 if (vp->v_type == VNON) {
1637 va.va_mask = AT_ALL;
1638 error = nfs3getattr(vp, &va, CRED());
1639 if (error) {
1640 VN_RELE(vp);
1641 *vpp = NULL;
1642 return (error);
1643 }
1644 vp->v_type = va.va_type;
1645 }
1646
1647 *vpp = vp;
1648
1649 return (0);
1650 }
1651
1652 /* ARGSUSED */
1653 static int
nfs3_mountroot(vfs_t * vfsp,whymountroot_t why)1654 nfs3_mountroot(vfs_t *vfsp, whymountroot_t why)
1655 {
1656 vnode_t *rtvp;
1657 char root_hostname[SYS_NMLN+1];
1658 struct servinfo *svp;
1659 int error;
1660 int vfsflags;
1661 size_t size;
1662 char *root_path;
1663 struct pathname pn;
1664 char *name;
1665 cred_t *cr;
1666 struct nfs_args args; /* nfs mount arguments */
1667 static char token[10];
1668
1669 bzero(&args, sizeof (args));
1670
1671 /* do this BEFORE getfile which causes xid stamps to be initialized */
1672 clkset(-1L); /* hack for now - until we get time svc? */
1673
1674 if (why == ROOT_REMOUNT) {
1675 /*
1676 * Shouldn't happen.
1677 */
1678 panic("nfs3_mountroot: why == ROOT_REMOUNT");
1679 }
1680
1681 if (why == ROOT_UNMOUNT) {
1682 /*
1683 * Nothing to do for NFS.
1684 */
1685 return (0);
1686 }
1687
1688 /*
1689 * why == ROOT_INIT
1690 */
1691
1692 name = token;
1693 *name = 0;
1694 getfsname("root", name, sizeof (token));
1695
1696 pn_alloc(&pn);
1697 root_path = pn.pn_path;
1698
1699 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
1700 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
1701 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1702 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1703
1704 /*
1705 * Get server address
1706 * Get the root fhandle
1707 * Get server's transport
1708 * Get server's hostname
1709 * Get options
1710 */
1711 args.addr = &svp->sv_addr;
1712 args.fh = (char *)&svp->sv_fhandle;
1713 args.knconf = svp->sv_knconf;
1714 args.hostname = root_hostname;
1715 vfsflags = 0;
1716 if (error = mount_root(*name ? name : "root", root_path, NFS_V3,
1717 &args, &vfsflags)) {
1718 if (error == EPROTONOSUPPORT)
1719 nfs_cmn_err(error, CE_WARN, "nfs3_mountroot: "
1720 "mount_root failed: server doesn't support NFS V3");
1721 else
1722 nfs_cmn_err(error, CE_WARN,
1723 "nfs3_mountroot: mount_root failed: %m");
1724 sv_free(svp);
1725 pn_free(&pn);
1726 return (error);
1727 }
1728 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
1729 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
1730 (void) strcpy(svp->sv_hostname, root_hostname);
1731
1732 /*
1733 * Force root partition to always be mounted with AUTH_UNIX for now
1734 */
1735 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
1736 svp->sv_secdata->secmod = AUTH_UNIX;
1737 svp->sv_secdata->rpcflavor = AUTH_UNIX;
1738 svp->sv_secdata->data = NULL;
1739
1740 cr = crgetcred();
1741 rtvp = NULL;
1742
1743 error = nfs3rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
1744
1745 crfree(cr);
1746
1747 if (error) {
1748 pn_free(&pn);
1749 sv_free(svp);
1750 return (error);
1751 }
1752
1753 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args);
1754 if (error) {
1755 nfs_cmn_err(error, CE_WARN,
1756 "nfs3_mountroot: invalid root mount options");
1757 pn_free(&pn);
1758 goto errout;
1759 }
1760
1761 (void) vfs_lock_wait(vfsp);
1762 vfs_add(NULL, vfsp, vfsflags);
1763 vfs_unlock(vfsp);
1764
1765 size = strlen(svp->sv_hostname);
1766 (void) strcpy(rootfs.bo_name, svp->sv_hostname);
1767 rootfs.bo_name[size] = ':';
1768 (void) strcpy(&rootfs.bo_name[size + 1], root_path);
1769
1770 pn_free(&pn);
1771
1772 errout:
1773 if (error) {
1774 sv_free(svp);
1775 nfs_async_stop(vfsp);
1776 nfs_async_manager_stop(vfsp);
1777 }
1778
1779 if (rtvp != NULL)
1780 VN_RELE(rtvp);
1781
1782 return (error);
1783 }
1784
1785 /*
1786 * Initialization routine for VFS routines. Should only be called once
1787 */
1788 int
nfs3_vfsinit(void)1789 nfs3_vfsinit(void)
1790 {
1791 mutex_init(&nfs3_syncbusy, NULL, MUTEX_DEFAULT, NULL);
1792 return (0);
1793 }
1794
1795 void
nfs3_vfsfini(void)1796 nfs3_vfsfini(void)
1797 {
1798 mutex_destroy(&nfs3_syncbusy);
1799 }
1800
1801 void
nfs3_freevfs(vfs_t * vfsp)1802 nfs3_freevfs(vfs_t *vfsp)
1803 {
1804 mntinfo_t *mi;
1805 servinfo_t *svp;
1806
1807 /* free up the resources */
1808 mi = VFTOMI(vfsp);
1809 svp = mi->mi_servers;
1810 mi->mi_servers = mi->mi_curr_serv = NULL;
1811 sv_free(svp);
1812
1813 /*
1814 * By this time we should have already deleted the
1815 * mi kstats in the unmount code. If they are still around
1816 * somethings wrong
1817 */
1818 ASSERT(mi->mi_io_kstats == NULL);
1819 nfs_free_mi(mi);
1820 }
1821