1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 */
26
27 /*
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
29 * All Rights Reserved
30 */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/vnode.h>
39 #include <sys/pathname.h>
40 #include <sys/sysmacros.h>
41 #include <sys/kmem.h>
42 #include <sys/mkdev.h>
43 #include <sys/mount.h>
44 #include <sys/statvfs.h>
45 #include <sys/errno.h>
46 #include <sys/debug.h>
47 #include <sys/cmn_err.h>
48 #include <sys/utsname.h>
49 #include <sys/bootconf.h>
50 #include <sys/modctl.h>
51 #include <sys/acl.h>
52 #include <sys/flock.h>
53 #include <sys/time.h>
54 #include <sys/disp.h>
55 #include <sys/policy.h>
56 #include <sys/socket.h>
57 #include <sys/netconfig.h>
58 #include <sys/dnlc.h>
59 #include <sys/list.h>
60 #include <sys/mntent.h>
61 #include <sys/tsol/label.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/clnt.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/nfs_clnt.h>
70 #include <nfs/mount.h>
71 #include <nfs/nfs_acl.h>
72
73 #include <fs/fs_subr.h>
74
75 #include <nfs/nfs4.h>
76 #include <nfs/rnode4.h>
77 #include <nfs/nfs4_clnt.h>
78 #include <sys/fs/autofs.h>
79
80 #include <sys/sdt.h>
81
82
83 /*
84 * Arguments passed to thread to free data structures from forced unmount.
85 */
86
87 typedef struct {
88 vfs_t *fm_vfsp;
89 int fm_flag;
90 cred_t *fm_cr;
91 } freemountargs_t;
92
93 static void async_free_mount(vfs_t *, int, cred_t *);
94 static void nfs4_free_mount(vfs_t *, int, cred_t *);
95 static void nfs4_free_mount_thread(freemountargs_t *);
96 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);
97
98 /*
99 * From rpcsec module (common/rpcsec).
100 */
101 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
102 extern void sec_clnt_freeinfo(struct sec_data *);
103
104 /*
105 * The order and contents of this structure must be kept in sync with that of
106 * rfsreqcnt_v4_tmpl in nfs_stats.c
107 */
static char *rfsnames_v4[] = {
	"null", "compound", "reserved", "access",
	"close", "commit", "create", "delegpurge",
	"delegreturn", "getattr", "getfh", "link",
	"lock", "lockt", "locku", "lookup",
	"lookupp", "nverify", "open", "openattr",
	"open_confirm", "open_downgrade", "putfh", "putpubfh",
	"putrootfh", "read", "readdir", "readlink",
	"remove", "rename", "renew", "restorefh",
	"savefh", "secinfo", "setattr", "setclientid",
	"setclientid_confirm", "verify", "write"
};
117
118 /*
119 * nfs4_max_mount_retry is the number of times the client will redrive
120 * a mount compound before giving up and returning failure. The intent
121 * is to redrive mount compounds which fail NFS4ERR_STALE so that
122 * if a component of the server path being mounted goes stale, it can
123 * "recover" by redriving the mount compound (LOOKUP ops). This recovery
124 * code is needed outside of the recovery framework because mount is a
125 * special case. The client doesn't create vnodes/rnodes for components
126 * of the server path being mounted. The recovery code recovers real
127 * client objects, not STALE FHs which map to components of the server
128 * path being mounted.
129 *
130 * We could just fail the mount on the first time, but that would
131 * instantly trigger failover (from nfs4_mount), and the client should
132 * try to re-lookup the STALE FH before doing failover. The easiest
133 * way to "re-lookup" is to simply redrive the mount compound.
134 */
135 static int nfs4_max_mount_retry = 2;
136
137 /*
138 * nfs4 vfs operations.
139 */
140 int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
141 static int nfs4_unmount(vfs_t *, int, cred_t *);
142 static int nfs4_root(vfs_t *, vnode_t **);
143 static int nfs4_statvfs(vfs_t *, struct statvfs64 *);
144 static int nfs4_sync(vfs_t *, short, cred_t *);
145 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *);
146 static int nfs4_mountroot(vfs_t *, whymountroot_t);
147 static void nfs4_freevfs(vfs_t *);
148
149 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
150 int, cred_t *, zone_t *);
151
152 vfsops_t *nfs4_vfsops;
153
154 int nfs4_vfsinit(void);
155 void nfs4_vfsfini(void);
156 static void nfs4setclientid_init(void);
157 static void nfs4setclientid_fini(void);
158 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
159 struct nfs4_server *, nfs4_error_t *, int *);
160 static void destroy_nfs4_server(nfs4_server_t *);
161 static void remove_mi(nfs4_server_t *, mntinfo4_t *);
162
163 extern void nfs4_ephemeral_init(void);
164 extern void nfs4_ephemeral_fini(void);
165
166 /* referral related routines */
167 static servinfo4_t *copy_svp(servinfo4_t *);
168 static void free_knconf_contents(struct knetconfig *k);
169 static char *extract_referral_point(const char *, int);
170 static void setup_newsvpath(servinfo4_t *, int);
171 static void update_servinfo4(servinfo4_t *, fs_location4 *,
172 struct nfs_fsl_info *, char *, int);
173
174 /*
175 * Initialize the vfs structure
176 */
177
178 static int nfs4fstyp;
179
180
181 /*
182 * Debug variable to check for rdma based
183 * transport startup and cleanup. Controlled
184 * through /etc/system. Off by default.
185 */
186 extern int rdma_debug;
187
188 int
nfs4init(int fstyp,char * name)189 nfs4init(int fstyp, char *name)
190 {
191 static const fs_operation_def_t nfs4_vfsops_template[] = {
192 VFSNAME_MOUNT, { .vfs_mount = nfs4_mount },
193 VFSNAME_UNMOUNT, { .vfs_unmount = nfs4_unmount },
194 VFSNAME_ROOT, { .vfs_root = nfs4_root },
195 VFSNAME_STATVFS, { .vfs_statvfs = nfs4_statvfs },
196 VFSNAME_SYNC, { .vfs_sync = nfs4_sync },
197 VFSNAME_VGET, { .vfs_vget = nfs4_vget },
198 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs4_mountroot },
199 VFSNAME_FREEVFS, { .vfs_freevfs = nfs4_freevfs },
200 NULL, NULL
201 };
202 int error;
203
204 nfs4_vfsops = NULL;
205 nfs4_vnodeops = NULL;
206 nfs4_trigger_vnodeops = NULL;
207
208 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
209 if (error != 0) {
210 zcmn_err(GLOBAL_ZONEID, CE_WARN,
211 "nfs4init: bad vfs ops template");
212 goto out;
213 }
214
215 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
216 if (error != 0) {
217 zcmn_err(GLOBAL_ZONEID, CE_WARN,
218 "nfs4init: bad vnode ops template");
219 goto out;
220 }
221
222 error = vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template,
223 &nfs4_trigger_vnodeops);
224 if (error != 0) {
225 zcmn_err(GLOBAL_ZONEID, CE_WARN,
226 "nfs4init: bad trigger vnode ops template");
227 goto out;
228 }
229
230 nfs4fstyp = fstyp;
231 (void) nfs4_vfsinit();
232 (void) nfs4_init_dot_entries();
233
234 out:
235 if (error) {
236 if (nfs4_trigger_vnodeops != NULL)
237 vn_freevnodeops(nfs4_trigger_vnodeops);
238
239 if (nfs4_vnodeops != NULL)
240 vn_freevnodeops(nfs4_vnodeops);
241
242 (void) vfs_freevfsops_by_type(fstyp);
243 }
244
245 return (error);
246 }
247
/*
 * Tear down what nfs4init() set up through nfs4_init_dot_entries() and
 * nfs4_vfsinit(), in the reverse order of their initialization.
 */
void
nfs4fini(void)
{
	(void) nfs4_destroy_dot_entries();
	nfs4_vfsfini();
}
254
255 /*
256 * Create a new sec_data structure to store AUTH_DH related data:
257 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
258 * flag set for NFS V4 since we are avoiding to contact the rpcbind
259 * daemon and is using the IP time service (IPPORT_TIMESERVER).
260 *
261 * sec_data can be freed by sec_clnt_freeinfo().
262 */
263 static struct sec_data *
create_authdh_data(char * netname,int nlen,struct netbuf * syncaddr,struct knetconfig * knconf)264 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
265 struct knetconfig *knconf)
266 {
267 struct sec_data *secdata;
268 dh_k4_clntdata_t *data;
269 char *pf, *p;
270
271 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
272 return (NULL);
273
274 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
275 secdata->flags = 0;
276
277 data = kmem_alloc(sizeof (*data), KM_SLEEP);
278
279 data->syncaddr.maxlen = syncaddr->maxlen;
280 data->syncaddr.len = syncaddr->len;
281 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
282 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);
283
284 /*
285 * duplicate the knconf information for the
286 * new opaque data.
287 */
288 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
289 *data->knconf = *knconf;
290 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
291 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
292 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
293 bcopy(knconf->knc_proto, p, KNC_STRSIZE);
294 data->knconf->knc_protofmly = pf;
295 data->knconf->knc_proto = p;
296
297 /* move server netname to the sec_data structure */
298 data->netname = kmem_alloc(nlen, KM_SLEEP);
299 bcopy(netname, data->netname, nlen);
300 data->netnamelen = (int)nlen;
301
302 secdata->secmod = AUTH_DH;
303 secdata->rpcflavor = AUTH_DH;
304 secdata->data = (caddr_t)data;
305
306 return (secdata);
307 }
308
309 /*
310 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
311 * is responsible for freeing.
312 */
313 sec_data_t *
copy_sec_data(sec_data_t * fsecdata)314 copy_sec_data(sec_data_t *fsecdata)
315 {
316 sec_data_t *tsecdata;
317
318 if (fsecdata == NULL)
319 return (NULL);
320
321 if (fsecdata->rpcflavor == AUTH_DH) {
322 dh_k4_clntdata_t *fdata = (dh_k4_clntdata_t *)fsecdata->data;
323
324 if (fdata == NULL)
325 return (NULL);
326
327 tsecdata = (sec_data_t *)create_authdh_data(fdata->netname,
328 fdata->netnamelen, &fdata->syncaddr, fdata->knconf);
329
330 return (tsecdata);
331 }
332
333 tsecdata = kmem_zalloc(sizeof (sec_data_t), KM_SLEEP);
334
335 tsecdata->secmod = fsecdata->secmod;
336 tsecdata->rpcflavor = fsecdata->rpcflavor;
337 tsecdata->flags = fsecdata->flags;
338 tsecdata->uid = fsecdata->uid;
339
340 if (fsecdata->rpcflavor == RPCSEC_GSS) {
341 gss_clntdata_t *gcd = (gss_clntdata_t *)fsecdata->data;
342
343 tsecdata->data = (caddr_t)copy_sec_data_gss(gcd);
344 } else {
345 tsecdata->data = NULL;
346 }
347
348 return (tsecdata);
349 }
350
351 gss_clntdata_t *
copy_sec_data_gss(gss_clntdata_t * fdata)352 copy_sec_data_gss(gss_clntdata_t *fdata)
353 {
354 gss_clntdata_t *tdata;
355
356 if (fdata == NULL)
357 return (NULL);
358
359 tdata = kmem_zalloc(sizeof (gss_clntdata_t), KM_SLEEP);
360
361 tdata->mechanism.length = fdata->mechanism.length;
362 tdata->mechanism.elements = kmem_zalloc(fdata->mechanism.length,
363 KM_SLEEP);
364 bcopy(fdata->mechanism.elements, tdata->mechanism.elements,
365 fdata->mechanism.length);
366
367 tdata->service = fdata->service;
368
369 (void) strcpy(tdata->uname, fdata->uname);
370 (void) strcpy(tdata->inst, fdata->inst);
371 (void) strcpy(tdata->realm, fdata->realm);
372
373 tdata->qop = fdata->qop;
374
375 return (tdata);
376 }
377
378 static int
nfs4_chkdup_servinfo4(servinfo4_t * svp_head,servinfo4_t * svp)379 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
380 {
381 servinfo4_t *si;
382
383 /*
384 * Iterate over the servinfo4 list to make sure
385 * we do not have a duplicate. Skip any servinfo4
386 * that has been marked "NOT IN USE"
387 */
388 for (si = svp_head; si; si = si->sv_next) {
389 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
390 if (si->sv_flags & SV4_NOTINUSE) {
391 nfs_rw_exit(&si->sv_lock);
392 continue;
393 }
394 nfs_rw_exit(&si->sv_lock);
395 if (si == svp)
396 continue;
397 if (si->sv_addr.len == svp->sv_addr.len &&
398 strcmp(si->sv_knconf->knc_protofmly,
399 svp->sv_knconf->knc_protofmly) == 0 &&
400 bcmp(si->sv_addr.buf, svp->sv_addr.buf,
401 si->sv_addr.len) == 0) {
402 /* it's a duplicate */
403 return (1);
404 }
405 }
406 /* it's not a duplicate */
407 return (0);
408 }
409
410 void
nfs4_free_args(struct nfs_args * nargs)411 nfs4_free_args(struct nfs_args *nargs)
412 {
413 if (nargs->knconf) {
414 if (nargs->knconf->knc_protofmly)
415 kmem_free(nargs->knconf->knc_protofmly,
416 KNC_STRSIZE);
417 if (nargs->knconf->knc_proto)
418 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
419 kmem_free(nargs->knconf, sizeof (*nargs->knconf));
420 nargs->knconf = NULL;
421 }
422
423 if (nargs->fh) {
424 kmem_free(nargs->fh, strlen(nargs->fh) + 1);
425 nargs->fh = NULL;
426 }
427
428 if (nargs->hostname) {
429 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
430 nargs->hostname = NULL;
431 }
432
433 if (nargs->addr) {
434 if (nargs->addr->buf) {
435 ASSERT(nargs->addr->len);
436 kmem_free(nargs->addr->buf, nargs->addr->len);
437 }
438 kmem_free(nargs->addr, sizeof (struct netbuf));
439 nargs->addr = NULL;
440 }
441
442 if (nargs->syncaddr) {
443 ASSERT(nargs->syncaddr->len);
444 if (nargs->syncaddr->buf) {
445 ASSERT(nargs->syncaddr->len);
446 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
447 }
448 kmem_free(nargs->syncaddr, sizeof (struct netbuf));
449 nargs->syncaddr = NULL;
450 }
451
452 if (nargs->netname) {
453 kmem_free(nargs->netname, strlen(nargs->netname) + 1);
454 nargs->netname = NULL;
455 }
456
457 if (nargs->nfs_ext_u.nfs_extA.secdata) {
458 sec_clnt_freeinfo(
459 nargs->nfs_ext_u.nfs_extA.secdata);
460 nargs->nfs_ext_u.nfs_extA.secdata = NULL;
461 }
462 }
463
464
465 int
nfs4_copyin(char * data,int datalen,struct nfs_args * nargs)466 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs)
467 {
468
469 int error;
470 size_t hlen; /* length of hostname */
471 size_t nlen; /* length of netname */
472 char netname[MAXNETNAMELEN+1]; /* server's netname */
473 struct netbuf addr; /* server's address */
474 struct netbuf syncaddr; /* AUTH_DES time sync addr */
475 struct knetconfig *knconf; /* transport structure */
476 struct sec_data *secdata = NULL; /* security data */
477 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */
478 STRUCT_DECL(knetconfig, knconf_tmp);
479 STRUCT_DECL(netbuf, addr_tmp);
480 int flags;
481 char *p, *pf;
482 struct pathname pn;
483 char *userbufptr;
484
485
486 bzero(nargs, sizeof (*nargs));
487
488 STRUCT_INIT(args, get_udatamodel());
489 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
490 if (copyin(data, STRUCT_BUF(args), MIN(datalen,
491 STRUCT_SIZE(args))))
492 return (EFAULT);
493
494 nargs->wsize = STRUCT_FGET(args, wsize);
495 nargs->rsize = STRUCT_FGET(args, rsize);
496 nargs->timeo = STRUCT_FGET(args, timeo);
497 nargs->retrans = STRUCT_FGET(args, retrans);
498 nargs->acregmin = STRUCT_FGET(args, acregmin);
499 nargs->acregmax = STRUCT_FGET(args, acregmax);
500 nargs->acdirmin = STRUCT_FGET(args, acdirmin);
501 nargs->acdirmax = STRUCT_FGET(args, acdirmax);
502
503 flags = STRUCT_FGET(args, flags);
504 nargs->flags = flags;
505
506 addr.buf = NULL;
507 syncaddr.buf = NULL;
508
509
510 /*
511 * Allocate space for a knetconfig structure and
512 * its strings and copy in from user-land.
513 */
514 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
515 STRUCT_INIT(knconf_tmp, get_udatamodel());
516 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
517 STRUCT_SIZE(knconf_tmp))) {
518 kmem_free(knconf, sizeof (*knconf));
519 return (EFAULT);
520 }
521
522 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
523 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
524 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
525 if (get_udatamodel() != DATAMODEL_LP64) {
526 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
527 } else {
528 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
529 }
530
531 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
532 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
533 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
534 if (error) {
535 kmem_free(pf, KNC_STRSIZE);
536 kmem_free(p, KNC_STRSIZE);
537 kmem_free(knconf, sizeof (*knconf));
538 return (error);
539 }
540
541 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
542 if (error) {
543 kmem_free(pf, KNC_STRSIZE);
544 kmem_free(p, KNC_STRSIZE);
545 kmem_free(knconf, sizeof (*knconf));
546 return (error);
547 }
548
549
550 knconf->knc_protofmly = pf;
551 knconf->knc_proto = p;
552
553 nargs->knconf = knconf;
554
555 /*
556 * Get server address
557 */
558 STRUCT_INIT(addr_tmp, get_udatamodel());
559 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
560 STRUCT_SIZE(addr_tmp))) {
561 error = EFAULT;
562 goto errout;
563 }
564
565 nargs->addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
566 userbufptr = STRUCT_FGETP(addr_tmp, buf);
567 addr.len = STRUCT_FGET(addr_tmp, len);
568 addr.buf = kmem_alloc(addr.len, KM_SLEEP);
569 addr.maxlen = addr.len;
570 if (copyin(userbufptr, addr.buf, addr.len)) {
571 kmem_free(addr.buf, addr.len);
572 error = EFAULT;
573 goto errout;
574 }
575 bcopy(&addr, nargs->addr, sizeof (struct netbuf));
576
577 /*
578 * Get the root fhandle
579 */
580 error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);
581 if (error)
582 goto errout;
583
584 /* Volatile fh: keep server paths, so use actual-size strings */
585 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
586 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen);
587 nargs->fh[pn.pn_pathlen] = '\0';
588 pn_free(&pn);
589
590
591 /*
592 * Get server's hostname
593 */
594 if (flags & NFSMNT_HOSTNAME) {
595 error = copyinstr(STRUCT_FGETP(args, hostname),
596 netname, sizeof (netname), &hlen);
597 if (error)
598 goto errout;
599 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
600 (void) strcpy(nargs->hostname, netname);
601
602 } else {
603 nargs->hostname = NULL;
604 }
605
606
607 /*
608 * If there are syncaddr and netname data, load them in. This is
609 * to support data needed for NFSV4 when AUTH_DH is the negotiated
610 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
611 */
612 netname[0] = '\0';
613 if (flags & NFSMNT_SECURE) {
614
615 /* get syncaddr */
616 STRUCT_INIT(addr_tmp, get_udatamodel());
617 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
618 STRUCT_SIZE(addr_tmp))) {
619 error = EINVAL;
620 goto errout;
621 }
622 userbufptr = STRUCT_FGETP(addr_tmp, buf);
623 syncaddr.len = STRUCT_FGET(addr_tmp, len);
624 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
625 syncaddr.maxlen = syncaddr.len;
626 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
627 kmem_free(syncaddr.buf, syncaddr.len);
628 error = EFAULT;
629 goto errout;
630 }
631
632 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
633 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
634
635 /* get server's netname */
636 if (copyinstr(STRUCT_FGETP(args, netname), netname,
637 sizeof (netname), &nlen)) {
638 error = EFAULT;
639 goto errout;
640 }
641
642 netname[nlen] = '\0';
643 nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
644 (void) strcpy(nargs->netname, netname);
645 }
646
647 /*
648 * Get the extention data which has the security data structure.
649 * This includes data for AUTH_SYS as well.
650 */
651 if (flags & NFSMNT_NEWARGS) {
652 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
653 if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
654 nargs->nfs_args_ext == NFS_ARGS_EXTB) {
655 /*
656 * Indicating the application is using the new
657 * sec_data structure to pass in the security
658 * data.
659 */
660 if (STRUCT_FGETP(args,
661 nfs_ext_u.nfs_extA.secdata) != NULL) {
662 error = sec_clnt_loadinfo(
663 (struct sec_data *)STRUCT_FGETP(args,
664 nfs_ext_u.nfs_extA.secdata),
665 &secdata, get_udatamodel());
666 }
667 nargs->nfs_ext_u.nfs_extA.secdata = secdata;
668 }
669 }
670
671 if (error)
672 goto errout;
673
674 /*
675 * Failover support:
676 *
677 * We may have a linked list of nfs_args structures,
678 * which means the user is looking for failover. If
679 * the mount is either not "read-only" or "soft",
680 * we want to bail out with EINVAL.
681 */
682 if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
683 nargs->nfs_ext_u.nfs_extB.next =
684 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
685
686 errout:
687 if (error)
688 nfs4_free_args(nargs);
689
690 return (error);
691 }
692
693
694 /*
695 * nfs mount vfsop
696 * Set up mount info record and attach it to vfs struct.
697 */
698 int
nfs4_mount(vfs_t * vfsp,vnode_t * mvp,struct mounta * uap,cred_t * cr)699 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
700 {
701 char *data = uap->dataptr;
702 int error;
703 vnode_t *rtvp; /* the server's root */
704 mntinfo4_t *mi; /* mount info, pointed at by vfs */
705 struct knetconfig *rdma_knconf; /* rdma transport structure */
706 rnode4_t *rp;
707 struct servinfo4 *svp; /* nfs server info */
708 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
709 struct servinfo4 *svp_head; /* first nfs server info */
710 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */
711 struct sec_data *secdata; /* security data */
712 struct nfs_args *args = NULL;
713 int flags, addr_type, removed;
714 zone_t *zone = nfs_zone();
715 nfs4_error_t n4e;
716 zone_t *mntzone = NULL;
717
718 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
719 return (EPERM);
720 if (mvp->v_type != VDIR)
721 return (ENOTDIR);
722
723 /*
724 * get arguments
725 *
726 * nfs_args is now versioned and is extensible, so
727 * uap->datalen might be different from sizeof (args)
728 * in a compatible situation.
729 */
730 more:
731 if (!(uap->flags & MS_SYSSPACE)) {
732 if (args == NULL)
733 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
734 else
735 nfs4_free_args(args);
736 error = nfs4_copyin(data, uap->datalen, args);
737 if (error) {
738 if (args) {
739 kmem_free(args, sizeof (*args));
740 }
741 return (error);
742 }
743 } else {
744 args = (struct nfs_args *)data;
745 }
746
747 flags = args->flags;
748
749 /*
750 * If the request changes the locking type, disallow the remount,
751 * because it's questionable whether we can transfer the
752 * locking state correctly.
753 */
754 if (uap->flags & MS_REMOUNT) {
755 if (!(uap->flags & MS_SYSSPACE)) {
756 nfs4_free_args(args);
757 kmem_free(args, sizeof (*args));
758 }
759 if ((mi = VFTOMI4(vfsp)) != NULL) {
760 uint_t new_mi_llock;
761 uint_t old_mi_llock;
762 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
763 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
764 if (old_mi_llock != new_mi_llock)
765 return (EBUSY);
766 }
767 return (0);
768 }
769
770 /*
771 * For ephemeral mount trigger stub vnodes, we have two problems
772 * to solve: racing threads will likely fail the v_count check, and
773 * we want only one to proceed with the mount.
774 *
775 * For stubs, if the mount has already occurred (via a racing thread),
776 * just return success. If not, skip the v_count check and proceed.
777 * Note that we are already serialised at this point.
778 */
779 mutex_enter(&mvp->v_lock);
780 if (vn_matchops(mvp, nfs4_trigger_vnodeops)) {
781 /* mntpt is a v4 stub vnode */
782 ASSERT(RP_ISSTUB(VTOR4(mvp)));
783 ASSERT(!(uap->flags & MS_OVERLAY));
784 ASSERT(!(mvp->v_flag & VROOT));
785 if (vn_mountedvfs(mvp) != NULL) {
786 /* ephemeral mount has already occurred */
787 ASSERT(uap->flags & MS_SYSSPACE);
788 mutex_exit(&mvp->v_lock);
789 return (0);
790 }
791 } else {
792 /* mntpt is a non-v4 or v4 non-stub vnode */
793 if (!(uap->flags & MS_OVERLAY) &&
794 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
795 mutex_exit(&mvp->v_lock);
796 if (!(uap->flags & MS_SYSSPACE)) {
797 nfs4_free_args(args);
798 kmem_free(args, sizeof (*args));
799 }
800 return (EBUSY);
801 }
802 }
803 mutex_exit(&mvp->v_lock);
804
805 /* make sure things are zeroed for errout: */
806 rtvp = NULL;
807 mi = NULL;
808 secdata = NULL;
809
810 /*
811 * A valid knetconfig structure is required.
812 */
813 if (!(flags & NFSMNT_KNCONF) ||
814 args->knconf == NULL || args->knconf->knc_protofmly == NULL ||
815 args->knconf->knc_proto == NULL ||
816 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) {
817 if (!(uap->flags & MS_SYSSPACE)) {
818 nfs4_free_args(args);
819 kmem_free(args, sizeof (*args));
820 }
821 return (EINVAL);
822 }
823
824 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
825 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
826 if (!(uap->flags & MS_SYSSPACE)) {
827 nfs4_free_args(args);
828 kmem_free(args, sizeof (*args));
829 }
830 return (EINVAL);
831 }
832
833 /*
834 * Allocate a servinfo4 struct.
835 */
836 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
837 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
838 if (svp_tail) {
839 svp_2ndlast = svp_tail;
840 svp_tail->sv_next = svp;
841 } else {
842 svp_head = svp;
843 svp_2ndlast = svp;
844 }
845
846 svp_tail = svp;
847 svp->sv_knconf = args->knconf;
848 args->knconf = NULL;
849
850 /*
851 * Get server address
852 */
853 if (args->addr == NULL || args->addr->buf == NULL) {
854 error = EINVAL;
855 goto errout;
856 }
857
858 svp->sv_addr.maxlen = args->addr->maxlen;
859 svp->sv_addr.len = args->addr->len;
860 svp->sv_addr.buf = args->addr->buf;
861 args->addr->buf = NULL;
862
863 /*
864 * Get the root fhandle
865 */
866 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) {
867 error = EINVAL;
868 goto errout;
869 }
870
871 svp->sv_path = args->fh;
872 svp->sv_pathlen = strlen(args->fh) + 1;
873 args->fh = NULL;
874
875 /*
876 * Get server's hostname
877 */
878 if (flags & NFSMNT_HOSTNAME) {
879 if (args->hostname == NULL || (strlen(args->hostname) >
880 MAXNETNAMELEN)) {
881 error = EINVAL;
882 goto errout;
883 }
884 svp->sv_hostnamelen = strlen(args->hostname) + 1;
885 svp->sv_hostname = args->hostname;
886 args->hostname = NULL;
887 } else {
888 char *p = "unknown-host";
889 svp->sv_hostnamelen = strlen(p) + 1;
890 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
891 (void) strcpy(svp->sv_hostname, p);
892 }
893
894 /*
895 * RDMA MOUNT SUPPORT FOR NFS v4.
896 * Establish whether it is possible to use RDMA; if so, overload the
897 * knconf with rdma specific knconf and free the original knconf.
898 */
899 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
900 /*
901 * Determine the addr type for RDMA, IPv4 or v6.
902 */
903 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
904 addr_type = AF_INET;
905 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
906 addr_type = AF_INET6;
907
908 if (rdma_reachable(addr_type, &svp->sv_addr,
909 &rdma_knconf) == 0) {
910 /*
911 * If successful, hijack the original knconf and
912 * replace with the new one, depending on the flags.
913 */
914 svp->sv_origknconf = svp->sv_knconf;
915 svp->sv_knconf = rdma_knconf;
916 } else {
917 if (flags & NFSMNT_TRYRDMA) {
918 #ifdef DEBUG
919 if (rdma_debug)
920 zcmn_err(getzoneid(), CE_WARN,
921 "no RDMA onboard, revert\n");
922 #endif
923 }
924
925 if (flags & NFSMNT_DORDMA) {
926 /*
927 * If proto=rdma is specified and no RDMA
928 * path to this server is available then
929 * ditch this server.
930 * This is not included in the mountable
931 * server list or the replica list.
932 * Check if more servers are specified;
933 * Failover case, otherwise bail out of mount.
934 */
935 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
936 args->nfs_ext_u.nfs_extB.next != NULL) {
937 data = (char *)
938 args->nfs_ext_u.nfs_extB.next;
939 if (uap->flags & MS_RDONLY &&
940 !(flags & NFSMNT_SOFT)) {
941 if (svp_head->sv_next == NULL) {
942 svp_tail = NULL;
943 svp_2ndlast = NULL;
944 sv4_free(svp_head);
945 goto more;
946 } else {
947 svp_tail = svp_2ndlast;
948 svp_2ndlast->sv_next =
949 NULL;
950 sv4_free(svp);
951 goto more;
952 }
953 }
954 } else {
955 /*
956 * This is the last server specified
957 * in the nfs_args list passed down
958 * and its not rdma capable.
959 */
960 if (svp_head->sv_next == NULL) {
961 /*
962 * Is this the only one
963 */
964 error = EINVAL;
965 #ifdef DEBUG
966 if (rdma_debug)
967 zcmn_err(getzoneid(),
968 CE_WARN,
969 "No RDMA srv");
970 #endif
971 goto errout;
972 } else {
973 /*
974 * There is list, since some
975 * servers specified before
976 * this passed all requirements
977 */
978 svp_tail = svp_2ndlast;
979 svp_2ndlast->sv_next = NULL;
980 sv4_free(svp);
981 goto proceed;
982 }
983 }
984 }
985 }
986 }
987
988 /*
989 * If there are syncaddr and netname data, load them in. This is
990 * to support data needed for NFSV4 when AUTH_DH is the negotiated
991 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
992 */
993 if (args->flags & NFSMNT_SECURE) {
994 svp->sv_dhsec = create_authdh_data(args->netname,
995 strlen(args->netname),
996 args->syncaddr, svp->sv_knconf);
997 }
998
999 /*
1000 * Get the extension data which has the security data structure.
1001 * This includes data for AUTH_SYS as well.
1002 */
1003 if (flags & NFSMNT_NEWARGS) {
1004 switch (args->nfs_args_ext) {
1005 case NFS_ARGS_EXTA:
1006 case NFS_ARGS_EXTB:
1007 /*
1008 * Indicating the application is using the new
1009 * sec_data structure to pass in the security
1010 * data.
1011 */
1012 secdata = args->nfs_ext_u.nfs_extA.secdata;
1013 if (secdata == NULL) {
1014 error = EINVAL;
1015 } else if (uap->flags & MS_SYSSPACE) {
1016 /*
1017 * Need to validate the flavor here if
1018 * sysspace, userspace was already
1019 * validate from the nfs_copyin function.
1020 */
1021 switch (secdata->rpcflavor) {
1022 case AUTH_NONE:
1023 case AUTH_UNIX:
1024 case AUTH_LOOPBACK:
1025 case AUTH_DES:
1026 case RPCSEC_GSS:
1027 break;
1028 default:
1029 error = EINVAL;
1030 goto errout;
1031 }
1032 }
1033 args->nfs_ext_u.nfs_extA.secdata = NULL;
1034 break;
1035
1036 default:
1037 error = EINVAL;
1038 break;
1039 }
1040
1041 } else if (flags & NFSMNT_SECURE) {
1042 /*
1043 * NFSMNT_SECURE is deprecated but we keep it
1044 * to support the rogue user-generated application
1045 * that may use this undocumented interface to do
1046 * AUTH_DH security, e.g. our own rexd.
1047 *
1048 * Also note that NFSMNT_SECURE is used for passing
1049 * AUTH_DH info to be used in negotiation.
1050 */
1051 secdata = create_authdh_data(args->netname,
1052 strlen(args->netname), args->syncaddr, svp->sv_knconf);
1053
1054 } else {
1055 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1056 secdata->secmod = secdata->rpcflavor = AUTH_SYS;
1057 secdata->data = NULL;
1058 }
1059
1060 svp->sv_secdata = secdata;
1061
1062 /*
1063 * User does not explicitly specify a flavor, and a user
1064 * defined default flavor is passed down.
1065 */
1066 if (flags & NFSMNT_SECDEFAULT) {
1067 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1068 svp->sv_flags |= SV4_TRYSECDEFAULT;
1069 nfs_rw_exit(&svp->sv_lock);
1070 }
1071
1072 /*
1073 * Failover support:
1074 *
1075 * We may have a linked list of nfs_args structures,
1076 * which means the user is looking for failover. If
1077 * the mount is either not "read-only" or "soft",
1078 * we want to bail out with EINVAL.
1079 */
1080 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
1081 args->nfs_ext_u.nfs_extB.next != NULL) {
1082 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
1083 data = (char *)args->nfs_ext_u.nfs_extB.next;
1084 goto more;
1085 }
1086 error = EINVAL;
1087 goto errout;
1088 }
1089
1090 /*
1091 * Determine the zone we're being mounted into.
1092 */
1093 zone_hold(mntzone = zone); /* start with this assumption */
1094 if (getzoneid() == GLOBAL_ZONEID) {
1095 zone_rele(mntzone);
1096 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
1097 ASSERT(mntzone != NULL);
1098 if (mntzone != zone) {
1099 error = EBUSY;
1100 goto errout;
1101 }
1102 }
1103
1104 if (is_system_labeled()) {
1105 error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
1106 svp->sv_knconf, cr);
1107
1108 if (error > 0)
1109 goto errout;
1110
1111 if (error == -1) {
1112 /* change mount to read-only to prevent write-down */
1113 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1114 }
1115 }
1116
1117 /*
1118 * Stop the mount from going any further if the zone is going away.
1119 */
1120 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
1121 error = EBUSY;
1122 goto errout;
1123 }
1124
1125 /*
1126 * Get root vnode.
1127 */
1128 proceed:
1129 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
1130 if (error) {
1131 /* if nfs4rootvp failed, it will free svp_head */
1132 svp_head = NULL;
1133 goto errout;
1134 }
1135
1136 mi = VTOMI4(rtvp);
1137
1138 /*
1139 * Send client id to the server, if necessary
1140 */
1141 nfs4_error_zinit(&n4e);
1142 nfs4setclientid(mi, cr, FALSE, &n4e);
1143
1144 error = n4e.error;
1145
1146 if (error)
1147 goto errout;
1148
1149 /*
1150 * Set option fields in the mount info record
1151 */
1152
1153 if (svp_head->sv_next) {
1154 mutex_enter(&mi->mi_lock);
1155 mi->mi_flags |= MI4_LLOCK;
1156 mutex_exit(&mi->mi_lock);
1157 }
1158 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args);
1159 if (error)
1160 goto errout;
1161
1162 /*
1163 * Time to tie in the mirror mount info at last!
1164 */
1165 if (flags & NFSMNT_EPHEMERAL)
1166 error = nfs4_record_ephemeral_mount(mi, mvp);
1167
1168 errout:
1169 if (error) {
1170 if (rtvp != NULL) {
1171 rp = VTOR4(rtvp);
1172 if (rp->r_flags & R4HASHED)
1173 rp4_rmhash(rp);
1174 }
1175 if (mi != NULL) {
1176 nfs4_async_stop(vfsp);
1177 nfs4_async_manager_stop(vfsp);
1178 nfs4_remove_mi_from_server(mi, NULL);
1179 if (rtvp != NULL)
1180 VN_RELE(rtvp);
1181 if (mntzone != NULL)
1182 zone_rele(mntzone);
1183 /* need to remove it from the zone */
1184 removed = nfs4_mi_zonelist_remove(mi);
1185 if (removed)
1186 zone_rele_ref(&mi->mi_zone_ref,
1187 ZONE_REF_NFSV4);
1188 MI4_RELE(mi);
1189 if (!(uap->flags & MS_SYSSPACE) && args) {
1190 nfs4_free_args(args);
1191 kmem_free(args, sizeof (*args));
1192 }
1193 return (error);
1194 }
1195 if (svp_head)
1196 sv4_free(svp_head);
1197 }
1198
1199 if (!(uap->flags & MS_SYSSPACE) && args) {
1200 nfs4_free_args(args);
1201 kmem_free(args, sizeof (*args));
1202 }
1203 if (rtvp != NULL)
1204 VN_RELE(rtvp);
1205
1206 if (mntzone != NULL)
1207 zone_rele(mntzone);
1208
1209 return (error);
1210 }
1211
/*
 * printf-style warning formats for a server that reports a zero
 * read transfer size, write transfer size, or maximum file size.
 * The single "%s" argument is supplied by the caller.  DEBUG builds
 * use the prefix "NFS4 server ", other builds use "NFS server ".
 */
#ifndef DEBUG
#define	VERS_MSG	"NFS server "
#else
#define	VERS_MSG	"NFS4 server "
#endif

#define	READ_MSG	VERS_MSG "%s returned 0 for read transfer size"
#define	WRITE_MSG	VERS_MSG "%s returned 0 for write transfer size"
#define	SIZE_MSG	VERS_MSG "%s returned 0 for maximum file size"
1224
1225 /*
1226 * Get the symbolic link text from the server for a given filehandle
1227 * of that symlink.
1228 *
1229 * (get symlink text) PUTFH READLINK
1230 */
1231 static int
getlinktext_otw(mntinfo4_t * mi,nfs_fh4 * fh,char ** linktextp,cred_t * cr,int flags)1232 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
1233 int flags)
1234 {
1235 COMPOUND4args_clnt args;
1236 COMPOUND4res_clnt res;
1237 int doqueue;
1238 nfs_argop4 argop[2];
1239 nfs_resop4 *resop;
1240 READLINK4res *lr_res;
1241 uint_t len;
1242 bool_t needrecov = FALSE;
1243 nfs4_recov_state_t recov_state;
1244 nfs4_sharedfh_t *sfh;
1245 nfs4_error_t e;
1246 int num_retry = nfs4_max_mount_retry;
1247 int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1248
1249 sfh = sfh4_get(fh, mi);
1250 recov_state.rs_flags = 0;
1251 recov_state.rs_num_retry_despite_err = 0;
1252
1253 recov_retry:
1254 nfs4_error_zinit(&e);
1255
1256 args.array_len = 2;
1257 args.array = argop;
1258 args.ctag = TAG_GET_SYMLINK;
1259
1260 if (! recovery) {
1261 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
1262 if (e.error) {
1263 sfh4_rele(&sfh);
1264 return (e.error);
1265 }
1266 }
1267
1268 /* 0. putfh symlink fh */
1269 argop[0].argop = OP_CPUTFH;
1270 argop[0].nfs_argop4_u.opcputfh.sfh = sfh;
1271
1272 /* 1. readlink */
1273 argop[1].argop = OP_READLINK;
1274
1275 doqueue = 1;
1276
1277 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
1278
1279 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);
1280
1281 if (needrecov && !recovery && num_retry-- > 0) {
1282
1283 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1284 "getlinktext_otw: initiating recovery\n"));
1285
1286 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
1287 OP_READLINK, NULL, NULL, NULL) == FALSE) {
1288 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1289 if (!e.error)
1290 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1291 goto recov_retry;
1292 }
1293 }
1294
1295 /*
1296 * If non-NFS4 pcol error and/or we weren't able to recover.
1297 */
1298 if (e.error != 0) {
1299 if (! recovery)
1300 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1301 sfh4_rele(&sfh);
1302 return (e.error);
1303 }
1304
1305 if (res.status) {
1306 e.error = geterrno4(res.status);
1307 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1308 if (! recovery)
1309 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1310 sfh4_rele(&sfh);
1311 return (e.error);
1312 }
1313
1314 /* res.status == NFS4_OK */
1315 ASSERT(res.status == NFS4_OK);
1316
1317 resop = &res.array[1]; /* readlink res */
1318 lr_res = &resop->nfs_resop4_u.opreadlink;
1319
1320 /* treat symlink name as data */
1321 *linktextp = utf8_to_str((utf8string *)&lr_res->link, &len, NULL);
1322
1323 if (! recovery)
1324 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1325 sfh4_rele(&sfh);
1326 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1327 return (0);
1328 }
1329
1330 /*
1331 * Skip over consecutive slashes and "/./" in a pathname.
1332 */
1333 void
pathname_skipslashdot(struct pathname * pnp)1334 pathname_skipslashdot(struct pathname *pnp)
1335 {
1336 char *c1, *c2;
1337
1338 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {
1339
1340 c1 = pnp->pn_path + 1;
1341 c2 = pnp->pn_path + 2;
1342
1343 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
1344 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */
1345 pnp->pn_pathlen = pnp->pn_pathlen - 2;
1346 } else {
1347 pnp->pn_path++;
1348 pnp->pn_pathlen--;
1349 }
1350 }
1351 }
1352
1353 /*
1354 * Resolve a symbolic link path. The symlink is in the nth component of
1355 * svp->sv_path and has an nfs4 file handle "fh".
1356 * Upon return, the sv_path will point to the new path that has the nth
1357 * component resolved to its symlink text.
1358 */
1359 int
resolve_sympath(mntinfo4_t * mi,servinfo4_t * svp,int nth,nfs_fh4 * fh,cred_t * cr,int flags)1360 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
1361 cred_t *cr, int flags)
1362 {
1363 char *oldpath;
1364 char *symlink, *newpath;
1365 struct pathname oldpn, newpn;
1366 char component[MAXNAMELEN];
1367 int i, addlen, error = 0;
1368 int oldpathlen;
1369
1370 /* Get the symbolic link text over the wire. */
1371 error = getlinktext_otw(mi, fh, &symlink, cr, flags);
1372
1373 if (error || symlink == NULL || strlen(symlink) == 0)
1374 return (error);
1375
1376 /*
1377 * Compose the new pathname.
1378 * Note:
1379 * - only the nth component is resolved for the pathname.
1380 * - pathname.pn_pathlen does not count the ending null byte.
1381 */
1382 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1383 oldpath = svp->sv_path;
1384 oldpathlen = svp->sv_pathlen;
1385 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
1386 nfs_rw_exit(&svp->sv_lock);
1387 kmem_free(symlink, strlen(symlink) + 1);
1388 return (error);
1389 }
1390 nfs_rw_exit(&svp->sv_lock);
1391 pn_alloc(&newpn);
1392
1393 /*
1394 * Skip over previous components from the oldpath so that the
1395 * oldpn.pn_path will point to the symlink component. Skip
1396 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
1397 * pn_getcompnent can get the component.
1398 */
1399 for (i = 1; i < nth; i++) {
1400 pathname_skipslashdot(&oldpn);
1401 error = pn_getcomponent(&oldpn, component);
1402 if (error)
1403 goto out;
1404 }
1405
1406 /*
1407 * Copy the old path upto the component right before the symlink
1408 * if the symlink is not an absolute path.
1409 */
1410 if (symlink[0] != '/') {
1411 addlen = oldpn.pn_path - oldpn.pn_buf;
1412 bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
1413 newpn.pn_pathlen += addlen;
1414 newpn.pn_path += addlen;
1415 newpn.pn_buf[newpn.pn_pathlen] = '/';
1416 newpn.pn_pathlen++;
1417 newpn.pn_path++;
1418 }
1419
1420 /* copy the resolved symbolic link text */
1421 addlen = strlen(symlink);
1422 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1423 error = ENAMETOOLONG;
1424 goto out;
1425 }
1426 bcopy(symlink, newpn.pn_path, addlen);
1427 newpn.pn_pathlen += addlen;
1428 newpn.pn_path += addlen;
1429
1430 /*
1431 * Check if there is any remaining path after the symlink component.
1432 * First, skip the symlink component.
1433 */
1434 pathname_skipslashdot(&oldpn);
1435 if (error = pn_getcomponent(&oldpn, component))
1436 goto out;
1437
1438 addlen = pn_pathleft(&oldpn); /* includes counting the slash */
1439
1440 /*
1441 * Copy the remaining path to the new pathname if there is any.
1442 */
1443 if (addlen > 0) {
1444 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1445 error = ENAMETOOLONG;
1446 goto out;
1447 }
1448 bcopy(oldpn.pn_path, newpn.pn_path, addlen);
1449 newpn.pn_pathlen += addlen;
1450 }
1451 newpn.pn_buf[newpn.pn_pathlen] = '\0';
1452
1453 /* get the newpath and store it in the servinfo4_t */
1454 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
1455 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
1456 newpath[newpn.pn_pathlen] = '\0';
1457
1458 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1459 svp->sv_path = newpath;
1460 svp->sv_pathlen = strlen(newpath) + 1;
1461 nfs_rw_exit(&svp->sv_lock);
1462
1463 kmem_free(oldpath, oldpathlen);
1464 out:
1465 kmem_free(symlink, strlen(symlink) + 1);
1466 pn_free(&newpn);
1467 pn_free(&oldpn);
1468
1469 return (error);
1470 }
1471
1472 /*
1473 * This routine updates servinfo4 structure with the new referred server
1474 * info.
1475 * nfsfsloc has the location related information
1476 * fsp has the hostname and pathname info.
1477 * new path = pathname from referral + part of orig pathname(based on nth).
1478 */
1479 static void
update_servinfo4(servinfo4_t * svp,fs_location4 * fsp,struct nfs_fsl_info * nfsfsloc,char * orig_path,int nth)1480 update_servinfo4(servinfo4_t *svp, fs_location4 *fsp,
1481 struct nfs_fsl_info *nfsfsloc, char *orig_path, int nth)
1482 {
1483 struct knetconfig *knconf, *svknconf;
1484 struct netbuf *saddr;
1485 sec_data_t *secdata;
1486 utf8string *host;
1487 int i = 0, num_slashes = 0;
1488 char *p, *spath, *op, *new_path;
1489
1490 /* Update knconf */
1491 knconf = svp->sv_knconf;
1492 free_knconf_contents(knconf);
1493 bzero(knconf, sizeof (struct knetconfig));
1494 svknconf = nfsfsloc->knconf;
1495 knconf->knc_semantics = svknconf->knc_semantics;
1496 knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1497 knconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1498 knconf->knc_rdev = svknconf->knc_rdev;
1499 bcopy(svknconf->knc_protofmly, knconf->knc_protofmly, KNC_STRSIZE);
1500 bcopy(svknconf->knc_proto, knconf->knc_proto, KNC_STRSIZE);
1501
1502 /* Update server address */
1503 saddr = &svp->sv_addr;
1504 if (saddr->buf != NULL)
1505 kmem_free(saddr->buf, saddr->maxlen);
1506 saddr->buf = kmem_alloc(nfsfsloc->addr->maxlen, KM_SLEEP);
1507 saddr->len = nfsfsloc->addr->len;
1508 saddr->maxlen = nfsfsloc->addr->maxlen;
1509 bcopy(nfsfsloc->addr->buf, saddr->buf, nfsfsloc->addr->len);
1510
1511 /* Update server name */
1512 host = fsp->server_val;
1513 kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
1514 svp->sv_hostname = kmem_zalloc(host->utf8string_len + 1, KM_SLEEP);
1515 bcopy(host->utf8string_val, svp->sv_hostname, host->utf8string_len);
1516 svp->sv_hostname[host->utf8string_len] = '\0';
1517 svp->sv_hostnamelen = host->utf8string_len + 1;
1518
1519 /*
1520 * Update server path.
1521 * We need to setup proper path here.
1522 * For ex., If we got a path name serv1:/rp/aaa/bbb
1523 * where aaa is a referral and points to serv2:/rpool/aa
1524 * we need to set the path to serv2:/rpool/aa/bbb
1525 * The first part of this below code generates /rpool/aa
1526 * and the second part appends /bbb to the server path.
1527 */
1528 spath = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1529 *p++ = '/';
1530 for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
1531 component4 *comp;
1532
1533 comp = &fsp->rootpath.pathname4_val[i];
1534 /* If no space, null the string and bail */
1535 if ((p - spath) + comp->utf8string_len + 1 > MAXPATHLEN) {
1536 p = spath + MAXPATHLEN - 1;
1537 spath[0] = '\0';
1538 break;
1539 }
1540 bcopy(comp->utf8string_val, p, comp->utf8string_len);
1541 p += comp->utf8string_len;
1542 *p++ = '/';
1543 }
1544 if (fsp->rootpath.pathname4_len != 0)
1545 *(p - 1) = '\0';
1546 else
1547 *p = '\0';
1548 p = spath;
1549
1550 new_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1551 (void) strlcpy(new_path, p, MAXPATHLEN);
1552 kmem_free(p, MAXPATHLEN);
1553 i = strlen(new_path);
1554
1555 for (op = orig_path; *op; op++) {
1556 if (*op == '/')
1557 num_slashes++;
1558 if (num_slashes == nth + 2) {
1559 while (*op != '\0') {
1560 new_path[i] = *op;
1561 i++;
1562 op++;
1563 }
1564 break;
1565 }
1566 }
1567 new_path[i] = '\0';
1568
1569 kmem_free(svp->sv_path, svp->sv_pathlen);
1570 svp->sv_pathlen = strlen(new_path) + 1;
1571 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
1572 bcopy(new_path, svp->sv_path, svp->sv_pathlen);
1573 kmem_free(new_path, MAXPATHLEN);
1574
1575 /*
1576 * All the security data is specific to old server.
1577 * Clean it up except secdata which deals with mount options.
1578 * We need to inherit that data. Copy secdata into our new servinfo4.
1579 */
1580 if (svp->sv_dhsec) {
1581 sec_clnt_freeinfo(svp->sv_dhsec);
1582 svp->sv_dhsec = NULL;
1583 }
1584 if (svp->sv_save_secinfo &&
1585 svp->sv_save_secinfo != svp->sv_secinfo) {
1586 secinfo_free(svp->sv_save_secinfo);
1587 svp->sv_save_secinfo = NULL;
1588 }
1589 if (svp->sv_secinfo) {
1590 secinfo_free(svp->sv_secinfo);
1591 svp->sv_secinfo = NULL;
1592 }
1593 svp->sv_currsec = NULL;
1594
1595 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1596 *secdata = *svp->sv_secdata;
1597 secdata->data = NULL;
1598 if (svp->sv_secdata) {
1599 sec_clnt_freeinfo(svp->sv_secdata);
1600 svp->sv_secdata = NULL;
1601 }
1602 svp->sv_secdata = secdata;
1603 }
1604
1605 /*
1606 * Resolve a referral. The referral is in the n+1th component of
1607 * svp->sv_path and has a parent nfs4 file handle "fh".
1608 * Upon return, the sv_path will point to the new path that has referral
1609 * component resolved to its referred path and part of original path.
1610 * Hostname and other address information is also updated.
1611 */
1612 int
resolve_referral(mntinfo4_t * mi,servinfo4_t * svp,cred_t * cr,int nth,nfs_fh4 * fh)1613 resolve_referral(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, int nth,
1614 nfs_fh4 *fh)
1615 {
1616 nfs4_sharedfh_t *sfh;
1617 struct nfs_fsl_info nfsfsloc;
1618 nfs4_ga_res_t garp;
1619 COMPOUND4res_clnt callres;
1620 fs_location4 *fsp;
1621 char *nm, *orig_path;
1622 int orig_pathlen = 0, ret = -1, index;
1623
1624 if (svp->sv_pathlen <= 0)
1625 return (ret);
1626
1627 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1628 orig_pathlen = svp->sv_pathlen;
1629 orig_path = kmem_alloc(orig_pathlen, KM_SLEEP);
1630 bcopy(svp->sv_path, orig_path, orig_pathlen);
1631 nm = extract_referral_point(svp->sv_path, nth);
1632 setup_newsvpath(svp, nth);
1633 nfs_rw_exit(&svp->sv_lock);
1634
1635 sfh = sfh4_get(fh, mi);
1636 index = nfs4_process_referral(mi, sfh, nm, cr,
1637 &garp, &callres, &nfsfsloc);
1638 sfh4_rele(&sfh);
1639 kmem_free(nm, MAXPATHLEN);
1640 if (index < 0) {
1641 kmem_free(orig_path, orig_pathlen);
1642 return (index);
1643 }
1644
1645 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
1646 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1647 update_servinfo4(svp, fsp, &nfsfsloc, orig_path, nth);
1648 nfs_rw_exit(&svp->sv_lock);
1649
1650 mutex_enter(&mi->mi_lock);
1651 mi->mi_vfs_referral_loop_cnt++;
1652 mutex_exit(&mi->mi_lock);
1653
1654 ret = 0;
1655 /* Free up XDR memory allocated in nfs4_process_referral() */
1656 xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1657 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1658 kmem_free(orig_path, orig_pathlen);
1659
1660 return (ret);
1661 }
1662
1663 /*
1664 * Get the root filehandle for the given filesystem and server, and update
1665 * svp.
1666 *
1667 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
1668 * to coordinate with recovery. Otherwise, the caller is assumed to be
1669 * the recovery thread or have already done a start_fop.
1670 *
1671 * Errors are returned by the nfs4_error_t parameter.
1672 */
1673 static void
nfs4getfh_otw(struct mntinfo4 * mi,servinfo4_t * svp,vtype_t * vtp,int flags,cred_t * cr,nfs4_error_t * ep)1674 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
1675 int flags, cred_t *cr, nfs4_error_t *ep)
1676 {
1677 COMPOUND4args_clnt args;
1678 COMPOUND4res_clnt res;
1679 int doqueue = 1;
1680 nfs_argop4 *argop;
1681 nfs_resop4 *resop;
1682 nfs4_ga_res_t *garp;
1683 int num_argops;
1684 lookup4_param_t lookuparg;
1685 nfs_fh4 *tmpfhp;
1686 nfs_fh4 *resfhp;
1687 bool_t needrecov = FALSE;
1688 nfs4_recov_state_t recov_state;
1689 int llndx;
1690 int nthcomp;
1691 int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1692
1693 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1694 ASSERT(svp->sv_path != NULL);
1695 if (svp->sv_path[0] == '\0') {
1696 nfs_rw_exit(&svp->sv_lock);
1697 nfs4_error_init(ep, EINVAL);
1698 return;
1699 }
1700 nfs_rw_exit(&svp->sv_lock);
1701
1702 recov_state.rs_flags = 0;
1703 recov_state.rs_num_retry_despite_err = 0;
1704
1705 recov_retry:
1706 if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) {
1707 DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *,
1708 mi, servinfo4_t *, svp, char *, "nfs4getfh_otw");
1709 nfs4_error_init(ep, EINVAL);
1710 return;
1711 }
1712 nfs4_error_zinit(ep);
1713
1714 if (!recovery) {
1715 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
1716 &recov_state, NULL);
1717
1718 /*
1719 * If recovery has been started and this request as
1720 * initiated by a mount, then we must wait for recovery
1721 * to finish before proceeding, otherwise, the error
1722 * cleanup would remove data structures needed by the
1723 * recovery thread.
1724 */
1725 if (ep->error) {
1726 mutex_enter(&mi->mi_lock);
1727 if (mi->mi_flags & MI4_MOUNTING) {
1728 mi->mi_flags |= MI4_RECOV_FAIL;
1729 mi->mi_error = EIO;
1730
1731 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1732 "nfs4getfh_otw: waiting 4 recovery\n"));
1733
1734 while (mi->mi_flags & MI4_RECOV_ACTIV)
1735 cv_wait(&mi->mi_failover_cv,
1736 &mi->mi_lock);
1737 }
1738 mutex_exit(&mi->mi_lock);
1739 return;
1740 }
1741
1742 /*
1743 * If the client does not specify a specific flavor to use
1744 * and has not gotten a secinfo list from the server yet,
1745 * retrieve the secinfo list from the server and use a
1746 * flavor from the list to mount.
1747 *
1748 * If fail to get the secinfo list from the server, then
1749 * try the default flavor.
1750 */
1751 if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
1752 svp->sv_secinfo == NULL) {
1753 (void) nfs4_secinfo_path(mi, cr, FALSE);
1754 }
1755 }
1756
1757 if (recovery)
1758 args.ctag = TAG_REMAP_MOUNT;
1759 else
1760 args.ctag = TAG_MOUNT;
1761
1762 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1763 lookuparg.argsp = &args;
1764 lookuparg.resp = &res;
1765 lookuparg.header_len = 2; /* Putrootfh, getfh */
1766 lookuparg.trailer_len = 0;
1767 lookuparg.ga_bits = FATTR4_FSINFO_MASK;
1768 lookuparg.mi = mi;
1769
1770 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1771 ASSERT(svp->sv_path != NULL);
1772 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
1773 nfs_rw_exit(&svp->sv_lock);
1774
1775 argop = args.array;
1776 num_argops = args.array_len;
1777
1778 /* choose public or root filehandle */
1779 if (flags & NFS4_GETFH_PUBLIC)
1780 argop[0].argop = OP_PUTPUBFH;
1781 else
1782 argop[0].argop = OP_PUTROOTFH;
1783
1784 /* get fh */
1785 argop[1].argop = OP_GETFH;
1786
1787 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
1788 "nfs4getfh_otw: %s call, mi 0x%p",
1789 needrecov ? "recov" : "first", (void *)mi));
1790
1791 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1792
1793 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);
1794
1795 if (needrecov) {
1796 bool_t abort;
1797
1798 if (recovery) {
1799 nfs4args_lookup_free(argop, num_argops);
1800 kmem_free(argop,
1801 lookuparg.arglen * sizeof (nfs_argop4));
1802 if (!ep->error)
1803 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1804 return;
1805 }
1806
1807 NFS4_DEBUG(nfs4_client_recov_debug,
1808 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));
1809
1810 abort = nfs4_start_recovery(ep, mi, NULL,
1811 NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL);
1812 if (!ep->error) {
1813 ep->error = geterrno4(res.status);
1814 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1815 }
1816 nfs4args_lookup_free(argop, num_argops);
1817 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1818 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
1819 /* have another go? */
1820 if (abort == FALSE)
1821 goto recov_retry;
1822 return;
1823 }
1824
1825 /*
1826 * No recovery, but check if error is set.
1827 */
1828 if (ep->error) {
1829 nfs4args_lookup_free(argop, num_argops);
1830 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1831 if (!recovery)
1832 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1833 needrecov);
1834 return;
1835 }
1836
1837 /* for non-recovery errors */
1838 if (res.status && res.status != NFS4ERR_SYMLINK &&
1839 res.status != NFS4ERR_MOVED) {
1840 if (!recovery) {
1841 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1842 needrecov);
1843 }
1844 nfs4args_lookup_free(argop, num_argops);
1845 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1846 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1847 return;
1848 }
1849
1850 /*
1851 * If any intermediate component in the path is a symbolic link,
1852 * resolve the symlink, then try mount again using the new path.
1853 */
1854 if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) {
1855 int where;
1856
1857 /*
1858 * Need to call nfs4_end_op before resolve_sympath to avoid
1859 * potential nfs4_start_op deadlock.
1860 */
1861 if (!recovery)
1862 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1863 needrecov);
1864
1865 /*
1866 * This must be from OP_LOOKUP failure. The (cfh) for this
1867 * OP_LOOKUP is a symlink node. Found out where the
1868 * OP_GETFH is for the (cfh) that is a symlink node.
1869 *
1870 * Example:
1871 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
1872 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
1873 *
1874 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
1875 * In this case, where = 7, nthcomp = 2.
1876 */
1877 where = res.array_len - 2;
1878 ASSERT(where > 0);
1879
1880 if (res.status == NFS4ERR_SYMLINK) {
1881
1882 resop = &res.array[where - 1];
1883 ASSERT(resop->resop == OP_GETFH);
1884 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1885 nthcomp = res.array_len/3 - 1;
1886 ep->error = resolve_sympath(mi, svp, nthcomp,
1887 tmpfhp, cr, flags);
1888
1889 } else if (res.status == NFS4ERR_MOVED) {
1890
1891 resop = &res.array[where - 2];
1892 ASSERT(resop->resop == OP_GETFH);
1893 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1894 nthcomp = res.array_len/3 - 1;
1895 ep->error = resolve_referral(mi, svp, cr, nthcomp,
1896 tmpfhp);
1897 }
1898
1899 nfs4args_lookup_free(argop, num_argops);
1900 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1901 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1902
1903 if (ep->error)
1904 return;
1905
1906 goto recov_retry;
1907 }
1908
1909 /* getfh */
1910 resop = &res.array[res.array_len - 2];
1911 ASSERT(resop->resop == OP_GETFH);
1912 resfhp = &resop->nfs_resop4_u.opgetfh.object;
1913
1914 /* getattr fsinfo res */
1915 resop++;
1916 garp = &resop->nfs_resop4_u.opgetattr.ga_res;
1917
1918 *vtp = garp->n4g_va.va_type;
1919
1920 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;
1921
1922 mutex_enter(&mi->mi_lock);
1923 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
1924 mi->mi_flags |= MI4_LINK;
1925 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
1926 mi->mi_flags |= MI4_SYMLINK;
1927 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
1928 mi->mi_flags |= MI4_ACL;
1929 mutex_exit(&mi->mi_lock);
1930
1931 if (garp->n4g_ext_res->n4g_maxread == 0)
1932 mi->mi_tsize =
1933 MIN(MAXBSIZE, mi->mi_tsize);
1934 else
1935 mi->mi_tsize =
1936 MIN(garp->n4g_ext_res->n4g_maxread,
1937 mi->mi_tsize);
1938
1939 if (garp->n4g_ext_res->n4g_maxwrite == 0)
1940 mi->mi_stsize =
1941 MIN(MAXBSIZE, mi->mi_stsize);
1942 else
1943 mi->mi_stsize =
1944 MIN(garp->n4g_ext_res->n4g_maxwrite,
1945 mi->mi_stsize);
1946
1947 if (garp->n4g_ext_res->n4g_maxfilesize != 0)
1948 mi->mi_maxfilesize =
1949 MIN(garp->n4g_ext_res->n4g_maxfilesize,
1950 mi->mi_maxfilesize);
1951
1952 /*
1953 * If the final component is a a symbolic link, resolve the symlink,
1954 * then try mount again using the new path.
1955 *
1956 * Assume no symbolic link for root filesysm "/".
1957 */
1958 if (*vtp == VLNK) {
1959 /*
1960 * nthcomp is the total result length minus
1961 * the 1st 2 OPs (PUTROOTFH, GETFH),
1962 * then divided by 3 (LOOKUP,GETFH,GETATTR)
1963 *
1964 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
1965 * LOOKUP 2nd-comp GETFH GETATTR
1966 *
1967 * (8 - 2)/3 = 2
1968 */
1969 nthcomp = (res.array_len - 2)/3;
1970
1971 /*
1972 * Need to call nfs4_end_op before resolve_sympath to avoid
1973 * potential nfs4_start_op deadlock. See RFE 4777612.
1974 */
1975 if (!recovery)
1976 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1977 needrecov);
1978
1979 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
1980 flags);
1981
1982 nfs4args_lookup_free(argop, num_argops);
1983 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1984 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1985
1986 if (ep->error)
1987 return;
1988
1989 goto recov_retry;
1990 }
1991
1992 /*
1993 * We need to figure out where in the compound the getfh
1994 * for the parent directory is. If the object to be mounted is
1995 * the root, then there is no lookup at all:
1996 * PUTROOTFH, GETFH.
1997 * If the object to be mounted is in the root, then the compound is:
1998 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
1999 * In either of these cases, the index of the GETFH is 1.
2000 * If it is not at the root, then it's something like:
2001 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
2002 * LOOKUP, GETFH, GETATTR
2003 * In this case, the index is llndx (last lookup index) - 2.
2004 */
2005 if (llndx == -1 || llndx == 2)
2006 resop = &res.array[1];
2007 else {
2008 ASSERT(llndx > 2);
2009 resop = &res.array[llndx-2];
2010 }
2011
2012 ASSERT(resop->resop == OP_GETFH);
2013 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
2014
2015 /* save the filehandles for the replica */
2016 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2017 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE);
2018 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len;
2019 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf,
2020 tmpfhp->nfs_fh4_len);
2021 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE);
2022 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len;
2023 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len);
2024
2025 /* initialize fsid and supp_attrs for server fs */
2026 svp->sv_fsid = garp->n4g_fsid;
2027 svp->sv_supp_attrs =
2028 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK;
2029
2030 nfs_rw_exit(&svp->sv_lock);
2031 nfs4args_lookup_free(argop, num_argops);
2032 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
2033 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2034 if (!recovery)
2035 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
2036 }
2037
2038 /*
2039 * Save a copy of Servinfo4_t structure.
2040 * We might need when there is a failure in getting file handle
2041 * in case of a referral to replace servinfo4 struct and try again.
2042 */
2043 static struct servinfo4 *
copy_svp(servinfo4_t * nsvp)2044 copy_svp(servinfo4_t *nsvp)
2045 {
2046 servinfo4_t *svp = NULL;
2047 struct knetconfig *sknconf, *tknconf;
2048 struct netbuf *saddr, *taddr;
2049
2050 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2051 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2052 svp->sv_flags = nsvp->sv_flags;
2053 svp->sv_fsid = nsvp->sv_fsid;
2054 svp->sv_hostnamelen = nsvp->sv_hostnamelen;
2055 svp->sv_pathlen = nsvp->sv_pathlen;
2056 svp->sv_supp_attrs = nsvp->sv_supp_attrs;
2057
2058 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
2059 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
2060 bcopy(nsvp->sv_hostname, svp->sv_hostname, svp->sv_hostnamelen);
2061 bcopy(nsvp->sv_path, svp->sv_path, svp->sv_pathlen);
2062
2063 saddr = &nsvp->sv_addr;
2064 taddr = &svp->sv_addr;
2065 taddr->maxlen = saddr->maxlen;
2066 taddr->len = saddr->len;
2067 if (saddr->len > 0) {
2068 taddr->buf = kmem_zalloc(saddr->maxlen, KM_SLEEP);
2069 bcopy(saddr->buf, taddr->buf, saddr->len);
2070 }
2071
2072 svp->sv_knconf = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP);
2073 sknconf = nsvp->sv_knconf;
2074 tknconf = svp->sv_knconf;
2075 tknconf->knc_semantics = sknconf->knc_semantics;
2076 tknconf->knc_rdev = sknconf->knc_rdev;
2077 if (sknconf->knc_proto != NULL) {
2078 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2079 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2080 KNC_STRSIZE);
2081 }
2082 if (sknconf->knc_protofmly != NULL) {
2083 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2084 bcopy(sknconf->knc_protofmly, (char *)tknconf->knc_protofmly,
2085 KNC_STRSIZE);
2086 }
2087
2088 if (nsvp->sv_origknconf != NULL) {
2089 svp->sv_origknconf = kmem_zalloc(sizeof (struct knetconfig),
2090 KM_SLEEP);
2091 sknconf = nsvp->sv_origknconf;
2092 tknconf = svp->sv_origknconf;
2093 tknconf->knc_semantics = sknconf->knc_semantics;
2094 tknconf->knc_rdev = sknconf->knc_rdev;
2095 if (sknconf->knc_proto != NULL) {
2096 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2097 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2098 KNC_STRSIZE);
2099 }
2100 if (sknconf->knc_protofmly != NULL) {
2101 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE,
2102 KM_SLEEP);
2103 bcopy(sknconf->knc_protofmly,
2104 (char *)tknconf->knc_protofmly, KNC_STRSIZE);
2105 }
2106 }
2107
2108 svp->sv_secdata = copy_sec_data(nsvp->sv_secdata);
2109 svp->sv_dhsec = copy_sec_data(svp->sv_dhsec);
2110 /*
2111 * Rest of the security information is not copied as they are built
2112 * with the information available from secdata and dhsec.
2113 */
2114 svp->sv_next = NULL;
2115
2116 return (svp);
2117 }
2118
2119 servinfo4_t *
restore_svp(mntinfo4_t * mi,servinfo4_t * svp,servinfo4_t * origsvp)2120 restore_svp(mntinfo4_t *mi, servinfo4_t *svp, servinfo4_t *origsvp)
2121 {
2122 servinfo4_t *srvnext, *tmpsrv;
2123
2124 if (strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) {
2125 /*
2126 * Since the hostname changed, we must be dealing
2127 * with a referral, and the lookup failed. We will
2128 * restore the whole servinfo4_t to what it was before.
2129 */
2130 srvnext = svp->sv_next;
2131 svp->sv_next = NULL;
2132 tmpsrv = copy_svp(origsvp);
2133 sv4_free(svp);
2134 svp = tmpsrv;
2135 svp->sv_next = srvnext;
2136 mutex_enter(&mi->mi_lock);
2137 mi->mi_servers = svp;
2138 mi->mi_curr_serv = svp;
2139 mutex_exit(&mi->mi_lock);
2140
2141 } else if (origsvp->sv_pathlen != svp->sv_pathlen) {
2142
2143 /*
2144 * For symlink case: restore original path because
2145 * it might have contained symlinks that were
2146 * expanded by nfsgetfh_otw before the failure occurred.
2147 */
2148 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2149 kmem_free(svp->sv_path, svp->sv_pathlen);
2150 svp->sv_path =
2151 kmem_alloc(origsvp->sv_pathlen, KM_SLEEP);
2152 svp->sv_pathlen = origsvp->sv_pathlen;
2153 bcopy(origsvp->sv_path, svp->sv_path,
2154 origsvp->sv_pathlen);
2155 nfs_rw_exit(&svp->sv_lock);
2156 }
2157 return (svp);
2158 }
2159
2160 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */
2161 uint_t nfs4_bsize = 32 * 1024; /* client `block' size */
2162 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */
2163 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;
2164
2165 /*
2166 * Remap the root filehandle for the given filesystem.
2167 *
2168 * results returned via the nfs4_error_t parameter.
2169 */
2170 void
nfs4_remap_root(mntinfo4_t * mi,nfs4_error_t * ep,int flags)2171 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags)
2172 {
2173 struct servinfo4 *svp, *origsvp;
2174 vtype_t vtype;
2175 nfs_fh4 rootfh;
2176 int getfh_flags;
2177 int num_retry;
2178
2179 mutex_enter(&mi->mi_lock);
2180
2181 remap_retry:
2182 svp = mi->mi_curr_serv;
2183 getfh_flags =
2184 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0;
2185 getfh_flags |=
2186 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0;
2187 mutex_exit(&mi->mi_lock);
2188
2189 /*
2190 * Just in case server path being mounted contains
2191 * symlinks and fails w/STALE, save the initial sv_path
2192 * so we can redrive the initial mount compound with the
2193 * initial sv_path -- not a symlink-expanded version.
2194 *
2195 * This could only happen if a symlink was expanded
2196 * and the expanded mount compound failed stale. Because
2197 * it could be the case that the symlink was removed at
2198 * the server (and replaced with another symlink/dir,
2199 * we need to use the initial sv_path when attempting
2200 * to re-lookup everything and recover.
2201 */
2202 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2203 origsvp = copy_svp(svp);
2204 nfs_rw_exit(&svp->sv_lock);
2205
2206 num_retry = nfs4_max_mount_retry;
2207
2208 do {
2209 /*
2210 * Get the root fh from the server. Retry nfs4_max_mount_retry
2211 * (2) times if it fails with STALE since the recovery
2212 * infrastructure doesn't do STALE recovery for components
2213 * of the server path to the object being mounted.
2214 */
2215 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep);
2216
2217 if (ep->error == 0 && ep->stat == NFS4_OK)
2218 break;
2219
2220 /*
2221 * For some reason, the mount compound failed. Before
2222 * retrying, we need to restore original conditions.
2223 */
2224 svp = restore_svp(mi, svp, origsvp);
2225
2226 } while (num_retry-- > 0);
2227
2228 sv4_free(origsvp);
2229
2230 if (ep->error != 0 || ep->stat != 0) {
2231 return;
2232 }
2233
2234 if (vtype != VNON && vtype != mi->mi_type) {
2235 /* shouldn't happen */
2236 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2237 "nfs4_remap_root: server root vnode type (%d) doesn't "
2238 "match mount info (%d)", vtype, mi->mi_type);
2239 }
2240
2241 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2242 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2243 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2244 nfs_rw_exit(&svp->sv_lock);
2245 sfh4_update(mi->mi_rootfh, &rootfh);
2246
2247 /*
2248 * It's possible that recovery took place on the filesystem
2249 * and the server has been updated between the time we did
2250 * the nfs4getfh_otw and now. Re-drive the otw operation
2251 * to make sure we have a good fh.
2252 */
2253 mutex_enter(&mi->mi_lock);
2254 if (mi->mi_curr_serv != svp)
2255 goto remap_retry;
2256
2257 mutex_exit(&mi->mi_lock);
2258 }
2259
2260 static int
nfs4rootvp(vnode_t ** rtvpp,vfs_t * vfsp,struct servinfo4 * svp_head,int flags,cred_t * cr,zone_t * zone)2261 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head,
2262 int flags, cred_t *cr, zone_t *zone)
2263 {
2264 vnode_t *rtvp = NULL;
2265 mntinfo4_t *mi;
2266 dev_t nfs_dev;
2267 int error = 0;
2268 rnode4_t *rp;
2269 int i, len;
2270 struct vattr va;
2271 vtype_t vtype = VNON;
2272 vtype_t tmp_vtype = VNON;
2273 struct servinfo4 *firstsvp = NULL, *svp = svp_head;
2274 nfs4_oo_hash_bucket_t *bucketp;
2275 nfs_fh4 fh;
2276 char *droptext = "";
2277 struct nfs_stats *nfsstatsp;
2278 nfs4_fname_t *mfname;
2279 nfs4_error_t e;
2280 int num_retry, removed;
2281 cred_t *lcr = NULL, *tcr = cr;
2282 struct servinfo4 *origsvp;
2283 char *resource;
2284
2285 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
2286 ASSERT(nfsstatsp != NULL);
2287
2288 ASSERT(nfs_zone() == zone);
2289 ASSERT(crgetref(cr));
2290
2291 /*
2292 * Create a mount record and link it to the vfs struct.
2293 */
2294 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
2295 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
2296 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL);
2297 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL);
2298 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL);
2299
2300 if (!(flags & NFSMNT_SOFT))
2301 mi->mi_flags |= MI4_HARD;
2302 if ((flags & NFSMNT_NOPRINT))
2303 mi->mi_flags |= MI4_NOPRINT;
2304 if (flags & NFSMNT_INT)
2305 mi->mi_flags |= MI4_INT;
2306 if (flags & NFSMNT_PUBLIC)
2307 mi->mi_flags |= MI4_PUBLIC;
2308 if (flags & NFSMNT_MIRRORMOUNT)
2309 mi->mi_flags |= MI4_MIRRORMOUNT;
2310 if (flags & NFSMNT_REFERRAL)
2311 mi->mi_flags |= MI4_REFERRAL;
2312 mi->mi_retrans = NFS_RETRIES;
2313 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
2314 svp->sv_knconf->knc_semantics == NC_TPI_COTS)
2315 mi->mi_timeo = nfs4_cots_timeo;
2316 else
2317 mi->mi_timeo = NFS_TIMEO;
2318 mi->mi_prog = NFS_PROGRAM;
2319 mi->mi_vers = NFS_V4;
2320 mi->mi_rfsnames = rfsnames_v4;
2321 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr;
2322 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
2323 mi->mi_servers = svp;
2324 mi->mi_curr_serv = svp;
2325 mi->mi_acregmin = SEC2HR(ACREGMIN);
2326 mi->mi_acregmax = SEC2HR(ACREGMAX);
2327 mi->mi_acdirmin = SEC2HR(ACDIRMIN);
2328 mi->mi_acdirmax = SEC2HR(ACDIRMAX);
2329 mi->mi_fh_expire_type = FH4_PERSISTENT;
2330 mi->mi_clientid_next = NULL;
2331 mi->mi_clientid_prev = NULL;
2332 mi->mi_srv = NULL;
2333 mi->mi_grace_wait = 0;
2334 mi->mi_error = 0;
2335 mi->mi_srvsettime = 0;
2336 mi->mi_srvset_cnt = 0;
2337
2338 mi->mi_count = 1;
2339
2340 mi->mi_tsize = nfs4_tsize(svp->sv_knconf);
2341 mi->mi_stsize = mi->mi_tsize;
2342
2343 if (flags & NFSMNT_DIRECTIO)
2344 mi->mi_flags |= MI4_DIRECTIO;
2345
2346 mi->mi_flags |= MI4_MOUNTING;
2347
2348 mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
2349 list_create(&mi->mi_rnodes, sizeof (rnode4_t),
2350 offsetof(rnode4_t, r_mi_link));
2351
2352 /*
2353 * Make a vfs struct for nfs. We do this here instead of below
2354 * because rtvp needs a vfs before we can do a getattr on it.
2355 *
2356 * Assign a unique device id to the mount
2357 */
2358 mutex_enter(&nfs_minor_lock);
2359 do {
2360 nfs_minor = (nfs_minor + 1) & MAXMIN32;
2361 nfs_dev = makedevice(nfs_major, nfs_minor);
2362 } while (vfs_devismounted(nfs_dev));
2363 mutex_exit(&nfs_minor_lock);
2364
2365 vfsp->vfs_dev = nfs_dev;
2366 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp);
2367 vfsp->vfs_data = (caddr_t)mi;
2368 vfsp->vfs_fstype = nfsfstyp;
2369 vfsp->vfs_bsize = nfs4_bsize;
2370
2371 /*
2372 * Initialize fields used to support async putpage operations.
2373 */
2374 for (i = 0; i < NFS4_ASYNC_TYPES; i++)
2375 mi->mi_async_clusters[i] = nfs4_async_clusters;
2376 mi->mi_async_init_clusters = nfs4_async_clusters;
2377 mi->mi_async_curr[NFS4_ASYNC_QUEUE] =
2378 mi->mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
2379 mi->mi_max_threads = nfs4_max_threads;
2380 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
2381 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
2382 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE], NULL, CV_DEFAULT,
2383 NULL);
2384 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE], NULL,
2385 CV_DEFAULT, NULL);
2386 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
2387 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL);
2388
2389 mi->mi_vfsp = vfsp;
2390 mi->mi_zone = zone;
2391 zone_init_ref(&mi->mi_zone_ref);
2392 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFSV4);
2393 nfs4_mi_zonelist_add(mi);
2394
2395 /*
2396 * Initialize the <open owner/cred> hash table.
2397 */
2398 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
2399 bucketp = &(mi->mi_oo_list[i]);
2400 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL);
2401 list_create(&bucketp->b_oo_hash_list,
2402 sizeof (nfs4_open_owner_t),
2403 offsetof(nfs4_open_owner_t, oo_hash_node));
2404 }
2405
2406 /*
2407 * Initialize the freed open owner list.
2408 */
2409 mi->mi_foo_num = 0;
2410 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS;
2411 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t),
2412 offsetof(nfs4_open_owner_t, oo_foo_node));
2413
2414 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t),
2415 offsetof(nfs4_lost_rqst_t, lr_node));
2416
2417 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t),
2418 offsetof(nfs4_bseqid_entry_t, bs_node));
2419
2420 /*
2421 * Initialize the msg buffer.
2422 */
2423 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t),
2424 offsetof(nfs4_debug_msg_t, msg_node));
2425 mi->mi_msg_count = 0;
2426 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL);
2427
2428 /*
2429 * Initialize kstats
2430 */
2431 nfs4_mnt_kstat_init(vfsp);
2432
2433 /*
2434 * Initialize the shared filehandle pool.
2435 */
2436 sfh4_createtab(&mi->mi_filehandles);
2437
2438 /*
2439 * Save server path we're attempting to mount.
2440 */
2441 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2442 origsvp = copy_svp(svp);
2443 nfs_rw_exit(&svp->sv_lock);
2444
2445 /*
2446 * Make the GETFH call to get root fh for each replica.
2447 */
2448 if (svp_head->sv_next)
2449 droptext = ", dropping replica";
2450
2451 /*
2452 * If the uid is set then set the creds for secure mounts
2453 * by proxy processes such as automountd.
2454 */
2455 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2456 if (svp->sv_secdata->uid != 0 &&
2457 svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
2458 lcr = crdup(cr);
2459 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
2460 tcr = lcr;
2461 }
2462 nfs_rw_exit(&svp->sv_lock);
2463 for (svp = svp_head; svp; svp = svp->sv_next) {
2464 if (nfs4_chkdup_servinfo4(svp_head, svp)) {
2465 nfs_cmn_err(error, CE_WARN,
2466 VERS_MSG "Host %s is a duplicate%s",
2467 svp->sv_hostname, droptext);
2468 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2469 svp->sv_flags |= SV4_NOTINUSE;
2470 nfs_rw_exit(&svp->sv_lock);
2471 continue;
2472 }
2473 mi->mi_curr_serv = svp;
2474
2475 /*
2476 * Just in case server path being mounted contains
2477 * symlinks and fails w/STALE, save the initial sv_path
2478 * so we can redrive the initial mount compound with the
2479 * initial sv_path -- not a symlink-expanded version.
2480 *
2481 * This could only happen if a symlink was expanded
2482 * and the expanded mount compound failed stale. Because
2483 * it could be the case that the symlink was removed at
2484 * the server (and replaced with another symlink/dir,
2485 * we need to use the initial sv_path when attempting
2486 * to re-lookup everything and recover.
2487 *
2488 * Other mount errors should evenutally be handled here also
2489 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount
2490 * failures will result in mount being redriven a few times.
2491 */
2492 num_retry = nfs4_max_mount_retry;
2493 do {
2494 nfs4getfh_otw(mi, svp, &tmp_vtype,
2495 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) |
2496 NFS4_GETFH_NEEDSOP, tcr, &e);
2497
2498 if (e.error == 0 && e.stat == NFS4_OK)
2499 break;
2500
2501 /*
2502 * For some reason, the mount compound failed. Before
2503 * retrying, we need to restore original conditions.
2504 */
2505 svp = restore_svp(mi, svp, origsvp);
2506 svp_head = svp;
2507
2508 } while (num_retry-- > 0);
2509 error = e.error ? e.error : geterrno4(e.stat);
2510 if (error) {
2511 nfs_cmn_err(error, CE_WARN,
2512 VERS_MSG "initial call to %s failed%s: %m",
2513 svp->sv_hostname, droptext);
2514 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2515 svp->sv_flags |= SV4_NOTINUSE;
2516 nfs_rw_exit(&svp->sv_lock);
2517 mi->mi_flags &= ~MI4_RECOV_FAIL;
2518 mi->mi_error = 0;
2519 continue;
2520 }
2521
2522 if (tmp_vtype == VBAD) {
2523 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2524 VERS_MSG "%s returned a bad file type for "
2525 "root%s", svp->sv_hostname, droptext);
2526 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2527 svp->sv_flags |= SV4_NOTINUSE;
2528 nfs_rw_exit(&svp->sv_lock);
2529 continue;
2530 }
2531
2532 if (vtype == VNON) {
2533 vtype = tmp_vtype;
2534 } else if (vtype != tmp_vtype) {
2535 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2536 VERS_MSG "%s returned a different file type "
2537 "for root%s", svp->sv_hostname, droptext);
2538 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2539 svp->sv_flags |= SV4_NOTINUSE;
2540 nfs_rw_exit(&svp->sv_lock);
2541 continue;
2542 }
2543 if (firstsvp == NULL)
2544 firstsvp = svp;
2545 }
2546
2547 if (firstsvp == NULL) {
2548 if (error == 0)
2549 error = ENOENT;
2550 goto bad;
2551 }
2552
2553 mi->mi_curr_serv = svp = firstsvp;
2554 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2555 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0);
2556 fh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2557 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2558 mi->mi_rootfh = sfh4_get(&fh, mi);
2559 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len;
2560 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf;
2561 mi->mi_srvparentfh = sfh4_get(&fh, mi);
2562 nfs_rw_exit(&svp->sv_lock);
2563
2564 /*
2565 * Get the fname for filesystem root.
2566 */
2567 mi->mi_fname = fn_get(NULL, ".", mi->mi_rootfh);
2568 mfname = mi->mi_fname;
2569 fn_hold(mfname);
2570
2571 /*
2572 * Make the root vnode without attributes.
2573 */
2574 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL,
2575 &mfname, NULL, mi, cr, gethrtime());
2576 rtvp->v_type = vtype;
2577
2578 mi->mi_curread = mi->mi_tsize;
2579 mi->mi_curwrite = mi->mi_stsize;
2580
2581 /*
2582 * Start the manager thread responsible for handling async worker
2583 * threads.
2584 */
2585 MI4_HOLD(mi);
2586 VFS_HOLD(vfsp); /* add reference for thread */
2587 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager,
2588 vfsp, 0, minclsyspri);
2589 ASSERT(mi->mi_manager_thread != NULL);
2590
2591 /*
2592 * Create the thread that handles over-the-wire calls for
2593 * VOP_INACTIVE.
2594 * This needs to happen after the manager thread is created.
2595 */
2596 MI4_HOLD(mi);
2597 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread,
2598 mi, 0, minclsyspri);
2599 ASSERT(mi->mi_inactive_thread != NULL);
2600
2601 /* If we didn't get a type, get one now */
2602 if (rtvp->v_type == VNON) {
2603 va.va_mask = AT_TYPE;
2604 error = nfs4getattr(rtvp, &va, tcr);
2605 if (error)
2606 goto bad;
2607 rtvp->v_type = va.va_type;
2608 }
2609
2610 mi->mi_type = rtvp->v_type;
2611
2612 mutex_enter(&mi->mi_lock);
2613 mi->mi_flags &= ~MI4_MOUNTING;
2614 mutex_exit(&mi->mi_lock);
2615
2616 /* Update VFS with new server and path info */
2617 if ((strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) ||
2618 (strcmp(svp->sv_path, origsvp->sv_path) != 0)) {
2619 len = svp->sv_hostnamelen + svp->sv_pathlen;
2620 resource = kmem_zalloc(len, KM_SLEEP);
2621 (void) strcat(resource, svp->sv_hostname);
2622 (void) strcat(resource, ":");
2623 (void) strcat(resource, svp->sv_path);
2624 vfs_setresource(vfsp, resource, 0);
2625 kmem_free(resource, len);
2626 }
2627
2628 sv4_free(origsvp);
2629 *rtvpp = rtvp;
2630 if (lcr != NULL)
2631 crfree(lcr);
2632
2633 return (0);
2634 bad:
2635 /*
2636 * An error occurred somewhere, need to clean up...
2637 */
2638 if (lcr != NULL)
2639 crfree(lcr);
2640
2641 if (rtvp != NULL) {
2642 /*
2643 * We need to release our reference to the root vnode and
2644 * destroy the mntinfo4 struct that we just created.
2645 */
2646 rp = VTOR4(rtvp);
2647 if (rp->r_flags & R4HASHED)
2648 rp4_rmhash(rp);
2649 VN_RELE(rtvp);
2650 }
2651 nfs4_async_stop(vfsp);
2652 nfs4_async_manager_stop(vfsp);
2653 removed = nfs4_mi_zonelist_remove(mi);
2654 if (removed)
2655 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2656
2657 /*
2658 * This releases the initial "hold" of the mi since it will never
2659 * be referenced by the vfsp. Also, when mount returns to vfs.c
2660 * with an error, the vfsp will be destroyed, not rele'd.
2661 */
2662 MI4_RELE(mi);
2663
2664 if (origsvp != NULL)
2665 sv4_free(origsvp);
2666
2667 *rtvpp = NULL;
2668 return (error);
2669 }
2670
2671 /*
2672 * vfs operations
2673 */
2674 static int
nfs4_unmount(vfs_t * vfsp,int flag,cred_t * cr)2675 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr)
2676 {
2677 mntinfo4_t *mi;
2678 ushort_t omax;
2679 int removed;
2680
2681 bool_t must_unlock;
2682
2683 nfs4_ephemeral_tree_t *eph_tree;
2684
2685 if (secpolicy_fs_unmount(cr, vfsp) != 0)
2686 return (EPERM);
2687
2688 mi = VFTOMI4(vfsp);
2689
2690 if (flag & MS_FORCE) {
2691 vfsp->vfs_flag |= VFS_UNMOUNTED;
2692 if (nfs_zone() != mi->mi_zone) {
2693 /*
2694 * If the request is coming from the wrong zone,
2695 * we don't want to create any new threads, and
2696 * performance is not a concern. Do everything
2697 * inline.
2698 */
2699 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
2700 "nfs4_unmount x-zone forced unmount of vfs %p\n",
2701 (void *)vfsp));
2702 nfs4_free_mount(vfsp, flag, cr);
2703 } else {
2704 /*
2705 * Free data structures asynchronously, to avoid
2706 * blocking the current thread (for performance
2707 * reasons only).
2708 */
2709 async_free_mount(vfsp, flag, cr);
2710 }
2711
2712 return (0);
2713 }
2714
2715 /*
2716 * Wait until all asynchronous putpage operations on
2717 * this file system are complete before flushing rnodes
2718 * from the cache.
2719 */
2720 omax = mi->mi_max_threads;
2721 if (nfs4_async_stop_sig(vfsp))
2722 return (EINTR);
2723
2724 r4flush(vfsp, cr);
2725
2726 /*
2727 * About the only reason that this would fail would be
2728 * that the harvester is already busy tearing down this
2729 * node. So we fail back to the caller and let them try
2730 * again when needed.
2731 */
2732 if (nfs4_ephemeral_umount(mi, flag, cr,
2733 &must_unlock, &eph_tree)) {
2734 ASSERT(must_unlock == FALSE);
2735 mutex_enter(&mi->mi_async_lock);
2736 mi->mi_max_threads = omax;
2737 mutex_exit(&mi->mi_async_lock);
2738
2739 return (EBUSY);
2740 }
2741
2742 /*
2743 * If there are any active vnodes on this file system,
2744 * then the file system is busy and can't be unmounted.
2745 */
2746 if (check_rtable4(vfsp)) {
2747 nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree);
2748
2749 mutex_enter(&mi->mi_async_lock);
2750 mi->mi_max_threads = omax;
2751 mutex_exit(&mi->mi_async_lock);
2752
2753 return (EBUSY);
2754 }
2755
2756 /*
2757 * The unmount can't fail from now on, so record any
2758 * ephemeral changes.
2759 */
2760 nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree);
2761
2762 /*
2763 * There are no active files that could require over-the-wire
2764 * calls to the server, so stop the async manager and the
2765 * inactive thread.
2766 */
2767 nfs4_async_manager_stop(vfsp);
2768
2769 /*
2770 * Destroy all rnodes belonging to this file system from the
2771 * rnode hash queues and purge any resources allocated to
2772 * them.
2773 */
2774 destroy_rtable4(vfsp, cr);
2775 vfsp->vfs_flag |= VFS_UNMOUNTED;
2776
2777 nfs4_remove_mi_from_server(mi, NULL);
2778 removed = nfs4_mi_zonelist_remove(mi);
2779 if (removed)
2780 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2781
2782 return (0);
2783 }
2784
2785 /*
2786 * find root of nfs
2787 */
2788 static int
nfs4_root(vfs_t * vfsp,vnode_t ** vpp)2789 nfs4_root(vfs_t *vfsp, vnode_t **vpp)
2790 {
2791 mntinfo4_t *mi;
2792 vnode_t *vp;
2793 nfs4_fname_t *mfname;
2794 servinfo4_t *svp;
2795
2796 mi = VFTOMI4(vfsp);
2797
2798 if (nfs_zone() != mi->mi_zone)
2799 return (EPERM);
2800
2801 svp = mi->mi_curr_serv;
2802 if (svp) {
2803 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2804 if (svp->sv_flags & SV4_ROOT_STALE) {
2805 nfs_rw_exit(&svp->sv_lock);
2806
2807 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2808 if (svp->sv_flags & SV4_ROOT_STALE) {
2809 svp->sv_flags &= ~SV4_ROOT_STALE;
2810 nfs_rw_exit(&svp->sv_lock);
2811 return (ENOENT);
2812 }
2813 nfs_rw_exit(&svp->sv_lock);
2814 } else
2815 nfs_rw_exit(&svp->sv_lock);
2816 }
2817
2818 mfname = mi->mi_fname;
2819 fn_hold(mfname);
2820 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL,
2821 VFTOMI4(vfsp), CRED(), gethrtime());
2822
2823 if (VTOR4(vp)->r_flags & R4STALE) {
2824 VN_RELE(vp);
2825 return (ENOENT);
2826 }
2827
2828 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
2829
2830 vp->v_type = mi->mi_type;
2831
2832 *vpp = vp;
2833
2834 return (0);
2835 }
2836
2837 static int
nfs4_statfs_otw(vnode_t * vp,struct statvfs64 * sbp,cred_t * cr)2838 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr)
2839 {
2840 int error;
2841 nfs4_ga_res_t gar;
2842 nfs4_ga_ext_res_t ger;
2843
2844 gar.n4g_ext_res = &ger;
2845
2846 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar,
2847 NFS4_STATFS_ATTR_MASK, cr))
2848 return (error);
2849
2850 *sbp = gar.n4g_ext_res->n4g_sb;
2851
2852 return (0);
2853 }
2854
2855 /*
2856 * Get file system statistics.
2857 */
2858 static int
nfs4_statvfs(vfs_t * vfsp,struct statvfs64 * sbp)2859 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
2860 {
2861 int error;
2862 vnode_t *vp;
2863 cred_t *cr;
2864
2865 error = nfs4_root(vfsp, &vp);
2866 if (error)
2867 return (error);
2868
2869 cr = CRED();
2870
2871 error = nfs4_statfs_otw(vp, sbp, cr);
2872 if (!error) {
2873 (void) strncpy(sbp->f_basetype,
2874 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
2875 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
2876 } else {
2877 nfs4_purge_stale_fh(error, vp, cr);
2878 }
2879
2880 VN_RELE(vp);
2881
2882 return (error);
2883 }
2884
2885 static kmutex_t nfs4_syncbusy;
2886
2887 /*
2888 * Flush dirty nfs files for file system vfsp.
2889 * If vfsp == NULL, all nfs files are flushed.
2890 *
2891 * SYNC_CLOSE in flag is passed to us to
2892 * indicate that we are shutting down and or
2893 * rebooting.
2894 */
2895 static int
nfs4_sync(vfs_t * vfsp,short flag,cred_t * cr)2896 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr)
2897 {
2898 /*
2899 * Cross-zone calls are OK here, since this translates to a
2900 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
2901 */
2902 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) {
2903 r4flush(vfsp, cr);
2904 mutex_exit(&nfs4_syncbusy);
2905 }
2906
2907 /*
2908 * if SYNC_CLOSE is set then we know that
2909 * the system is rebooting, mark the mntinfo
2910 * for later examination.
2911 */
2912 if (vfsp && (flag & SYNC_CLOSE)) {
2913 mntinfo4_t *mi;
2914
2915 mi = VFTOMI4(vfsp);
2916 if (!(mi->mi_flags & MI4_SHUTDOWN)) {
2917 mutex_enter(&mi->mi_lock);
2918 mi->mi_flags |= MI4_SHUTDOWN;
2919 mutex_exit(&mi->mi_lock);
2920 }
2921 }
2922 return (0);
2923 }
2924
2925 /*
2926 * vget is difficult, if not impossible, to support in v4 because we don't
2927 * know the parent directory or name, which makes it impossible to create a
2928 * useful shadow vnode. And we need the shadow vnode for things like
2929 * OPEN.
2930 */
2931
2932 /* ARGSUSED */
2933 /*
2934 * XXX Check nfs4_vget_pseudo() for dependency.
2935 */
2936 static int
nfs4_vget(vfs_t * vfsp,vnode_t ** vpp,fid_t * fidp)2937 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
2938 {
2939 return (EREMOTE);
2940 }
2941
2942 /*
2943 * nfs4_mountroot get called in the case where we are diskless booting. All
2944 * we need from here is the ability to get the server info and from there we
2945 * can simply call nfs4_rootvp.
2946 */
2947 /* ARGSUSED */
2948 static int
nfs4_mountroot(vfs_t * vfsp,whymountroot_t why)2949 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why)
2950 {
2951 vnode_t *rtvp;
2952 char root_hostname[SYS_NMLN+1];
2953 struct servinfo4 *svp;
2954 int error;
2955 int vfsflags;
2956 size_t size;
2957 char *root_path;
2958 struct pathname pn;
2959 char *name;
2960 cred_t *cr;
2961 mntinfo4_t *mi;
2962 struct nfs_args args; /* nfs mount arguments */
2963 static char token[10];
2964 nfs4_error_t n4e;
2965
2966 bzero(&args, sizeof (args));
2967
2968 /* do this BEFORE getfile which causes xid stamps to be initialized */
2969 clkset(-1L); /* hack for now - until we get time svc? */
2970
2971 if (why == ROOT_REMOUNT) {
2972 /*
2973 * Shouldn't happen.
2974 */
2975 panic("nfs4_mountroot: why == ROOT_REMOUNT");
2976 }
2977
2978 if (why == ROOT_UNMOUNT) {
2979 /*
2980 * Nothing to do for NFS.
2981 */
2982 return (0);
2983 }
2984
2985 /*
2986 * why == ROOT_INIT
2987 */
2988
2989 name = token;
2990 *name = 0;
2991 (void) getfsname("root", name, sizeof (token));
2992
2993 pn_alloc(&pn);
2994 root_path = pn.pn_path;
2995
2996 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2997 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2998 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
2999 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
3000 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
3001
3002 /*
3003 * Get server address
3004 * Get the root path
3005 * Get server's transport
3006 * Get server's hostname
3007 * Get options
3008 */
3009 args.addr = &svp->sv_addr;
3010 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3011 args.fh = (char *)&svp->sv_fhandle;
3012 args.knconf = svp->sv_knconf;
3013 args.hostname = root_hostname;
3014 vfsflags = 0;
3015 if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
3016 &args, &vfsflags)) {
3017 if (error == EPROTONOSUPPORT)
3018 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: "
3019 "mount_root failed: server doesn't support NFS V4");
3020 else
3021 nfs_cmn_err(error, CE_WARN,
3022 "nfs4_mountroot: mount_root failed: %m");
3023 nfs_rw_exit(&svp->sv_lock);
3024 sv4_free(svp);
3025 pn_free(&pn);
3026 return (error);
3027 }
3028 nfs_rw_exit(&svp->sv_lock);
3029 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
3030 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
3031 (void) strcpy(svp->sv_hostname, root_hostname);
3032
3033 svp->sv_pathlen = (int)(strlen(root_path) + 1);
3034 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
3035 (void) strcpy(svp->sv_path, root_path);
3036
3037 /*
3038 * Force root partition to always be mounted with AUTH_UNIX for now
3039 */
3040 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
3041 svp->sv_secdata->secmod = AUTH_UNIX;
3042 svp->sv_secdata->rpcflavor = AUTH_UNIX;
3043 svp->sv_secdata->data = NULL;
3044
3045 cr = crgetcred();
3046 rtvp = NULL;
3047
3048 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
3049
3050 if (error) {
3051 crfree(cr);
3052 pn_free(&pn);
3053 sv4_free(svp);
3054 return (error);
3055 }
3056
3057 mi = VTOMI4(rtvp);
3058
3059 /*
3060 * Send client id to the server, if necessary
3061 */
3062 nfs4_error_zinit(&n4e);
3063 nfs4setclientid(mi, cr, FALSE, &n4e);
3064 error = n4e.error;
3065
3066 crfree(cr);
3067
3068 if (error) {
3069 pn_free(&pn);
3070 goto errout;
3071 }
3072
3073 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args);
3074 if (error) {
3075 nfs_cmn_err(error, CE_WARN,
3076 "nfs4_mountroot: invalid root mount options");
3077 pn_free(&pn);
3078 goto errout;
3079 }
3080
3081 (void) vfs_lock_wait(vfsp);
3082 vfs_add(NULL, vfsp, vfsflags);
3083 vfs_unlock(vfsp);
3084
3085 size = strlen(svp->sv_hostname);
3086 (void) strcpy(rootfs.bo_name, svp->sv_hostname);
3087 rootfs.bo_name[size] = ':';
3088 (void) strcpy(&rootfs.bo_name[size + 1], root_path);
3089
3090 pn_free(&pn);
3091
3092 errout:
3093 if (error) {
3094 sv4_free(svp);
3095 nfs4_async_stop(vfsp);
3096 nfs4_async_manager_stop(vfsp);
3097 }
3098
3099 if (rtvp != NULL)
3100 VN_RELE(rtvp);
3101
3102 return (error);
3103 }
3104
3105 /*
3106 * Initialization routine for VFS routines. Should only be called once
3107 */
3108 int
nfs4_vfsinit(void)3109 nfs4_vfsinit(void)
3110 {
3111 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL);
3112 nfs4setclientid_init();
3113 nfs4_ephemeral_init();
3114 return (0);
3115 }
3116
3117 void
nfs4_vfsfini(void)3118 nfs4_vfsfini(void)
3119 {
3120 nfs4_ephemeral_fini();
3121 nfs4setclientid_fini();
3122 mutex_destroy(&nfs4_syncbusy);
3123 }
3124
3125 void
nfs4_freevfs(vfs_t * vfsp)3126 nfs4_freevfs(vfs_t *vfsp)
3127 {
3128 mntinfo4_t *mi;
3129
3130 /* need to release the initial hold */
3131 mi = VFTOMI4(vfsp);
3132
3133 /*
3134 * At this point, we can no longer reference the vfs
3135 * and need to inform other holders of the reference
3136 * to the mntinfo4_t.
3137 */
3138 mi->mi_vfsp = NULL;
3139
3140 MI4_RELE(mi);
3141 }
3142
3143 /*
3144 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
3145 */
3146 struct nfs4_server nfs4_server_lst =
3147 { &nfs4_server_lst, &nfs4_server_lst };
3148
3149 kmutex_t nfs4_server_lst_lock;
3150
3151 static void
nfs4setclientid_init(void)3152 nfs4setclientid_init(void)
3153 {
3154 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL);
3155 }
3156
3157 static void
nfs4setclientid_fini(void)3158 nfs4setclientid_fini(void)
3159 {
3160 mutex_destroy(&nfs4_server_lst_lock);
3161 }
3162
3163 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY;
3164 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES;
3165
3166 /*
3167 * Set the clientid for the server for "mi". No-op if the clientid is
3168 * already set.
3169 *
3170 * The recovery boolean should be set to TRUE if this function was called
3171 * by the recovery code, and FALSE otherwise. This is used to determine
3172 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
3173 * for adding a mntinfo4_t to a nfs4_server_t.
3174 *
3175 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then
3176 * 'n4ep->error' is set to geterrno4(n4ep->stat).
3177 */
3178 void
nfs4setclientid(mntinfo4_t * mi,cred_t * cr,bool_t recovery,nfs4_error_t * n4ep)3179 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep)
3180 {
3181 struct nfs4_server *np;
3182 struct servinfo4 *svp = mi->mi_curr_serv;
3183 nfs4_recov_state_t recov_state;
3184 int num_retries = 0;
3185 bool_t retry;
3186 cred_t *lcr = NULL;
3187 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */
3188 time_t lease_time = 0;
3189
3190 recov_state.rs_flags = 0;
3191 recov_state.rs_num_retry_despite_err = 0;
3192 ASSERT(n4ep != NULL);
3193
3194 recov_retry:
3195 retry = FALSE;
3196 nfs4_error_zinit(n4ep);
3197 if (!recovery)
3198 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3199
3200 mutex_enter(&nfs4_server_lst_lock);
3201 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */
3202 mutex_exit(&nfs4_server_lst_lock);
3203 if (!np) {
3204 struct nfs4_server *tnp;
3205 np = new_nfs4_server(svp, cr);
3206 mutex_enter(&np->s_lock);
3207
3208 mutex_enter(&nfs4_server_lst_lock);
3209 tnp = servinfo4_to_nfs4_server(svp);
3210 if (tnp) {
3211 /*
3212 * another thread snuck in and put server on list.
3213 * since we aren't adding it to the nfs4_server_list
3214 * we need to set the ref count to 0 and destroy it.
3215 */
3216 np->s_refcnt = 0;
3217 destroy_nfs4_server(np);
3218 np = tnp;
3219 } else {
3220 /*
3221 * do not give list a reference until everything
3222 * succeeds
3223 */
3224 insque(np, &nfs4_server_lst);
3225 }
3226 mutex_exit(&nfs4_server_lst_lock);
3227 }
3228 ASSERT(MUTEX_HELD(&np->s_lock));
3229 /*
3230 * If we find the server already has N4S_CLIENTID_SET, then
3231 * just return, we've already done SETCLIENTID to that server
3232 */
3233 if (np->s_flags & N4S_CLIENTID_SET) {
3234 /* add mi to np's mntinfo4_list */
3235 nfs4_add_mi_to_server(np, mi);
3236 if (!recovery)
3237 nfs_rw_exit(&mi->mi_recovlock);
3238 mutex_exit(&np->s_lock);
3239 nfs4_server_rele(np);
3240 return;
3241 }
3242 mutex_exit(&np->s_lock);
3243
3244
3245 /*
3246 * Drop the mi_recovlock since nfs4_start_op will
3247 * acquire it again for us.
3248 */
3249 if (!recovery) {
3250 nfs_rw_exit(&mi->mi_recovlock);
3251
3252 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state);
3253 if (n4ep->error) {
3254 nfs4_server_rele(np);
3255 return;
3256 }
3257 }
3258
3259 mutex_enter(&np->s_lock);
3260 while (np->s_flags & N4S_CLIENTID_PEND) {
3261 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) {
3262 mutex_exit(&np->s_lock);
3263 nfs4_server_rele(np);
3264 if (!recovery)
3265 nfs4_end_op(mi, NULL, NULL, &recov_state,
3266 recovery);
3267 n4ep->error = EINTR;
3268 return;
3269 }
3270 }
3271
3272 if (np->s_flags & N4S_CLIENTID_SET) {
3273 /* XXX copied/pasted from above */
3274 /* add mi to np's mntinfo4_list */
3275 nfs4_add_mi_to_server(np, mi);
3276 mutex_exit(&np->s_lock);
3277 nfs4_server_rele(np);
3278 if (!recovery)
3279 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3280 return;
3281 }
3282
3283 /*
3284 * Reset the N4S_CB_PINGED flag. This is used to
3285 * indicate if we have received a CB_NULL from the
3286 * server. Also we reset the waiter flag.
3287 */
3288 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);
3289 /* any failure must now clear this flag */
3290 np->s_flags |= N4S_CLIENTID_PEND;
3291 mutex_exit(&np->s_lock);
3292 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);
3293
3294 if (n4ep->error == EACCES) {
3295 /*
3296 * If the uid is set then set the creds for secure mounts
3297 * by proxy processes such as automountd.
3298 */
3299 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3300 if (svp->sv_secdata->uid != 0) {
3301 lcr = crdup(cr);
3302 (void) crsetugid(lcr, svp->sv_secdata->uid,
3303 crgetgid(cr));
3304 }
3305 nfs_rw_exit(&svp->sv_lock);
3306
3307 if (lcr != NULL) {
3308 mutex_enter(&np->s_lock);
3309 crfree(np->s_cred);
3310 np->s_cred = lcr;
3311 mutex_exit(&np->s_lock);
3312 nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
3313 &retry_inuse);
3314 }
3315 }
3316 mutex_enter(&np->s_lock);
3317 lease_time = np->s_lease_time;
3318 np->s_flags &= ~N4S_CLIENTID_PEND;
3319 mutex_exit(&np->s_lock);
3320
3321 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
3322 /*
3323 * Start recovery if failover is a possibility. If
3324 * invoked by the recovery thread itself, then just
3325 * return and let it handle the failover first. NB:
3326 * recovery is not allowed if the mount is in progress
3327 * since the infrastructure is not sufficiently setup
3328 * to allow it. Just return the error (after suitable
3329 * retries).
3330 */
3331 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
3332 (void) nfs4_start_recovery(n4ep, mi, NULL,
3333 NULL, NULL, NULL, OP_SETCLIENTID, NULL, NULL, NULL);
3334 /*
3335 * Don't retry here, just return and let
3336 * recovery take over.
3337 */
3338 if (recovery)
3339 retry = FALSE;
3340 } else if (nfs4_rpc_retry_error(n4ep->error) ||
3341 n4ep->stat == NFS4ERR_RESOURCE ||
3342 n4ep->stat == NFS4ERR_STALE_CLIENTID) {
3343
3344 retry = TRUE;
3345 /*
3346 * Always retry if in recovery or once had
3347 * contact with the server (but now it's
3348 * overloaded).
3349 */
3350 if (recovery == TRUE ||
3351 n4ep->error == ETIMEDOUT ||
3352 n4ep->error == ECONNRESET)
3353 num_retries = 0;
3354 } else if (retry_inuse && n4ep->error == 0 &&
3355 n4ep->stat == NFS4ERR_CLID_INUSE) {
3356 retry = TRUE;
3357 num_retries = 0;
3358 }
3359 } else {
3360 /*
3361 * Since everything succeeded give the list a reference count if
3362 * it hasn't been given one by add_new_nfs4_server() or if this
3363 * is not a recovery situation in which case it is already on
3364 * the list.
3365 */
3366 mutex_enter(&np->s_lock);
3367 if ((np->s_flags & N4S_INSERTED) == 0) {
3368 np->s_refcnt++;
3369 np->s_flags |= N4S_INSERTED;
3370 }
3371 mutex_exit(&np->s_lock);
3372 }
3373
3374 if (!recovery)
3375 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3376
3377
3378 if (retry && num_retries++ < nfs4_num_sclid_retries) {
3379 if (retry_inuse) {
3380 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
3381 retry_inuse = 0;
3382 } else
3383 delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
3384
3385 nfs4_server_rele(np);
3386 goto recov_retry;
3387 }
3388
3389
3390 if (n4ep->error == 0)
3391 n4ep->error = geterrno4(n4ep->stat);
3392
3393 /* broadcast before release in case no other threads are waiting */
3394 cv_broadcast(&np->s_clientid_pend);
3395 nfs4_server_rele(np);
3396 }
3397
3398 int nfs4setclientid_otw_debug = 0;
3399
3400 /*
3401 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFRIM,
3402 * but nothing else; the calling function must be designed to handle those
3403 * other errors.
3404 */
3405 static void
nfs4setclientid_otw(mntinfo4_t * mi,struct servinfo4 * svp,cred_t * cr,struct nfs4_server * np,nfs4_error_t * ep,int * retry_inusep)3406 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr,
3407 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep)
3408 {
3409 COMPOUND4args_clnt args;
3410 COMPOUND4res_clnt res;
3411 nfs_argop4 argop[3];
3412 SETCLIENTID4args *s_args;
3413 SETCLIENTID4resok *s_resok;
3414 int doqueue = 1;
3415 nfs4_ga_res_t *garp = NULL;
3416 timespec_t prop_time, after_time;
3417 verifier4 verf;
3418 clientid4 tmp_clientid;
3419
3420 ASSERT(!MUTEX_HELD(&np->s_lock));
3421
3422 args.ctag = TAG_SETCLIENTID;
3423
3424 args.array = argop;
3425 args.array_len = 3;
3426
3427 /* PUTROOTFH */
3428 argop[0].argop = OP_PUTROOTFH;
3429
3430 /* GETATTR */
3431 argop[1].argop = OP_GETATTR;
3432 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK;
3433 argop[1].nfs_argop4_u.opgetattr.mi = mi;
3434
3435 /* SETCLIENTID */
3436 argop[2].argop = OP_SETCLIENTID;
3437
3438 s_args = &argop[2].nfs_argop4_u.opsetclientid;
3439
3440 mutex_enter(&np->s_lock);
3441
3442 s_args->client.verifier = np->clidtosend.verifier;
3443 s_args->client.id_len = np->clidtosend.id_len;
3444 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT);
3445 s_args->client.id_val = np->clidtosend.id_val;
3446
3447 /*
3448 * Callback needs to happen on non-RDMA transport
3449 * Check if we have saved the original knetconfig
3450 * if so, use that instead.
3451 */
3452 if (svp->sv_origknconf != NULL)
3453 nfs4_cb_args(np, svp->sv_origknconf, s_args);
3454 else
3455 nfs4_cb_args(np, svp->sv_knconf, s_args);
3456
3457 mutex_exit(&np->s_lock);
3458
3459 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3460
3461 if (ep->error)
3462 return;
3463
3464 /* getattr lease_time res */
3465 if ((res.array_len >= 2) &&
3466 (res.array[1].nfs_resop4_u.opgetattr.status == NFS4_OK)) {
3467 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
3468
3469 #ifndef _LP64
3470 /*
3471 * The 32 bit client cannot handle a lease time greater than
3472 * (INT32_MAX/1000000). This is due to the use of the
3473 * lease_time in calls to drv_usectohz() in
3474 * nfs4_renew_lease_thread(). The problem is that
3475 * drv_usectohz() takes a time_t (which is just a long = 4
3476 * bytes) as its parameter. The lease_time is multiplied by
3477 * 1000000 to convert seconds to usecs for the parameter. If
3478 * a number bigger than (INT32_MAX/1000000) is used then we
3479 * overflow on the 32bit client.
3480 */
3481 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) {
3482 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000;
3483 }
3484 #endif
3485
3486 mutex_enter(&np->s_lock);
3487 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime;
3488
3489 /*
3490 * Keep track of the lease period for the mi's
3491 * mi_msg_list. We need an appropiate time
3492 * bound to associate past facts with a current
3493 * event. The lease period is perfect for this.
3494 */
3495 mutex_enter(&mi->mi_msg_list_lock);
3496 mi->mi_lease_period = np->s_lease_time;
3497 mutex_exit(&mi->mi_msg_list_lock);
3498 mutex_exit(&np->s_lock);
3499 }
3500
3501
3502 if (res.status == NFS4ERR_CLID_INUSE) {
3503 clientaddr4 *clid_inuse;
3504
3505 if (!(*retry_inusep)) {
3506 clid_inuse = &res.array->nfs_resop4_u.
3507 opsetclientid.SETCLIENTID4res_u.client_using;
3508
3509 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3510 "NFS4 mount (SETCLIENTID failed)."
3511 " nfs4_client_id.id is in"
3512 "use already by: r_netid<%s> r_addr<%s>",
3513 clid_inuse->r_netid, clid_inuse->r_addr);
3514 }
3515
3516 /*
3517 * XXX - The client should be more robust in its
3518 * handling of clientid in use errors (regen another
3519 * clientid and try again?)
3520 */
3521 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3522 return;
3523 }
3524
3525 if (res.status) {
3526 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3527 return;
3528 }
3529
3530 s_resok = &res.array[2].nfs_resop4_u.
3531 opsetclientid.SETCLIENTID4res_u.resok4;
3532
3533 tmp_clientid = s_resok->clientid;
3534
3535 verf = s_resok->setclientid_confirm;
3536
3537 #ifdef DEBUG
3538 if (nfs4setclientid_otw_debug) {
3539 union {
3540 clientid4 clientid;
3541 int foo[2];
3542 } cid;
3543
3544 cid.clientid = s_resok->clientid;
3545
3546 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3547 "nfs4setclientid_otw: OK, clientid = %x,%x, "
3548 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf);
3549 }
3550 #endif
3551
3552 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3553
3554 /* Confirm the client id and get the lease_time attribute */
3555
3556 args.ctag = TAG_SETCLIENTID_CF;
3557
3558 args.array = argop;
3559 args.array_len = 1;
3560
3561 argop[0].argop = OP_SETCLIENTID_CONFIRM;
3562
3563 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid;
3564 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf;
3565
3566 /* used to figure out RTT for np */
3567 gethrestime(&prop_time);
3568
3569 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlientid_otw: "
3570 "start time: %ld sec %ld nsec", prop_time.tv_sec,
3571 prop_time.tv_nsec));
3572
3573 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3574
3575 gethrestime(&after_time);
3576 mutex_enter(&np->s_lock);
3577 np->propagation_delay.tv_sec =
3578 MAX(1, after_time.tv_sec - prop_time.tv_sec);
3579 mutex_exit(&np->s_lock);
3580
3581 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setlcientid_otw: "
3582 "finish time: %ld sec ", after_time.tv_sec));
3583
3584 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3585 "propagation delay set to %ld sec",
3586 np->propagation_delay.tv_sec));
3587
3588 if (ep->error)
3589 return;
3590
3591 if (res.status == NFS4ERR_CLID_INUSE) {
3592 clientaddr4 *clid_inuse;
3593
3594 if (!(*retry_inusep)) {
3595 clid_inuse = &res.array->nfs_resop4_u.
3596 opsetclientid.SETCLIENTID4res_u.client_using;
3597
3598 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3599 "SETCLIENTID_CONFIRM failed. "
3600 "nfs4_client_id.id is in use already by: "
3601 "r_netid<%s> r_addr<%s>",
3602 clid_inuse->r_netid, clid_inuse->r_addr);
3603 }
3604
3605 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3606 return;
3607 }
3608
3609 if (res.status) {
3610 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3611 return;
3612 }
3613
3614 mutex_enter(&np->s_lock);
3615 np->clientid = tmp_clientid;
3616 np->s_flags |= N4S_CLIENTID_SET;
3617
3618 /* Add mi to np's mntinfo4 list */
3619 nfs4_add_mi_to_server(np, mi);
3620
3621 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) {
3622 /*
3623 * Start lease management thread.
3624 * Keep trying until we succeed.
3625 */
3626
3627 np->s_refcnt++; /* pass reference to thread */
3628 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0,
3629 minclsyspri);
3630 }
3631 mutex_exit(&np->s_lock);
3632
3633 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3634 }
3635
3636 /*
3637 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes
3638 * mi's clientid the same as sp's.
3639 * Assumes sp is locked down.
3640 */
3641 void
nfs4_add_mi_to_server(nfs4_server_t * sp,mntinfo4_t * mi)3642 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi)
3643 {
3644 mntinfo4_t *tmi;
3645 int in_list = 0;
3646
3647 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
3648 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
3649 ASSERT(sp != &nfs4_server_lst);
3650 ASSERT(MUTEX_HELD(&sp->s_lock));
3651
3652 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3653 "nfs4_add_mi_to_server: add mi %p to sp %p",
3654 (void*)mi, (void*)sp));
3655
3656 for (tmi = sp->mntinfo4_list;
3657 tmi != NULL;
3658 tmi = tmi->mi_clientid_next) {
3659 if (tmi == mi) {
3660 NFS4_DEBUG(nfs4_client_lease_debug,
3661 (CE_NOTE,
3662 "nfs4_add_mi_to_server: mi in list"));
3663 in_list = 1;
3664 }
3665 }
3666
3667 /*
3668 * First put a hold on the mntinfo4's vfsp so that references via
3669 * mntinfo4_list will be valid.
3670 */
3671 if (!in_list)
3672 VFS_HOLD(mi->mi_vfsp);
3673
3674 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: "
3675 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi));
3676
3677 if (!in_list) {
3678 if (sp->mntinfo4_list)
3679 sp->mntinfo4_list->mi_clientid_prev = mi;
3680 mi->mi_clientid_next = sp->mntinfo4_list;
3681 mi->mi_srv = sp;
3682 sp->mntinfo4_list = mi;
3683 mi->mi_srvsettime = gethrestime_sec();
3684 mi->mi_srvset_cnt++;
3685 }
3686
3687 /* set mi's clientid to that of sp's for later matching */
3688 mi->mi_clientid = sp->clientid;
3689
3690 /*
3691 * Update the clientid for any other mi's belonging to sp. This
3692 * must be done here while we hold sp->s_lock, so that
3693 * find_nfs4_server() continues to work.
3694 */
3695
3696 for (tmi = sp->mntinfo4_list;
3697 tmi != NULL;
3698 tmi = tmi->mi_clientid_next) {
3699 if (tmi != mi) {
3700 tmi->mi_clientid = sp->clientid;
3701 }
3702 }
3703 }
3704
3705 /*
3706 * Remove the mi from sp's mntinfo4_list and release its reference.
3707 * Exception: if mi still has open files, flag it for later removal (when
3708 * all the files are closed).
3709 *
3710 * If this is the last mntinfo4 in sp's list then tell the lease renewal
3711 * thread to exit.
3712 */
3713 static void
nfs4_remove_mi_from_server_nolock(mntinfo4_t * mi,nfs4_server_t * sp)3714 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp)
3715 {
3716 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3717 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
3718 (void*)mi, (void*)sp));
3719
3720 ASSERT(sp != NULL);
3721 ASSERT(MUTEX_HELD(&sp->s_lock));
3722 ASSERT(mi->mi_open_files >= 0);
3723
3724 /*
3725 * First make sure this mntinfo4 can be taken off of the list,
3726 * ie: it doesn't have any open files remaining.
3727 */
3728 if (mi->mi_open_files > 0) {
3729 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3730 "nfs4_remove_mi_from_server_nolock: don't "
3731 "remove mi since it still has files open"));
3732
3733 mutex_enter(&mi->mi_lock);
3734 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE;
3735 mutex_exit(&mi->mi_lock);
3736 return;
3737 }
3738
3739 VFS_HOLD(mi->mi_vfsp);
3740 remove_mi(sp, mi);
3741 VFS_RELE(mi->mi_vfsp);
3742
3743 if (sp->mntinfo4_list == NULL) {
3744 /* last fs unmounted, kill the thread */
3745 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3746 "remove_mi_from_nfs4_server_nolock: kill the thread"));
3747 nfs4_mark_srv_dead(sp);
3748 }
3749 }
3750
3751 /*
3752 * Remove mi from sp's mntinfo4_list and release the vfs reference.
3753 */
3754 static void
remove_mi(nfs4_server_t * sp,mntinfo4_t * mi)3755 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi)
3756 {
3757 ASSERT(MUTEX_HELD(&sp->s_lock));
3758
3759 /*
3760 * We release a reference, and the caller must still have a
3761 * reference.
3762 */
3763 ASSERT(mi->mi_vfsp->vfs_count >= 2);
3764
3765 if (mi->mi_clientid_prev) {
3766 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next;
3767 } else {
3768 /* This is the first mi in sp's mntinfo4_list */
3769 /*
3770 * Make sure the first mntinfo4 in the list is the actual
3771 * mntinfo4 passed in.
3772 */
3773 ASSERT(sp->mntinfo4_list == mi);
3774
3775 sp->mntinfo4_list = mi->mi_clientid_next;
3776 }
3777 if (mi->mi_clientid_next)
3778 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev;
3779
3780 /* Now mark the mntinfo4's links as being removed */
3781 mi->mi_clientid_prev = mi->mi_clientid_next = NULL;
3782 mi->mi_srv = NULL;
3783 mi->mi_srvset_cnt++;
3784
3785 VFS_RELE(mi->mi_vfsp);
3786 }
3787
3788 /*
3789 * Free all the entries in sp's mntinfo4_list.
3790 */
3791 static void
remove_all_mi(nfs4_server_t * sp)3792 remove_all_mi(nfs4_server_t *sp)
3793 {
3794 mntinfo4_t *mi;
3795
3796 ASSERT(MUTEX_HELD(&sp->s_lock));
3797
3798 while (sp->mntinfo4_list != NULL) {
3799 mi = sp->mntinfo4_list;
3800 /*
3801 * Grab a reference in case there is only one left (which
3802 * remove_mi() frees).
3803 */
3804 VFS_HOLD(mi->mi_vfsp);
3805 remove_mi(sp, mi);
3806 VFS_RELE(mi->mi_vfsp);
3807 }
3808 }
3809
3810 /*
3811 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
3812 *
3813 * This version can be called with a null nfs4_server_t arg,
3814 * and will either find the right one and handle locking, or
3815 * do nothing because the mi wasn't added to an sp's mntinfo4_list.
3816 */
3817 void
nfs4_remove_mi_from_server(mntinfo4_t * mi,nfs4_server_t * esp)3818 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp)
3819 {
3820 nfs4_server_t *sp;
3821
3822 if (esp) {
3823 nfs4_remove_mi_from_server_nolock(mi, esp);
3824 return;
3825 }
3826
3827 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3828 if (sp = find_nfs4_server_all(mi, 1)) {
3829 nfs4_remove_mi_from_server_nolock(mi, sp);
3830 mutex_exit(&sp->s_lock);
3831 nfs4_server_rele(sp);
3832 }
3833 nfs_rw_exit(&mi->mi_recovlock);
3834 }
3835
3836 /*
3837 * Return TRUE if the given server has any non-unmounted filesystems.
3838 */
3839
3840 bool_t
nfs4_fs_active(nfs4_server_t * sp)3841 nfs4_fs_active(nfs4_server_t *sp)
3842 {
3843 mntinfo4_t *mi;
3844
3845 ASSERT(MUTEX_HELD(&sp->s_lock));
3846
3847 for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) {
3848 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
3849 return (TRUE);
3850 }
3851
3852 return (FALSE);
3853 }
3854
3855 /*
3856 * Mark sp as finished and notify any waiters.
3857 */
3858
3859 void
nfs4_mark_srv_dead(nfs4_server_t * sp)3860 nfs4_mark_srv_dead(nfs4_server_t *sp)
3861 {
3862 ASSERT(MUTEX_HELD(&sp->s_lock));
3863
3864 sp->s_thread_exit = NFS4_THREAD_EXIT;
3865 cv_broadcast(&sp->cv_thread_exit);
3866 }
3867
3868 /*
3869 * Create a new nfs4_server_t structure.
3870 * Returns new node unlocked and not in list, but with a reference count of
3871 * 1.
3872 */
3873 struct nfs4_server *
new_nfs4_server(struct servinfo4 * svp,cred_t * cr)3874 new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
3875 {
3876 struct nfs4_server *np;
3877 timespec_t tt;
3878 union {
3879 struct {
3880 uint32_t sec;
3881 uint32_t subsec;
3882 } un_curtime;
3883 verifier4 un_verifier;
3884 } nfs4clientid_verifier;
3885 /*
3886 * We change this ID string carefully and with the Solaris
3887 * NFS server behaviour in mind. "+referrals" indicates
3888 * a client that can handle an NFSv4 referral.
3889 */
3890 char id_val[] = "Solaris: %s, NFSv4 kernel client +referrals";
3891 int len;
3892
3893 np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP);
3894 np->saddr.len = svp->sv_addr.len;
3895 np->saddr.maxlen = svp->sv_addr.maxlen;
3896 np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP);
3897 bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len);
3898 np->s_refcnt = 1;
3899
3900 /*
3901 * Build the nfs_client_id4 for this server mount. Ensure
3902 * the verifier is useful and that the identification is
3903 * somehow based on the server's address for the case of
3904 * multi-homed servers.
3905 */
3906 nfs4clientid_verifier.un_verifier = 0;
3907 gethrestime(&tt);
3908 nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec;
3909 nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec;
3910 np->clidtosend.verifier = nfs4clientid_verifier.un_verifier;
3911
3912 /*
3913 * calculate the length of the opaque identifier. Subtract 2
3914 * for the "%s" and add the traditional +1 for null
3915 * termination.
3916 */
3917 len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1;
3918 np->clidtosend.id_len = len + np->saddr.maxlen;
3919
3920 np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP);
3921 (void) sprintf(np->clidtosend.id_val, id_val, uts_nodename());
3922 bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len);
3923
3924 np->s_flags = 0;
3925 np->mntinfo4_list = NULL;
3926 /* save cred for issuing rfs4calls inside the renew thread */
3927 crhold(cr);
3928 np->s_cred = cr;
3929 cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL);
3930 mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL);
3931 nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL);
3932 list_create(&np->s_deleg_list, sizeof (rnode4_t),
3933 offsetof(rnode4_t, r_deleg_link));
3934 np->s_thread_exit = 0;
3935 np->state_ref_count = 0;
3936 np->lease_valid = NFS4_LEASE_NOT_STARTED;
3937 cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL);
3938 cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL);
3939 np->s_otw_call_count = 0;
3940 cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL);
3941 np->zoneid = getzoneid();
3942 np->zone_globals = nfs4_get_callback_globals();
3943 ASSERT(np->zone_globals != NULL);
3944 return (np);
3945 }
3946
3947 /*
3948 * Create a new nfs4_server_t structure and add it to the list.
3949 * Returns new node locked; reference must eventually be freed.
3950 */
3951 static struct nfs4_server *
add_new_nfs4_server(struct servinfo4 * svp,cred_t * cr)3952 add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
3953 {
3954 nfs4_server_t *sp;
3955
3956 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
3957 sp = new_nfs4_server(svp, cr);
3958 mutex_enter(&sp->s_lock);
3959 insque(sp, &nfs4_server_lst);
3960 sp->s_refcnt++; /* list gets a reference */
3961 sp->s_flags |= N4S_INSERTED;
3962 sp->clientid = 0;
3963 return (sp);
3964 }
3965
/* Non-zero enables the nfs4_server_t list dump and its messages. */
int nfs4_server_t_debug = 0;

#ifdef lint
extern void
dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
#endif

#if !defined(lint) && defined(DEBUG)
/*
 * Debug aid: print every node on the nfs4_server list next to what the
 * caller is looking for (clientid and the address in 'srv_p'), noting
 * for each node whether its address matches, whether its clientid
 * matches (or is still 0), and whether it is not yet exiting.  'txt'
 * names the caller in the first message.  Pauses for 'hz' ticks at the
 * end.
 */
void
dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
{
	int hash16(void *p, int len);
	nfs4_server_t *cur;

	NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
	    "dumping nfs4_server_t list in %s", txt));
	NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
	    "mi 0x%p, want clientid %llx, addr %d/%04X",
	    mi, (longlong_t)clientid, srv_p->sv_addr.len,
	    hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));

	cur = nfs4_server_lst.forw;
	while (cur != &nfs4_server_lst) {
		NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
		    "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
		    cur, (longlong_t)cur->clientid, cur->saddr.len,
		    hash16((void *)cur->saddr.buf, cur->saddr.len),
		    cur->state_ref_count));

		if (cur->saddr.len == srv_p->sv_addr.len &&
		    bcmp(cur->saddr.buf, srv_p->sv_addr.buf,
		    cur->saddr.len) == 0) {
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - address matches"));
		}
		if (cur->clientid == clientid || cur->clientid == 0) {
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - clientid matches"));
		}
		if (cur->s_thread_exit != NFS4_THREAD_EXIT) {
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - thread not exiting"));
		}

		cur = cur->forw;
	}
	delay(hz);
}
#endif
4010
4011
4012 /*
4013 * Move a mntinfo4_t from one server list to another.
4014 * Locking of the two nfs4_server_t nodes will be done in list order.
4015 *
4016 * Returns NULL if the current nfs4_server_t for the filesystem could not
4017 * be found (e.g., due to forced unmount). Otherwise returns a reference
4018 * to the new nfs4_server_t, which must eventually be freed.
4019 */
4020 nfs4_server_t *
nfs4_move_mi(mntinfo4_t * mi,servinfo4_t * old,servinfo4_t * new)4021 nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
4022 {
4023 nfs4_server_t *p, *op = NULL, *np = NULL;
4024 int num_open;
4025 zoneid_t zoneid = nfs_zoneid();
4026
4027 ASSERT(nfs_zone() == mi->mi_zone);
4028
4029 mutex_enter(&nfs4_server_lst_lock);
4030 #ifdef DEBUG
4031 if (nfs4_server_t_debug)
4032 dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
4033 #endif
4034 for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
4035 if (p->zoneid != zoneid)
4036 continue;
4037 if (p->saddr.len == old->sv_addr.len &&
4038 bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
4039 p->s_thread_exit != NFS4_THREAD_EXIT) {
4040 op = p;
4041 mutex_enter(&op->s_lock);
4042 op->s_refcnt++;
4043 }
4044 if (p->saddr.len == new->sv_addr.len &&
4045 bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
4046 p->s_thread_exit != NFS4_THREAD_EXIT) {
4047 np = p;
4048 mutex_enter(&np->s_lock);
4049 }
4050 if (op != NULL && np != NULL)
4051 break;
4052 }
4053 if (op == NULL) {
4054 /*
4055 * Filesystem has been forcibly unmounted. Bail out.
4056 */
4057 if (np != NULL)
4058 mutex_exit(&np->s_lock);
4059 mutex_exit(&nfs4_server_lst_lock);
4060 return (NULL);
4061 }
4062 if (np != NULL) {
4063 np->s_refcnt++;
4064 } else {
4065 #ifdef DEBUG
4066 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4067 "nfs4_move_mi: no target nfs4_server, will create."));
4068 #endif
4069 np = add_new_nfs4_server(new, kcred);
4070 }
4071 mutex_exit(&nfs4_server_lst_lock);
4072
4073 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4074 "nfs4_move_mi: for mi 0x%p, "
4075 "old servinfo4 0x%p, new servinfo4 0x%p, "
4076 "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
4077 (void*)mi, (void*)old, (void*)new,
4078 (void*)op, (void*)np));
4079 ASSERT(op != NULL && np != NULL);
4080
4081 /* discard any delegations */
4082 nfs4_deleg_discard(mi, op);
4083
4084 num_open = mi->mi_open_files;
4085 mi->mi_open_files = 0;
4086 op->state_ref_count -= num_open;
4087 ASSERT(op->state_ref_count >= 0);
4088 np->state_ref_count += num_open;
4089 nfs4_remove_mi_from_server_nolock(mi, op);
4090 mi->mi_open_files = num_open;
4091 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
4092 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
4093 mi->mi_open_files, op->state_ref_count, np->state_ref_count));
4094
4095 nfs4_add_mi_to_server(np, mi);
4096
4097 mutex_exit(&op->s_lock);
4098 mutex_exit(&np->s_lock);
4099 nfs4_server_rele(op);
4100
4101 return (np);
4102 }
4103
4104 /*
4105 * Need to have the nfs4_server_lst_lock.
4106 * Search the nfs4_server list to find a match on this servinfo4
4107 * based on its address.
4108 *
4109 * Returns NULL if no match is found. Otherwise returns a reference (which
4110 * must eventually be freed) to a locked nfs4_server.
4111 */
4112 nfs4_server_t *
servinfo4_to_nfs4_server(servinfo4_t * srv_p)4113 servinfo4_to_nfs4_server(servinfo4_t *srv_p)
4114 {
4115 nfs4_server_t *np;
4116 zoneid_t zoneid = nfs_zoneid();
4117
4118 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
4119 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
4120 if (np->zoneid == zoneid &&
4121 np->saddr.len == srv_p->sv_addr.len &&
4122 bcmp(np->saddr.buf, srv_p->sv_addr.buf,
4123 np->saddr.len) == 0 &&
4124 np->s_thread_exit != NFS4_THREAD_EXIT) {
4125 mutex_enter(&np->s_lock);
4126 np->s_refcnt++;
4127 return (np);
4128 }
4129 }
4130 return (NULL);
4131 }
4132
4133 /*
4134 * Locks the nfs4_server down if it is found and returns a reference that
4135 * must eventually be freed.
4136 */
4137 static nfs4_server_t *
lookup_nfs4_server(nfs4_server_t * sp,int any_state)4138 lookup_nfs4_server(nfs4_server_t *sp, int any_state)
4139 {
4140 nfs4_server_t *np;
4141
4142 mutex_enter(&nfs4_server_lst_lock);
4143 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
4144 mutex_enter(&np->s_lock);
4145 if (np == sp && np->s_refcnt > 0 &&
4146 (np->s_thread_exit != NFS4_THREAD_EXIT || any_state)) {
4147 mutex_exit(&nfs4_server_lst_lock);
4148 np->s_refcnt++;
4149 return (np);
4150 }
4151 mutex_exit(&np->s_lock);
4152 }
4153 mutex_exit(&nfs4_server_lst_lock);
4154
4155 return (NULL);
4156 }
4157
4158 /*
4159 * The caller should be holding mi->mi_recovlock, and it should continue to
4160 * hold the lock until done with the returned nfs4_server_t. Once
4161 * mi->mi_recovlock is released, there is no guarantee that the returned
4162 * mi->nfs4_server_t will continue to correspond to mi.
4163 */
4164 nfs4_server_t *
find_nfs4_server(mntinfo4_t * mi)4165 find_nfs4_server(mntinfo4_t *mi)
4166 {
4167 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
4168 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
4169
4170 return (lookup_nfs4_server(mi->mi_srv, 0));
4171 }
4172
4173 /*
4174 * Same as above, but takes an "any_state" parameter which can be
4175 * set to 1 if the caller wishes to find nfs4_server_t's which
4176 * have been marked for termination by the exit of the renew
4177 * thread. This should only be used by operations which are
4178 * cleaning up and will not cause an OTW op.
4179 */
4180 nfs4_server_t *
find_nfs4_server_all(mntinfo4_t * mi,int any_state)4181 find_nfs4_server_all(mntinfo4_t *mi, int any_state)
4182 {
4183 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
4184 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
4185
4186 return (lookup_nfs4_server(mi->mi_srv, any_state));
4187 }
4188
4189 /*
4190 * Lock sp, but only if it's still active (in the list and hasn't been
4191 * flagged as exiting) or 'any_state' is non-zero.
4192 * Returns TRUE if sp got locked and adds a reference to sp.
4193 */
4194 bool_t
nfs4_server_vlock(nfs4_server_t * sp,int any_state)4195 nfs4_server_vlock(nfs4_server_t *sp, int any_state)
4196 {
4197 return (lookup_nfs4_server(sp, any_state) != NULL);
4198 }
4199
4200 /*
4201 * Release the reference to sp and destroy it if that's the last one.
4202 */
4203
4204 void
nfs4_server_rele(nfs4_server_t * sp)4205 nfs4_server_rele(nfs4_server_t *sp)
4206 {
4207 mutex_enter(&sp->s_lock);
4208 ASSERT(sp->s_refcnt > 0);
4209 sp->s_refcnt--;
4210 if (sp->s_refcnt > 0) {
4211 mutex_exit(&sp->s_lock);
4212 return;
4213 }
4214 mutex_exit(&sp->s_lock);
4215
4216 mutex_enter(&nfs4_server_lst_lock);
4217 mutex_enter(&sp->s_lock);
4218 if (sp->s_refcnt > 0) {
4219 mutex_exit(&sp->s_lock);
4220 mutex_exit(&nfs4_server_lst_lock);
4221 return;
4222 }
4223 remque(sp);
4224 sp->forw = sp->back = NULL;
4225 mutex_exit(&nfs4_server_lst_lock);
4226 destroy_nfs4_server(sp);
4227 }
4228
4229 static void
destroy_nfs4_server(nfs4_server_t * sp)4230 destroy_nfs4_server(nfs4_server_t *sp)
4231 {
4232 ASSERT(MUTEX_HELD(&sp->s_lock));
4233 ASSERT(sp->s_refcnt == 0);
4234 ASSERT(sp->s_otw_call_count == 0);
4235
4236 remove_all_mi(sp);
4237
4238 crfree(sp->s_cred);
4239 kmem_free(sp->saddr.buf, sp->saddr.maxlen);
4240 kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len);
4241 mutex_exit(&sp->s_lock);
4242
4243 /* destroy the nfs4_server */
4244 nfs4callback_destroy(sp);
4245 list_destroy(&sp->s_deleg_list);
4246 mutex_destroy(&sp->s_lock);
4247 cv_destroy(&sp->cv_thread_exit);
4248 cv_destroy(&sp->s_cv_otw_count);
4249 cv_destroy(&sp->s_clientid_pend);
4250 cv_destroy(&sp->wait_cb_null);
4251 nfs_rw_destroy(&sp->s_recovlock);
4252 kmem_free(sp, sizeof (*sp));
4253 }
4254
4255 /*
4256 * Fork off a thread to free the data structures for a mount.
4257 */
4258
4259 static void
async_free_mount(vfs_t * vfsp,int flag,cred_t * cr)4260 async_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
4261 {
4262 freemountargs_t *args;
4263 args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
4264 args->fm_vfsp = vfsp;
4265 VFS_HOLD(vfsp);
4266 MI4_HOLD(VFTOMI4(vfsp));
4267 args->fm_flag = flag;
4268 args->fm_cr = cr;
4269 crhold(cr);
4270 (void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
4271 minclsyspri);
4272 }
4273
4274 static void
nfs4_free_mount_thread(freemountargs_t * args)4275 nfs4_free_mount_thread(freemountargs_t *args)
4276 {
4277 mntinfo4_t *mi;
4278 nfs4_free_mount(args->fm_vfsp, args->fm_flag, args->fm_cr);
4279 mi = VFTOMI4(args->fm_vfsp);
4280 crfree(args->fm_cr);
4281 VFS_RELE(args->fm_vfsp);
4282 MI4_RELE(mi);
4283 kmem_free(args, sizeof (freemountargs_t));
4284 zthread_exit();
4285 /* NOTREACHED */
4286 }
4287
4288 /*
4289 * Thread to free the data structures for a given filesystem.
4290 */
4291 static void
nfs4_free_mount(vfs_t * vfsp,int flag,cred_t * cr)4292 nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
4293 {
4294 mntinfo4_t *mi = VFTOMI4(vfsp);
4295 nfs4_server_t *sp;
4296 callb_cpr_t cpr_info;
4297 kmutex_t cpr_lock;
4298 boolean_t async_thread;
4299 int removed;
4300
4301 bool_t must_unlock;
4302 nfs4_ephemeral_tree_t *eph_tree;
4303
4304 /*
4305 * We need to participate in the CPR framework if this is a kernel
4306 * thread.
4307 */
4308 async_thread = (curproc == nfs_zone()->zone_zsched);
4309 if (async_thread) {
4310 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
4311 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
4312 "nfsv4AsyncUnmount");
4313 }
4314
4315 /*
4316 * We need to wait for all outstanding OTW calls
4317 * and recovery to finish before we remove the mi
4318 * from the nfs4_server_t, as current pending
4319 * calls might still need this linkage (in order
4320 * to find a nfs4_server_t from a mntinfo4_t).
4321 */
4322 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
4323 sp = find_nfs4_server(mi);
4324 nfs_rw_exit(&mi->mi_recovlock);
4325
4326 if (sp) {
4327 while (sp->s_otw_call_count != 0) {
4328 if (async_thread) {
4329 mutex_enter(&cpr_lock);
4330 CALLB_CPR_SAFE_BEGIN(&cpr_info);
4331 mutex_exit(&cpr_lock);
4332 }
4333 cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
4334 if (async_thread) {
4335 mutex_enter(&cpr_lock);
4336 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
4337 mutex_exit(&cpr_lock);
4338 }
4339 }
4340 mutex_exit(&sp->s_lock);
4341 nfs4_server_rele(sp);
4342 sp = NULL;
4343 }
4344
4345 mutex_enter(&mi->mi_lock);
4346 while (mi->mi_in_recovery != 0) {
4347 if (async_thread) {
4348 mutex_enter(&cpr_lock);
4349 CALLB_CPR_SAFE_BEGIN(&cpr_info);
4350 mutex_exit(&cpr_lock);
4351 }
4352 cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
4353 if (async_thread) {
4354 mutex_enter(&cpr_lock);
4355 CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
4356 mutex_exit(&cpr_lock);
4357 }
4358 }
4359 mutex_exit(&mi->mi_lock);
4360
4361 /*
4362 * If we got an error, then do not nuke the
4363 * tree. Either the harvester is busy reclaiming
4364 * this node or we ran into some busy condition.
4365 *
4366 * The harvester will eventually come along and cleanup.
4367 * The only problem would be the root mount point.
4368 *
4369 * Since the busy node can occur for a variety
4370 * of reasons and can result in an entry staying
4371 * in df output but no longer accessible from the
4372 * directory tree, we are okay.
4373 */
4374 if (!nfs4_ephemeral_umount(mi, flag, cr,
4375 &must_unlock, &eph_tree))
4376 nfs4_ephemeral_umount_activate(mi, &must_unlock,
4377 &eph_tree);
4378
4379 /*
4380 * The original purge of the dnlc via 'dounmount'
4381 * doesn't guarantee that another dnlc entry was not
4382 * added while we waitied for all outstanding OTW
4383 * and recovery calls to finish. So re-purge the
4384 * dnlc now.
4385 */
4386 (void) dnlc_purge_vfsp(vfsp, 0);
4387
4388 /*
4389 * We need to explicitly stop the manager thread; the asyc worker
4390 * threads can timeout and exit on their own.
4391 */
4392 mutex_enter(&mi->mi_async_lock);
4393 mi->mi_max_threads = 0;
4394 NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
4395 mutex_exit(&mi->mi_async_lock);
4396 if (mi->mi_manager_thread)
4397 nfs4_async_manager_stop(vfsp);
4398
4399 destroy_rtable4(vfsp, cr);
4400
4401 nfs4_remove_mi_from_server(mi, NULL);
4402
4403 if (async_thread) {
4404 mutex_enter(&cpr_lock);
4405 CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */
4406 mutex_destroy(&cpr_lock);
4407 }
4408
4409 removed = nfs4_mi_zonelist_remove(mi);
4410 if (removed)
4411 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
4412 }
4413
4414 /* Referral related sub-routines */
4415
4416 /* Freeup knetconfig */
4417 static void
free_knconf_contents(struct knetconfig * k)4418 free_knconf_contents(struct knetconfig *k)
4419 {
4420 if (k == NULL)
4421 return;
4422 if (k->knc_protofmly)
4423 kmem_free(k->knc_protofmly, KNC_STRSIZE);
4424 if (k->knc_proto)
4425 kmem_free(k->knc_proto, KNC_STRSIZE);
4426 }
4427
4428 /*
4429 * This updates newpath variable with exact name component from the
4430 * path which gave us a NFS4ERR_MOVED error.
4431 * If the path is /rp/aaa/bbb and nth value is 1, aaa is returned.
4432 */
4433 static char *
extract_referral_point(const char * svp,int nth)4434 extract_referral_point(const char *svp, int nth)
4435 {
4436 int num_slashes = 0;
4437 const char *p;
4438 char *newpath = NULL;
4439 int i = 0;
4440
4441 newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4442 for (p = svp; *p; p++) {
4443 if (*p == '/')
4444 num_slashes++;
4445 if (num_slashes == nth + 1) {
4446 p++;
4447 while (*p != '/') {
4448 if (*p == '\0')
4449 break;
4450 newpath[i] = *p;
4451 i++;
4452 p++;
4453 }
4454 newpath[i++] = '\0';
4455 break;
4456 }
4457 }
4458 return (newpath);
4459 }
4460
4461 /*
4462 * This sets up a new path in sv_path to do a lookup of the referral point.
4463 * If the path is /rp/aaa/bbb and the referral point is aaa,
4464 * this updates /rp/aaa. This path will be used to get referral
4465 * location.
4466 */
4467 static void
setup_newsvpath(servinfo4_t * svp,int nth)4468 setup_newsvpath(servinfo4_t *svp, int nth)
4469 {
4470 int num_slashes = 0, pathlen, i = 0;
4471 char *newpath, *p;
4472
4473 newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4474 for (p = svp->sv_path; *p; p++) {
4475 newpath[i] = *p;
4476 if (*p == '/')
4477 num_slashes++;
4478 if (num_slashes == nth + 1) {
4479 newpath[i] = '\0';
4480 pathlen = strlen(newpath) + 1;
4481 kmem_free(svp->sv_path, svp->sv_pathlen);
4482 svp->sv_path = kmem_alloc(pathlen, KM_SLEEP);
4483 svp->sv_pathlen = pathlen;
4484 bcopy(newpath, svp->sv_path, pathlen);
4485 break;
4486 }
4487 i++;
4488 }
4489 kmem_free(newpath, MAXPATHLEN);
4490 }
4491