xref: /titanic_50/usr/src/uts/common/fs/nfs/nfs_common.c (revision 910cba4f2f1e94daf355ee8635285732ac47326c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  *	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
28  *		All rights reserved.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/errno.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/user.h>
37 #include <sys/stat.h>
38 #include <sys/time.h>
39 #include <sys/utsname.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/pathname.h>
43 #include <sys/bootconf.h>
44 #include <fs/fs_subr.h>
45 #include <rpc/types.h>
46 #include <nfs/nfs.h>
47 #include <nfs/nfs4.h>
48 #include <nfs/nfs_clnt.h>
49 #include <nfs/rnode.h>
50 #include <nfs/mount.h>
51 #include <nfs/nfssys.h>
52 #include <sys/debug.h>
53 #include <sys/cmn_err.h>
54 #include <sys/file.h>
55 #include <sys/fcntl.h>
56 #include <sys/zone.h>
57 
58 /*
59  * This is the loadable module wrapper.
60  */
61 #include <sys/systm.h>
62 #include <sys/modctl.h>
63 #include <sys/syscall.h>
64 #include <sys/ddi.h>
65 
66 #include <rpc/types.h>
67 #include <rpc/auth.h>
68 #include <rpc/clnt.h>
69 #include <rpc/svc.h>
70 
71 /*
 * The pseudo NFS filesystem to allow diskless booting to dynamically
73  * mount either a NFS V2, NFS V3, or NFS V4 filesystem.  This only implements
74  * the VFS_MOUNTROOT op and is only intended to be used by the
75  * diskless booting code until the real root filesystem is mounted.
76  * Nothing else should ever call this!
77  *
78  * The strategy is that if the initial rootfs type is set to "nfsdyn"
79  * by loadrootmodules() this filesystem is called to mount the
80  * root filesystem.  It first attempts to mount a V4 filesystem, and if that
81  * fails due to an RPC version mismatch it tries V3 and finally V2.
82  * Once the real mount succeeds the vfsops and rootfs name are changed
83  * to reflect the real filesystem type.
84  */
85 static int nfsdyninit(int, char *);
86 static int nfsdyn_mountroot(vfs_t *, whymountroot_t);
87 
88 vfsops_t *nfsdyn_vfsops;
89 
90 /*
91  * The following data structures are used to configure the NFS
92  * system call, the NFS Version 2 client VFS, and the NFS Version
93  * 3 client VFS into the system.  The NFS Version 4 structures are defined in
94  * nfs4_common.c
95  */
96 
97 /*
98  * The NFS system call.
99  */
100 static struct sysent nfssysent = {
101 	2,
102 	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
103 	nfssys
104 };
105 
106 static struct modlsys modlsys = {
107 	&mod_syscallops,
108 	"NFS syscall, client, and common",
109 	&nfssysent
110 };
111 
#ifdef _SYSCALL32_IMPL
/*
 * Second linkage for the same nfssysent, installed through
 * mod_syscallops32.  Only built when _SYSCALL32_IMPL is defined.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"NFS syscall, client, and common (32-bit)",
	&nfssysent
};
#endif /* _SYSCALL32_IMPL */
119 
120 /*
121  * The NFS Dynamic client VFS.
122  */
123 static vfsdef_t vfw = {
124 	VFSDEF_VERSION,
125 	"nfsdyn",
126 	nfsdyninit,
127 	0,
128 	NULL
129 };
130 
131 static struct modlfs modlfs = {
132 	&mod_fsops,
133 	"network filesystem",
134 	&vfw
135 };
136 
137 /*
138  * The NFS Version 2 client VFS.
139  */
140 static vfsdef_t vfw2 = {
141 	VFSDEF_VERSION,
142 	"nfs",
143 	nfsinit,
144 	VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
145 	NULL
146 };
147 
148 static struct modlfs modlfs2 = {
149 	&mod_fsops,
150 	"network filesystem version 2",
151 	&vfw2
152 };
153 
154 /*
155  * The NFS Version 3 client VFS.
156  */
157 static vfsdef_t vfw3 = {
158 	VFSDEF_VERSION,
159 	"nfs3",
160 	nfs3init,
161 	VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
162 	NULL
163 };
164 
165 static struct modlfs modlfs3 = {
166 	&mod_fsops,
167 	"network filesystem version 3",
168 	&vfw3
169 };
170 
171 extern struct modlfs modlfs4;
172 
/*
 * We have too many linkage structures so we define our own XXX
 *
 * This module installs up to six linkage structures plus the NULL
 * terminator, hence the seven slots.  The structure is cast to
 * struct modlinkage wherever it is handed to the module framework.
 */
struct modlinkage_big {
	int		ml_rev;		/* rev of loadable modules system */
	void		*ml_linkage[7];	/* NULL terminated list of */
					/* linkage structures */
};
181 
182 /*
183  * All of the module configuration linkages required to configure
184  * the system call and client VFS's into the system.
185  */
186 static struct modlinkage_big modlinkage = {
187 	MODREV_1,
188 	&modlsys,
189 #ifdef _SYSCALL32_IMPL
190 	&modlsys32,
191 #endif
192 	&modlfs,
193 	&modlfs2,
194 	&modlfs3,
195 	&modlfs4,
196 	NULL
197 };
198 
/*
 * Modules this one depends on:
 *   specfs - for getfsname only??
 *   rpcmod - too many symbols to build stubs for them all
 */
char _depends_on[] = "fs/specfs strmod/rpcmod misc/rpcsec";
204 
205 /*
206  * This routine is invoked automatically when the kernel module
207  * containing this routine is loaded.  This allows module specific
208  * initialization to be done when the module is loaded.
209  */
210 int
211 _init(void)
212 {
213 	int status;
214 
215 	if ((status = nfs_clntinit()) != 0) {
216 		cmn_err(CE_WARN, "_init: nfs_clntinit failed");
217 		return (status);
218 	}
219 
220 	/*
221 	 * Create the version specific kstats.
222 	 *
223 	 * PSARC 2001/697 Contract Private Interface
224 	 * All nfs kstats are under SunMC contract
225 	 * Please refer to the PSARC listed above and contact
226 	 * SunMC before making any changes!
227 	 *
228 	 * Changes must be reviewed by Solaris File Sharing
229 	 * Changes must be communicated to contract-2001-697@sun.com
230 	 *
231 	 */
232 
233 	zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL,
234 	    nfsstat_zone_fini);
235 	status = mod_install((struct modlinkage *)&modlinkage);
236 
237 	if (status)  {
238 		(void) zone_key_delete(nfsstat_zone_key);
239 
240 		/*
241 		 * Failed to install module, cleanup previous
242 		 * initialization work.
243 		 */
244 		nfs_clntfini();
245 
246 		/*
247 		 * Clean up work performed indirectly by mod_installfs()
248 		 * as a result of our call to mod_install().
249 		 */
250 		nfs4fini();
251 		nfs3fini();
252 		nfsfini();
253 	}
254 	return (status);
255 }
256 
/*
 * Module unload entry point.  Always refuses with EBUSY, so the
 * module is never unloaded.
 */
int
_fini(void)
{
	return (EBUSY);
}
263 
264 int
265 _info(struct modinfo *modinfop)
266 {
267 	return (mod_info((struct modlinkage *)&modlinkage, modinfop));
268 }
269 
270 /*
271  * General utilities
272  */
273 
274 /*
275  * Returns the prefered transfer size in bytes based on
276  * what network interfaces are available.
277  */
278 int
279 nfstsize(void)
280 {
281 	/*
282 	 * For the moment, just return NFS_MAXDATA until we can query the
283 	 * appropriate transport.
284 	 */
285 	return (NFS_MAXDATA);
286 }
287 
/*
 * This should reflect the largest transfer size possible for
 * NFS Version 3, in bytes.
 */
static int nfs3_max_transfer_size = 1024 * 1024;

/*
 * Returns the preferred NFS Version 3 transfer size in bytes.
 *
 * This is meant to depend on which network interfaces are available,
 * but until the appropriate transport can be queried it is simply
 * nfs3_max_transfer_size.
 */
int
nfs3tsize(void)
{
	return (nfs3_max_transfer_size);
}
305 
306 static uint_t nfs3_max_transfer_size_clts = 32 * 1024;
307 static uint_t nfs3_max_transfer_size_cots = 1024 * 1024;
308 static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024;
309 
310 uint_t
311 nfs3_tsize(struct knetconfig *knp)
312 {
313 
314 	if (knp->knc_semantics == NC_TPI_COTS_ORD ||
315 	    knp->knc_semantics == NC_TPI_COTS)
316 		return (nfs3_max_transfer_size_cots);
317 	if (knp->knc_semantics == NC_TPI_RDMA)
318 		return (nfs3_max_transfer_size_rdma);
319 	return (nfs3_max_transfer_size_clts);
320 }
321 
322 uint_t
323 rfs3_tsize(struct svc_req *req)
324 {
325 
326 	if (req->rq_xprt->xp_type == T_COTS_ORD ||
327 	    req->rq_xprt->xp_type == T_COTS)
328 		return (nfs3_max_transfer_size_cots);
329 	if (req->rq_xprt->xp_type == T_RDMA)
330 		return (nfs3_max_transfer_size_rdma);
331 	return (nfs3_max_transfer_size_clts);
332 }
333 
334 /* ARGSUSED */
335 static int
336 nfsdyninit(int fstyp, char *name)
337 {
338 	static const fs_operation_def_t nfsdyn_vfsops_template[] = {
339 		VFSNAME_MOUNTROOT, nfsdyn_mountroot,
340 		NULL, NULL
341 	};
342 	int error;
343 
344 	error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops);
345 	if (error != 0)
346 		return (error);
347 
348 	return (0);
349 }
350 
351 /* ARGSUSED */
352 static int
353 nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why)
354 {
355 	char root_hostname[SYS_NMLN+1];
356 	struct servinfo *svp;
357 	int error;
358 	int vfsflags;
359 	char *root_path;
360 	struct pathname pn;
361 	char *name;
362 	static char token[10];
363 	struct nfs_args args;		/* nfs mount arguments */
364 
365 	bzero(&args, sizeof (args));
366 
367 	/* do this BEFORE getfile which causes xid stamps to be initialized */
368 	clkset(-1L);		/* hack for now - until we get time svc? */
369 
370 	if (why == ROOT_REMOUNT) {
371 		/*
372 		 * Shouldn't happen.
373 		 */
374 		panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
375 	}
376 
377 	if (why == ROOT_UNMOUNT) {
378 		/*
379 		 * Nothing to do for NFS.
380 		 */
381 		return (0);
382 	}
383 
384 	/*
385 	 * why == ROOT_INIT
386 	 */
387 
388 	name = token;
389 	*name = 0;
390 	getfsname("root", name, sizeof (token));
391 
392 	pn_alloc(&pn);
393 	root_path = pn.pn_path;
394 
395 	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
396 	mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
397 	svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
398 	svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
399 	svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
400 
401 	/*
402 	 * First try version 4
403 	 */
404 	vfs_setops(vfsp, nfs4_vfsops);
405 	args.addr = &svp->sv_addr;
406 	args.fh = (char *)&svp->sv_fhandle;
407 	args.knconf = svp->sv_knconf;
408 	args.hostname = root_hostname;
409 	vfsflags = 0;
410 
411 	if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
412 				&args, &vfsflags)) {
413 		if (error != EPROTONOSUPPORT) {
414 			nfs_cmn_err(error, CE_WARN,
415 				"Unable to mount NFS root filesystem: %m");
416 			sv_free(svp);
417 			pn_free(&pn);
418 			vfs_setops(vfsp, nfsdyn_vfsops);
419 			return (error);
420 		}
421 
422 		/*
423 		 * Then try version 3
424 		 */
425 		bzero(&args, sizeof (args));
426 		vfs_setops(vfsp, nfs3_vfsops);
427 		args.addr = &svp->sv_addr;
428 		args.fh = (char *)&svp->sv_fhandle;
429 		args.knconf = svp->sv_knconf;
430 		args.hostname = root_hostname;
431 		vfsflags = 0;
432 
433 		if (error = mount_root(*name ? name : "root", root_path,
434 						NFS_V3, &args, &vfsflags)) {
435 			if (error != EPROTONOSUPPORT) {
436 				nfs_cmn_err(error, CE_WARN,
437 				    "Unable to mount NFS root filesystem: %m");
438 				sv_free(svp);
439 				pn_free(&pn);
440 				vfs_setops(vfsp, nfsdyn_vfsops);
441 				return (error);
442 			}
443 
444 			/*
445 			 * Finally, try version 2
446 			 */
447 			bzero(&args, sizeof (args));
448 			args.addr = &svp->sv_addr;
449 			args.fh = (char *)&svp->sv_fhandle.fh_buf;
450 			args.knconf = svp->sv_knconf;
451 			args.hostname = root_hostname;
452 			vfsflags = 0;
453 
454 			vfs_setops(vfsp, nfs_vfsops);
455 
456 			if (error = mount_root(*name ? name : "root",
457 					root_path, NFS_VERSION, &args,
458 					&vfsflags)) {
459 				nfs_cmn_err(error, CE_WARN,
460 				    "Unable to mount NFS root filesystem: %m");
461 				sv_free(svp);
462 				pn_free(&pn);
463 				vfs_setops(vfsp, nfsdyn_vfsops);
464 				return (error);
465 			}
466 		}
467 	}
468 
469 	sv_free(svp);
470 	pn_free(&pn);
471 	return (VFS_MOUNTROOT(vfsp, why));
472 }
473 
474 int
475 nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf)
476 {
477 	mntinfo_t *mi;			/* mount info, pointed at by vfs */
478 	STRUCT_HANDLE(nfs_args, args);
479 	int flags;
480 
481 #ifdef lint
482 	model = model;
483 #endif
484 
485 	STRUCT_SET_HANDLE(args, model, buf);
486 
487 	flags = STRUCT_FGET(args, flags);
488 
489 	/*
490 	 * Set option fields in mount info record
491 	 */
492 	mi = VTOMI(vp);
493 
494 	if (flags & NFSMNT_NOAC) {
495 		mi->mi_flags |= MI_NOAC;
496 		PURGE_ATTRCACHE(vp);
497 	}
498 	if (flags & NFSMNT_NOCTO)
499 		mi->mi_flags |= MI_NOCTO;
500 	if (flags & NFSMNT_LLOCK)
501 		mi->mi_flags |= MI_LLOCK;
502 	if (flags & NFSMNT_GRPID)
503 		mi->mi_flags |= MI_GRPID;
504 	if (flags & NFSMNT_RETRANS) {
505 		if (STRUCT_FGET(args, retrans) < 0)
506 			return (EINVAL);
507 		mi->mi_retrans = STRUCT_FGET(args, retrans);
508 	}
509 	if (flags & NFSMNT_TIMEO) {
510 		if (STRUCT_FGET(args, timeo) <= 0)
511 			return (EINVAL);
512 		mi->mi_timeo = STRUCT_FGET(args, timeo);
513 		/*
514 		 * The following scales the standard deviation and
515 		 * and current retransmission timer to match the
516 		 * initial value for the timeout specified.
517 		 */
518 		mi->mi_timers[NFS_CALLTYPES].rt_deviate =
519 		    (mi->mi_timeo * hz * 2) / 5;
520 		mi->mi_timers[NFS_CALLTYPES].rt_rtxcur =
521 		    mi->mi_timeo * hz / 10;
522 	}
523 	if (flags & NFSMNT_RSIZE) {
524 		if (STRUCT_FGET(args, rsize) <= 0)
525 			return (EINVAL);
526 		mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize));
527 		mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize);
528 	}
529 	if (flags & NFSMNT_WSIZE) {
530 		if (STRUCT_FGET(args, wsize) <= 0)
531 			return (EINVAL);
532 		mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize));
533 		mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize);
534 	}
535 	if (flags & NFSMNT_ACREGMIN) {
536 		if (STRUCT_FGET(args, acregmin) < 0)
537 			mi->mi_acregmin = ACMINMAX;
538 		else
539 			mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin),
540 			    ACMINMAX);
541 		mi->mi_acregmin = SEC2HR(mi->mi_acregmin);
542 	}
543 	if (flags & NFSMNT_ACREGMAX) {
544 		if (STRUCT_FGET(args, acregmax) < 0)
545 			mi->mi_acregmax = ACMAXMAX;
546 		else
547 			mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax),
548 			    ACMAXMAX);
549 		mi->mi_acregmax = SEC2HR(mi->mi_acregmax);
550 	}
551 	if (flags & NFSMNT_ACDIRMIN) {
552 		if (STRUCT_FGET(args, acdirmin) < 0)
553 			mi->mi_acdirmin = ACMINMAX;
554 		else
555 			mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin),
556 			    ACMINMAX);
557 		mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin);
558 	}
559 	if (flags & NFSMNT_ACDIRMAX) {
560 		if (STRUCT_FGET(args, acdirmax) < 0)
561 			mi->mi_acdirmax = ACMAXMAX;
562 		else
563 			mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax),
564 			    ACMAXMAX);
565 		mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax);
566 	}
567 
568 	if (flags & NFSMNT_LOOPBACK)
569 		mi->mi_flags |= MI_LOOPBACK;
570 
571 	return (0);
572 }
573 
574 /*
575  * Set or Clear direct I/O flag
576  * VOP_RWLOCK() is held for write access to prevent a race condition
577  * which would occur if a process is in the middle of a write when
578  * directio flag gets set. It is possible that all pages may not get flushed.
579  */
580 
581 /* ARGSUSED */
582 int
583 nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
584 {
585 	int	error = 0;
586 	rnode_t	*rp;
587 
588 	rp = VTOR(vp);
589 
590 	if (cmd == DIRECTIO_ON) {
591 
592 		if (rp->r_flags & RDIRECTIO)
593 			return (0);
594 
595 		/*
596 		 * Flush the page cache.
597 		 */
598 
599 		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
600 
601 		if (rp->r_flags & RDIRECTIO) {
602 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
603 			return (0);
604 		}
605 
606 		if (vn_has_cached_data(vp) &&
607 		    ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
608 			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
609 			    B_INVAL, cr);
610 			if (error) {
611 				if (error == ENOSPC || error == EDQUOT) {
612 					mutex_enter(&rp->r_statelock);
613 					if (!rp->r_error)
614 						rp->r_error = error;
615 					mutex_exit(&rp->r_statelock);
616 				}
617 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
618 				return (error);
619 			}
620 		}
621 
622 		mutex_enter(&rp->r_statelock);
623 		rp->r_flags |= RDIRECTIO;
624 		mutex_exit(&rp->r_statelock);
625 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
626 		return (0);
627 	}
628 
629 	if (cmd == DIRECTIO_OFF) {
630 		mutex_enter(&rp->r_statelock);
631 		rp->r_flags &= ~RDIRECTIO;	/* disable direct mode */
632 		mutex_exit(&rp->r_statelock);
633 		return (0);
634 	}
635 
636 	return (EINVAL);
637 }
638