xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_common.c (revision 8cd81a20c40b49e1fad4022a2774ec6ad5066532)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23   * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
24   * Copyright 2013 Joyent, Inc. All rights reserved.
25   */
26  
27  /*
28   *	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
29   *		All rights reserved.
30   */
31  
32  #include <sys/errno.h>
33  #include <sys/param.h>
34  #include <sys/types.h>
35  #include <sys/user.h>
36  #include <sys/stat.h>
37  #include <sys/time.h>
38  #include <sys/utsname.h>
39  #include <sys/vfs.h>
40  #include <sys/vfs_opreg.h>
41  #include <sys/vnode.h>
42  #include <sys/pathname.h>
43  #include <sys/bootconf.h>
44  #include <fs/fs_subr.h>
45  #include <rpc/types.h>
46  #include <nfs/nfs.h>
47  #include <nfs/nfs4.h>
48  #include <nfs/nfs_clnt.h>
49  #include <nfs/rnode.h>
50  #include <nfs/mount.h>
51  #include <nfs/nfssys.h>
52  #include <sys/debug.h>
53  #include <sys/cmn_err.h>
54  #include <sys/file.h>
55  #include <sys/fcntl.h>
56  #include <sys/zone.h>
57  
58  /*
59   * This is the loadable module wrapper.
60   */
61  #include <sys/systm.h>
62  #include <sys/modctl.h>
63  #include <sys/syscall.h>
64  #include <sys/ddi.h>
65  
66  #include <rpc/types.h>
67  #include <rpc/auth.h>
68  #include <rpc/clnt.h>
69  #include <rpc/svc.h>
70  
71  /*
72   * The pseudo NFS filesystem to allow diskless booting to dynamically
73   * mount either a NFS V2, NFS V3, or NFS V4 filesystem.  This only implements
74   * the VFS_MOUNTROOT op and is only intended to be used by the
75   * diskless booting code until the real root filesystem is mounted.
76   * Nothing else should ever call this!
77   *
78   * The strategy is that if the initial rootfs type is set to "nfsdyn"
79   * by loadrootmodules() this filesystem is called to mount the
80   * root filesystem.  It first attempts to mount a V4 filesystem, and if that
81   * fails due to an RPC version mismatch it tries V3 and finally V2.
82   * Once the real mount succeeds the vfsops and rootfs name are changed
83   * to reflect the real filesystem type.
84   */
85  static int nfsdyninit(int, char *);
86  static int nfsdyn_mountroot(vfs_t *, whymountroot_t);
87  
/*
 * vfsops vector for the "nfsdyn" pseudo filesystem.  It is filled in by
 * nfsdyninit() through vfs_setfsops() and is put back on the vfs by
 * nfsdyn_mountroot() whenever a root mount attempt fails.
 */
88  vfsops_t *nfsdyn_vfsops;
89  
90  /*
91   * The following data structures are used to configure the NFS
92   * system call, the NFS Version 2 client VFS, and the NFS Version
93   * 3 client VFS into the system.  The NFS Version 4 structures are defined in
94   * nfs4_common.c
95   */
96  
97  /*
98   * The NFS system call.
99   */
/*
 * System call entry for nfssys; shared by modlsys and (when built)
 * modlsys32 below.
 * NOTE(review): the leading 2 is presumably the argument count and the
 * SE_* expression the entry's flags -- confirm against struct sysent.
 */
100  static struct sysent nfssysent = {
101  	2,
102  	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
103  	nfssys
104  };
105  
/*
 * Syscall linkage that registers nfssysent through mod_syscallops.
 * Referenced from the modlinkage table.
 */
106  static struct modlsys modlsys = {
107  	&mod_syscallops,
108  	"NFS syscall, client, and common",
109  	&nfssysent
110  };
111  
/*
 * Second registration of the same nfssysent through mod_syscallops32.
 * Only compiled (and only listed in modlinkage) when _SYSCALL32_IMPL
 * is defined.
 */
112  #ifdef _SYSCALL32_IMPL
113  static struct modlsys modlsys32 = {
114  	&mod_syscallops32,
115  	"NFS syscall, client, and common (32-bit)",
116  	&nfssysent
117  };
118  #endif /* _SYSCALL32_IMPL */
119  
120  /*
121   * The NFS Dynamic client VFS.
122   */
/*
 * Filesystem definition for "nfsdyn": initialized by nfsdyninit(), with
 * no flag bits set.
 * NOTE(review): the trailing NULL is presumably the mount option
 * prototype -- confirm against vfsdef_t.
 */
123  static vfsdef_t vfw = {
124  	VFSDEF_VERSION,
125  	"nfsdyn",
126  	nfsdyninit,
127  	0,
128  	NULL
129  };
130  
/*
 * Filesystem linkage for the nfsdyn definition above; referenced from
 * the modlinkage table.
 */
131  static struct modlfs modlfs = {
132  	&mod_fsops,
133  	"network filesystem",
134  	&vfw
135  };
136  
137  /*
138   * The NFS Version 2 client VFS.
139   */
/*
 * Filesystem definition for "nfs": initialized by nfsinit() and flagged
 * VSW_CANREMOUNT, VSW_NOTZONESAFE and VSW_STATS.
 */
140  static vfsdef_t vfw2 = {
141  	VFSDEF_VERSION,
142  	"nfs",
143  	nfsinit,
144  	VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
145  	NULL
146  };
147  
/*
 * Filesystem linkage for the "nfs" definition above; referenced from
 * the modlinkage table.
 */
148  static struct modlfs modlfs2 = {
149  	&mod_fsops,
150  	"network filesystem version 2",
151  	&vfw2
152  };
153  
154  /*
155   * The NFS Version 3 client VFS.
156   */
/*
 * Filesystem definition for "nfs3": initialized by nfs3init() and
 * flagged the same way as the "nfs" definition (VSW_CANREMOUNT,
 * VSW_NOTZONESAFE and VSW_STATS).
 */
157  static vfsdef_t vfw3 = {
158  	VFSDEF_VERSION,
159  	"nfs3",
160  	nfs3init,
161  	VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS,
162  	NULL
163  };
164  
/*
 * Filesystem linkage for the "nfs3" definition above; referenced from
 * the modlinkage table.
 */
165  static struct modlfs modlfs3 = {
166  	&mod_fsops,
167  	"network filesystem version 3",
168  	&vfw3
169  };
170  
171  extern struct modlfs modlfs4;
172  
173  /*
174   * We have too many linkage structures so we define our own XXX
175   */
/*
 * Private stand-in for struct modlinkage; the instance below is cast to
 * (struct modlinkage *) when handed to mod_install() and mod_info().
 * The array has seven slots: the modlinkage initializer supplies up to
 * six linkage pointers (six when _SYSCALL32_IMPL is defined) followed
 * by the terminating NULL.  Adding another linkage requires growing it.
 */
176  struct modlinkage_big {
177  	int		ml_rev;		/* rev of loadable modules system */
178  	void		*ml_linkage[7];	/* NULL terminated list of */
179  					/* linkage structures */
180  };
181  
182  /*
183   * All of the module configuration linkages required to configure
184   * the system call and client VFS's into the system.
185   */
/*
 * Linkage table passed to mod_install() by _init() and to mod_info() by
 * _info().  Entries are positional: the syscall linkage(s) come first,
 * then the nfsdyn, nfs, nfs3 and (externally defined) modlfs4
 * filesystem linkages, then the NULL terminator.
 */
186  static struct modlinkage_big modlinkage = {
187  	MODREV_1,
188  	&modlsys,
189  #ifdef _SYSCALL32_IMPL
190  	&modlsys32,
191  #endif
192  	&modlfs,
193  	&modlfs2,
194  	&modlfs3,
195  	&modlfs4,
196  	NULL
197  };
198  
199  /*
200   * This routine is invoked automatically when the kernel module
201   * containing this routine is loaded.  This allows module specific
202   * initialization to be done when the module is loaded.
203   */
204  int
_init(void)205  _init(void)
206  {
207  	int status;
208  
209  	if ((status = nfs_clntinit()) != 0) {
210  		cmn_err(CE_WARN, "_init: nfs_clntinit failed");
211  		return (status);
212  	}
213  
214  	/*
215  	 * Create the version specific kstats.
216  	 *
217  	 * PSARC 2001/697 Contract Private Interface
218  	 * All nfs kstats are under SunMC contract
219  	 * Please refer to the PSARC listed above and contact
220  	 * SunMC before making any changes!
221  	 *
222  	 * Changes must be reviewed by Solaris File Sharing
223  	 * Changes must be communicated to contract-2001-697@sun.com
224  	 *
225  	 */
226  
227  	zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL,
228  	    nfsstat_zone_fini);
229  	status = mod_install((struct modlinkage *)&modlinkage);
230  
231  	if (status)  {
232  		(void) zone_key_delete(nfsstat_zone_key);
233  
234  		/*
235  		 * Failed to install module, cleanup previous
236  		 * initialization work.
237  		 */
238  		nfs_clntfini();
239  
240  		/*
241  		 * Clean up work performed indirectly by mod_installfs()
242  		 * as a result of our call to mod_install().
243  		 */
244  		nfs4fini();
245  		nfs3fini();
246  		nfsfini();
247  	}
248  	return (status);
249  }
250  
/*
 * Module unload entry point.  Unloading is never permitted, so this
 * unconditionally reports the module as busy.
 */
int
_fini(void)
{
	return (EBUSY);
}
257  
258  int
_info(struct modinfo * modinfop)259  _info(struct modinfo *modinfop)
260  {
261  	return (mod_info((struct modlinkage *)&modlinkage, modinfop));
262  }
263  
264  /*
265   * General utilities
266   */
267  
268  /*
269   * Returns the preferred transfer size in bytes based on
270   * what network interfaces are available.
271   */
272  int
nfstsize(void)273  nfstsize(void)
274  {
275  	/*
276  	 * For the moment, just return NFS_MAXDATA until we can query the
277  	 * appropriate transport.
278  	 */
279  	return (NFS_MAXDATA);
280  }
281  
282  /*
283   * Returns the preferred transfer size in bytes based on
284   * what network interfaces are available.
285   */
286  
/* this should reflect the largest transfer size possible */
static int nfs3_max_transfer_size = 1024 * 1024;

/*
 * Preferred version 3 transfer size, in bytes.  Until the appropriate
 * transport can be queried this is simply the current value of
 * nfs3_max_transfer_size.
 */
int
nfs3tsize(void)
{
	return (nfs3_max_transfer_size);
}
299  
300  static uint_t nfs3_max_transfer_size_clts = 32 * 1024;
301  static uint_t nfs3_max_transfer_size_cots = 1024 * 1024;
302  static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024;
303  
304  uint_t
nfs3_tsize(struct knetconfig * knp)305  nfs3_tsize(struct knetconfig *knp)
306  {
307  
308  	if (knp->knc_semantics == NC_TPI_COTS_ORD ||
309  	    knp->knc_semantics == NC_TPI_COTS)
310  		return (nfs3_max_transfer_size_cots);
311  	if (knp->knc_semantics == NC_TPI_RDMA)
312  		return (nfs3_max_transfer_size_rdma);
313  	return (nfs3_max_transfer_size_clts);
314  }
315  
316  uint_t
rfs3_tsize(struct svc_req * req)317  rfs3_tsize(struct svc_req *req)
318  {
319  
320  	if (req->rq_xprt->xp_type == T_COTS_ORD ||
321  	    req->rq_xprt->xp_type == T_COTS)
322  		return (nfs3_max_transfer_size_cots);
323  	if (req->rq_xprt->xp_type == T_RDMA)
324  		return (nfs3_max_transfer_size_rdma);
325  	return (nfs3_max_transfer_size_clts);
326  }
327  
328  /* ARGSUSED */
329  static int
nfsdyninit(int fstyp,char * name)330  nfsdyninit(int fstyp, char *name)
331  {
332  	static const fs_operation_def_t nfsdyn_vfsops_template[] = {
333  		VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot },
334  		NULL, NULL
335  	};
336  	int error;
337  
338  	error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops);
339  	if (error != 0)
340  		return (error);
341  
342  	return (0);
343  }
344  
345  /* ARGSUSED */
346  static int
nfsdyn_mountroot(vfs_t * vfsp,whymountroot_t why)347  nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why)
348  {
349  	char root_hostname[SYS_NMLN+1];
350  	struct servinfo *svp;
351  	int error;
352  	int vfsflags;
353  	char *root_path;
354  	struct pathname pn;
355  	char *name;
356  	static char token[10];
357  	struct nfs_args args;		/* nfs mount arguments */
358  
359  	bzero(&args, sizeof (args));
360  
361  	/* do this BEFORE getfile which causes xid stamps to be initialized */
362  	clkset(-1L);		/* hack for now - until we get time svc? */
363  
364  	if (why == ROOT_REMOUNT) {
365  		/*
366  		 * Shouldn't happen.
367  		 */
368  		panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
369  	}
370  
371  	if (why == ROOT_UNMOUNT) {
372  		/*
373  		 * Nothing to do for NFS.
374  		 */
375  		return (0);
376  	}
377  
378  	/*
379  	 * why == ROOT_INIT
380  	 */
381  
382  	name = token;
383  	*name = 0;
384  	getfsname("root", name, sizeof (token));
385  
386  	pn_alloc(&pn);
387  	root_path = pn.pn_path;
388  
389  	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
390  	mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
391  	svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
392  	svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
393  	svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
394  
395  	/*
396  	 * First try version 4
397  	 */
398  	vfs_setops(vfsp, nfs4_vfsops);
399  	args.addr = &svp->sv_addr;
400  	args.fh = (char *)&svp->sv_fhandle;
401  	args.knconf = svp->sv_knconf;
402  	args.hostname = root_hostname;
403  	vfsflags = 0;
404  
405  	if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
406  	    &args, &vfsflags)) {
407  		if (error != EPROTONOSUPPORT) {
408  			nfs_cmn_err(error, CE_WARN,
409  			    "Unable to mount NFS root filesystem: %m");
410  			sv_free(svp);
411  			pn_free(&pn);
412  			vfs_setops(vfsp, nfsdyn_vfsops);
413  			return (error);
414  		}
415  
416  		/*
417  		 * Then try version 3
418  		 */
419  		bzero(&args, sizeof (args));
420  		vfs_setops(vfsp, nfs3_vfsops);
421  		args.addr = &svp->sv_addr;
422  		args.fh = (char *)&svp->sv_fhandle;
423  		args.knconf = svp->sv_knconf;
424  		args.hostname = root_hostname;
425  		vfsflags = 0;
426  
427  		if (error = mount_root(*name ? name : "root", root_path,
428  		    NFS_V3, &args, &vfsflags)) {
429  			if (error != EPROTONOSUPPORT) {
430  				nfs_cmn_err(error, CE_WARN,
431  				    "Unable to mount NFS root filesystem: %m");
432  				sv_free(svp);
433  				pn_free(&pn);
434  				vfs_setops(vfsp, nfsdyn_vfsops);
435  				return (error);
436  			}
437  
438  			/*
439  			 * Finally, try version 2
440  			 */
441  			bzero(&args, sizeof (args));
442  			args.addr = &svp->sv_addr;
443  			args.fh = (char *)&svp->sv_fhandle.fh_buf;
444  			args.knconf = svp->sv_knconf;
445  			args.hostname = root_hostname;
446  			vfsflags = 0;
447  
448  			vfs_setops(vfsp, nfs_vfsops);
449  
450  			if (error = mount_root(*name ? name : "root",
451  			    root_path, NFS_VERSION, &args, &vfsflags)) {
452  				nfs_cmn_err(error, CE_WARN,
453  				    "Unable to mount NFS root filesystem: %m");
454  				sv_free(svp);
455  				pn_free(&pn);
456  				vfs_setops(vfsp, nfsdyn_vfsops);
457  				return (error);
458  			}
459  		}
460  	}
461  
462  	sv_free(svp);
463  	pn_free(&pn);
464  	return (VFS_MOUNTROOT(vfsp, why));
465  }
466  
467  int
nfs_setopts(vnode_t * vp,model_t model,struct nfs_args * buf)468  nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf)
469  {
470  	mntinfo_t *mi;			/* mount info, pointed at by vfs */
471  	STRUCT_HANDLE(nfs_args, args);
472  	int flags;
473  
474  #ifdef lint
475  	model = model;
476  #endif
477  
478  	STRUCT_SET_HANDLE(args, model, buf);
479  
480  	flags = STRUCT_FGET(args, flags);
481  
482  	/*
483  	 * Set option fields in mount info record
484  	 */
485  	mi = VTOMI(vp);
486  
487  	if (flags & NFSMNT_NOAC) {
488  		mi->mi_flags |= MI_NOAC;
489  		PURGE_ATTRCACHE(vp);
490  	}
491  	if (flags & NFSMNT_NOCTO)
492  		mi->mi_flags |= MI_NOCTO;
493  	if (flags & NFSMNT_LLOCK)
494  		mi->mi_flags |= MI_LLOCK;
495  	if (flags & NFSMNT_GRPID)
496  		mi->mi_flags |= MI_GRPID;
497  	if (flags & NFSMNT_RETRANS) {
498  		if (STRUCT_FGET(args, retrans) < 0)
499  			return (EINVAL);
500  		mi->mi_retrans = STRUCT_FGET(args, retrans);
501  	}
502  	if (flags & NFSMNT_TIMEO) {
503  		if (STRUCT_FGET(args, timeo) <= 0)
504  			return (EINVAL);
505  		mi->mi_timeo = STRUCT_FGET(args, timeo);
506  		/*
507  		 * The following scales the standard deviation and
508  		 * and current retransmission timer to match the
509  		 * initial value for the timeout specified.
510  		 */
511  		mi->mi_timers[NFS_CALLTYPES].rt_deviate =
512  		    (mi->mi_timeo * hz * 2) / 5;
513  		mi->mi_timers[NFS_CALLTYPES].rt_rtxcur =
514  		    mi->mi_timeo * hz / 10;
515  	}
516  	if (flags & NFSMNT_RSIZE) {
517  		if (STRUCT_FGET(args, rsize) <= 0)
518  			return (EINVAL);
519  		mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize));
520  		mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize);
521  	}
522  	if (flags & NFSMNT_WSIZE) {
523  		if (STRUCT_FGET(args, wsize) <= 0)
524  			return (EINVAL);
525  		mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize));
526  		mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize);
527  	}
528  	if (flags & NFSMNT_ACREGMIN) {
529  		if (STRUCT_FGET(args, acregmin) < 0)
530  			mi->mi_acregmin = ACMINMAX;
531  		else
532  			mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin),
533  			    ACMINMAX);
534  		mi->mi_acregmin = SEC2HR(mi->mi_acregmin);
535  	}
536  	if (flags & NFSMNT_ACREGMAX) {
537  		if (STRUCT_FGET(args, acregmax) < 0)
538  			mi->mi_acregmax = ACMAXMAX;
539  		else
540  			mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax),
541  			    ACMAXMAX);
542  		mi->mi_acregmax = SEC2HR(mi->mi_acregmax);
543  	}
544  	if (flags & NFSMNT_ACDIRMIN) {
545  		if (STRUCT_FGET(args, acdirmin) < 0)
546  			mi->mi_acdirmin = ACMINMAX;
547  		else
548  			mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin),
549  			    ACMINMAX);
550  		mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin);
551  	}
552  	if (flags & NFSMNT_ACDIRMAX) {
553  		if (STRUCT_FGET(args, acdirmax) < 0)
554  			mi->mi_acdirmax = ACMAXMAX;
555  		else
556  			mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax),
557  			    ACMAXMAX);
558  		mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax);
559  	}
560  
561  	if (flags & NFSMNT_LOOPBACK)
562  		mi->mi_flags |= MI_LOOPBACK;
563  
564  	return (0);
565  }
566  
567  /*
568   * Set or Clear direct I/O flag
569   * VOP_RWLOCK() is held for write access to prevent a race condition
570   * which would occur if a process is in the middle of a write when
571   * directio flag gets set. It is possible that all pages may not get flushed.
572   */
573  
574  /* ARGSUSED */
575  int
nfs_directio(vnode_t * vp,int cmd,cred_t * cr)576  nfs_directio(vnode_t *vp, int cmd, cred_t *cr)
577  {
578  	int	error = 0;
579  	rnode_t	*rp;
580  
581  	rp = VTOR(vp);
582  
583  	if (cmd == DIRECTIO_ON) {
584  
585  		if (rp->r_flags & RDIRECTIO)
586  			return (0);
587  
588  		/*
589  		 * Flush the page cache.
590  		 */
591  
592  		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
593  
594  		if (rp->r_flags & RDIRECTIO) {
595  			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
596  			return (0);
597  		}
598  
599  		if (vn_has_cached_data(vp) &&
600  		    ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
601  			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
602  			    B_INVAL, cr, NULL);
603  			if (error) {
604  				if (error == ENOSPC || error == EDQUOT) {
605  					mutex_enter(&rp->r_statelock);
606  					if (!rp->r_error)
607  						rp->r_error = error;
608  					mutex_exit(&rp->r_statelock);
609  				}
610  				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
611  				return (error);
612  			}
613  		}
614  
615  		mutex_enter(&rp->r_statelock);
616  		rp->r_flags |= RDIRECTIO;
617  		mutex_exit(&rp->r_statelock);
618  		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
619  		return (0);
620  	}
621  
622  	if (cmd == DIRECTIO_OFF) {
623  		mutex_enter(&rp->r_statelock);
624  		rp->r_flags &= ~RDIRECTIO;	/* disable direct mode */
625  		mutex_exit(&rp->r_statelock);
626  		return (0);
627  	}
628  
629  	return (EINVAL);
630  }
631