/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

#ifndef	_NFS_NFS_CLNT_H
#define	_NFS_NFS_CLNT_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/utsname.h>
#include <sys/kstat.h>
#include <sys/time.h>
#include <vm/page.h>
#include <sys/thread.h>
#include <nfs/rnode.h>
#include <sys/list.h>

#ifdef	__cplusplus
extern "C" {
#endif

#define	HOSTNAMESZ	32
#define	ACREGMIN	3	/* min secs to hold cached file attr */
#define	ACREGMAX	60	/* max secs to hold cached file attr */
#define	ACDIRMIN	30	/* min secs to hold cached dir attr */
#define	ACDIRMAX	60	/* max secs to hold cached dir attr */
#define	ACMINMAX	3600	/* 1 hr is longest min timeout */
#define	ACMAXMAX	36000	/* 10 hr is longest max timeout */

#define	NFS_CALLTYPES	3	/* Lookups, Reads, Writes */

/*
 * rfscall() flags
 */
#define	RFSCALL_SOFT	0x00000001	/* Do op as if fs was soft-mounted */

/*
 * Fake errno passed back from rfscall to indicate transfer size adjustment
 */
#define	ENFS_TRYAGAIN	999

/*
 * The NFS specific async_reqs structure.
 */

enum iotype {
	NFS_READ_AHEAD,
	NFS_PUTAPAGE,
	NFS_PAGEIO,
	NFS_READDIR,
	NFS_COMMIT,
	NFS_INACTIVE
};
#define	NFS_ASYNC_TYPES	(NFS_INACTIVE + 1)

struct nfs_async_read_req {
	void (*readahead)();		/* pointer to readahead function */
	u_offset_t blkoff;		/* offset in file */
	struct seg *seg;		/* segment to do i/o to */
	caddr_t addr;			/* address to do i/o to */
};

struct nfs_pageio_req {
	int (*pageio)();		/* pointer to pageio function */
	page_t *pp;			/* page list */
	u_offset_t io_off;		/* offset in file */
	uint_t io_len;			/* size of request */
	int flags;
};

struct nfs_readdir_req {
	int (*readdir)();		/* pointer to readdir function */
	struct rddir_cache *rdc;	/* pointer to cache entry to fill */
};

struct nfs_commit_req {
	void (*commit)();		/* pointer to commit function */
	page_t *plist;			/* page list */
	offset3 offset;			/* starting offset */
	count3 count;			/* size of range to be commited */
};

struct nfs_inactive_req {
	void (*inactive)();		/* pointer to inactive function */
};

struct nfs_async_reqs {
	struct nfs_async_reqs *a_next;	/* pointer to next arg struct */
#ifdef DEBUG
	kthread_t *a_queuer;		/* thread id of queueing thread */
#endif
	struct vnode *a_vp;		/* vnode pointer */
	struct cred *a_cred;		/* cred pointer */
	enum iotype a_io;		/* i/o type */
	union {
		struct nfs_async_read_req a_read_args;
		struct nfs_pageio_req a_pageio_args;
		struct nfs_readdir_req a_readdir_args;
		struct nfs_commit_req a_commit_args;
		struct nfs_inactive_req a_inactive_args;
	} a_args;
};

#define	a_nfs_readahead a_args.a_read_args.readahead
#define	a_nfs_blkoff a_args.a_read_args.blkoff
#define	a_nfs_seg a_args.a_read_args.seg
#define	a_nfs_addr a_args.a_read_args.addr

#define	a_nfs_putapage a_args.a_pageio_args.pageio
#define	a_nfs_pageio a_args.a_pageio_args.pageio
#define	a_nfs_pp a_args.a_pageio_args.pp
#define	a_nfs_off a_args.a_pageio_args.io_off
#define	a_nfs_len a_args.a_pageio_args.io_len
#define	a_nfs_flags a_args.a_pageio_args.flags

#define	a_nfs_readdir a_args.a_readdir_args.readdir
#define	a_nfs_rdc a_args.a_readdir_args.rdc

#define	a_nfs_commit a_args.a_commit_args.commit
#define	a_nfs_plist a_args.a_commit_args.plist
#define	a_nfs_offset a_args.a_commit_args.offset
#define	a_nfs_count a_args.a_commit_args.count

#define	a_nfs_inactive a_args.a_inactive_args.inactive

/*
 * Due to the way the address space callbacks are used to execute a delmap,
 * we must keep track of how many times the same thread has called
 * VOP_DELMAP()->nfs_delmap()/nfs3_delmap().  This is done by having a list of
 * nfs_delmapcall_t's associated with each rnode_t.  This list is protected
 * by the rnode_t's r_statelock.  The individual elements do not need to be
 * protected as they will only ever be created, modified and destroyed by
 * one thread (the call_id).
 * See nfs_delmap()/nfs3_delmap() for further explanation.
 */
typedef struct nfs_delmapcall {
	kthread_t	*call_id;
	int		error;	/* error from delmap */
	list_node_t	call_node;
} nfs_delmapcall_t;

/*
 * delmap address space callback args
 */
typedef struct nfs_delmap_args {
	vnode_t			*vp;
	offset_t		off;
	caddr_t			addr;
	size_t			len;
	uint_t			prot;
	uint_t			maxprot;
	uint_t			flags;
	cred_t			*cr;
	nfs_delmapcall_t	*caller; /* to retrieve errors from the cb */
} nfs_delmap_args_t;

#ifdef _KERNEL
extern nfs_delmapcall_t	*nfs_init_delmapcall(void);
extern void	nfs_free_delmapcall(nfs_delmapcall_t *);
extern int	nfs_find_and_delete_delmapcall(rnode_t *, int *errp);
#endif /* _KERNEL */

/*
 * The following structures, chhead and chtab,  make up the client handle
 * cache.  chhead represents a quadruple(RPC program, RPC version, Protocol
 * Family, and Transport).  For example, a chhead entry could represent
 * NFS/V3/IPv4/TCP requests.  chhead nodes are linked together as a singly
 * linked list and is referenced from chtable.
 *
 * chtab represents an allocated client handle bound to a particular
 * quadruple. These nodes chain down from a chhead node.  chtab
 * entries which are on the chain are considered free, so a thread may simply
 * unlink the first node without traversing the chain.  When the thread is
 * completed with its request, it puts the chtab node back on the chain.
 */
typedef struct chhead {
	struct chhead *ch_next;	/* next quadruple */
	struct chtab *ch_list;	/* pointer to free client handle(s) */
	uint64_t ch_timesused;	/* times this quadruple was requested */
	rpcprog_t ch_prog;	/* RPC program number */
	rpcvers_t ch_vers;	/* RPC version number */
	dev_t ch_dev;		/* pseudo device number (i.e. /dev/udp) */
	char *ch_protofmly;	/* protocol (i.e. NC_INET, NC_LOOPBACK) */
} chhead_t;

typedef struct chtab {
	struct chtab *ch_list;	/* next free client handle */
	struct chhead *ch_head;	/* associated quadruple */
	time_t ch_freed;	/* timestamp when freed */
	CLIENT *ch_client;	/* pointer to client handle */
} chtab_t;

/*
 * clinfo is a structure which encapsulates data that is needed to
 * obtain a client handle from the cache
 */
typedef struct clinfo {
	rpcprog_t cl_prog;	/* RPC program number */
	rpcvers_t cl_vers;	/* RPC version number */
	uint_t cl_readsize;	/* transfer size */
	int cl_retrans;		/* times to retry request */
	uint_t cl_flags;	/* info flags */
} clinfo_t;

/*
 * Failover information, passed opaquely through rfscall()
 */
typedef struct failinfo {
	struct vnode	*vp;
	caddr_t		fhp;
	void (*copyproc)(caddr_t, vnode_t *);
	int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *,
			int, vnode_t *, struct cred *, int);
	int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int);
} failinfo_t;

/*
 * Static server information
 *
 * These fields are protected by sv_lock:
 *	sv_flags
 */
typedef struct servinfo {
	struct knetconfig *sv_knconf;   /* bound TLI fd */
	struct knetconfig *sv_origknconf;	/* For RDMA save orig knconf */
	struct netbuf	sv_addr;	/* server's address */
	nfs_fhandle	sv_fhandle;	/* this server's filehandle */
	struct sec_data *sv_secdata;	/* security data for rpcsec module */
	char	*sv_hostname;		/* server's hostname */
	int	sv_hostnamelen;		/* server's hostname length */
	uint_t	sv_flags;		/* see below */
	struct servinfo	*sv_next;	/* next in list */
	kmutex_t sv_lock;
} servinfo_t;

/*
 * The values for sv_flags.
 */
#define	SV_ROOT_STALE	0x1		/* root vnode got ESTALE */

/*
 * Switch from RDMA knconf to original mount knconf
 */

#define	ORIG_KNCONF(mi) (mi->mi_curr_serv->sv_origknconf ? \
	mi->mi_curr_serv->sv_origknconf : mi->mi_curr_serv->sv_knconf)

/*
 * NFS private data per mounted file system
 *	The mi_lock mutex protects the following fields:
 *		mi_flags
 *		mi_printed
 *		mi_down
 *		mi_tsize
 *		mi_stsize
 *		mi_curread
 *		mi_curwrite
 *		mi_timers
 *		mi_curr_serv
 *		mi_readers
 *		mi_klmconfig
 *
 *	The mi_async_lock mutex protects the following fields:
 *		mi_async_reqs
 *		mi_async_req_count
 *		mi_async_tail
 *		mi_async_curr
 *		mi_async_clusters
 *		mi_async_init_clusters
 *		mi_threads
 *		mi_manager_thread
 *
 *	Normally the netconfig information for the mount comes from
 *	mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
 *	different transport, mi_klmconfig contains the necessary netconfig
 *	information.
 *
 *	'mi_zone' is initialized at structure creation time, and never
 *	changes; it may be read without a lock.
 *
 *	mi_zone_node is linkage into the mi4_globals.mig_list, and is
 *	protected by mi4_globals.mig_list_lock.
 *
 *	Locking order:
 *	  mi_globals::mig_lock > mi_async_lock > mi_lock
 */
typedef struct mntinfo {
	kmutex_t	mi_lock;	/* protects mntinfo fields */
	struct servinfo *mi_servers;    /* server list */
	struct servinfo *mi_curr_serv;  /* current server */
	kcondvar_t	mi_failover_cv;	/* failover synchronization */
	int		mi_readers;	/* failover - users of mi_curr_serv */
	struct vfs	*mi_vfsp;	/* back pointer to vfs */
	enum vtype	mi_type;	/* file type of the root vnode */
	uint_t		mi_flags;	/* see below */
	uint_t		mi_tsize;	/* max read transfer size (bytes) */
	uint_t		mi_stsize;	/* max write transfer size (bytes) */
	int		mi_timeo;	/* inital timeout in 10th sec */
	int		mi_retrans;	/* times to retry request */
	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
	len_t		mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
	/*
	 * Extra fields for congestion control, one per NFS call type,
	 * plus one global one.
	 */
	struct rpc_timers mi_timers[NFS_CALLTYPES+1];
	int		mi_curread;	/* current read size */
	int		mi_curwrite;	/* current write size */
	/*
	 * async I/O management
	 */
	struct nfs_async_reqs *mi_async_reqs[NFS_ASYNC_TYPES];
	struct nfs_async_reqs *mi_async_tail[NFS_ASYNC_TYPES];
	struct nfs_async_reqs **mi_async_curr;	/* current async queue */
	uint_t		mi_async_clusters[NFS_ASYNC_TYPES];
	uint_t		mi_async_init_clusters;
	uint_t		mi_async_req_count; /* # outstanding work requests */
	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
	ushort_t	mi_threads;	/* number of active async threads */
	ushort_t	mi_max_threads;	/* max number of async worker threads */
	kthread_t	*mi_manager_thread;  /* async manager thread */
	kcondvar_t	mi_async_cv; /* signaled when the last worker dies */
	kcondvar_t	mi_async_work_cv; /* tell workers to work */
	kmutex_t	mi_async_lock;	/* lock to protect async list */
	/*
	 * Other stuff
	 */
	struct pathcnf *mi_pathconf;	/* static pathconf kludge */
	rpcprog_t	mi_prog;	/* RPC program number */
	rpcvers_t	mi_vers;	/* RPC program version number */
	char		**mi_rfsnames;	/* mapping to proc names */
	kstat_named_t	*mi_reqs;	/* count of requests */
	uchar_t		*mi_call_type;	/* dynamic retrans call types */
	uchar_t		*mi_ss_call_type;	/* semisoft call type */
	uchar_t		*mi_timer_type;	/* dynamic retrans timer types */
	clock_t		mi_printftime;	/* last error printf time */
	/*
	 * ACL entries
	 */
	char		**mi_aclnames;	/* mapping to proc names */
	kstat_named_t	*mi_aclreqs;	/* count of acl requests */
	uchar_t		*mi_acl_call_type; /* dynamic retrans call types */
	uchar_t		*mi_acl_ss_call_type; /* semisoft call types */
	uchar_t		*mi_acl_timer_type; /* dynamic retrans timer types */
	/*
	 * Client Side Failover stats
	 */
	uint_t		mi_noresponse;	/* server not responding count */
	uint_t		mi_failover; 	/* failover to new server count */
	uint_t		mi_remap;	/* remap to new server count */
	/*
	 * Kstat statistics
	 */
	struct kstat	*mi_io_kstats;
	struct kstat	*mi_ro_kstats;
	struct knetconfig *mi_klmconfig;
	/*
	 * Zones support.
	 */
	struct zone	*mi_zone;	/* Zone mounted in */
	list_node_t	mi_zone_node;	/* Linkage into per-zone mi list */
	/*
	 * Serializes threads in failover_remap.
	 * Need to acquire this lock first in failover_remap() function
	 * before acquiring any other rnode lock.
	 */
	kmutex_t	mi_remap_lock;
} mntinfo_t;

/*
 * vfs pointer to mount info
 */
#define	VFTOMI(vfsp)	((mntinfo_t *)((vfsp)->vfs_data))

/*
 * vnode pointer to mount info
 */
#define	VTOMI(vp)	((mntinfo_t *)(((vp)->v_vfsp)->vfs_data))

/*
 * The values for mi_flags.
 */
#define	MI_HARD		0x1		/* hard or soft mount */
#define	MI_PRINTED	0x2		/* not responding message printed */
#define	MI_INT		0x4		/* interrupts allowed on hard mount */
#define	MI_DOWN		0x8		/* server is down */
#define	MI_NOAC		0x10		/* don't cache attributes */
#define	MI_NOCTO	0x20		/* no close-to-open consistency */
#define	MI_DYNAMIC	0x40		/* dynamic transfer size adjustment */
#define	MI_LLOCK	0x80		/* local locking only (no lockmgr) */
#define	MI_GRPID	0x100		/* System V group id inheritance */
#define	MI_RPCTIMESYNC	0x200		/* RPC time sync */
#define	MI_LINK		0x400		/* server supports link */
#define	MI_SYMLINK	0x800		/* server supports symlink */
#define	MI_READDIRONLY	0x1000		/* use readdir instead of readdirplus */
#define	MI_ACL		0x2000		/* server supports NFS_ACL */
#define	MI_BINDINPROG	0x4000		/* binding to server is changing */
#define	MI_LOOPBACK	0x8000		/* Set if this is a loopback mount */
#define	MI_SEMISOFT	0x10000		/* soft reads, hard modify */
#define	MI_NOPRINT	0x20000		/* don't print messages */
#define	MI_DIRECTIO	0x40000		/* do direct I/O */
#define	MI_EXTATTR	0x80000		/* server supports extended attrs */
#define	MI_ASYNC_MGR_STOP	0x100000	/* tell async mgr to die */
#define	MI_DEAD		0x200000	/* mount has been terminated */

/*
 * Read-only mntinfo statistics
 */
struct mntinfo_kstat {
	char		mik_proto[KNC_STRSIZE];
	uint32_t	mik_vers;
	uint_t		mik_flags;
	uint_t		mik_secmod;
	uint32_t	mik_curread;
	uint32_t	mik_curwrite;
	int		mik_timeo;
	int		mik_retrans;
	uint_t		mik_acregmin;
	uint_t		mik_acregmax;
	uint_t		mik_acdirmin;
	uint_t		mik_acdirmax;
	struct {
		uint32_t srtt;
		uint32_t deviate;
		uint32_t rtxcur;
	} mik_timers[NFS_CALLTYPES+1];
	uint32_t	mik_noresponse;
	uint32_t	mik_failover;
	uint32_t	mik_remap;
	char		mik_curserver[SYS_NMLN];
};

/*
 * Mark cached attributes as timed out
 *
 * The caller must not be holding the rnode r_statelock mutex.
 */
#define	PURGE_ATTRCACHE(vp)	{				\
	rnode_t *rp = VTOR(vp);					\
	mutex_enter(&rp->r_statelock);				\
	PURGE_ATTRCACHE_LOCKED(rp);				\
	mutex_exit(&rp->r_statelock);				\
}

#define	PURGE_ATTRCACHE_LOCKED(rp)	{			\
	ASSERT(MUTEX_HELD(&rp->r_statelock));			\
	rp->r_attrtime = gethrtime();				\
	rp->r_mtime = rp->r_attrtime;				\
}

/*
 * Is the attribute cache valid?
 */
#define	ATTRCACHE_VALID(vp)	(gethrtime() < VTOR(vp)->r_attrtime)

/*
 * Flags to indicate whether to purge the DNLC for non-directory vnodes
 * in a call to nfs_purge_caches.
 */
#define	NFS_NOPURGE_DNLC	0
#define	NFS_PURGE_DNLC		1

/*
 * If returned error is ESTALE flush all caches.
 */
#define	PURGE_STALE_FH(error, vp, cr)				\
	if ((error) == ESTALE) {				\
		struct rnode *rp = VTOR(vp);			\
		if (vp->v_flag & VROOT) {			\
			servinfo_t *svp = rp->r_server;		\
			mutex_enter(&svp->sv_lock);		\
			svp->sv_flags |= SV_ROOT_STALE;		\
			mutex_exit(&svp->sv_lock);		\
		}						\
		mutex_enter(&rp->r_statelock);			\
		rp->r_flags |= RSTALE;				\
		if (!rp->r_error)				\
			rp->r_error = (error);			\
		mutex_exit(&rp->r_statelock);			\
		if (vn_has_cached_data(vp))			\
			nfs_invalidate_pages((vp), (u_offset_t)0, (cr)); \
		nfs_purge_caches((vp), NFS_PURGE_DNLC, (cr));	\
	}

/*
 * Is cache valid?
 * Swap is always valid, if no attributes (attrtime == 0) or
 * if mtime matches cached mtime it is valid
 * NOTE: mtime is now a timestruc_t.
 * Caller should be holding the rnode r_statelock mutex.
 */
#define	CACHE_VALID(rp, mtime, fsize)				\
	((RTOV(rp)->v_flag & VISSWAP) == VISSWAP ||		\
	(((mtime).tv_sec == (rp)->r_attr.va_mtime.tv_sec &&	\
	(mtime).tv_nsec == (rp)->r_attr.va_mtime.tv_nsec) &&	\
	((fsize) == (rp)->r_attr.va_size)))

/*
 * Macro to detect forced unmount or a zone shutdown.
 */
#define	FS_OR_ZONE_GONE(vfsp) \
	(((vfsp)->vfs_flag & VFS_UNMOUNTED) || \
	zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)

/*
 * Convert NFS tunables to hrtime_t units, seconds to nanoseconds.
 */
#define	SEC2HR(sec)	((sec) * (long long)NANOSEC)
#define	HR2SEC(hr)	((hr) / (long long)NANOSEC)

/*
 * Structure to identify owner of a PC file share reservation.
 */
struct nfs_owner {
	int	magic;		/* magic uniquifying number */
	char	hname[16];	/* first 16 bytes of hostname */
	char	lowner[8];	/* local owner from fcntl */
};

/*
 * Values for magic.
 */
#define	NFS_OWNER_MAGIC	0x1D81E

/*
 * Support for extended attributes
 */
#define	XATTR_DIR_NAME	"/@/"		/* used for DNLC entries */
#define	XATTR_RPATH	"ExTaTtR"	/* used for r_path for failover */

/*
 * Short hand for checking to see whether the file system was mounted
 * interruptible or not.
 */
#define	INTR(vp)	(VTOMI(vp)->mi_flags & MI_INT)

/*
 * Short hand for checking whether failover is enabled or not
 */
#define	FAILOVER_MOUNT(mi)	(mi->mi_servers->sv_next)

/*
 * How long will async threads wait for additional work.
 */
#define	NFS_ASYNC_TIMEOUT	(60 * 1 * hz)	/* 1 minute */

#ifdef _KERNEL
extern int	clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **);
extern void	clfree(CLIENT *, struct chtab *);
extern void	nfs_mi_zonelist_add(mntinfo_t *);
extern void	nfs_free_mi(mntinfo_t *);
extern void	nfs_mnt_kstat_init(struct vfs *);
#endif

/*
 * Per-zone data for managing client handles.  Included here solely for the
 * benefit of MDB.
 */
/*
 * client side statistics
 */
struct clstat {
	kstat_named_t	calls;			/* client requests */
	kstat_named_t	badcalls;		/* rpc failures */
	kstat_named_t	clgets;			/* client handle gets */
	kstat_named_t	cltoomany;		/* client handle cache misses */
#ifdef DEBUG
	kstat_named_t	clalloc;		/* number of client handles */
	kstat_named_t	noresponse;		/* server not responding cnt */
	kstat_named_t	failover;		/* server failover count */
	kstat_named_t	remap;			/* server remap count */
#endif
};

struct nfs_clnt {
	struct chhead	*nfscl_chtable;
	kmutex_t	nfscl_chtable_lock;
	zoneid_t	nfscl_zoneid;
	list_node_t	nfscl_node;
	struct clstat	nfscl_stat;
};

#ifdef	__cplusplus
}
#endif

#endif	/* _NFS_NFS_CLNT_H */