/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/modctl.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/gfs.h>
#include <sys/vnode.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/policy.h>
#include <sys/mount.h>
#include <sys/pathname.h>
#include <sys/dirent.h>
#include <fs/fs_subr.h>
#include <sys/contract.h>
#include <sys/contract_impl.h>
#include <sys/ctfs.h>
#include <sys/ctfs_impl.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>

/*
 * ctfs, the contract filesystem.
 *
 * Exposes the contract subsystem to userland.  The structure of the
 * filesystem is a public interface, but the behavior of the files is
 * private and unstable.  Contract consumers are expected to use
 * libcontract(3lib) to operate on ctfs file descriptors.
 *
 * We're trying something a little different here.  Rather than make
 * each vnode op itself call into a vector of file type operations, we
 * actually use different vnode types (gasp!), the implementations of
 * which may call into routines providing common functionality.  This
 * design should hopefully make it easier to factor and maintain the
 * code.  For the most part, there is a separate file for each vnode
 * type's implementation.  The exceptions to this are the ctl/stat
 * nodes, which are very similar, and the three event endpoint types.
 *
 * This file contains common routines used by some or all of the vnode
 * types, the filesystem's module linkage and VFS operations, and the
 * implementation of the root vnode.
 */

/*
 * Ops vectors for all the vnode types; they have to be defined
 * somewhere.  See gfs_make_opsvec for thoughts on how this could be
 * done differently.
 *
 * The address of each pointer below appears in ctfs_opsvec[] next to
 * its operations template; ctfs_init() passes that table to
 * gfs_make_opsvec().
 */
vnodeops_t *ctfs_ops_root;	/* root directory */
vnodeops_t *ctfs_ops_adir;	/* "all" directory */
vnodeops_t *ctfs_ops_sym;	/* "all" symlink */
vnodeops_t *ctfs_ops_tdir;	/* template directory */
vnodeops_t *ctfs_ops_tmpl;	/* template file */
vnodeops_t *ctfs_ops_cdir;	/* contract directory */
vnodeops_t *ctfs_ops_ctl;	/* ctl file */
vnodeops_t *ctfs_ops_stat;	/* status file */
vnodeops_t *ctfs_ops_event;	/* events file */
vnodeops_t *ctfs_ops_bundle;	/* bundle file */
vnodeops_t *ctfs_ops_latest;	/* latest file */

/* Tables defined later in this file but referenced before then. */
static const fs_operation_def_t ctfs_vfstops[];
static gfs_opsvec_t ctfs_opsvec[];

/* The vfsdef_t init entry point; referenced by the vfw initializer. */
static int ctfs_init(int, char *);

/* Inode callback that ctfs_mount() hands to gfs_root_create(). */
static ino64_t ctfs_root_do_inode(vnode_t *, int);


/*
 * File system module linkage
 *
 * ctfs_mntopts is the mount options prototype referenced from vfw.
 * It is initialized to (0, NULL); presumably this means ctfs declares
 * no filesystem-specific mount options -- confirm against mntopts_t.
 */
static mntopts_t ctfs_mntopts = {
	0,
	NULL
};

/*
 * The vfsdef_t describing ctfs; referenced by modlfs.  It carries the
 * "ctfs" name string, ctfs_init() as the init entry point, and a
 * pointer to ctfs_mntopts.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"ctfs",
	ctfs_init,		/* the vfsdef_t init entry point */
	VSW_HASPROTO,
	&ctfs_mntopts,
};

/* Module operations for filesystems; defined outside this file. */
extern struct mod_ops mod_fsops;

/*
 * Filesystem linkage element: bundles mod_fsops, a descriptive
 * string, and the ctfs vfsdef_t (vfw).  Referenced by modlinkage.
 */
static struct modlfs modlfs = {
	&mod_fsops, "contract filesystem", &vfw
};

/*
 * Top-level module linkage, passed to mod_install() by _init() and to
 * mod_info() by _info().  It lists modlfs followed by a terminating
 * NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};

/*
 * _init - module load entry point
 *
 * Installs the ctfs module linkage and hands back whatever
 * mod_install() returned.
 */
int
_init(void)
{
	int rv;

	rv = mod_install(&modlinkage);
	return (rv);
}

/*
 * _info - module information entry point
 *
 * Forwards the request to mod_info() using the ctfs module linkage
 * and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}

/*
 * _fini - module unload entry point
 *
 * Unloading filesystem modules isn't completely safe, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	const int refused = EBUSY;

	return (refused);
}

/* Filesystem type index; recorded by ctfs_init(). */
static int ctfs_fstype;
/* Device major from getudev(); ctfs_init() falls back to 0 on failure. */
static major_t ctfs_major;
/* Counter that ctfs_mount() atomically increments to choose a minor. */
static minor_t ctfs_minor = 0;

/*
 * The ops vector vector.
 *
 * Each ctfs_opsvec[] entry groups a descriptive string, an operations
 * template, and the address of the matching vnodeops_t pointer.  The
 * table ends with a NULL entry and is passed to gfs_make_opsvec() by
 * ctfs_init().
 *
 * Only the root template is defined in this file; the remaining
 * templates are declared extern here and defined elsewhere.
 */
static const fs_operation_def_t ctfs_tops_root[];
extern const fs_operation_def_t ctfs_tops_tmpl[];
extern const fs_operation_def_t ctfs_tops_ctl[];
extern const fs_operation_def_t ctfs_tops_adir[];
extern const fs_operation_def_t ctfs_tops_cdir[];
extern const fs_operation_def_t ctfs_tops_tdir[];
extern const fs_operation_def_t ctfs_tops_latest[];
extern const fs_operation_def_t ctfs_tops_stat[];
extern const fs_operation_def_t ctfs_tops_sym[];
extern const fs_operation_def_t ctfs_tops_event[];
extern const fs_operation_def_t ctfs_tops_bundle[];
static gfs_opsvec_t ctfs_opsvec[] = {
	{ "ctfs root directory", ctfs_tops_root, &ctfs_ops_root },
	{ "ctfs all directory", ctfs_tops_adir, &ctfs_ops_adir },
	{ "ctfs all symlink", ctfs_tops_sym, &ctfs_ops_sym },
	{ "ctfs template directory", ctfs_tops_tdir, &ctfs_ops_tdir },
	{ "ctfs template file", ctfs_tops_tmpl, &ctfs_ops_tmpl },
	{ "ctfs contract directory", ctfs_tops_cdir, &ctfs_ops_cdir },
	{ "ctfs ctl file", ctfs_tops_ctl, &ctfs_ops_ctl },
	{ "ctfs status file", ctfs_tops_stat, &ctfs_ops_stat },
	{ "ctfs events file", ctfs_tops_event, &ctfs_ops_event },
	{ "ctfs bundle file", ctfs_tops_bundle, &ctfs_ops_bundle },
	{ "ctfs latest file", ctfs_tops_latest, &ctfs_ops_latest },
	{ NULL }	/* end-of-table marker */
};


/*
 * ctfs_init - the vfsdef_t init entry point
 *
 * Records the filesystem type index, installs the VFS operations,
 * builds every vnode ops vector listed in ctfs_opsvec, and obtains a
 * device major number for ctfs mounts.
 *
 * Returns 0 on success, or the error reported by vfs_setfsops() or
 * gfs_make_opsvec().  Failure to obtain a unique major number is not
 * treated as an error: a warning is logged and major 0 is used.
 */
/* ARGSUSED */
static int
ctfs_init(int fstype, char *name)
{
	vfsops_t *ops;
	int rv;

	ctfs_fstype = fstype;

	rv = vfs_setfsops(fstype, ctfs_vfstops, &ops);
	if (rv != 0) {
		cmn_err(CE_WARN, "ctfs_init: bad vfs ops template");
		return (rv);
	}

	rv = gfs_make_opsvec(ctfs_opsvec);
	if (rv != 0) {
		/* Give back the VFS ops installed just above. */
		(void) vfs_freevfsops(ops);
		return (rv);
	}

	ctfs_major = getudev();
	if (ctfs_major == (major_t)-1) {
		cmn_err(CE_WARN, "ctfs_init: can't get unique device number");
		ctfs_major = 0;
	}

	return (0);
}

/*
 * ctfs_mount - the VFS_MOUNT entry point
 *
 * Validates the request, allocates the per-mount ctfs_vfs_t, picks a
 * device number that is not already mounted, and creates the root
 * vnode.  The root directory gets one entry per contract type plus
 * an "all" entry.
 *
 * Returns 0 on success; EPERM if secpolicy_fs_mount() rejects the
 * caller; ENOTDIR if the mount point is not a directory; EBUSY if
 * MS_OVERLAY is not set and the mount point either has more than one
 * hold or is flagged VROOT.
 */
static int
ctfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	/* One slot per contract type, one for "all", one terminator. */
	const size_t tablesz = (ct_ntypes + 2) * sizeof (gfs_dirent_t);
	ctfs_vfs_t *ctvfs;
	gfs_dirent_t *table, *entry;
	dev_t dev;
	int t;

	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
		return (EPERM);

	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	if ((uap->flags & MS_OVERLAY) == 0) {
		if (mvp->v_count > 1 || (mvp->v_flag & VROOT) != 0)
			return (EBUSY);
	}

	ctvfs = kmem_alloc(sizeof (*ctvfs), KM_SLEEP);

	/*
	 * Initialize vfs fields not initialized by VFS_INIT/domount
	 */
	vfsp->vfs_bsize = DEV_BSIZE;
	vfsp->vfs_fstype = ctfs_fstype;

	/* Keep bumping the minor until the device number is unused. */
	for (;;) {
		minor_t minor;

		minor = atomic_add_32_nv(&ctfs_minor, 1) & L_MAXMIN32;
		dev = makedevice(ctfs_major, minor);
		if (!vfs_devismounted(dev))
			break;
	}
	vfs_make_fsid(&vfsp->vfs_fsid, dev, ctfs_fstype);
	vfsp->vfs_data = ctvfs;
	vfsp->vfs_dev = dev;

	/*
	 * Build a temporary gfs_dirent_t table describing the root
	 * directory.  It is zero-filled, so only the fields set below
	 * are non-zero.
	 */
	table = kmem_zalloc(tablesz, KM_SLEEP);
	entry = table;
	for (t = 0; t < ct_ntypes; t++, entry++) {
		entry->gfse_name = (char *)ct_types[t]->ct_type_name;
		entry->gfse_ctor = ctfs_create_tdirnode;
		entry->gfse_flags = GFS_CACHE_VNODE;
	}
	entry->gfse_name = "all";
	entry->gfse_ctor = ctfs_create_adirnode;
	entry->gfse_flags = GFS_CACHE_VNODE;
	entry[1].gfse_name = NULL;

	/*
	 * Create root vnode
	 */
	ctvfs->ctvfs_root = gfs_root_create(sizeof (ctfs_rootnode_t),
	    vfsp, ctfs_ops_root, CTFS_INO_ROOT, table, ctfs_root_do_inode,
	    CTFS_NAME_MAX, NULL, NULL);

	kmem_free(table, tablesz);

	return (0);
}

/*
 * ctfs_unmount - the VFS_UNMOUNT entry point
 *
 * Returns 0 on success; EPERM if secpolicy_fs_unmount() rejects the
 * caller; ENOTSUP for forced unmounts (MS_FORCE), which are not
 * implemented; EBUSY if the root vnode has more than one hold.
 */
static int
ctfs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
{
	ctfs_vfs_t *ctvfs;
	vnode_t *rootvp;

	if (secpolicy_fs_unmount(cr, vfsp) != 0)
		return (EPERM);

	/*
	 * Forced unmounts aren't supported yet; it would be nice to
	 * add them at some point.
	 */
	if ((flag & MS_FORCE) != 0)
		return (ENOTSUP);

	/*
	 * The vfs must hold at least two references here: the
	 * caller's and the root vnode's.
	 */
	ASSERT(vfsp->vfs_count >= 2);

	/*
	 * Any active vnode (transitively) holds the root vnode, so
	 * an extra hold on the root means the filesystem is in use.
	 */
	ctvfs = vfsp->vfs_data;
	rootvp = ctvfs->ctvfs_root;
	if (rootvp->v_count > 1)
		return (EBUSY);

	/*
	 * Drop the final hold on the root vnode; it will in turn
	 * drop its hold on the vfs.
	 */
	VN_RELE(rootvp);

	/*
	 * Free the per-mount data.
	 */
	kmem_free(ctvfs, sizeof (*ctvfs));

	return (0);
}

/*
 * ctfs_root - the VFS_ROOT entry point
 *
 * Takes a hold on the mount's root vnode and returns it through
 * vpp.  Always returns 0.
 */
static int
ctfs_root(vfs_t *vfsp, vnode_t **vpp)
{
	ctfs_vfs_t *ctvfs = vfsp->vfs_data;
	vnode_t *rootvp = ctvfs->ctvfs_root;

	VN_HOLD(rootvp);
	*vpp = rootvp;

	return (0);
}

/*
 * ctfs_statvfs - the VFS_STATVFS entry point
 *
 * Zeroes *sp and fills it in.  The file count is the sum of
 * contract_type_count() over every contract type; the free and
 * available counts are reported as INT_MAX less that sum.  Always
 * returns 0.
 */
static int
ctfs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
{
	dev32_t fsid32;
	int ncontracts = 0;
	int t;

	for (t = 0; t < ct_ntypes; t++)
		ncontracts += contract_type_count(ct_types[t]);

	bzero(sp, sizeof (*sp));

	sp->f_bsize = DEV_BSIZE;
	sp->f_frsize = DEV_BSIZE;

	sp->f_files = ncontracts;
	sp->f_ffree = INT_MAX - ncontracts;
	sp->f_favail = sp->f_ffree;

	/* f_fsid is taken from the compressed 32-bit device number. */
	(void) cmpldev(&fsid32, vfsp->vfs_dev);
	sp->f_fsid = fsid32;

	(void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
	    sizeof (sp->f_basetype));
	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
	sp->f_namemax = CTFS_NAME_MAX;
	(void) strlcpy(sp->f_fstr, "contract", sizeof (sp->f_fstr));

	return (0);
}

/*
 * VFS operations template for ctfs, passed to vfs_setfsops() by
 * ctfs_init().  It names the mount, unmount, root and statvfs entry
 * points defined above and ends with a NULL entry.
 */
static const fs_operation_def_t ctfs_vfstops[] = {
	{ VFSNAME_MOUNT,	{ .vfs_mount = ctfs_mount } },
	{ VFSNAME_UNMOUNT,	{ .vfs_unmount = ctfs_unmount } },
	{ VFSNAME_ROOT,		{ .vfs_root = ctfs_root } },
	{ VFSNAME_STATVFS,	{ .vfs_statvfs = ctfs_statvfs } },
	{ NULL, NULL }
};

/*
 * ctfs_common_getattr
 *
 * Fills in the attributes that are the same for every ctfs
 * VOP_GETATTR entry point.  The caller must already have set
 * vap->va_size, because the block count is derived from it.
 */
void
ctfs_common_getattr(vnode_t *vp, vattr_t *vap)
{
	vfs_t *vfsp = vp->v_vfsp;

	/* Ownership and sequence are fixed at zero. */
	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_seq = 0;

	/* Identity: the mount's device and the node's gfs inode. */
	vap->va_rdev = 0;
	vap->va_fsid = vfsp->vfs_dev;
	vap->va_nodeid = gfs_file_inode(vp);

	/* Size in blocks, rounded up from va_size. */
	vap->va_blksize = DEV_BSIZE;
	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
}

/*
 * ctfs_open - common VOP_OPEN entry point
 *
 * Shared by all ctfs directories.  The open succeeds only when the
 * caller uses large-file aware interfaces (FOFFMAX set) and does not
 * ask for write access (FWRITE clear); otherwise EINVAL is returned.
 */
/* ARGSUSED */
int
ctfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	if ((flag & (FOFFMAX | FWRITE)) == FOFFMAX)
		return (0);

	return (EINVAL);
}

/*
 * ctfs_close - common VOP_CLOSE entry point
 *
 * Shared by every ctfs vnode type that needs no clean-up at close
 * time.  Does nothing and returns 0.
 */
/* ARGSUSED */
int
ctfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	return (0);
}

/*
 * ctfs_access_dir - common VOP_ACCESS entry point for directories
 *
 * Write access is refused with EACCES; any other request returns 0.
 */
/* ARGSUSED */
int
ctfs_access_dir(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	return ((mode & VWRITE) ? EACCES : 0);
}

/*
 * ctfs_access_readonly - common VOP_ACCESS entry point for read-only
 * files
 *
 * Write and execute access are refused with EACCES; any other
 * request returns 0.
 */
/* ARGSUSED */
int
ctfs_access_readonly(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	return ((mode & (VWRITE | VEXEC)) ? EACCES : 0);
}

/*
 * ctfs_access_readwrite - common VOP_ACCESS entry point for
 * read-write files
 *
 * Execute access is refused with EACCES; any other request
 * returns 0.
 */
/* ARGSUSED */
int
ctfs_access_readwrite(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	return ((mode & VEXEC) ? EACCES : 0);
}

/*
 * ctfs_root_getattr - VOP_GETATTR entry point
 *
 * Reports the root as a mode 0555 directory.  The link count is
 * 2 + ct_ntypes + 1 and the size is set to the same value.  All
 * three timestamps are the vfs_mtime of the mount.  The remaining
 * fields come from ctfs_common_getattr().  Always returns 0.
 */
/* ARGSUSED */
static int
ctfs_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	vap->va_type = VDIR;
	vap->va_mode = 0555;

	vap->va_nlink = 2 + ct_ntypes + 1;
	vap->va_size = vap->va_nlink;

	/* Access, modify and change times are all identical. */
	vap->va_atime.tv_sec = vp->v_vfsp->vfs_mtime;
	vap->va_atime.tv_nsec = 0;
	vap->va_ctime = vap->va_atime;
	vap->va_mtime = vap->va_ctime;

	/* Must follow the va_size assignment above. */
	ctfs_common_getattr(vp, vap);

	return (0);
}

/*
 * ctfs_root_do_inode - inode callback for the root directory
 *
 * Handed to gfs_root_create() by ctfs_mount().  Converts index to an
 * inode number with CTFS_INO_TYPE_DIR(); vp is unused.
 */
/* ARGSUSED */
static ino64_t
ctfs_root_do_inode(vnode_t *vp, int index)
{
	ino64_t ino;

	ino = CTFS_INO_TYPE_DIR(index);
	return (ino);
}

/*
 * Vnode operations template for the ctfs root directory.  It is
 * listed in ctfs_opsvec[] alongside ctfs_ops_root.  Open, close,
 * getattr and access use routines from this file; ioctl is set to
 * fs_inval; readdir, lookup and inactive use the gfs_vop_* routines;
 * seek uses fs_seek.  The table ends with a NULL entry.
 */
static const fs_operation_def_t ctfs_tops_root[] = {
	{ VOPNAME_OPEN,		{ .vop_open = ctfs_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = ctfs_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = ctfs_root_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = ctfs_access_dir } },
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ NULL, NULL }
};