/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <inet/ip.h>
#include <sys/ib/clients/rds/rdsib_ib.h>
#include <sys/ib/clients/rds/rdsib_buf.h>
#include <sys/ib/clients/rds/rdsib_cm.h>
#include <sys/ib/clients/rds/rdsib_protocol.h>
#include <sys/ib/clients/rds/rds_transport.h>
#include <sys/ib/clients/rds/rds_kstat.h>

/*
 * Global Configuration Variables
 * As defined in RDS proposal
 */
uint_t		MaxNodes		= RDS_MAX_NODES;
uint_t		RdsPktSize;
uint_t		NDataRX;
uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;

extern int rdsib_open_ib();
extern void rdsib_close_ib();
extern void rds_resume_port(in_port_t port);
extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
    in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
extern boolean_t rds_if_lookup_by_name(char *devname);

rds_transport_ops_t rds_ib_transport_ops = {
	rdsib_open_ib,
	rdsib_close_ib,
	rds_sendmsg,
	rds_resume_port,
	rds_if_lookup_by_name
};

/* global */
rds_state_t	*rdsib_statep = NULL;
krwlock_t	rds_loopback_portmap_lock;
uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
ddi_taskq_t	*rds_taskq = NULL;
dev_info_t	*rdsib_dev_info = NULL;
uint_t		rds_rx_pkts_pending_hwm;

#ifdef DEBUG
uint32_t	rdsdbglvl = RDS_LOG_L3;
#else
uint32_t	rdsdbglvl = RDS_LOG_L2;
#endif

#define		RDS_NUM_TASKQ_THREADS	4

static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static void rds_read_config_values(dev_info_t *dip);

/* Driver entry points */
static struct cb_ops	rdsib_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev,			/* int (*cb_awrite)() */
};

/* Device options */
static struct dev_ops rdsib_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	rdsib_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rdsib_attach,		/* attach */
	rdsib_detach,		/* detach */
	nodev,			/* reset */
	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information.
 */
#define	RDS_DEVDESC	"RDS IB driver"
static struct modldrv rdsib_modldrv = {
	&mod_driverops,		/* Driver module */
	RDS_DEVDESC,		/* Driver name and version */
	&rdsib_ops,		/* Driver ops */
};

static struct modlinkage rdsib_modlinkage = {
	MODREV_1,
	(void *)&rdsib_modldrv,
	NULL
};

/* Called from _init */
int
rdsib_init()
{
	/* RDS supports only one instance */
	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);

	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);

	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);

	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);

	/* Initialize logging */
	rds_logging_initialization();

	RDS_SET_NPORT(1); /* this should never be 0 */

	ASSERT(rds_transport_ops == NULL);
	rds_transport_ops = &rds_ib_transport_ops;

	return (0);
}

/* Called from _fini */
void
rdsib_fini()
{
	/* Stop logging */
	rds_logging_destroy();

	cv_destroy(&rds_dpool.pool_cv);
	mutex_destroy(&rds_dpool.pool_lock);
	cv_destroy(&rds_cpool.pool_cv);
	mutex_destroy(&rds_cpool.pool_lock);

	rw_destroy(&rds_loopback_portmap_lock);

	rw_destroy(&rdsib_statep->rds_hca_lock);
	rw_destroy(&rdsib_statep->rds_sessionlock);
	kmem_free(rdsib_statep, sizeof (rds_state_t));

	rds_transport_ops = NULL;
}

int
_init(void)
{
	int	ret;

	if (ibt_hw_is_present() == 0) {
		return (ENODEV);
	}

	ret = rdsib_init();
	if (ret != 0) {
		return (ret);
	}

	ret = mod_install(&rdsib_modlinkage);
	if (ret != 0) {
		/*
		 * Could not load module
		 */
		rdsib_fini();
		return (ret);
	}

	return (0);
}

int
_fini()
{
	int	ret;

	/*
	 * Remove module
	 */
	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
		return (ret);
	}

	rdsib_fini();

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&rdsib_modlinkage, modinfop));
}

static int
rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	ret;

	RDS_DPRINTF2("rdsib_attach", "enter");

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (rdsib_dev_info != NULL) {
		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
		return (DDI_FAILURE);
	}

	rdsib_dev_info = dip;
	rds_read_config_values(dip);

	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
	    TASKQ_DEFAULTPRI, 0);
	if (rds_taskq == NULL) {
		RDS_DPRINTF1("rdsib_attach",
		    "ddi_taskq_create failed for rds_taskq");
		rdsib_dev_info = NULL;
		return (DDI_FAILURE);
	}

	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
	if (ret != DDI_SUCCESS) {
		RDS_DPRINTF1("rdsib_attach",
		    "ddi_create_minor_node failed: %d", ret);
		ddi_taskq_destroy(rds_taskq);
		rds_taskq = NULL;
		rdsib_dev_info = NULL;
		return (DDI_FAILURE);
	}

	/* Max number of receive buffers on the system */
	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;

	/*
	 * High water mark for the receive buffers in the system. If the
	 * number of buffers used crosses this mark then all sockets in
	 * would be stalled. The port quota for the sockets is set based
	 * on this limit.
	 */
	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;

	ret = rdsib_initialize_ib();
	if (ret != 0) {
		RDS_DPRINTF1("rdsib_attach",
		    "rdsib_initialize_ib failed: %d", ret);
		ddi_taskq_destroy(rds_taskq);
		rds_taskq = NULL;
		rdsib_dev_info = NULL;
		return (DDI_FAILURE);
	}

	RDS_DPRINTF2("rdsib_attach", "return");

	return (DDI_SUCCESS);
}

static int
rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	RDS_DPRINTF2("rdsib_detach", "enter");

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	rdsib_deinitialize_ib();

	ddi_remove_minor_node(dip, "rdsib");

	/* destroy taskq */
	if (rds_taskq != NULL) {
		ddi_taskq_destroy(rds_taskq);
		rds_taskq = NULL;
	}

	rdsib_dev_info = NULL;

	RDS_DPRINTF2("rdsib_detach", "return");

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int ret = DDI_FAILURE;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (rdsib_dev_info != NULL) {
			*result = (void *)rdsib_dev_info;
			ret = DDI_SUCCESS;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = NULL;
		ret = DDI_SUCCESS;
		break;

	default:
		break;
	}

	return (ret);
}

static void
rds_read_config_values(dev_info_t *dip)
{
	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "MaxNodes", RDS_MAX_NODES);

	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);

	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);

	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);

	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);

	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);

	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);

	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);

	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);

	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "MinRnrRetry", RDS_IB_RNR_RETRY);

	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);

	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);

	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "rdsdbglvl", RDS_LOG_L2);

	if (MaxNodes < 2) {
		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
		MaxNodes = 2;
	}
}