/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/*
 * Register the user buffer memory and initialize the passive RDMA
 * descriptor for a Data-IN transfer.  The data size is stored in
 * iser_pdu->data[ISER_DIR_IN].data_len.
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/*
 * Register the user buffer memory and initialize the passive RDMA
 * descriptor for a Data-OUT transfer.  The data size is stored in
 * iser_pdu->data[ISER_DIR_OUT].data_len.
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-OUT RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags     |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va   = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Initialize a tx descriptor: reset the iSER header and set up the header SGE */
void
iser_create_send_desc(struct iser_conn *iser_conn,
		      struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

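	/* Make sure the header SGE uses the current device's global DMA MR */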
	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

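/*
 * Unmap and free the connection's login buffer.  The DMA handles and the
 * buffer pointer are cleared afterwards, so a repeated call is a no-op.
 */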
void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_req_dma,
				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_resp_dma,
				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

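/*
 * Allocate a single buffer holding the login request region
 * (ISCSI_DEF_MAX_RECV_SEG_LEN bytes) followed by the login response region
 * (ISER_RX_LOGIN_SIZE bytes), and DMA-map each region for its direction.
 */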
int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);

	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf  = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
				    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}

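/*
 * Allocate the fast registration pool and cmds_max receive descriptors,
 * DMA-map each descriptor and fill in its receive SGE.  On failure all
 * resources acquired so far are released and ENOMEM is returned.
 */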
int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge       *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
				sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
				M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)  {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

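/*
 * Release the fast registration pool and unmap and free the receive
 * descriptors allocated by iser_alloc_rx_descriptors().
 */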
void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

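/*
 * Build a scatterlist for a plain KVA buffer, splitting it at page
 * boundaries.  data_buf->size is set to the number of entries used.
 */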
static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len)  {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)(((u64)buf) + (u64)len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}


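/*
 * Build a scatterlist from the pages backing an unmapped bio.  Only the
 * first entry may start at a non-zero offset within its page.
 */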
static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

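/*
 * Convert a CAM SCSI I/O data buffer into a scatterlist, dispatching on
 * the CAM data addressing mode (bio page list or plain KVA).
 */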
static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch ((ccbh->flags & CAM_DATA_MASK)) {
		case CAM_DATA_BIO:
			iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
			break;
		case CAM_DATA_VADDR:
			/*
			 * Support KVA buffers for various SCSI commands such as:
			 *  - REPORT_LUNS
			 *  - MODE_SENSE_6
			 *  - INQUIRY
			 *  - SERVICE_ACTION_IN
			 * The data for these commands is always mapped into KVA.
			 */
			iser_buf_to_sg(csio->data_ptr, data_buf);
			break;
		default:
			ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
			err = EINVAL;
	}
	return (err);
}

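/*
 * Request a signaled send completion only for every ISER_SIGNAL_CMD_COUNT-th
 * command, so most sends complete without generating a completion entry.
 */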
static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}

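/*
 * Build and post an iSER SCSI command PDU: set up the tx descriptor,
 * translate the CAM data buffer into a scatterlist and register it for the
 * RDMA Data-IN or Data-OUT transfer, then post the send work request.
 */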
int
iser_send_command(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* Build the tx descriptor: initialize the iSER header and header SGE */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
			hdr->bhssc_initiator_task_tag,
			hdr->bhssc_expected_data_transfer_length,
			err);
	return (err);
}

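/*
 * Build and post an iSER control-type PDU.  Any data segment is carried
 * from the pre-mapped login request buffer as a second SGE; until the
 * connection is handed off, a login receive buffer is also posted.
 */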
int
iser_send_control(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	int err;

	mdesc = &iser_pdu->desc;

	/* Build the tx descriptor: initialize the iSER header and header SGE */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		ib_dma_sync_single_for_cpu(device->ib_device,
				iser_conn->login_req_dma, datalen,
				DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
			iser_conn->login_req_dma, datalen,
			DMA_TO_DEVICE);

		tx_dsg->addr    = iser_conn->login_req_dma;
		tx_dsg->length  = datalen;
		tx_dsg->lkey    = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* During the login phase and for discovery sessions we reuse the login buffer */
	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);

}

/**
 * iser_rcv_completion - handle a receive (RX) completion: pass the received
 * PDU up to the iSCSI layer and repost receive buffers as needed.
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
		    unsigned long rx_xfer_len,
		    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate between the login response buffer and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * If data arrived with the PDU, point ip_data_mbuf at the receive
	 * buffer; it will be copied into the upper-layer buffers later.
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only after freeing the task
	 * eliminates the need to worry about tasks that complete in parallel
	 * with the execution of iser_conn_term.  The code that waits for the
	 * posted rx buffers refcount to reach zero then handles everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

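	/* Replenish the receive queue once enough buffers have been consumed */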
	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

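/*
 * Handle a send (TX) completion: control PDUs are freed here now that the
 * HCA has finished with their descriptor.
 */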
void
iser_snd_completion(struct iser_tx_desc *tx_desc,
		    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}