/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
 */

#include "mt.h"
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <errno.h>
#include <stropts.h>
#include <sys/stream.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/stat.h>
#include <xti.h>
#include <fcntl.h>
#include <signal.h>
#include <assert.h>
#include <syslog.h>
#include <limits.h>
#include <ucred.h>
#include "tx.h"

#define	DEFSIZE	2048

/*
 * The following used to be in tiuser.h, but was causing too much namespace
 * pollution.
 */
#define	ROUNDUP32(X)	((X + 0x03) & ~0x03)

static struct _ti_user *find_tilink(int s);
static struct _ti_user *add_tilink(int s);
static void _t_free_lookbufs(struct _ti_user *tiptr);
static unsigned int _t_setsize(t_scalar_t infosize, boolean_t option);
static int _t_cbuf_alloc(struct _ti_user *tiptr, char **retbuf);
static int _t_rbuf_alloc(struct _ti_user *tiptr, char **retbuf);
static int _t_adjust_state(int fd, int instate);
static int _t_alloc_bufs(int fd, struct _ti_user *tiptr,
	struct T_info_ack *tsap);

mutex_t	_ti_userlock = DEFAULTMUTEX;	/* Protects hash_bucket[] */

/*
 * Checkfd - checks validity of file descriptor
 */
struct _ti_user *
_t_checkfd(int fd, int force_sync, int api_semantics)
{
	sigset_t mask;
	struct _ti_user *tiptr;
	int retval, timodpushed;

	if (fd < 0) {
		t_errno = TBADF;
		return (NULL);
	}

	if (!force_sync) {
		sig_mutex_lock(&_ti_userlock);
		tiptr = find_tilink(fd);
		sig_mutex_unlock(&_ti_userlock);
		if (tiptr != NULL)
			return (tiptr);
	}

	/*
	 * Not found, or a forced sync is required.
	 * Check if this is a valid TLI/XTI descriptor.
	 */
	timodpushed = 0;
	do {
		retval = ioctl(fd, I_FIND, "timod");
	} while (retval < 0 && errno == EINTR);

	if (retval < 0 || (retval == 0 && _T_IS_TLI(api_semantics))) {
		/*
		 * Not a stream, or a TLI endpoint with no timod.
		 * XXX Note: If it is an XTI call, we push "timod" and
		 * try to convert it into a transport endpoint later.
		 * We do not do it for TLI and "retain" the old buggy
		 * behavior because ypbind and a lot of other daemons seem
		 * to use a buggy logic test of the form
		 * "(t_getstate(0) != -1 || t_errno != TBADF)" to see if
		 * they were ever invoked with a request on stdin and drop
		 * into untested code. This test is in code generated by
		 * rpcgen, which is why the test is replicated in many
		 * daemons too.
		 * We will need to fix that test too with an
		 * "IsaTLIendpoint" test if we ever fix this for TLI.
		 */
		t_errno = TBADF;
		return (NULL);
	}

	if (retval == 0) {
		/*
		 * "timod" is not already on the stream; push it.
		 */
		do {
			/*
			 * Assumes (correctly) that I_PUSH is
			 * atomic w.r.t signals (EINTR error)
			 */
			retval = ioctl(fd, I_PUSH, "timod");
		} while (retval < 0 && errno == EINTR);

		if (retval < 0) {
			t_errno = TSYSERR;
			return (NULL);
		}
		timodpushed = 1;
	}
	/*
	 * Try to (re)constitute the info at user level from state
	 * in the kernel. This could be information that was lost due
	 * to an exec, or because the endpoint is being instantiated at
	 * a new descriptor via open(), dup2(), etc.
	 *
	 * _t_create() requires that all signals be blocked.
	 * Note that sig_mutex_lock() only defers signals, it does not
	 * block them, so interruptible syscalls could still get EINTR.
	 */
	(void) thr_sigsetmask(SIG_SETMASK, &fillset, &mask);
	sig_mutex_lock(&_ti_userlock);
	tiptr = _t_create(fd, NULL, api_semantics, NULL);
	if (tiptr == NULL) {
		int sv_errno = errno;

		sig_mutex_unlock(&_ti_userlock);
		(void) thr_sigsetmask(SIG_SETMASK, &mask, NULL);
		/*
		 * Restore the stream to what it was before timod was
		 * pushed. It may not have been a network transport
		 * stream.
		 */
		if (timodpushed)
			(void) ioctl(fd, I_POP, 0);
		errno = sv_errno;
		return (NULL);
	}
	sig_mutex_unlock(&_ti_userlock);
	(void) thr_sigsetmask(SIG_SETMASK, &mask, NULL);
	return (tiptr);
}

/*
 * Copy data to the output buffer, making sure the output buffer is
 * 32-bit aligned even though the input buffer may not be.
 */
int
_t_aligned_copy(
	struct strbuf *strbufp,
	int len,
	int init_offset,
	char *datap,
	t_scalar_t *rtn_offset)
{
	*rtn_offset = ROUNDUP32(init_offset);
	if ((*rtn_offset + len) > strbufp->maxlen) {
		/*
		 * Aligned copy will overflow buffer
		 */
		return (-1);
	}
	(void) memcpy(strbufp->buf + *rtn_offset, datap, (size_t)len);

	return (0);
}

/*
 * Append data and control info to the look buffer (a list in the MT case).
 *
 * The only thing that can be in the look buffer is a T_DISCON_IND,
 * T_ORDREL_IND or a T_UDERROR_IND.
 *
 * It also enforces priority of T_DISCON_INDs over any T_ORDREL_IND
 * already in the buffer. It assumes no T_ORDREL_IND is appended
 * when there is already something on the looklist (error case) and
 * that a T_ORDREL_IND, if present, will always be the first on the
 * list.
 *
 * This also assumes ti_lock is held via sig_mutex_lock(),
 * so signals are deferred here.
 */
int
_t_register_lookevent(
	struct _ti_user *tiptr,
	caddr_t dptr,
	int dsize,
	caddr_t cptr,
	int csize)
{
	struct _ti_lookbufs *tlbs;
	int cbuf_size, dbuf_size;

	assert(MUTEX_HELD(&tiptr->ti_lock));

	cbuf_size = tiptr->ti_ctlsize;
	dbuf_size = tiptr->ti_rcvsize;

	if ((csize > cbuf_size) || dsize > dbuf_size) {
		/* can't fit - return error */
		return (-1);		/* error */
	}
	/*
	 * Enforce priority of T_DISCON_IND over T_ORDREL_IND
	 * queued earlier.
	 * Note: Since there can be at most one T_ORDREL_IND
	 * queued (more than one is an error case), and we look for it
	 * on each append of T_DISCON_IND, it can only be at the
	 * head of the list if it is there.
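	 *
	 * Illustrative scenario (not from the original comment): if the
	 * list currently holds a single T_ORDREL_IND and a T_DISCON_IND
	 * now arrives in cptr, the check below frees the queued
	 * T_ORDREL_IND via _t_free_looklist_head() and the T_DISCON_IND
	 * is then appended, so the disconnect is what the user sees first.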
*/ if (tiptr->ti_lookcnt > 0) { /* something already on looklist */ if (cptr && csize >= (int)sizeof (struct T_discon_ind) && /* LINTED pointer cast */ *(t_scalar_t *)cptr == T_DISCON_IND) { /* appending discon ind */ assert(tiptr->ti_servtype != T_CLTS); /* LINTED pointer cast */ if (*(t_scalar_t *)tiptr->ti_lookbufs.tl_lookcbuf == T_ORDREL_IND) { /* T_ORDREL_IND is on list */ /* * Blow away T_ORDREL_IND */ _t_free_looklist_head(tiptr); } } } tlbs = &tiptr->ti_lookbufs; if (tiptr->ti_lookcnt > 0) { int listcount = 0; /* * Allocate and append a new lookbuf to the * existing list. (Should only happen in MT case) */ while (tlbs->tl_next != NULL) { listcount++; tlbs = tlbs->tl_next; } assert(tiptr->ti_lookcnt == listcount); /* * signals are deferred, calls to malloc() are safe. */ if ((tlbs->tl_next = malloc(sizeof (struct _ti_lookbufs))) == NULL) return (-1); /* error */ tlbs = tlbs->tl_next; /* * Allocate the buffers. The sizes derived from the * sizes of other related buffers. See _t_alloc_bufs() * for details. */ if ((tlbs->tl_lookcbuf = malloc(cbuf_size)) == NULL) { /* giving up - free other memory chunks */ free(tlbs); return (-1); /* error */ } if ((dsize > 0) && ((tlbs->tl_lookdbuf = malloc(dbuf_size)) == NULL)) { /* giving up - free other memory chunks */ free(tlbs->tl_lookcbuf); free(tlbs); return (-1); /* error */ } } (void) memcpy(tlbs->tl_lookcbuf, cptr, csize); if (dsize > 0) (void) memcpy(tlbs->tl_lookdbuf, dptr, dsize); tlbs->tl_lookdlen = dsize; tlbs->tl_lookclen = csize; tlbs->tl_next = NULL; tiptr->ti_lookcnt++; return (0); /* ok return */ } /* * Is there something that needs attention? * Assumes tiptr->ti_lock held and this threads signals blocked * in MT case. */ int _t_is_event(int fd, struct _ti_user *tiptr) { int size, retval; assert(MUTEX_HELD(&tiptr->ti_lock)); if ((retval = ioctl(fd, I_NREAD, &size)) < 0) { t_errno = TSYSERR; return (-1); } if ((retval > 0) || (tiptr->ti_lookcnt > 0)) { t_errno = TLOOK; return (-1); } return (0); } /* * wait for T_OK_ACK * assumes tiptr->ti_lock held in MT case */ int _t_is_ok(int fd, struct _ti_user *tiptr, t_scalar_t type) { struct strbuf ctlbuf; struct strbuf databuf; union T_primitives *pptr; int retval, cntlflag; int size; int didalloc, didralloc; int flags = 0; assert(MUTEX_HELD(&tiptr->ti_lock)); /* * Acquire ctlbuf for use in sending/receiving control part * of the message. */ if (_t_acquire_ctlbuf(tiptr, &ctlbuf, &didalloc) < 0) return (-1); /* * Acquire databuf for use in sending/receiving data part */ if (_t_acquire_databuf(tiptr, &databuf, &didralloc) < 0) { if (didalloc) free(ctlbuf.buf); else tiptr->ti_ctlbuf = ctlbuf.buf; return (-1); } /* * Temporarily convert a non blocking endpoint to a * blocking one and restore status later */ cntlflag = fcntl(fd, F_GETFL, 0); if (cntlflag & (O_NDELAY | O_NONBLOCK)) (void) fcntl(fd, F_SETFL, cntlflag & ~(O_NDELAY | O_NONBLOCK)); flags = RS_HIPRI; while ((retval = getmsg(fd, &ctlbuf, &databuf, &flags)) < 0) { if (errno == EINTR) continue; if (cntlflag & (O_NDELAY | O_NONBLOCK)) (void) fcntl(fd, F_SETFL, cntlflag); t_errno = TSYSERR; goto err_out; } /* did I get entire message */ if (retval > 0) { if (cntlflag & (O_NDELAY | O_NONBLOCK)) (void) fcntl(fd, F_SETFL, cntlflag); t_errno = TSYSERR; errno = EIO; goto err_out; } /* * is ctl part large enough to determine type? 
*/ if (ctlbuf.len < (int)sizeof (t_scalar_t)) { if (cntlflag & (O_NDELAY | O_NONBLOCK)) (void) fcntl(fd, F_SETFL, cntlflag); t_errno = TSYSERR; errno = EPROTO; goto err_out; } if (cntlflag & (O_NDELAY | O_NONBLOCK)) (void) fcntl(fd, F_SETFL, cntlflag); /* LINTED pointer cast */ pptr = (union T_primitives *)ctlbuf.buf; switch (pptr->type) { case T_OK_ACK: if ((ctlbuf.len < (int)sizeof (struct T_ok_ack)) || (pptr->ok_ack.CORRECT_prim != type)) { t_errno = TSYSERR; errno = EPROTO; goto err_out; } if (didalloc) free(ctlbuf.buf); else tiptr->ti_ctlbuf = ctlbuf.buf; if (didralloc) free(databuf.buf); else tiptr->ti_rcvbuf = databuf.buf; return (0); case T_ERROR_ACK: if ((ctlbuf.len < (int)sizeof (struct T_error_ack)) || (pptr->error_ack.ERROR_prim != type)) { t_errno = TSYSERR; errno = EPROTO; goto err_out; } /* * if error is out of state and there is something * on read queue, then indicate to user that * there is something that needs attention */ if (pptr->error_ack.TLI_error == TOUTSTATE) { if ((retval = ioctl(fd, I_NREAD, &size)) < 0) { t_errno = TSYSERR; goto err_out; } if (retval > 0) t_errno = TLOOK; else t_errno = TOUTSTATE; } else { t_errno = pptr->error_ack.TLI_error; if (t_errno == TSYSERR) errno = pptr->error_ack.UNIX_error; } goto err_out; default: t_errno = TSYSERR; errno = EPROTO; /* fallthru to err_out: */ } err_out: if (didalloc) free(ctlbuf.buf); else tiptr->ti_ctlbuf = ctlbuf.buf; if (didralloc) free(databuf.buf); else tiptr->ti_rcvbuf = databuf.buf; return (-1); } /* * timod ioctl */ int _t_do_ioctl(int fd, char *buf, int size, int cmd, int *retlenp) { int retval; struct strioctl strioc; strioc.ic_cmd = cmd; strioc.ic_timout = -1; strioc.ic_len = size; strioc.ic_dp = buf; if ((retval = ioctl(fd, I_STR, &strioc)) < 0) { t_errno = TSYSERR; return (-1); } if (retval > 0) { t_errno = retval & 0xff; if (t_errno == TSYSERR) errno = (retval >> 8) & 0xff; return (-1); } if (retlenp) *retlenp = strioc.ic_len; return (0); } /* * alloc scratch buffers and look buffers */ /* ARGSUSED */ static int _t_alloc_bufs(int fd, struct _ti_user *tiptr, struct T_info_ack *tsap) { unsigned int size1, size2; t_scalar_t optsize; unsigned int csize, dsize, asize, osize; char *ctlbuf, *rcvbuf; char *lookdbuf, *lookcbuf; csize = _t_setsize(tsap->CDATA_size, B_FALSE); dsize = _t_setsize(tsap->DDATA_size, B_FALSE); size1 = _T_MAX(csize, dsize); if (size1 != 0) { if ((rcvbuf = malloc(size1)) == NULL) return (-1); if ((lookdbuf = malloc(size1)) == NULL) { free(rcvbuf); return (-1); } } else { rcvbuf = NULL; lookdbuf = NULL; } asize = _t_setsize(tsap->ADDR_size, B_FALSE); if (tsap->OPT_size >= 0) /* compensate for XTI level options */ optsize = tsap->OPT_size + TX_XTI_LEVEL_MAX_OPTBUF; else optsize = tsap->OPT_size; osize = _t_setsize(optsize, B_TRUE); /* * We compute the largest buffer size needed for this provider by * adding the components. [ An extra sizeof (t_scalar_t) is added to * take care of rounding off for alignment) for each buffer ] * The goal here is compute the size of largest possible buffer that * might be needed to hold a TPI message for the transport provider * on this endpoint. * Note: T_ADDR_ACK contains potentially two address buffers. 
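	 *
	 * Illustrative arithmetic (hypothetical provider values, not taken
	 * from this file): with ADDR_size = 16 and OPT_size = 64, asize is
	 * 16 and osize is 64 + TX_XTI_LEVEL_MAX_OPTBUF, so size2 below is
	 * sizeof (union T_primitives) plus two 16-byte address buffers,
	 * the option buffer, and one t_scalar_t of alignment slack for
	 * each of the three variable-length fields.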
*/ size2 = (unsigned int)sizeof (union T_primitives) /* TPI struct */ + asize + (unsigned int)sizeof (t_scalar_t) + /* first addr buffer plus alignment */ asize + (unsigned int)sizeof (t_scalar_t) + /* second addr buffer plus ailignment */ osize + (unsigned int)sizeof (t_scalar_t); /* option buffer plus alignment */ if ((ctlbuf = malloc(size2)) == NULL) { if (size1 != 0) { free(rcvbuf); free(lookdbuf); } return (-1); } if ((lookcbuf = malloc(size2)) == NULL) { if (size1 != 0) { free(rcvbuf); free(lookdbuf); } free(ctlbuf); return (-1); } tiptr->ti_rcvsize = size1; tiptr->ti_rcvbuf = rcvbuf; tiptr->ti_ctlsize = size2; tiptr->ti_ctlbuf = ctlbuf; /* * Note: The head of the lookbuffers list (and associated buffers) * is allocated here on initialization. * More allocated on demand. */ tiptr->ti_lookbufs.tl_lookclen = 0; tiptr->ti_lookbufs.tl_lookcbuf = lookcbuf; tiptr->ti_lookbufs.tl_lookdlen = 0; tiptr->ti_lookbufs.tl_lookdbuf = lookdbuf; return (0); } /* * set sizes of buffers */ static unsigned int _t_setsize(t_scalar_t infosize, boolean_t option) { static size_t optinfsize; switch (infosize) { case T_INFINITE /* -1 */: if (option) { if (optinfsize == 0) { size_t uc = ucred_size(); if (uc < DEFSIZE/2) optinfsize = DEFSIZE; else optinfsize = ucred_size() + DEFSIZE/2; } return ((unsigned int)optinfsize); } return (DEFSIZE); case T_INVALID /* -2 */: return (0); default: return ((unsigned int) infosize); } } static void _t_reinit_tiptr(struct _ti_user *tiptr) { /* * Note: This routine is designed for a "reinitialization" * Following fields are not modified here and preserved. * - ti_fd field * - ti_lock * - ti_next * - ti_prev * The above fields have to be separately initialized if this * is used for a fresh initialization. */ tiptr->ti_flags = 0; tiptr->ti_rcvsize = 0; tiptr->ti_rcvbuf = NULL; tiptr->ti_ctlsize = 0; tiptr->ti_ctlbuf = NULL; tiptr->ti_lookbufs.tl_lookdbuf = NULL; tiptr->ti_lookbufs.tl_lookcbuf = NULL; tiptr->ti_lookbufs.tl_lookdlen = 0; tiptr->ti_lookbufs.tl_lookclen = 0; tiptr->ti_lookbufs.tl_next = NULL; tiptr->ti_maxpsz = 0; tiptr->ti_tsdusize = 0; tiptr->ti_etsdusize = 0; tiptr->ti_cdatasize = 0; tiptr->ti_ddatasize = 0; tiptr->ti_servtype = 0; tiptr->ti_lookcnt = 0; tiptr->ti_state = 0; tiptr->ti_ocnt = 0; tiptr->ti_prov_flag = 0; tiptr->ti_qlen = 0; } /* * Link manipulation routines. * * NBUCKETS hash buckets are used to give fast * access. The number is derived the file descriptor softlimit * number (64). */ #define NBUCKETS 64 static struct _ti_user *hash_bucket[NBUCKETS]; /* * Allocates a new link and returns a pointer to it. * Assumes that the caller is holding _ti_userlock via sig_mutex_lock(), * so signals are deferred here. */ static struct _ti_user * add_tilink(int s) { struct _ti_user *tiptr; struct _ti_user *prevptr; struct _ti_user *curptr; int x; struct stat stbuf; assert(MUTEX_HELD(&_ti_userlock)); if (s < 0 || fstat(s, &stbuf) != 0) return (NULL); x = s % NBUCKETS; if (hash_bucket[x] != NULL) { /* * Walk along the bucket looking for * duplicate entry or the end. */ for (curptr = hash_bucket[x]; curptr != NULL; curptr = curptr->ti_next) { if (curptr->ti_fd == s) { /* * This can happen when the user has close(2)'ed * a descriptor and then been allocated it again * via t_open(). * * We will re-use the existing _ti_user struct * in this case rather than using the one * we allocated above. If there are buffers * associated with the existing _ti_user * struct, they may not be the correct size, * so we can not use it. 
We free them * here and re-allocate a new ones * later on. */ if (curptr->ti_rcvbuf != NULL) free(curptr->ti_rcvbuf); free(curptr->ti_ctlbuf); _t_free_lookbufs(curptr); _t_reinit_tiptr(curptr); curptr->ti_rdev = stbuf.st_rdev; curptr->ti_ino = stbuf.st_ino; return (curptr); } prevptr = curptr; } /* * Allocate and link in a new one. */ if ((tiptr = malloc(sizeof (*tiptr))) == NULL) return (NULL); /* * First initialize fields common with reinitialization and * then other fields too */ _t_reinit_tiptr(tiptr); prevptr->ti_next = tiptr; tiptr->ti_prev = prevptr; } else { /* * First entry. */ if ((tiptr = malloc(sizeof (*tiptr))) == NULL) return (NULL); _t_reinit_tiptr(tiptr); hash_bucket[x] = tiptr; tiptr->ti_prev = NULL; } tiptr->ti_next = NULL; tiptr->ti_fd = s; tiptr->ti_rdev = stbuf.st_rdev; tiptr->ti_ino = stbuf.st_ino; (void) mutex_init(&tiptr->ti_lock, USYNC_THREAD, NULL); return (tiptr); } /* * Find a link by descriptor * Assumes that the caller is holding _ti_userlock. */ static struct _ti_user * find_tilink(int s) { struct _ti_user *curptr; int x; struct stat stbuf; assert(MUTEX_HELD(&_ti_userlock)); if (s < 0 || fstat(s, &stbuf) != 0) return (NULL); x = s % NBUCKETS; /* * Walk along the bucket looking for the descriptor. */ for (curptr = hash_bucket[x]; curptr; curptr = curptr->ti_next) { if (curptr->ti_fd == s) { if (curptr->ti_rdev == stbuf.st_rdev && curptr->ti_ino == stbuf.st_ino) return (curptr); (void) _t_delete_tilink(s); } } return (NULL); } /* * Assumes that the caller is holding _ti_userlock. * Also assumes that all signals are blocked. */ int _t_delete_tilink(int s) { struct _ti_user *curptr; int x; /* * Find the link. */ assert(MUTEX_HELD(&_ti_userlock)); if (s < 0) return (-1); x = s % NBUCKETS; /* * Walk along the bucket looking for * the descriptor. */ for (curptr = hash_bucket[x]; curptr; curptr = curptr->ti_next) { if (curptr->ti_fd == s) { struct _ti_user *nextptr; struct _ti_user *prevptr; nextptr = curptr->ti_next; prevptr = curptr->ti_prev; if (prevptr) prevptr->ti_next = nextptr; else hash_bucket[x] = nextptr; if (nextptr) nextptr->ti_prev = prevptr; /* * free resource associated with the curptr */ if (curptr->ti_rcvbuf != NULL) free(curptr->ti_rcvbuf); free(curptr->ti_ctlbuf); _t_free_lookbufs(curptr); (void) mutex_destroy(&curptr->ti_lock); free(curptr); return (0); } } return (-1); } /* * Allocate a TLI state structure and synch it with the kernel * *tiptr is returned * Assumes that the caller is holding the _ti_userlock and has blocked signals. * * This function may fail the first time it is called with given transport if it * doesn't support T_CAPABILITY_REQ TPI message. */ struct _ti_user * _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed) { /* * Aligned data buffer for ioctl. */ union { struct ti_sync_req ti_req; struct ti_sync_ack ti_ack; union T_primitives t_prim; char pad[128]; } ioctl_data; void *ioctlbuf = &ioctl_data; /* TI_SYNC/GETINFO with room to grow */ /* preferred location first local variable */ /* see note below */ /* * Note: We use "ioctlbuf" allocated on stack above with * room to grow since (struct ti_sync_ack) can grow in size * on future kernels. (We do not use malloc'd "ti_ctlbuf" as that * part of instance structure which may not exist yet) * Its preferred declaration location is first local variable in this * procedure as bugs causing overruns will be detectable on * platforms where procedure calling conventions place return * address on stack (such as x86) instead of causing silent * memory corruption. 
*/ struct ti_sync_req *tsrp = (struct ti_sync_req *)ioctlbuf; struct ti_sync_ack *tsap = (struct ti_sync_ack *)ioctlbuf; struct T_capability_req *tcrp = (struct T_capability_req *)ioctlbuf; struct T_capability_ack *tcap = (struct T_capability_ack *)ioctlbuf; struct T_info_ack *tiap = &tcap->INFO_ack; struct _ti_user *ntiptr; int expected_acksize; int retlen, rstate, sv_errno, rval; assert(MUTEX_HELD(&_ti_userlock)); /* * Use ioctl required for sync'ing state with kernel. * We use two ioctls. TI_CAPABILITY is used to get TPI information and * TI_SYNC is used to synchronise state with timod. Statically linked * TLI applications will no longer work on older releases where there * are no TI_SYNC and TI_CAPABILITY. */ /* * Request info about transport. * Assumes that TC1_INFO should always be implemented. * For TI_CAPABILITY size argument to ioctl specifies maximum buffer * size. */ tcrp->PRIM_type = T_CAPABILITY_REQ; tcrp->CAP_bits1 = TC1_INFO | TC1_ACCEPTOR_ID; rval = _t_do_ioctl(fd, (char *)ioctlbuf, (int)sizeof (struct T_capability_ack), TI_CAPABILITY, &retlen); expected_acksize = (int)sizeof (struct T_capability_ack); if (rval < 0) { /* * TI_CAPABILITY may fail when transport provider doesn't * support T_CAPABILITY_REQ message type. In this case file * descriptor may be unusable (when transport provider sent * M_ERROR in response to T_CAPABILITY_REQ). This should only * happen once during system lifetime for given transport * provider since timod will emulate TI_CAPABILITY after it * detected the failure. */ if (t_capreq_failed != NULL) *t_capreq_failed = 1; return (NULL); } if (retlen != expected_acksize) { t_errno = TSYSERR; errno = EIO; return (NULL); } if ((tcap->CAP_bits1 & TC1_INFO) == 0) { t_errno = TSYSERR; errno = EPROTO; return (NULL); } if (info != NULL) { if (tiap->PRIM_type != T_INFO_ACK) { t_errno = TSYSERR; errno = EPROTO; return (NULL); } info->addr = tiap->ADDR_size; info->options = tiap->OPT_size; info->tsdu = tiap->TSDU_size; info->etsdu = tiap->ETSDU_size; info->connect = tiap->CDATA_size; info->discon = tiap->DDATA_size; info->servtype = tiap->SERV_type; if (_T_IS_XTI(api_semantics)) { /* * XTI ONLY - TLI "struct t_info" does not * have "flags" */ info->flags = 0; if (tiap->PROVIDER_flag & (SENDZERO|OLD_SENDZERO)) info->flags |= T_SENDZERO; /* * Some day there MAY be a NEW bit in T_info_ack * PROVIDER_flag namespace exposed by TPI header * which will functionally correspond to * role played by T_ORDRELDATA in info->flags namespace * When that bit exists, we can add a test to see if * it is set and set T_ORDRELDATA. * Note: Currently only mOSI ("minimal OSI") provider * is specified to use T_ORDRELDATA so probability of * needing it is minimal. 
			 */
		}
	}

	/*
	 * If this is the first time, or there is no instance (after a
	 * fork/exec, dup, etc.), then create and initialize the data
	 * structure and allocate buffers.
	 */
	ntiptr = add_tilink(fd);
	if (ntiptr == NULL) {
		t_errno = TSYSERR;
		errno = ENOMEM;
		return (NULL);
	}

	/*
	 * Allocate buffers for the new descriptor
	 */
	if (_t_alloc_bufs(fd, ntiptr, tiap) < 0) {
		sv_errno = errno;
		(void) _t_delete_tilink(fd);
		t_errno = TSYSERR;
		errno = sv_errno;
		return (NULL);
	}

	/* Fill instance structure */

	ntiptr->ti_lookcnt = 0;
	ntiptr->ti_flags = USED;
	ntiptr->ti_state = T_UNINIT;
	ntiptr->ti_ocnt = 0;

	assert(tiap->TIDU_size > 0);
	ntiptr->ti_maxpsz = tiap->TIDU_size;
	assert(tiap->TSDU_size >= -2);
	ntiptr->ti_tsdusize = tiap->TSDU_size;
	assert(tiap->ETSDU_size >= -2);
	ntiptr->ti_etsdusize = tiap->ETSDU_size;
	assert(tiap->CDATA_size >= -2);
	ntiptr->ti_cdatasize = tiap->CDATA_size;
	assert(tiap->DDATA_size >= -2);
	ntiptr->ti_ddatasize = tiap->DDATA_size;
	ntiptr->ti_servtype = tiap->SERV_type;
	ntiptr->ti_prov_flag = tiap->PROVIDER_flag;

	if ((tcap->CAP_bits1 & TC1_ACCEPTOR_ID) != 0) {
		ntiptr->acceptor_id = tcap->ACCEPTOR_id;
		ntiptr->ti_flags |= V_ACCEPTOR_ID;
	} else
		ntiptr->ti_flags &= ~V_ACCEPTOR_ID;

	/*
	 * Restore state from kernel (caveat some heuristics)
	 */
	switch (tiap->CURRENT_state) {

	case TS_UNBND:
		ntiptr->ti_state = T_UNBND;
		break;

	case TS_IDLE:
		if ((rstate = _t_adjust_state(fd, T_IDLE)) < 0) {
			sv_errno = errno;
			(void) _t_delete_tilink(fd);
			errno = sv_errno;
			return (NULL);
		}
		ntiptr->ti_state = rstate;
		break;

	case TS_WRES_CIND:
		ntiptr->ti_state = T_INCON;
		break;

	case TS_WCON_CREQ:
		ntiptr->ti_state = T_OUTCON;
		break;

	case TS_DATA_XFER:
		if ((rstate = _t_adjust_state(fd, T_DATAXFER)) < 0) {
			sv_errno = errno;
			(void) _t_delete_tilink(fd);
			errno = sv_errno;
			return (NULL);
		}
		ntiptr->ti_state = rstate;
		break;

	case TS_WIND_ORDREL:
		ntiptr->ti_state = T_OUTREL;
		break;

	case TS_WREQ_ORDREL:
		if ((rstate = _t_adjust_state(fd, T_INREL)) < 0) {
			sv_errno = errno;
			(void) _t_delete_tilink(fd);
			errno = sv_errno;
			return (NULL);
		}
		ntiptr->ti_state = rstate;
		break;

	default:
		t_errno = TSTATECHNG;
		(void) _t_delete_tilink(fd);
		return (NULL);
	}

	/*
	 * Sync information with timod.
	 */
	tsrp->tsr_flags = TSRF_QLEN_REQ;

	rval = _t_do_ioctl(fd, ioctlbuf,
	    (int)sizeof (struct ti_sync_req), TI_SYNC, &retlen);
	expected_acksize = (int)sizeof (struct ti_sync_ack);

	if (rval < 0) {
		sv_errno = errno;
		(void) _t_delete_tilink(fd);
		t_errno = TSYSERR;
		errno = sv_errno;
		return (NULL);
	}

	/*
	 * This is a "less than" check as "struct ti_sync_ack" returned by
	 * TI_SYNC can grow in size in future kernels. If/when a statically
	 * linked application is run on a future kernel, it should not fail.
	 */
	if (retlen < expected_acksize) {
		sv_errno = errno;
		(void) _t_delete_tilink(fd);
		t_errno = TSYSERR;
		errno = sv_errno;
		return (NULL);
	}

	if (_T_IS_TLI(api_semantics))
		tsap->tsa_qlen = 0; /* not needed for TLI */

	ntiptr->ti_qlen = tsap->tsa_qlen;

	return (ntiptr);
}

static int
_t_adjust_state(int fd, int instate)
{
	char ctlbuf[sizeof (t_scalar_t)];
	char databuf[sizeof (int)]; /* size unimportant - anything > 0 */
	struct strpeek arg;
	int outstate, retval;

	/*
	 * Peek at message on stream head (if any)
	 * and see if it is data
	 */
	arg.ctlbuf.buf = ctlbuf;
	arg.ctlbuf.maxlen = (int)sizeof (ctlbuf);
	arg.ctlbuf.len = 0;

	arg.databuf.buf = databuf;
	arg.databuf.maxlen = (int)sizeof (databuf);
	arg.databuf.len = 0;

	arg.flags = 0;

	if ((retval = ioctl(fd, I_PEEK, &arg)) < 0) {
		t_errno = TSYSERR;
		return (-1);
	}
	outstate = instate;
	/*
	 * If the peek shows something at the stream head, then
	 * adjust "outstate" based on some heuristics.
*/ if (retval > 0) { switch (instate) { case T_IDLE: /* * The following heuristic is to handle data * ahead of T_DISCON_IND indications that might * be at the stream head waiting to be * read (T_DATA_IND or M_DATA) */ if (((arg.ctlbuf.len == 4) && /* LINTED pointer cast */ ((*(int32_t *)arg.ctlbuf.buf) == T_DATA_IND)) || ((arg.ctlbuf.len == 0) && arg.databuf.len)) { outstate = T_DATAXFER; } break; case T_DATAXFER: /* * The following heuristic is to handle * the case where the connection is established * and in data transfer state at the provider * but the T_CONN_CON has not yet been read * from the stream head. */ if ((arg.ctlbuf.len == 4) && /* LINTED pointer cast */ ((*(int32_t *)arg.ctlbuf.buf) == T_CONN_CON)) outstate = T_OUTCON; break; case T_INREL: /* * The following heuristic is to handle data * ahead of T_ORDREL_IND indications that might * be at the stream head waiting to be * read (T_DATA_IND or M_DATA) */ if (((arg.ctlbuf.len == 4) && /* LINTED pointer cast */ ((*(int32_t *)arg.ctlbuf.buf) == T_DATA_IND)) || ((arg.ctlbuf.len == 0) && arg.databuf.len)) { outstate = T_DATAXFER; } break; default: break; } } return (outstate); } /* * Assumes caller has blocked signals at least in this thread (for safe * malloc/free operations) */ static int _t_cbuf_alloc(struct _ti_user *tiptr, char **retbuf) { unsigned size2; assert(MUTEX_HELD(&tiptr->ti_lock)); size2 = tiptr->ti_ctlsize; /* same size as default ctlbuf */ if ((*retbuf = malloc(size2)) == NULL) { return (-1); } return (size2); } /* * Assumes caller has blocked signals at least in this thread (for safe * malloc/free operations) */ int _t_rbuf_alloc(struct _ti_user *tiptr, char **retbuf) { unsigned size1; assert(MUTEX_HELD(&tiptr->ti_lock)); size1 = tiptr->ti_rcvsize; /* same size as default rcvbuf */ if ((*retbuf = malloc(size1)) == NULL) { return (-1); } return (size1); } /* * Free lookbuffer structures and associated resources * Assumes ti_lock held for MT case. */ static void _t_free_lookbufs(struct _ti_user *tiptr) { struct _ti_lookbufs *tlbs, *prev_tlbs, *head_tlbs; /* * Assertion: * The structure lock should be held or the global list * manipulation lock. The assumption is that nothing * else can access the descriptor since global list manipulation * lock is held so it is OK to manipulate fields without the * structure lock */ assert(MUTEX_HELD(&tiptr->ti_lock) || MUTEX_HELD(&_ti_userlock)); /* * Free only the buffers in the first lookbuf */ head_tlbs = &tiptr->ti_lookbufs; if (head_tlbs->tl_lookdbuf != NULL) { free(head_tlbs->tl_lookdbuf); head_tlbs->tl_lookdbuf = NULL; } free(head_tlbs->tl_lookcbuf); head_tlbs->tl_lookcbuf = NULL; /* * Free the node and the buffers in the rest of the * list */ tlbs = head_tlbs->tl_next; head_tlbs->tl_next = NULL; while (tlbs != NULL) { if (tlbs->tl_lookdbuf != NULL) free(tlbs->tl_lookdbuf); free(tlbs->tl_lookcbuf); prev_tlbs = tlbs; tlbs = tlbs->tl_next; free(prev_tlbs); } } /* * Free lookbuffer event list head. * Consume current lookbuffer event * Assumes ti_lock held for MT case. * Note: The head of this list is part of the instance * structure so the code is a little unorthodox. 
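 *
 * Illustrative picture of the head-replacement step performed below:
 * consuming the head event copies the second node's buffers and lengths
 * into the embedded head and then frees that second node.
 *
 *	before:	ti_lookbufs(event A) -> node(event B) -> node(event C)
 *	after:	ti_lookbufs(event B) -> node(event C)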
*/ void _t_free_looklist_head(struct _ti_user *tiptr) { struct _ti_lookbufs *tlbs, *next_tlbs; tlbs = &tiptr->ti_lookbufs; if (tlbs->tl_next) { /* * Free the control and data buffers */ if (tlbs->tl_lookdbuf != NULL) free(tlbs->tl_lookdbuf); free(tlbs->tl_lookcbuf); /* * Replace with next lookbuf event contents */ next_tlbs = tlbs->tl_next; tlbs->tl_next = next_tlbs->tl_next; tlbs->tl_lookcbuf = next_tlbs->tl_lookcbuf; tlbs->tl_lookclen = next_tlbs->tl_lookclen; tlbs->tl_lookdbuf = next_tlbs->tl_lookdbuf; tlbs->tl_lookdlen = next_tlbs->tl_lookdlen; free(next_tlbs); /* * Decrement the flag - should never get to zero. * in this path */ tiptr->ti_lookcnt--; assert(tiptr->ti_lookcnt > 0); } else { /* * No more look buffer events - just clear the flag * and leave the buffers alone */ assert(tiptr->ti_lookcnt == 1); tiptr->ti_lookcnt = 0; } } /* * Discard lookbuffer events. * Assumes ti_lock held for MT case. */ void _t_flush_lookevents(struct _ti_user *tiptr) { struct _ti_lookbufs *tlbs, *prev_tlbs; /* * Leave the first nodes buffers alone (i.e. allocated) * but reset the flag. */ assert(MUTEX_HELD(&tiptr->ti_lock)); tiptr->ti_lookcnt = 0; /* * Blow away the rest of the list */ tlbs = tiptr->ti_lookbufs.tl_next; tiptr->ti_lookbufs.tl_next = NULL; while (tlbs != NULL) { if (tlbs->tl_lookdbuf != NULL) free(tlbs->tl_lookdbuf); free(tlbs->tl_lookcbuf); prev_tlbs = tlbs; tlbs = tlbs->tl_next; free(prev_tlbs); } } /* * This routine checks if the receive. buffer in the instance structure * is available (non-null). If it is, the buffer is acquired and marked busy * (null). If it is busy (possible in MT programs), it allocates a new * buffer and sets a flag indicating new memory was allocated and the caller * has to free it. */ int _t_acquire_ctlbuf( struct _ti_user *tiptr, struct strbuf *ctlbufp, int *didallocp) { *didallocp = 0; ctlbufp->len = 0; if (tiptr->ti_ctlbuf) { ctlbufp->buf = tiptr->ti_ctlbuf; tiptr->ti_ctlbuf = NULL; ctlbufp->maxlen = tiptr->ti_ctlsize; } else { /* * tiptr->ti_ctlbuf is in use * allocate new buffer and free after use. */ if ((ctlbufp->maxlen = _t_cbuf_alloc(tiptr, &ctlbufp->buf)) < 0) { t_errno = TSYSERR; return (-1); } *didallocp = 1; } return (0); } /* * This routine checks if the receive buffer in the instance structure * is available (non-null). If it is, the buffer is acquired and marked busy * (null). If it is busy (possible in MT programs), it allocates a new * buffer and sets a flag indicating new memory was allocated and the caller * has to free it. * Note: The receive buffer pointer can also be null if the transport * provider does not support connect/disconnect data, (e.g. TCP) - not * just when it is "busy". In that case, ti_rcvsize will be 0 and that is * used to instantiate the databuf which points to a null buffer of * length 0 which is the right thing to do for that case. */ int _t_acquire_databuf( struct _ti_user *tiptr, struct strbuf *databufp, int *didallocp) { *didallocp = 0; databufp->len = 0; if (tiptr->ti_rcvbuf) { assert(tiptr->ti_rcvsize != 0); databufp->buf = tiptr->ti_rcvbuf; tiptr->ti_rcvbuf = NULL; databufp->maxlen = tiptr->ti_rcvsize; } else if (tiptr->ti_rcvsize == 0) { databufp->buf = NULL; databufp->maxlen = 0; } else { /* * tiptr->ti_rcvbuf is in use * allocate new buffer and free after use. */ if ((databufp->maxlen = _t_rbuf_alloc(tiptr, &databufp->buf)) < 0) { t_errno = TSYSERR; return (-1); } *didallocp = 1; } return (0); } /* * This routine requests timod to look for any expedited data * queued in the "receive buffers" in the kernel. 
Used for XTI * t_look() semantics for transports that send expedited data * data inline (e.g TCP). * Returns -1 for failure * Returns 0 for success * On a successful return, the location pointed by "expedited_queuedp" * contains * 0 if no expedited data is found queued in "receive buffers" * 1 if expedited data is found queued in "receive buffers" */ int _t_expinline_queued(int fd, int *expedited_queuedp) { union { struct ti_sync_req ti_req; struct ti_sync_ack ti_ack; char pad[128]; } ioctl_data; void *ioctlbuf = &ioctl_data; /* for TI_SYNC with room to grow */ /* preferred location first local variable */ /* see note in _t_create above */ struct ti_sync_req *tsrp = (struct ti_sync_req *)ioctlbuf; struct ti_sync_ack *tsap = (struct ti_sync_ack *)ioctlbuf; int rval, retlen; *expedited_queuedp = 0; /* request info on rq expinds */ tsrp->tsr_flags = TSRF_IS_EXP_IN_RCVBUF; do { rval = _t_do_ioctl(fd, ioctlbuf, (int)sizeof (struct T_info_req), TI_SYNC, &retlen); } while (rval < 0 && errno == EINTR); if (rval < 0) return (-1); /* * This is a "less than" check as "struct ti_sync_ack" returned by * TI_SYNC can grow in size in future kernels. If/when a statically * linked application is run on a future kernel, it should not fail. */ if (retlen < (int)sizeof (struct ti_sync_ack)) { t_errno = TSYSERR; errno = EIO; return (-1); } if (tsap->tsa_flags & TSAF_EXP_QUEUED) *expedited_queuedp = 1; return (0); } /* * Support functions for use by functions that do scatter/gather * like t_sndv(), t_rcvv() etc..follow below. */ /* * _t_bytecount_upto_intmax() : * Sum of the lengths of the individual buffers in * the t_iovec array. If the sum exceeds INT_MAX * it is truncated to INT_MAX. */ unsigned int _t_bytecount_upto_intmax(const struct t_iovec *tiov, unsigned int tiovcount) { size_t nbytes; int i; nbytes = 0; for (i = 0; i < tiovcount && nbytes < INT_MAX; i++) { if (tiov[i].iov_len >= INT_MAX) { nbytes = INT_MAX; break; } nbytes += tiov[i].iov_len; } if (nbytes > INT_MAX) nbytes = INT_MAX; return ((unsigned int)nbytes); } /* * Gather the data in the t_iovec buffers, into a single linear buffer * starting at dataptr. Caller must have allocated sufficient space * starting at dataptr. The total amount of data that is gathered is * limited to INT_MAX. Any remaining data in the t_iovec buffers is * not copied. */ void _t_gather(char *dataptr, const struct t_iovec *tiov, unsigned int tiovcount) { char *curptr; unsigned int cur_count; unsigned int nbytes_remaining; int i; curptr = dataptr; cur_count = 0; nbytes_remaining = _t_bytecount_upto_intmax(tiov, tiovcount); for (i = 0; i < tiovcount && nbytes_remaining != 0; i++) { if (tiov[i].iov_len <= nbytes_remaining) cur_count = (int)tiov[i].iov_len; else cur_count = nbytes_remaining; (void) memcpy(curptr, tiov[i].iov_base, cur_count); curptr += cur_count; nbytes_remaining -= cur_count; } } /* * Scatter the data from the single linear buffer at pdatabuf->buf into * the t_iovec buffers. */ void _t_scatter(struct strbuf *pdatabuf, struct t_iovec *tiov, int tiovcount) { char *curptr; unsigned int nbytes_remaining; unsigned int curlen; int i; /* * There cannot be any uncopied data leftover in pdatabuf * at the conclusion of this function. 
(asserted below) */ assert(pdatabuf->len <= _t_bytecount_upto_intmax(tiov, tiovcount)); curptr = pdatabuf->buf; nbytes_remaining = pdatabuf->len; for (i = 0; i < tiovcount && nbytes_remaining != 0; i++) { if (tiov[i].iov_len < nbytes_remaining) curlen = (unsigned int)tiov[i].iov_len; else curlen = nbytes_remaining; (void) memcpy(tiov[i].iov_base, curptr, curlen); curptr += curlen; nbytes_remaining -= curlen; } } /* * Adjust the iovec array, for subsequent use. Examine each element in the * iovec array,and zero out the iov_len if the buffer was sent fully. * otherwise the buffer was only partially sent, so adjust both iov_len and * iov_base. * */ void _t_adjust_iov(int bytes_sent, struct iovec *iov, int *iovcountp) { int i; for (i = 0; i < *iovcountp && bytes_sent; i++) { if (iov[i].iov_len == 0) continue; if (bytes_sent < iov[i].iov_len) break; else { bytes_sent -= iov[i].iov_len; iov[i].iov_len = 0; } } iov[i].iov_len -= bytes_sent; iov[i].iov_base += bytes_sent; } /* * Copy the t_iovec array to the iovec array while taking care to see * that the sum of the buffer lengths in the result is not more than * INT_MAX. This function requires that T_IOV_MAX is no larger than * IOV_MAX. Otherwise the resulting array is not a suitable input to * writev(). If the sum of the lengths in t_iovec is zero, so is the * resulting iovec. */ void _t_copy_tiov_to_iov(const struct t_iovec *tiov, int tiovcount, struct iovec *iov, int *iovcountp) { int i; unsigned int nbytes_remaining; nbytes_remaining = _t_bytecount_upto_intmax(tiov, tiovcount); i = 0; do { iov[i].iov_base = tiov[i].iov_base; if (tiov[i].iov_len > nbytes_remaining) iov[i].iov_len = nbytes_remaining; else iov[i].iov_len = tiov[i].iov_len; nbytes_remaining -= iov[i].iov_len; i++; } while (nbytes_remaining != 0 && i < tiovcount); *iovcountp = i; } /* * Routine called after connection establishment on transports where * connection establishment changes certain transport attributes such as * TIDU_size */ int _t_do_postconn_sync(int fd, struct _ti_user *tiptr) { union { struct T_capability_req tc_req; struct T_capability_ack tc_ack; } ioctl_data; void *ioctlbuf = &ioctl_data; int expected_acksize; int retlen, rval; struct T_capability_req *tc_reqp = (struct T_capability_req *)ioctlbuf; struct T_capability_ack *tc_ackp = (struct T_capability_ack *)ioctlbuf; struct T_info_ack *tiap; /* * This T_CAPABILITY_REQ should not fail, even if it is unsupported * by the transport provider. timod will emulate it in that case. */ tc_reqp->PRIM_type = T_CAPABILITY_REQ; tc_reqp->CAP_bits1 = TC1_INFO; rval = _t_do_ioctl(fd, (char *)ioctlbuf, (int)sizeof (struct T_capability_ack), TI_CAPABILITY, &retlen); expected_acksize = (int)sizeof (struct T_capability_ack); if (rval < 0) return (-1); /* * T_capability TPI messages are extensible and can grow in future. * However timod will take care of returning no more information * than what was requested, and truncating the "extended" * information towards the end of the T_capability_ack, if necessary. 
	 */
	if (retlen != expected_acksize) {
		t_errno = TSYSERR;
		errno = EIO;
		return (-1);
	}

	/*
	 * The T_info_ack part of the T_capability_ack is guaranteed to be
	 * present only if the corresponding TC1_INFO bit is set.
	 */
	if ((tc_ackp->CAP_bits1 & TC1_INFO) == 0) {
		t_errno = TSYSERR;
		errno = EPROTO;
		return (-1);
	}

	tiap = &tc_ackp->INFO_ack;
	if (tiap->PRIM_type != T_INFO_ACK) {
		t_errno = TSYSERR;
		errno = EPROTO;
		return (-1);
	}

	/*
	 * Note: We sync with the latest information returned in
	 * "struct T_info_ack", but we deliberately do not sync the state
	 * here, as user-level state construction is not required; we only
	 * update attributes which may have changed because of negotiations
	 * during connection establishment.
	 */
	assert(tiap->TIDU_size > 0);
	tiptr->ti_maxpsz = tiap->TIDU_size;
	assert(tiap->TSDU_size >= T_INVALID);
	tiptr->ti_tsdusize = tiap->TSDU_size;
	assert(tiap->ETSDU_size >= T_INVALID);
	tiptr->ti_etsdusize = tiap->ETSDU_size;
	assert(tiap->CDATA_size >= T_INVALID);
	tiptr->ti_cdatasize = tiap->CDATA_size;
	assert(tiap->DDATA_size >= T_INVALID);
	tiptr->ti_ddatasize = tiap->DDATA_size;
	tiptr->ti_prov_flag = tiap->PROVIDER_flag;

	return (0);
}
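
/*
 * Illustrative sketch (hypothetical address size and primitive; not code
 * used by this file): how a caller might use ROUNDUP32() and
 * _t_aligned_copy() above to append a variable-length address to a TPI
 * control message before sending it with putmsg().
 *
 *	struct strbuf ctl;
 *	struct T_bind_req *req;
 *	t_scalar_t addr_off;
 *	char addr[6];			(some 6-byte example address)
 *
 *	ctl.buf = tiptr->ti_ctlbuf;
 *	ctl.maxlen = tiptr->ti_ctlsize;
 *	req = (struct T_bind_req *)ctl.buf;
 *	req->PRIM_type = T_BIND_REQ;
 *	req->CONIND_number = 0;
 *
 * The address is placed after the fixed header, rounded up to a 32-bit
 * boundary; _t_aligned_copy() fails if that would overflow ctl.maxlen.
 *
 *	if (_t_aligned_copy(&ctl, (int)sizeof (addr),
 *	    (int)sizeof (struct T_bind_req), addr, &addr_off) < 0)
 *		return (-1);
 *	req->ADDR_offset = addr_off;
 *	req->ADDR_length = (t_scalar_t)sizeof (addr);
 *	ctl.len = addr_off + (int)sizeof (addr);
 */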