xref: /titanic_51/usr/src/lib/libnsl/nsl/_utility.c (revision 791a814c934fcd4deb13b26c1f116ff283272a0d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
24 /*	  All Rights Reserved  	*/
25 
26 /*
27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include "mt.h"
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <stropts.h>
40 #include <sys/stream.h>
41 #define	_SUN_TPI_VERSION 2
42 #include <sys/tihdr.h>
43 #include <sys/timod.h>
44 #include <sys/stat.h>
45 #include <xti.h>
46 #include <fcntl.h>
47 #include <signal.h>
48 #include <assert.h>
49 #include <syslog.h>
50 #include <limits.h>
51 #include "tx.h"
52 
53 #define	DEFSIZE 2048
54 
/*
 * The following used to be in tiuser.h, but was causing too much namespace
 * pollution.
 *
 * ROUNDUP32(X) rounds X up to the next multiple of 4.  The argument is
 * parenthesized so that an expression built from operators that bind more
 * loosely than '+' (for example '|' or '?:') is rounded as a whole.
 */
#define	ROUNDUP32(X)	(((X) + 0x03) & ~0x03)
60 
61 static struct _ti_user	*find_tilink(int s);
62 static struct _ti_user	*add_tilink(int s);
63 static void _t_free_lookbufs(struct _ti_user *tiptr);
64 static unsigned int _t_setsize(t_scalar_t infosize);
65 static int _t_cbuf_alloc(struct _ti_user *tiptr, char **retbuf);
66 static int _t_rbuf_alloc(struct _ti_user *tiptr, char **retbuf);
67 static int _t_adjust_state(int fd, int instate);
68 static int _t_alloc_bufs(int fd, struct _ti_user *tiptr,
69 	struct T_info_ack *tsap);
70 
71 mutex_t	_ti_userlock = DEFAULTMUTEX;	/* Protects hash_bucket[] */
72 
73 /*
74  * Checkfd - checks validity of file descriptor
75  */
76 struct _ti_user *
77 _t_checkfd(int fd, int force_sync, int api_semantics)
78 {
79 	sigset_t mask;
80 	struct _ti_user *tiptr;
81 	int retval, timodpushed;
82 
83 	if (fd < 0) {
84 		t_errno = TBADF;
85 		return (NULL);
86 	}
87 	tiptr = NULL;
88 	sig_mutex_lock(&_ti_userlock);
89 	if ((tiptr = find_tilink(fd)) != NULL) {
90 		if (!force_sync) {
91 			sig_mutex_unlock(&_ti_userlock);
92 			return (tiptr);
93 		}
94 	}
95 	sig_mutex_unlock(&_ti_userlock);
96 
97 	/*
98 	 * Not found or a forced sync is required.
99 	 * check if this is a valid TLI/XTI descriptor.
100 	 */
101 	timodpushed = 0;
102 	do {
103 		retval = ioctl(fd, I_FIND, "timod");
104 	} while (retval < 0 && errno == EINTR);
105 
106 	if (retval < 0 || (retval == 0 && _T_IS_TLI(api_semantics))) {
107 		/*
108 		 * not a stream or a TLI endpoint with no timod
109 		 * XXX Note: If it is a XTI call, we push "timod" and
110 		 * try to convert it into a transport endpoint later.
111 		 * We do not do it for TLI and "retain" the old buggy
112 		 * behavior because ypbind and a lot of other deamons seem
113 		 * to use a buggy logic test of the form
114 		 * "(t_getstate(0) != -1 || t_errno != TBADF)" to see if
115 		 * they we ever invoked with request on stdin and drop into
116 		 * untested code. This test is in code generated by rpcgen
117 		 * which is why it is replicated test in many daemons too.
118 		 * We will need to fix that test too with "IsaTLIendpoint"
119 		 * test if we ever fix this for TLI
120 		 */
121 		t_errno = TBADF;
122 		return (NULL);
123 	}
124 
125 	if (retval == 0) {
126 		/*
127 		 * "timod" not already on stream, then push it
128 		 */
129 		do {
130 			/*
131 			 * Assumes (correctly) that I_PUSH  is
132 			 * atomic w.r.t signals (EINTR error)
133 			 */
134 			retval = ioctl(fd, I_PUSH, "timod");
135 		} while (retval < 0 && errno == EINTR);
136 
137 		if (retval < 0) {
138 			t_errno = TSYSERR;
139 			return (NULL);
140 		}
141 		timodpushed = 1;
142 	}
143 	/*
144 	 * Try to (re)constitute the info at user level from state
145 	 * in the kernel. This could be information that lost due
146 	 * to an exec or being instantiated at a new descriptor due
147 	 * to , open(), dup2() etc.
148 	 *
149 	 * _t_create() requires that all signals be blocked.
150 	 * Note that sig_mutex_lock() only defers signals, it does not
151 	 * block them, so interruptible syscalls could still get EINTR.
152 	 */
153 	(void) thr_sigsetmask(SIG_SETMASK, &fillset, &mask);
154 	sig_mutex_lock(&_ti_userlock);
155 	tiptr = _t_create(fd, NULL, api_semantics, NULL);
156 	if (tiptr == NULL) {
157 		int sv_errno = errno;
158 		sig_mutex_unlock(&_ti_userlock);
159 		(void) thr_sigsetmask(SIG_SETMASK, &mask, NULL);
160 		/*
161 		 * restore to stream before timod pushed. It may
162 		 * not have been a network transport stream.
163 		 */
164 		if (timodpushed)
165 			(void) ioctl(fd, I_POP, 0);
166 		errno = sv_errno;
167 		return (NULL);
168 	}
169 	sig_mutex_unlock(&_ti_userlock);
170 	(void) thr_sigsetmask(SIG_SETMASK, &mask, NULL);
171 	return (tiptr);
172 }
173 
174 /*
175  * copy data to output buffer making sure the output buffer is 32 bit
176  * aligned, even though the input buffer may not be.
177  */
178 int
179 _t_aligned_copy(
180 	struct strbuf *strbufp,
181 	int len,
182 	int init_offset,
183 	char *datap,
184 	t_scalar_t *rtn_offset)
185 {
186 	*rtn_offset = ROUNDUP32(init_offset);
187 	if ((*rtn_offset + len) > strbufp->maxlen) {
188 		/*
189 		 * Aligned copy will overflow buffer
190 		 */
191 		return (-1);
192 	}
193 	(void) memcpy(strbufp->buf + *rtn_offset, datap, (size_t)len);
194 
195 	return (0);
196 }
197 
198 
199 /*
200  * append data and control info in look buffer (list in the MT case)
201  *
202  * The only thing that can be in look buffer is a T_DISCON_IND,
203  * T_ORDREL_IND or a T_UDERROR_IND.
204  *
205  * It also enforces priority of T_DISCONDs over any T_ORDREL_IND
206  * already in the buffer. It assumes no T_ORDREL_IND is appended
207  * when there is already something on the looklist (error case) and
208  * that a T_ORDREL_IND if present will always be the first on the
209  * list.
210  *
211  * This also assumes ti_lock is held via sig_mutex_lock(),
212  * so signals are deferred here.
213  */
214 int
215 _t_register_lookevent(
216 	struct _ti_user *tiptr,
217 	caddr_t dptr,
218 	int dsize,
219 	caddr_t cptr,
220 	int csize)
221 {
222 	struct _ti_lookbufs *tlbs;
223 	int cbuf_size, dbuf_size;
224 
225 	assert(MUTEX_HELD(&tiptr->ti_lock));
226 
227 	cbuf_size = tiptr->ti_ctlsize;
228 	dbuf_size = tiptr->ti_rcvsize;
229 
230 	if ((csize > cbuf_size) || dsize > dbuf_size) {
231 		/* can't fit - return error */
232 		return (-1);	/* error */
233 	}
234 	/*
235 	 * Enforce priority of T_DISCON_IND over T_ORDREL_IND
236 	 * queued earlier.
237 	 * Note: Since there can be only at most one T_ORDREL_IND
238 	 * queued (more than one is error case), and we look for it
239 	 * on each append of T_DISCON_IND, it can only be at the
240 	 * head of the list if it is there.
241 	 */
242 	if (tiptr->ti_lookcnt > 0) { /* something already on looklist */
243 		if (cptr && csize >= (int)sizeof (struct T_discon_ind) &&
244 		    /* LINTED pointer cast */
245 		    *(t_scalar_t *)cptr == T_DISCON_IND) {
246 			/* appending discon ind */
247 			assert(tiptr->ti_servtype != T_CLTS);
248 			/* LINTED pointer cast */
249 			if (*(t_scalar_t *)tiptr->ti_lookbufs.tl_lookcbuf ==
250 			    T_ORDREL_IND) { /* T_ORDREL_IND is on list */
251 				/*
252 				 * Blow away T_ORDREL_IND
253 				 */
254 				_t_free_looklist_head(tiptr);
255 			}
256 		}
257 	}
258 	tlbs = &tiptr->ti_lookbufs;
259 	if (tiptr->ti_lookcnt > 0) {
260 		int listcount = 0;
261 		/*
262 		 * Allocate and append a new lookbuf to the
263 		 * existing list. (Should only happen in MT case)
264 		 */
265 		while (tlbs->tl_next != NULL) {
266 			listcount++;
267 			tlbs = tlbs->tl_next;
268 		}
269 		assert(tiptr->ti_lookcnt == listcount);
270 
271 		/*
272 		 * signals are deferred, calls to malloc() are safe.
273 		 */
274 		if ((tlbs->tl_next = malloc(sizeof (struct _ti_lookbufs))) ==
275 									NULL)
276 			return (-1); /* error */
277 		tlbs = tlbs->tl_next;
278 		/*
279 		 * Allocate the buffers. The sizes derived from the
280 		 * sizes of other related buffers. See _t_alloc_bufs()
281 		 * for details.
282 		 */
283 		if ((tlbs->tl_lookcbuf = malloc(cbuf_size)) == NULL) {
284 			/* giving up - free other memory chunks */
285 			free(tlbs);
286 			return (-1); /* error */
287 		}
288 		if ((dsize > 0) &&
289 		    ((tlbs->tl_lookdbuf = malloc(dbuf_size)) == NULL)) {
290 			/* giving up - free other memory chunks */
291 			free(tlbs->tl_lookcbuf);
292 			free(tlbs);
293 			return (-1); /* error */
294 		}
295 	}
296 
297 	(void) memcpy(tlbs->tl_lookcbuf, cptr, csize);
298 	if (dsize > 0)
299 		(void) memcpy(tlbs->tl_lookdbuf, dptr, dsize);
300 	tlbs->tl_lookdlen = dsize;
301 	tlbs->tl_lookclen = csize;
302 	tlbs->tl_next = NULL;
303 	tiptr->ti_lookcnt++;
304 	return (0);		/* ok return */
305 }
306 
307 /*
308  * Is there something that needs attention?
309  * Assumes tiptr->ti_lock held and this threads signals blocked
310  * in MT case.
311  */
312 int
313 _t_is_event(int fd, struct _ti_user *tiptr)
314 {
315 	int size, retval;
316 
317 	assert(MUTEX_HELD(&tiptr->ti_lock));
318 	if ((retval = ioctl(fd, I_NREAD, &size)) < 0) {
319 		t_errno = TSYSERR;
320 		return (-1);
321 	}
322 
323 	if ((retval > 0) || (tiptr->ti_lookcnt > 0)) {
324 		t_errno = TLOOK;
325 		return (-1);
326 	}
327 	return (0);
328 }
329 
330 /*
331  * wait for T_OK_ACK
332  * assumes tiptr->ti_lock held in MT case
333  */
334 int
335 _t_is_ok(int fd, struct _ti_user *tiptr, t_scalar_t type)
336 {
337 	struct strbuf ctlbuf;
338 	struct strbuf databuf;
339 	union T_primitives *pptr;
340 	int retval, cntlflag;
341 	int size;
342 	int didalloc, didralloc;
343 	int flags = 0;
344 
345 	assert(MUTEX_HELD(&tiptr->ti_lock));
346 	/*
347 	 * Acquire ctlbuf for use in sending/receiving control part
348 	 * of the message.
349 	 */
350 	if (_t_acquire_ctlbuf(tiptr, &ctlbuf, &didalloc) < 0)
351 		return (-1);
352 	/*
353 	 * Acquire databuf for use in sending/receiving data part
354 	 */
355 	if (_t_acquire_databuf(tiptr, &databuf, &didralloc) < 0) {
356 		if (didalloc)
357 			free(ctlbuf.buf);
358 		else
359 			tiptr->ti_ctlbuf = ctlbuf.buf;
360 		return (-1);
361 	}
362 
363 	/*
364 	 * Temporarily convert a non blocking endpoint to a
365 	 * blocking one and restore status later
366 	 */
367 	cntlflag = fcntl(fd, F_GETFL, 0);
368 	if (cntlflag & (O_NDELAY | O_NONBLOCK))
369 		(void) fcntl(fd, F_SETFL, cntlflag & ~(O_NDELAY | O_NONBLOCK));
370 
371 	flags = RS_HIPRI;
372 
373 	while ((retval = getmsg(fd, &ctlbuf, &databuf, &flags)) < 0) {
374 		if (errno == EINTR)
375 			continue;
376 		if (cntlflag & (O_NDELAY | O_NONBLOCK))
377 			(void) fcntl(fd, F_SETFL, cntlflag);
378 		t_errno = TSYSERR;
379 		goto err_out;
380 	}
381 
382 	/* did I get entire message */
383 	if (retval > 0) {
384 		if (cntlflag & (O_NDELAY | O_NONBLOCK))
385 			(void) fcntl(fd, F_SETFL, cntlflag);
386 		t_errno = TSYSERR;
387 		errno = EIO;
388 		goto err_out;
389 	}
390 
391 	/*
392 	 * is ctl part large enough to determine type?
393 	 */
394 	if (ctlbuf.len < (int)sizeof (t_scalar_t)) {
395 		if (cntlflag & (O_NDELAY | O_NONBLOCK))
396 			(void) fcntl(fd, F_SETFL, cntlflag);
397 		t_errno = TSYSERR;
398 		errno = EPROTO;
399 		goto err_out;
400 	}
401 
402 	if (cntlflag & (O_NDELAY | O_NONBLOCK))
403 		(void) fcntl(fd, F_SETFL, cntlflag);
404 
405 	/* LINTED pointer cast */
406 	pptr = (union T_primitives *)ctlbuf.buf;
407 
408 	switch (pptr->type) {
409 	case T_OK_ACK:
410 		if ((ctlbuf.len < (int)sizeof (struct T_ok_ack)) ||
411 		    (pptr->ok_ack.CORRECT_prim != type)) {
412 			t_errno = TSYSERR;
413 			errno = EPROTO;
414 			goto err_out;
415 		}
416 		if (didalloc)
417 			free(ctlbuf.buf);
418 		else
419 			tiptr->ti_ctlbuf = ctlbuf.buf;
420 		if (didralloc)
421 			free(databuf.buf);
422 		else
423 			tiptr->ti_rcvbuf = databuf.buf;
424 		return (0);
425 
426 	case T_ERROR_ACK:
427 		if ((ctlbuf.len < (int)sizeof (struct T_error_ack)) ||
428 		    (pptr->error_ack.ERROR_prim != type)) {
429 			t_errno = TSYSERR;
430 			errno = EPROTO;
431 			goto err_out;
432 		}
433 		/*
434 		 * if error is out of state and there is something
435 		 * on read queue, then indicate to user that
436 		 * there is something that needs attention
437 		 */
438 		if (pptr->error_ack.TLI_error == TOUTSTATE) {
439 			if ((retval = ioctl(fd, I_NREAD, &size)) < 0) {
440 				t_errno = TSYSERR;
441 				goto err_out;
442 			}
443 			if (retval > 0)
444 				t_errno = TLOOK;
445 			else
446 				t_errno = TOUTSTATE;
447 		} else {
448 			t_errno = pptr->error_ack.TLI_error;
449 			if (t_errno == TSYSERR)
450 				errno = pptr->error_ack.UNIX_error;
451 		}
452 		goto err_out;
453 	default:
454 		t_errno = TSYSERR;
455 		errno = EPROTO;
456 		/* fallthru to err_out: */
457 	}
458 err_out:
459 	if (didalloc)
460 		free(ctlbuf.buf);
461 	else
462 		tiptr->ti_ctlbuf = ctlbuf.buf;
463 	if (didralloc)
464 		free(databuf.buf);
465 	else
466 		tiptr->ti_rcvbuf = databuf.buf;
467 	return (-1);
468 }
469 
470 /*
471  * timod ioctl
472  */
473 int
474 _t_do_ioctl(int fd, char *buf, int size, int cmd, int *retlenp)
475 {
476 	int retval;
477 	struct strioctl strioc;
478 
479 	strioc.ic_cmd = cmd;
480 	strioc.ic_timout = -1;
481 	strioc.ic_len = size;
482 	strioc.ic_dp = buf;
483 
484 	if ((retval = ioctl(fd, I_STR, &strioc)) < 0) {
485 		t_errno = TSYSERR;
486 		return (-1);
487 	}
488 
489 	if (retval > 0) {
490 		t_errno = retval&0xff;
491 		if (t_errno == TSYSERR)
492 			errno = (retval >>  8)&0xff;
493 		return (-1);
494 	}
495 	if (retlenp)
496 		*retlenp = strioc.ic_len;
497 	return (0);
498 }
499 
500 /*
501  * alloc scratch buffers and look buffers
502  */
503 /* ARGSUSED */
504 static int
505 _t_alloc_bufs(int fd, struct _ti_user *tiptr, struct T_info_ack *tsap)
506 {
507 	unsigned int size1, size2;
508 	t_scalar_t optsize;
509 	unsigned int csize, dsize, asize, osize;
510 	char *ctlbuf, *rcvbuf;
511 	char *lookdbuf, *lookcbuf;
512 
513 	csize = _t_setsize(tsap->CDATA_size);
514 	dsize = _t_setsize(tsap->DDATA_size);
515 
516 	size1 = _T_MAX(csize, dsize);
517 
518 	if (size1 != 0) {
519 		if ((rcvbuf = malloc(size1)) == NULL)
520 			return (-1);
521 		if ((lookdbuf = malloc(size1)) == NULL) {
522 			free(rcvbuf);
523 			return (-1);
524 		}
525 	} else {
526 		rcvbuf = NULL;
527 		lookdbuf = NULL;
528 	}
529 
530 	asize = _t_setsize(tsap->ADDR_size);
531 	if (tsap->OPT_size >= 0)
532 		/* compensate for XTI level options */
533 		optsize = tsap->OPT_size + TX_XTI_LEVEL_MAX_OPTBUF;
534 	else
535 		optsize = tsap->OPT_size;
536 	osize = _t_setsize(optsize);
537 
538 	/*
539 	 * We compute the largest buffer size needed for this provider by
540 	 * adding the components. [ An extra sizeof (t_scalar_t) is added to
541 	 * take care of rounding off for alignment) for each buffer ]
542 	 * The goal here is compute the size of largest possible buffer that
543 	 * might be needed to hold a TPI message for the transport provider
544 	 * on this endpoint.
545 	 * Note: T_ADDR_ACK contains potentially two address buffers.
546 	 */
547 
548 	size2 = (unsigned int)sizeof (union T_primitives) /* TPI struct */
549 	    + asize + (unsigned int)sizeof (t_scalar_t) +
550 		/* first addr buffer plus alignment */
551 	    asize + (unsigned int)sizeof (t_scalar_t) +
552 		/* second addr buffer plus ailignment */
553 	    osize + (unsigned int)sizeof (t_scalar_t);
554 		/* option buffer plus alignment */
555 
556 	if ((ctlbuf = malloc(size2)) == NULL) {
557 		if (size1 != 0) {
558 			free(rcvbuf);
559 			free(lookdbuf);
560 		}
561 		return (-1);
562 	}
563 
564 	if ((lookcbuf = malloc(size2)) == NULL) {
565 		if (size1 != 0) {
566 			free(rcvbuf);
567 			free(lookdbuf);
568 		}
569 		free(ctlbuf);
570 		return (-1);
571 	}
572 
573 	tiptr->ti_rcvsize = size1;
574 	tiptr->ti_rcvbuf = rcvbuf;
575 	tiptr->ti_ctlsize = size2;
576 	tiptr->ti_ctlbuf = ctlbuf;
577 
578 	/*
579 	 * Note: The head of the lookbuffers list (and associated buffers)
580 	 * is allocated here on initialization.
581 	 * More allocated on demand.
582 	 */
583 	tiptr->ti_lookbufs.tl_lookclen = 0;
584 	tiptr->ti_lookbufs.tl_lookcbuf = lookcbuf;
585 	tiptr->ti_lookbufs.tl_lookdlen = 0;
586 	tiptr->ti_lookbufs.tl_lookdbuf = lookdbuf;
587 
588 	return (0);
589 }
590 
591 
592 /*
593  * set sizes of buffers
594  */
595 static unsigned int
596 _t_setsize(t_scalar_t infosize)
597 {
598 	switch (infosize) {
599 	case T_INFINITE /* -1 */:
600 		return (DEFSIZE);
601 	case T_INVALID /* -2 */:
602 		return (0);
603 	default:
604 		return ((unsigned int) infosize);
605 	}
606 }
607 
608 static void
609 _t_reinit_tiptr(struct _ti_user *tiptr)
610 {
611 	/*
612 	 * Note: This routine is designed for a "reinitialization"
613 	 * Following fields are not modified here and preserved.
614 	 *	 - ti_fd field
615 	 *	 - ti_lock
616 	 *	 - ti_next
617 	 *	 - ti_prev
618 	 * The above fields have to be separately initialized if this
619 	 * is used for a fresh initialization.
620 	 */
621 
622 	tiptr->ti_flags = 0;
623 	tiptr->ti_rcvsize = 0;
624 	tiptr->ti_rcvbuf = NULL;
625 	tiptr->ti_ctlsize = 0;
626 	tiptr->ti_ctlbuf = NULL;
627 	tiptr->ti_lookbufs.tl_lookdbuf = NULL;
628 	tiptr->ti_lookbufs.tl_lookcbuf = NULL;
629 	tiptr->ti_lookbufs.tl_lookdlen = 0;
630 	tiptr->ti_lookbufs.tl_lookclen = 0;
631 	tiptr->ti_lookbufs.tl_next = NULL;
632 	tiptr->ti_maxpsz = 0;
633 	tiptr->ti_tsdusize = 0;
634 	tiptr->ti_etsdusize = 0;
635 	tiptr->ti_cdatasize = 0;
636 	tiptr->ti_ddatasize = 0;
637 	tiptr->ti_servtype = 0;
638 	tiptr->ti_lookcnt = 0;
639 	tiptr->ti_state = 0;
640 	tiptr->ti_ocnt = 0;
641 	tiptr->ti_prov_flag = 0;
642 	tiptr->ti_qlen = 0;
643 }
644 
645 /*
646  * Link manipulation routines.
647  *
648  * NBUCKETS hash buckets are used to give fast
649  * access. The number is derived from the file descriptor softlimit
650  * number (64).
651  */
652 
653 #define	NBUCKETS	64
654 static struct _ti_user		*hash_bucket[NBUCKETS];
655 
656 /*
657  * Allocates a new link and returns a pointer to it.
658  * Assumes that the caller is holding _ti_userlock via sig_mutex_lock(),
659  * so signals are deferred here.
660  */
661 static struct _ti_user *
662 add_tilink(int s)
663 {
664 	struct _ti_user	*tiptr;
665 	struct _ti_user	*prevptr;
666 	struct _ti_user	*curptr;
667 	int	x;
668 	struct stat stbuf;
669 
670 	assert(MUTEX_HELD(&_ti_userlock));
671 
672 	if (s < 0 || fstat(s, &stbuf) != 0)
673 		return (NULL);
674 
675 	x = s % NBUCKETS;
676 	if (hash_bucket[x] != NULL) {
677 		/*
678 		 * Walk along the bucket looking for
679 		 * duplicate entry or the end.
680 		 */
681 		for (curptr = hash_bucket[x]; curptr != NULL;
682 						curptr = curptr->ti_next) {
683 			if (curptr->ti_fd == s) {
684 				/*
685 				 * This can happen when the user has close(2)'ed
686 				 * a descriptor and then been allocated it again
687 				 * via t_open().
688 				 *
689 				 * We will re-use the existing _ti_user struct
690 				 * in this case rather than using the one
691 				 * we allocated above.  If there are buffers
692 				 * associated with the existing _ti_user
693 				 * struct, they may not be the correct size,
694 				 * so we can not use it.  We free them
695 				 * here and re-allocate a new ones
696 				 * later on.
697 				 */
698 				if (curptr->ti_rcvbuf != NULL)
699 					free(curptr->ti_rcvbuf);
700 				free(curptr->ti_ctlbuf);
701 				_t_free_lookbufs(curptr);
702 				_t_reinit_tiptr(curptr);
703 				curptr->ti_rdev = stbuf.st_rdev;
704 				curptr->ti_ino = stbuf.st_ino;
705 				return (curptr);
706 			}
707 			prevptr = curptr;
708 		}
709 		/*
710 		 * Allocate and link in a new one.
711 		 */
712 		if ((tiptr = malloc(sizeof (*tiptr))) == NULL)
713 			return (NULL);
714 		/*
715 		 * First initialize fields common with reinitialization and
716 		 * then other fields too
717 		 */
718 		_t_reinit_tiptr(tiptr);
719 		prevptr->ti_next = tiptr;
720 		tiptr->ti_prev = prevptr;
721 	} else {
722 		/*
723 		 * First entry.
724 		 */
725 		if ((tiptr = malloc(sizeof (*tiptr))) == NULL)
726 			return (NULL);
727 		_t_reinit_tiptr(tiptr);
728 		hash_bucket[x] = tiptr;
729 		tiptr->ti_prev = NULL;
730 	}
731 	tiptr->ti_next = NULL;
732 	tiptr->ti_fd = s;
733 	tiptr->ti_rdev = stbuf.st_rdev;
734 	tiptr->ti_ino = stbuf.st_ino;
735 	(void) mutex_init(&tiptr->ti_lock, USYNC_THREAD, NULL);
736 	return (tiptr);
737 }
738 
739 /*
740  * Find a link by descriptor
741  * Assumes that the caller is holding _ti_userlock.
742  */
743 static struct _ti_user *
744 find_tilink(int s)
745 {
746 	struct _ti_user	*curptr;
747 	int	x;
748 	struct stat stbuf;
749 
750 	assert(MUTEX_HELD(&_ti_userlock));
751 
752 	if (s < 0 || fstat(s, &stbuf) != 0)
753 		return (NULL);
754 
755 	x = s % NBUCKETS;
756 	/*
757 	 * Walk along the bucket looking for the descriptor.
758 	 */
759 	for (curptr = hash_bucket[x]; curptr; curptr = curptr->ti_next) {
760 		if (curptr->ti_fd == s) {
761 			if (curptr->ti_rdev == stbuf.st_rdev &&
762 			    curptr->ti_ino == stbuf.st_ino)
763 				return (curptr);
764 			(void) _t_delete_tilink(s);
765 		}
766 	}
767 	return (NULL);
768 }
769 
770 /*
771  * Assumes that the caller is holding _ti_userlock.
772  * Also assumes that all signals are blocked.
773  */
774 int
775 _t_delete_tilink(int s)
776 {
777 	struct _ti_user	*curptr;
778 	int	x;
779 
780 	/*
781 	 * Find the link.
782 	 */
783 	assert(MUTEX_HELD(&_ti_userlock));
784 	if (s < 0)
785 		return (-1);
786 	x = s % NBUCKETS;
787 	/*
788 	 * Walk along the bucket looking for
789 	 * the descriptor.
790 	 */
791 	for (curptr = hash_bucket[x]; curptr; curptr = curptr->ti_next) {
792 		if (curptr->ti_fd == s) {
793 			struct _ti_user	*nextptr;
794 			struct _ti_user	*prevptr;
795 
796 			nextptr = curptr->ti_next;
797 			prevptr = curptr->ti_prev;
798 			if (prevptr)
799 				prevptr->ti_next = nextptr;
800 			else
801 				hash_bucket[x] = nextptr;
802 			if (nextptr)
803 				nextptr->ti_prev = prevptr;
804 
805 			/*
806 			 * free resource associated with the curptr
807 			 */
808 			if (curptr->ti_rcvbuf != NULL)
809 				free(curptr->ti_rcvbuf);
810 			free(curptr->ti_ctlbuf);
811 			_t_free_lookbufs(curptr);
812 			(void) mutex_destroy(&curptr->ti_lock);
813 			free(curptr);
814 			return (0);
815 		}
816 	}
817 	return (-1);
818 }
819 
820 /*
821  * Allocate a TLI state structure and synch it with the kernel
822  * *tiptr is returned
823  * Assumes that the caller is holding the _ti_userlock and has blocked signals.
824  *
825  * This function may fail the first time it is called with given transport if it
826  * doesn't support T_CAPABILITY_REQ TPI message.
827  */
828 struct _ti_user *
829 _t_create(int fd, struct t_info *info, int api_semantics, int *t_capreq_failed)
830 {
831 	/*
832 	 * Aligned data buffer for ioctl.
833 	 */
834 	union {
835 		struct ti_sync_req ti_req;
836 		struct ti_sync_ack ti_ack;
837 		union T_primitives t_prim;
838 		char pad[128];
839 	} ioctl_data;
840 	void *ioctlbuf = &ioctl_data; /* TI_SYNC/GETINFO with room to grow */
841 			    /* preferred location first local variable */
842 			    /*  see note below */
843 	/*
844 	 * Note: We use "ioctlbuf" allocated on stack above with
845 	 * room to grow since (struct ti_sync_ack) can grow in size
846 	 * on future kernels. (We do not use malloc'd "ti_ctlbuf" as that
847 	 * part of instance structure which may not exist yet)
848 	 * Its preferred declaration location is first local variable in this
849 	 * procedure as bugs causing overruns will be detectable on
850 	 * platforms where procedure calling conventions place return
851 	 * address on stack (such as x86) instead of causing silent
852 	 * memory corruption.
853 	 */
854 	struct ti_sync_req *tsrp = (struct ti_sync_req *)ioctlbuf;
855 	struct ti_sync_ack *tsap = (struct ti_sync_ack *)ioctlbuf;
856 	struct T_capability_req *tcrp = (struct T_capability_req *)ioctlbuf;
857 	struct T_capability_ack *tcap = (struct T_capability_ack *)ioctlbuf;
858 	struct T_info_ack *tiap = &tcap->INFO_ack;
859 	struct _ti_user	*ntiptr;
860 	int expected_acksize;
861 	int retlen, rstate, sv_errno, rval;
862 
863 	assert(MUTEX_HELD(&_ti_userlock));
864 
865 	/*
866 	 * Use ioctl required for sync'ing state with kernel.
867 	 * We use two ioctls. TI_CAPABILITY is used to get TPI information and
868 	 * TI_SYNC is used to synchronise state with timod. Statically linked
869 	 * TLI applications will no longer work on older releases where there
870 	 * are no TI_SYNC and TI_CAPABILITY.
871 	 */
872 
873 	/*
874 	 * Request info about transport.
875 	 * Assumes that TC1_INFO should always be implemented.
876 	 * For TI_CAPABILITY size argument to ioctl specifies maximum buffer
877 	 * size.
878 	 */
879 	tcrp->PRIM_type = T_CAPABILITY_REQ;
880 	tcrp->CAP_bits1 = TC1_INFO | TC1_ACCEPTOR_ID;
881 	rval = _t_do_ioctl(fd, (char *)ioctlbuf,
882 	    (int)sizeof (struct T_capability_ack), TI_CAPABILITY, &retlen);
883 	expected_acksize = (int)sizeof (struct T_capability_ack);
884 
885 	if (rval < 0) {
886 		/*
887 		 * TI_CAPABILITY may fail when transport provider doesn't
888 		 * support T_CAPABILITY_REQ message type. In this case file
889 		 * descriptor may be unusable (when transport provider sent
890 		 * M_ERROR in response to T_CAPABILITY_REQ). This should only
891 		 * happen once during system lifetime for given transport
892 		 * provider since timod will emulate TI_CAPABILITY after it
893 		 * detected the failure.
894 		 */
895 		if (t_capreq_failed != NULL)
896 			*t_capreq_failed = 1;
897 		return (NULL);
898 	}
899 
900 	if (retlen != expected_acksize) {
901 		t_errno = TSYSERR;
902 		errno = EIO;
903 		return (NULL);
904 	}
905 
906 	if ((tcap->CAP_bits1 & TC1_INFO) == 0) {
907 		t_errno = TSYSERR;
908 		errno = EPROTO;
909 		return (NULL);
910 	}
911 	if (info != NULL) {
912 		if (tiap->PRIM_type != T_INFO_ACK) {
913 			t_errno = TSYSERR;
914 			errno = EPROTO;
915 			return (NULL);
916 		}
917 		info->addr = tiap->ADDR_size;
918 		info->options = tiap->OPT_size;
919 		info->tsdu = tiap->TSDU_size;
920 		info->etsdu = tiap->ETSDU_size;
921 		info->connect = tiap->CDATA_size;
922 		info->discon = tiap->DDATA_size;
923 		info->servtype = tiap->SERV_type;
924 		if (_T_IS_XTI(api_semantics)) {
925 			/*
926 			 * XTI ONLY - TLI "struct t_info" does not
927 			 * have "flags"
928 			 */
929 			info->flags = 0;
930 			if (tiap->PROVIDER_flag & (SENDZERO|OLD_SENDZERO))
931 				info->flags |= T_SENDZERO;
932 			/*
933 			 * Some day there MAY be a NEW bit in T_info_ack
934 			 * PROVIDER_flag namespace exposed by TPI header
935 			 * <sys/tihdr.h> which will functionally correspond to
936 			 * role played by T_ORDRELDATA in info->flags namespace
937 			 * When that bit exists, we can add a test to see if
938 			 * it is set and set T_ORDRELDATA.
939 			 * Note: Currently only mOSI ("minimal OSI") provider
940 			 * is specified to use T_ORDRELDATA so probability of
941 			 * needing it is minimal.
942 			 */
943 		}
944 	}
945 
946 	/*
947 	 * if first time or no instance (after fork/exec, dup etc,
948 	 * then create initialize data structure
949 	 * and allocate buffers
950 	 */
951 	ntiptr = add_tilink(fd);
952 	if (ntiptr == NULL) {
953 		t_errno = TSYSERR;
954 		errno = ENOMEM;
955 		return (NULL);
956 	}
957 	sig_mutex_lock(&ntiptr->ti_lock);
958 
959 	/*
960 	 * Allocate buffers for the new descriptor
961 	 */
962 	if (_t_alloc_bufs(fd, ntiptr, tiap) < 0) {
963 		sv_errno = errno;
964 		(void) _t_delete_tilink(fd);
965 		t_errno = TSYSERR;
966 		sig_mutex_unlock(&ntiptr->ti_lock);
967 		errno = sv_errno;
968 		return (NULL);
969 	}
970 
971 	/* Fill instance structure */
972 
973 	ntiptr->ti_lookcnt = 0;
974 	ntiptr->ti_flags = USED;
975 	ntiptr->ti_state = T_UNINIT;
976 	ntiptr->ti_ocnt = 0;
977 
978 	assert(tiap->TIDU_size > 0);
979 	ntiptr->ti_maxpsz = tiap->TIDU_size;
980 	assert(tiap->TSDU_size >= -2);
981 	ntiptr->ti_tsdusize = tiap->TSDU_size;
982 	assert(tiap->ETSDU_size >= -2);
983 	ntiptr->ti_etsdusize = tiap->ETSDU_size;
984 	assert(tiap->CDATA_size >= -2);
985 	ntiptr->ti_cdatasize = tiap->CDATA_size;
986 	assert(tiap->DDATA_size >= -2);
987 	ntiptr->ti_ddatasize = tiap->DDATA_size;
988 	ntiptr->ti_servtype = tiap->SERV_type;
989 	ntiptr->ti_prov_flag = tiap->PROVIDER_flag;
990 
991 	if ((tcap->CAP_bits1 & TC1_ACCEPTOR_ID) != 0) {
992 		ntiptr->acceptor_id = tcap->ACCEPTOR_id;
993 		ntiptr->ti_flags |= V_ACCEPTOR_ID;
994 	}
995 	else
996 		ntiptr->ti_flags &= ~V_ACCEPTOR_ID;
997 
998 	/*
999 	 * Restore state from kernel (caveat some heuristics)
1000 	 */
1001 	switch (tiap->CURRENT_state) {
1002 
1003 	case TS_UNBND:
1004 		ntiptr->ti_state = T_UNBND;
1005 		break;
1006 
1007 	case TS_IDLE:
1008 		if ((rstate = _t_adjust_state(fd, T_IDLE)) < 0) {
1009 			sv_errno = errno;
1010 			(void) _t_delete_tilink(fd);
1011 			sig_mutex_unlock(&ntiptr->ti_lock);
1012 			errno = sv_errno;
1013 			return (NULL);
1014 		}
1015 		ntiptr->ti_state = rstate;
1016 		break;
1017 
1018 	case TS_WRES_CIND:
1019 		ntiptr->ti_state = T_INCON;
1020 		break;
1021 
1022 	case TS_WCON_CREQ:
1023 		ntiptr->ti_state = T_OUTCON;
1024 		break;
1025 
1026 	case TS_DATA_XFER:
1027 		if ((rstate = _t_adjust_state(fd, T_DATAXFER)) < 0)  {
1028 			sv_errno = errno;
1029 			(void) _t_delete_tilink(fd);
1030 			sig_mutex_unlock(&ntiptr->ti_lock);
1031 			errno = sv_errno;
1032 			return (NULL);
1033 		}
1034 		ntiptr->ti_state = rstate;
1035 		break;
1036 
1037 	case TS_WIND_ORDREL:
1038 		ntiptr->ti_state = T_OUTREL;
1039 		break;
1040 
1041 	case TS_WREQ_ORDREL:
1042 		if ((rstate = _t_adjust_state(fd, T_INREL)) < 0)  {
1043 			sv_errno = errno;
1044 			(void) _t_delete_tilink(fd);
1045 			sig_mutex_unlock(&ntiptr->ti_lock);
1046 			errno = sv_errno;
1047 			return (NULL);
1048 		}
1049 		ntiptr->ti_state = rstate;
1050 		break;
1051 	default:
1052 		t_errno = TSTATECHNG;
1053 		(void) _t_delete_tilink(fd);
1054 		sig_mutex_unlock(&ntiptr->ti_lock);
1055 		return (NULL);
1056 	}
1057 
1058 	/*
1059 	 * Sync information with timod.
1060 	 */
1061 	tsrp->tsr_flags = TSRF_QLEN_REQ;
1062 
1063 	rval = _t_do_ioctl(fd, ioctlbuf,
1064 	    (int)sizeof (struct ti_sync_req), TI_SYNC, &retlen);
1065 	expected_acksize = (int)sizeof (struct ti_sync_ack);
1066 
1067 	if (rval < 0) {
1068 		sv_errno = errno;
1069 		(void) _t_delete_tilink(fd);
1070 		t_errno = TSYSERR;
1071 		sig_mutex_unlock(&ntiptr->ti_lock);
1072 		errno = sv_errno;
1073 		return (NULL);
1074 	}
1075 
1076 	/*
1077 	 * This is a "less than" check as "struct ti_sync_ack" returned by
1078 	 * TI_SYNC can grow in size in future kernels. If/when a statically
1079 	 * linked application is run on a future kernel, it should not fail.
1080 	 */
1081 	if (retlen < expected_acksize) {
1082 		sv_errno = errno;
1083 		(void) _t_delete_tilink(fd);
1084 		t_errno = TSYSERR;
1085 		sig_mutex_unlock(&ntiptr->ti_lock);
1086 		errno = sv_errno;
1087 		return (NULL);
1088 	}
1089 
1090 	if (_T_IS_TLI(api_semantics))
1091 		tsap->tsa_qlen = 0; /* not needed for TLI */
1092 
1093 	ntiptr->ti_qlen = tsap->tsa_qlen;
1094 	sig_mutex_unlock(&ntiptr->ti_lock);
1095 	return (ntiptr);
1096 }
1097 
1098 
/*
 * Refine the state "instate" chosen by the caller by peeking (I_PEEK) at
 * the message, if any, waiting at the stream head of "fd".
 *
 * Returns the possibly adjusted state.  If the I_PEEK ioctl fails, t_errno
 * is set to TSYSERR and -1 is returned; errno is not modified here, so it
 * still holds whatever ioctl() stored.
 */
static int
_t_adjust_state(int fd, int instate)
{
	char ctlbuf[sizeof (t_scalar_t)];
	char databuf[sizeof (int)]; /* size unimportant - anything > 0 */
	struct strpeek arg;
	int32_t prim = 0;
	int have_prim, bare_data;
	int outstate, retval;

	/*
	 * Peek at message on stream head (if any)
	 * and see if it is data
	 */
	arg.ctlbuf.buf = ctlbuf;
	arg.ctlbuf.maxlen = (int)sizeof (ctlbuf);
	arg.ctlbuf.len = 0;

	arg.databuf.buf = databuf;
	arg.databuf.maxlen = (int)sizeof (databuf);
	arg.databuf.len = 0;

	arg.flags = 0;

	if ((retval = ioctl(fd, I_PEEK, &arg)) < 0)  {
		t_errno = TSYSERR;
		return (-1);
	}
	outstate = instate;

	/*
	 * Nothing at the stream head: keep the caller's state.
	 */
	if (retval == 0)
		return (outstate);

	/*
	 * Extract the primitive type with memcpy() instead of dereferencing
	 * a cast pointer: ctlbuf is a plain char array, so it carries no
	 * alignment guarantee for a 32-bit load.
	 */
	have_prim = (arg.ctlbuf.len == (int)sizeof (prim));
	if (have_prim)
		(void) memcpy(&prim, ctlbuf, sizeof (prim));

	/* Zero-length control part together with a non-zero data length */
	bare_data = (arg.ctlbuf.len == 0 && arg.databuf.len != 0);

	/*
	 * Something is at the stream head, so adjust "outstate" based on
	 * some heuristics.
	 */
	switch (instate) {
	case T_IDLE:
	case T_INREL:
		/*
		 * The following heuristic is to handle data ahead of
		 * T_DISCON_IND (for T_IDLE) or T_ORDREL_IND (for T_INREL)
		 * indications that might be at the stream head waiting to
		 * be read (T_DATA_IND or M_DATA)
		 */
		if ((have_prim && prim == T_DATA_IND) || bare_data)
			outstate = T_DATAXFER;
		break;
	case T_DATAXFER:
		/*
		 * The following heuristic is to handle
		 * the case where the connection is established
		 * and in data transfer state at the provider
		 * but the T_CONN_CON has not yet been read
		 * from the stream head.
		 */
		if (have_prim && prim == T_CONN_CON)
			outstate = T_OUTCON;
		break;
	default:
		break;
	}
	return (outstate);
}
1179 
1180 /*
1181  * Assumes caller has blocked signals at least in this thread (for safe
1182  * malloc/free operations)
1183  */
1184 static int
1185 _t_cbuf_alloc(struct _ti_user *tiptr, char **retbuf)
1186 {
1187 	unsigned	size2;
1188 
1189 	assert(MUTEX_HELD(&tiptr->ti_lock));
1190 	size2 = tiptr->ti_ctlsize; /* same size as default ctlbuf */
1191 
1192 	if ((*retbuf = malloc(size2)) == NULL) {
1193 		return (-1);
1194 	}
1195 	return (size2);
1196 }
1197 
1198 
1199 /*
1200  * Assumes caller has blocked signals at least in this thread (for safe
1201  * malloc/free operations)
1202  */
1203 int
1204 _t_rbuf_alloc(struct _ti_user *tiptr, char **retbuf)
1205 {
1206 	unsigned	size1;
1207 
1208 	assert(MUTEX_HELD(&tiptr->ti_lock));
1209 	size1 = tiptr->ti_rcvsize; /* same size as default rcvbuf */
1210 
1211 	if ((*retbuf = malloc(size1)) == NULL) {
1212 		return (-1);
1213 	}
1214 	return (size1);
1215 }
1216 
1217 /*
1218  * Free lookbuffer structures and associated resources
1219  * Assumes ti_lock held for MT case.
1220  */
1221 static void
1222 _t_free_lookbufs(struct _ti_user *tiptr)
1223 {
1224 	struct _ti_lookbufs *tlbs, *prev_tlbs, *head_tlbs;
1225 
1226 	/*
1227 	 * Assertion:
1228 	 * The structure lock should be held or the global list
1229 	 * manipulation lock. The assumption is that nothing
1230 	 * else can access the descriptor since global list manipulation
1231 	 * lock is held so it is OK to manipulate fields without the
1232 	 * structure lock
1233 	 */
1234 	assert(MUTEX_HELD(&tiptr->ti_lock) || MUTEX_HELD(&_ti_userlock));
1235 
1236 	/*
1237 	 * Free only the buffers in the first lookbuf
1238 	 */
1239 	head_tlbs = &tiptr->ti_lookbufs;
1240 	if (head_tlbs->tl_lookdbuf != NULL) {
1241 		free(head_tlbs->tl_lookdbuf);
1242 		head_tlbs->tl_lookdbuf = NULL;
1243 	}
1244 	free(head_tlbs->tl_lookcbuf);
1245 	head_tlbs->tl_lookcbuf = NULL;
1246 	/*
1247 	 * Free the node and the buffers in the rest of the
1248 	 * list
1249 	 */
1250 
1251 	tlbs = head_tlbs->tl_next;
1252 	head_tlbs->tl_next = NULL;
1253 
1254 	while (tlbs != NULL) {
1255 		if (tlbs->tl_lookdbuf != NULL)
1256 			free(tlbs->tl_lookdbuf);
1257 		free(tlbs->tl_lookcbuf);
1258 		prev_tlbs = tlbs;
1259 		tlbs = tlbs->tl_next;
1260 		free(prev_tlbs);
1261 	}
1262 }
1263 
1264 /*
1265  * Free lookbuffer event list head.
1266  * Consume current lookbuffer event
1267  * Assumes ti_lock held for MT case.
1268  * Note: The head of this list is part of the instance
1269  * structure so the code is a little unorthodox.
1270  */
1271 void
1272 _t_free_looklist_head(struct _ti_user *tiptr)
1273 {
1274 	struct _ti_lookbufs *tlbs, *next_tlbs;
1275 
1276 	tlbs = &tiptr->ti_lookbufs;
1277 
1278 	if (tlbs->tl_next) {
1279 		/*
1280 		 * Free the control and data buffers
1281 		 */
1282 		if (tlbs->tl_lookdbuf != NULL)
1283 			free(tlbs->tl_lookdbuf);
1284 		free(tlbs->tl_lookcbuf);
1285 		/*
1286 		 * Replace with next lookbuf event contents
1287 		 */
1288 		next_tlbs = tlbs->tl_next;
1289 		tlbs->tl_next = next_tlbs->tl_next;
1290 		tlbs->tl_lookcbuf = next_tlbs->tl_lookcbuf;
1291 		tlbs->tl_lookclen = next_tlbs->tl_lookclen;
1292 		tlbs->tl_lookdbuf = next_tlbs->tl_lookdbuf;
1293 		tlbs->tl_lookdlen = next_tlbs->tl_lookdlen;
1294 		free(next_tlbs);
1295 		/*
1296 		 * Decrement the flag - should never get to zero.
1297 		 * in this path
1298 		 */
1299 		tiptr->ti_lookcnt--;
1300 		assert(tiptr->ti_lookcnt > 0);
1301 	} else {
1302 		/*
1303 		 * No more look buffer events - just clear the flag
1304 		 * and leave the buffers alone
1305 		 */
1306 		assert(tiptr->ti_lookcnt == 1);
1307 		tiptr->ti_lookcnt = 0;
1308 	}
1309 }
1310 
1311 /*
1312  * Discard lookbuffer events.
1313  * Assumes ti_lock held for MT case.
1314  */
1315 void
1316 _t_flush_lookevents(struct _ti_user *tiptr)
1317 {
1318 	struct _ti_lookbufs *tlbs, *prev_tlbs;
1319 
1320 	/*
1321 	 * Leave the first nodes buffers alone (i.e. allocated)
1322 	 * but reset the flag.
1323 	 */
1324 	assert(MUTEX_HELD(&tiptr->ti_lock));
1325 	tiptr->ti_lookcnt = 0;
1326 	/*
1327 	 * Blow away the rest of the list
1328 	 */
1329 	tlbs = tiptr->ti_lookbufs.tl_next;
1330 	tiptr->ti_lookbufs.tl_next = NULL;
1331 	while (tlbs != NULL) {
1332 		if (tlbs->tl_lookdbuf != NULL)
1333 			free(tlbs->tl_lookdbuf);
1334 		free(tlbs->tl_lookcbuf);
1335 		prev_tlbs = tlbs;
1336 		tlbs = tlbs->tl_next;
1337 		free(prev_tlbs);
1338 	}
1339 }
1340 
1341 
/*
 * Hand out a control buffer described by "ctlbufp".
 *
 * If the control buffer cached in the instance structure is available
 * (non-null), it is taken and the cached pointer is set to null to mark it
 * busy.  If it is busy (possible in MT programs), a new buffer is allocated
 * and *didallocp is set to 1 so the caller knows it has to free it.
 *
 * Returns 0 on success.  On allocation failure sets t_errno to TSYSERR and
 * returns -1.
 */
int
_t_acquire_ctlbuf(
	struct _ti_user *tiptr,
	struct strbuf *ctlbufp,
	int *didallocp)
{
	*didallocp = 0;
	ctlbufp->len = 0;

	if (tiptr->ti_ctlbuf != NULL) {
		/* Cached buffer is free: take it and mark it busy */
		ctlbufp->maxlen = tiptr->ti_ctlsize;
		ctlbufp->buf = tiptr->ti_ctlbuf;
		tiptr->ti_ctlbuf = NULL;
		return (0);
	}

	/*
	 * tiptr->ti_ctlbuf is in use
	 * allocate new buffer; the caller frees it after use.
	 */
	ctlbufp->maxlen = _t_cbuf_alloc(tiptr, &ctlbufp->buf);
	if (ctlbufp->maxlen < 0) {
		t_errno = TSYSERR;
		return (-1);
	}
	*didallocp = 1;
	return (0);
}
1376 
/*
 * Hand out a receive (data) buffer described by "databufp".
 *
 * If the receive buffer cached in the instance structure is available
 * (non-null), it is taken and the cached pointer is set to null to mark it
 * busy.  If it is busy (possible in MT programs), a new buffer is allocated
 * and *didallocp is set to 1 so the caller knows it has to free it.
 *
 * Note: The receive buffer pointer can also be null if the transport
 * provider does not support connect/disconnect data, (e.g. TCP) - not
 * just when it is "busy". In that case ti_rcvsize is 0, and the databuf
 * is set up to point to a null buffer of length 0, which is the right
 * thing to do for that case.
 *
 * Returns 0 on success.  On allocation failure sets t_errno to TSYSERR and
 * returns -1.
 */
int
_t_acquire_databuf(
	struct _ti_user *tiptr,
	struct strbuf *databufp,
	int *didallocp)
{
	*didallocp = 0;
	databufp->len = 0;

	if (tiptr->ti_rcvbuf != NULL) {
		/* Cached buffer is free: take it and mark it busy */
		assert(tiptr->ti_rcvsize != 0);
		databufp->maxlen = tiptr->ti_rcvsize;
		databufp->buf = tiptr->ti_rcvbuf;
		tiptr->ti_rcvbuf = NULL;
		return (0);
	}

	if (tiptr->ti_rcvsize == 0) {
		/* No receive buffer configured at all: empty databuf */
		databufp->maxlen = 0;
		databufp->buf = NULL;
		return (0);
	}

	/*
	 * tiptr->ti_rcvbuf is in use
	 * allocate new buffer; the caller frees it after use.
	 */
	databufp->maxlen = _t_rbuf_alloc(tiptr, &databufp->buf);
	if (databufp->maxlen < 0) {
		t_errno = TSYSERR;
		return (-1);
	}
	*didallocp = 1;
	return (0);
}
1420 
1421 /*
1422  * This routine requests timod to look for any expedited data
1423  * queued in the "receive buffers" in the kernel. Used for XTI
1424  * t_look() semantics for transports that send expedited data
1425  * data inline (e.g TCP).
1426  * Returns -1 for failure
1427  * Returns 0 for success
1428  * 	On a successful return, the location pointed by "expedited_queuedp"
1429  * 	contains
1430  *		0 if no expedited data is found queued in "receive buffers"
1431  *		1 if expedited data is found queued in "receive buffers"
1432  */
1433 
1434 int
1435 _t_expinline_queued(int fd, int *expedited_queuedp)
1436 {
1437 	union {
1438 		struct ti_sync_req ti_req;
1439 		struct ti_sync_ack ti_ack;
1440 		char pad[128];
1441 	} ioctl_data;
1442 	void *ioctlbuf = &ioctl_data; /* for TI_SYNC with room to grow */
1443 			    /* preferred location first local variable */
1444 			    /* see note in _t_create above */
1445 	struct ti_sync_req *tsrp = (struct ti_sync_req *)ioctlbuf;
1446 	struct ti_sync_ack *tsap = (struct ti_sync_ack *)ioctlbuf;
1447 	int rval, retlen;
1448 
1449 	*expedited_queuedp = 0;
1450 	/* request info on rq expinds  */
1451 	tsrp->tsr_flags = TSRF_IS_EXP_IN_RCVBUF;
1452 	do {
1453 		rval = _t_do_ioctl(fd, ioctlbuf,
1454 		    (int)sizeof (struct T_info_req), TI_SYNC, &retlen);
1455 	} while (rval < 0 && errno == EINTR);
1456 
1457 	if (rval < 0)
1458 		return (-1);
1459 
1460 	/*
1461 	 * This is a "less than" check as "struct ti_sync_ack" returned by
1462 	 * TI_SYNC can grow in size in future kernels. If/when a statically
1463 	 * linked application is run on a future kernel, it should not fail.
1464 	 */
1465 	if (retlen < (int)sizeof (struct ti_sync_ack)) {
1466 		t_errno = TSYSERR;
1467 		errno = EIO;
1468 		return (-1);
1469 	}
1470 	if (tsap->tsa_flags & TSAF_EXP_QUEUED)
1471 		*expedited_queuedp = 1;
1472 	return (0);
1473 }
1474 
/*
 * Support functions for the routines that do scatter/gather I/O, such as
 * t_sndv() and t_rcvv(), follow below.
 */
1479 
1480 /*
1481  * _t_bytecount_upto_intmax() :
1482  *	    Sum of the lengths of the individual buffers in
1483  *	    the t_iovec array. If the sum exceeds INT_MAX
1484  *	    it is truncated to INT_MAX.
1485  */
1486 unsigned int
1487 _t_bytecount_upto_intmax(const struct t_iovec *tiov, unsigned int tiovcount)
1488 {
1489 	size_t nbytes;
1490 	int i;
1491 
1492 	nbytes = 0;
1493 	for (i = 0; i < tiovcount && nbytes < INT_MAX; i++) {
1494 		if (tiov[i].iov_len >= INT_MAX) {
1495 			nbytes = INT_MAX;
1496 			break;
1497 		}
1498 		nbytes += tiov[i].iov_len;
1499 	}
1500 
1501 	if (nbytes > INT_MAX)
1502 		nbytes = INT_MAX;
1503 
1504 	return ((unsigned int)nbytes);
1505 }
1506 
1507 /*
1508  * Gather the data in the t_iovec buffers, into a single linear buffer
1509  * starting at dataptr. Caller must have allocated sufficient space
1510  * starting at dataptr. The total amount of data that is gathered is
1511  * limited to INT_MAX. Any remaining data in the t_iovec buffers is
1512  * not copied.
1513  */
1514 void
1515 _t_gather(char *dataptr, const struct t_iovec *tiov, unsigned int tiovcount)
1516 {
1517 	char *curptr;
1518 	unsigned int cur_count;
1519 	unsigned int nbytes_remaining;
1520 	int i;
1521 
1522 	curptr = dataptr;
1523 	cur_count = 0;
1524 
1525 	nbytes_remaining = _t_bytecount_upto_intmax(tiov, tiovcount);
1526 	for (i = 0; i < tiovcount && nbytes_remaining != 0; i++) {
1527 		if (tiov[i].iov_len <= nbytes_remaining)
1528 			cur_count = (int)tiov[i].iov_len;
1529 		else
1530 			cur_count = nbytes_remaining;
1531 		(void) memcpy(curptr, tiov[i].iov_base, cur_count);
1532 		curptr += cur_count;
1533 		nbytes_remaining -= cur_count;
1534 	}
1535 }
1536 
/*
 * Distribute the pdatabuf->len bytes held in the single linear buffer at
 * pdatabuf->buf over the t_iovec buffers, filling them in array order.
 */
void
_t_scatter(struct strbuf *pdatabuf, struct t_iovec *tiov, int tiovcount)
{
	char *src;
	unsigned int left;
	unsigned int chunk;
	int idx;

	/*
	 * The t_iovec buffers must be able to hold everything in pdatabuf,
	 * so that no uncopied data is left over when this function is done.
	 */
	assert(pdatabuf->len <= _t_bytecount_upto_intmax(tiov, tiovcount));

	src = pdatabuf->buf;
	left = pdatabuf->len;
	idx = 0;
	while (idx < tiovcount && left != 0) {
		/* Fill this buffer completely unless less data remains */
		chunk = left;
		if (tiov[idx].iov_len < left)
			chunk = (unsigned int)tiov[idx].iov_len;

		(void) memcpy(tiov[idx].iov_base, src, chunk);
		src += chunk;
		left -= chunk;
		idx++;
	}
}
1566 
/*
 * Adjust the iovec array, for subsequent use, after "bytes_sent" bytes of
 * it have been consumed. Examine each element in the iovec array in order
 * and zero out the iov_len if the buffer was sent fully. Otherwise the
 * buffer was only partially sent, so adjust both iov_len and iov_base to
 * describe the part that is left.
 *
 * "*iovcountp" is the number of elements in "iov"; it is only read.
 * A "bytes_sent" that is zero or negative leaves the array untouched.
 * If "bytes_sent" exceeds the total described by the array, every element
 * is zeroed and the excess is ignored; no element at or beyond
 * iov[*iovcountp] is ever accessed.
 */
void
_t_adjust_iov(int bytes_sent, struct iovec *iov, int *iovcountp)
{
	int i;

	if (bytes_sent <= 0)
		return;

	for (i = 0; i < *iovcountp && bytes_sent != 0; i++) {
		if (iov[i].iov_len == 0)
			continue;
		if (bytes_sent < iov[i].iov_len)
			break;
		bytes_sent -= (int)iov[i].iov_len;
		iov[i].iov_len = 0;
	}

	/*
	 * Only a buffer that was partially sent needs adjusting. When the
	 * loop ran off the end of the array, iov[i] does not exist and
	 * must not be touched.
	 */
	if (bytes_sent != 0 && i < *iovcountp) {
		iov[i].iov_len -= bytes_sent;
		iov[i].iov_base = (char *)iov[i].iov_base + bytes_sent;
	}
}
1593 
1594 /*
1595  * Copy the t_iovec array to the iovec array while taking care to see
1596  * that the sum of the buffer lengths in the result is not more than
1597  * INT_MAX. This function requires that T_IOV_MAX is no larger than
1598  * IOV_MAX. Otherwise the resulting array is not a suitable input to
1599  * writev(). If the sum of the lengths in t_iovec is zero, so is the
1600  * resulting iovec.
1601  */
1602 void
1603 _t_copy_tiov_to_iov(const struct t_iovec *tiov, int tiovcount,
1604     struct iovec *iov, int *iovcountp)
1605 {
1606 	int i;
1607 	unsigned int nbytes_remaining;
1608 
1609 	nbytes_remaining = _t_bytecount_upto_intmax(tiov, tiovcount);
1610 	i = 0;
1611 	do {
1612 		iov[i].iov_base = tiov[i].iov_base;
1613 		if (tiov[i].iov_len > nbytes_remaining)
1614 			iov[i].iov_len = nbytes_remaining;
1615 		else
1616 			iov[i].iov_len  = tiov[i].iov_len;
1617 		nbytes_remaining -= iov[i].iov_len;
1618 		i++;
1619 	} while (nbytes_remaining != 0 && i < tiovcount);
1620 
1621 	*iovcountp = i;
1622 }
1623 
1624 /*
1625  * Routine called after connection establishment on transports where
1626  * connection establishment changes certain transport attributes such as
1627  * TIDU_size
1628  */
1629 int
1630 _t_do_postconn_sync(int fd, struct _ti_user *tiptr)
1631 {
1632 	union {
1633 		struct T_capability_req tc_req;
1634 		struct T_capability_ack tc_ack;
1635 	} ioctl_data;
1636 
1637 	void *ioctlbuf = &ioctl_data;
1638 	int expected_acksize;
1639 	int retlen, rval;
1640 	struct T_capability_req *tc_reqp = (struct T_capability_req *)ioctlbuf;
1641 	struct T_capability_ack *tc_ackp = (struct T_capability_ack *)ioctlbuf;
1642 	struct T_info_ack *tiap;
1643 
1644 	/*
1645 	 * This T_CAPABILITY_REQ should not fail, even if it is unsupported
1646 	 * by the transport provider. timod will emulate it in that case.
1647 	 */
1648 	tc_reqp->PRIM_type = T_CAPABILITY_REQ;
1649 	tc_reqp->CAP_bits1 = TC1_INFO;
1650 	rval = _t_do_ioctl(fd, (char *)ioctlbuf,
1651 	    (int)sizeof (struct T_capability_ack), TI_CAPABILITY, &retlen);
1652 	expected_acksize = (int)sizeof (struct T_capability_ack);
1653 
1654 	if (rval < 0)
1655 		return (-1);
1656 
1657 	/*
1658 	 * T_capability TPI messages are extensible and can grow in future.
1659 	 * However timod will take care of returning no more information
1660 	 * than what was requested, and truncating the "extended"
1661 	 * information towards the end of the T_capability_ack, if necessary.
1662 	 */
1663 	if (retlen != expected_acksize) {
1664 		t_errno = TSYSERR;
1665 		errno = EIO;
1666 		return (-1);
1667 	}
1668 
1669 	/*
1670 	 * The T_info_ack part of the T_capability_ack is guaranteed to be
1671 	 * present only if the corresponding TC1_INFO bit is set
1672 	 */
1673 	if ((tc_ackp->CAP_bits1 & TC1_INFO) == 0) {
1674 		t_errno = TSYSERR;
1675 		errno = EPROTO;
1676 		return (-1);
1677 	}
1678 
1679 	tiap = &tc_ackp->INFO_ack;
1680 	if (tiap->PRIM_type != T_INFO_ACK) {
1681 		t_errno = TSYSERR;
1682 		errno = EPROTO;
1683 		return (-1);
1684 	}
1685 
1686 	/*
1687 	 * Note: Sync with latest information returned in "struct T_info_ack
1688 	 * but we deliberately not sync the state here as user level state
1689 	 * construction here is not required, only update of attributes which
1690 	 * may have changed because of negotations during connection
1691 	 * establsihment
1692 	 */
1693 	assert(tiap->TIDU_size > 0);
1694 	tiptr->ti_maxpsz = tiap->TIDU_size;
1695 	assert(tiap->TSDU_size >= T_INVALID);
1696 	tiptr->ti_tsdusize = tiap->TSDU_size;
1697 	assert(tiap->ETSDU_size >= T_INVALID);
1698 	tiptr->ti_etsdusize = tiap->ETSDU_size;
1699 	assert(tiap->CDATA_size >= T_INVALID);
1700 	tiptr->ti_cdatasize = tiap->CDATA_size;
1701 	assert(tiap->DDATA_size >= T_INVALID);
1702 	tiptr->ti_ddatasize = tiap->DDATA_size;
1703 	tiptr->ti_prov_flag = tiap->PROVIDER_flag;
1704 
1705 	return (0);
1706 }
1707