/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * Multithreaded STREAMS Local Transport Provider.
 *
 * OVERVIEW
 * ========
 *
 * This driver provides TLI as well as socket semantics. It provides
 * connectionless, connection oriented, and connection oriented with orderly
 * release transports for TLI and sockets. Each transport type has a separate
 * name space (i.e. it is not possible to connect from a socket to a TLI
 * endpoint) - this removes any name space conflicts when binding to socket
 * style transport addresses.
 *
 * NOTE: There is one exception: Socket ticots and ticotsord transports share
 * the same namespace. In fact, sockets always use the ticotsord type
 * transport.
 *
 * The driver mode is specified during open() by the minor number used for
 * open.
 *
 * Sockets in addition have the following semantic differences:
 * No support for passing up credentials (TL_SET[U]CRED).
 *
 *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *	T_OPTDATA_IND.
 *
 *	The T_CONN_CON is generated when processing the T_CONN_REQ, i.e.
 *	before a T_CONN_RES is received from the acceptor. This means that a
 *	socket connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed
 * behind the serializer and are essentially single-threaded. All functions
 * executed behind the same serializer are strictly serialized. So if one
 * thread calls serializer_enter(serializer, foo, mp1, arg1); and another
 * thread calls serializer_enter(serializer, bar, mp2, arg2); then (depending
 * on which one was called first) the actual sequence will be foo(mp1, arg1);
 * bar(mp2, arg2); or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar()
 * will never run at the same time.
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener's serializer. During T_CONN_RES processing the eager's serializer
 * is switched from the listener's serializer to the acceptor's serializer,
 * and after that point all processing for the eager and acceptor happens on
 * this serializer. To avoid races with endpoint closes while its serializer
 * may be changing, closes are blocked while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access the endpoint state
 * even after the endpoint has closed. The te_closing flag being set on the
 * endpoint indicates that the endpoint has entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by a hash table lookup. This increment is atomic with
 * the lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using the te_closing flag.
 * It is usually enough to check the te_closing flag since all other state
 * changes happen after this flag is set and the close has entered the
 * serializer. Immediately after setting the te_closing flag, tl_close()
 * enters the serializer and waits until the callback finishes. This allows
 * all functions called within the serializer to simply check te_closing
 * without any locks.
 *
 * Serializer management
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is
 * constructed and destroyed when the endpoint is destructed. CLTS transports
 * use global serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related
 * to serializer destruction. The serializer should never be destroyed by any
 * function executed inside the serializer. This means that close has to wait
 * until all serializer activity for this endpoint is finished before it can
 * drop the last reference on the endpoint (which may as well free the
 * serializer). This is only relevant for COTS transports, which manage
 * serializers dynamically. For CLTS transports close may complete without
 * waiting for all serializer activity to finish since the serializer is only
 * destroyed at driver detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing
 * the client's serializer if it has any pending messages on the serializer
 * and instead moves the acceptor to the listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses the modhash hash table implementation. Each transport uses
 * two hash tables - one for finding endpoints by acceptor ID and another one
 * for finding endpoints by address. For sockets TICOTS and TICOTSORD share
 * the same pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash table lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is
 * removed from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters the serializer twice on close(). The close sequence is
 * the following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero).
 *	This step is needed to prevent close during serializer switches. In
 *	most cases (close happening after connection establishment)
 *	te_closewait is zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within the serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks the endpoint and wakes up the waiting
 *	tl_close(). It also needs to clear the write-side q_next pointers -
 *	this should be done before qprocsoff().
 *
 *	This synchronous serializer entry during close is needed to ensure
 *	that the queue is valid everywhere inside the serializer.
 *
 *	Note that in many cases close will execute tl_close_ser()
 *	synchronously, so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *	complete (for COTS transports). For CLTS transports there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up the
 *	waiting close if there is any.
 *
 *	Note that in most cases close will enter tl_close_finish_ser()
 *	synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read- and write-side service routines. No one
 * calls putq() on the read queue. The read-side service routine tl_rsrv() is
 * called when the read side stream is back-enabled. It enters the serializer
 * synchronously (waits until serializer processing is complete). Within the
 * serializer it back-enables all endpoints blocked by the queue for
 * connectionless transports and enables write-side service processing for
 * the peer for connection-oriented transports.
 *
 * Read- and write-side service routines use special mblk-sized space in the
 * endpoint structure (te_rsrvmp and te_wsrvmp) to enter the serializer
 * perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write-side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These
 * two queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput(). In order to maintain the ordering between these two queues the
 * STREAMS queue is only manipulated within the serializer, so the ordering
 * is provided by the serializer.
 *
 * Functions called from tl_wsrv() may sometimes call putbq(). To immediately
 * stop any further processing of the STREAMS message queues the code calling
 * putbq() also sets the te_nowsrv flag in the endpoint. The write-side
 * service processing stops when the flag is set.
 *
 * The tl_wsrv() function enters the serializer synchronously and waits for
 * it to complete. The serializer call-back tl_wsrv_ser() either drains all
 * messages on the STREAMS queue or terminates when it notices the te_nowsrv
 * flag set. Note that the maximum number of messages processed by
 * tl_wsrv_ser() is always bounded by the number of messages on the STREAMS
 * queue at the time tl_wsrv_ser() is entered. Any new messages may only
 * appear on the STREAMS queue from another serialized entry, which can't
 * happen in parallel. This guarantees that tl_wsrv_ser() completes in
 * bounded time (there is no risk of it draining forever while a writer
 * places new messages on the STREAMS queue).
 *
 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 *
 *
 * Unix Domain Sockets
 * ===================
 *
 * The driver knows the structure of Unix Domain socket addresses and treats
 * them differently from generic TLI addresses. For sockets implicit binds
 * are requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the
 * address instead of using an address length of zero. Explicit binds specify
 * SOU_MAGIC_EXPLICIT as the magic.
 *
 * For implicit binds we always use the minor number as the soua_vp part of
 * the address and avoid any hash table lookups. This saves two hash table
 * lookups per anonymous bind.
 *
 * For explicit addresses we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For Unix domain sockets the te_ap always points to the te_uxaddr part of
 * the tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses the minor number as the acceptor
 * id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses the bind if an
 *    explicit bind is requested and the address is already in use. There is
 *    no point in generating an unused address since this address will be
 *    rejected by sockfs anyway. For implicit binds it always generates a new
 *    address (sets soua_vp to its minor number).
 *
 * 2) It always uses the minor number as the acceptor ID and never uses the
 *    queue pointer. This is OK since sockets get the acceptor ID from the
 *    T_CAPABILITY_REQ message and they do not use the queue pointer.
 *
 * 3) For listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with a
 *    non-zero backlog, so sotpi_listen() issues an unbind request followed
 *    by a bind request to the same address but with a non-zero qlen value.
 *    Both tl_bind() and tl_unbind() require the write lock on the hash table
 *    to insert/remove the address. The driver does not remove the address
 *    from the hash for endpoints that are bound to an explicit address and
 *    have a backlog of zero. During T_BIND_REQ processing, if the address
 *    requested is equal to the address the endpoint already has, it updates
 *    the backlog without reinserting the address in the hash table. This
 *    optimization avoids two hash table updates for each listener created.
 *    It also avoids the problem of a "stolen" address, where another
 *    listener could take the same address between the unbind and the bind,
 *    making listen() suddenly fail because the address is in use even though
 *    the bind() succeeded.
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and
 * one for sockets). Functions executing behind the serializer can check or
 * modify the state of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in
 * the te_lastep field. The next time X talks to some address A it checks
 * whether A is the same as Y's address and, if it is, there is no need to
 * look Y up. If the address is different or the state of Y is not
 * appropriate (e.g. closed or not idle) X does a lookup using tl_find_peer()
 * and caches the new address.
 * NOTE: tl_find_peer() never returns a closing endpoint and it places a
 * refhold on the endpoint found.
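 *
 * A simplified sketch of the caching fast path (illustrative shape only;
 * the real code also revalidates the peer's TPI state from inside the
 * serializer):
 *
 *	peer = tep->te_lastep;
 *	if (peer == NULL || peer->te_closing ||
 *	    !tl_eqaddr(&peer->te_ap, ap))
 *		peer = tl_find_peer(tep, ap);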
 *
 * During close, endpoint Y does not try to remove itself from the caches of
 * other endpoints. They will detect that Y is gone and will search for the
 * peer endpoint again.
 *
 * Flow Control Handling
 * ---------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control is released for endpoint X it
 * enables all queues on the list. During close it also back-enables everyone
 * on the list. If X is flow-controlled when it is closing, it removes itself
 * from its peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all
 * the endpoint state. For connection-oriented transports it keeps a list of
 * pending connections (tl_icon_t). For connectionless transports it keeps a
 * list of endpoints flow-controlled by this one.
 *
 * Each transport type is represented by a per-transport data structure
 * tl_transport_state_t. It contains pointers to the acceptor ID hash and the
 * endpoint address hash tables for each transport. It also contains a
 * pointer to the transport serializer for connectionless transports.
 *
 * Each endpoint keeps a link to its transport structure, so the code can
 * find all per-transport information quickly.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/strlog.h>
#include <sys/debug.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/id_space.h>
#include <sys/modhash.h>
#include <sys/mkdev.h>
#include <sys/tl.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/strsun.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysmacros.h>
#include <sys/xti_xtiopt.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zone.h>
#include <inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
#include <inet/optcom.h>
#include <sys/strsubr.h>
#include <sys/ucred.h>
#include <sys/suntpi.h>
#include <sys/list.h>
#include <sys/serializer.h>

/*
 * TBD List
 * 14. Eliminate state changes through table
 * 16. AF_UNIX socket options
 * 17. connect() for ticlts
 * 18. support for "netstat" to show AF_UNIX plus TLI local
 *	transport connections
 * 21. sanity check to flushing on sending M_ERROR
 */

/*
 * CONSTANT DECLARATIONS
 * ---------------------
 */

/*
 * Local declarations
 */
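/* Map (event, current state) to the next TPI state via the state table. */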
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */
/*
 * Hash table size.
 */
#define	TL_HASH_SIZE	311

/*
 * Definitions for module_info
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */
/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

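/*
 * TL_MINOR_MASK extracts the transport mode bits from a minor number or
 * te_flag; minors at or above TL_MINOR_START are dynamically assigned to
 * endpoint instances from the tl_minors id space.
 */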
#define	TL_MINOR_MASK	0x7
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))

/*
 * EXTERNAL VARIABLE DECLARATIONS
 * ------------------------------
 */
/*
 * state table defined in the OS space.c
 */
extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static void tl_wput(queue_t *, mblk_t *);
static void tl_wsrv(queue_t *);
static void tl_rsrv(queue_t *);

static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);


/*
 * GLOBAL DATA STRUCTURES AND VARIABLES
 * -----------------------------------
 */

/*
 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ.
 * For now, we only manage the SO_RECVUCRED option but we also have
 * harmless dummy options to make things work with some common code we access.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};

/*
 * Table of all supported levels
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options so we need this info separately.
 *
 * This is needed only for topmost tpi providers.
 */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	TL_PROT_LEVEL
};

#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof(long) are handled
 * automatically. Others need to be handled by hand.
 */
#define	TL_MAX_OPT_BUF_LEN						\
		((A_CNT(tl_opt_arr) << 2) +				\
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
		64 + sizeof (struct T_optmgmt_ack))

#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)

/*
 * transport addr structure
 */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;	/* Zone scope of address */
	t_scalar_t	ta_alen;	/* length of abuf */
	void		*ta_abuf;	/* the addr itself */
} tl_addr_t;

/*
 * Refcounted version of serializer.
 */
typedef struct tl_serializer {
	uint_t		ts_refcnt;
	serializer_t	*ts_serializer;
} tl_serializer_t;

/*
 * Per-transport state. Each transport type has a separate copy.
 */
typedef struct tl_transport_state {
	char		*tr_name;
	minor_t		tr_minor;
	uint32_t	tr_defaddr;
	mod_hash_t	*tr_ai_hash;
	mod_hash_t	*tr_addr_hash;
	tl_serializer_t	*tr_serializer;
} tl_transport_state_t;

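/* Starting value for default (automatically generated) addresses. */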
#define	TL_DFADDR 0x1000

static tl_transport_state_t tl_transports[] = {
	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
};

#define	TL_MAXTRANSPORT A_CNT(tl_transports)

struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);

/*
 * Data structure used to represent pending connects.
 * Records enough information so that the connecting peer can close
 * before the connection gets accepted.
 */
typedef struct tl_icon {
	list_node_t	ti_node;
	struct tl_endpt	*ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;

typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;

/*
 * transport endpoint structure
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid	te_ap.ta_zoneid
#define	te_alen		te_ap.ta_alen
#define	te_abuf		te_ap.ta_abuf

	tl_transport_state_t *te_transport;
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless
	 * transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv() */
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv() */
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};

/*
 * Flag values. The lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
 * they make it easier to identify the endpoint.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the
 * chance of missing a part of a combination.
 */
#define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

#define	TL_PUTBQ(x, mp) {				\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
	(x)->te_nowsrv = B_TRUE;			\
	(void) putbq((x)->te_wq, mp);			\
}

#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void) putq((x)->te_wq, mp); }

/*
 * STREAMS driver glue data structures.
 */
static struct module_info tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};

static struct qinit tl_rinit = {
	NULL,			/* qi_putp */
	(int (*)())tl_rsrv,	/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static struct qinit tl_winit = {
	(int (*)())tl_wput,	/* qi_putp */
	(int (*)())tl_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static struct streamtab tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};

DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};

/*
 * Module linkage information for the kernel.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 */

#define	TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
#define	TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)

static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;

/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
    t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);

/*
 * Initialize option database object for TL
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};

/*
 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 * ---------------------------------------
 */

/*
 * Loadable module routines
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver Entry Points and Other routines
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use. Note: "strmsgsz" being 0 means that
	 * streams message sizes are unlimited, so we use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers.
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Create per-transport hash tables and serializers.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
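		/*
		 * On ILP32 kernels non-socket endpoints use queue pointers
		 * as acceptor IDs, so their acceptor hash is a pointer hash;
		 * everything else hashes acceptor IDs (minor numbers) as
		 * integers.
		 */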
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name),
			    "%s_sockaddr_hash", t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	tl_dip = devi;

	return (DDI_SUCCESS);
}

static int
tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * Destroy arenas and hash tables.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
			continue;

		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
		if (t->tr_serializer != NULL) {
			tl_serializer_refrele(t->tr_serializer);
			t->tr_serializer = NULL;
		}

#ifdef _ILP32
		if (i & TL_SOCKET)
			mod_hash_destroy_idhash(t->tr_ai_hash);
		else
			mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
		mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
		t->tr_ai_hash = NULL;
		if (i & TL_SOCKET)
			mod_hash_destroy_ptrhash(t->tr_addr_hash);
		else
			mod_hash_destroy_hash(t->tr_addr_hash);
		t->tr_addr_hash = NULL;
	}

	kmem_cache_destroy(tl_cache);
	tl_cache = NULL;
	id_space_destroy(tl_minors);
	tl_minors = NULL;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int retcode = DDI_FAILURE;

	switch (infocmd) {

	case DDI_INFO_DEVT2DEVINFO:
		if (tl_dip != NULL) {
			*result = (void *)tl_dip;
			retcode = DDI_SUCCESS;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		retcode = DDI_SUCCESS;
		break;

	default:
		break;
	}
	return (retcode);
}

/*
 * Endpoint reference management.
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}

static void
tl_refrele(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt != 0);

	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
		tl_free(tep);
}

/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
	tl_endpt_t *tep = buf;

	bzero(tep, sizeof (tl_endpt_t));
	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
	tl_endpt_t *tep = buf;

	mutex_destroy(&tep->te_closelock);
	cv_destroy(&tep->te_closecv);
	mutex_destroy(&tep->te_srv_lock);
	cv_destroy(&tep->te_srv_cv);
	mutex_destroy(&tep->te_ser_lock);
}

static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(!(tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}

/*
 * Allocate/free reference-counted wrappers for serializers.
 */
static tl_serializer_t *
tl_serializer_alloc(int flags)
{
	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
	serializer_t *ser;

	if (s == NULL)
		return (NULL);

	ser = serializer_create(flags);

	if (ser == NULL) {
		kmem_free(s, sizeof (tl_serializer_t));
		return (NULL);
	}

	s->ts_refcnt = 1;
	s->ts_serializer = ser;
	return (s);
}

static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}

static void
tl_serializer_refrele(tl_serializer_t *s)
{
	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
		serializer_destroy(s->ts_serializer);
		kmem_free(s, sizeof (tl_serializer_t));
	}
}

/*
 * Post a request on the endpoint serializer. For COTS transports keep track
 * of the number of pending requests.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}
	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}

/*
 * Complete processing the request on the serializer. Decrement the counter
 * for pending requests for COTS transports.
 */
static void
tl_serializer_exit(tl_endpt_t *tep)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		ASSERT(tep->te_ser_count != 0);
		tep->te_ser_count--;
		mutex_exit(&tep->te_ser_lock);
	}
}

/*
 * Hash management functions.
 */

/*
 * Return B_TRUE if two addresses are equal, B_FALSE otherwise.
 */
static boolean_t
tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
{
	return ((ap1->ta_alen > 0) &&
	    (ap1->ta_alen == ap2->ta_alen) &&
	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
}

/*
 * This function is called whenever an endpoint is found in the hash table.
 * It takes the reference while the hash table read lock is still held, so
 * the lookup and the refhold are atomic with respect to removals.
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}

/*
 * Address hash function.
 */
/* ARGSUSED */
static uint_t
tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
{
	tl_addr_t *ap = (tl_addr_t *)key;
	size_t	len = ap->ta_alen;
	uchar_t	*p = ap->ta_abuf;
	uint_t	i, g;

	ASSERT((len > 0) && (p != NULL));

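	/*
	 * Classic PJW/ELF-style rolling hash over the address bytes, seeded
	 * with the zone ID so identical addresses bound in different zones
	 * fall into different buckets.
	 */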
	for (i = ap->ta_zoneid; len-- != 0; p++) {
		i = (i << 4) + (*p);
		if ((g = (i & 0xf0000000U)) != 0) {
			i ^= (g >> 24);
			i ^= g;
		}
	}
	return (i);
}

/*
 * This function is used by hash lookups. It compares two generic addresses
 * and returns 0 when they are equal, as mod_hash expects.
 */
static int
tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
{
#ifdef	DEBUG
	tl_addr_t *ap1 = (tl_addr_t *)key1;
	tl_addr_t *ap2 = (tl_addr_t *)key2;

	ASSERT(key1 != NULL);
	ASSERT(key2 != NULL);

	ASSERT(ap1->ta_abuf != NULL);
	ASSERT(ap2->ta_abuf != NULL);
	ASSERT(ap1->ta_alen > 0);
	ASSERT(ap2->ta_alen > 0);
#endif

	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
}

/*
 * Prevent endpoint from closing if possible.
 * Return B_TRUE on success, B_FALSE on failure.
 */
static boolean_t
tl_noclose(tl_endpt_t *tep)
{
	boolean_t rc = B_FALSE;

	mutex_enter(&tep->te_closelock);
	if (!tep->te_closing) {
		ASSERT(tep->te_closewait == 0);
		tep->te_closewait++;
		rc = B_TRUE;
	}
	mutex_exit(&tep->te_closelock);
	return (rc);
}

/*
 * Allow endpoint to close if needed.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}

/*
 * STREAMS open entry point.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal.
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;
	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

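	/*
	 * On ILP32 kernels the read queue pointer fits in a t_uscalar_t and
	 * is used as the acceptor ID for non-socket endpoints; sockets (and
	 * all endpoints on LP64 kernels) use the minor number instead.
	 */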
#ifdef _ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif /* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}

/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t	   *wq = tep->te_wq;
	int	   rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close
	 * processing with serializer protection. This processing may happen
	 * asynchronously with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the
	 * serializer may go away together with tep and we need to destroy
	 * the serializer outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer
	 * activity to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}

/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close.
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1676
1677 /*
1678 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1679 * the reference for CLTS.
1680 *
1681 * Called from serializer. Should drop reference count for CLTS only.
1682 */
1683 /* ARGSUSED0 */
1684 static void
1685 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1686 {
1687 ASSERT(tep->te_closing);
1688 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1689 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1690
1691 tep->te_state = -1; /* Uninitialized */
1692 if (IS_COTS(tep)) {
1693 tl_co_unconnect(tep);
1694 } else {
1695 /* Connectionless specific cleanup */
1696 TL_REMOVE_PEER(tep->te_lastep);
1697 /*
1698 * Backenable anybody that is flow controlled waiting for
1699 * this endpoint.
1700 */
1701 tl_cl_backenable(tep);
1702 if (tep->te_flowq != NULL) {
1703 list_remove(&(tep->te_flowq->te_flowlist), tep);
1704 tep->te_flowq = NULL;
1705 }
1706 }
1707
1708 tl_serializer_exit(tep);
1709 if (IS_COTS(tep))
1710 tl_closeok(tep);
1711 else
1712 tl_refrele(tep);
1713 }
1714
1715 /*
1716 * STREAMS write-side put procedure.
1717 * Enter serializer for most of the processing.
1718 *
1719 * The T_CONN_REQ is processed outside of serializer.
1720 */
1721 static void
1722 tl_wput(queue_t *wq, mblk_t *mp)
1723 {
1724 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1725 ssize_t msz = MBLKL(mp);
1726 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1727 tlproc_t *tl_proc = NULL;
1728
1729 switch (DB_TYPE(mp)) {
1730 case M_DATA:
1731 /* Only valid for connection-oriented transports */
1732 if (IS_CLTS(tep)) {
1733 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1734 SL_TRACE|SL_ERROR,
1735 "tl_wput:M_DATA invalid for ticlts driver"));
1736 tl_merror(wq, mp, EPROTO);
1737 return;
1738 }
1739 tl_proc = tl_wput_data_ser;
1740 break;
1741
1742 case M_IOCTL:
1743 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1744 case TL_IOC_CREDOPT:
1745 /* FALLTHROUGH */
1746 case TL_IOC_UCREDOPT:
1747 /*
1748 * Serialize endpoint state change.
1749 */
1750 tl_proc = tl_do_ioctl_ser;
1751 break;
1752
1753 default:
1754 miocnak(wq, mp, 0, EINVAL);
1755 return;
1756 }
1757 break;
1758
1759 case M_FLUSH:
1760 /*
1761 * do canonical M_FLUSH processing
1762 */
1763 if (*mp->b_rptr & FLUSHW) {
1764 flushq(wq, FLUSHALL);
1765 *mp->b_rptr &= ~FLUSHW;
1766 }
1767 if (*mp->b_rptr & FLUSHR) {
1768 flushq(RD(wq), FLUSHALL);
1769 qreply(wq, mp);
1770 } else {
1771 freemsg(mp);
1772 }
1773 return;
1774
1775 case M_PROTO:
1776 if (msz < sizeof (prim->type)) {
1777 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1778 SL_TRACE|SL_ERROR,
1779 "tl_wput:M_PROTO data too short"));
1780 tl_merror(wq, mp, EPROTO);
1781 return;
1782 }
1783 switch (prim->type) {
1784 case T_OPTMGMT_REQ:
1785 case T_SVR4_OPTMGMT_REQ:
1786 /*
1787 * Process TPI option management requests immediately
1788 * in put procedure regardless of in-order processing
1789 * of already queued messages.
1790 * (Note: This driver supports AF_UNIX socket
1791 * implementation. Unless we implement this processing,
1792 * setsockopt() on socket endpoint will block on flow
1793 * controlled endpoints which it should not. That is
1794 * required for successful execution of VSU socket tests
1795 * and is consistent with BSD socket behavior).
1796 */
1797 tl_optmgmt(wq, mp);
1798 return;
1799 case O_T_BIND_REQ:
1800 case T_BIND_REQ:
1801 tl_proc = tl_bind_ser;
1802 break;
1803 case T_CONN_REQ:
1804 if (IS_CLTS(tep)) {
1805 tl_merror(wq, mp, EPROTO);
1806 return;
1807 }
1808 tl_conn_req(wq, mp);
1809 return;
1810 case T_DATA_REQ:
1811 case T_OPTDATA_REQ:
1812 case T_EXDATA_REQ:
1813 case T_ORDREL_REQ:
1814 tl_proc = tl_putq_ser;
1815 break;
1816 case T_UNITDATA_REQ:
1817 if (IS_COTS(tep) ||
1818 (msz < sizeof (struct T_unitdata_req))) {
1819 tl_merror(wq, mp, EPROTO);
1820 return;
1821 }
1822 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1823 tl_proc = tl_unitdata_ser;
1824 } else {
1825 tl_proc = tl_putq_ser;
1826 }
1827 break;
1828 default:
1829 /*
1830 * process in service procedure if message already
1831 * queued (maintain in-order processing)
1832 */
1833 if (wq->q_first != NULL) {
1834 tl_proc = tl_putq_ser;
1835 } else {
1836 tl_proc = tl_wput_ser;
1837 }
1838 break;
1839 }
1840 break;
1841
1842 case M_PCPROTO:
1843 /*
1844 * Check that the message has enough data to figure out TPI
1845 * primitive.
1846 */
1847 if (msz < sizeof (prim->type)) {
1848 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1849 SL_TRACE|SL_ERROR,
1850 "tl_wput:M_PCROTO data too short"));
1851 tl_merror(wq, mp, EPROTO);
1852 return;
1853 }
1854 switch (prim->type) {
1855 case T_CAPABILITY_REQ:
1856 tl_capability_req(mp, tep);
1857 return;
1858 case T_INFO_REQ:
1859 tl_proc = tl_info_req_ser;
1860 break;
1861 case T_ADDR_REQ:
1862 tl_proc = tl_addr_req_ser;
1863 break;
1864
1865 default:
1866 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1867 SL_TRACE|SL_ERROR,
1868 "tl_wput:unknown TPI msg primitive"));
1869 tl_merror(wq, mp, EPROTO);
1870 return;
1871 }
1872 break;
1873 default:
1874 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1875 "tl_wput:default:unexpected Streams message"));
1876 freemsg(mp);
1877 return;
1878 }
1879
1880 /*
1881 * Continue processing via serializer.
1882 */
1883 ASSERT(tl_proc != NULL);
1884 tl_refhold(tep);
1885 tl_serializer_enter(tep, tl_proc, mp);
1886 }
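/*
 * Dispatch summary for tl_wput() above, derived from the code and kept
 * here for reference:
 *
 *	M_DATA                      -> tl_wput_data_ser (COTS only)
 *	M_IOCTL TL_IOC_[U]CREDOPT   -> tl_do_ioctl_ser
 *	M_FLUSH                     -> canonical flush, handled in place
 *	T_[SVR4_]OPTMGMT_REQ        -> tl_optmgmt() in place (so setsockopt()
 *	                               never blocks behind flow control)
 *	[O_]T_BIND_REQ              -> tl_bind_ser
 *	T_CONN_REQ                  -> tl_conn_req() in place
 *	T_{,OPT,EX}DATA/ORDREL_REQ  -> tl_putq_ser
 *	T_UNITDATA_REQ              -> tl_unitdata_ser when idle with an
 *	                               empty queue, else tl_putq_ser
 *	other M_PROTO               -> tl_wput_ser, or tl_putq_ser if queued
 *	M_PCPROTO                   -> tl_capability_req / tl_info_req_ser /
 *	                               tl_addr_req_ser
 */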
1887
1888 /*
1889 * Place message on the queue while preserving order.
1890 */
1891 static void
1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1893 {
1894 if (tep->te_closing) {
1895 tl_wput_ser(mp, tep);
1896 } else {
1897 TL_PUTQ(tep, mp);
1898 tl_serializer_exit(tep);
1899 tl_refrele(tep);
1900 }
1901
1902 }
1903
1904 static void
1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1906 {
1907 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1908
1909 switch (DB_TYPE(mp)) {
1910 case M_DATA:
1911 tl_data(mp, tep);
1912 break;
1913 case M_PROTO:
1914 tl_do_proto(mp, tep);
1915 break;
1916 default:
1917 freemsg(mp);
1918 break;
1919 }
1920 }
1921
1922 /*
1923 * Write side put procedure called from serializer.
1924 */
1925 static void
1926 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1927 {
1928 tl_wput_common_ser(mp, tep);
1929 tl_serializer_exit(tep);
1930 tl_refrele(tep);
1931 }
1932
1933 /*
1934 * M_DATA processing. Called from serializer.
1935 */
1936 static void
1937 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1938 {
1939 tl_endpt_t *peer_tep = tep->te_conp;
1940 queue_t *peer_rq;
1941
1942 ASSERT(DB_TYPE(mp) == M_DATA);
1943 ASSERT(IS_COTS(tep));
1944
1945 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1946
1947 /*
1948 * fastpath for data. Ignore flow control if tep is closing.
1949 */
1950 if ((peer_tep != NULL) &&
1951 !peer_tep->te_closing &&
1952 ((tep->te_state == TS_DATA_XFER) ||
1953 (tep->te_state == TS_WREQ_ORDREL)) &&
1954 (tep->te_wq != NULL) &&
1955 (tep->te_wq->q_first == NULL) &&
1956 ((peer_tep->te_state == TS_DATA_XFER) ||
1957 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1958 ((peer_rq = peer_tep->te_rq) != NULL) &&
1959 (canputnext(peer_rq) || tep->te_closing)) {
1960 putnext(peer_rq, mp);
1961 } else if (tep->te_closing) {
1962 /*
1963 * It is possible that by the time we got here tep started to
1964 * close. If the write queue is not empty, and the state is
1965 * TS_DATA_XFER the data should be delivered in order, so we
1966 * call putq() instead of freeing the data.
1967 */
1968 if ((tep->te_wq != NULL) &&
1969 ((tep->te_state == TS_DATA_XFER) ||
1970 (tep->te_state == TS_WREQ_ORDREL))) {
1971 TL_PUTQ(tep, mp);
1972 } else {
1973 freemsg(mp);
1974 }
1975 } else {
1976 TL_PUTQ(tep, mp);
1977 }
1978
1979 tl_serializer_exit(tep);
1980 tl_refrele(tep);
1981 }
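/*
 * Note derived from the checks above: the data fastpath fires only when
 * both endpoints are in a data-transfer state, nothing is already queued
 * locally, and the peer can accept the message. Flow control is ignored
 * for a closing endpoint so its final data is not stranded on the queue.
 */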
1982
1983 /*
1984 * Write side service routine.
1985 *
1986 * All actual processing happens within serializer which is entered
1987 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1988 * messages that need processing may have arrived, so tl_wsrv repeats until
1989 * queue is empty or te_nowsrv is set.
1990 */
1991 static void
1992 tl_wsrv(queue_t *wq)
1993 {
1994 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1995
1996 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1997 mutex_enter(&tep->te_srv_lock);
1998 ASSERT(tep->te_wsrv_active == B_FALSE);
1999 tep->te_wsrv_active = B_TRUE;
2000 mutex_exit(&tep->te_srv_lock);
2001
2002 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2003
2004 /*
2005 * Wait for serializer job to complete.
2006 */
2007 mutex_enter(&tep->te_srv_lock);
2008 while (tep->te_wsrv_active) {
2009 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2010 }
2011 cv_signal(&tep->te_srv_cv);
2012 mutex_exit(&tep->te_srv_lock);
2013 }
2014 }
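/*
 * Illustrative sketch, not from the original source: one iteration of the
 * tl_wsrv()/tl_wsrv_ser() handshake:
 *
 *	tl_wsrv()                          tl_wsrv_ser()
 *	te_wsrv_active = B_TRUE
 *	tl_serializer_enter(...)    ->     drain queue with getq()
 *	cv_wait(te_srv_cv)          <-     te_wsrv_active = B_FALSE,
 *	                                   cv_signal(te_srv_cv)
 *	repeat while q_first != NULL && !te_nowsrv
 */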
2015
2016 /*
2017 * Serialized write side processing of the STREAMS queue.
2018 * May be called either from tl_wsrv() or from tl_close_ser(), in which
2019 * case ser_mp is NULL.
2020 */
2021 static void
2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2023 {
2024 mblk_t *mp;
2025 queue_t *wq = tep->te_wq;
2026
2027 ASSERT(wq != NULL);
2028 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2029 tl_wput_common_ser(mp, tep);
2030 }
2031
2032 /*
2033 * Wakeup service routine unless called from close.
2034 * If ser_mp is specified, the caller is tl_wsrv().
2035 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2036 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2037 * be no matching tl_serializer_exit() in this case.
2038 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2039 * waiting on te_srv_cv.
2040 */
2041 if (ser_mp != NULL) {
2042 /*
2043 * We are called from tl_wsrv.
2044 */
2045 mutex_enter(&tep->te_srv_lock);
2046 ASSERT(tep->te_wsrv_active);
2047 tep->te_wsrv_active = B_FALSE;
2048 cv_signal(&tep->te_srv_cv);
2049 mutex_exit(&tep->te_srv_lock);
2050 tl_serializer_exit(tep);
2051 }
2052 }
2053
2054 /*
2055 * Called when the stream is backenabled. Enter serializer and qenable everyone
2056 * flow controlled by tep.
2057 *
2058 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2059 * is possible that two instances of tl_rsrv will be running reusing the same
2060 * rsrv mblk.
2061 */
2062 static void
2063 tl_rsrv(queue_t *rq)
2064 {
2065 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2066
2067 ASSERT(rq->q_first == NULL);
2068 ASSERT(tep->te_rsrv_active == 0);
2069
2070 tep->te_rsrv_active = B_TRUE;
2071 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2072 /*
2073 * Wait for serializer job to complete.
2074 */
2075 mutex_enter(&tep->te_srv_lock);
2076 while (tep->te_rsrv_active) {
2077 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2078 }
2079 cv_signal(&tep->te_srv_cv);
2080 mutex_exit(&tep->te_srv_lock);
2081 }
2082
2083 /* ARGSUSED */
2084 static void
2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2086 {
2087 tl_endpt_t *peer_tep;
2088
2089 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2090 tl_cl_backenable(tep);
2091 } else if (
2092 IS_COTS(tep) &&
2093 ((peer_tep = tep->te_conp) != NULL) &&
2094 !peer_tep->te_closing &&
2095 ((tep->te_state == TS_DATA_XFER) ||
2096 (tep->te_state == TS_WIND_ORDREL) ||
2097 (tep->te_state == TS_WREQ_ORDREL))) {
2098 TL_QENABLE(peer_tep);
2099 }
2100
2101 /*
2102 * Wakeup read side service routine.
2103 */
2104 mutex_enter(&tep->te_srv_lock);
2105 ASSERT(tep->te_rsrv_active);
2106 tep->te_rsrv_active = B_FALSE;
2107 cv_signal(&tep->te_srv_cv);
2108 mutex_exit(&tep->te_srv_lock);
2109 tl_serializer_exit(tep);
2110 }
2111
2112 /*
2113 * process M_PROTO messages. Always called from serializer.
2114 */
2115 static void
2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2117 {
2118 ssize_t msz = MBLKL(mp);
2119 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2120
2121 /* Message size was validated by tl_wput(). */
2122 ASSERT(msz >= sizeof (prim->type));
2123
2124 switch (prim->type) {
2125 case T_UNBIND_REQ:
2126 tl_unbind(mp, tep);
2127 break;
2128
2129 case T_ADDR_REQ:
2130 tl_addr_req(mp, tep);
2131 break;
2132
2133 case O_T_CONN_RES:
2134 case T_CONN_RES:
2135 if (IS_CLTS(tep)) {
2136 tl_merror(tep->te_wq, mp, EPROTO);
2137 break;
2138 }
2139 tl_conn_res(mp, tep);
2140 break;
2141
2142 case T_DISCON_REQ:
2143 if (IS_CLTS(tep)) {
2144 tl_merror(tep->te_wq, mp, EPROTO);
2145 break;
2146 }
2147 tl_discon_req(mp, tep);
2148 break;
2149
2150 case T_DATA_REQ:
2151 if (IS_CLTS(tep)) {
2152 tl_merror(tep->te_wq, mp, EPROTO);
2153 break;
2154 }
2155 tl_data(mp, tep);
2156 break;
2157
2158 case T_OPTDATA_REQ:
2159 if (IS_CLTS(tep)) {
2160 tl_merror(tep->te_wq, mp, EPROTO);
2161 break;
2162 }
2163 tl_data(mp, tep);
2164 break;
2165
2166 case T_EXDATA_REQ:
2167 if (IS_CLTS(tep)) {
2168 tl_merror(tep->te_wq, mp, EPROTO);
2169 break;
2170 }
2171 tl_exdata(mp, tep);
2172 break;
2173
2174 case T_ORDREL_REQ:
2175 if (! IS_COTSORD(tep)) {
2176 tl_merror(tep->te_wq, mp, EPROTO);
2177 break;
2178 }
2179 tl_ordrel(mp, tep);
2180 break;
2181
2182 case T_UNITDATA_REQ:
2183 if (IS_COTS(tep)) {
2184 tl_merror(tep->te_wq, mp, EPROTO);
2185 break;
2186 }
2187 tl_unitdata(mp, tep);
2188 break;
2189
2190 default:
2191 tl_merror(tep->te_wq, mp, EPROTO);
2192 break;
2193 }
2194 }
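/*
 * Summary derived from the checks above - M_PROTO primitives accepted by
 * each transport class once they reach tl_do_proto():
 *
 *	CLTS:    T_UNBIND_REQ, T_ADDR_REQ, T_UNITDATA_REQ
 *	COTS:    T_UNBIND_REQ, T_ADDR_REQ, [O_]T_CONN_RES, T_DISCON_REQ,
 *	         T_DATA_REQ, T_OPTDATA_REQ, T_EXDATA_REQ
 *	COTSORD: the COTS set plus T_ORDREL_REQ
 *
 * Everything else is answered with tl_merror(EPROTO).
 */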
2195
2196 /*
2197 * Process ioctl from serializer.
2198 * This is a wrapper around tl_do_ioctl().
2199 */
2200 static void
2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2202 {
2203 if (! tep->te_closing)
2204 tl_do_ioctl(mp, tep);
2205 else
2206 freemsg(mp);
2207
2208 tl_serializer_exit(tep);
2209 tl_refrele(tep);
2210 }
2211
2212 static void
2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2214 {
2215 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2216 int cmd = iocbp->ioc_cmd;
2217 queue_t *wq = tep->te_wq;
2218 int error;
2219 int thisopt, otheropt;
2220
2221 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2222
2223 switch (cmd) {
2224 case TL_IOC_CREDOPT:
2225 if (cmd == TL_IOC_CREDOPT) {
2226 thisopt = TL_SETCRED;
2227 otheropt = TL_SETUCRED;
2228 } else {
2229 /* FALLTHROUGH */
2230 case TL_IOC_UCREDOPT:
2231 thisopt = TL_SETUCRED;
2232 otheropt = TL_SETCRED;
2233 }
2234 /*
2235 * The credentials passing does not apply to sockets.
2236 * Only one of the cred options can be set at a given time.
2237 */
2238 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2239 miocnak(wq, mp, 0, EINVAL);
2240 return;
2241 }
2242
2243 /*
2244 * Turn on generation of credential options for
2245 * T_conn_req, T_conn_con, T_unitdata_ind.
2246 */
2247 error = miocpullup(mp, sizeof (uint32_t));
2248 if (error != 0) {
2249 miocnak(wq, mp, 0, error);
2250 return;
2251 }
2252 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2253 miocnak(wq, mp, 0, EINVAL);
2254 return;
2255 }
2256
2257 if (*(uint32_t *)mp->b_cont->b_rptr)
2258 tep->te_flag |= thisopt;
2259 else
2260 tep->te_flag &= ~thisopt;
2261
2262 miocack(wq, mp, 0, 0);
2263 break;
2264
2265 default:
2266 /* Should not be here */
2267 miocnak(wq, mp, 0, EINVAL);
2268 break;
2269 }
2270 }
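/*
 * Hypothetical user-level sketch, not from the original source: enabling
 * ucred generation with the TL_IOC_UCREDOPT ioctl handled above. Assumes
 * <sys/tl.h> supplies the command; the payload is a uint32_t on/off flag,
 * matching the miocpullup() check above.
 *
 *	struct strioctl ic;
 *	uint32_t on = 1;
 *
 *	ic.ic_cmd = TL_IOC_UCREDOPT;
 *	ic.ic_timout = 0;
 *	ic.ic_len = sizeof (on);
 *	ic.ic_dp = (char *)&on;
 *	if (ioctl(fd, I_STR, &ic) == -1)
 *		perror("TL_IOC_UCREDOPT");
 *
 * Per the checks above, the ioctl is rejected with EINVAL on sockets or
 * when the other credential option is already set.
 */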
2271
2272
2273 /*
2274 * send T_ERROR_ACK
2275 * Note: assumes enough memory or caller passed big enough mp
2276 * - no recovery from allocb failures
2277 */
2278
2279 static void
2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2281 t_scalar_t unix_err, t_scalar_t type)
2282 {
2283 struct T_error_ack *err_ack;
2284 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2285 M_PCPROTO, T_ERROR_ACK);
2286
2287 if (ackmp == NULL) {
2288 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2289 "tl_error_ack:out of mblk memory"));
2290 tl_merror(wq, NULL, ENOSR);
2291 return;
2292 }
2293 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2294 err_ack->ERROR_prim = type;
2295 err_ack->TLI_error = tli_err;
2296 err_ack->UNIX_error = unix_err;
2297
2298 /*
2299 * send error ack message
2300 */
2301 qreply(wq, ackmp);
2302 }
2303
2304
2305
2306 /*
2307 * send T_OK_ACK
2308 * Note: assumes enough memory or caller passed big enough mp
2309 * - no recovery from allocb failures
2310 */
2311 static void
2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2313 {
2314 struct T_ok_ack *ok_ack;
2315 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2316 M_PCPROTO, T_OK_ACK);
2317
2318 if (ackmp == NULL) {
2319 tl_merror(wq, NULL, ENOMEM);
2320 return;
2321 }
2322
2323 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2324 ok_ack->CORRECT_prim = type;
2325
2326 (void) qreply(wq, ackmp);
2327 }
2328
2329 /*
2330 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2331 * This is a wrapper around tl_bind().
2332 */
2333 static void
2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2335 {
2336 if (! tep->te_closing)
2337 tl_bind(mp, tep);
2338 else
2339 freemsg(mp);
2340
2341 tl_serializer_exit(tep);
2342 tl_refrele(tep);
2343 }
2344
2345 /*
2346 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2347 * Assumes that the endpoint is in the unbound state.
2348 */
2349 static void
2350 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2351 {
2352 queue_t *wq = tep->te_wq;
2353 struct T_bind_ack *b_ack;
2354 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2355 mblk_t *ackmp, *bamp;
2356 soux_addr_t ux_addr;
2357 t_uscalar_t qlen = 0;
2358 t_scalar_t alen, aoff;
2359 tl_addr_t addr_req;
2360 void *addr_startp;
2361 ssize_t msz = MBLKL(mp), basize;
2362 t_scalar_t tli_err = 0, unix_err = 0;
2363 t_scalar_t save_prim_type = bind->PRIM_type;
2364 t_scalar_t save_state = tep->te_state;
2365
2366 if (tep->te_state != TS_UNBND) {
2367 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2368 SL_TRACE|SL_ERROR,
2369 "tl_wput:bind_request:out of state, state=%d",
2370 tep->te_state));
2371 tli_err = TOUTSTATE;
2372 goto error;
2373 }
2374
2375 if (msz < sizeof (struct T_bind_req)) {
2376 tli_err = TSYSERR; unix_err = EINVAL;
2377 goto error;
2378 }
2379
2380 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2381
2382 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2383 (bind->PRIM_type == T_BIND_REQ));
2384
2385 alen = bind->ADDR_length;
2386 aoff = bind->ADDR_offset;
2387
2388 /* negotiate max conn req pending */
2389 if (IS_COTS(tep)) {
2390 qlen = bind->CONIND_number;
2391 if (qlen > tl_maxqlen)
2392 qlen = tl_maxqlen;
2393 }
2394
2395 /*
2396 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2397 * and bound again.
2398 */
2399 if ((tep->te_hash_hndl == NULL) &&
2400 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2401 mod_hash_reserve_nosleep(tep->te_addrhash,
2402 &tep->te_hash_hndl) != 0) {
2403 tli_err = TSYSERR; unix_err = ENOSR;
2404 goto error;
2405 }
2406
2407 /*
2408 * Verify address correctness.
2409 */
2410 if (IS_SOCKET(tep)) {
2411 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2412
2413 if ((alen != TL_SOUX_ADDRLEN) ||
2414 (aoff < 0) ||
2415 (aoff + alen > msz)) {
2416 (void) (STRLOG(TL_ID, tep->te_minor,
2417 1, SL_TRACE|SL_ERROR,
2418 "tl_bind: invalid socket addr"));
2419 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2420 tli_err = TSYSERR; unix_err = EINVAL;
2421 goto error;
2422 }
2423 /* Copy address from message to local buffer. */
2424 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2425 /*
2426 * Check that we got correct address from sockets
2427 */
2428 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2429 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2430 (void) (STRLOG(TL_ID, tep->te_minor,
2431 1, SL_TRACE|SL_ERROR,
2432 "tl_bind: invalid socket magic"));
2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2434 tli_err = TSYSERR; unix_err = EINVAL;
2435 goto error;
2436 }
2437 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2438 (ux_addr.soua_vp != NULL)) {
2439 (void) (STRLOG(TL_ID, tep->te_minor,
2440 1, SL_TRACE|SL_ERROR,
2441 "tl_bind: implicit addr non-empty"));
2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2443 tli_err = TSYSERR; unix_err = EINVAL;
2444 goto error;
2445 }
2446 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2447 (ux_addr.soua_vp == NULL)) {
2448 (void) (STRLOG(TL_ID, tep->te_minor,
2449 1, SL_TRACE|SL_ERROR,
2450 "tl_bind: explicit addr empty"));
2451 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2452 tli_err = TSYSERR; unix_err = EINVAL;
2453 goto error;
2454 }
2455 } else {
2456 if ((alen > 0) && ((aoff < 0) ||
2457 ((ssize_t)(aoff + alen) > msz) ||
2458 ((aoff + alen) < 0))) {
2459 (void) (STRLOG(TL_ID, tep->te_minor,
2460 1, SL_TRACE|SL_ERROR,
2461 "tl_bind: invalid message"));
2462 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2463 tli_err = TSYSERR; unix_err = EINVAL;
2464 goto error;
2465 }
2466 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2467 (void) (STRLOG(TL_ID, tep->te_minor,
2468 1, SL_TRACE|SL_ERROR,
2469 "tl_bind: bad addr in message"));
2470 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2471 tli_err = TBADADDR;
2472 goto error;
2473 }
2474 #ifdef DEBUG
2475 /*
2476 * Mild form of ASSERT()ion to detect broken TPI apps.
2477 * if (! assertion)
2478 * log warning;
2479 */
2480 if (! ((alen == 0 && aoff == 0) ||
2481 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2482 (void) (STRLOG(TL_ID, tep->te_minor,
2483 3, SL_TRACE|SL_ERROR,
2484 "tl_bind: addr overlaps TPI message"));
2485 }
2486 #endif
2487 }
2488
2489 /*
2490 * Bind the address provided or allocate one if requested.
2491 * Allow rebinds with a new qlen value.
2492 */
2493 if (IS_SOCKET(tep)) {
2494 /*
2495 * For anonymous requests the te_ap is already set up properly
2496 * so use minor number as an address.
2497 * For explicit requests need to check whether the address is
2498 * already in use.
2499 */
2500 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2501 int rc;
2502
2503 if (tep->te_flag & TL_ADDRHASHED) {
2504 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2505 if (tep->te_vp == ux_addr.soua_vp)
2506 goto skip_addr_bind;
2507 else /* Rebind to a new address. */
2508 tl_addr_unbind(tep);
2509 }
2510 /*
2511 * Insert address in the hash if it is not already
2512 * there. Since we use preallocated handle, the insert
2513 * can fail only if the key is already present.
2514 */
2515 rc = mod_hash_insert_reserve(tep->te_addrhash,
2516 (mod_hash_key_t)ux_addr.soua_vp,
2517 (mod_hash_val_t)tep, tep->te_hash_hndl);
2518
2519 if (rc != 0) {
2520 ASSERT(rc == MH_ERR_DUPLICATE);
2521 /*
2522 * Violate O_T_BIND_REQ semantics and fail with
2523 * TADDRBUSY - sockets will not use any address
2524 * other than supplied one for explicit binds.
2525 */
2526 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2527 SL_TRACE|SL_ERROR,
2528 "tl_bind:requested addr %p is busy",
2529 ux_addr.soua_vp));
2530 tli_err = TADDRBUSY; unix_err = 0;
2531 goto error;
2532 }
2533 tep->te_uxaddr = ux_addr;
2534 tep->te_flag |= TL_ADDRHASHED;
2535 tep->te_hash_hndl = NULL;
2536 }
2537 } else if (alen == 0) {
2538 /*
2539 * assign any free address
2540 */
2541 if (! tl_get_any_addr(tep, NULL)) {
2542 (void) (STRLOG(TL_ID, tep->te_minor,
2543 1, SL_TRACE|SL_ERROR,
2544 "tl_bind:failed to get buffer for any "
2545 "address"));
2546 tli_err = TSYSERR; unix_err = ENOSR;
2547 goto error;
2548 }
2549 } else {
2550 addr_req.ta_alen = alen;
2551 addr_req.ta_abuf = (mp->b_rptr + aoff);
2552 addr_req.ta_zoneid = tep->te_zoneid;
2553
2554 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2555 if (tep->te_abuf == NULL) {
2556 tli_err = TSYSERR; unix_err = ENOSR;
2557 goto error;
2558 }
2559 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2560 tep->te_alen = alen;
2561
2562 if (mod_hash_insert_reserve(tep->te_addrhash,
2563 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2564 tep->te_hash_hndl) != 0) {
2565 if (save_prim_type == T_BIND_REQ) {
2566 /*
2567 * The bind semantics for this primitive
2568 * require a failure if the exact address
2569 * requested is busy
2570 */
2571 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2572 SL_TRACE|SL_ERROR,
2573 "tl_bind:requested addr is busy"));
2574 tli_err = TADDRBUSY; unix_err = 0;
2575 goto error;
2576 }
2577
2578 /*
2579 * O_T_BIND_REQ semantics say that if the requested
2580 * address is busy, bind to any available free address.
2581 */
2582 if (! tl_get_any_addr(tep, &addr_req)) {
2583 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2584 SL_TRACE|SL_ERROR,
2585 "tl_bind:unable to get any addr buf"));
2586 tli_err = TSYSERR; unix_err = ENOMEM;
2587 goto error;
2588 }
2589 } else {
2590 tep->te_flag |= TL_ADDRHASHED;
2591 tep->te_hash_hndl = NULL;
2592 }
2593 }
2594
2595 ASSERT(tep->te_alen >= 0);
2596
2597 skip_addr_bind:
2598 /*
2599 * prepare T_BIND_ACK TPI message
2600 */
2601 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2602 bamp = reallocb(mp, basize, 0);
2603 if (bamp == NULL) {
2604 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2605 "tl_wput:tl_bind: allocb failed"));
2606 /*
2607 * roll back state changes
2608 */
2609 tl_addr_unbind(tep);
2610 tep->te_state = TS_UNBND;
2611 tl_memrecover(wq, mp, basize);
2612 return;
2613 }
2614
2615 DB_TYPE(bamp) = M_PCPROTO;
2616 bamp->b_wptr = bamp->b_rptr + basize;
2617 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2618 b_ack->PRIM_type = T_BIND_ACK;
2619 b_ack->CONIND_number = qlen;
2620 b_ack->ADDR_length = tep->te_alen;
2621 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2622 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2623 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2624
2625 if (IS_COTS(tep)) {
2626 tep->te_qlen = qlen;
2627 if (qlen > 0)
2628 tep->te_flag |= TL_LISTENER;
2629 }
2630
2631 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2632 /*
2633 * send T_BIND_ACK message
2634 */
2635 (void) qreply(wq, bamp);
2636 return;
2637
2638 error:
2639 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2640 if (ackmp == NULL) {
2641 /*
2642 * roll back state changes
2643 */
2644 tep->te_state = save_state;
2645 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2646 return;
2647 }
2648 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2649 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2650 }
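/*
 * Illustrative layout, derived from the code above, of the T_BIND_ACK
 * mblk that tl_bind() replies with on success:
 *
 *	+--------------------------------+  <- b_rptr (M_PCPROTO)
 *	| struct T_bind_ack              |
 *	|   PRIM_type     = T_BIND_ACK   |
 *	|   ADDR_length   = te_alen      |
 *	|   ADDR_offset   = sizeof (ack) |
 *	|   CONIND_number = qlen         |
 *	+--------------------------------+  <- b_rptr + ADDR_offset
 *	| bound address copied from      |
 *	| te_abuf (te_alen bytes)        |
 *	+--------------------------------+  <- b_wptr
 */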
2651
2652 /*
2653 * Process T_UNBIND_REQ.
2654 * Called from serializer.
2655 */
2656 static void
2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2658 {
2659 queue_t *wq;
2660 mblk_t *ackmp;
2661
2662 if (tep->te_closing) {
2663 freemsg(mp);
2664 return;
2665 }
2666
2667 wq = tep->te_wq;
2668
2669 /*
2670 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2671 * ==> allocate for T_ERROR_ACK (known max)
2672 */
2673 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2674 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2675 return;
2676 }
2677 /*
2678 * memory resources committed
2679 * Note: no message validation. T_UNBIND_REQ message is
2680 * same size as PRIM_type field so already verified earlier.
2681 */
2682
2683 /*
2684 * validate state
2685 */
2686 if (tep->te_state != TS_IDLE) {
2687 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2688 SL_TRACE|SL_ERROR,
2689 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2690 tep->te_state));
2691 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2692 return;
2693 }
2694 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2695
2696 /*
2697 * TPI says on T_UNBIND_REQ:
2698 * send up a M_FLUSH to flush both
2699 * read and write queues
2700 */
2701 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2702
2703 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2704 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2705
2706 /*
2707 * Sockets use bind with qlen==0 followed by bind() to
2708 * the same address with qlen > 0 for listeners.
2709 * We allow rebind with a new qlen value.
2710 */
2711 tl_addr_unbind(tep);
2712 }
2713
2714 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2715 /*
2716 * send T_OK_ACK
2717 */
2718 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2719 }
2720
2721
2722 /*
2723 * Option management code from drv/ip is used here
2724 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2725 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2726 * However, that is what we want as that option is 'unorthodox'
2727 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2728 * and not in T_SVR4_OPTMGMT_REQ/ACK
2729 * Note2: use of optcom_req means this routine is an exception to
2730 * recovery from allocb() failures.
2731 */
2732
2733 static void
2734 tl_optmgmt(queue_t *wq, mblk_t *mp)
2735 {
2736 tl_endpt_t *tep;
2737 mblk_t *ackmp;
2738 union T_primitives *prim;
2739 cred_t *cr;
2740
2741 tep = (tl_endpt_t *)wq->q_ptr;
2742 prim = (union T_primitives *)mp->b_rptr;
2743
2744 /*
2745 * All Solaris components should pass a db_credp
2746 * for this TPI message, hence we ASSERT.
2747 * But in case there is some other M_PROTO that looks
2748 * like a TPI message sent by some other kernel
2749 * component, we check and return an error.
2750 */
2751 cr = msg_getcred(mp, NULL);
2752 ASSERT(cr != NULL);
2753 if (cr == NULL) {
2754 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2755 return;
2756 }
2757
2758 /* all states OK for AF_UNIX options ? */
2759 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2760 prim->type == T_SVR4_OPTMGMT_REQ) {
2761 /*
2762 * Broken TLI semantics that options can only be managed
2763 * in TS_IDLE state. Needed for Sparc ABI test suite that
2764 * tests this TLI (mis)feature using this device driver.
2765 */
2766 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2767 SL_TRACE|SL_ERROR,
2768 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2769 tep->te_state));
2770 /*
2771 * preallocate memory for T_ERROR_ACK
2772 */
2773 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2774 if (! ackmp) {
2775 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2776 return;
2777 }
2778
2779 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2780 freemsg(mp);
2781 return;
2782 }
2783
2784 /*
2785 * call common option management routine from drv/ip
2786 */
2787 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2788 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2789 } else {
2790 ASSERT(prim->type == T_OPTMGMT_REQ);
2791 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2792 }
2793 }
2794
2795 /*
2796 * Handle T_conn_req - the driver part of accept().
2797 * If TL_SET[U]CRED generate the credentials options.
2798 * If this is a socket pass through options unmodified.
2799 * For sockets generate the T_CONN_CON here instead of
2800 * waiting for the T_CONN_RES.
2801 */
2802 static void
2803 tl_conn_req(queue_t *wq, mblk_t *mp)
2804 {
2805 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2806 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2807 ssize_t msz = MBLKL(mp);
2808 t_scalar_t alen, aoff, olen, ooff, err = 0;
2809 tl_endpt_t *peer_tep = NULL;
2810 mblk_t *ackmp;
2811 mblk_t *dimp;
2812 struct T_discon_ind *di;
2813 soux_addr_t ux_addr;
2814 tl_addr_t dst;
2815
2816 ASSERT(IS_COTS(tep));
2817
2818 if (tep->te_closing) {
2819 freemsg(mp);
2820 return;
2821 }
2822
2823 /*
2824 * preallocate memory for:
2825 * 1. max of T_ERROR_ACK and T_OK_ACK
2826 * ==> known max T_ERROR_ACK
2827 * 2. max of T_DISCON_IND and T_CONN_IND
2828 */
2829 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2830 if (! ackmp) {
2831 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2832 return;
2833 }
2834 /*
2835 * memory committed for T_OK_ACK/T_ERROR_ACK now
2836 * will be committed for T_DISCON_IND/T_CONN_IND later
2837 */
2838
2839 if (tep->te_state != TS_IDLE) {
2840 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2841 SL_TRACE|SL_ERROR,
2842 "tl_wput:T_CONN_REQ:out of state, state=%d",
2843 tep->te_state));
2844 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2845 freemsg(mp);
2846 return;
2847 }
2848
2849 /*
2850 * validate the message
2851 * Note: dereference fields in struct inside message only
2852 * after validating the message length.
2853 */
2854 if (msz < sizeof (struct T_conn_req)) {
2855 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2856 "tl_conn_req:invalid message length"));
2857 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2858 freemsg(mp);
2859 return;
2860 }
2861 alen = creq->DEST_length;
2862 aoff = creq->DEST_offset;
2863 olen = creq->OPT_length;
2864 ooff = creq->OPT_offset;
2865 if (olen == 0)
2866 ooff = 0;
2867
2868 if (IS_SOCKET(tep)) {
2869 if ((alen != TL_SOUX_ADDRLEN) ||
2870 (aoff < 0) ||
2871 (aoff + alen > msz) ||
2872 (alen > msz - sizeof (struct T_conn_req))) {
2873 (void) (STRLOG(TL_ID, tep->te_minor,
2874 1, SL_TRACE|SL_ERROR,
2875 "tl_conn_req: invalid socket addr"));
2876 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2877 freemsg(mp);
2878 return;
2879 }
2880 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2881 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2882 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2883 (void) (STRLOG(TL_ID, tep->te_minor,
2884 1, SL_TRACE|SL_ERROR,
2885 "tl_conn_req: invalid socket magic"));
2886 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2887 freemsg(mp);
2888 return;
2889 }
2890 } else {
2891 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2892 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2893 ooff + olen < 0)) ||
2894 olen < 0 || ooff < 0) {
2895 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2896 SL_TRACE|SL_ERROR,
2897 "tl_conn_req:invalid message"));
2898 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2899 freemsg(mp);
2900 return;
2901 }
2902
2903 if (alen <= 0 || aoff < 0 ||
2904 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2905 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2906 SL_TRACE|SL_ERROR,
2907 "tl_conn_req:bad addr in message, "
2908 "alen=%d, msz=%ld",
2909 alen, msz));
2910 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2911 freemsg(mp);
2912 return;
2913 }
2914 #ifdef DEBUG
2915 /*
2916 * Mild form of ASSERT()ion to detect broken TPI apps.
2917 * if (! assertion)
2918 * log warning;
2919 */
2920 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2921 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2922 SL_TRACE|SL_ERROR,
2923 "tl_conn_req: addr overlaps TPI message"));
2924 }
2925 #endif
2926 if (olen) {
2927 /*
2928 * no opts in connect req
2929 * supported in this provider except for sockets.
2930 */
2931 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2932 SL_TRACE|SL_ERROR,
2933 "tl_conn_req:options not supported "
2934 "in message"));
2935 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2936 freemsg(mp);
2937 return;
2938 }
2939 }
2940
2941 /*
2942 * Prevent tep from closing on us.
2943 */
2944 if (! tl_noclose(tep)) {
2945 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2946 "tl_conn_req:endpoint is closing"));
2947 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2948 freemsg(mp);
2949 return;
2950 }
2951
2952 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2953 /*
2954 * get endpoint to connect to
2955 * check that peer with DEST addr is bound to addr
2956 * and has CONIND_number > 0
2957 */
2958 dst.ta_alen = alen;
2959 dst.ta_abuf = mp->b_rptr + aoff;
2960 dst.ta_zoneid = tep->te_zoneid;
2961
2962 /*
2963 * Verify if remote addr is in use
2964 */
2965 peer_tep = (IS_SOCKET(tep) ?
2966 tl_sock_find_peer(tep, &ux_addr) :
2967 tl_find_peer(tep, &dst));
2968
2969 if (peer_tep == NULL) {
2970 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2971 "tl_conn_req:no one at connect address"));
2972 err = ECONNREFUSED;
2973 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2974 /*
2975 * validate that the number of incoming connections is
2976 * not at capacity on the destination endpoint
2977 */
2978 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2979 "tl_conn_req: qlen overflow connection refused"));
2980 err = ECONNREFUSED;
2981 }
2982
2983 /*
2984 * Send T_DISCON_IND in case of error
2985 */
2986 if (err != 0) {
2987 if (peer_tep != NULL)
2988 tl_refrele(peer_tep);
2989 /* We are still expected to send T_OK_ACK */
2990 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2991 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2992 tl_closeok(tep);
2993 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2994 M_PROTO, T_DISCON_IND);
2995 if (dimp == NULL) {
2996 tl_merror(wq, NULL, ENOSR);
2997 return;
2998 }
2999 di = (struct T_discon_ind *)dimp->b_rptr;
3000 di->DISCON_reason = err;
3001 di->SEQ_number = BADSEQNUM;
3002
3003 tep->te_state = TS_IDLE;
3004 /*
3005 * send T_DISCON_IND message
3006 */
3007 putnext(tep->te_rq, dimp);
3008 return;
3009 }
3010
3011 ASSERT(IS_COTS(peer_tep));
3012
3013 /*
3014 * Found the listener. At this point processing will continue on
3015 * listener serializer. Close of the endpoint should be blocked while we
3016 * switch serializers.
3017 */
3018 tl_serializer_refhold(peer_tep->te_ser);
3019 tl_serializer_refrele(tep->te_ser);
3020 tep->te_ser = peer_tep->te_ser;
3021 ASSERT(tep->te_oconp == NULL);
3022 tep->te_oconp = peer_tep;
3023
3024 /*
3025 * It is safe to close now. Close may continue on listener serializer.
3026 */
3027 tl_closeok(tep);
3028
3029 /*
3030 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3031 * data, so we link mp to ackmp.
3032 */
3033 ackmp->b_cont = mp;
3034 mp = ackmp;
3035
3036 tl_refhold(tep);
3037 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3038 }
3039
3040 /*
3041 * Finish T_CONN_REQ processing on listener serializer.
3042 */
3043 static void
3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3045 {
3046 queue_t *wq;
3047 tl_endpt_t *peer_tep = tep->te_oconp;
3048 mblk_t *confmp, *cimp, *indmp;
3049 void *opts = NULL;
3050 mblk_t *ackmp = mp;
3051 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3052 struct T_conn_ind *ci;
3053 tl_icon_t *tip;
3054 void *addr_startp;
3055 t_scalar_t olen = creq->OPT_length;
3056 t_scalar_t ooff = creq->OPT_offset;
3057 size_t ci_msz;
3058 size_t size;
3059 cred_t *cr = NULL;
3060 pid_t cpid;
3061
3062 if (tep->te_closing) {
3063 TL_UNCONNECT(tep->te_oconp);
3064 tl_serializer_exit(tep);
3065 tl_refrele(tep);
3066 freemsg(mp);
3067 return;
3068 }
3069
3070 wq = tep->te_wq;
3071 tep->te_flag |= TL_EAGER;
3072
3073 /*
3074 * Extract preallocated ackmp from mp.
3075 */
3076 mp = mp->b_cont;
3077 ackmp->b_cont = NULL;
3078
3079 if (olen == 0)
3080 ooff = 0;
3081
3082 if (peer_tep->te_closing ||
3083 !((peer_tep->te_state == TS_IDLE) ||
3084 (peer_tep->te_state == TS_WRES_CIND))) {
3085 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3086 "tl_conn_req:peer in bad state (%d)",
3087 peer_tep->te_state));
3088 TL_UNCONNECT(tep->te_oconp);
3089 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3090 freemsg(ackmp);
3091 tl_serializer_exit(tep);
3092 tl_refrele(tep);
3093 return;
3094 }
3095
3096 /*
3097 * preallocate now for T_DISCON_IND or T_CONN_IND
3098 */
3099 /*
3100 * calculate length of T_CONN_IND message
3101 */
3102 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3103 cr = msg_getcred(mp, &cpid);
3104 ASSERT(cr != NULL);
3105 if (peer_tep->te_flag & TL_SETCRED) {
3106 ooff = 0;
3107 olen = (t_scalar_t) sizeof (struct opthdr) +
3108 OPTLEN(sizeof (tl_credopt_t));
3109 /* 1 option only */
3110 } else {
3111 ooff = 0;
3112 olen = (t_scalar_t)sizeof (struct opthdr) +
3113 OPTLEN(ucredminsize(cr));
3114 /* 1 option only */
3115 }
3116 }
3117 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3118 ci_msz = T_ALIGN(ci_msz) + olen;
3119 size = max(ci_msz, sizeof (struct T_discon_ind));
3120
3121 /*
3122 * Save options from mp - we'll need them for T_CONN_IND.
3123 */
3124 if (ooff != 0) {
3125 opts = kmem_alloc(olen, KM_NOSLEEP);
3126 if (opts == NULL) {
3127 /*
3128 * roll back state changes
3129 */
3130 tep->te_state = TS_IDLE;
3131 tl_memrecover(wq, mp, size);
3132 freemsg(ackmp);
3133 TL_UNCONNECT(tep->te_oconp);
3134 tl_serializer_exit(tep);
3135 tl_refrele(tep);
3136 return;
3137 }
3138 /* Copy options to a temp buffer */
3139 bcopy(mp->b_rptr + ooff, opts, olen);
3140 }
3141
3142 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3143 /*
3144 * Generate a T_CONN_CON that has the identical address
3145 * (and options) as the T_CONN_REQ.
3146 * NOTE: assumes that the T_conn_req and T_conn_con structures
3147 * are isomorphic.
3148 */
3149 confmp = copyb(mp);
3150 if (! confmp) {
3151 /*
3152 * roll back state changes
3153 */
3154 tep->te_state = TS_IDLE;
3155 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3156 freemsg(ackmp);
3157 if (opts != NULL)
3158 kmem_free(opts, olen);
3159 TL_UNCONNECT(tep->te_oconp);
3160 tl_serializer_exit(tep);
3161 tl_refrele(tep);
3162 return;
3163 }
3164 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3165 T_CONN_CON;
3166 } else {
3167 confmp = NULL;
3168 }
3169 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3170 /*
3171 * roll back state changes
3172 */
3173 tep->te_state = TS_IDLE;
3174 tl_memrecover(wq, mp, size);
3175 freemsg(ackmp);
3176 if (opts != NULL)
3177 kmem_free(opts, olen);
3178 freemsg(confmp);
3179 TL_UNCONNECT(tep->te_oconp);
3180 tl_serializer_exit(tep);
3181 tl_refrele(tep);
3182 return;
3183 }
3184
3185 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3186 if (tip == NULL) {
3187 /*
3188 * roll back state changes
3189 */
3190 tep->te_state = TS_IDLE;
3191 tl_memrecover(wq, indmp, sizeof (*tip));
3192 freemsg(ackmp);
3193 if (opts != NULL)
3194 kmem_free(opts, olen);
3195 freemsg(confmp);
3196 TL_UNCONNECT(tep->te_oconp);
3197 tl_serializer_exit(tep);
3198 tl_refrele(tep);
3199 return;
3200 }
3201 tip->ti_mp = NULL;
3202
3203 /*
3204 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3205 * and tl_icon_t cell.
3206 */
3207
3208 /*
3209 * ack validity of request and send the peer credential in the ACK.
3210 */
3211 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3212
3213 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3214 confmp != NULL) {
3215 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3216 }
3217
3218 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3219
3220 /*
3221 * prepare message to send T_CONN_IND
3222 */
3223 /*
3224 * allocate the message - original data blocks retained
3225 * in the returned mblk
3226 */
3227 cimp = tl_resizemp(indmp, size);
3228 if (! cimp) {
3229 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3230 "tl_conn_req:con_ind:allocb failure"));
3231 tl_merror(wq, indmp, ENOMEM);
3232 TL_UNCONNECT(tep->te_oconp);
3233 tl_serializer_exit(tep);
3234 tl_refrele(tep);
3235 if (opts != NULL)
3236 kmem_free(opts, olen);
3237 freemsg(confmp);
3238 ASSERT(tip->ti_mp == NULL);
3239 kmem_free(tip, sizeof (*tip));
3240 return;
3241 }
3242
3243 DB_TYPE(cimp) = M_PROTO;
3244 ci = (struct T_conn_ind *)cimp->b_rptr;
3245 ci->PRIM_type = T_CONN_IND;
3246 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3247 ci->SRC_length = tep->te_alen;
3248 ci->SEQ_number = tep->te_seqno;
3249
3250 addr_startp = cimp->b_rptr + ci->SRC_offset;
3251 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3252 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3253
3254 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3255 ci->SRC_length);
3256 ci->OPT_length = olen; /* because only 1 option */
3257 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3258 cr, cpid,
3259 peer_tep->te_flag, peer_tep->te_credp);
3260 } else if (ooff != 0) {
3261 /* Copy option from T_CONN_REQ */
3262 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3263 ci->SRC_length);
3264 ci->OPT_length = olen;
3265 ASSERT(opts != NULL);
3266 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3267 } else {
3268 ci->OPT_offset = 0;
3269 ci->OPT_length = 0;
3270 }
3271 if (opts != NULL)
3272 kmem_free(opts, olen);
3273
3274 /*
3275 * register connection request with server peer
3276 * append to list of incoming connections
3277 * increment references for both peer_tep and tep: peer_tep is placed on
3278 * te_oconp and tep is placed on listeners queue.
3279 */
3280 tip->ti_tep = tep;
3281 tip->ti_seqno = tep->te_seqno;
3282 list_insert_tail(&peer_tep->te_iconp, tip);
3283 peer_tep->te_nicon++;
3284
3285 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3286 /*
3287 * send the T_CONN_IND message
3288 */
3289 putnext(peer_tep->te_rq, cimp);
3290
3291 /*
3292 * Send a T_CONN_CON message for sockets.
3293 * Disable the queues until we have reached the correct state!
3294 */
3295 if (confmp != NULL) {
3296 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3297 noenable(wq);
3298 putnext(tep->te_rq, confmp);
3299 }
3300 /*
3301 * Now we need to increment tep reference because tep is referenced by
3302 * server list of pending connections. We also need to decrement
3303 * reference before exiting serializer. The two operations cancel each
3304 * other out, so we don't modify the reference count at all.
3305 */
3306 ASSERT(tep->te_refcnt >= 2);
3307 ASSERT(peer_tep->te_refcnt >= 2);
3308 tl_serializer_exit(tep);
3309 }
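/*
 * Sketch derived from the code above: messages generated for a single
 * socket connect() when early connect is enabled (tl_disable_early_connect
 * is zero):
 *
 *	eager write side:    T_CONN_REQ
 *	eager read side:     T_OK_ACK, then T_CONN_CON (with the write queue
 *	                     noenable()d until the T_CONN_RES is processed)
 *	listener read side:  T_CONN_IND carrying the eager's address and, for
 *	                     TL_SET[U]CRED listeners, a credential option
 */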
3310
3311
3312
3313 /*
3314 * Handle T_conn_res on listener stream. Called on listener serializer.
3315 * tl_conn_req has already generated the T_CONN_CON.
3316 * tl_conn_res is called on listener serializer.
3317 * No one accesses acceptor at this point, so it is safe to modify acceptor.
3318 * Switch eager serializer to acceptor's.
3319 *
3320 * If TL_SET[U]CRED generate the credentials options.
3321 * For sockets tl_conn_req has already generated the T_CONN_CON.
3322 */
3323 static void
3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3325 {
3326 queue_t *wq;
3327 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
3328 ssize_t msz = MBLKL(mp);
3329 t_scalar_t olen, ooff, err = 0;
3330 t_scalar_t prim = cres->PRIM_type;
3331 uchar_t *addr_startp;
3332 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
3333 tl_icon_t *tip;
3334 size_t size;
3335 mblk_t *ackmp, *respmp;
3336 mblk_t *dimp, *ccmp = NULL;
3337 struct T_discon_ind *di;
3338 struct T_conn_con *cc;
3339 boolean_t client_noclose_set = B_FALSE;
3340 boolean_t switch_client_serializer = B_TRUE;
3341
3342 ASSERT(IS_COTS(tep));
3343
3344 if (tep->te_closing) {
3345 freemsg(mp);
3346 return;
3347 }
3348
3349 wq = tep->te_wq;
3350
3351 /*
3352 * preallocate memory for:
3353 * 1. max of T_ERROR_ACK and T_OK_ACK
3354 * ==> known max T_ERROR_ACK
3355 * 2. max of T_DISCON_IND and T_CONN_CON
3356 */
3357 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3358 if (! ackmp) {
3359 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3360 return;
3361 }
3362 /*
3363 * memory committed for T_OK_ACK/T_ERROR_ACK now
3364 * will be committed for T_DISCON_IND/T_CONN_CON later
3365 */
3366
3367
3368 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3369
3370 /*
3371 * validate state
3372 */
3373 if (tep->te_state != TS_WRES_CIND) {
3374 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3375 SL_TRACE|SL_ERROR,
3376 "tl_wput:T_CONN_RES:out of state, state=%d",
3377 tep->te_state));
3378 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3379 freemsg(mp);
3380 return;
3381 }
3382
3383 /*
3384 * validate the message
3385 * Note: dereference fields in struct inside message only
3386 * after validating the message length.
3387 */
3388 if (msz < sizeof (struct T_conn_res)) {
3389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3390 "tl_conn_res:invalid message length"));
3391 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3392 freemsg(mp);
3393 return;
3394 }
3395 olen = cres->OPT_length;
3396 ooff = cres->OPT_offset;
3397 if (((olen > 0) && ((ooff + olen) > msz))) {
3398 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3399 "tl_conn_res:invalid message"));
3400 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3401 freemsg(mp);
3402 return;
3403 }
3404 if (olen) {
3405 /*
3406 * no opts in connect res
3407 * supported in this provider
3408 */
3409 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3410 "tl_conn_res:options not supported in message"));
3411 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3412 freemsg(mp);
3413 return;
3414 }
3415
3416 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3417 ASSERT(tep->te_state == TS_WACK_CRES);
3418
3419 if (cres->SEQ_number < TL_MINOR_START &&
3420 cres->SEQ_number >= BADSEQNUM) {
3421 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3422 "tl_conn_res:remote endpoint sequence number bad"));
3423 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3424 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3425 freemsg(mp);
3426 return;
3427 }
3428
3429 /*
3430 * find accepting endpoint. Will have extra reference if found.
3431 */
3432 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3433 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3434 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3435 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3436 "tl_conn_res:bad accepting endpoint"));
3437 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3438 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3439 freemsg(mp);
3440 return;
3441 }
3442
3443 /*
3444 * Prevent acceptor from closing.
3445 */
3446 if (! tl_noclose(acc_ep)) {
3447 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3448 "tl_conn_res:bad accepting endpoint"));
3449 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3450 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3451 tl_refrele(acc_ep);
3452 freemsg(mp);
3453 return;
3454 }
3455
3456 acc_ep->te_flag |= TL_ACCEPTOR;
3457
3458 /*
3459 * validate that accepting endpoint, if different from listening
3460 * has address bound => state is TS_IDLE
3461 * TROUBLE in XPG4 !!?
3462 */
3463 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3464 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3465 "tl_conn_res:accepting endpoint has no address bound,"
3466 "state=%d", acc_ep->te_state));
3467 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3468 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3469 freemsg(mp);
3470 tl_closeok(acc_ep);
3471 tl_refrele(acc_ep);
3472 return;
3473 }
3474
3475 /*
3476 * validate if accepting endpt same as listening, then
3477 * no other incoming connection should be on the queue
3478 */
3479
3480 if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3481 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3482 "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3483 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3484 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3485 freemsg(mp);
3486 tl_closeok(acc_ep);
3487 tl_refrele(acc_ep);
3488 return;
3489 }
3490
3491 /*
3492 * Mark for deletion the entry corresponding to the client
3493 * on the list of pending connections made by the listener:
3494 * search the list to see if the client is one of those
3495 * recorded on the listener.
3496 */
3497 tip = tl_icon_find(tep, cres->SEQ_number);
3498 if (tip == NULL) {
3499 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3500 "tl_conn_res:no client in listener list"));
3501 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3502 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3503 freemsg(mp);
3504 tl_closeok(acc_ep);
3505 tl_refrele(acc_ep);
3506 return;
3507 }
3508
3509 /*
3510 * If ti_tep is NULL the client has already closed. In this case
3511 * the code below will avoid any action on the client side
3512 * but complete the server and acceptor state transitions.
3513 */
3514 ASSERT(tip->ti_tep == NULL ||
3515 tip->ti_tep->te_seqno == cres->SEQ_number);
3516 cl_ep = tip->ti_tep;
3517
3518 /*
3519 * If the client is present it is switched from listener's to acceptor's
3520 * serializer. We should block client closes while serializers are
3521 * being switched.
3522 *
3523 * It is possible that the client is present but is currently being
3524 * closed. There are two possible cases:
3525 *
3526 * 1) The client has already entered tl_close_finish_ser() and sent
3527 * T_ORDREL_IND. In this case we can just ignore the client (but we
3528 * still need to send all messages from tip->ti_mp to the acceptor).
3529 *
3530 * 2) The client started the close but has not entered
3531 * tl_close_finish_ser() yet. In this case, the client is already
3532 * proceeding asynchronously on the listener's serializer, so we're
3533 * forced to change the acceptor to use the listener's serializer to
3534 * ensure that any operations on the acceptor are serialized with
3535 * respect to the close that's in-progress.
3536 */
3537 if (cl_ep != NULL) {
3538 if (tl_noclose(cl_ep)) {
3539 client_noclose_set = B_TRUE;
3540 } else {
3541 /*
3542 * Client is closing. If it has sent the
3543 * T_ORDREL_IND, we can simply ignore it - otherwise,
3544 * we have to let the client continue until it is
3545 * sent.
3546 *
3547 * If we do continue using the client, acceptor will
3548 * switch to client's serializer which is used by client
3549 * for its close.
3550 */
3551 tl_client_closing_when_accepting++;
3552 switch_client_serializer = B_FALSE;
3553 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3554 cl_ep->te_state == -1)
3555 cl_ep = NULL;
3556 }
3557 }
3558
3559 if (cl_ep != NULL) {
3560 /*
3561 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3562 * (latter for sockets only)
3563 */
3564 if (cl_ep->te_state != TS_WCON_CREQ &&
3565 (cl_ep->te_state != TS_DATA_XFER &&
3566 IS_SOCKET(cl_ep))) {
3567 err = ECONNREFUSED;
3568 /*
3569 * T_DISCON_IND sent later after committing memory
3570 * and acking validity of request
3571 */
3572 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3573 "tl_conn_res:peer in bad state"));
3574 }
3575
3576 /*
3577 * preallocate now for T_DISCON_IND or T_CONN_CON
3578 * ack validity of request (T_OK_ACK) after memory committed
3579 */
3580
3581 if (err)
3582 size = sizeof (struct T_discon_ind);
3583 else {
3584 /*
3585 * calculate length of T_CONN_CON message
3586 */
3587 olen = 0;
3588 if (cl_ep->te_flag & TL_SETCRED) {
3589 olen = (t_scalar_t)sizeof (struct opthdr) +
3590 OPTLEN(sizeof (tl_credopt_t));
3591 } else if (cl_ep->te_flag & TL_SETUCRED) {
3592 olen = (t_scalar_t)sizeof (struct opthdr) +
3593 OPTLEN(ucredminsize(acc_ep->te_credp));
3594 }
3595 size = T_ALIGN(sizeof (struct T_conn_con) +
3596 acc_ep->te_alen) + olen;
3597 }
3598 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3599 /*
3600 * roll back state changes
3601 */
3602 tep->te_state = TS_WRES_CIND;
3603 tl_memrecover(wq, mp, size);
3604 freemsg(ackmp);
3605 if (client_noclose_set)
3606 tl_closeok(cl_ep);
3607 tl_closeok(acc_ep);
3608 tl_refrele(acc_ep);
3609 return;
3610 }
3611 mp = NULL;
3612 }
3613
3614 /*
3615 * Now ack validity of request
3616 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req cannot operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential
		 * processing.
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * Make the connection by linking the accepting and client
	 * endpoints.
	 * No need to increment references:
	 * on the client: it should already have one from the tip->ti_tep
	 * linkage.
	 * on the acceptor: it should already have one from the table lookup.
	 *
	 * At this point neither client nor acceptor can close. Set the
	 * client serializer to the acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch the client to use the
		 * acceptor's serializer. Move the acceptor to the client's
		 * serializer (which is the same as the listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}
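	/*
	 * From here on the client and the acceptor share one serializer,
	 * so the cross-pointer manipulations below are single-threaded
	 * with respect to both endpoints.
	 */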

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}
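	/*
	 * Note: cross-linking q_next makes the two TLI streams behave like
	 * a STREAMS pipe, which is what allows M_PASSFP messages generated
	 * by I_SENDFD to reach the peer. Sockets do not rely on this
	 * linkage, hence the IS_SOCKET() exclusion above.
	 */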

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In case any data or ordrel req has been
	 * queued, make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}


static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	struct T_discon_req *dr;
	ssize_t msz;
	tl_endpt_t *peer_tep = tep->te_conp;
	tl_endpt_t *srv_tep = tep->te_oconp;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *dimp, *respmp;
	struct T_discon_ind *di;
	t_scalar_t save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this
		 * case the code below will avoid any action on the client
		 * side.
		 */

		IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number);
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request
	 */
	if (tep->te_nicon == 0)
		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
	else if (tep->te_nicon == 1)
		new_state = NEXTSTATE(TE_OK_ACK2, new_state);
	else
		new_state = NEXTSTATE(TE_OK_ACK4, new_state);

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE|SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that ti_tep == NULL since the
			 * T_DISCON_IND takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    peer_tep->te_state);
				else
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    peer_tep->te_state);
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	if (peer_tep == NULL) {
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {	/* disconnect pointers if connected */
		ASSERT(! peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (! IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}

static void
tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
{
	if (!tep->te_closing)
		tl_addr_req(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

static void
tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	size_t ack_sz;
	mblk_t *ackmp;
	struct T_addr_ack *taa;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * Note: T_ADDR_REQ message has only PRIM_type field
	 * so it is already validated earlier.
	 */
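	/*
	 * For the cases handled below the T_ADDR_ACK is laid out as the
	 * fixed header immediately followed by the local address (present
	 * only when the endpoint is bound):
	 *
	 *	+-------------------+-----------------+
	 *	| struct T_addr_ack | local address   |
	 *	+-------------------+-----------------+
	 *
	 * so LOCADDR_offset points just past the header.
	 */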

	if (IS_CLTS(tep) ||
	    (tep->te_state > TS_WREQ_ORDREL) ||
	    (tep->te_state < TS_DATA_XFER)) {
		/*
		 * Either connectionless or connection oriented but not
		 * in connected data transfer state or half-closed states.
		 */
		ack_sz = sizeof (struct T_addr_ack);
		if (tep->te_state >= TS_IDLE)
			/* is bound */
			ack_sz += tep->te_alen;
		ackmp = reallocb(mp, ack_sz, 0);
		if (ackmp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_addr_req: reallocb failed"));
			tl_memrecover(wq, mp, ack_sz);
			return;
		}

		taa = (struct T_addr_ack *)ackmp->b_rptr;

		bzero(taa, sizeof (struct T_addr_ack));

		taa->PRIM_type = T_ADDR_ACK;
		ackmp->b_datap->db_type = M_PCPROTO;
		ackmp->b_wptr = (uchar_t *)&taa[1];

		if (tep->te_state >= TS_IDLE) {
			/* endpoint is bound */
			taa->LOCADDR_length = tep->te_alen;
			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);

			bcopy(tep->te_abuf, ackmp->b_wptr,
			    tep->te_alen);
			ackmp->b_wptr += tep->te_alen;
			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
		}

		(void) qreply(wq, ackmp);
	} else {
		ASSERT(tep->te_state == TS_DATA_XFER ||
		    tep->te_state == TS_WIND_ORDREL ||
		    tep->te_state == TS_WREQ_ORDREL);
		/* connection oriented in data transfer */
		tl_connected_cots_addr_req(mp, tep);
	}
}


static void
tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t *peer_tep = tep->te_conp;
	size_t ack_sz;
	mblk_t *ackmp;
	struct T_addr_ack *taa;
	uchar_t *addr_startp;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	if (peer_tep == NULL || peer_tep->te_closing) {
		tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
		return;
	}

	ASSERT(tep->te_state >= TS_IDLE);

	ack_sz = sizeof (struct T_addr_ack);
	ack_sz += T_ALIGN(tep->te_alen);
	ack_sz += peer_tep->te_alen;
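	/*
	 * The local address is padded with T_ALIGN() so that the remote
	 * address that follows it starts on an aligned boundary.
	 */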

	ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
	if (ackmp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_connected_cots_addr_req: reallocb failed"));
		tl_memrecover(tep->te_wq, mp, ack_sz);
		return;
	}

	taa = (struct T_addr_ack *)ackmp->b_rptr;

	/* endpoint is bound */
	taa->LOCADDR_length = tep->te_alen;
	taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);

	addr_startp = (uchar_t *)&taa[1];

	bcopy(tep->te_abuf, addr_startp,
	    tep->te_alen);

	taa->REMADDR_length = peer_tep->te_alen;
	taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
	    taa->LOCADDR_length);
	addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
	bcopy(peer_tep->te_abuf, addr_startp,
	    peer_tep->te_alen);
	ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
	    taa->REMADDR_offset + peer_tep->te_alen;
	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);

	putnext(tep->te_rq, ackmp);
}

static void
tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
{
	if (IS_CLTS(tep)) {
		*ia = tl_clts_info_ack;
		ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are the same */
	} else {
		*ia = tl_cots_info_ack;
		if (IS_COTSORD(tep))
			ia->SERV_type = T_COTS_ORD;
	}
	ia->TIDU_size = tl_tidusz;
	ia->CURRENT_state = tep->te_state;
}

/*
 * This routine responds to T_CAPABILITY_REQ messages. It is called by
 * tl_wput.
 */
static void
tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
{
	mblk_t *ackmp;
	t_uscalar_t cap_bits1;
	struct T_capability_ack *tcap;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    M_PCPROTO, T_CAPABILITY_ACK);
	if (ackmp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_capability_req: reallocb failed"));
		tl_memrecover(tep->te_wq, mp,
		    sizeof (struct T_capability_ack));
		return;
	}

	tcap = (struct T_capability_ack *)ackmp->b_rptr;
	tcap->CAP_bits1 = 0;

	if (cap_bits1 & TC1_INFO) {
		tl_copy_info(&tcap->INFO_ack, tep);
		tcap->CAP_bits1 |= TC1_INFO;
	}

	if (cap_bits1 & TC1_ACCEPTOR_ID) {
		tcap->ACCEPTOR_id = tep->te_acceptor_id;
		tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
	}

	putnext(tep->te_rq, ackmp);
}

static void
tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
{
	if (! tep->te_closing)
		tl_info_req(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

static void
tl_info_req(mblk_t *mp, tl_endpt_t *tep)
{
	mblk_t *ackmp;

	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
	    M_PCPROTO, T_INFO_ACK);
	if (ackmp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_info_req: reallocb failed"));
		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
		return;
	}

	/*
	 * fill in T_INFO_ACK contents
	 */
	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);

	/*
	 * send ack message
	 */
	putnext(tep->te_rq, ackmp);
}

/*
 * Handle M_DATA, T_data_req and T_optdata_req.
 * If this is a socket pass through T_optdata_req options unmodified.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early, thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing, queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays the same on this event.)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_ser == peer_tep->te_ser);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled.
	 * Note: the number of messages already on the queue when we are
	 * closing is bounded, so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (Peer state stays the same on this event.)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early, thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing, queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (State stays the same on this event.)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled.
	 * Note: the number of messages already on the queue when we are
	 * closing is bounded, so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (Peer state stays the same on this event.)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early, thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing, queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: the number of messages already on the queue when we are
	 * closing is bounded, so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
 */
static void
tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
{
	size_t err_sz;
	tl_endpt_t *tep;
	struct T_unitdata_req *udreq;
	mblk_t *err_mp;
	t_scalar_t alen;
	t_scalar_t olen;
	struct T_uderror_ind *uderr;
	uchar_t *addr_startp;

	err_sz = sizeof (struct T_uderror_ind);
	tep = (tl_endpt_t *)wq->q_ptr;
	udreq = (struct T_unitdata_req *)mp->b_rptr;
	alen = udreq->DEST_length;
	olen = udreq->OPT_length;

	if (alen > 0)
		err_sz = T_ALIGN(err_sz + alen);
	if (olen > 0)
		err_sz += olen;
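	/*
	 * The resulting T_UDERROR_IND carries the fixed header, then the
	 * destination address (padded with T_ALIGN() so that the options
	 * start aligned), then the options, matching the offsets computed
	 * below.
	 */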

	err_mp = allocb(err_sz, BPRI_MED);
	if (! err_mp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_uderr:allocb failure"));
		/*
		 * Note: no rollback of state needed as it does
		 * not change in connectionless transport
		 */
		tl_memrecover(wq, mp, err_sz);
		return;
	}

	DB_TYPE(err_mp) = M_PROTO;
	err_mp->b_wptr = err_mp->b_rptr + err_sz;
	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
	uderr->PRIM_type = T_UDERROR_IND;
	uderr->ERROR_type = err;
	uderr->DEST_length = alen;
	uderr->OPT_length = olen;
	if (alen <= 0) {
		uderr->DEST_offset = 0;
	} else {
		uderr->DEST_offset =
		    (t_scalar_t)sizeof (struct T_uderror_ind);
		addr_startp = mp->b_rptr + udreq->DEST_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
		    (size_t)alen);
	}
	if (olen <= 0) {
		uderr->OPT_offset = 0;
	} else {
		uderr->OPT_offset =
		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
		    uderr->DEST_length);
		addr_startp = mp->b_rptr + udreq->OPT_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->OPT_offset,
		    (size_t)olen);
	}
	freemsg(mp);

	/*
	 * send indication message
	 */
	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);

	qreply(wq, err_mp);
}

static void
tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;

	if (!tep->te_closing && (wq->q_first != NULL)) {
		TL_PUTQ(tep, mp);
	} else if (tep->te_rq != NULL)
		tl_unitdata(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Handle T_unitdata_req.
 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
 * If this is a socket pass through options unmodified.
 */
static void
tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	soux_addr_t ux_addr;
	tl_addr_t destaddr;
	uchar_t *addr_startp;
	tl_endpt_t *peer_tep;
	struct T_unitdata_ind *udind;
	struct T_unitdata_req *udreq;
	ssize_t msz, ui_sz;
	t_scalar_t alen, aoff, olen, ooff;
	t_scalar_t oldolen = 0;
	cred_t *cr = NULL;
	pid_t cpid;

	udreq = (struct T_unitdata_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_UNITDATA_REQ:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
	 * (state does not change on this event)
	 */

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_unitdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:invalid message length"));
		tl_merror(wq, mp, EINVAL);
		return;
	}
	alen = udreq->DEST_length;
	aoff = udreq->DEST_offset;
	oldolen = olen = udreq->OPT_length;
	ooff = udreq->OPT_offset;
	if (olen == 0)
		ooff = 0;

	if (IS_SOCKET(tep)) {
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (olen < 0) || (ooff < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_unitdata_req: invalid socket addr "
			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
			    (int)msz, alen, aoff, olen, ooff));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);

		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_unitdata: invalid socket magic"));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
	} else {
		if ((alen < 0) ||
		    (aoff < 0) ||
		    ((alen > 0) && ((aoff + alen) > msz)) ||
		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
		    ((aoff + alen) < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz)) ||
		    (olen < 0) ||
		    (ooff < 0) ||
		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:invalid unit data message"));
			tl_merror(wq, mp, EINVAL);
			return;
		}
	}

	/* Options not supported unless it's a socket */
	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:option use(unsupported) or zero len addr"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}
#ifdef DEBUG
	/*
	 * Mild form of ASSERT()ion to detect broken TPI apps.
	 * if (! assertion)
	 *	log warning;
	 */
	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:addr overlaps TPI message"));
	}
#endif
	/*
	 * get destination endpoint
	 */
	destaddr.ta_alen = alen;
	destaddr.ta_abuf = mp->b_rptr + aoff;
	destaddr.ta_zoneid = tep->te_zoneid;

	/*
	 * Check whether the destination is the same that was used previously
	 * and the destination endpoint is in the right state. If something is
	 * wrong, find the destination again and cache it.
	 */
	peer_tep = tep->te_lastep;

	if ((peer_tep == NULL) || peer_tep->te_closing ||
	    (peer_tep->te_state != TS_IDLE) ||
	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
		/*
		 * Not the same as the cached destination, need to find the
		 * right destination.
		 */
		peer_tep = (IS_SOCKET(tep) ?
		    tl_sock_find_peer(tep, &ux_addr) :
		    tl_find_peer(tep, &destaddr));

		if (peer_tep == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:no one at destination address"));
			tl_uderr(wq, mp, ECONNRESET);
			return;
		}

		/*
		 * Cache the new peer.
		 */
		if (tep->te_lastep != NULL)
			tl_refrele(tep->te_lastep);

		tep->te_lastep = peer_tep;
	}

	if (peer_tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:provider in invalid state"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}

	ASSERT(peer_tep->te_rq != NULL);

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: the number of messages already on the queue when we are
	 * closing is bounded, so we can ignore flow control.
	 */
	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
		/* record what we are flow controlled on */
		if (tep->te_flowq != NULL) {
			list_remove(&tep->te_flowq->te_flowlist, tep);
		}
		list_insert_head(&peer_tep->te_flowlist, tep);
		tep->te_flowq = peer_tep;
		TL_PUTBQ(tep, mp);
		return;
	}
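	/*
	 * Note: te_flowq/te_flowlist record which peer this endpoint is
	 * flow controlled on; tl_cl_backenable() walks te_flowlist to
	 * qenable the blocked senders once the peer drains.
	 */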
	/*
	 * prepare indication message
	 */

	/*
	 * calculate length of message
	 */
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
		cr = msg_getcred(mp, &cpid);
		ASSERT(cr != NULL);

		if (peer_tep->te_flag & TL_SETCRED) {
			ASSERT(olen == 0);
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(sizeof (tl_credopt_t));
			/* 1 option only */
		} else if (peer_tep->te_flag & TL_SETUCRED) {
			ASSERT(olen == 0);
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(ucredminsize(cr));
			/* 1 option only */
		} else {
			/* Possibly more than one option */
			olen += (t_scalar_t)sizeof (struct T_opthdr) +
			    OPTLEN(ucredminsize(cr));
		}
	}
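	/*
	 * Note: TL_SETCRED and TL_SETUCRED replace the (asserted empty)
	 * option area with a single credential option, while TL_SOCKUCRED
	 * appends the credential after any options the sender supplied.
	 */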

	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
	    olen;
	/*
	 * If the unitdata_ind fits and we are not adding options
	 * reuse the udreq mblk.
	 */
	if (msz >= ui_sz && alen >= tep->te_alen &&
	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
		/*
		 * Reuse the original mblk. Leave options in place.
		 */
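		/*
		 * Note: this in-place rewrite relies on T_unitdata_req and
		 * T_unitdata_ind sharing the same layout, so the request's
		 * DEST_length/DEST_offset are simply reinterpreted as
		 * SRC_length/SRC_offset below.
		 */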
		udind = (struct T_unitdata_ind *)mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_length = tep->te_alen;
		addr_startp = mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	} else {
		/* Allocate a new T_unitdata_ind message */
		mblk_t *ui_mp;

		ui_mp = allocb(ui_sz, BPRI_MED);
		if (! ui_mp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
			    "tl_unitdata:allocb failure:message queued"));
			tl_memrecover(wq, mp, ui_sz);
			return;
		}

		/*
		 * fill in T_UNITDATA_IND contents
		 */
		DB_TYPE(ui_mp) = M_PROTO;
		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
		udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
		udind->SRC_length = tep->te_alen;
		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
		udind->OPT_offset =
		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
		udind->OPT_length = olen;
		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {

			if (oldolen != 0) {
				bcopy((void *)((uintptr_t)udreq + ooff),
				    (void *)((uintptr_t)udind +
				    udind->OPT_offset),
				    oldolen);
			}
			ASSERT(cr != NULL);

			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
			    oldolen, cr, cpid,
			    peer_tep->te_flag, peer_tep->te_credp);
		} else {
			bcopy((void *)((uintptr_t)udreq + ooff),
			    (void *)((uintptr_t)udind + udind->OPT_offset),
			    olen);
		}

		/*
		 * relink data blocks from mp to ui_mp
		 */
		ui_mp->b_cont = mp->b_cont;
		freeb(mp);
		mp = ui_mp;
	}
	/*
	 * send indication message
	 */
	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
	putnext(peer_tep->te_rq, mp);
}


/*
 * Check if a given addr is in use.
 * Endpoint ptr returned or NULL if not found.
 * The name space is separate for each mode. This implies that
 * sockets get their own name space.
 */
static tl_endpt_t *
tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
{
	tl_endpt_t *peer_tep = NULL;
	int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
	    (mod_hash_val_t *)&peer_tep, tl_find_callback);
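	/*
	 * tl_find_callback is expected to take a reference on the endpoint
	 * it returns, so on success the caller owns a reference that must
	 * eventually be dropped with tl_refrele() (as the te_closing case
	 * below does).
	 */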

	ASSERT(! IS_SOCKET(tep));

	ASSERT(ap != NULL && ap->ta_alen > 0);
	ASSERT(ap->ta_zoneid == tep->te_zoneid);
	ASSERT(ap->ta_abuf != NULL);
	EQUIV(rc == 0, peer_tep != NULL);
	IMPLY(rc == 0,
	    (tep->te_zoneid == peer_tep->te_zoneid) &&
	    (tep->te_transport == peer_tep->te_transport));

	if ((rc == 0) && (peer_tep->te_closing)) {
		tl_refrele(peer_tep);
		peer_tep = NULL;
	}

	return (peer_tep);
}

/*
 * Find the peer for a socket based on the unix domain address.
 * For implicit addresses our peer can be found by minor number in the ai
 * hash. For explicit binds we look up the vnode address in the addr hash.
 */
static tl_endpt_t *
tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
{
	tl_endpt_t *peer_tep = NULL;
	mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
	    tep->te_aihash : tep->te_addrhash;
	int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
	    (mod_hash_val_t *)&peer_tep, tl_find_callback);

	ASSERT(IS_SOCKET(tep));
	EQUIV(rc == 0, peer_tep != NULL);
	IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));

	if (peer_tep != NULL) {
		/* Don't attempt to use closing peer. */
		if (peer_tep->te_closing)
			goto errout;

		/*
		 * Cross-zone unix sockets are permitted, but for Trusted
		 * Extensions only, the "server" for these must be in the
		 * global zone.
		 */
		if ((peer_tep->te_zoneid != tep->te_zoneid) &&
		    is_system_labeled() &&
		    (peer_tep->te_zoneid != GLOBAL_ZONEID))
			goto errout;
	}

	return (peer_tep);

errout:
	tl_refrele(peer_tep);
	return (NULL);
}

/*
 * Generate a free address and return it in the struct pointed to by ap,
 * allocating space for the address buffer.
 * The generated address will be at least 4 bytes long and, if req->ta_alen
 * exceeds 4 bytes, be req->ta_alen bytes long.
 *
 * If an address is found it will be inserted in the hash.
 *
 * If req->ta_alen is larger than the default alen (4 bytes) the last
 * alen-4 bytes will always be the same as in req.
 *
 * Return 0 for failure.
 * Return non-zero for success.
 */
static boolean_t
tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
{
	t_scalar_t alen;
	uint32_t loopcnt;	/* Limit loop to 2^32 */

	ASSERT(tep->te_hash_hndl != NULL);
	ASSERT(! IS_SOCKET(tep));

	if (tep->te_hash_hndl == NULL)
		return (B_FALSE);

	/*
	 * check if the default addr is in use
	 * if it is - bump it and try again
	 */
	if (req == NULL) {
		alen = sizeof (uint32_t);
	} else {
		alen = max(req->ta_alen, sizeof (uint32_t));
		ASSERT(tep->te_zoneid == req->ta_zoneid);
	}

	if (tep->te_alen < alen) {
		void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);

		/*
		 * Not enough space in tep->te_abuf to hold the address,
		 * allocate a bigger space.
		 */
		if (abuf == NULL)
			return (B_FALSE);

		if (tep->te_alen > 0)
			kmem_free(tep->te_abuf, tep->te_alen);

		tep->te_alen = alen;
		tep->te_abuf = abuf;
	}
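	/*
	 * From here on te_abuf/te_alen hold the candidate address; the
	 * hash insert below uses tep->te_ap (the tl_addr_t view of that
	 * buffer) directly as the key.
	 */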

	/* Copy in the address in req */
	if (req != NULL) {
		ASSERT(alen >= req->ta_alen);
		bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
	}

	/*
	 * First try minor number then try default addresses.
	 */
	bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));

	for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
		if (mod_hash_insert_reserve(tep->te_addrhash,
		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
		    tep->te_hash_hndl) == 0) {
			/*
			 * found free address
			 */
			tep->te_flag |= TL_ADDRHASHED;
			tep->te_hash_hndl = NULL;

			return (B_TRUE); /* successful return */
		}
		/*
		 * Use default address.
		 */
		bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
		atomic_inc_32(&tep->te_defaddr);
	}

	/*
	 * Failed to find anything.
	 */
	(void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
	    "tl_get_any_addr:looped 2^32 times"));
	return (B_FALSE);
}

/*
 * reallocb + set r/w ptrs to reflect size.
 */
static mblk_t *
tl_resizemp(mblk_t *mp, ssize_t new_size)
{
	if ((mp = reallocb(mp, new_size, 0)) == NULL)
		return (NULL);

	mp->b_rptr = DB_BASE(mp);
	mp->b_wptr = mp->b_rptr + new_size;
	return (mp);
}

static void
tl_cl_backenable(tl_endpt_t *tep)
{
	list_t *l = &tep->te_flowlist;
	tl_endpt_t *elp;

	ASSERT(IS_CLTS(tep));

	for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
		ASSERT(tep->te_ser == elp->te_ser);
		ASSERT(elp->te_flowq == tep);
		if (! elp->te_closing)
			TL_QENABLE(elp);
		elp->te_flowq = NULL;
		list_remove(l, elp);
	}
}

/*
 * Unconnect endpoints.
 */
static void
tl_co_unconnect(tl_endpt_t *tep)
{
	tl_endpt_t *peer_tep = tep->te_conp;
	tl_endpt_t *srv_tep = tep->te_oconp;
	list_t *l;
	tl_icon_t *tip;
	tl_endpt_t *cl_tep;
	mblk_t *d_mp;

	ASSERT(IS_COTS(tep));
	/*
	 * If our peer is closing, don't use it.
	 */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	if (tep->te_nicon > 0) {
		l = &tep->te_iconp;
		/*
		 * If incoming requests are pending, change the state of the
		 * clients on the disconnect ind event and send a discon_ind
		 * pdu to the modules above them.
		 * For a server: all clients get the disconnect.
		 */

		while (tep->te_nicon > 0) {
			tip = list_head(l);
			cl_tep = tip->ti_tep;

			if (cl_tep == NULL) {
				tl_freetip(tep, tip);
				continue;
			}

			if (cl_tep->te_oconp != NULL) {
				ASSERT(cl_tep != cl_tep->te_oconp);
				TL_UNCONNECT(cl_tep->te_oconp);
			}

			if (cl_tep->te_closing) {
				tl_freetip(tep, tip);
				continue;
			}

			enableok(cl_tep->te_wq);
			TL_QENABLE(cl_tep);
			d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
			if (d_mp != NULL) {
				cl_tep->te_state = TS_IDLE;
				putnext(cl_tep->te_rq, d_mp);
			} else {
				(void) (STRLOG(TL_ID, tep->te_minor, 3,
				    SL_TRACE|SL_ERROR,
				    "tl_co_unconnect:icmng: "
				    "allocb failure"));
			}
			tl_freetip(tep, tip);
		}
	} else if (srv_tep != NULL) {
		/*
		 * If an outgoing request is pending, change the state
		 * of the server on the discon ind event.
		 */

		if (IS_SOCKET(tep) && !tl_disable_early_connect &&
		    IS_COTSORD(srv_tep) &&
		    !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
			/*
			 * Queue ordrel_ind for server to be picked up
			 * when the connection is accepted.
			 */
			d_mp = tl_ordrel_ind_alloc();
		} else {
			/*
			 * send discon_ind to server
			 */
			d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
		}
		if (d_mp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_co_unconnect:outgoing:allocb failure"));
			TL_UNCONNECT(tep->te_oconp);
			goto discon_peer;
		}

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * Queue the disconnection message.
			 */
			tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
		} else {
			tip = tl_icon_find(srv_tep, tep->te_seqno);
			if (tip == NULL) {
				freemsg(d_mp);
			} else {
				ASSERT(tep == tip->ti_tep);
				ASSERT(tep->te_ser == srv_tep->te_ser);
				/*
				 * Delete tip from the server list.
				 */
				if (srv_tep->te_nicon == 1) {
					srv_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    srv_tep->te_state);
				} else {
					srv_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    srv_tep->te_state);
				}
				ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
				    T_DISCON_IND);
				putnext(srv_tep->te_rq, d_mp);
				tl_freetip(srv_tep, tip);
			}
			TL_UNCONNECT(tep->te_oconp);
			srv_tep = NULL;
		}
	} else if (peer_tep != NULL) {
		/*
		 * Unconnect an existing connection.
		 * If connected, change the state of the peer on the
		 * discon ind event and send a discon ind pdu to the module
		 * above it.
		 */

		ASSERT(tep->te_ser == peer_tep->te_ser);
		if (IS_COTSORD(peer_tep) &&
		    (peer_tep->te_state == TS_WIND_ORDREL ||
		    peer_tep->te_state == TS_DATA_XFER)) {
			/*
			 * send ordrel ind
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
			    "tl_co_unconnect:connected: ordrel_ind state %d->%d",
			    peer_tep->te_state,
			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
			d_mp = tl_ordrel_ind_alloc();
			if (! d_mp) {
				(void) (STRLOG(TL_ID, tep->te_minor, 3,
				    SL_TRACE|SL_ERROR,
				    "tl_co_unconnect:connected:"
				    "allocb failure"));
				/*
				 * Continue with cleaning up peer as
				 * this side may go away with the close
				 */
				TL_QENABLE(peer_tep);
				goto discon_peer;
			}
			peer_tep->te_state =
			    NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

			putnext(peer_tep->te_rq, d_mp);
			/*
			 * Handle flow control case. This will generate
			 * a t_discon_ind message with reason 0 if there
			 * is data queued on the write side.
			 */
			TL_QENABLE(peer_tep);
		} else if (IS_COTSORD(peer_tep) &&
		    peer_tep->te_state == TS_WREQ_ORDREL) {
			/*
			 * Sent an ordrel_ind. We send a discon with
			 * error 0 to inform that the peer is gone.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_co_unconnect: discon in state %d",
			    tep->te_state));
			tl_discon_ind(peer_tep, 0);
		} else {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_co_unconnect: state %d", tep->te_state));
			tl_discon_ind(peer_tep, ECONNRESET);
		}

discon_peer:
		/*
		 * Disconnect cross-pointers only for close
		 */
		if (tep->te_closing) {
			peer_tep = tep->te_conp;
			TL_REMOVE_PEER(peer_tep->te_conp);
			TL_REMOVE_PEER(tep->te_conp);
		}
	}
}
5678
5679 /*
5680 * Note: The following routine does not recover from allocb()
5681 * failures
5682 * The reason should be from the <sys/errno.h> space.
5683 */
5684 static void
tl_discon_ind(tl_endpt_t * tep,uint32_t reason)5685 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5686 {
5687 mblk_t *d_mp;
5688
5689 if (tep->te_closing)
5690 return;
5691
5692 /*
5693 * flush the queues.
5694 */
5695 flushq(tep->te_rq, FLUSHDATA);
5696 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5697
5698 /*
5699 * send discon ind
5700 */
5701 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5702 if (! d_mp) {
5703 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5704 "tl_discon_ind:allocb failure"));
5705 return;
5706 }
5707 tep->te_state = TS_IDLE;
5708 putnext(tep->te_rq, d_mp);
5709 }
5710
5711 /*
5712 * Note: The following routine does not recover from allocb()
5713 * failures
5714 * The reason should be from the <sys/errno.h> space.
5715 */
5716 static mblk_t *
tl_discon_ind_alloc(uint32_t reason,t_scalar_t seqnum)5717 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5718 {
5719 mblk_t *mp;
5720 struct T_discon_ind *tdi;
5721
5722 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5723 DB_TYPE(mp) = M_PROTO;
5724 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5725 tdi = (struct T_discon_ind *)mp->b_rptr;
5726 tdi->PRIM_type = T_DISCON_IND;
5727 tdi->DISCON_reason = reason;
5728 tdi->SEQ_number = seqnum;
5729 }
5730 return (mp);
5731 }
5732
5733
5734 /*
5735 * Note: The following routine does not recover from allocb()
5736 * failures.
5737 */
5738 static mblk_t *
5739 tl_ordrel_ind_alloc(void)
5740 {
5741 mblk_t *mp;
5742 struct T_ordrel_ind *toi;
5743
5744 if ((mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) != NULL) {
5745 DB_TYPE(mp) = M_PROTO;
5746 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5747 toi = (struct T_ordrel_ind *)mp->b_rptr;
5748 toi->PRIM_type = T_ORDREL_IND;
5749 }
5750 return (mp);
5751 }
5752
5753
5754 /*
5755 * Lookup the seqno in the list of queued connections.
5756 */
5757 static tl_icon_t *
5758 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5759 {
5760 list_t *l = &tep->te_iconp;
5761 tl_icon_t *tip = list_head(l);
5762
5763 ASSERT(seqno != 0);
5764
5765 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5766 ;
5767
5768 return (tip);
5769 }
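/*
 * Usage sketch: the lookup is a linear scan of te_iconp, and callers must
 * handle a NULL return when the seqno is not queued, as tl_icon_queuemsg()
 * below does:
 *
 *	tl_icon_t *tip = tl_icon_find(tep, seqno);
 *	if (tip == NULL) {
 *		freemsg(nmp);
 *		return;
 *	}
 */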
5770
5771 /*
5772 * Queue data for a given T_CONN_IND while verifying that redundant
5773 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5774 * Used when the originator of the connection closes.
5775 */
5776 static void
5777 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5778 {
5779 tl_icon_t *tip;
5780 mblk_t **mpp, *mp;
5781 int prim, nprim;
5782
5783 if (nmp->b_datap->db_type == M_PROTO)
5784 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5785 else
5786 nprim = -1; /* M_DATA */
5787
5788 tip = tl_icon_find(tep, seqno);
5789 if (tip == NULL) {
5790 freemsg(nmp);
5791 return;
5792 }
5793
5794 ASSERT(tip->ti_seqno != 0);
5795 mpp = &tip->ti_mp;
5796 while (*mpp != NULL) {
5797 mp = *mpp;
5798
5799 if (mp->b_datap->db_type == M_PROTO)
5800 prim = ((union T_primitives *)mp->b_rptr)->type;
5801 else
5802 prim = -1; /* M_DATA */
5803
5804 /*
5805 * Allow nothing after a T_DISCON_IND
5806 */
5807 if (prim == T_DISCON_IND) {
5808 freemsg(nmp);
5809 return;
5810 }
5811 /*
5812 * Only allow a T_DISCON_IND after a T_ORDREL_IND
5813 */
5814 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5815 freemsg(nmp);
5816 return;
5817 }
5818 mpp = &(mp->b_next);
5819 }
5820 *mpp = nmp;
5821 }
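/*
 * Worked example of the invariants above (illustrative): with the chain
 *
 *	M_DATA -> T_ORDREL_IND
 *
 * already queued for a seqno, an arriving T_DISCON_IND is appended (an
 * ordrel may only be followed by a discon), whereas any other arriving
 * message is freed. Once a T_DISCON_IND is on the chain, every subsequent
 * message for that seqno is freed.
 */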
5822
5823 /*
5824 * Check whether a given TPI primitive exists on the connind queue.
5825 * Use prim == -1 for M_DATA.
5826 * Return B_TRUE if found.
5827 */
5828 static boolean_t
5829 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5830 {
5831 tl_icon_t *tip = tl_icon_find(tep, seqno);
5832 boolean_t found = B_FALSE;
5833
5834 if (tip != NULL) {
5835 mblk_t *mp;
5836 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5837 found = (DB_TYPE(mp) == M_PROTO &&
5838 ((union T_primitives *)mp->b_rptr)->type == prim);
5839 }
5840 }
5841 return (found);
5842 }
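/*
 * Usage sketch (hedged; the guard is an illustration, not a quote of a
 * real caller): a caller that must not queue a duplicate disconnect can
 * test first:
 *
 *	if (!tl_icon_hasprim(tep, seqno, T_DISCON_IND))
 *		tl_icon_queuemsg(tep, seqno, d_mp);
 */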
5843
5844 /*
5845 * Send the b_next mblk chain that has accumulated before the connection
5846 * was accepted. Perform the necessary state transitions.
5847 */
5848 static void
5849 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5850 {
5851 mblk_t *mp;
5852 union T_primitives *primp;
5853
5854 if (tep->te_closing) {
5855 tl_icon_freemsgs(mpp);
5856 return;
5857 }
5858
5859 ASSERT(tep->te_state == TS_DATA_XFER);
5860 ASSERT(tep->te_rq->q_first == NULL);
5861
5862 while ((mp = *mpp) != NULL) {
5863 *mpp = mp->b_next;
5864 mp->b_next = NULL;
5865
5866 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5867 switch (DB_TYPE(mp)) {
5868 default:
5869 freemsg(mp);
5870 break;
5871 case M_DATA:
5872 putnext(tep->te_rq, mp);
5873 break;
5874 case M_PROTO:
5875 primp = (union T_primitives *)mp->b_rptr;
5876 switch (primp->type) {
5877 case T_UNITDATA_IND:
5878 case T_DATA_IND:
5879 case T_OPTDATA_IND:
5880 case T_EXDATA_IND:
5881 putnext(tep->te_rq, mp);
5882 break;
5883 case T_ORDREL_IND:
5884 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5885 tep->te_state);
5886 putnext(tep->te_rq, mp);
5887 break;
5888 case T_DISCON_IND:
5889 tep->te_state = TS_IDLE;
5890 putnext(tep->te_rq, mp);
5891 break;
5892 default:
5893 #ifdef DEBUG
5894 cmn_err(CE_PANIC,
5895 "tl_icon_sendmsgs: unknown primitive");
5896 #endif /* DEBUG */
5897 freemsg(mp);
5898 break;
5899 }
5900 break;
5901 }
5902 }
5903 }
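/*
 * Caller sketch (hedged assumption about accept-time processing): the
 * deferred chain is drained through the pointer that heads it, e.g.
 *
 *	tl_icon_sendmsgs(tep, &tip->ti_mp);
 *
 * so that *mpp is advanced as each message is consumed, leaving the chain
 * empty (or freed wholesale if the endpoint is closing).
 */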
5904
5905 /*
5906 * Free the b_next mblk chain that has accumulated before the connection
5907 * was accepted.
5908 */
5909 static void
5910 tl_icon_freemsgs(mblk_t **mpp)
5911 {
5912 mblk_t *mp;
5913
5914 while ((mp = *mpp) != NULL) {
5915 *mpp = mp->b_next;
5916 mp->b_next = NULL;
5917 freemsg(mp);
5918 }
5919 }
5920
5921 /*
5922 * Send M_ERROR.
5923 * Note: assumes the caller ensured enough space in mp or enough
5924 * memory is available. Does not attempt recovery from allocb()
5925 * failures.
5926 */
5927
5928 static void
5929 tl_merror(queue_t *wq, mblk_t *mp, int error)
5930 {
5931 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5932
5933 if (tep->te_closing) {
5934 freemsg(mp);
5935 return;
5936 }
5937
5938 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5939 SL_TRACE|SL_ERROR,
5940 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5941
5942 /*
5943 * Flush all messages on the queue; we are shutting
5944 * the stream down on a fatal error.
5945 */
5946 flushq(wq, FLUSHALL);
5947 if (IS_COTS(tep)) {
5948 /* connection oriented - unconnect endpoints */
5949 tl_co_unconnect(tep);
5950 }
5951 if (mp->b_cont) {
5952 freemsg(mp->b_cont);
5953 mp->b_cont = NULL;
5954 }
5955
5956 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5957 freemsg(mp);
5958 mp = allocb(1, BPRI_HI);
5959 if (!mp) {
5960 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5961 SL_TRACE|SL_ERROR,
5962 "tl_merror:M_PROTO: out of memory"));
5963 return;
5964 }
5965 }
5966 if (mp) {
5967 DB_TYPE(mp) = M_ERROR;
5968 mp->b_rptr = DB_BASE(mp);
5969 *mp->b_rptr = (char)error;
5970 mp->b_wptr = mp->b_rptr + sizeof (char);
5971 qreply(wq, mp);
5972 } else {
5973 (void) putnextctl1(tep->te_rq, M_ERROR, error);
5974 }
5975 }
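/*
 * Effect sketch (standard stream-head semantics, not driver code): the
 * one-byte M_ERROR sent above puts the stream into an error state, so
 * after, e.g.,
 *
 *	tl_merror(wq, mp, EPROTO);
 *
 * subsequent read() and write() calls on the endpoint fail with EPROTO
 * until the stream is closed.
 */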
5976
5977 static void
5978 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5979 {
5980 ASSERT(cr != NULL);
5981
5982 if (flag & TL_SETCRED) {
5983 struct opthdr *opt = (struct opthdr *)buf;
5984 tl_credopt_t *tlcred;
5985
5986 opt->level = TL_PROT_LEVEL;
5987 opt->name = TL_OPT_PEER_CRED;
5988 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5989
5990 tlcred = (tl_credopt_t *)(opt + 1);
5991 tlcred->tc_uid = crgetuid(cr);
5992 tlcred->tc_gid = crgetgid(cr);
5993 tlcred->tc_ruid = crgetruid(cr);
5994 tlcred->tc_rgid = crgetrgid(cr);
5995 tlcred->tc_suid = crgetsuid(cr);
5996 tlcred->tc_sgid = crgetsgid(cr);
5997 tlcred->tc_ngroups = crgetngroups(cr);
5998 } else if (flag & TL_SETUCRED) {
5999 struct opthdr *opt = (struct opthdr *)buf;
6000
6001 opt->level = TL_PROT_LEVEL;
6002 opt->name = TL_OPT_PEER_UCRED;
6003 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6004
6005 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6006 } else {
6007 struct T_opthdr *topt = (struct T_opthdr *)buf;
6008 ASSERT(flag & TL_SOCKUCRED);
6009
6010 topt->level = SOL_SOCKET;
6011 topt->name = SCM_UCRED;
6012 topt->len = ucredminsize(cr) + sizeof (*topt);
6013 topt->status = 0;
6014 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6015 }
6016 }
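/*
 * Buffer layout sketch for the three cases above (illustrative):
 *
 *	TL_SETCRED:	[ struct opthdr   ][ tl_credopt_t ]
 *	TL_SETUCRED:	[ struct opthdr   ][ ucred, ucredminsize(cr) bytes ]
 *	TL_SOCKUCRED:	[ struct T_opthdr ][ ucred, ucredminsize(cr) bytes ]
 *
 * The caller is assumed to have reserved OPTLEN()-rounded space for the
 * header plus payload in buf before calling tl_fill_option().
 */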
6017
6018 /* ARGSUSED */
6019 static int
6020 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6021 {
6022 /* no default value is processed in protocol-specific code currently */
6023 return (-1);
6024 }
6025
6026 /* ARGSUSED */
6027 static int
6028 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6029 {
6030 int len;
6031 tl_endpt_t *tep;
6032 int *valp;
6033
6034 tep = (tl_endpt_t *)wq->q_ptr;
6035
6036 len = 0;
6037
6038 /*
6039 * Assumes: option level and name sanity check done elsewhere
6040 */
6041
6042 switch (level) {
6043 case SOL_SOCKET:
6044 if (! IS_SOCKET(tep))
6045 break;
6046 switch (name) {
6047 case SO_RECVUCRED:
6048 len = sizeof (int);
6049 valp = (int *)ptr;
6050 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6051 break;
6052 default:
6053 break;
6054 }
6055 break;
6056 case TL_PROT_LEVEL:
6057 switch (name) {
6058 case TL_OPT_PEER_CRED:
6059 case TL_OPT_PEER_UCRED:
6060 /*
6061 * This option is not supposed to be retrieved directly.
6062 * It is only sent in T_CONN_{IND,CON} and T_UNITDATA_IND
6063 * when certain internal flags are set by other options.
6064 * Direct retrieval is always designed to fail (be ignored)
6065 * for this option.
6066 */
6067 break;
6068 }
6069 }
6070 return (len);
6071 }
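/*
 * Example (illustrative): for a socket endpoint a SOL_SOCKET/SO_RECVUCRED
 * query
 *
 *	len = tl_get_opt(wq, SOL_SOCKET, SO_RECVUCRED, ptr);
 *
 * returns sizeof (int) and stores 1 at ptr iff TL_SOCKUCRED is set;
 * any other level/name pair yields len == 0.
 */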
6072
6073 /* ARGSUSED */
6074 static int
6075 tl_set_opt(
6076 queue_t *wq,
6077 uint_t mgmt_flags,
6078 int level,
6079 int name,
6080 uint_t inlen,
6081 uchar_t *invalp,
6082 uint_t *outlenp,
6083 uchar_t *outvalp,
6084 void *thisdg_attrs,
6085 cred_t *cr)
6086 {
6087 int error;
6088 tl_endpt_t *tep;
6089
6090 tep = (tl_endpt_t *)wq->q_ptr;
6091
6092 error = 0; /* NOERROR */
6093
6094 /*
6095 * Assumes: option level and name sanity checks done elsewhere
6096 */
6097
6098 switch (level) {
6099 case SOL_SOCKET:
6100 if (! IS_SOCKET(tep)) {
6101 error = EINVAL;
6102 break;
6103 }
6104 /*
6105 * TBD: fill in other AF_UNIX socket options and then stop
6106 * returning error.
6107 */
6108 switch (name) {
6109 case SO_RECVUCRED:
6110 /*
6111 * We only support this for datagram sockets;
6112 * getpeerucred handles the connection oriented
6113 * transports.
6114 */
6115 if (! IS_CLTS(tep)) {
6116 error = EINVAL;
6117 break;
6118 }
6119 if (*(int *)invalp == 0)
6120 tep->te_flag &= ~TL_SOCKUCRED;
6121 else
6122 tep->te_flag |= TL_SOCKUCRED;
6123 break;
6124 default:
6125 error = EINVAL;
6126 break;
6127 }
6128 break;
6129 case TL_PROT_LEVEL:
6130 switch (name) {
6131 case TL_OPT_PEER_CRED:
6132 case TL_OPT_PEER_UCRED:
6133 /*
6134 * This option is not supposed to be set directly.
6135 * Its value is initialized for each endpoint at
6136 * driver open time.
6137 * Direct setting is always designed to fail for this
6138 * option.
6139 */
6140 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6141 SL_TRACE|SL_ERROR,
6142 "tl_set_opt: option is not supported"));
6143 error = EPROTO;
6144 break;
6145 }
6146 }
6147 return (error);
6148 }
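/*
 * Example (hedged sketch; "one" is an illustrative int holding 1):
 * enabling SO_RECVUCRED on a connectionless socket endpoint,
 *
 *	error = tl_set_opt(wq, mgmt_flags, SOL_SOCKET, SO_RECVUCRED,
 *	    sizeof (int), (uchar_t *)&one, &outlen, outval, NULL, cr);
 *
 * sets TL_SOCKUCRED so that tl_fill_option() above will later attach an
 * SCM_UCRED option to inbound datagrams.
 */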
6149
6150
6151 static void
6152 tl_timer(void *arg)
6153 {
6154 queue_t *wq = arg;
6155 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6156
6157 ASSERT(tep);
6158
6159 tep->te_timoutid = 0;
6160
6161 enableok(wq);
6162 /*
6163 * Note: we could call wsrv directly here and save a context switch.
6164 * Consider this change when qtimeout (not timeout) is active.
6165 */
6166 qenable(wq);
6167 }
6168
6169 static void
6170 tl_buffer(void *arg)
6171 {
6172 queue_t *wq = arg;
6173 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6174
6175 ASSERT(tep);
6176
6177 tep->te_bufcid = 0;
6178 tep->te_nowsrv = B_FALSE;
6179
6180 enableok(wq);
6181 /*
6182 * Note: we could call wsrv directly here and save a context switch.
6183 * Consider this change when qbufcall (not bufcall) is active.
6184 */
6185 qenable(wq);
6186 }
6187
6188 static void
6189 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6190 {
6191 tl_endpt_t *tep;
6192
6193 tep = (tl_endpt_t *)wq->q_ptr;
6194
6195 if (tep->te_closing) {
6196 freemsg(mp);
6197 return;
6198 }
6199 noenable(wq);
6200
6201 (void) insq(wq, wq->q_first, mp);
6202
6203 if (tep->te_bufcid || tep->te_timoutid) {
6204 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6205 "tl_memrecover:recover %p pending", (void *)wq));
6206 return;
6207 }
6208
6209 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6210 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6211 drv_usectohz(TL_BUFWAIT));
6212 }
6213 }
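/*
 * Recovery pattern sketch (descriptive): the message whose allocation
 * failed is requeued at the front of the now-disabled write queue:
 *
 *	noenable(wq);
 *	(void) insq(wq, wq->q_first, mp);
 *
 * When the qbufcall()/qtimeout() callback (tl_buffer()/tl_timer() above)
 * fires, it calls enableok() and qenable(), rerunning the service
 * procedure, which retries the allocation.
 */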
6214
6215 static void
6216 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6217 {
6218 ASSERT(tip->ti_seqno != 0);
6219
6220 if (tip->ti_mp != NULL) {
6221 tl_icon_freemsgs(&tip->ti_mp);
6222 tip->ti_mp = NULL;
6223 }
6224 if (tip->ti_tep != NULL) {
6225 tl_refrele(tip->ti_tep);
6226 tip->ti_tep = NULL;
6227 }
6228 list_remove(&tep->te_iconp, tip);
6229 kmem_free(tip, sizeof (tl_icon_t));
6230 tep->te_nicon--;
6231 }
6232
6233 /*
6234 * Remove address from address hash.
6235 */
6236 static void
6237 tl_addr_unbind(tl_endpt_t *tep)
6238 {
6239 tl_endpt_t *elp;
6240
6241 if (tep->te_flag & TL_ADDRHASHED) {
6242 if (IS_SOCKET(tep)) {
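/*
 * Descriptive note: for sockets the vnode pointer (te_vp) is the
 * hash key; after removal it is reset to the minor number and
 * te_magic to SOU_MAGIC_IMPLICIT, reverting the endpoint to an
 * implicit (unbound) address.
 */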
6243 (void) mod_hash_remove(tep->te_addrhash,
6244 (mod_hash_key_t)tep->te_vp,
6245 (mod_hash_val_t *)&elp);
6246 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6247 tep->te_magic = SOU_MAGIC_IMPLICIT;
6248 } else {
6249 (void) mod_hash_remove(tep->te_addrhash,
6250 (mod_hash_key_t)&tep->te_ap,
6251 (mod_hash_val_t *)&elp);
6252 kmem_free(tep->te_abuf, tep->te_alen);
6253 tep->te_alen = -1;
6254 tep->te_abuf = NULL;
6255 }
6256 tep->te_flag &= ~TL_ADDRHASHED;
6257 }
6258 }
6259