1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012 by Delphix. All rights reserved.
28 * Copyright 2020 Joyent, Inc.
29 */
30
31 /*
32 * Multithreaded STREAMS Local Transport Provider.
33 *
34 * OVERVIEW
35 * ========
36 *
37 * This driver provides TLI as well as socket semantics. It provides
38 * connectionless, connection oriented, and connection oriented with orderly
39 * release transports for TLI and sockets. Each transport type has separate name
40 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41 * this removes any name space conflicts when binding to socket style transport
42 * addresses.
43 *
44 * NOTE: There is one exception: Socket ticots and ticotsord transports share
45 * the same namespace. In fact, sockets always use ticotsord type transport.
46 *
47 * The driver mode is specified during open() by the minor number used for
48 * open.
49 *
50 * The sockets in addition have the following semantic differences:
51 * No support for passing up credentials (TL_SET[U]CRED).
52 *
53 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55 * T_OPTDATA_IND.
56 *
57 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58 * a T_CONN_RES is received from the acceptor. This means that a socket
59 * connect will complete before the peer has called accept.
60 *
61 *
62 * MULTITHREADING
63 * ==============
64 *
65 * The driver does not use STREAMS protection mechanisms. Instead it uses a
66 * generic "serializer" abstraction. Most of the operations are executed behind
67 * the serializer and are, essentially single-threaded. All functions executed
68 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
 * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
74 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
78 *
79 * All COTS-type endpoints start their life with private serializers. During
80 * connection request processing the endpoint serializer is switched to the
81 * listener's serializer and the rest of T_CONN_REQ processing is done on the
82 * listener serializer. During T_CONN_RES processing the eager serializer is
83 * switched from listener to acceptor serializer and after that point all
84 * processing for eager and acceptor happens on this serializer. To avoid races
85 * with endpoint closes while its serializer may be changing closes are blocked
86 * while serializers are manipulated.
87 *
88 * References accounting
89 * ---------------------
90 *
91 * Endpoints are reference counted and freed when the last reference is
92 * dropped. Functions within the serializer may access an endpoint state even
93 * after an endpoint closed. The te_closing being set on the endpoint indicates
94 * that the endpoint entered its close routine.
95 *
96 * One reference is held for each opened endpoint instance. The reference
97 * counter is incremented when the endpoint is linked to another endpoint and
98 * decremented when the link disappears. It is also incremented when the
99 * endpoint is found by the hash table lookup. This increment is atomic with the
100 * lookup itself and happens while the hash table read lock is held.
101 *
102 * Close synchronization
103 * ---------------------
104 *
 * During close the endpoint is marked as closing using te_closing flag. It is
106 * usually enough to check for te_closing flag since all other state changes
107 * happen after this flag is set and the close entered serializer. Immediately
108 * after setting te_closing flag tl_close() enters serializer and waits until
109 * the callback finishes. This allows all functions called within serializer to
110 * simply check te_closing without any locks.
111 *
112 * Serializer management.
113 * ---------------------
114 *
115 * For COTS transports serializers are created when the endpoint is constructed
116 * and destroyed when the endpoint is destructed. CLTS transports use global
117 * serializers - one for sockets and one for TLI.
118 *
119 * COTS serializers have separate reference counts to deal with several
120 * endpoints sharing the same serializer. There is a subtle problem related to
121 * the serializer destruction. The serializer should never be destroyed by any
122 * function executed inside serializer. This means that close has to wait till
123 * all serializer activity for this endpoint is finished before it can drop the
124 * last reference on the endpoint (which may as well free the serializer). This
125 * is only relevant for COTS transports which manage serializers
126 * dynamically. For CLTS transports close may complete without waiting for all
127 * serializer activity to finish since serializer is only destroyed at driver
128 * detach time.
129 *
130 * COTS endpoints keep track of the number of outstanding requests on the
131 * serializer for the endpoint. The code handling accept() avoids changing
132 * client serializer if it has any pending messages on the serializer and
133 * instead moves acceptor to listener's serializer.
134 *
135 *
136 * Use of hash tables
137 * ------------------
138 *
139 * The driver uses modhash hash table implementation. Each transport uses two
140 * hash tables - one for finding endpoints by acceptor ID and another one for
141 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142 * pair of hash tables since sockets only use TICOTSORD.
143 *
144 * All hash tables lookups increment a reference count for returned endpoints,
145 * so we may safely check the endpoint state even when the endpoint is removed
146 * from the hash by another thread immediately after it is found.
147 *
148 *
149 * CLOSE processing
150 * ================
151 *
152 * The driver enters serializer twice on close(). The close sequence is the
153 * following:
154 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 * This step is needed to prevent close during serializer switches. In most
 * cases (close happening after connection establishment) te_closewait is
 * zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 * tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 * It also needs to clear write-side q_next pointers - this should be done
 * before qprocsoff().
 *
 * This synchronous serializer entry during close is needed to ensure that
 * the queue is valid everywhere inside the serializer.
 *
 * Note that in many cases close will execute tl_close_ser() synchronously,
 * so it will not wait at all.
 *
 * 4) Calls qprocsoff().
 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 * complete (for COTS transports). For CLTS transport there is no wait.
 *
 * tl_close_finish_ser() finishes the close process and wakes up waiting
 * close if there is any.
 *
 * Note that in most cases close will enter tl_close_finish_ser()
 * synchronously and will not wait at all.
181 *
182 *
183 * Flow Control
184 * ============
185 *
186 * The driver implements both read and write side service routines. No one calls
187 * putq() on the read queue. The read side service routine tl_rsrv() is called
188 * when the read side stream is back-enabled. It enters serializer synchronously
189 * (waits till serializer processing is complete). Within serializer it
190 * back-enables all endpoints blocked by the queue for connection-less
191 * transports and enables write side service processing for the peer for
192 * connection-oriented transports.
193 *
194 * Read and write side service routines use special mblk_sized space in the
195 * endpoint structure to enter perimeter.
196 *
197 * Write-side flow control
198 * -----------------------
199 *
200 * Write side flow control is a bit tricky. The driver needs to deal with two
201 * message queues - the explicit STREAMS message queue maintained by
202 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203 * queues should be synchronized to preserve message ordering and should
204 * maintain a single order determined by the order in which messages enter
205 * tl_wput(). In order to maintain the ordering between these two queues the
206 * STREAMS queue is only manipulated within the serializer, so the ordering is
207 * provided by the serializer.
208 *
209 * Functions called from the tl_wsrv() sometimes may call putbq(). To
210 * immediately stop any further processing of the STREAMS message queues the
211 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212 * side service processing stops when the flag is set.
213 *
214 * The tl_wsrv() function enters serializer synchronously and waits for it to
215 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
218 * always bounded by the amount of messages on the STREAMS queue at the time
219 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220 * queue from another serialized entry which can't happen in parallel. This
221 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
222 * of it draining forever while writer places new messages on the STREAMS
223 * queue).
224 *
225 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226 *
227 *
228 * Unix Domain Sockets
229 * ===================
230 *
231 * The driver knows the structure of Unix Domain sockets addresses and treats
232 * them differently from generic TLI addresses. For sockets implicit binds are
233 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234 * instead of using address length of zero. Explicit binds specify
235 * SOU_MAGIC_EXPLICIT as magic.
236 *
237 * For implicit binds we always use minor number as soua_vp part of the address
238 * and avoid any hash table lookups. This saves two hash tables lookups per
239 * anonymous bind.
240 *
241 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
243 * hashing by the full address.
244 *
245 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
246 * tep structure, so it should be never freed.
247 *
248 * Also for sockets the driver always uses minor number as acceptor id.
249 *
250 * TPI VIOLATIONS
251 * --------------
252 *
253 * This driver violates TPI in several respects for Unix Domain Sockets:
254 *
255 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256 * is requested and the endpoint is already in use. There is no point in
257 * generating an unused address since this address will be rejected by
258 * sockfs anyway. For implicit binds it always generates a new address
259 * (sets soua_vp to its minor number).
260 *
261 * 2) It always uses minor number as acceptor ID and never uses queue
262 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263 * message and they do not use the queue pointer.
264 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 * backlog followed by listen(). The listen() should be issued with non-zero
267 * backlog, so sotpi_listen() issues unbind request followed by bind
268 * request to the same address but with a non-zero qlen value. Both
269 * tl_bind() and tl_unbind() require write lock on the hash table to
270 * insert/remove the address. The driver does not remove the address from
271 * the hash for endpoints that are bound to the explicit address and have
272 * backlog of zero. During T_BIND_REQ processing if the address requested
273 * is equal to the address the endpoint already has it updates the backlog
274 * without reinserting the address in the hash table. This optimization
275 * avoids two hash table updates for each listener created. It always
276 * avoids the problem of a "stolen" address when another listener may use
277 * the same address between the unbind and bind and suddenly listen() fails
278 * because address is in use even though the bind() succeeded.
279 *
280 *
281 * CONNECTIONLESS TRANSPORTS
282 * =========================
283 *
284 * Connectionless transports all share the same serializer (one for TLI and one
285 * for Sockets). Functions executing behind serializer can check or modify state
286 * of any endpoint.
287 *
288 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289 * te_lastep field. The next time X talks to some address A it checks whether A
290 * is the same as Y's address and if it is there is no need to lookup Y. If the
291 * address is different or the state of Y is not appropriate (e.g. closed or not
292 * idle) X does a lookup using tl_find_peer() and caches the new address.
293 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294 * on the endpoint found.
295 *
296 * During close of endpoint Y it doesn't try to remove itself from other
297 * endpoints caches. They will detect that Y is gone and will search the peer
298 * endpoint again.
299 *
300 * Flow Control Handling.
301 * ----------------------
302 *
303 * Each connectionless endpoint keeps a list of endpoints which are
304 * flow-controlled by its queue. It also keeps a pointer to the queue which
305 * flow-controls itself. Whenever flow control releases for endpoint X it
306 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
309 *
310 * DATA STRUCTURES
311 * ===============
312 *
313 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
315 * of pending connections (tl_icon_t). For connectionless transports it keeps a
316 * list of endpoints flow controlled by this one.
317 *
318 * Each transport type is represented by a per-transport data structure
319 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320 * endpoint address hash tables for each transport. It also contains pointer to
321 * transport serializer for connectionless transports.
322 *
323 * Each endpoint keeps a link to its transport structure, so the code can find
324 * all per-transport information quickly.
325 */
326
327 #include <sys/types.h>
328 #include <sys/inttypes.h>
329 #include <sys/stream.h>
330 #include <sys/stropts.h>
331 #define _SUN_TPI_VERSION 2
332 #include <sys/tihdr.h>
333 #include <sys/strlog.h>
334 #include <sys/debug.h>
335 #include <sys/cred.h>
336 #include <sys/errno.h>
337 #include <sys/kmem.h>
338 #include <sys/id_space.h>
339 #include <sys/modhash.h>
340 #include <sys/mkdev.h>
341 #include <sys/tl.h>
342 #include <sys/stat.h>
343 #include <sys/conf.h>
344 #include <sys/modctl.h>
345 #include <sys/strsun.h>
346 #include <sys/socket.h>
347 #include <sys/socketvar.h>
348 #include <sys/sysmacros.h>
349 #include <sys/xti_xtiopt.h>
350 #include <sys/ddi.h>
351 #include <sys/sunddi.h>
352 #include <sys/zone.h>
353 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
354 #include <inet/optcom.h>
355 #include <sys/strsubr.h>
356 #include <sys/ucred.h>
357 #include <sys/suntpi.h>
358 #include <sys/list.h>
359 #include <sys/serializer.h>
360
361 /*
362 * TBD List
 * 14. Eliminate state changes through table
364 * 16. AF_UNIX socket options
365 * 17. connect() for ticlts
366 * 18. support for "netstat" to show AF_UNIX plus TLI local
367 * transport connections
368 * 21. sanity check to flushing on sending M_ERROR
369 */
370
371 /*
372 * CONSTANT DECLARATIONS
373 * --------------------
374 */
375
376 /*
377 * Local declarations
378 */
/* Initial sequence number used by T_DISCON_IND. */
#define	BADSEQNUM	(-1)
/* Microseconds to wait for the allocb buffer timeout. */
#define	TL_BUFWAIT	(10000)
/* TIDU size to use when "strmsgsz" is unlimited (0). */
#define	TL_TIDUSZ	(64 * 1024)

/*
 * Size of the hash tables.
 */
#define	TL_HASH_SIZE	311
386
387 /*
388 * Definitions for module_info
389 */
/* Values placed in the module_info structure tl_minfo. */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* minimum packet size */
#define	TL_MAXPSZ	INFPSZ		/* maximum packet size ZZZ */
#define	TL_HIWAT	(16 * 1024)	/* high water mark */
#define	TL_LOWAT	(256)		/* low water mark */
396 /*
397 * Definition of minor numbers/modes for new transport provider modes.
398 * We view the socket use as a separate mode to get a separate name space.
399 */
#define TL_TICOTS 0 /* connection oriented transport */
#define TL_TICOTSORD 1 /* COTS w/ orderly release */
#define TL_TICLTS 2 /* connectionless transport */
#define TL_UNUSED 3 /* no transport; named "undefined" in tl_transports */
/* TL_SOCKET is a flag bit OR-ed with a base mode to form the socket modes. */
#define TL_SOCKET 4 /* Socket */
#define TL_SOCK_COTS (TL_SOCKET | TL_TICOTS)
#define TL_SOCK_COTSORD (TL_SOCKET | TL_TICOTSORD)
#define TL_SOCK_CLTS (TL_SOCKET | TL_TICLTS)

/*
 * TL_MINOR_MASK covers all mode values defined above (0 through 7).
 * TL_MINOR_START is one past TL_TICLTS.
 * NOTE(review): presumably the mask extracts the mode from a minor number and
 * TL_MINOR_START is the first minor handed out for new opens - confirm
 * against the open/attach code, which is outside this section.
 */
#define TL_MINOR_MASK 0x7
#define TL_MINOR_START (TL_TICLTS + 1)
411
412 /*
413 * LOCAL MACROS
414 */
/*
 * Align p to sizeof (t_scalar_t) via P2ROUNDUP(), which is defined outside
 * this file (presumably <sys/sysmacros.h>, rounding up to a power-of-two
 * boundary - confirm).
 */
#define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t))
416
417 /*
418 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
419 */
/* Open and close routines; installed as qi_qopen/qi_qclose in tl_rinit. */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
/* Write-side put and service procedures; installed in tl_winit. */
static int tl_wput(queue_t *, mblk_t *);
static int tl_wsrv(queue_t *);
/* Read-side service procedure; installed in tl_rinit. */
static int tl_rsrv(queue_t *);

/* Driver entry points handed to DDI_DEFINE_STREAM_OPS() for tl_devops. */
static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
429
430
431 /*
432 * GLOBAL DATA STRUCTURES AND VARIABLES
433 * -----------------------------------
434 */
435
436 /*
437 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
438 * For now, we only manage the SO_RECVUCRED option but we also have
439 * harmless dummy options to make things work with some common code we access.
440 */
441 opdes_t tl_opt_arr[] = {
442 /* The SO_TYPE is needed for the hack below */
443 {
444 SO_TYPE,
445 SOL_SOCKET,
446 OA_R,
447 OA_R,
448 OP_NP,
449 0,
450 sizeof (t_scalar_t),
451 0
452 },
453 {
454 SO_RECVUCRED,
455 SOL_SOCKET,
456 OA_RW,
457 OA_RW,
458 OP_NP,
459 0,
460 sizeof (int),
461 0
462 }
463 };
464
465 /*
466 * Table of all supported levels
467 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
468 * any supported options so we need this info separately.
469 *
470 * This is needed only for topmost tpi providers.
471 */
472 optlevel_t tl_valid_levels_arr[] = {
473 XTI_GENERIC,
474 SOL_SOCKET,
475 TL_PROT_LEVEL
476 };
477
/* Number of entries in tl_valid_levels_arr. */
#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)

/*
 * Current upper bound on the amount of space needed to return all options.
 * For every entry in tl_opt_arr it allows 4 bytes of option data (the << 2)
 * and one struct opthdr; on top of that it adds a constant 64 bytes and the
 * size of the T_optmgmt_ack header.
 *
 * Additional options with data size of sizeof(long) are handled automatically.
 * Options with any other data size require this bound to be adjusted by hand.
 */
#define	TL_MAX_OPT_BUF_LEN	\
	((A_CNT(tl_opt_arr) << 2) +	\
	(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +	\
	64 + sizeof (struct T_optmgmt_ack))

/* Number of entries in tl_opt_arr. */
#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
490
491 /*
492 * transport addr structure
493 */
494 typedef struct tl_addr {
495 zoneid_t ta_zoneid; /* Zone scope of address */
496 t_scalar_t ta_alen; /* length of abuf */
497 void *ta_abuf; /* the addr itself */
498 } tl_addr_t;
499
500 /*
501 * Refcounted version of serializer.
502 */
503 typedef struct tl_serializer {
504 uint_t ts_refcnt;
505 serializer_t *ts_serializer;
506 } tl_serializer_t;
507
508 /*
509 * Each transport type has a separate state.
510 * Per-transport state.
511 */
512 typedef struct tl_transport_state {
513 char *tr_name;
514 minor_t tr_minor;
515 uint32_t tr_defaddr;
516 mod_hash_t *tr_ai_hash;
517 mod_hash_t *tr_addr_hash;
518 tl_serializer_t *tr_serializer;
519 } tl_transport_state_t;
520
521 #define TL_DFADDR 0x1000
522
523 static tl_transport_state_t tl_transports[] = {
524 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
525 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
526 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
527 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
528 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
529 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
530 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
531 };
532
533 #define TL_MAXTRANSPORT A_CNT(tl_transports)
534
/*
 * Forward declaration: tl_endpt_t is used by the types below before
 * struct tl_endpt itself is defined.
 */
struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

/*
 * Function type taking a message block and an endpoint and returning nothing.
 * NOTE(review): presumably the signature of routines run behind the
 * serializer (compare foo(mp1, arg1) in the file header) - confirm.
 */
typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
539
540 /*
541 * Data structure used to represent pending connects.
542 * Records enough information so that the connecting peer can close
543 * before the connection gets accepted.
544 */
545 typedef struct tl_icon {
546 list_node_t ti_node;
547 struct tl_endpt *ti_tep; /* NULL if peer has already closed */
548 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */
549 t_scalar_t ti_seqno; /* Sequence number */
550 } tl_icon_t;
551
/* Shorthand for the socket address type struct so_ux_addr and its size. */
typedef struct so_ux_addr soux_addr_t;
#define TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 * tl_maxqlen holds the limit in a non-static variable initialised from
 * TL_MAXQLEN.
 * NOTE(review): presumably a tunable; its consumers are outside this section.
 */
#define TL_MAXQLEN 4096
int tl_maxqlen = TL_MAXQLEN;
560
561 /*
562 * transport endpoint structure
563 */
/*
 * Complete state of one transport endpoint. Several members are reached
 * through the te_* accessor macros defined alongside them, so code elsewhere
 * can write tep->te_qlen, tep->te_addrhash and so on without spelling out the
 * nested structure.
 */
struct tl_endpt {
	queue_t *te_rq; /* stream read queue */
	queue_t *te_wq; /* stream write queue */
	uint32_t te_refcnt; /* endpoint reference count */
	int32_t te_state; /* TPI state of endpoint */
	minor_t te_minor; /* minor number */
	/* te_seqno is simply another name for te_minor. */
#define te_seqno te_minor
	uint_t te_flag; /* flag field: mode bits plus the TL_* flag values */
	/* Set by TL_PUTBQ(); cleared by TL_QENABLE() and TL_PUTQ(). */
	boolean_t te_nowsrv;
	tl_serializer_t *te_ser; /* Serializer to use */
#define te_serializer te_ser->ts_serializer

	soux_addr_t te_uxaddr; /* Socket address */
#define te_magic te_uxaddr.soua_magic
#define te_vp te_uxaddr.soua_vp
	tl_addr_t te_ap; /* addr bound to this endpt */
#define te_zoneid te_ap.ta_zoneid
#define te_alen te_ap.ta_alen
#define te_abuf te_ap.ta_abuf

	/* Per-transport state; the macros below reach through it. */
	tl_transport_state_t *te_transport;
#define te_addrhash te_transport->tr_addr_hash
#define te_aihash te_transport->tr_ai_hash
#define te_defaddr te_transport->tr_defaddr
	cred_t *te_credp; /* endpoint user credentials */
	mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 * The two structs share storage, so only the members matching the
	 * endpoint's transport type are meaningful.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon; /* count of conn requests */
			t_uscalar_t _te_qlen; /* max conn requests */
			tl_endpt_t *_te_oconp; /* conn request pending */
			tl_endpt_t *_te_conp; /* connected endpt */
			/*
			 * NOTE(review): padding present only when _ILP32 is
			 * not defined; presumably it keeps _te_iconp at the
			 * same offset as _te_flowlist - confirm.
			 */
#ifndef _ILP32
			void *_te_pad;
#endif
			list_t _te_iconp; /* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep; /* last dest. endpoint */
			tl_endpt_t *_te_flowq; /* flow controlled on whom */
			list_node_t _te_flows; /* lists of connections */
			list_t _te_flowlist; /* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
	/* Accessors for the connection-oriented members. */
#define te_nicon _te_transport_state._te_cots_state._te_nicon
#define te_qlen _te_transport_state._te_cots_state._te_qlen
#define te_oconp _te_transport_state._te_cots_state._te_oconp
#define te_conp _te_transport_state._te_cots_state._te_conp
#define te_iconp _te_transport_state._te_cots_state._te_iconp
	/* Accessors for the connectionless members. */
#define te_lastep _te_transport_state._te_clts_state._te_lastep
#define te_flowq _te_transport_state._te_clts_state._te_flowq
#define te_flowlist _te_transport_state._te_clts_state._te_flowlist
#define te_flows _te_transport_state._te_clts_state._te_flows

	bufcall_id_t te_bufcid; /* outstanding bufcall id */
	timeout_id_t te_timoutid; /* outstanding timeout id */
	pid_t te_cpid; /* cached pid of endpoint */
	t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 * NOTE(review): te_closelock/te_closecv presumably guard and signal
	 * te_closing and te_closewait - confirm in tl_close().
	 */
	kmutex_t te_closelock;
	kcondvar_t te_closecv;
	uint8_t te_closing; /* The endpoint started closing */
	uint8_t te_closewait; /* Wait in close until zero */
	mblk_t te_closemp; /* for entering serializer on close */
	mblk_t te_rsrvmp; /* for entering serializer on rsrv */
	mblk_t te_wsrvmp; /* for entering serializer on wsrv */
	/*
	 * NOTE(review): te_srv_lock/te_srv_cv presumably guard and signal the
	 * two *_active flags below - confirm in tl_rsrv()/tl_wsrv().
	 */
	kmutex_t te_srv_lock;
	kcondvar_t te_srv_cv;
	uint8_t te_rsrv_active; /* Running in tl_rsrv() */
	uint8_t te_wsrv_active; /* Running in tl_wsrv() */
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t te_ser_lock; /* Protects the count below */
	uint_t te_ser_count; /* Number of messages on serializer */
};
648
/*
 * Flag values for te_flag. The lower 4 bits specify the transport used (the
 * TL_TICOTS ... TL_SOCKET mode values), so the flags start at 0x10.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
 * they make it easier to identify the endpoint.
 */
#define TL_LISTENER 0x00010 /* the listener endpoint */
#define TL_ACCEPTOR 0x00020 /* the accepting endpoint */
#define TL_EAGER 0x00040 /* connecting endpoint */
#define TL_ACCEPTED 0x00080 /* accepted connection */
#define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */
#define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */
#define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */
#define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */
#define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */
663 /*
664 * Boolean checks for the endpoint type.
665 */
/*
 * Each predicate tests one mode bit in te_flag and evaluates to 1 or 0.
 * IS_COTS() is the exact complement of IS_CLTS().
 */
#define	IS_CLTS(x)	(!!((x)->te_flag & TL_TICLTS))
#define	IS_COTS(x)	(!IS_CLTS(x))
#define	IS_COTSORD(x)	(!!((x)->te_flag & TL_TICOTSORD))
#define	IS_SOCKET(x)	(!!((x)->te_flag & TL_SOCKET))
670
671 /*
672 * Certain operations are always used together. These macros reduce the chance
673 * of missing a part of a combination.
674 */
/*
 * All of the macros below expand to a brace block and evaluate their
 * arguments more than once: pass side-effect-free expressions only, and do
 * not use one as the unbraced body of an "if" that is followed by "else".
 */

/* Drop the reference held through pointer x and clear the pointer. */
#define	TL_UNCONNECT(x)	{ tl_refrele(x); (x) = NULL; }
/* Same as TL_UNCONNECT(), but a no-op when x is already NULL. */
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

/*
 * Put mp back on the write queue of x and set te_nowsrv. Must not be used
 * once close has entered the serializer (TL_CLOSE_SER), which is asserted.
 */
#define	TL_PUTBQ(x, mp) {				\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
	(x)->te_nowsrv = B_TRUE;			\
	(void) putbq((x)->te_wq, (mp));			\
}

/* Clear te_nowsrv and enable the write queue of x. */
#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
/* Clear te_nowsrv and queue mp on the write queue of x. */
#define	TL_PUTQ(x, mp) {				\
	(x)->te_nowsrv = B_FALSE;			\
	(void) putq((x)->te_wq, (mp));			\
}
686
687 /*
688 * STREAMS driver glue data structures.
689 */
/* Module information shared by the read-side and write-side qinit. */
static struct module_info tl_minfo = {
	TL_ID, /* mi_idnum */
	TL_NAME, /* mi_idname */
	TL_MINPSZ, /* mi_minpsz */
	TL_MAXPSZ, /* mi_maxpsz */
	TL_HIWAT, /* mi_hiwat */
	TL_LOWAT /* mi_lowat */
};
698
/*
 * Read-side queue initialisation: no put procedure, tl_rsrv() as the service
 * procedure, and the open/close routines for the stream.
 */
static struct qinit tl_rinit = {
	NULL, /* qi_putp */
	tl_rsrv, /* qi_srvp */
	tl_open, /* qi_qopen */
	tl_close, /* qi_qclose */
	NULL, /* qi_qadmin */
	&tl_minfo, /* qi_minfo */
	NULL /* qi_mstat */
};
708
/*
 * Write-side queue initialisation: tl_wput() and tl_wsrv() as the put and
 * service procedures; open/close are left NULL here and set in tl_rinit.
 */
static struct qinit tl_winit = {
	tl_wput, /* qi_putp */
	tl_wsrv, /* qi_srvp */
	NULL, /* qi_qopen */
	NULL, /* qi_qclose */
	NULL, /* qi_qadmin */
	&tl_minfo, /* qi_minfo */
	NULL /* qi_mstat */
};
718
/* Stream table tying the two qinit structures together; no mux entries. */
static struct streamtab tlinfo = {
	&tl_rinit, /* st_rdinit */
	&tl_winit, /* st_wrinit */
	NULL, /* st_muxrinit */
	NULL /* st_muxwinit */
};
725
/*
 * Define the driver operations structure tl_devops. It is given tl_attach,
 * tl_detach and tl_info from this file, nulldev for the remaining routine
 * slots, the D_MP flag, the tlinfo stream table and
 * ddi_quiesce_not_supported.
 * NOTE(review): the meaning of each position is fixed by the
 * DDI_DEFINE_STREAM_OPS() macro, which is defined outside this file.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
728
/* Driver linkage structure referenced from modlinkage. */
static struct modldrv modldrv = {
	&mod_driverops, /* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)", /* description string */
	&tl_devops, /* driver ops */
};
734
735 /*
736 * Module linkage information for the kernel.
737 */
static struct modlinkage modlinkage = {
	MODREV_1, /* module linkage revision */
	&modldrv, /* the single linkage structure of this module */
	NULL /* terminates the linkage list */
};
743
744 /*
745 * Templates for response to info request
746 * Check sanity of unlimited connect data etc.
747 */
748
/*
 * PROVIDER_flag values for the two info ack templates; both transport
 * classes currently advertise the same flags.
 */
#define TL_CLTS_PROVIDER_FLAG (XPG4_1 | SENDZERO)
#define TL_COTS_PROVIDER_FLAG (XPG4_1 | SENDZERO)
751
/*
 * T_info_ack template for connection-oriented endpoints. Every size is
 * T_INFINITE except TIDU_size, which is filled in at run time.
 */
static struct T_info_ack tl_cots_info_ack =
{
	T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
	T_INFINITE, /* TSDU_size */
	T_INFINITE, /* ETSDU_size */
	T_INFINITE, /* CDATA_size */
	T_INFINITE, /* DDATA_size */
	T_INFINITE, /* ADDR_size */
	T_INFINITE, /* OPT_size */
	0, /* TIDU_size - fill at run time */
	T_COTS, /* SERV_type */
	-1, /* CURRENT_state */
	TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */
};
766
/*
 * T_info_ack template for connectionless endpoints. TSDU_size and TIDU_size
 * are filled in at run time; expedited, connect and disconnect data are
 * marked as not supported.
 */
static struct T_info_ack tl_clts_info_ack =
{
	T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
	0, /* TSDU_size - fill at run time */
	-2, /* ETSDU_size -2 => not supported */
	-2, /* CDATA_size -2 => not supported */
	-2, /* DDATA_size -2 => not supported */
	-1, /* ADDR_size -1 => infinite */
	-1, /* OPT_size */
	0, /* TIDU_size - fill at run time */
	T_CLTS, /* SERV_type */
	-1, /* CURRENT_state */
	TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
};
781
/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/*
 * NOTE(review): no use of the next two variables is visible in this section.
 * Going by their names and placement they are presumably debug/test knobs
 * too (client closing during accept; disabling serializer switching) -
 * confirm against the connection handling code.
 */
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;
814
815 #define nr 127 /* not reachable */
816
817 #define TE_NOEVENTS 28
818
819 static char nextstate[TE_NOEVENTS][TS_NOSTATES] = {
820 /* STATES */
821 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 */
822
823 /* Initialization events */
824
825 #define TE_BIND_REQ 0 /* bind request */
826 { 1, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
827 #define TE_UNBIND_REQ 1 /* unbind request */
828 {nr, nr, nr, 2, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
829 #define TE_OPTMGMT_REQ 2 /* manage options req */
830 {nr, nr, nr, 4, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
831 #define TE_BIND_ACK 3 /* bind acknowledment */
832 {nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
833 #define TE_OPTMGMT_ACK 4 /* manage options ack */
834 {nr, nr, nr, nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
835 #define TE_ERROR_ACK 5 /* error acknowledgment */
836 {nr, 0, 3, nr, 3, 3, nr, nr, 7, nr, nr, nr, 6, 7, 9, 10, 11},
837 #define TE_OK_ACK1 6 /* ok ack seqcnt == 0 */
838 {nr, nr, 0, nr, nr, 6, nr, nr, nr, nr, nr, nr, 3, nr, 3, 3, 3},
839 #define TE_OK_ACK2 7 /* ok ack seqcnt == 1, q == resq */
840 {nr, nr, nr, nr, nr, nr, nr, nr, 9, nr, nr, nr, nr, 3, nr, nr, nr},
841 #define TE_OK_ACK3 8 /* ok ack seqcnt == 1, q != resq */
842 {nr, nr, nr, nr, nr, nr, nr, nr, 3, nr, nr, nr, nr, 3, nr, nr, nr},
843 #define TE_OK_ACK4 9 /* ok ack seqcnt > 1 */
844 {nr, nr, nr, nr, nr, nr, nr, nr, 7, nr, nr, nr, nr, 7, nr, nr, nr},
845
846 /* Connection oriented events */
847 #define TE_CONN_REQ 10 /* connection request */
848 {nr, nr, nr, 5, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
849 #define TE_CONN_RES 11 /* connection response */
850 {nr, nr, nr, nr, nr, nr, nr, 8, nr, nr, nr, nr, nr, nr, nr, nr, nr},
851 #define TE_DISCON_REQ 12 /* disconnect request */
852 {nr, nr, nr, nr, nr, nr, 12, 13, nr, 14, 15, 16, nr, nr, nr, nr, nr},
853 #define TE_DATA_REQ 13 /* data request */
854 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 9, nr, 11, nr, nr, nr, nr, nr},
855 #define TE_EXDATA_REQ 14 /* expedited data request */
856 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 9, nr, 11, nr, nr, nr, nr, nr},
857 #define TE_ORDREL_REQ 15 /* orderly release req */
858 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 10, nr, 3, nr, nr, nr, nr, nr},
859 #define TE_CONN_IND 16 /* connection indication */
860 {nr, nr, nr, 7, nr, nr, nr, 7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
861 #define TE_CONN_CON 17 /* connection confirmation */
862 {nr, nr, nr, nr, nr, nr, 9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
863 #define TE_DATA_IND 18 /* data indication */
864 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 9, 10, nr, nr, nr, nr, nr, nr},
865 #define TE_EXDATA_IND 19 /* expedited data indication */
866 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 9, 10, nr, nr, nr, nr, nr, nr},
867 #define TE_ORDREL_IND 20 /* orderly release ind */
868 {nr, nr, nr, nr, nr, nr, nr, nr, nr, 11, 3, nr, nr, nr, nr, nr, nr},
869 #define TE_DISCON_IND1 21 /* disconnect indication seq == 0 */
870 {nr, nr, nr, nr, nr, nr, 3, nr, nr, 3, 3, 3, nr, nr, nr, nr, nr},
871 #define TE_DISCON_IND2 22 /* disconnect indication seq == 1 */
872 {nr, nr, nr, nr, nr, nr, nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr},
873 #define TE_DISCON_IND3 23 /* disconnect indication seq > 1 */
874 {nr, nr, nr, nr, nr, nr, nr, 7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
875 #define TE_PASS_CONN 24 /* pass connection */
876 {nr, nr, nr, 9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
877
878
879 /* Unit data events */
880
881 #define TE_UNITDATA_REQ 25 /* unitdata request */
882 {nr, nr, nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
883 #define TE_UNITDATA_IND 26 /* unitdata indication */
884 {nr, nr, nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
885 #define TE_UDERROR_IND 27 /* unitdata error indication */
886 {nr, nr, nr, 3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
887 };
888
889
890
891 /*
892 * LOCAL FUNCTION PROTOTYPES
893 * -------------------------
894 */
895 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
896 static void tl_do_proto(mblk_t *, tl_endpt_t *);
897 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
898 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
899 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
900 t_scalar_t);
901 static void tl_bind(mblk_t *, tl_endpt_t *);
902 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
903 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
904 static void tl_unbind(mblk_t *, tl_endpt_t *);
905 static void tl_optmgmt(queue_t *, mblk_t *);
906 static void tl_conn_req(queue_t *, mblk_t *);
907 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
908 static void tl_conn_res(mblk_t *, tl_endpt_t *);
909 static void tl_discon_req(mblk_t *, tl_endpt_t *);
910 static void tl_capability_req(mblk_t *, tl_endpt_t *);
911 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
912 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
913 static void tl_info_req(mblk_t *, tl_endpt_t *);
914 static void tl_addr_req(mblk_t *, tl_endpt_t *);
915 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
916 static void tl_data(mblk_t *, tl_endpt_t *);
917 static void tl_exdata(mblk_t *, tl_endpt_t *);
918 static void tl_ordrel(mblk_t *, tl_endpt_t *);
919 static void tl_unitdata(mblk_t *, tl_endpt_t *);
920 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
921 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
922 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
923 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
924 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
925 static void tl_cl_backenable(tl_endpt_t *);
926 static void tl_co_unconnect(tl_endpt_t *);
927 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
928 static void tl_discon_ind(tl_endpt_t *, uint32_t);
929 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
930 static mblk_t *tl_ordrel_ind_alloc(void);
931 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
932 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
933 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
934 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
935 static void tl_icon_freemsgs(mblk_t **);
936 static void tl_merror(queue_t *, mblk_t *, int);
937 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
938 static int tl_default_opt(queue_t *, int, int, uchar_t *);
939 static int tl_get_opt(queue_t *, int, int, uchar_t *);
940 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
941 uchar_t *, void *, cred_t *);
942 static void tl_memrecover(queue_t *, mblk_t *, size_t);
943 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
944 static void tl_free(tl_endpt_t *);
945 static int tl_constructor(void *, void *, int);
946 static void tl_destructor(void *, void *);
947 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
948 static tl_serializer_t *tl_serializer_alloc(int);
949 static void tl_serializer_refhold(tl_serializer_t *);
950 static void tl_serializer_refrele(tl_serializer_t *);
951 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
952 static void tl_serializer_exit(tl_endpt_t *);
953 static boolean_t tl_noclose(tl_endpt_t *);
954 static void tl_closeok(tl_endpt_t *);
955 static void tl_refhold(tl_endpt_t *);
956 static void tl_refrele(tl_endpt_t *);
957 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
958 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
959 static void tl_close_ser(mblk_t *, tl_endpt_t *);
960 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
961 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
962 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
963 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
964 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
965 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
966 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
967 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
968 static void tl_addr_unbind(tl_endpt_t *);
969
970 /*
971 * Intialize option database object for TL
972 */
973
974 optdb_obj_t tl_opt_obj = {
975 tl_default_opt, /* TL default value function pointer */
976 tl_get_opt, /* TL get function pointer */
977 tl_set_opt, /* TL set function pointer */
978 TL_OPT_ARR_CNT, /* TL option database count of entries */
979 tl_opt_arr, /* TL option database */
980 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */
981 tl_valid_levels_arr /* TL valid level array */
982 };
983
984 /*
985 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
986 * ---------------------------------------
987 */
988
989 /*
990 * Loadable module routines
991 */
992 int
_init(void)993 _init(void)
994 {
995 return (mod_install(&modlinkage));
996 }
997
998 int
_fini(void)999 _fini(void)
1000 {
1001 return (mod_remove(&modlinkage));
1002 }
1003
1004 int
_info(struct modinfo * modinfop)1005 _info(struct modinfo *modinfop)
1006 {
1007 return (mod_info(&modlinkage, modinfop));
1008 }
1009
1010 /*
1011 * Driver Entry Points and Other routines
1012 */
1013 static int
tl_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)1014 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1015 {
1016 int i;
1017 char name[32];
1018
1019 /*
1020 * Resume from a checkpoint state.
1021 */
1022 if (cmd == DDI_RESUME)
1023 return (DDI_SUCCESS);
1024
1025 if (cmd != DDI_ATTACH)
1026 return (DDI_FAILURE);
1027
1028 /*
1029 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
1030 * streams message sizes can be unlimited. We use a defined constant
1031 * instead.
1032 */
1033 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
1034
1035 /*
1036 * Create subdevices for each transport.
1037 */
1038 for (i = 0; i < TL_UNUSED; i++) {
1039 if (ddi_create_minor_node(devi,
1040 tl_transports[i].tr_name,
1041 S_IFCHR, tl_transports[i].tr_minor,
1042 DDI_PSEUDO, 0) == DDI_FAILURE) {
1043 ddi_remove_minor_node(devi, NULL);
1044 return (DDI_FAILURE);
1045 }
1046 }
1047
1048 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
1049 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
1050
1051 if (tl_cache == NULL) {
1052 ddi_remove_minor_node(devi, NULL);
1053 return (DDI_FAILURE);
1054 }
1055
1056 tl_minors = id_space_create("tl_minor_space",
1057 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
1058
1059 /*
1060 * Create ID space for minor numbers
1061 */
1062 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1063 tl_transport_state_t *t = &tl_transports[i];
1064
1065 if (i == TL_UNUSED)
1066 continue;
1067
1068 /* Socket COTSORD shares namespace with COTS */
1069 if (i == TL_SOCK_COTSORD) {
1070 t->tr_ai_hash =
1071 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1072 ASSERT(t->tr_ai_hash != NULL);
1073 t->tr_addr_hash =
1074 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1075 ASSERT(t->tr_addr_hash != NULL);
1076 continue;
1077 }
1078
1079 /*
1080 * Create hash tables.
1081 */
1082 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1083 t->tr_name);
1084 #ifdef _ILP32
1085 if (i & TL_SOCKET)
1086 t->tr_ai_hash =
1087 mod_hash_create_idhash(name, tl_hash_size - 1,
1088 mod_hash_null_valdtor);
1089 else
1090 t->tr_ai_hash =
1091 mod_hash_create_ptrhash(name, tl_hash_size,
1092 mod_hash_null_valdtor, sizeof (queue_t));
1093 #else
1094 t->tr_ai_hash =
1095 mod_hash_create_idhash(name, tl_hash_size - 1,
1096 mod_hash_null_valdtor);
1097 #endif /* _ILP32 */
1098
1099 if (i & TL_SOCKET) {
1100 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1101 t->tr_name);
1102 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1103 tl_hash_size, mod_hash_null_valdtor,
1104 sizeof (uintptr_t));
1105 } else {
1106 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1107 t->tr_name);
1108 t->tr_addr_hash = mod_hash_create_extended(name,
1109 tl_hash_size, mod_hash_null_keydtor,
1110 mod_hash_null_valdtor,
1111 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1112 }
1113
1114 /* Create serializer for connectionless transports. */
1115 if (i & TL_TICLTS)
1116 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1117 }
1118
1119 tl_dip = devi;
1120
1121 return (DDI_SUCCESS);
1122 }
1123
1124 static int
tl_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)1125 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1126 {
1127 int i;
1128
1129 if (cmd == DDI_SUSPEND)
1130 return (DDI_SUCCESS);
1131
1132 if (cmd != DDI_DETACH)
1133 return (DDI_FAILURE);
1134
1135 /*
1136 * Destroy arenas and hash tables.
1137 */
1138 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1139 tl_transport_state_t *t = &tl_transports[i];
1140
1141 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1142 continue;
1143
1144 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1145 if (t->tr_serializer != NULL) {
1146 tl_serializer_refrele(t->tr_serializer);
1147 t->tr_serializer = NULL;
1148 }
1149
1150 #ifdef _ILP32
1151 if (i & TL_SOCKET)
1152 mod_hash_destroy_idhash(t->tr_ai_hash);
1153 else
1154 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1155 #else
1156 mod_hash_destroy_idhash(t->tr_ai_hash);
1157 #endif /* _ILP32 */
1158 t->tr_ai_hash = NULL;
1159 if (i & TL_SOCKET)
1160 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1161 else
1162 mod_hash_destroy_hash(t->tr_addr_hash);
1163 t->tr_addr_hash = NULL;
1164 }
1165
1166 kmem_cache_destroy(tl_cache);
1167 tl_cache = NULL;
1168 id_space_destroy(tl_minors);
1169 tl_minors = NULL;
1170 ddi_remove_minor_node(devi, NULL);
1171 return (DDI_SUCCESS);
1172 }
1173
1174 /* ARGSUSED */
1175 static int
tl_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)1176 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1177 {
1178
1179 int retcode = DDI_FAILURE;
1180
1181 switch (infocmd) {
1182
1183 case DDI_INFO_DEVT2DEVINFO:
1184 if (tl_dip != NULL) {
1185 *result = (void *)tl_dip;
1186 retcode = DDI_SUCCESS;
1187 }
1188 break;
1189
1190 case DDI_INFO_DEVT2INSTANCE:
1191 *result = NULL;
1192 retcode = DDI_SUCCESS;
1193 break;
1194
1195 default:
1196 break;
1197 }
1198 return (retcode);
1199 }
1200
1201 /*
1202 * Endpoint reference management.
1203 */
1204 static void
tl_refhold(tl_endpt_t * tep)1205 tl_refhold(tl_endpt_t *tep)
1206 {
1207 atomic_inc_32(&tep->te_refcnt);
1208 }
1209
1210 static void
tl_refrele(tl_endpt_t * tep)1211 tl_refrele(tl_endpt_t *tep)
1212 {
1213 ASSERT(tep->te_refcnt != 0);
1214
1215 if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1216 tl_free(tep);
1217 }
1218
1219 /*ARGSUSED*/
1220 static int
tl_constructor(void * buf,void * cdrarg,int kmflags)1221 tl_constructor(void *buf, void *cdrarg, int kmflags)
1222 {
1223 tl_endpt_t *tep = buf;
1224
1225 bzero(tep, sizeof (tl_endpt_t));
1226 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1227 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1228 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1229 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1230 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1231
1232 return (0);
1233 }
1234
1235 /*ARGSUSED*/
1236 static void
tl_destructor(void * buf,void * cdrarg)1237 tl_destructor(void *buf, void *cdrarg)
1238 {
1239 tl_endpt_t *tep = buf;
1240
1241 mutex_destroy(&tep->te_closelock);
1242 cv_destroy(&tep->te_closecv);
1243 mutex_destroy(&tep->te_srv_lock);
1244 cv_destroy(&tep->te_srv_cv);
1245 mutex_destroy(&tep->te_ser_lock);
1246 }
1247
1248 static void
tl_free(tl_endpt_t * tep)1249 tl_free(tl_endpt_t *tep)
1250 {
1251 ASSERT(tep->te_refcnt == 0);
1252 ASSERT(tep->te_transport != NULL);
1253 ASSERT(tep->te_rq == NULL);
1254 ASSERT(tep->te_wq == NULL);
1255 ASSERT(tep->te_ser != NULL);
1256 ASSERT(tep->te_ser_count == 0);
1257 ASSERT(!(tep->te_flag & TL_ADDRHASHED));
1258
1259 if (IS_SOCKET(tep)) {
1260 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1261 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1262 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1263 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1264 } else if (tep->te_abuf != NULL) {
1265 kmem_free(tep->te_abuf, tep->te_alen);
1266 tep->te_alen = -1; /* uninitialized */
1267 tep->te_abuf = NULL;
1268 } else {
1269 ASSERT(tep->te_alen == -1);
1270 }
1271
1272 id_free(tl_minors, tep->te_minor);
1273 ASSERT(tep->te_credp == NULL);
1274
1275 if (tep->te_hash_hndl != NULL)
1276 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1277
1278 if (IS_COTS(tep)) {
1279 TL_REMOVE_PEER(tep->te_conp);
1280 TL_REMOVE_PEER(tep->te_oconp);
1281 tl_serializer_refrele(tep->te_ser);
1282 tep->te_ser = NULL;
1283 ASSERT(tep->te_nicon == 0);
1284 ASSERT(list_head(&tep->te_iconp) == NULL);
1285 } else {
1286 ASSERT(tep->te_lastep == NULL);
1287 ASSERT(list_head(&tep->te_flowlist) == NULL);
1288 ASSERT(tep->te_flowq == NULL);
1289 }
1290
1291 ASSERT(tep->te_bufcid == 0);
1292 ASSERT(tep->te_timoutid == 0);
1293 bzero(&tep->te_ap, sizeof (tep->te_ap));
1294 tep->te_acceptor_id = 0;
1295
1296 ASSERT(tep->te_closewait == 0);
1297 ASSERT(!tep->te_rsrv_active);
1298 ASSERT(!tep->te_wsrv_active);
1299 tep->te_closing = 0;
1300 tep->te_nowsrv = B_FALSE;
1301 tep->te_flag = 0;
1302
1303 kmem_cache_free(tl_cache, tep);
1304 }
1305
1306 /*
1307 * Allocate/free reference-counted wrappers for serializers.
1308 */
1309 static tl_serializer_t *
tl_serializer_alloc(int flags)1310 tl_serializer_alloc(int flags)
1311 {
1312 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1313 serializer_t *ser;
1314
1315 if (s == NULL)
1316 return (NULL);
1317
1318 ser = serializer_create(flags);
1319
1320 if (ser == NULL) {
1321 kmem_free(s, sizeof (tl_serializer_t));
1322 return (NULL);
1323 }
1324
1325 s->ts_refcnt = 1;
1326 s->ts_serializer = ser;
1327 return (s);
1328 }
1329
1330 static void
tl_serializer_refhold(tl_serializer_t * s)1331 tl_serializer_refhold(tl_serializer_t *s)
1332 {
1333 atomic_inc_32(&s->ts_refcnt);
1334 }
1335
1336 static void
tl_serializer_refrele(tl_serializer_t * s)1337 tl_serializer_refrele(tl_serializer_t *s)
1338 {
1339 if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1340 serializer_destroy(s->ts_serializer);
1341 kmem_free(s, sizeof (tl_serializer_t));
1342 }
1343 }
1344
1345 /*
1346 * Post a request on the endpoint serializer. For COTS transports keep track of
1347 * the number of pending requests.
1348 */
1349 static void
tl_serializer_enter(tl_endpt_t * tep,tlproc_t tlproc,mblk_t * mp)1350 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1351 {
1352 if (IS_COTS(tep)) {
1353 mutex_enter(&tep->te_ser_lock);
1354 tep->te_ser_count++;
1355 mutex_exit(&tep->te_ser_lock);
1356 }
1357 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1358 }
1359
1360 /*
1361 * Complete processing the request on the serializer. Decrement the counter for
1362 * pending requests for COTS transports.
1363 */
1364 static void
tl_serializer_exit(tl_endpt_t * tep)1365 tl_serializer_exit(tl_endpt_t *tep)
1366 {
1367 if (IS_COTS(tep)) {
1368 mutex_enter(&tep->te_ser_lock);
1369 ASSERT(tep->te_ser_count != 0);
1370 tep->te_ser_count--;
1371 mutex_exit(&tep->te_ser_lock);
1372 }
1373 }
1374
1375 /*
1376 * Hash management functions.
1377 */
1378
1379 /*
1380 * Return TRUE if two addresses are equal, false otherwise.
1381 */
1382 static boolean_t
tl_eqaddr(tl_addr_t * ap1,tl_addr_t * ap2)1383 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1384 {
1385 return ((ap1->ta_alen > 0) &&
1386 (ap1->ta_alen == ap2->ta_alen) &&
1387 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1388 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1389 }
1390
1391 /*
1392 * This function is called whenever an endpoint is found in the hash table.
1393 */
1394 /* ARGSUSED0 */
1395 static void
tl_find_callback(mod_hash_key_t key,mod_hash_val_t val)1396 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1397 {
1398 tl_refhold((tl_endpt_t *)val);
1399 }
1400
1401 /*
1402 * Address hash function.
1403 */
1404 /* ARGSUSED */
1405 static uint_t
tl_hash_by_addr(void * hash_data,mod_hash_key_t key)1406 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1407 {
1408 tl_addr_t *ap = (tl_addr_t *)key;
1409 size_t len = ap->ta_alen;
1410 uchar_t *p = ap->ta_abuf;
1411 uint_t i, g;
1412
1413 ASSERT((len > 0) && (p != NULL));
1414
1415 for (i = ap->ta_zoneid; len -- != 0; p++) {
1416 i = (i << 4) + (*p);
1417 if ((g = (i & 0xf0000000U)) != 0) {
1418 i ^= (g >> 24);
1419 i ^= g;
1420 }
1421 }
1422 return (i);
1423 }
1424
1425 /*
1426 * This function is used by hash lookups. It compares two generic addresses.
1427 */
1428 static int
tl_hash_cmp_addr(mod_hash_key_t key1,mod_hash_key_t key2)1429 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1430 {
1431 #ifdef DEBUG
1432 tl_addr_t *ap1 = (tl_addr_t *)key1;
1433 tl_addr_t *ap2 = (tl_addr_t *)key2;
1434
1435 ASSERT(key1 != NULL);
1436 ASSERT(key2 != NULL);
1437
1438 ASSERT(ap1->ta_abuf != NULL);
1439 ASSERT(ap2->ta_abuf != NULL);
1440 ASSERT(ap1->ta_alen > 0);
1441 ASSERT(ap2->ta_alen > 0);
1442 #endif
1443
1444 return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1445 }
1446
1447 /*
1448 * Prevent endpoint from closing if possible.
1449 * Return B_TRUE on success, B_FALSE on failure.
1450 */
1451 static boolean_t
tl_noclose(tl_endpt_t * tep)1452 tl_noclose(tl_endpt_t *tep)
1453 {
1454 boolean_t rc = B_FALSE;
1455
1456 mutex_enter(&tep->te_closelock);
1457 if (!tep->te_closing) {
1458 ASSERT(tep->te_closewait == 0);
1459 tep->te_closewait++;
1460 rc = B_TRUE;
1461 }
1462 mutex_exit(&tep->te_closelock);
1463 return (rc);
1464 }
1465
1466 /*
1467 * Allow endpoint to close if needed.
1468 */
1469 static void
tl_closeok(tl_endpt_t * tep)1470 tl_closeok(tl_endpt_t *tep)
1471 {
1472 ASSERT(tep->te_closewait > 0);
1473 mutex_enter(&tep->te_closelock);
1474 ASSERT(tep->te_closewait == 1);
1475 tep->te_closewait--;
1476 cv_signal(&tep->te_closecv);
1477 mutex_exit(&tep->te_closelock);
1478 }
1479
1480 /*
1481 * STREAMS open entry point.
1482 */
1483 /* ARGSUSED */
1484 static int
tl_open(queue_t * rq,dev_t * devp,int oflag,int sflag,cred_t * credp)1485 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1486 {
1487 tl_endpt_t *tep;
1488 minor_t minor = getminor(*devp);
1489 id_t inst_minor;
1490
1491 /*
1492 * Driver is called directly. Both CLONEOPEN and MODOPEN
1493 * are illegal
1494 */
1495 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1496 return (ENXIO);
1497
1498 if (rq->q_ptr != NULL)
1499 return (0);
1500
1501 /* Minor number should specify the mode used for the driver. */
1502 if ((minor >= TL_UNUSED))
1503 return (ENXIO);
1504
1505 if (oflag & SO_SOCKSTR) {
1506 minor |= TL_SOCKET;
1507 }
1508
1509 /*
1510 * Attempt to allocate a unique minor number for this instance.
1511 * Avoid an uninterruptable sleep if none are available.
1512 */
1513 if ((inst_minor = id_alloc_nosleep(tl_minors)) == -1) {
1514 return (ENOMEM);
1515 }
1516
1517 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1518 tep->te_refcnt = 1;
1519 tep->te_cpid = curproc->p_pid;
1520 rq->q_ptr = WR(rq)->q_ptr = tep;
1521 tep->te_state = TS_UNBND;
1522 tep->te_credp = credp;
1523 crhold(credp);
1524 tep->te_zoneid = getzoneid();
1525
1526 tep->te_flag = minor & TL_MINOR_MASK;
1527 tep->te_transport = &tl_transports[minor];
1528 tep->te_minor = (minor_t)inst_minor;
1529
1530 /* Reserve hash handle for bind(). */
1531 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1532
1533 /* Transport-specific initialization */
1534 if (IS_COTS(tep)) {
1535 /* Use private serializer */
1536 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1537
1538 /* Create list for pending connections */
1539 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1540 offsetof(tl_icon_t, ti_node));
1541 tep->te_qlen = 0;
1542 tep->te_nicon = 0;
1543 tep->te_oconp = NULL;
1544 tep->te_conp = NULL;
1545 } else {
1546 /* Use shared serializer */
1547 tep->te_ser = tep->te_transport->tr_serializer;
1548 bzero(&tep->te_flows, sizeof (list_node_t));
1549 /* Create list for flow control */
1550 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1551 offsetof(tl_endpt_t, te_flows));
1552 tep->te_flowq = NULL;
1553 tep->te_lastep = NULL;
1554
1555 }
1556
1557 /* Initialize endpoint address */
1558 if (IS_SOCKET(tep)) {
1559 /* Socket-specific address handling. */
1560 tep->te_alen = TL_SOUX_ADDRLEN;
1561 tep->te_abuf = &tep->te_uxaddr;
1562 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1563 tep->te_magic = SOU_MAGIC_IMPLICIT;
1564 } else {
1565 tep->te_alen = -1;
1566 tep->te_abuf = NULL;
1567 }
1568
1569 /* clone the driver */
1570 *devp = makedevice(getmajor(*devp), tep->te_minor);
1571
1572 tep->te_rq = rq;
1573 tep->te_wq = WR(rq);
1574
1575 #ifdef _ILP32
1576 if (IS_SOCKET(tep))
1577 tep->te_acceptor_id = tep->te_minor;
1578 else
1579 tep->te_acceptor_id = (t_uscalar_t)rq;
1580 #else
1581 tep->te_acceptor_id = tep->te_minor;
1582 #endif /* _ILP32 */
1583
1584
1585 qprocson(rq);
1586
1587 /*
1588 * Insert acceptor ID in the hash. The AI hash always sleeps on
1589 * insertion so insertion can't fail.
1590 */
1591 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1592 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1593 (mod_hash_val_t)tep);
1594
1595 return (0);
1596 }
1597
1598 /* ARGSUSED1 */
1599 static int
tl_close(queue_t * rq,int flag,cred_t * credp)1600 tl_close(queue_t *rq, int flag, cred_t *credp)
1601 {
1602 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1603 tl_endpt_t *elp = NULL;
1604 queue_t *wq = tep->te_wq;
1605 int rc;
1606
1607 ASSERT(wq == WR(rq));
1608
1609 /*
1610 * Remove the endpoint from acceptor hash.
1611 */
1612 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1613 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1614 (mod_hash_val_t *)&elp);
1615 ASSERT(rc == 0 && tep == elp);
1616 if ((rc != 0) || (tep != elp)) {
1617 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1618 SL_TRACE | SL_ERROR,
1619 "tl_close:inconsistency in AI hash"));
1620 }
1621
1622 /*
1623 * Wait till close is safe, then mark endpoint as closing.
1624 */
1625 mutex_enter(&tep->te_closelock);
1626 while (tep->te_closewait)
1627 cv_wait(&tep->te_closecv, &tep->te_closelock);
1628 tep->te_closing = B_TRUE;
1629 /*
1630 * Will wait for the serializer part of the close to finish, so set
1631 * te_closewait now.
1632 */
1633 tep->te_closewait = 1;
1634 tep->te_nowsrv = B_FALSE;
1635 mutex_exit(&tep->te_closelock);
1636
1637 /*
1638 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1639 * It is safe because close will wait for tl_close_ser to finish.
1640 */
1641 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1642
1643 /*
1644 * Wait for the first phase of close to complete before qprocsoff().
1645 */
1646 mutex_enter(&tep->te_closelock);
1647 while (tep->te_closewait)
1648 cv_wait(&tep->te_closecv, &tep->te_closelock);
1649 mutex_exit(&tep->te_closelock);
1650
1651 qprocsoff(rq);
1652
1653 if (tep->te_bufcid) {
1654 qunbufcall(rq, tep->te_bufcid);
1655 tep->te_bufcid = 0;
1656 }
1657 if (tep->te_timoutid) {
1658 (void) quntimeout(rq, tep->te_timoutid);
1659 tep->te_timoutid = 0;
1660 }
1661
1662 /*
1663 * Finish close behind serializer.
1664 *
1665 * For a CLTS endpoint increase a refcount and continue close processing
1666 * with serializer protection. This processing may happen asynchronously
1667 * with the completion of tl_close().
1668 *
1669 * Fot a COTS endpoint wait before destroying tep since the serializer
1670 * may go away together with tep and we need to destroy serializer
1671 * outside of serializer context.
1672 */
1673 ASSERT(tep->te_closewait == 0);
1674 if (IS_COTS(tep))
1675 tep->te_closewait = 1;
1676 else
1677 tl_refhold(tep);
1678
1679 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1680
1681 /*
1682 * For connection-oriented transports wait for all serializer activity
1683 * to settle down.
1684 */
1685 if (IS_COTS(tep)) {
1686 mutex_enter(&tep->te_closelock);
1687 while (tep->te_closewait)
1688 cv_wait(&tep->te_closecv, &tep->te_closelock);
1689 mutex_exit(&tep->te_closelock);
1690 }
1691
1692 crfree(tep->te_credp);
1693 tep->te_credp = NULL;
1694 tep->te_wq = NULL;
1695 tl_refrele(tep);
1696 /*
1697 * tep is likely to be destroyed now, so can't reference it any more.
1698 */
1699
1700 rq->q_ptr = wq->q_ptr = NULL;
1701 return (0);
1702 }
1703
1704 /*
1705 * First phase of close processing done behind the serializer.
1706 *
1707 * Do not drop the reference in the end - tl_close() wants this reference to
1708 * stay.
1709 */
1710 /* ARGSUSED0 */
1711 static void
tl_close_ser(mblk_t * mp,tl_endpt_t * tep)1712 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1713 {
1714 ASSERT(tep->te_closing);
1715 ASSERT(tep->te_closewait == 1);
1716 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1717
1718 tep->te_flag |= TL_CLOSE_SER;
1719
1720 /*
1721 * Drain out all messages on queue except for TL_TICOTS where the
1722 * abortive release semantics permit discarding of data on close
1723 */
1724 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1725 tl_wsrv_ser(NULL, tep);
1726 }
1727
1728 /* Remove address from hash table. */
1729 tl_addr_unbind(tep);
1730 /*
1731 * qprocsoff() gets confused when q->q_next is not NULL on the write
1732 * queue of the driver, so clear these before qprocsoff() is called.
1733 * Also clear q_next for the peer since this queue is going away.
1734 */
1735 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1736 tl_endpt_t *peer_tep = tep->te_conp;
1737
1738 tep->te_wq->q_next = NULL;
1739 if ((peer_tep != NULL) && !peer_tep->te_closing)
1740 peer_tep->te_wq->q_next = NULL;
1741 }
1742
1743 tep->te_rq = NULL;
1744
1745 /* wake up tl_close() */
1746 tl_closeok(tep);
1747 tl_serializer_exit(tep);
1748 }
1749
1750 /*
1751 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1752 * the reference for CLTS.
1753 *
1754 * Called from serializer. Should drop reference count for CLTS only.
1755 */
1756 /* ARGSUSED0 */
1757 static void
tl_close_finish_ser(mblk_t * mp,tl_endpt_t * tep)1758 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1759 {
1760 ASSERT(tep->te_closing);
1761 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1762 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1763
1764 tep->te_state = -1; /* Uninitialized */
1765 if (IS_COTS(tep)) {
1766 tl_co_unconnect(tep);
1767 } else {
1768 /* Connectionless specific cleanup */
1769 TL_REMOVE_PEER(tep->te_lastep);
1770 /*
1771 * Backenable anybody that is flow controlled waiting for
1772 * this endpoint.
1773 */
1774 tl_cl_backenable(tep);
1775 if (tep->te_flowq != NULL) {
1776 list_remove(&(tep->te_flowq->te_flowlist), tep);
1777 tep->te_flowq = NULL;
1778 }
1779 }
1780
1781 tl_serializer_exit(tep);
1782 if (IS_COTS(tep))
1783 tl_closeok(tep);
1784 else
1785 tl_refrele(tep);
1786 }
1787
1788 /*
1789 * STREAMS write-side put procedure.
1790 * Enter serializer for most of the processing.
1791 *
1792 * The T_CONN_REQ is processed outside of serializer.
1793 */
1794 static int
tl_wput(queue_t * wq,mblk_t * mp)1795 tl_wput(queue_t *wq, mblk_t *mp)
1796 {
1797 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1798 ssize_t msz = MBLKL(mp);
1799 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1800 tlproc_t *tl_proc = NULL;
1801
1802 switch (DB_TYPE(mp)) {
1803 case M_DATA:
1804 /* Only valid for connection-oriented transports */
1805 if (IS_CLTS(tep)) {
1806 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1807 SL_TRACE | SL_ERROR,
1808 "tl_wput:M_DATA invalid for ticlts driver"));
1809 tl_merror(wq, mp, EPROTO);
1810 return (0);
1811 }
1812 tl_proc = tl_wput_data_ser;
1813 break;
1814
1815 case M_IOCTL:
1816 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1817 case TL_IOC_CREDOPT:
1818 /* FALLTHROUGH */
1819 case TL_IOC_UCREDOPT:
1820 /*
1821 * Serialize endpoint state change.
1822 */
1823 tl_proc = tl_do_ioctl_ser;
1824 break;
1825
1826 default:
1827 miocnak(wq, mp, 0, EINVAL);
1828 return (0);
1829 }
1830 break;
1831
1832 case M_FLUSH:
1833 /*
1834 * do canonical M_FLUSH processing
1835 */
1836 if (*mp->b_rptr & FLUSHW) {
1837 flushq(wq, FLUSHALL);
1838 *mp->b_rptr &= ~FLUSHW;
1839 }
1840 if (*mp->b_rptr & FLUSHR) {
1841 flushq(RD(wq), FLUSHALL);
1842 qreply(wq, mp);
1843 } else {
1844 freemsg(mp);
1845 }
1846 return (0);
1847
1848 case M_PROTO:
1849 if (msz < sizeof (prim->type)) {
1850 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1851 SL_TRACE | SL_ERROR,
1852 "tl_wput:M_PROTO data too short"));
1853 tl_merror(wq, mp, EPROTO);
1854 return (0);
1855 }
1856 switch (prim->type) {
1857 case T_OPTMGMT_REQ:
1858 case T_SVR4_OPTMGMT_REQ:
1859 /*
1860 * Process TPI option management requests immediately
1861 * in put procedure regardless of in-order processing
1862 * of already queued messages.
1863 * (Note: This driver supports AF_UNIX socket
1864 * implementation. Unless we implement this processing,
1865 * setsockopt() on socket endpoint will block on flow
1866 * controlled endpoints which it should not. That is
1867 * required for successful execution of VSU socket tests
1868 * and is consistent with BSD socket behavior).
1869 */
1870 tl_optmgmt(wq, mp);
1871 return (0);
1872 case O_T_BIND_REQ:
1873 case T_BIND_REQ:
1874 tl_proc = tl_bind_ser;
1875 break;
1876 case T_CONN_REQ:
1877 if (IS_CLTS(tep)) {
1878 tl_merror(wq, mp, EPROTO);
1879 return (0);
1880 }
1881 tl_conn_req(wq, mp);
1882 return (0);
1883 case T_DATA_REQ:
1884 case T_OPTDATA_REQ:
1885 case T_EXDATA_REQ:
1886 case T_ORDREL_REQ:
1887 tl_proc = tl_putq_ser;
1888 break;
1889 case T_UNITDATA_REQ:
1890 if (IS_COTS(tep) ||
1891 (msz < sizeof (struct T_unitdata_req))) {
1892 tl_merror(wq, mp, EPROTO);
1893 return (0);
1894 }
1895 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1896 tl_proc = tl_unitdata_ser;
1897 } else {
1898 tl_proc = tl_putq_ser;
1899 }
1900 break;
1901 default:
1902 /*
1903 * process in service procedure if message already
1904 * queued (maintain in-order processing)
1905 */
1906 if (wq->q_first != NULL) {
1907 tl_proc = tl_putq_ser;
1908 } else {
1909 tl_proc = tl_wput_ser;
1910 }
1911 break;
1912 }
1913 break;
1914
1915 case M_PCPROTO:
1916 /*
1917 * Check that the message has enough data to figure out TPI
1918 * primitive.
1919 */
1920 if (msz < sizeof (prim->type)) {
1921 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1922 SL_TRACE | SL_ERROR,
1923 "tl_wput:M_PCROTO data too short"));
1924 tl_merror(wq, mp, EPROTO);
1925 return (0);
1926 }
1927 switch (prim->type) {
1928 case T_CAPABILITY_REQ:
1929 tl_capability_req(mp, tep);
1930 return (0);
1931 case T_INFO_REQ:
1932 tl_proc = tl_info_req_ser;
1933 break;
1934 case T_ADDR_REQ:
1935 tl_proc = tl_addr_req_ser;
1936 break;
1937
1938 default:
1939 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1940 SL_TRACE | SL_ERROR,
1941 "tl_wput:unknown TPI msg primitive"));
1942 tl_merror(wq, mp, EPROTO);
1943 return (0);
1944 }
1945 break;
1946 default:
1947 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1948 "tl_wput:default:unexpected Streams message"));
1949 freemsg(mp);
1950 return (0);
1951 }
1952
1953 /*
1954 * Continue processing via serializer.
1955 */
1956 ASSERT(tl_proc != NULL);
1957 tl_refhold(tep);
1958 tl_serializer_enter(tep, tl_proc, mp);
1959 return (0);
1960 }
1961
1962 /*
1963 * Place message on the queue while preserving order.
1964 */
1965 static void
tl_putq_ser(mblk_t * mp,tl_endpt_t * tep)1966 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1967 {
1968 if (tep->te_closing) {
1969 tl_wput_ser(mp, tep);
1970 } else {
1971 TL_PUTQ(tep, mp);
1972 tl_serializer_exit(tep);
1973 tl_refrele(tep);
1974 }
1975
1976 }
1977
1978 static void
tl_wput_common_ser(mblk_t * mp,tl_endpt_t * tep)1979 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1980 {
1981 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1982
1983 switch (DB_TYPE(mp)) {
1984 case M_DATA:
1985 tl_data(mp, tep);
1986 break;
1987 case M_PROTO:
1988 tl_do_proto(mp, tep);
1989 break;
1990 default:
1991 freemsg(mp);
1992 break;
1993 }
1994 }
1995
1996 /*
1997 * Write side put procedure called from serializer.
1998 */
1999 static void
tl_wput_ser(mblk_t * mp,tl_endpt_t * tep)2000 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
2001 {
2002 tl_wput_common_ser(mp, tep);
2003 tl_serializer_exit(tep);
2004 tl_refrele(tep);
2005 }
2006
2007 /*
2008 * M_DATA processing. Called from serializer.
2009 */
2010 static void
tl_wput_data_ser(mblk_t * mp,tl_endpt_t * tep)2011 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
2012 {
2013 tl_endpt_t *peer_tep = tep->te_conp;
2014 queue_t *peer_rq;
2015
2016 ASSERT(DB_TYPE(mp) == M_DATA);
2017 ASSERT(IS_COTS(tep));
2018
2019 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
2020
2021 /*
2022 * fastpath for data. Ignore flow control if tep is closing.
2023 */
2024 if ((peer_tep != NULL) &&
2025 !peer_tep->te_closing &&
2026 ((tep->te_state == TS_DATA_XFER) ||
2027 (tep->te_state == TS_WREQ_ORDREL)) &&
2028 (tep->te_wq != NULL) &&
2029 (tep->te_wq->q_first == NULL) &&
2030 (peer_tep->te_state == TS_DATA_XFER ||
2031 peer_tep->te_state == TS_WIND_ORDREL ||
2032 peer_tep->te_state == TS_WREQ_ORDREL) &&
2033 ((peer_rq = peer_tep->te_rq) != NULL) &&
2034 (canputnext(peer_rq) || tep->te_closing)) {
2035 putnext(peer_rq, mp);
2036 } else if (tep->te_closing) {
2037 /*
2038 * It is possible that by the time we got here tep started to
2039 * close. If the write queue is not empty, and the state is
2040 * TS_DATA_XFER the data should be delivered in order, so we
2041 * call putq() instead of freeing the data.
2042 */
2043 if ((tep->te_wq != NULL) &&
2044 ((tep->te_state == TS_DATA_XFER) ||
2045 (tep->te_state == TS_WREQ_ORDREL))) {
2046 TL_PUTQ(tep, mp);
2047 } else {
2048 freemsg(mp);
2049 }
2050 } else {
2051 TL_PUTQ(tep, mp);
2052 }
2053
2054 tl_serializer_exit(tep);
2055 tl_refrele(tep);
2056 }
2057
2058 /*
2059 * Write side service routine.
2060 *
2061 * All actual processing happens within serializer which is entered
2062 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
2063 * messages that need processing may have arrived, so tl_wsrv repeats until
2064 * queue is empty or te_nowsrv is set.
2065 */
2066 static int
tl_wsrv(queue_t * wq)2067 tl_wsrv(queue_t *wq)
2068 {
2069 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2070
2071 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
2072 mutex_enter(&tep->te_srv_lock);
2073 ASSERT(tep->te_wsrv_active == B_FALSE);
2074 tep->te_wsrv_active = B_TRUE;
2075 mutex_exit(&tep->te_srv_lock);
2076
2077 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2078
2079 /*
2080 * Wait for serializer job to complete.
2081 */
2082 mutex_enter(&tep->te_srv_lock);
2083 while (tep->te_wsrv_active) {
2084 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2085 }
2086 cv_signal(&tep->te_srv_cv);
2087 mutex_exit(&tep->te_srv_lock);
2088 }
2089 return (0);
2090 }
2091
2092 /*
2093 * Serialized write side processing of the STREAMS queue.
2094 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2095 * is NULL.
2096 */
2097 static void
tl_wsrv_ser(mblk_t * ser_mp,tl_endpt_t * tep)2098 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2099 {
2100 mblk_t *mp;
2101 queue_t *wq = tep->te_wq;
2102
2103 ASSERT(wq != NULL);
2104 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2105 tl_wput_common_ser(mp, tep);
2106 }
2107
2108 /*
2109 * Wakeup service routine unless called from close.
2110 * If ser_mp is specified, the caller is tl_wsrv().
2111 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2112 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2113 * be no matching tl_serializer_exit() in this case.
2114 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2115 * waiting on te_srv_cv.
2116 */
2117 if (ser_mp != NULL) {
2118 /*
2119 * We are called from tl_wsrv.
2120 */
2121 mutex_enter(&tep->te_srv_lock);
2122 ASSERT(tep->te_wsrv_active);
2123 tep->te_wsrv_active = B_FALSE;
2124 cv_signal(&tep->te_srv_cv);
2125 mutex_exit(&tep->te_srv_lock);
2126 tl_serializer_exit(tep);
2127 }
2128 }
2129
2130 /*
2131 * Called when the stream is backenabled. Enter serializer and qenable everyone
2132 * flow controlled by tep.
2133 *
2134 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2135 * is possible that two instances of tl_rsrv will be running reusing the same
2136 * rsrv mblk.
2137 */
2138 static int
tl_rsrv(queue_t * rq)2139 tl_rsrv(queue_t *rq)
2140 {
2141 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2142
2143 ASSERT(rq->q_first == NULL);
2144 ASSERT(tep->te_rsrv_active == 0);
2145
2146 tep->te_rsrv_active = B_TRUE;
2147 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2148 /*
2149 * Wait for serializer job to complete.
2150 */
2151 mutex_enter(&tep->te_srv_lock);
2152 while (tep->te_rsrv_active) {
2153 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2154 }
2155 cv_signal(&tep->te_srv_cv);
2156 mutex_exit(&tep->te_srv_lock);
2157 return (0);
2158 }
2159
2160 /* ARGSUSED */
2161 static void
tl_rsrv_ser(mblk_t * mp,tl_endpt_t * tep)2162 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2163 {
2164 tl_endpt_t *peer_tep;
2165
2166 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2167 tl_cl_backenable(tep);
2168 } else if (
2169 IS_COTS(tep) &&
2170 ((peer_tep = tep->te_conp) != NULL) &&
2171 !peer_tep->te_closing &&
2172 ((tep->te_state == TS_DATA_XFER) ||
2173 (tep->te_state == TS_WIND_ORDREL)||
2174 (tep->te_state == TS_WREQ_ORDREL))) {
2175 TL_QENABLE(peer_tep);
2176 }
2177
2178 /*
2179 * Wakeup read side service routine.
2180 */
2181 mutex_enter(&tep->te_srv_lock);
2182 ASSERT(tep->te_rsrv_active);
2183 tep->te_rsrv_active = B_FALSE;
2184 cv_signal(&tep->te_srv_cv);
2185 mutex_exit(&tep->te_srv_lock);
2186 tl_serializer_exit(tep);
2187 }
2188
2189 /*
2190 * process M_PROTO messages. Always called from serializer.
2191 */
2192 static void
tl_do_proto(mblk_t * mp,tl_endpt_t * tep)2193 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2194 {
2195 ssize_t msz = MBLKL(mp);
2196 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2197
2198 /* Message size was validated by tl_wput(). */
2199 ASSERT(msz >= sizeof (prim->type));
2200
2201 switch (prim->type) {
2202 case T_UNBIND_REQ:
2203 tl_unbind(mp, tep);
2204 break;
2205
2206 case T_ADDR_REQ:
2207 tl_addr_req(mp, tep);
2208 break;
2209
2210 case O_T_CONN_RES:
2211 case T_CONN_RES:
2212 if (IS_CLTS(tep)) {
2213 tl_merror(tep->te_wq, mp, EPROTO);
2214 break;
2215 }
2216 tl_conn_res(mp, tep);
2217 break;
2218
2219 case T_DISCON_REQ:
2220 if (IS_CLTS(tep)) {
2221 tl_merror(tep->te_wq, mp, EPROTO);
2222 break;
2223 }
2224 tl_discon_req(mp, tep);
2225 break;
2226
2227 case T_DATA_REQ:
2228 if (IS_CLTS(tep)) {
2229 tl_merror(tep->te_wq, mp, EPROTO);
2230 break;
2231 }
2232 tl_data(mp, tep);
2233 break;
2234
2235 case T_OPTDATA_REQ:
2236 if (IS_CLTS(tep)) {
2237 tl_merror(tep->te_wq, mp, EPROTO);
2238 break;
2239 }
2240 tl_data(mp, tep);
2241 break;
2242
2243 case T_EXDATA_REQ:
2244 if (IS_CLTS(tep)) {
2245 tl_merror(tep->te_wq, mp, EPROTO);
2246 break;
2247 }
2248 tl_exdata(mp, tep);
2249 break;
2250
2251 case T_ORDREL_REQ:
2252 if (!IS_COTSORD(tep)) {
2253 tl_merror(tep->te_wq, mp, EPROTO);
2254 break;
2255 }
2256 tl_ordrel(mp, tep);
2257 break;
2258
2259 case T_UNITDATA_REQ:
2260 if (IS_COTS(tep)) {
2261 tl_merror(tep->te_wq, mp, EPROTO);
2262 break;
2263 }
2264 tl_unitdata(mp, tep);
2265 break;
2266
2267 default:
2268 tl_merror(tep->te_wq, mp, EPROTO);
2269 break;
2270 }
2271 }
2272
2273 /*
2274 * Process ioctl from serializer.
2275 * This is a wrapper around tl_do_ioctl().
2276 */
2277 static void
tl_do_ioctl_ser(mblk_t * mp,tl_endpt_t * tep)2278 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2279 {
2280 if (!tep->te_closing)
2281 tl_do_ioctl(mp, tep);
2282 else
2283 freemsg(mp);
2284
2285 tl_serializer_exit(tep);
2286 tl_refrele(tep);
2287 }
2288
2289 static void
tl_do_ioctl(mblk_t * mp,tl_endpt_t * tep)2290 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2291 {
2292 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2293 int cmd = iocbp->ioc_cmd;
2294 queue_t *wq = tep->te_wq;
2295 int error;
2296 int thisopt, otheropt;
2297
2298 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2299
2300 switch (cmd) {
2301 case TL_IOC_CREDOPT:
2302 if (cmd == TL_IOC_CREDOPT) {
2303 thisopt = TL_SETCRED;
2304 otheropt = TL_SETUCRED;
2305 } else {
2306 /* FALLTHROUGH */
2307 case TL_IOC_UCREDOPT:
2308 thisopt = TL_SETUCRED;
2309 otheropt = TL_SETCRED;
2310 }
2311 /*
2312 * The credentials passing does not apply to sockets.
2313 * Only one of the cred options can be set at a given time.
2314 */
2315 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2316 miocnak(wq, mp, 0, EINVAL);
2317 return;
2318 }
2319
2320 /*
2321 * Turn on generation of credential options for
2322 * T_conn_req, T_conn_con, T_unidata_ind.
2323 */
2324 error = miocpullup(mp, sizeof (uint32_t));
2325 if (error != 0) {
2326 miocnak(wq, mp, 0, error);
2327 return;
2328 }
2329 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2330 miocnak(wq, mp, 0, EINVAL);
2331 return;
2332 }
2333
2334 if (*(uint32_t *)mp->b_cont->b_rptr)
2335 tep->te_flag |= thisopt;
2336 else
2337 tep->te_flag &= ~thisopt;
2338
2339 miocack(wq, mp, 0, 0);
2340 break;
2341
2342 default:
2343 /* Should not be here */
2344 miocnak(wq, mp, 0, EINVAL);
2345 break;
2346 }
2347 }
2348
2349
2350 /*
2351 * send T_ERROR_ACK
2352 * Note: assumes enough memory or caller passed big enough mp
2353 * - no recovery from allocb failures
2354 */
2355
2356 static void
tl_error_ack(queue_t * wq,mblk_t * mp,t_scalar_t tli_err,t_scalar_t unix_err,t_scalar_t type)2357 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2358 t_scalar_t unix_err, t_scalar_t type)
2359 {
2360 struct T_error_ack *err_ack;
2361 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2362 M_PCPROTO, T_ERROR_ACK);
2363
2364 if (ackmp == NULL) {
2365 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
2366 "tl_error_ack:out of mblk memory"));
2367 tl_merror(wq, NULL, ENOSR);
2368 return;
2369 }
2370 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2371 err_ack->ERROR_prim = type;
2372 err_ack->TLI_error = tli_err;
2373 err_ack->UNIX_error = unix_err;
2374
2375 /*
2376 * send error ack message
2377 */
2378 qreply(wq, ackmp);
2379 }
2380
2381
2382
2383 /*
2384 * send T_OK_ACK
2385 * Note: assumes enough memory or caller passed big enough mp
2386 * - no recovery from allocb failures
2387 */
2388 static void
tl_ok_ack(queue_t * wq,mblk_t * mp,t_scalar_t type)2389 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2390 {
2391 struct T_ok_ack *ok_ack;
2392 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2393 M_PCPROTO, T_OK_ACK);
2394
2395 if (ackmp == NULL) {
2396 tl_merror(wq, NULL, ENOMEM);
2397 return;
2398 }
2399
2400 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2401 ok_ack->CORRECT_prim = type;
2402
2403 (void) qreply(wq, ackmp);
2404 }
2405
2406 /*
2407 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2408 * This is a wrapper around tl_bind().
2409 */
2410 static void
tl_bind_ser(mblk_t * mp,tl_endpt_t * tep)2411 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2412 {
2413 if (!tep->te_closing)
2414 tl_bind(mp, tep);
2415 else
2416 freemsg(mp);
2417
2418 tl_serializer_exit(tep);
2419 tl_refrele(tep);
2420 }
2421
2422 /*
2423 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2424 * Assumes that the endpoint is in the unbound.
2425 */
2426 static void
tl_bind(mblk_t * mp,tl_endpt_t * tep)2427 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2428 {
2429 queue_t *wq = tep->te_wq;
2430 struct T_bind_ack *b_ack;
2431 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2432 mblk_t *ackmp, *bamp;
2433 soux_addr_t ux_addr;
2434 t_uscalar_t qlen = 0;
2435 t_scalar_t alen, aoff;
2436 tl_addr_t addr_req;
2437 void *addr_startp;
2438 ssize_t msz = MBLKL(mp), basize;
2439 t_scalar_t tli_err = 0, unix_err = 0;
2440 t_scalar_t save_prim_type = bind->PRIM_type;
2441 t_scalar_t save_state = tep->te_state;
2442
2443 if (tep->te_state != TS_UNBND) {
2444 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2445 SL_TRACE | SL_ERROR,
2446 "tl_wput:bind_request:out of state, state=%d",
2447 tep->te_state));
2448 tli_err = TOUTSTATE;
2449 goto error;
2450 }
2451
2452 if (msz < sizeof (struct T_bind_req)) {
2453 tli_err = TSYSERR;
2454 unix_err = EINVAL;
2455 goto error;
2456 }
2457
2458 tep->te_state = nextstate[TE_BIND_REQ][tep->te_state];
2459
2460 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2461 (bind->PRIM_type == T_BIND_REQ));
2462
2463 alen = bind->ADDR_length;
2464 aoff = bind->ADDR_offset;
2465
2466 /* negotiate max conn req pending */
2467 if (IS_COTS(tep)) {
2468 qlen = bind->CONIND_number;
2469 if (qlen > tl_maxqlen)
2470 qlen = tl_maxqlen;
2471 }
2472
2473 /*
2474 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2475 * and bound again.
2476 */
2477 if ((tep->te_hash_hndl == NULL) &&
2478 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2479 mod_hash_reserve_nosleep(tep->te_addrhash,
2480 &tep->te_hash_hndl) != 0) {
2481 tli_err = TSYSERR;
2482 unix_err = ENOSR;
2483 goto error;
2484 }
2485
2486 /*
2487 * Verify address correctness.
2488 */
2489 if (IS_SOCKET(tep)) {
2490 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2491
2492 if ((alen != TL_SOUX_ADDRLEN) ||
2493 (aoff < 0) ||
2494 (aoff + alen > msz)) {
2495 (void) (STRLOG(TL_ID, tep->te_minor,
2496 1, SL_TRACE | SL_ERROR,
2497 "tl_bind: invalid socket addr"));
2498 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2499 tli_err = TSYSERR;
2500 unix_err = EINVAL;
2501 goto error;
2502 }
2503 /* Copy address from message to local buffer. */
2504 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2505 /*
2506 * Check that we got correct address from sockets
2507 */
2508 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2509 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2510 (void) (STRLOG(TL_ID, tep->te_minor,
2511 1, SL_TRACE | SL_ERROR,
2512 "tl_bind: invalid socket magic"));
2513 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2514 tli_err = TSYSERR;
2515 unix_err = EINVAL;
2516 goto error;
2517 }
2518 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2519 (ux_addr.soua_vp != NULL)) {
2520 (void) (STRLOG(TL_ID, tep->te_minor,
2521 1, SL_TRACE | SL_ERROR,
2522 "tl_bind: implicit addr non-empty"));
2523 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2524 tli_err = TSYSERR;
2525 unix_err = EINVAL;
2526 goto error;
2527 }
2528 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2529 (ux_addr.soua_vp == NULL)) {
2530 (void) (STRLOG(TL_ID, tep->te_minor,
2531 1, SL_TRACE | SL_ERROR,
2532 "tl_bind: explicit addr empty"));
2533 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2534 tli_err = TSYSERR;
2535 unix_err = EINVAL;
2536 goto error;
2537 }
2538 } else {
2539 if ((alen > 0) && ((aoff < 0) ||
2540 ((ssize_t)(aoff + alen) > msz) ||
2541 ((aoff + alen) < 0))) {
2542 (void) (STRLOG(TL_ID, tep->te_minor,
2543 1, SL_TRACE | SL_ERROR,
2544 "tl_bind: invalid message"));
2545 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2546 tli_err = TSYSERR;
2547 unix_err = EINVAL;
2548 goto error;
2549 }
2550 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2551 (void) (STRLOG(TL_ID, tep->te_minor,
2552 1, SL_TRACE | SL_ERROR,
2553 "tl_bind: bad addr in message"));
2554 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2555 tli_err = TBADADDR;
2556 goto error;
2557 }
2558 #ifdef DEBUG
2559 /*
2560 * Mild form of ASSERT()ion to detect broken TPI apps.
2561 * if (!assertion)
2562 * log warning;
2563 */
2564 if (!((alen == 0 && aoff == 0) ||
2565 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2566 (void) (STRLOG(TL_ID, tep->te_minor,
2567 3, SL_TRACE | SL_ERROR,
2568 "tl_bind: addr overlaps TPI message"));
2569 }
2570 #endif
2571 }
2572
2573 /*
2574 * Bind the address provided or allocate one if requested.
2575 * Allow rebinds with a new qlen value.
2576 */
2577 if (IS_SOCKET(tep)) {
2578 /*
2579 * For anonymous requests the te_ap is already set up properly
2580 * so use minor number as an address.
2581 * For explicit requests need to check whether the address is
2582 * already in use.
2583 */
2584 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2585 int rc;
2586
2587 if (tep->te_flag & TL_ADDRHASHED) {
2588 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2589 if (tep->te_vp == ux_addr.soua_vp)
2590 goto skip_addr_bind;
2591 else /* Rebind to a new address. */
2592 tl_addr_unbind(tep);
2593 }
2594 /*
2595 * Insert address in the hash if it is not already
2596 * there. Since we use preallocated handle, the insert
2597 * can fail only if the key is already present.
2598 */
2599 rc = mod_hash_insert_reserve(tep->te_addrhash,
2600 (mod_hash_key_t)ux_addr.soua_vp,
2601 (mod_hash_val_t)tep, tep->te_hash_hndl);
2602
2603 if (rc != 0) {
2604 ASSERT(rc == MH_ERR_DUPLICATE);
2605 /*
2606 * Violate O_T_BIND_REQ semantics and fail with
2607 * TADDRBUSY - sockets will not use any address
2608 * other than supplied one for explicit binds.
2609 */
2610 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2611 SL_TRACE | SL_ERROR,
2612 "tl_bind:requested addr %p is busy",
2613 ux_addr.soua_vp));
2614 tli_err = TADDRBUSY;
2615 unix_err = 0;
2616 goto error;
2617 }
2618 tep->te_uxaddr = ux_addr;
2619 tep->te_flag |= TL_ADDRHASHED;
2620 tep->te_hash_hndl = NULL;
2621 }
2622 } else if (alen == 0) {
2623 /*
2624 * assign any free address
2625 */
2626 if (!tl_get_any_addr(tep, NULL)) {
2627 (void) (STRLOG(TL_ID, tep->te_minor,
2628 1, SL_TRACE | SL_ERROR,
2629 "tl_bind:failed to get buffer for any "
2630 "address"));
2631 tli_err = TSYSERR;
2632 unix_err = ENOSR;
2633 goto error;
2634 }
2635 } else {
2636 addr_req.ta_alen = alen;
2637 addr_req.ta_abuf = (mp->b_rptr + aoff);
2638 addr_req.ta_zoneid = tep->te_zoneid;
2639
2640 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2641 if (tep->te_abuf == NULL) {
2642 tli_err = TSYSERR;
2643 unix_err = ENOSR;
2644 goto error;
2645 }
2646 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2647 tep->te_alen = alen;
2648
2649 if (mod_hash_insert_reserve(tep->te_addrhash,
2650 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2651 tep->te_hash_hndl) != 0) {
2652 if (save_prim_type == T_BIND_REQ) {
2653 /*
2654 * The bind semantics for this primitive
2655 * require a failure if the exact address
2656 * requested is busy
2657 */
2658 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2659 SL_TRACE | SL_ERROR,
2660 "tl_bind:requested addr is busy"));
2661 tli_err = TADDRBUSY;
2662 unix_err = 0;
2663 goto error;
2664 }
2665
2666 /*
2667 * O_T_BIND_REQ semantics say if address if requested
2668 * address is busy, bind to any available free address
2669 */
2670 if (!tl_get_any_addr(tep, &addr_req)) {
2671 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2672 SL_TRACE | SL_ERROR,
2673 "tl_bind:unable to get any addr buf"));
2674 tli_err = TSYSERR;
2675 unix_err = ENOMEM;
2676 goto error;
2677 }
2678 } else {
2679 tep->te_flag |= TL_ADDRHASHED;
2680 tep->te_hash_hndl = NULL;
2681 }
2682 }
2683
2684 ASSERT(tep->te_alen >= 0);
2685
2686 skip_addr_bind:
2687 /*
2688 * prepare T_BIND_ACK TPI message
2689 */
2690 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2691 bamp = reallocb(mp, basize, 0);
2692 if (bamp == NULL) {
2693 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2694 "tl_wput:tl_bind: allocb failed"));
2695 /*
2696 * roll back state changes
2697 */
2698 tl_addr_unbind(tep);
2699 tep->te_state = TS_UNBND;
2700 tl_memrecover(wq, mp, basize);
2701 return;
2702 }
2703
2704 DB_TYPE(bamp) = M_PCPROTO;
2705 bamp->b_wptr = bamp->b_rptr + basize;
2706 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2707 b_ack->PRIM_type = T_BIND_ACK;
2708 b_ack->CONIND_number = qlen;
2709 b_ack->ADDR_length = tep->te_alen;
2710 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2711 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2712 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2713
2714 if (IS_COTS(tep)) {
2715 tep->te_qlen = qlen;
2716 if (qlen > 0)
2717 tep->te_flag |= TL_LISTENER;
2718 }
2719
2720 tep->te_state = nextstate[TE_BIND_ACK][tep->te_state];
2721 /*
2722 * send T_BIND_ACK message
2723 */
2724 (void) qreply(wq, bamp);
2725 return;
2726
2727 error:
2728 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2729 if (ackmp == NULL) {
2730 /*
2731 * roll back state changes
2732 */
2733 tep->te_state = save_state;
2734 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2735 return;
2736 }
2737 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
2738 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2739 }
2740
2741 /*
2742 * Process T_UNBIND_REQ.
2743 * Called from serializer.
2744 */
2745 static void
tl_unbind(mblk_t * mp,tl_endpt_t * tep)2746 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2747 {
2748 queue_t *wq;
2749 mblk_t *ackmp;
2750
2751 if (tep->te_closing) {
2752 freemsg(mp);
2753 return;
2754 }
2755
2756 wq = tep->te_wq;
2757
2758 /*
2759 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2760 * ==> allocate for T_ERROR_ACK (known max)
2761 */
2762 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2763 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2764 return;
2765 }
2766 /*
2767 * memory resources committed
2768 * Note: no message validation. T_UNBIND_REQ message is
2769 * same size as PRIM_type field so already verified earlier.
2770 */
2771
2772 /*
2773 * validate state
2774 */
2775 if (tep->te_state != TS_IDLE) {
2776 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2777 SL_TRACE | SL_ERROR,
2778 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2779 tep->te_state));
2780 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2781 return;
2782 }
2783 tep->te_state = nextstate[TE_UNBIND_REQ][tep->te_state];
2784
2785 /*
2786 * TPI says on T_UNBIND_REQ:
2787 * send up a M_FLUSH to flush both
2788 * read and write queues
2789 */
2790 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2791
2792 if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2793 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2794
2795 /*
2796 * Sockets use bind with qlen==0 followed by bind() to
2797 * the same address with qlen > 0 for listeners.
2798 * We allow rebind with a new qlen value.
2799 */
2800 tl_addr_unbind(tep);
2801 }
2802
2803 tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
2804 /*
2805 * send T_OK_ACK
2806 */
2807 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2808 }
2809
2810
2811 /*
2812 * Option management code from drv/ip is used here
2813 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2814 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2815 * However, that is what we want as that option is 'unorthodox'
2816 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2817 * and not in T_SVR4_OPTMGMT_REQ/ACK
2818 * Note2: use of optcom_req means this routine is an exception to
2819 * recovery from allocb() failures.
2820 */
2821
2822 static void
tl_optmgmt(queue_t * wq,mblk_t * mp)2823 tl_optmgmt(queue_t *wq, mblk_t *mp)
2824 {
2825 tl_endpt_t *tep;
2826 mblk_t *ackmp;
2827 union T_primitives *prim;
2828 cred_t *cr;
2829
2830 tep = (tl_endpt_t *)wq->q_ptr;
2831 prim = (union T_primitives *)mp->b_rptr;
2832
2833 /*
2834 * All Solaris components should pass a db_credp
2835 * for this TPI message, hence we ASSERT.
2836 * But in case there is some other M_PROTO that looks
2837 * like a TPI message sent by some other kernel
2838 * component, we check and return an error.
2839 */
2840 cr = msg_getcred(mp, NULL);
2841 ASSERT(cr != NULL);
2842 if (cr == NULL) {
2843 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2844 return;
2845 }
2846
2847 /* all states OK for AF_UNIX options ? */
2848 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2849 prim->type == T_SVR4_OPTMGMT_REQ) {
2850 /*
2851 * Broken TLI semantics that options can only be managed
2852 * in TS_IDLE state. Needed for Sparc ABI test suite that
2853 * tests this TLI (mis)feature using this device driver.
2854 */
2855 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2856 SL_TRACE | SL_ERROR,
2857 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2858 tep->te_state));
2859 /*
2860 * preallocate memory for T_ERROR_ACK
2861 */
2862 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2863 if (ackmp == NULL) {
2864 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2865 return;
2866 }
2867
2868 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2869 freemsg(mp);
2870 return;
2871 }
2872
2873 /*
2874 * call common option management routine from drv/ip
2875 */
2876 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2877 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2878 } else {
2879 ASSERT(prim->type == T_OPTMGMT_REQ);
2880 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2881 }
2882 }
2883
/*
 * Handle T_conn_req - the driver part of connect().
 * If TL_SET[U]CRED generate the credentials options.
 * If this is a socket pass through options unmodified.
 * For sockets generate the T_CONN_CON here instead of
 * waiting for the T_CONN_RES.
 */
2891 static void
tl_conn_req(queue_t * wq,mblk_t * mp)2892 tl_conn_req(queue_t *wq, mblk_t *mp)
2893 {
2894 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2895 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2896 ssize_t msz = MBLKL(mp);
2897 t_scalar_t alen, aoff, olen, ooff, err = 0;
2898 tl_endpt_t *peer_tep = NULL;
2899 mblk_t *ackmp;
2900 mblk_t *dimp;
2901 struct T_discon_ind *di;
2902 soux_addr_t ux_addr;
2903 tl_addr_t dst;
2904
2905 ASSERT(IS_COTS(tep));
2906
2907 if (tep->te_closing) {
2908 freemsg(mp);
2909 return;
2910 }
2911
2912 /*
2913 * preallocate memory for:
2914 * 1. max of T_ERROR_ACK and T_OK_ACK
2915 * ==> known max T_ERROR_ACK
2916 * 2. max of T_DISCON_IND and T_CONN_IND
2917 */
2918 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2919 if (ackmp == NULL) {
2920 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2921 return;
2922 }
2923 /*
2924 * memory committed for T_OK_ACK/T_ERROR_ACK now
2925 * will be committed for T_DISCON_IND/T_CONN_IND later
2926 */
2927
2928 if (tep->te_state != TS_IDLE) {
2929 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2930 SL_TRACE | SL_ERROR,
2931 "tl_wput:T_CONN_REQ:out of state, state=%d",
2932 tep->te_state));
2933 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2934 freemsg(mp);
2935 return;
2936 }
2937
2938 /*
2939 * validate the message
2940 * Note: dereference fields in struct inside message only
2941 * after validating the message length.
2942 */
2943 if (msz < sizeof (struct T_conn_req)) {
2944 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2945 "tl_conn_req:invalid message length"));
2946 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2947 freemsg(mp);
2948 return;
2949 }
2950 alen = creq->DEST_length;
2951 aoff = creq->DEST_offset;
2952 olen = creq->OPT_length;
2953 ooff = creq->OPT_offset;
2954 if (olen == 0)
2955 ooff = 0;
2956
2957 if (IS_SOCKET(tep)) {
2958 if ((alen != TL_SOUX_ADDRLEN) ||
2959 (aoff < 0) ||
2960 (aoff + alen > msz) ||
2961 (alen > msz - sizeof (struct T_conn_req))) {
2962 (void) (STRLOG(TL_ID, tep->te_minor,
2963 1, SL_TRACE | SL_ERROR,
2964 "tl_conn_req: invalid socket addr"));
2965 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2966 freemsg(mp);
2967 return;
2968 }
2969 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2970 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2971 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2972 (void) (STRLOG(TL_ID, tep->te_minor,
2973 1, SL_TRACE | SL_ERROR,
2974 "tl_conn_req: invalid socket magic"));
2975 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2976 freemsg(mp);
2977 return;
2978 }
2979 } else {
2980 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2981 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2982 ooff + olen < 0)) ||
2983 olen < 0 || ooff < 0) {
2984 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2985 SL_TRACE | SL_ERROR,
2986 "tl_conn_req:invalid message"));
2987 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2988 freemsg(mp);
2989 return;
2990 }
2991
2992 if (alen <= 0 || aoff < 0 ||
2993 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2994 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2995 SL_TRACE | SL_ERROR,
2996 "tl_conn_req:bad addr in message, "
2997 "alen=%d, msz=%ld",
2998 alen, msz));
2999 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
3000 freemsg(mp);
3001 return;
3002 }
3003 #ifdef DEBUG
3004 /*
3005 * Mild form of ASSERT()ion to detect broken TPI apps.
3006 * if (!assertion)
3007 * log warning;
3008 */
3009 if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
3010 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3011 SL_TRACE | SL_ERROR,
3012 "tl_conn_req: addr overlaps TPI message"));
3013 }
3014 #endif
3015 if (olen) {
3016 /*
3017 * no opts in connect req
3018 * supported in this provider except for sockets.
3019 */
3020 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3021 SL_TRACE | SL_ERROR,
3022 "tl_conn_req:options not supported "
3023 "in message"));
3024 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
3025 freemsg(mp);
3026 return;
3027 }
3028 }
3029
3030 /*
3031 * Prevent tep from closing on us.
3032 */
3033 if (!tl_noclose(tep)) {
3034 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3035 "tl_conn_req:endpoint is closing"));
3036 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
3037 freemsg(mp);
3038 return;
3039 }
3040
3041 tep->te_state = nextstate[TE_CONN_REQ][tep->te_state];
3042 /*
3043 * get endpoint to connect to
3044 * check that peer with DEST addr is bound to addr
3045 * and has CONIND_number > 0
3046 */
3047 dst.ta_alen = alen;
3048 dst.ta_abuf = mp->b_rptr + aoff;
3049 dst.ta_zoneid = tep->te_zoneid;
3050
3051 /*
3052 * Verify if remote addr is in use
3053 */
3054 peer_tep = (IS_SOCKET(tep) ?
3055 tl_sock_find_peer(tep, &ux_addr) :
3056 tl_find_peer(tep, &dst));
3057
3058 if (peer_tep == NULL) {
3059 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3060 "tl_conn_req:no one at connect address"));
3061 err = ECONNREFUSED;
3062 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
3063 /*
3064 * validate that number of incoming connection is
3065 * not to capacity on destination endpoint
3066 */
3067 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3068 "tl_conn_req: qlen overflow connection refused"));
3069 err = ECONNREFUSED;
3070 }
3071
3072 /*
3073 * Send T_DISCON_IND in case of error
3074 */
3075 if (err != 0) {
3076 if (peer_tep != NULL)
3077 tl_refrele(peer_tep);
3078 /* We are still expected to send T_OK_ACK */
3079 tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
3080 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3081 tl_closeok(tep);
3082 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3083 M_PROTO, T_DISCON_IND);
3084 if (dimp == NULL) {
3085 tl_merror(wq, NULL, ENOSR);
3086 return;
3087 }
3088 di = (struct T_discon_ind *)dimp->b_rptr;
3089 di->DISCON_reason = err;
3090 di->SEQ_number = BADSEQNUM;
3091
3092 tep->te_state = TS_IDLE;
3093 /*
3094 * send T_DISCON_IND message
3095 */
3096 putnext(tep->te_rq, dimp);
3097 return;
3098 }
3099
3100 ASSERT(IS_COTS(peer_tep));
3101
3102 /*
3103 * Found the listener. At this point processing will continue on
3104 * listener serializer. Close of the endpoint should be blocked while we
3105 * switch serializers.
3106 */
3107 tl_serializer_refhold(peer_tep->te_ser);
3108 tl_serializer_refrele(tep->te_ser);
3109 tep->te_ser = peer_tep->te_ser;
3110 ASSERT(tep->te_oconp == NULL);
3111 tep->te_oconp = peer_tep;
3112
3113 /*
3114 * It is safe to close now. Close may continue on listener serializer.
3115 */
3116 tl_closeok(tep);
3117
3118 /*
3119 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3120 * data, so we link mp to ackmp.
3121 */
3122 ackmp->b_cont = mp;
3123 mp = ackmp;
3124
3125 tl_refhold(tep);
3126 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3127 }
3128
3129 /*
3130 * Finish T_CONN_REQ processing on listener serializer.
3131 */
3132 static void
tl_conn_req_ser(mblk_t * mp,tl_endpt_t * tep)3133 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3134 {
3135 queue_t *wq;
3136 tl_endpt_t *peer_tep = tep->te_oconp;
3137 mblk_t *confmp, *cimp, *indmp;
3138 void *opts = NULL;
3139 mblk_t *ackmp = mp;
3140 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3141 struct T_conn_ind *ci;
3142 tl_icon_t *tip;
3143 void *addr_startp;
3144 t_scalar_t olen = creq->OPT_length;
3145 t_scalar_t ooff = creq->OPT_offset;
3146 size_t ci_msz;
3147 size_t size;
3148 cred_t *cr = NULL;
3149 pid_t cpid;
3150
3151 if (tep->te_closing) {
3152 TL_UNCONNECT(tep->te_oconp);
3153 tl_serializer_exit(tep);
3154 tl_refrele(tep);
3155 freemsg(mp);
3156 return;
3157 }
3158
3159 wq = tep->te_wq;
3160 tep->te_flag |= TL_EAGER;
3161
3162 /*
3163 * Extract preallocated ackmp from mp.
3164 */
3165 mp = mp->b_cont;
3166 ackmp->b_cont = NULL;
3167
3168 if (olen == 0)
3169 ooff = 0;
3170
3171 if (peer_tep->te_closing ||
3172 !((peer_tep->te_state == TS_IDLE) ||
3173 (peer_tep->te_state == TS_WRES_CIND))) {
3174 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3175 "tl_conn_req:peer in bad state (%d)",
3176 peer_tep->te_state));
3177 TL_UNCONNECT(tep->te_oconp);
3178 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3179 freemsg(ackmp);
3180 tl_serializer_exit(tep);
3181 tl_refrele(tep);
3182 return;
3183 }
3184
3185 /*
3186 * preallocate now for T_DISCON_IND or T_CONN_IND
3187 */
3188 /*
3189 * calculate length of T_CONN_IND message
3190 */
3191 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3192 cr = msg_getcred(mp, &cpid);
3193 ASSERT(cr != NULL);
3194 if (peer_tep->te_flag & TL_SETCRED) {
3195 ooff = 0;
3196 olen = (t_scalar_t) sizeof (struct opthdr) +
3197 OPTLEN(sizeof (tl_credopt_t));
3198 /* 1 option only */
3199 } else {
3200 ooff = 0;
3201 olen = (t_scalar_t)sizeof (struct opthdr) +
3202 OPTLEN(ucredminsize(cr));
3203 /* 1 option only */
3204 }
3205 }
3206 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3207 ci_msz = T_ALIGN(ci_msz) + olen;
3208 size = max(ci_msz, sizeof (struct T_discon_ind));
3209
3210 /*
3211 * Save options from mp - we'll need them for T_CONN_IND.
3212 */
3213 if (ooff != 0) {
3214 opts = kmem_alloc(olen, KM_NOSLEEP);
3215 if (opts == NULL) {
3216 /*
3217 * roll back state changes
3218 */
3219 tep->te_state = TS_IDLE;
3220 tl_memrecover(wq, mp, size);
3221 freemsg(ackmp);
3222 TL_UNCONNECT(tep->te_oconp);
3223 tl_serializer_exit(tep);
3224 tl_refrele(tep);
3225 return;
3226 }
3227 /* Copy options to a temp buffer */
3228 bcopy(mp->b_rptr + ooff, opts, olen);
3229 }
3230
3231 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3232 /*
3233 * Generate a T_CONN_CON that has the identical address
3234 * (and options) as the T_CONN_REQ.
3235 * NOTE: assumes that the T_conn_req and T_conn_con structures
3236 * are isomorphic.
3237 */
3238 confmp = copyb(mp);
3239 if (confmp == NULL) {
3240 /*
3241 * roll back state changes
3242 */
3243 tep->te_state = TS_IDLE;
3244 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3245 freemsg(ackmp);
3246 if (opts != NULL)
3247 kmem_free(opts, olen);
3248 TL_UNCONNECT(tep->te_oconp);
3249 tl_serializer_exit(tep);
3250 tl_refrele(tep);
3251 return;
3252 }
3253 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3254 T_CONN_CON;
3255 } else {
3256 confmp = NULL;
3257 }
3258 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3259 /*
3260 * roll back state changes
3261 */
3262 tep->te_state = TS_IDLE;
3263 tl_memrecover(wq, mp, size);
3264 freemsg(ackmp);
3265 if (opts != NULL)
3266 kmem_free(opts, olen);
3267 freemsg(confmp);
3268 TL_UNCONNECT(tep->te_oconp);
3269 tl_serializer_exit(tep);
3270 tl_refrele(tep);
3271 return;
3272 }
3273
3274 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3275 if (tip == NULL) {
3276 /*
3277 * roll back state changes
3278 */
3279 tep->te_state = TS_IDLE;
3280 tl_memrecover(wq, indmp, sizeof (*tip));
3281 freemsg(ackmp);
3282 if (opts != NULL)
3283 kmem_free(opts, olen);
3284 freemsg(confmp);
3285 TL_UNCONNECT(tep->te_oconp);
3286 tl_serializer_exit(tep);
3287 tl_refrele(tep);
3288 return;
3289 }
3290 tip->ti_mp = NULL;
3291
3292 /*
3293 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3294 * and tl_icon_t cell.
3295 */
3296
3297 /*
3298 * ack validity of request and send the peer credential in the ACK.
3299 */
3300 tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
3301
3302 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3303 confmp != NULL) {
3304 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3305 }
3306
3307 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3308
3309 /*
3310 * prepare message to send T_CONN_IND
3311 */
3312 /*
3313 * allocate the message - original data blocks retained
3314 * in the returned mblk
3315 */
3316 cimp = tl_resizemp(indmp, size);
3317 if (cimp == NULL) {
3318 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3319 "tl_conn_req:con_ind:allocb failure"));
3320 tl_merror(wq, indmp, ENOMEM);
3321 TL_UNCONNECT(tep->te_oconp);
3322 tl_serializer_exit(tep);
3323 tl_refrele(tep);
3324 if (opts != NULL)
3325 kmem_free(opts, olen);
3326 freemsg(confmp);
3327 ASSERT(tip->ti_mp == NULL);
3328 kmem_free(tip, sizeof (*tip));
3329 return;
3330 }
3331
3332 DB_TYPE(cimp) = M_PROTO;
3333 ci = (struct T_conn_ind *)cimp->b_rptr;
3334 ci->PRIM_type = T_CONN_IND;
3335 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3336 ci->SRC_length = tep->te_alen;
3337 ci->SEQ_number = tep->te_seqno;
3338
3339 addr_startp = cimp->b_rptr + ci->SRC_offset;
3340 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3341 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3342
3343 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3344 ci->SRC_length);
3345 ci->OPT_length = olen; /* because only 1 option */
3346 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3347 cr, cpid,
3348 peer_tep->te_flag, peer_tep->te_credp);
3349 } else if (ooff != 0) {
3350 /* Copy option from T_CONN_REQ */
3351 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3352 ci->SRC_length);
3353 ci->OPT_length = olen;
3354 ASSERT(opts != NULL);
3355 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3356 } else {
3357 ci->OPT_offset = 0;
3358 ci->OPT_length = 0;
3359 }
3360 if (opts != NULL)
3361 kmem_free(opts, olen);
3362
3363 /*
3364 * register connection request with server peer
3365 * append to list of incoming connections
3366 * increment references for both peer_tep and tep: peer_tep is placed on
3367 * te_oconp and tep is placed on listeners queue.
3368 */
3369 tip->ti_tep = tep;
3370 tip->ti_seqno = tep->te_seqno;
3371 list_insert_tail(&peer_tep->te_iconp, tip);
3372 peer_tep->te_nicon++;
3373
3374 peer_tep->te_state = nextstate[TE_CONN_IND][peer_tep->te_state];
3375 /*
3376 * send the T_CONN_IND message
3377 */
3378 putnext(peer_tep->te_rq, cimp);
3379
3380 /*
3381 * Send a T_CONN_CON message for sockets.
3382 * Disable the queues until we have reached the correct state!
3383 */
3384 if (confmp != NULL) {
3385 tep->te_state = nextstate[TE_CONN_CON][tep->te_state];
3386 noenable(wq);
3387 putnext(tep->te_rq, confmp);
3388 }
3389 /*
3390 * Now we need to increment tep reference because tep is referenced by
3391 * server list of pending connections. We also need to decrement
3392 * reference before exiting serializer. Two operations void each other
3393 * so we don't modify reference at all.
3394 */
3395 ASSERT(tep->te_refcnt >= 2);
3396 ASSERT(peer_tep->te_refcnt >= 2);
3397 tl_serializer_exit(tep);
3398 }
3399
3400
3401
3402 /*
3403 * Handle T_conn_res on listener stream. Called on listener serializer.
3404 * tl_conn_req has already generated the T_CONN_CON.
3405 * tl_conn_res is called on listener serializer.
3406 * No one accesses acceptor at this point, so it is safe to modify acceptor.
3407 * Switch eager serializer to acceptor's.
3408 *
3409 * If TL_SET[U]CRED generate the credentials options.
3410 * For sockets tl_conn_req has already generated the T_CONN_CON.
3411 */
3412 static void
tl_conn_res(mblk_t * mp,tl_endpt_t * tep)3413 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3414 {
3415 queue_t *wq;
3416 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
3417 ssize_t msz = MBLKL(mp);
3418 t_scalar_t olen, ooff, err = 0;
3419 t_scalar_t prim = cres->PRIM_type;
3420 uchar_t *addr_startp;
3421 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
3422 tl_icon_t *tip;
3423 size_t size;
3424 mblk_t *ackmp, *respmp;
3425 mblk_t *dimp, *ccmp = NULL;
3426 struct T_discon_ind *di;
3427 struct T_conn_con *cc;
3428 boolean_t client_noclose_set = B_FALSE;
3429 boolean_t switch_client_serializer = B_TRUE;
3430
3431 ASSERT(IS_COTS(tep));
3432
3433 if (tep->te_closing) {
3434 freemsg(mp);
3435 return;
3436 }
3437
3438 wq = tep->te_wq;
3439
3440 /*
3441 * preallocate memory for:
3442 * 1. max of T_ERROR_ACK and T_OK_ACK
3443 * ==> known max T_ERROR_ACK
3444 * 2. max of T_DISCON_IND and T_CONN_CON
3445 */
3446 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3447 if (ackmp == NULL) {
3448 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3449 return;
3450 }
3451 /*
3452 * memory committed for T_OK_ACK/T_ERROR_ACK now
3453 * will be committed for T_DISCON_IND/T_CONN_CON later
3454 */
3455
3456
3457 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3458
3459 /*
3460 * validate state
3461 */
3462 if (tep->te_state != TS_WRES_CIND) {
3463 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3464 SL_TRACE | SL_ERROR,
3465 "tl_wput:T_CONN_RES:out of state, state=%d",
3466 tep->te_state));
3467 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3468 freemsg(mp);
3469 return;
3470 }
3471
3472 /*
3473 * validate the message
3474 * Note: dereference fields in struct inside message only
3475 * after validating the message length.
3476 */
3477 if (msz < sizeof (struct T_conn_res)) {
3478 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3479 "tl_conn_res:invalid message length"));
3480 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3481 freemsg(mp);
3482 return;
3483 }
3484 olen = cres->OPT_length;
3485 ooff = cres->OPT_offset;
3486 if (((olen > 0) && ((ooff + olen) > msz))) {
3487 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3488 "tl_conn_res:invalid message"));
3489 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3490 freemsg(mp);
3491 return;
3492 }
3493 if (olen) {
3494 /*
3495 * no opts in connect res
3496 * supported in this provider
3497 */
3498 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3499 "tl_conn_res:options not supported in message"));
3500 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3501 freemsg(mp);
3502 return;
3503 }
3504
3505 tep->te_state = nextstate[TE_CONN_RES][tep->te_state];
3506 ASSERT(tep->te_state == TS_WACK_CRES);
3507
3508 if (cres->SEQ_number < TL_MINOR_START &&
3509 cres->SEQ_number >= BADSEQNUM) {
3510 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3511 "tl_conn_res:remote endpoint sequence number bad"));
3512 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3513 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3514 freemsg(mp);
3515 return;
3516 }
3517
3518 /*
3519 * find accepting endpoint. Will have extra reference if found.
3520 */
3521 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3522 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3523 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3524 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3525 "tl_conn_res:bad accepting endpoint"));
3526 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3527 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3528 freemsg(mp);
3529 return;
3530 }
3531
3532 /*
3533 * Prevent acceptor from closing.
3534 */
3535 if (!tl_noclose(acc_ep)) {
3536 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3537 "tl_conn_res:bad accepting endpoint"));
3538 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3539 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3540 tl_refrele(acc_ep);
3541 freemsg(mp);
3542 return;
3543 }
3544
3545 acc_ep->te_flag |= TL_ACCEPTOR;
3546
3547 /*
3548 * validate that accepting endpoint, if different from listening
3549 * has address bound => state is TS_IDLE
3550 * TROUBLE in XPG4 !!?
3551 */
3552 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3553 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3554 "tl_conn_res:accepting endpoint has no address bound,"
3555 "state=%d", acc_ep->te_state));
3556 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3557 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3558 freemsg(mp);
3559 tl_closeok(acc_ep);
3560 tl_refrele(acc_ep);
3561 return;
3562 }
3563
3564 /*
3565 * validate if accepting endpt same as listening, then
3566 * no other incoming connection should be on the queue
3567 */
3568
3569 if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3570 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3571 "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3572 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3573 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3574 freemsg(mp);
3575 tl_closeok(acc_ep);
3576 tl_refrele(acc_ep);
3577 return;
3578 }
3579
3580 /*
3581 * Mark for deletion, the entry corresponding to client
3582 * on list of pending connections made by the listener
3583 * search list to see if client is one of the
3584 * recorded as a listener.
3585 */
3586 tip = tl_icon_find(tep, cres->SEQ_number);
3587 if (tip == NULL) {
3588 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3589 "tl_conn_res:no client in listener list"));
3590 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3591 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3592 freemsg(mp);
3593 tl_closeok(acc_ep);
3594 tl_refrele(acc_ep);
3595 return;
3596 }
3597
3598 /*
3599 * If ti_tep is NULL the client has already closed. In this case
3600 * the code below will avoid any action on the client side
3601 * but complete the server and acceptor state transitions.
3602 */
3603 ASSERT(tip->ti_tep == NULL ||
3604 tip->ti_tep->te_seqno == cres->SEQ_number);
3605 cl_ep = tip->ti_tep;
3606
3607 /*
3608 * If the client is present it is switched from listener's to acceptor's
3609 * serializer. We should block client closes while serializers are
3610 * being switched.
3611 *
3612 * It is possible that the client is present but is currently being
3613 * closed. There are two possible cases:
3614 *
3615 * 1) The client has already entered tl_close_finish_ser() and sent
3616 * T_ORDREL_IND. In this case we can just ignore the client (but we
3617 * still need to send all messages from tip->ti_mp to the acceptor).
3618 *
3619 * 2) The client started the close but has not entered
3620 * tl_close_finish_ser() yet. In this case, the client is already
3621 * proceeding asynchronously on the listener's serializer, so we're
3622 * forced to change the acceptor to use the listener's serializer to
3623 * ensure that any operations on the acceptor are serialized with
3624 * respect to the close that's in-progress.
3625 */
3626 if (cl_ep != NULL) {
3627 if (tl_noclose(cl_ep)) {
3628 client_noclose_set = B_TRUE;
3629 } else {
3630 /*
3631 * Client is closing. If it it has sent the
3632 * T_ORDREL_IND, we can simply ignore it - otherwise,
3633 * we have to let let the client continue until it is
3634 * sent.
3635 *
3636 * If we do continue using the client, acceptor will
3637 * switch to client's serializer which is used by client
3638 * for its close.
3639 */
3640 tl_client_closing_when_accepting++;
3641 switch_client_serializer = B_FALSE;
3642 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3643 cl_ep->te_state == -1)
3644 cl_ep = NULL;
3645 }
3646 }
3647
3648 if (cl_ep != NULL) {
3649 /*
3650 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3651 * (latter for sockets only)
3652 */
3653 if (cl_ep->te_state != TS_WCON_CREQ &&
3654 (cl_ep->te_state != TS_DATA_XFER &&
3655 IS_SOCKET(cl_ep))) {
3656 err = ECONNREFUSED;
3657 /*
3658 * T_DISCON_IND sent later after committing memory
3659 * and acking validity of request
3660 */
3661 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3662 "tl_conn_res:peer in bad state"));
3663 }
3664
3665 /*
3666 * preallocate now for T_DISCON_IND or T_CONN_CONN
3667 * ack validity of request (T_OK_ACK) after memory committed
3668 */
3669
3670 if (err) {
3671 size = sizeof (struct T_discon_ind);
3672 } else {
3673 /*
3674 * calculate length of T_CONN_CON message
3675 */
3676 olen = 0;
3677 if (cl_ep->te_flag & TL_SETCRED) {
3678 olen = (t_scalar_t)sizeof (struct opthdr) +
3679 OPTLEN(sizeof (tl_credopt_t));
3680 } else if (cl_ep->te_flag & TL_SETUCRED) {
3681 olen = (t_scalar_t)sizeof (struct opthdr) +
3682 OPTLEN(ucredminsize(acc_ep->te_credp));
3683 }
3684 size = T_ALIGN(sizeof (struct T_conn_con) +
3685 acc_ep->te_alen) + olen;
3686 }
3687 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3688 /*
3689 * roll back state changes
3690 */
3691 tep->te_state = TS_WRES_CIND;
3692 tl_memrecover(wq, mp, size);
3693 freemsg(ackmp);
3694 if (client_noclose_set)
3695 tl_closeok(cl_ep);
3696 tl_closeok(acc_ep);
3697 tl_refrele(acc_ep);
3698 return;
3699 }
3700 mp = NULL;
3701 }
3702
3703 /*
3704 * Now ack validity of request
3705 */
3706 if (tep->te_nicon == 1) {
3707 if (tep == acc_ep)
3708 tep->te_state = nextstate[TE_OK_ACK2][tep->te_state];
3709 else
3710 tep->te_state = nextstate[TE_OK_ACK3][tep->te_state];
3711 } else {
3712 tep->te_state = nextstate[TE_OK_ACK4][tep->te_state];
3713 }
3714
3715 /*
3716 * send T_DISCON_IND now if client state validation failed earlier
3717 */
3718 if (err) {
3719 tl_ok_ack(wq, ackmp, prim);
3720 /*
3721 * flush the queues - why always ?
3722 */
3723 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3724
3725 dimp = tl_resizemp(respmp, size);
3726 if (dimp == NULL) {
3727 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3728 SL_TRACE | SL_ERROR,
3729 "tl_conn_res:con_ind:allocb failure"));
3730 tl_merror(wq, respmp, ENOMEM);
3731 tl_closeok(acc_ep);
3732 if (client_noclose_set)
3733 tl_closeok(cl_ep);
3734 tl_refrele(acc_ep);
3735 return;
3736 }
3737 if (dimp->b_cont) {
3738 /* no user data in provider generated discon ind */
3739 freemsg(dimp->b_cont);
3740 dimp->b_cont = NULL;
3741 }
3742
3743 DB_TYPE(dimp) = M_PROTO;
3744 di = (struct T_discon_ind *)dimp->b_rptr;
3745 di->PRIM_type = T_DISCON_IND;
3746 di->DISCON_reason = err;
3747 di->SEQ_number = BADSEQNUM;
3748
3749 tep->te_state = TS_IDLE;
3750 /*
3751 * send T_DISCON_IND message
3752 */
3753 putnext(acc_ep->te_rq, dimp);
3754 if (client_noclose_set)
3755 tl_closeok(cl_ep);
3756 tl_closeok(acc_ep);
3757 tl_refrele(acc_ep);
3758 return;
3759 }
3760
3761 /*
3762 * now start connecting the accepting endpoint
3763 */
3764 if (tep != acc_ep)
3765 acc_ep->te_state = nextstate[TE_PASS_CONN][acc_ep->te_state];
3766
3767 if (cl_ep == NULL) {
3768 /*
3769 * The client has already closed. Send up any queued messages
3770 * and change the state accordingly.
3771 */
3772 tl_ok_ack(wq, ackmp, prim);
3773 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3774
3775 /*
3776 * remove endpoint from incoming connection
3777 * delete client from list of incoming connections
3778 */
3779 tl_freetip(tep, tip);
3780 freemsg(mp);
3781 tl_closeok(acc_ep);
3782 tl_refrele(acc_ep);
3783 return;
3784 } else if (tip->ti_mp != NULL) {
3785 /*
3786 * The client could have queued a T_DISCON_IND which needs
3787 * to be sent up.
3788 * Note that t_discon_req can not operate the same as
3789 * t_data_req since it is not possible for it to putbq
3790 * the message and return -1 due to the use of qwriter.
3791 */
3792 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3793 }
3794
3795 /*
3796 * prepare connect confirm T_CONN_CON message
3797 */
3798
3799 /*
3800 * allocate the message - original data blocks
3801 * retained in the returned mblk
3802 */
3803 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3804 ccmp = tl_resizemp(respmp, size);
3805 if (ccmp == NULL) {
3806 tl_ok_ack(wq, ackmp, prim);
3807 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3808 SL_TRACE | SL_ERROR,
3809 "tl_conn_res:conn_con:allocb failure"));
3810 tl_merror(wq, respmp, ENOMEM);
3811 tl_closeok(acc_ep);
3812 if (client_noclose_set)
3813 tl_closeok(cl_ep);
3814 tl_refrele(acc_ep);
3815 return;
3816 }
3817
3818 DB_TYPE(ccmp) = M_PROTO;
3819 cc = (struct T_conn_con *)ccmp->b_rptr;
3820 cc->PRIM_type = T_CONN_CON;
3821 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3822 cc->RES_length = acc_ep->te_alen;
3823 addr_startp = ccmp->b_rptr + cc->RES_offset;
3824 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3825 if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3826 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3827 cc->RES_length);
3828 cc->OPT_length = olen;
3829 tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3830 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3831 cl_ep->te_credp);
3832 } else {
3833 cc->OPT_offset = 0;
3834 cc->OPT_length = 0;
3835 }
3836 /*
3837 * Forward the credential in the packet so it can be picked up
3838 * at the higher layers for more complete credential processing
3839 */
3840 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3841 } else {
3842 freemsg(respmp);
3843 respmp = NULL;
3844 }
3845
3846 /*
3847 * make connection linking
3848 * accepting and client endpoints
3849 * No need to increment references:
3850 * on client: it should already have one from tip->ti_tep linkage.
3851 * on acceptor is should already have one from the table lookup.
3852 *
3853 * At this point both client and acceptor can't close. Set client
3854 * serializer to acceptor's.
3855 */
3856 ASSERT(cl_ep->te_refcnt >= 2);
3857 ASSERT(acc_ep->te_refcnt >= 2);
3858 ASSERT(cl_ep->te_conp == NULL);
3859 ASSERT(acc_ep->te_conp == NULL);
3860 cl_ep->te_conp = acc_ep;
3861 acc_ep->te_conp = cl_ep;
3862 ASSERT(cl_ep->te_ser == tep->te_ser);
3863 if (switch_client_serializer) {
3864 mutex_enter(&cl_ep->te_ser_lock);
3865 if (cl_ep->te_ser_count > 0) {
3866 switch_client_serializer = B_FALSE;
3867 tl_serializer_noswitch++;
3868 } else {
3869 /*
3870 * Move client to the acceptor's serializer.
3871 */
3872 tl_serializer_refhold(acc_ep->te_ser);
3873 tl_serializer_refrele(cl_ep->te_ser);
3874 cl_ep->te_ser = acc_ep->te_ser;
3875 }
3876 mutex_exit(&cl_ep->te_ser_lock);
3877 }
3878 if (!switch_client_serializer) {
3879 /*
3880 * It is not possible to switch client to use acceptor's.
3881 * Move acceptor to client's serializer (which is the same as
3882 * listener's).
3883 */
3884 tl_serializer_refhold(cl_ep->te_ser);
3885 tl_serializer_refrele(acc_ep->te_ser);
3886 acc_ep->te_ser = cl_ep->te_ser;
3887 }
3888
3889 TL_REMOVE_PEER(cl_ep->te_oconp);
3890 TL_REMOVE_PEER(acc_ep->te_oconp);
3891
3892 /*
3893 * remove endpoint from incoming connection
3894 * delete client from list of incoming connections
3895 */
3896 tip->ti_tep = NULL;
3897 tl_freetip(tep, tip);
3898 tl_ok_ack(wq, ackmp, prim);
3899
3900 /*
3901 * data blocks already linked in reallocb()
3902 */
3903
3904 /*
3905 * link queues so that I_SENDFD will work
3906 */
3907 if (!IS_SOCKET(tep)) {
3908 acc_ep->te_wq->q_next = cl_ep->te_rq;
3909 cl_ep->te_wq->q_next = acc_ep->te_rq;
3910 }
3911
3912 /*
3913 * send T_CONN_CON up on client side unless it was already
3914 * done (for a socket). In cases any data or ordrel req has been
3915 * queued make sure that the service procedure runs.
3916 */
3917 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3918 enableok(cl_ep->te_wq);
3919 TL_QENABLE(cl_ep);
3920 if (ccmp != NULL)
3921 freemsg(ccmp);
3922 } else {
3923 /*
3924 * change client state on TE_CONN_CON event
3925 */
3926 cl_ep->te_state = nextstate[TE_CONN_CON][cl_ep->te_state];
3927 putnext(cl_ep->te_rq, ccmp);
3928 }
3929
3930 /* Mark the both endpoints as accepted */
3931 cl_ep->te_flag |= TL_ACCEPTED;
3932 acc_ep->te_flag |= TL_ACCEPTED;
3933
3934 /*
3935 * Allow client and acceptor to close.
3936 */
3937 tl_closeok(acc_ep);
3938 if (client_noclose_set)
3939 tl_closeok(cl_ep);
3940 }
3941
3942
3943
3944
3945 static void
tl_discon_req(mblk_t * mp,tl_endpt_t * tep)3946 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3947 {
3948 queue_t *wq;
3949 struct T_discon_req *dr;
3950 ssize_t msz;
3951 tl_endpt_t *peer_tep = tep->te_conp;
3952 tl_endpt_t *srv_tep = tep->te_oconp;
3953 tl_icon_t *tip;
3954 size_t size;
3955 mblk_t *ackmp, *dimp, *respmp;
3956 struct T_discon_ind *di;
3957 t_scalar_t save_state, new_state;
3958
3959 if (tep->te_closing) {
3960 freemsg(mp);
3961 return;
3962 }
3963
3964 if ((peer_tep != NULL) && peer_tep->te_closing) {
3965 TL_UNCONNECT(tep->te_conp);
3966 peer_tep = NULL;
3967 }
3968 if ((srv_tep != NULL) && srv_tep->te_closing) {
3969 TL_UNCONNECT(tep->te_oconp);
3970 srv_tep = NULL;
3971 }
3972
3973 wq = tep->te_wq;
3974
3975 /*
3976 * preallocate memory for:
3977 * 1. max of T_ERROR_ACK and T_OK_ACK
3978 * ==> known max T_ERROR_ACK
3979 * 2. for T_DISCON_IND
3980 */
3981 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3982 if (ackmp == NULL) {
3983 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3984 return;
3985 }
3986 /*
3987 * memory committed for T_OK_ACK/T_ERROR_ACK now
3988 * will be committed for T_DISCON_IND later
3989 */
3990
3991 dr = (struct T_discon_req *)mp->b_rptr;
3992 msz = MBLKL(mp);
3993
3994 /*
3995 * validate the state
3996 */
3997 save_state = new_state = tep->te_state;
3998 if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3999 !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
4000 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4001 SL_TRACE | SL_ERROR,
4002 "tl_wput:T_DISCON_REQ:out of state, state=%d",
4003 tep->te_state));
4004 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
4005 freemsg(mp);
4006 return;
4007 }
4008 /*
4009 * Defer committing the state change until it is determined if
4010 * the message will be queued with the tl_icon or not.
4011 */
4012 new_state = nextstate[TE_DISCON_REQ][tep->te_state];
4013
4014 /* validate the message */
4015 if (msz < sizeof (struct T_discon_req)) {
4016 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4017 "tl_discon_req:invalid message"));
4018 tep->te_state = nextstate[TE_ERROR_ACK][new_state];
4019 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
4020 freemsg(mp);
4021 return;
4022 }
4023
4024 /*
4025 * if server, then validate that client exists
4026 * by connection sequence number etc.
4027 */
4028 if (tep->te_nicon > 0) { /* server */
4029
4030 /*
4031 * search server list for disconnect client
4032 */
4033 tip = tl_icon_find(tep, dr->SEQ_number);
4034 if (tip == NULL) {
4035 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4036 SL_TRACE | SL_ERROR,
4037 "tl_discon_req:no disconnect endpoint"));
4038 tep->te_state = nextstate[TE_ERROR_ACK][new_state];
4039 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
4040 freemsg(mp);
4041 return;
4042 }
4043 /*
4044 * If ti_tep is NULL the client has already closed. In this case
4045 * the code below will avoid any action on the client side.
4046 */
4047
4048 IMPLY(tip->ti_tep != NULL,
4049 tip->ti_tep->te_seqno == dr->SEQ_number);
4050 peer_tep = tip->ti_tep;
4051 }
4052
4053 /*
4054 * preallocate now for T_DISCON_IND
4055 * ack validity of request (T_OK_ACK) after memory committed
4056 */
4057 size = sizeof (struct T_discon_ind);
4058 if ((respmp = reallocb(mp, size, 0)) == NULL) {
4059 tl_memrecover(wq, mp, size);
4060 freemsg(ackmp);
4061 return;
4062 }
4063
4064 /*
4065 * prepare message to ack validity of request
4066 */
4067 if (tep->te_nicon == 0) {
4068 new_state = nextstate[TE_OK_ACK1][new_state];
4069 } else {
4070 if (tep->te_nicon == 1)
4071 new_state = nextstate[TE_OK_ACK2][new_state];
4072 else
4073 new_state = nextstate[TE_OK_ACK4][new_state];
4074 }
4075
4076 /*
4077 * Flushing queues according to TPI. Using the old state.
4078 */
4079 if ((tep->te_nicon <= 1) &&
4080 ((save_state == TS_DATA_XFER) ||
4081 (save_state == TS_WIND_ORDREL) ||
4082 (save_state == TS_WREQ_ORDREL)))
4083 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
4084
4085 /* send T_OK_ACK up */
4086 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
4087
4088 /*
4089 * now do disconnect business
4090 */
4091 if (tep->te_nicon > 0) { /* listener */
4092 if (peer_tep != NULL && !peer_tep->te_closing) {
4093 /*
4094 * disconnect incoming connect request pending to tep
4095 */
4096 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4097 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4098 SL_TRACE | SL_ERROR,
4099 "tl_discon_req: reallocb failed"));
4100 tep->te_state = new_state;
4101 tl_merror(wq, respmp, ENOMEM);
4102 return;
4103 }
4104 di = (struct T_discon_ind *)dimp->b_rptr;
4105 di->SEQ_number = BADSEQNUM;
4106 save_state = peer_tep->te_state;
4107 peer_tep->te_state = TS_IDLE;
4108
4109 TL_REMOVE_PEER(peer_tep->te_oconp);
4110 enableok(peer_tep->te_wq);
4111 TL_QENABLE(peer_tep);
4112 } else {
4113 freemsg(respmp);
4114 dimp = NULL;
4115 }
4116
4117 /*
4118 * remove endpoint from incoming connection list
4119 * - remove disconnect client from list on server
4120 */
4121 tl_freetip(tep, tip);
4122 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4123 /*
4124 * disconnect an outgoing request pending from tep
4125 */
4126
4127 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4128 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4129 SL_TRACE | SL_ERROR,
4130 "tl_discon_req: reallocb failed"));
4131 tep->te_state = new_state;
4132 tl_merror(wq, respmp, ENOMEM);
4133 return;
4134 }
4135 di = (struct T_discon_ind *)dimp->b_rptr;
4136 DB_TYPE(dimp) = M_PROTO;
4137 di->PRIM_type = T_DISCON_IND;
4138 di->DISCON_reason = ECONNRESET;
4139 di->SEQ_number = tep->te_seqno;
4140
4141 /*
4142 * If this is a socket the T_DISCON_IND is queued with
4143 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4144 * from the list of pending connections.
4145 * Note that when te_oconp is set the peer better have
4146 * a t_connind_t for the client.
4147 */
4148 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4149 /*
4150 * No need to check that
4151 * ti_tep == NULL since the T_DISCON_IND
4152 * takes precedence over other queued
4153 * messages.
4154 */
4155 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4156 peer_tep = NULL;
4157 dimp = NULL;
4158 /*
4159 * Can't clear te_oconp since tl_co_unconnect needs
4160 * it as a hint not to free the tep.
4161 * Keep the state unchanged since tl_conn_res inspects
4162 * it.
4163 */
4164 new_state = tep->te_state;
4165 } else {
4166 /* Found - delete it */
4167 tip = tl_icon_find(peer_tep, tep->te_seqno);
4168 if (tip != NULL) {
4169 ASSERT(tep == tip->ti_tep);
4170 save_state = peer_tep->te_state;
4171 if (peer_tep->te_nicon == 1)
4172 peer_tep->te_state =
4173 nextstate[TE_DISCON_IND2]
4174 [peer_tep->te_state];
4175 else
4176 peer_tep->te_state =
4177 nextstate[TE_DISCON_IND3]
4178 [peer_tep->te_state];
4179 tl_freetip(peer_tep, tip);
4180 }
4181 ASSERT(tep->te_oconp != NULL);
4182 TL_UNCONNECT(tep->te_oconp);
4183 }
4184 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4185 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4186 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4187 SL_TRACE | SL_ERROR,
4188 "tl_discon_req: reallocb failed"));
4189 tep->te_state = new_state;
4190 tl_merror(wq, respmp, ENOMEM);
4191 return;
4192 }
4193 di = (struct T_discon_ind *)dimp->b_rptr;
4194 di->SEQ_number = BADSEQNUM;
4195
4196 save_state = peer_tep->te_state;
4197 peer_tep->te_state = TS_IDLE;
4198 } else {
4199 /* Not connected */
4200 tep->te_state = new_state;
4201 freemsg(respmp);
4202 return;
4203 }
4204
4205 /* Commit state changes */
4206 tep->te_state = new_state;
4207
4208 if (peer_tep == NULL) {
4209 ASSERT(dimp == NULL);
4210 goto done;
4211 }
4212 /*
4213 * Flush queues on peer before sending up
4214 * T_DISCON_IND according to TPI
4215 */
4216
4217 if ((save_state == TS_DATA_XFER) ||
4218 (save_state == TS_WIND_ORDREL) ||
4219 (save_state == TS_WREQ_ORDREL))
4220 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4221
4222 DB_TYPE(dimp) = M_PROTO;
4223 di->PRIM_type = T_DISCON_IND;
4224 di->DISCON_reason = ECONNRESET;
4225
4226 /*
4227 * data blocks already linked into dimp by reallocb()
4228 */
4229 /*
4230 * send indication message to peer user module
4231 */
4232 ASSERT(dimp != NULL);
4233 putnext(peer_tep->te_rq, dimp);
4234 done:
4235 if (tep->te_conp) { /* disconnect pointers if connected */
4236 ASSERT(!peer_tep->te_closing);
4237
4238 /*
4239 * Messages may be queued on peer's write queue
4240 * waiting to be processed by its write service
4241 * procedure. Before the pointer to the peer transport
4242 * structure is set to NULL, qenable the peer's write
4243 * queue so that the queued up messages are processed.
4244 */
4245 if ((save_state == TS_DATA_XFER) ||
4246 (save_state == TS_WIND_ORDREL) ||
4247 (save_state == TS_WREQ_ORDREL))
4248 TL_QENABLE(peer_tep);
4249 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4250 TL_UNCONNECT(peer_tep->te_conp);
4251 if (!IS_SOCKET(tep)) {
4252 /*
4253 * unlink the streams
4254 */
4255 tep->te_wq->q_next = NULL;
4256 peer_tep->te_wq->q_next = NULL;
4257 }
4258 TL_UNCONNECT(tep->te_conp);
4259 }
4260 }
4261
4262 static void
tl_addr_req_ser(mblk_t * mp,tl_endpt_t * tep)4263 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4264 {
4265 if (!tep->te_closing)
4266 tl_addr_req(mp, tep);
4267 else
4268 freemsg(mp);
4269
4270 tl_serializer_exit(tep);
4271 tl_refrele(tep);
4272 }
4273
4274 static void
tl_addr_req(mblk_t * mp,tl_endpt_t * tep)4275 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4276 {
4277 queue_t *wq;
4278 size_t ack_sz;
4279 mblk_t *ackmp;
4280 struct T_addr_ack *taa;
4281
4282 if (tep->te_closing) {
4283 freemsg(mp);
4284 return;
4285 }
4286
4287 wq = tep->te_wq;
4288
4289 /*
4290 * Note: T_ADDR_REQ message has only PRIM_type field
4291 * so it is already validated earlier.
4292 */
4293
4294 if (IS_CLTS(tep) ||
4295 (tep->te_state > TS_WREQ_ORDREL) ||
4296 (tep->te_state < TS_DATA_XFER)) {
4297 /*
4298 * Either connectionless or connection oriented but not
4299 * in connected data transfer state or half-closed states.
4300 */
4301 ack_sz = sizeof (struct T_addr_ack);
4302 if (tep->te_state >= TS_IDLE)
4303 /* is bound */
4304 ack_sz += tep->te_alen;
4305 ackmp = reallocb(mp, ack_sz, 0);
4306 if (ackmp == NULL) {
4307 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4308 SL_TRACE | SL_ERROR,
4309 "tl_addr_req: reallocb failed"));
4310 tl_memrecover(wq, mp, ack_sz);
4311 return;
4312 }
4313
4314 taa = (struct T_addr_ack *)ackmp->b_rptr;
4315
4316 bzero(taa, sizeof (struct T_addr_ack));
4317
4318 taa->PRIM_type = T_ADDR_ACK;
4319 ackmp->b_datap->db_type = M_PCPROTO;
4320 ackmp->b_wptr = (uchar_t *)&taa[1];
4321
4322 if (tep->te_state >= TS_IDLE) {
4323 /* endpoint is bound */
4324 taa->LOCADDR_length = tep->te_alen;
4325 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4326
4327 bcopy(tep->te_abuf, ackmp->b_wptr,
4328 tep->te_alen);
4329 ackmp->b_wptr += tep->te_alen;
4330 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4331 }
4332
4333 (void) qreply(wq, ackmp);
4334 } else {
4335 ASSERT(tep->te_state == TS_DATA_XFER ||
4336 tep->te_state == TS_WIND_ORDREL ||
4337 tep->te_state == TS_WREQ_ORDREL);
4338 /* connection oriented in data transfer */
4339 tl_connected_cots_addr_req(mp, tep);
4340 }
4341 }
4342
4343
4344 static void
tl_connected_cots_addr_req(mblk_t * mp,tl_endpt_t * tep)4345 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4346 {
4347 tl_endpt_t *peer_tep = tep->te_conp;
4348 size_t ack_sz;
4349 mblk_t *ackmp;
4350 struct T_addr_ack *taa;
4351 uchar_t *addr_startp;
4352
4353 if (tep->te_closing) {
4354 freemsg(mp);
4355 return;
4356 }
4357
4358 if (peer_tep == NULL || peer_tep->te_closing) {
4359 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4360 return;
4361 }
4362
4363 ASSERT(tep->te_state >= TS_IDLE);
4364
4365 ack_sz = sizeof (struct T_addr_ack);
4366 ack_sz += T_ALIGN(tep->te_alen);
4367 ack_sz += peer_tep->te_alen;
4368
4369 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4370 if (ackmp == NULL) {
4371 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4372 "tl_connected_cots_addr_req: reallocb failed"));
4373 tl_memrecover(tep->te_wq, mp, ack_sz);
4374 return;
4375 }
4376
4377 taa = (struct T_addr_ack *)ackmp->b_rptr;
4378
4379 /* endpoint is bound */
4380 taa->LOCADDR_length = tep->te_alen;
4381 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4382
4383 addr_startp = (uchar_t *)&taa[1];
4384
4385 bcopy(tep->te_abuf, addr_startp,
4386 tep->te_alen);
4387
4388 taa->REMADDR_length = peer_tep->te_alen;
4389 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4390 taa->LOCADDR_length);
4391 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4392 bcopy(peer_tep->te_abuf, addr_startp,
4393 peer_tep->te_alen);
4394 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4395 taa->REMADDR_offset + peer_tep->te_alen;
4396 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4397
4398 putnext(tep->te_rq, ackmp);
4399 }
4400
4401 static void
tl_copy_info(struct T_info_ack * ia,tl_endpt_t * tep)4402 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4403 {
4404 if (IS_CLTS(tep)) {
4405 *ia = tl_clts_info_ack;
4406 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4407 } else {
4408 *ia = tl_cots_info_ack;
4409 if (IS_COTSORD(tep))
4410 ia->SERV_type = T_COTS_ORD;
4411 }
4412 ia->TIDU_size = tl_tidusz;
4413 ia->CURRENT_state = tep->te_state;
4414 }
4415
4416 /*
4417 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4418 * tl_wput.
4419 */
4420 static void
tl_capability_req(mblk_t * mp,tl_endpt_t * tep)4421 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4422 {
4423 mblk_t *ackmp;
4424 t_uscalar_t cap_bits1;
4425 struct T_capability_ack *tcap;
4426
4427 if (tep->te_closing) {
4428 freemsg(mp);
4429 return;
4430 }
4431
4432 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4433
4434 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4435 M_PCPROTO, T_CAPABILITY_ACK);
4436 if (ackmp == NULL) {
4437 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4438 "tl_capability_req: reallocb failed"));
4439 tl_memrecover(tep->te_wq, mp,
4440 sizeof (struct T_capability_ack));
4441 return;
4442 }
4443
4444 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4445 tcap->CAP_bits1 = 0;
4446
4447 if (cap_bits1 & TC1_INFO) {
4448 tl_copy_info(&tcap->INFO_ack, tep);
4449 tcap->CAP_bits1 |= TC1_INFO;
4450 }
4451
4452 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4453 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4454 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4455 }
4456
4457 putnext(tep->te_rq, ackmp);
4458 }
4459
4460 static void
tl_info_req_ser(mblk_t * mp,tl_endpt_t * tep)4461 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4462 {
4463 if (!tep->te_closing)
4464 tl_info_req(mp, tep);
4465 else
4466 freemsg(mp);
4467
4468 tl_serializer_exit(tep);
4469 tl_refrele(tep);
4470 }
4471
4472 static void
tl_info_req(mblk_t * mp,tl_endpt_t * tep)4473 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4474 {
4475 mblk_t *ackmp;
4476
4477 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4478 M_PCPROTO, T_INFO_ACK);
4479 if (ackmp == NULL) {
4480 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4481 "tl_info_req: reallocb failed"));
4482 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4483 return;
4484 }
4485
4486 /*
4487 * fill in T_INFO_ACK contents
4488 */
4489 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4490
4491 /*
4492 * send ack message
4493 */
4494 putnext(tep->te_rq, ackmp);
4495 }
4496
4497 /*
4498 * Handle M_DATA, T_data_req and T_optdata_req.
4499 * If this is a socket pass through T_optdata_req options unmodified.
4500 */
4501 static void
tl_data(mblk_t * mp,tl_endpt_t * tep)4502 tl_data(mblk_t *mp, tl_endpt_t *tep)
4503 {
4504 queue_t *wq = tep->te_wq;
4505 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4506 ssize_t msz = MBLKL(mp);
4507 tl_endpt_t *peer_tep;
4508 queue_t *peer_rq;
4509 boolean_t closing = tep->te_closing;
4510
4511 if (IS_CLTS(tep)) {
4512 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4513 SL_TRACE | SL_ERROR,
4514 "tl_wput:clts:unattached M_DATA"));
4515 if (!closing) {
4516 tl_merror(wq, mp, EPROTO);
4517 } else {
4518 freemsg(mp);
4519 }
4520 return;
4521 }
4522
4523 /*
4524 * If the endpoint is closing it should still forward any data to the
4525 * peer (if it has one). If it is not allowed to forward it can just
4526 * free the message.
4527 */
4528 if (closing &&
4529 (tep->te_state != TS_DATA_XFER) &&
4530 (tep->te_state != TS_WREQ_ORDREL)) {
4531 freemsg(mp);
4532 return;
4533 }
4534
4535 if (DB_TYPE(mp) == M_PROTO) {
4536 if (prim->type == T_DATA_REQ &&
4537 msz < sizeof (struct T_data_req)) {
4538 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4539 SL_TRACE | SL_ERROR,
4540 "tl_data:T_DATA_REQ:invalid message"));
4541 if (!closing) {
4542 tl_merror(wq, mp, EPROTO);
4543 } else {
4544 freemsg(mp);
4545 }
4546 return;
4547 } else if (prim->type == T_OPTDATA_REQ &&
4548 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4549 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4550 SL_TRACE | SL_ERROR,
4551 "tl_data:T_OPTDATA_REQ:invalid message"));
4552 if (!closing) {
4553 tl_merror(wq, mp, EPROTO);
4554 } else {
4555 freemsg(mp);
4556 }
4557 return;
4558 }
4559 }
4560
4561 /*
4562 * connection oriented provider
4563 */
4564 switch (tep->te_state) {
4565 case TS_IDLE:
4566 /*
4567 * Other end not here - do nothing.
4568 */
4569 freemsg(mp);
4570 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4571 "tl_data:cots with endpoint idle"));
4572 return;
4573
4574 case TS_DATA_XFER:
4575 /* valid states */
4576 if (tep->te_conp != NULL)
4577 break;
4578
4579 if (tep->te_oconp == NULL) {
4580 if (!closing) {
4581 tl_merror(wq, mp, EPROTO);
4582 } else {
4583 freemsg(mp);
4584 }
4585 return;
4586 }
4587 /*
4588 * For a socket the T_CONN_CON is sent early thus
4589 * the peer might not yet have accepted the connection.
4590 * If we are closing queue the packet with the T_CONN_IND.
4591 * Otherwise defer processing the packet until the peer
4592 * accepts the connection.
4593 * Note that the queue is noenabled when we go into this
4594 * state.
4595 */
4596 if (!closing) {
4597 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4598 SL_TRACE | SL_ERROR,
4599 "tl_data: ocon"));
4600 TL_PUTBQ(tep, mp);
4601 return;
4602 }
4603 if (DB_TYPE(mp) == M_PROTO) {
4604 if (msz < sizeof (t_scalar_t)) {
4605 freemsg(mp);
4606 return;
4607 }
4608 /* reuse message block - just change REQ to IND */
4609 if (prim->type == T_DATA_REQ)
4610 prim->type = T_DATA_IND;
4611 else
4612 prim->type = T_OPTDATA_IND;
4613 }
4614 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4615 return;
4616
4617 case TS_WREQ_ORDREL:
4618 if (tep->te_conp == NULL) {
4619 /*
4620 * Other end closed - generate discon_ind
4621 * with reason 0 to cause an EPIPE but no
4622 * read side error on AF_UNIX sockets.
4623 */
4624 freemsg(mp);
4625 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4626 SL_TRACE | SL_ERROR,
4627 "tl_data: WREQ_ORDREL and no peer"));
4628 tl_discon_ind(tep, 0);
4629 return;
4630 }
4631 break;
4632
4633 default:
4634 /* invalid state for event TE_DATA_REQ */
4635 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4636 "tl_data:cots:out of state"));
4637 tl_merror(wq, mp, EPROTO);
4638 return;
4639 }
4640 /*
4641 * tep->te_state = nextstate[TE_DATA_REQ][tep->te_state];
4642 * (State stays same on this event)
4643 */
4644
4645 /*
4646 * get connected endpoint
4647 */
4648 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4649 freemsg(mp);
4650 /* Peer closed */
4651 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4652 "tl_data: peer gone"));
4653 return;
4654 }
4655
4656 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4657 peer_rq = peer_tep->te_rq;
4658
4659 /*
4660 * Put it back if flow controlled
4661 * Note: Messages already on queue when we are closing is bounded
4662 * so we can ignore flow control.
4663 */
4664 if (!canputnext(peer_rq) && !closing) {
4665 TL_PUTBQ(tep, mp);
4666 return;
4667 }
4668
4669 /*
4670 * validate peer state
4671 */
4672 switch (peer_tep->te_state) {
4673 case TS_DATA_XFER:
4674 case TS_WIND_ORDREL:
4675 /* valid states */
4676 break;
4677 default:
4678 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4679 "tl_data:rx side:invalid state"));
4680 tl_merror(peer_tep->te_wq, mp, EPROTO);
4681 return;
4682 }
4683 if (DB_TYPE(mp) == M_PROTO) {
4684 /* reuse message block - just change REQ to IND */
4685 if (prim->type == T_DATA_REQ)
4686 prim->type = T_DATA_IND;
4687 else
4688 prim->type = T_OPTDATA_IND;
4689 }
4690 /*
4691 * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
4692 * (peer state stays same on this event)
4693 */
4694 /*
4695 * send data to connected peer
4696 */
4697 putnext(peer_rq, mp);
4698 }
4699
4700
4701
4702 static void
tl_exdata(mblk_t * mp,tl_endpt_t * tep)4703 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4704 {
4705 queue_t *wq = tep->te_wq;
4706 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4707 ssize_t msz = MBLKL(mp);
4708 tl_endpt_t *peer_tep;
4709 queue_t *peer_rq;
4710 boolean_t closing = tep->te_closing;
4711
4712 if (msz < sizeof (struct T_exdata_req)) {
4713 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4714 "tl_exdata:invalid message"));
4715 if (!closing) {
4716 tl_merror(wq, mp, EPROTO);
4717 } else {
4718 freemsg(mp);
4719 }
4720 return;
4721 }
4722
4723 /*
4724 * If the endpoint is closing it should still forward any data to the
4725 * peer (if it has one). If it is not allowed to forward it can just
4726 * free the message.
4727 */
4728 if (closing &&
4729 (tep->te_state != TS_DATA_XFER) &&
4730 (tep->te_state != TS_WREQ_ORDREL)) {
4731 freemsg(mp);
4732 return;
4733 }
4734
4735 /*
4736 * validate state
4737 */
4738 switch (tep->te_state) {
4739 case TS_IDLE:
4740 /*
4741 * Other end not here - do nothing.
4742 */
4743 freemsg(mp);
4744 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4745 "tl_exdata:cots with endpoint idle"));
4746 return;
4747
4748 case TS_DATA_XFER:
4749 /* valid states */
4750 if (tep->te_conp != NULL)
4751 break;
4752
4753 if (tep->te_oconp == NULL) {
4754 if (!closing) {
4755 tl_merror(wq, mp, EPROTO);
4756 } else {
4757 freemsg(mp);
4758 }
4759 return;
4760 }
4761 /*
4762 * For a socket the T_CONN_CON is sent early thus
4763 * the peer might not yet have accepted the connection.
4764 * If we are closing queue the packet with the T_CONN_IND.
4765 * Otherwise defer processing the packet until the peer
4766 * accepts the connection.
4767 * Note that the queue is noenabled when we go into this
4768 * state.
4769 */
4770 if (!closing) {
4771 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4772 SL_TRACE | SL_ERROR,
4773 "tl_exdata: ocon"));
4774 TL_PUTBQ(tep, mp);
4775 return;
4776 }
4777 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4778 "tl_exdata: closing socket ocon"));
4779 prim->type = T_EXDATA_IND;
4780 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4781 return;
4782
4783 case TS_WREQ_ORDREL:
4784 if (tep->te_conp == NULL) {
4785 /*
4786 * Other end closed - generate discon_ind
4787 * with reason 0 to cause an EPIPE but no
4788 * read side error on AF_UNIX sockets.
4789 */
4790 freemsg(mp);
4791 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4792 SL_TRACE | SL_ERROR,
4793 "tl_exdata: WREQ_ORDREL and no peer"));
4794 tl_discon_ind(tep, 0);
4795 return;
4796 }
4797 break;
4798
4799 default:
4800 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4801 SL_TRACE | SL_ERROR,
4802 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4803 tep->te_state));
4804 tl_merror(wq, mp, EPROTO);
4805 return;
4806 }
4807 /*
4808 * tep->te_state = nextstate[TE_EXDATA_REQ][tep->te_state];
4809 * (state stays same on this event)
4810 */
4811
4812 /*
4813 * get connected endpoint
4814 */
4815 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4816 freemsg(mp);
4817 /* Peer closed */
4818 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4819 "tl_exdata: peer gone"));
4820 return;
4821 }
4822
4823 peer_rq = peer_tep->te_rq;
4824
4825 /*
4826 * Put it back if flow controlled
4827 * Note: Messages already on queue when we are closing is bounded
4828 * so we can ignore flow control.
4829 */
4830 if (!canputnext(peer_rq) && !closing) {
4831 TL_PUTBQ(tep, mp);
4832 return;
4833 }
4834
4835 /*
4836 * validate state on peer
4837 */
4838 switch (peer_tep->te_state) {
4839 case TS_DATA_XFER:
4840 case TS_WIND_ORDREL:
4841 /* valid states */
4842 break;
4843 default:
4844 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4845 "tl_exdata:rx side:invalid state"));
4846 tl_merror(peer_tep->te_wq, mp, EPROTO);
4847 return;
4848 }
4849 /*
4850 * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
4851 * (peer state stays same on this event)
4852 */
4853 /*
4854 * reuse message block
4855 */
4856 prim->type = T_EXDATA_IND;
4857
4858 /*
4859 * send data to connected peer
4860 */
4861 putnext(peer_rq, mp);
4862 }
4863
4864
4865
4866 static void
tl_ordrel(mblk_t * mp,tl_endpt_t * tep)4867 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4868 {
4869 queue_t *wq = tep->te_wq;
4870 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4871 ssize_t msz = MBLKL(mp);
4872 tl_endpt_t *peer_tep;
4873 queue_t *peer_rq;
4874 boolean_t closing = tep->te_closing;
4875
4876 if (msz < sizeof (struct T_ordrel_req)) {
4877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4878 "tl_ordrel:invalid message"));
4879 if (!closing) {
4880 tl_merror(wq, mp, EPROTO);
4881 } else {
4882 freemsg(mp);
4883 }
4884 return;
4885 }
4886
4887 /*
4888 * validate state
4889 */
4890 switch (tep->te_state) {
4891 case TS_DATA_XFER:
4892 case TS_WREQ_ORDREL:
4893 /* valid states */
4894 if (tep->te_conp != NULL)
4895 break;
4896
4897 if (tep->te_oconp == NULL)
4898 break;
4899
4900 /*
4901 * For a socket the T_CONN_CON is sent early thus
4902 * the peer might not yet have accepted the connection.
4903 * If we are closing queue the packet with the T_CONN_IND.
4904 * Otherwise defer processing the packet until the peer
4905 * accepts the connection.
4906 * Note that the queue is noenabled when we go into this
4907 * state.
4908 */
4909 if (!closing) {
4910 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4911 SL_TRACE | SL_ERROR,
4912 "tl_ordlrel: ocon"));
4913 TL_PUTBQ(tep, mp);
4914 return;
4915 }
4916 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4917 "tl_ordlrel: closing socket ocon"));
4918 prim->type = T_ORDREL_IND;
4919 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4920 return;
4921
4922 default:
4923 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4924 SL_TRACE | SL_ERROR,
4925 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4926 tep->te_state));
4927 if (!closing) {
4928 tl_merror(wq, mp, EPROTO);
4929 } else {
4930 freemsg(mp);
4931 }
4932 return;
4933 }
4934 tep->te_state = nextstate[TE_ORDREL_REQ][tep->te_state];
4935
4936 /*
4937 * get connected endpoint
4938 */
4939 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4940 /* Peer closed */
4941 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4942 "tl_ordrel: peer gone"));
4943 freemsg(mp);
4944 return;
4945 }
4946
4947 peer_rq = peer_tep->te_rq;
4948
4949 /*
4950 * Put it back if flow controlled except when we are closing.
4951 * Note: Messages already on queue when we are closing is bounded
4952 * so we can ignore flow control.
4953 */
4954 if (!canputnext(peer_rq) && !closing) {
4955 TL_PUTBQ(tep, mp);
4956 return;
4957 }
4958
4959 /*
4960 * validate state on peer
4961 */
4962 switch (peer_tep->te_state) {
4963 case TS_DATA_XFER:
4964 case TS_WIND_ORDREL:
4965 /* valid states */
4966 break;
4967 default:
4968 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4969 "tl_ordrel:rx side:invalid state"));
4970 tl_merror(peer_tep->te_wq, mp, EPROTO);
4971 return;
4972 }
4973 peer_tep->te_state = nextstate[TE_ORDREL_IND][peer_tep->te_state];
4974
4975 /*
4976 * reuse message block
4977 */
4978 prim->type = T_ORDREL_IND;
4979 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4980 "tl_ordrel: send ordrel_ind"));
4981
4982 /*
4983 * send data to connected peer
4984 */
4985 putnext(peer_rq, mp);
4986 }
4987
4988
4989 /*
4990 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4991 */
4992 static void
tl_uderr(queue_t * wq,mblk_t * mp,t_scalar_t err)4993 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4994 {
4995 size_t err_sz;
4996 tl_endpt_t *tep;
4997 struct T_unitdata_req *udreq;
4998 mblk_t *err_mp;
4999 t_scalar_t alen;
5000 t_scalar_t olen;
5001 struct T_uderror_ind *uderr;
5002 uchar_t *addr_startp;
5003
5004 err_sz = sizeof (struct T_uderror_ind);
5005 tep = (tl_endpt_t *)wq->q_ptr;
5006 udreq = (struct T_unitdata_req *)mp->b_rptr;
5007 alen = udreq->DEST_length;
5008 olen = udreq->OPT_length;
5009
5010 if (alen > 0)
5011 err_sz = T_ALIGN(err_sz + alen);
5012 if (olen > 0)
5013 err_sz += olen;
5014
5015 err_mp = allocb(err_sz, BPRI_MED);
5016 if (err_mp == NULL) {
5017 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5018 "tl_uderr:allocb failure"));
5019 /*
5020 * Note: no rollback of state needed as it does
5021 * not change in connectionless transport
5022 */
5023 tl_memrecover(wq, mp, err_sz);
5024 return;
5025 }
5026
5027 DB_TYPE(err_mp) = M_PROTO;
5028 err_mp->b_wptr = err_mp->b_rptr + err_sz;
5029 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
5030 uderr->PRIM_type = T_UDERROR_IND;
5031 uderr->ERROR_type = err;
5032 uderr->DEST_length = alen;
5033 uderr->OPT_length = olen;
5034 if (alen <= 0) {
5035 uderr->DEST_offset = 0;
5036 } else {
5037 uderr->DEST_offset =
5038 (t_scalar_t)sizeof (struct T_uderror_ind);
5039 addr_startp = mp->b_rptr + udreq->DEST_offset;
5040 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
5041 (size_t)alen);
5042 }
5043 if (olen <= 0) {
5044 uderr->OPT_offset = 0;
5045 } else {
5046 uderr->OPT_offset =
5047 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
5048 uderr->DEST_length);
5049 addr_startp = mp->b_rptr + udreq->OPT_offset;
5050 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
5051 (size_t)olen);
5052 }
5053 freemsg(mp);
5054
5055 /*
5056 * send indication message
5057 */
5058 tep->te_state = nextstate[TE_UDERROR_IND][tep->te_state];
5059
5060 qreply(wq, err_mp);
5061 }
5062
5063 static void
tl_unitdata_ser(mblk_t * mp,tl_endpt_t * tep)5064 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
5065 {
5066 queue_t *wq = tep->te_wq;
5067
5068 if (!tep->te_closing && (wq->q_first != NULL)) {
5069 TL_PUTQ(tep, mp);
5070 } else {
5071 if (tep->te_rq != NULL)
5072 tl_unitdata(mp, tep);
5073 else
5074 freemsg(mp);
5075 }
5076
5077 tl_serializer_exit(tep);
5078 tl_refrele(tep);
5079 }
5080
5081 /*
5082 * Handle T_unitdata_req.
5083 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5084 * If this is a socket pass through options unmodified.
5085 */
5086 static void
tl_unitdata(mblk_t * mp,tl_endpt_t * tep)5087 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5088 {
5089 queue_t *wq = tep->te_wq;
5090 soux_addr_t ux_addr;
5091 tl_addr_t destaddr;
5092 uchar_t *addr_startp;
5093 tl_endpt_t *peer_tep;
5094 struct T_unitdata_ind *udind;
5095 struct T_unitdata_req *udreq;
5096 ssize_t msz, ui_sz, reuse_mb_sz;
5097 t_scalar_t alen, aoff, olen, ooff;
5098 t_scalar_t oldolen = 0;
5099 cred_t *cr = NULL;
5100 pid_t cpid;
5101
5102 udreq = (struct T_unitdata_req *)mp->b_rptr;
5103 msz = MBLKL(mp);
5104
5105 /*
5106 * validate the state
5107 */
5108 if (tep->te_state != TS_IDLE) {
5109 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5110 SL_TRACE | SL_ERROR,
5111 "tl_wput:T_CONN_REQ:out of state"));
5112 tl_merror(wq, mp, EPROTO);
5113 return;
5114 }
5115 /*
5116 * tep->te_state = nextstate[TE_UNITDATA_REQ][tep->te_state];
5117 * (state does not change on this event)
5118 */
5119
5120 /*
5121 * validate the message
5122 * Note: dereference fields in struct inside message only
5123 * after validating the message length.
5124 */
5125 if (msz < sizeof (struct T_unitdata_req)) {
5126 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5127 "tl_unitdata:invalid message length"));
5128 tl_merror(wq, mp, EINVAL);
5129 return;
5130 }
5131 alen = udreq->DEST_length;
5132 aoff = udreq->DEST_offset;
5133 oldolen = olen = udreq->OPT_length;
5134 ooff = udreq->OPT_offset;
5135 if (olen == 0)
5136 ooff = 0;
5137
5138 if (IS_SOCKET(tep)) {
5139 if ((alen != TL_SOUX_ADDRLEN) ||
5140 (aoff < 0) ||
5141 (aoff + alen > msz) ||
5142 (olen < 0) || (ooff < 0) ||
5143 ((olen > 0) && ((ooff + olen) > msz))) {
5144 (void) (STRLOG(TL_ID, tep->te_minor,
5145 1, SL_TRACE | SL_ERROR,
5146 "tl_unitdata_req: invalid socket addr "
5147 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5148 (int)msz, alen, aoff, olen, ooff));
5149 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5150 return;
5151 }
5152 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5153
5154 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5155 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5156 (void) (STRLOG(TL_ID, tep->te_minor,
5157 1, SL_TRACE | SL_ERROR,
5158 "tl_conn_req: invalid socket magic"));
5159 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5160 return;
5161 }
5162 } else {
5163 if ((alen < 0) ||
5164 (aoff < 0) ||
5165 ((alen > 0) && ((aoff + alen) > msz)) ||
5166 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5167 ((aoff + alen) < 0) ||
5168 ((olen > 0) && ((ooff + olen) > msz)) ||
5169 (olen < 0) ||
5170 (ooff < 0) ||
5171 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5172 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5173 SL_TRACE | SL_ERROR,
5174 "tl_unitdata:invalid unit data message"));
5175 tl_merror(wq, mp, EINVAL);
5176 return;
5177 }
5178 }
5179
5180 /* Options not supported unless it's a socket */
5181 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5182 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5183 "tl_unitdata:option use(unsupported) or zero len addr"));
5184 tl_uderr(wq, mp, EPROTO);
5185 return;
5186 }
5187 #ifdef DEBUG
5188 /*
5189 * Mild form of ASSERT()ion to detect broken TPI apps.
5190 * if (!assertion)
5191 * log warning;
5192 */
5193 if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5194 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5195 "tl_unitdata:addr overlaps TPI message"));
5196 }
5197 #endif
5198 /*
5199 * get destination endpoint
5200 */
5201 destaddr.ta_alen = alen;
5202 destaddr.ta_abuf = mp->b_rptr + aoff;
5203 destaddr.ta_zoneid = tep->te_zoneid;
5204
5205 /*
5206 * Check whether the destination is the same that was used previously
5207 * and the destination endpoint is in the right state. If something is
5208 * wrong, find destination again and cache it.
5209 */
5210 peer_tep = tep->te_lastep;
5211
5212 if ((peer_tep == NULL) || peer_tep->te_closing ||
5213 (peer_tep->te_state != TS_IDLE) ||
5214 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5215 /*
5216 * Not the same as cached destination , need to find the right
5217 * destination.
5218 */
5219 peer_tep = (IS_SOCKET(tep) ?
5220 tl_sock_find_peer(tep, &ux_addr) :
5221 tl_find_peer(tep, &destaddr));
5222
5223 if (peer_tep == NULL) {
5224 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5225 SL_TRACE | SL_ERROR,
5226 "tl_unitdata:no one at destination address"));
5227 tl_uderr(wq, mp, ECONNRESET);
5228 return;
5229 }
5230
5231 /*
5232 * Cache the new peer.
5233 */
5234 if (tep->te_lastep != NULL)
5235 tl_refrele(tep->te_lastep);
5236
5237 tep->te_lastep = peer_tep;
5238 }
5239
5240 if (peer_tep->te_state != TS_IDLE) {
5241 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5242 "tl_unitdata:provider in invalid state"));
5243 tl_uderr(wq, mp, EPROTO);
5244 return;
5245 }
5246
5247 ASSERT(peer_tep->te_rq != NULL);
5248
5249 /*
5250 * Put it back if flow controlled except when we are closing.
5251 * Note: Messages already on queue when we are closing is bounded
5252 * so we can ignore flow control.
5253 */
5254 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5255 /* record what we are flow controlled on */
5256 if (tep->te_flowq != NULL) {
5257 list_remove(&tep->te_flowq->te_flowlist, tep);
5258 }
5259 list_insert_head(&peer_tep->te_flowlist, tep);
5260 tep->te_flowq = peer_tep;
5261 TL_PUTBQ(tep, mp);
5262 return;
5263 }
5264 /*
5265 * prepare indication message
5266 */
5267
5268 /*
5269 * calculate length of message
5270 */
5271 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5272 cr = msg_getcred(mp, &cpid);
5273 ASSERT(cr != NULL);
5274
5275 if (peer_tep->te_flag & TL_SETCRED) {
5276 ASSERT(olen == 0);
5277 olen = (t_scalar_t)sizeof (struct opthdr) +
5278 OPTLEN(sizeof (tl_credopt_t));
5279 /* 1 option only */
5280 } else if (peer_tep->te_flag & TL_SETUCRED) {
5281 ASSERT(olen == 0);
5282 olen = (t_scalar_t)sizeof (struct opthdr) +
5283 OPTLEN(ucredminsize(cr));
5284 /* 1 option only */
5285 } else {
5286 /* Possibly more than one option */
5287 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5288 OPTLEN(ucredminsize(cr));
5289 }
5290 }
5291
5292 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5293 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5294
5295 /*
5296 * If the unitdata_ind fits and we are not adding options
5297 * reuse the udreq mblk.
5298 *
5299 * Otherwise, it is possible we need to append an option if one of the
5300 * te_flag bits is set. This requires extra space in the data block for
5301 * the additional option but the traditional technique used below to
5302 * allocate a new block and copy into it will not work when there is a
5303 * message block with a free pointer (since we don't know anything
5304 * about the layout of the data, pointers referencing or within the
5305 * data, etc.). To handle this possibility the upper layers may have
5306 * preallocated some space to use for appending an option. We check the
5307 * overall mblock size against the size we need ('reuse_mb_sz' with the
5308 * original address length [alen] to ensure we won't overrun the
5309 * current mblk data size) to see if there is free space and thus
5310 * avoid allocating a new message block.
5311 */
5312 if (msz >= ui_sz && alen >= tep->te_alen &&
5313 !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5314 /*
5315 * Reuse the original mblk. Leave options in place.
5316 */
5317 udind = (struct T_unitdata_ind *)mp->b_rptr;
5318 udind->PRIM_type = T_UNITDATA_IND;
5319 udind->SRC_length = tep->te_alen;
5320 addr_startp = mp->b_rptr + udind->SRC_offset;
5321 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5322
5323 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5324 mp->b_datap->db_frtnp != NULL) {
5325 /*
5326 * We have a message block with a free pointer, but extra space
5327 * has been pre-allocated for us in case we need to append an
5328 * option. Reuse the original mblk, leaving existing options in
5329 * place.
5330 */
5331 udind = (struct T_unitdata_ind *)mp->b_rptr;
5332 udind->PRIM_type = T_UNITDATA_IND;
5333 udind->SRC_length = tep->te_alen;
5334 addr_startp = mp->b_rptr + udind->SRC_offset;
5335 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5336
5337 if (peer_tep->te_flag &
5338 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5339 ASSERT(cr != NULL);
5340 /*
5341 * We're appending one new option here after the
5342 * original ones.
5343 */
5344 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5345 cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5346 }
5347
5348 } else if (mp->b_datap->db_frtnp != NULL) {
5349 /*
5350 * The next block creates a new mp and tries to copy the data
5351 * block into it, but that cannot handle a message with a free
5352 * pointer (for more details see the comment in kstrputmsg()
5353 * where dupmsg() is called). Since we can never properly
5354 * duplicate the mp while also extending the data, just error
5355 * out now.
5356 */
5357 tl_uderr(wq, mp, EPROTO);
5358 return;
5359 } else {
5360 /* Allocate a new T_unitdata_ind message */
5361 mblk_t *ui_mp;
5362
5363 ui_mp = allocb(ui_sz, BPRI_MED);
5364 if (ui_mp == NULL) {
5365 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5366 "tl_unitdata:allocb failure:message queued"));
5367 tl_memrecover(wq, mp, ui_sz);
5368 return;
5369 }
5370
5371 /*
5372 * fill in T_UNITDATA_IND contents
5373 */
5374 DB_TYPE(ui_mp) = M_PROTO;
5375 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5376 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5377 udind->PRIM_type = T_UNITDATA_IND;
5378 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5379 udind->SRC_length = tep->te_alen;
5380 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5381 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5382 udind->OPT_offset =
5383 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5384 udind->OPT_length = olen;
5385 if (peer_tep->te_flag &
5386 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5387
5388 if (oldolen != 0) {
5389 bcopy((void *)((uintptr_t)udreq + ooff),
5390 (void *)((uintptr_t)udind +
5391 udind->OPT_offset),
5392 oldolen);
5393 }
5394 ASSERT(cr != NULL);
5395
5396 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5397 oldolen, cr, cpid,
5398 peer_tep->te_flag, peer_tep->te_credp);
5399 } else {
5400 bcopy((void *)((uintptr_t)udreq + ooff),
5401 (void *)((uintptr_t)udind + udind->OPT_offset),
5402 olen);
5403 }
5404
5405 /*
5406 * relink data blocks from mp to ui_mp
5407 */
5408 ui_mp->b_cont = mp->b_cont;
5409 freeb(mp);
5410 mp = ui_mp;
5411 }
5412 /*
5413 * send indication message
5414 */
5415 peer_tep->te_state = nextstate[TE_UNITDATA_IND][peer_tep->te_state];
5416 putnext(peer_tep->te_rq, mp);
5417 }
5418
5419
5420
5421 /*
5422 * Check if a given addr is in use.
5423 * Endpoint ptr returned or NULL if not found.
5424 * The name space is separate for each mode. This implies that
5425 * sockets get their own name space.
5426 */
5427 static tl_endpt_t *
tl_find_peer(tl_endpt_t * tep,tl_addr_t * ap)5428 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5429 {
5430 tl_endpt_t *peer_tep = NULL;
5431 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5432 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5433
5434 ASSERT(!IS_SOCKET(tep));
5435
5436 ASSERT(ap != NULL && ap->ta_alen > 0);
5437 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5438 ASSERT(ap->ta_abuf != NULL);
5439 EQUIV(rc == 0, peer_tep != NULL);
5440 IMPLY(rc == 0,
5441 (tep->te_zoneid == peer_tep->te_zoneid) &&
5442 (tep->te_transport == peer_tep->te_transport));
5443
5444 if ((rc == 0) && (peer_tep->te_closing)) {
5445 tl_refrele(peer_tep);
5446 peer_tep = NULL;
5447 }
5448
5449 return (peer_tep);
5450 }
5451
5452 /*
5453 * Find peer for a socket based on unix domain address.
5454 * For implicit addresses our peer can be found by minor number in ai hash. For
5455 * explicit binds we look vnode address at addr_hash.
5456 */
5457 static tl_endpt_t *
tl_sock_find_peer(tl_endpt_t * tep,soux_addr_t * ux_addr)5458 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5459 {
5460 tl_endpt_t *peer_tep = NULL;
5461 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5462 tep->te_aihash : tep->te_addrhash;
5463 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5464 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5465
5466 ASSERT(IS_SOCKET(tep));
5467 EQUIV(rc == 0, peer_tep != NULL);
5468 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5469
5470 if (peer_tep != NULL) {
5471 /* Don't attempt to use closing peer. */
5472 if (peer_tep->te_closing)
5473 goto errout;
5474
5475 /*
5476 * Cross-zone unix sockets are permitted, but for Trusted
5477 * Extensions only, the "server" for these must be in the
5478 * global zone.
5479 */
5480 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5481 is_system_labeled() &&
5482 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5483 goto errout;
5484 }
5485
5486 return (peer_tep);
5487
5488 errout:
5489 tl_refrele(peer_tep);
5490 return (NULL);
5491 }
5492
5493 /*
5494 * Generate a free addr and return it in struct pointed by ap
5495 * but allocating space for address buffer.
5496 * The generated address will be at least 4 bytes long and, if req->ta_alen
5497 * exceeds 4 bytes, be req->ta_alen bytes long.
5498 *
5499 * If address is found it will be inserted in the hash.
5500 *
5501 * If req->ta_alen is larger than the default alen (4 bytes) the last
5502 * alen-4 bytes will always be the same as in req.
5503 *
5504 * Return 0 for failure.
5505 * Return non-zero for success.
5506 */
5507 static boolean_t
tl_get_any_addr(tl_endpt_t * tep,tl_addr_t * req)5508 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5509 {
5510 t_scalar_t alen;
5511 uint32_t loopcnt; /* Limit loop to 2^32 */
5512
5513 ASSERT(tep->te_hash_hndl != NULL);
5514 ASSERT(!IS_SOCKET(tep));
5515
5516 if (tep->te_hash_hndl == NULL)
5517 return (B_FALSE);
5518
5519 /*
5520 * check if default addr is in use
5521 * if it is - bump it and try again
5522 */
5523 if (req == NULL) {
5524 alen = sizeof (uint32_t);
5525 } else {
5526 alen = max(req->ta_alen, sizeof (uint32_t));
5527 ASSERT(tep->te_zoneid == req->ta_zoneid);
5528 }
5529
5530 if (tep->te_alen < alen) {
5531 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5532
5533 /*
5534 * Not enough space in tep->ta_ap to hold the address,
5535 * allocate a bigger space.
5536 */
5537 if (abuf == NULL)
5538 return (B_FALSE);
5539
5540 if (tep->te_alen > 0)
5541 kmem_free(tep->te_abuf, tep->te_alen);
5542
5543 tep->te_alen = alen;
5544 tep->te_abuf = abuf;
5545 }
5546
5547 /* Copy in the address in req */
5548 if (req != NULL) {
5549 ASSERT(alen >= req->ta_alen);
5550 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5551 }
5552
5553 /*
5554 * First try minor number then try default addresses.
5555 */
5556 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5557
5558 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5559 if (mod_hash_insert_reserve(tep->te_addrhash,
5560 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5561 tep->te_hash_hndl) == 0) {
5562 /*
5563 * found free address
5564 */
5565 tep->te_flag |= TL_ADDRHASHED;
5566 tep->te_hash_hndl = NULL;
5567
5568 return (B_TRUE); /* successful return */
5569 }
5570 /*
5571 * Use default address.
5572 */
5573 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5574 atomic_inc_32(&tep->te_defaddr);
5575 }
5576
5577 /*
5578 * Failed to find anything.
5579 */
5580 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5581 "tl_get_any_addr:looped 2^32 times"));
5582 return (B_FALSE);
5583 }
5584
5585 /*
5586 * reallocb + set r/w ptrs to reflect size.
5587 */
5588 static mblk_t *
tl_resizemp(mblk_t * mp,ssize_t new_size)5589 tl_resizemp(mblk_t *mp, ssize_t new_size)
5590 {
5591 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5592 return (NULL);
5593
5594 mp->b_rptr = DB_BASE(mp);
5595 mp->b_wptr = mp->b_rptr + new_size;
5596 return (mp);
5597 }
5598
5599 static void
tl_cl_backenable(tl_endpt_t * tep)5600 tl_cl_backenable(tl_endpt_t *tep)
5601 {
5602 list_t *l = &tep->te_flowlist;
5603 tl_endpt_t *elp;
5604
5605 ASSERT(IS_CLTS(tep));
5606
5607 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5608 ASSERT(tep->te_ser == elp->te_ser);
5609 ASSERT(elp->te_flowq == tep);
5610 if (!elp->te_closing)
5611 TL_QENABLE(elp);
5612 elp->te_flowq = NULL;
5613 list_remove(l, elp);
5614 }
5615 }
5616
5617 /*
5618 * Unconnect endpoints.
5619 */
5620 static void
tl_co_unconnect(tl_endpt_t * tep)5621 tl_co_unconnect(tl_endpt_t *tep)
5622 {
5623 tl_endpt_t *peer_tep = tep->te_conp;
5624 tl_endpt_t *srv_tep = tep->te_oconp;
5625 list_t *l;
5626 tl_icon_t *tip;
5627 tl_endpt_t *cl_tep;
5628 mblk_t *d_mp;
5629
5630 ASSERT(IS_COTS(tep));
5631 /*
5632 * If our peer is closing, don't use it.
5633 */
5634 if ((peer_tep != NULL) && peer_tep->te_closing) {
5635 TL_UNCONNECT(tep->te_conp);
5636 peer_tep = NULL;
5637 }
5638 if ((srv_tep != NULL) && srv_tep->te_closing) {
5639 TL_UNCONNECT(tep->te_oconp);
5640 srv_tep = NULL;
5641 }
5642
5643 if (tep->te_nicon > 0) {
5644 l = &tep->te_iconp;
5645 /*
5646 * If incoming requests pending, change state
5647 * of clients on disconnect ind event and send
5648 * discon_ind pdu to modules above them
5649 * for server: all clients get disconnect
5650 */
5651
5652 while (tep->te_nicon > 0) {
5653 tip = list_head(l);
5654 cl_tep = tip->ti_tep;
5655
5656 if (cl_tep == NULL) {
5657 tl_freetip(tep, tip);
5658 continue;
5659 }
5660
5661 if (cl_tep->te_oconp != NULL) {
5662 ASSERT(cl_tep != cl_tep->te_oconp);
5663 TL_UNCONNECT(cl_tep->te_oconp);
5664 }
5665
5666 if (cl_tep->te_closing) {
5667 tl_freetip(tep, tip);
5668 continue;
5669 }
5670
5671 enableok(cl_tep->te_wq);
5672 TL_QENABLE(cl_tep);
5673 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5674 if (d_mp != NULL) {
5675 cl_tep->te_state = TS_IDLE;
5676 putnext(cl_tep->te_rq, d_mp);
5677 } else {
5678 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5679 SL_TRACE | SL_ERROR,
5680 "tl_co_unconnect:icmng: "
5681 "allocb failure"));
5682 }
5683 tl_freetip(tep, tip);
5684 }
5685 } else if (srv_tep != NULL) {
5686 /*
5687 * If outgoing request pending, change state
5688 * of server on discon ind event
5689 */
5690
5691 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5692 IS_COTSORD(srv_tep) &&
5693 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5694 /*
5695 * Queue ordrel_ind for server to be picked up
5696 * when the connection is accepted.
5697 */
5698 d_mp = tl_ordrel_ind_alloc();
5699 } else {
5700 /*
5701 * send discon_ind to server
5702 */
5703 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5704 }
5705 if (d_mp == NULL) {
5706 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5707 SL_TRACE | SL_ERROR,
5708 "tl_co_unconnect:outgoing:allocb failure"));
5709 TL_UNCONNECT(tep->te_oconp);
5710 goto discon_peer;
5711 }
5712
5713 /*
5714 * If this is a socket the T_DISCON_IND is queued with
5715 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5716 * from the list of pending connections.
5717 * Note that when te_oconp is set the peer better have
5718 * a t_connind_t for the client.
5719 */
5720 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5721 /*
5722 * Queue the disconnection message.
5723 */
5724 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5725 } else {
5726 tip = tl_icon_find(srv_tep, tep->te_seqno);
5727 if (tip == NULL) {
5728 freemsg(d_mp);
5729 } else {
5730 ASSERT(tep == tip->ti_tep);
5731 ASSERT(tep->te_ser == srv_tep->te_ser);
5732 /*
5733 * Delete tip from the server list.
5734 */
5735 if (srv_tep->te_nicon == 1) {
5736 srv_tep->te_state =
5737 nextstate[TE_DISCON_IND2]
5738 [srv_tep->te_state];
5739 } else {
5740 srv_tep->te_state =
5741 nextstate[TE_DISCON_IND3]
5742 [srv_tep->te_state];
5743 }
5744 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5745 T_DISCON_IND);
5746 putnext(srv_tep->te_rq, d_mp);
5747 tl_freetip(srv_tep, tip);
5748 }
5749 TL_UNCONNECT(tep->te_oconp);
5750 srv_tep = NULL;
5751 }
5752 } else if (peer_tep != NULL) {
5753 /*
5754 * unconnect existing connection
5755 * If connected, change state of peer on
5756 * discon ind event and send discon ind pdu
5757 * to module above it
5758 */
5759
5760 ASSERT(tep->te_ser == peer_tep->te_ser);
5761 if (IS_COTSORD(peer_tep) &&
5762 (peer_tep->te_state == TS_WIND_ORDREL ||
5763 peer_tep->te_state == TS_DATA_XFER)) {
5764 /*
5765 * send ordrel ind
5766 */
5767 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5768 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5769 peer_tep->te_state,
5770 nextstate[TE_ORDREL_IND][peer_tep->te_state]));
5771 d_mp = tl_ordrel_ind_alloc();
5772 if (d_mp == NULL) {
5773 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5774 SL_TRACE | SL_ERROR,
5775 "tl_co_unconnect:connected:"
5776 "allocb failure"));
5777 /*
5778 * Continue with cleaning up peer as
5779 * this side may go away with the close
5780 */
5781 TL_QENABLE(peer_tep);
5782 goto discon_peer;
5783 }
5784 peer_tep->te_state =
5785 nextstate[TE_ORDREL_IND][peer_tep->te_state];
5786
5787 putnext(peer_tep->te_rq, d_mp);
5788 /*
5789 * Handle flow control case. This will generate
5790 * a t_discon_ind message with reason 0 if there
5791 * is data queued on the write side.
5792 */
5793 TL_QENABLE(peer_tep);
5794 } else if (IS_COTSORD(peer_tep) &&
5795 peer_tep->te_state == TS_WREQ_ORDREL) {
5796 /*
5797 * Sent an ordrel_ind. We send a discon with
5798 * with error 0 to inform that the peer is gone.
5799 */
5800 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5801 SL_TRACE | SL_ERROR,
5802 "tl_co_unconnect: discon in state %d",
5803 tep->te_state));
5804 tl_discon_ind(peer_tep, 0);
5805 } else {
5806 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5807 SL_TRACE | SL_ERROR,
5808 "tl_co_unconnect: state %d", tep->te_state));
5809 tl_discon_ind(peer_tep, ECONNRESET);
5810 }
5811
5812 discon_peer:
5813 /*
5814 * Disconnect cross-pointers only for close
5815 */
5816 if (tep->te_closing) {
5817 peer_tep = tep->te_conp;
5818 TL_REMOVE_PEER(peer_tep->te_conp);
5819 TL_REMOVE_PEER(tep->te_conp);
5820 }
5821 }
5822 }
5823
5824 /*
5825 * Note: The following routine does not recover from allocb()
5826 * failures
5827 * The reason should be from the <sys/errno.h> space.
5828 */
5829 static void
tl_discon_ind(tl_endpt_t * tep,uint32_t reason)5830 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5831 {
5832 mblk_t *d_mp;
5833
5834 if (tep->te_closing)
5835 return;
5836
5837 /*
5838 * flush the queues.
5839 */
5840 flushq(tep->te_rq, FLUSHDATA);
5841 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5842
5843 /*
5844 * send discon ind
5845 */
5846 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5847 if (d_mp == NULL) {
5848 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5849 "tl_discon_ind:allocb failure"));
5850 return;
5851 }
5852 tep->te_state = TS_IDLE;
5853 putnext(tep->te_rq, d_mp);
5854 }
5855
5856 /*
5857 * Note: The following routine does not recover from allocb()
5858 * failures
5859 * The reason should be from the <sys/errno.h> space.
5860 */
5861 static mblk_t *
tl_discon_ind_alloc(uint32_t reason,t_scalar_t seqnum)5862 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5863 {
5864 mblk_t *mp;
5865 struct T_discon_ind *tdi;
5866
5867 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5868 DB_TYPE(mp) = M_PROTO;
5869 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5870 tdi = (struct T_discon_ind *)mp->b_rptr;
5871 tdi->PRIM_type = T_DISCON_IND;
5872 tdi->DISCON_reason = reason;
5873 tdi->SEQ_number = seqnum;
5874 }
5875 return (mp);
5876 }
5877
5878
5879 /*
5880 * Note: The following routine does not recover from allocb()
5881 * failures
5882 */
5883 static mblk_t *
tl_ordrel_ind_alloc(void)5884 tl_ordrel_ind_alloc(void)
5885 {
5886 mblk_t *mp;
5887 struct T_ordrel_ind *toi;
5888
5889 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5890 DB_TYPE(mp) = M_PROTO;
5891 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5892 toi = (struct T_ordrel_ind *)mp->b_rptr;
5893 toi->PRIM_type = T_ORDREL_IND;
5894 }
5895 return (mp);
5896 }
5897
5898
5899 /*
5900 * Lookup the seqno in the list of queued connections.
5901 */
5902 static tl_icon_t *
tl_icon_find(tl_endpt_t * tep,t_scalar_t seqno)5903 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5904 {
5905 list_t *l = &tep->te_iconp;
5906 tl_icon_t *tip = list_head(l);
5907
5908 ASSERT(seqno != 0);
5909
5910 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5911 ;
5912
5913 return (tip);
5914 }
5915
5916 /*
5917 * Queue data for a given T_CONN_IND while verifying that redundant
5918 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5919 * Used when the originator of the connection closes.
5920 */
5921 static void
tl_icon_queuemsg(tl_endpt_t * tep,t_scalar_t seqno,mblk_t * nmp)5922 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5923 {
5924 tl_icon_t *tip;
5925 mblk_t **mpp, *mp;
5926 int prim, nprim;
5927
5928 if (nmp->b_datap->db_type == M_PROTO)
5929 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5930 else
5931 nprim = -1; /* M_DATA */
5932
5933 tip = tl_icon_find(tep, seqno);
5934 if (tip == NULL) {
5935 freemsg(nmp);
5936 return;
5937 }
5938
5939 ASSERT(tip->ti_seqno != 0);
5940 mpp = &tip->ti_mp;
5941 while (*mpp != NULL) {
5942 mp = *mpp;
5943
5944 if (mp->b_datap->db_type == M_PROTO)
5945 prim = ((union T_primitives *)mp->b_rptr)->type;
5946 else
5947 prim = -1; /* M_DATA */
5948
5949 /*
5950 * Allow nothing after a T_DISCON_IND
5951 */
5952 if (prim == T_DISCON_IND) {
5953 freemsg(nmp);
5954 return;
5955 }
5956 /*
5957 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5958 */
5959 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5960 freemsg(nmp);
5961 return;
5962 }
5963 mpp = &(mp->b_next);
5964 }
5965 *mpp = nmp;
5966 }
5967
5968 /*
5969 * Verify if a certain TPI primitive exists on the connind queue.
5970 * Use prim -1 for M_DATA.
5971 * Return non-zero if found.
5972 */
5973 static boolean_t
tl_icon_hasprim(tl_endpt_t * tep,t_scalar_t seqno,t_scalar_t prim)5974 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5975 {
5976 tl_icon_t *tip = tl_icon_find(tep, seqno);
5977 boolean_t found = B_FALSE;
5978
5979 if (tip != NULL) {
5980 mblk_t *mp;
5981 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5982 found = (DB_TYPE(mp) == M_PROTO &&
5983 ((union T_primitives *)mp->b_rptr)->type == prim);
5984 }
5985 }
5986 return (found);
5987 }
5988
5989 /*
5990 * Send the b_next mblk chain that has accumulated before the connection
5991 * was accepted. Perform the necessary state transitions.
5992 */
5993 static void
tl_icon_sendmsgs(tl_endpt_t * tep,mblk_t ** mpp)5994 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5995 {
5996 mblk_t *mp;
5997 union T_primitives *primp;
5998
5999 if (tep->te_closing) {
6000 tl_icon_freemsgs(mpp);
6001 return;
6002 }
6003
6004 ASSERT(tep->te_state == TS_DATA_XFER);
6005 ASSERT(tep->te_rq->q_first == NULL);
6006
6007 while ((mp = *mpp) != NULL) {
6008 *mpp = mp->b_next;
6009 mp->b_next = NULL;
6010
6011 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
6012 switch (DB_TYPE(mp)) {
6013 default:
6014 freemsg(mp);
6015 break;
6016 case M_DATA:
6017 putnext(tep->te_rq, mp);
6018 break;
6019 case M_PROTO:
6020 primp = (union T_primitives *)mp->b_rptr;
6021 switch (primp->type) {
6022 case T_UNITDATA_IND:
6023 case T_DATA_IND:
6024 case T_OPTDATA_IND:
6025 case T_EXDATA_IND:
6026 putnext(tep->te_rq, mp);
6027 break;
6028 case T_ORDREL_IND:
6029 tep->te_state = nextstate[TE_ORDREL_IND]
6030 [tep->te_state];
6031 putnext(tep->te_rq, mp);
6032 break;
6033 case T_DISCON_IND:
6034 tep->te_state = TS_IDLE;
6035 putnext(tep->te_rq, mp);
6036 break;
6037 default:
6038 #ifdef DEBUG
6039 cmn_err(CE_PANIC,
6040 "tl_icon_sendmsgs: unknown primitive");
6041 #endif /* DEBUG */
6042 freemsg(mp);
6043 break;
6044 }
6045 break;
6046 }
6047 }
6048 }
6049
6050 /*
6051 * Free the b_next mblk chain that has accumulated before the connection
6052 * was accepted.
6053 */
6054 static void
tl_icon_freemsgs(mblk_t ** mpp)6055 tl_icon_freemsgs(mblk_t **mpp)
6056 {
6057 mblk_t *mp;
6058
6059 while ((mp = *mpp) != NULL) {
6060 *mpp = mp->b_next;
6061 mp->b_next = NULL;
6062 freemsg(mp);
6063 }
6064 }
6065
6066 /*
6067 * Send M_ERROR
6068 * Note: assumes caller ensured enough space in mp or enough
6069 * memory available. Does not attempt recovery from allocb()
6070 * failures
6071 */
6072
6073 static void
tl_merror(queue_t * wq,mblk_t * mp,int error)6074 tl_merror(queue_t *wq, mblk_t *mp, int error)
6075 {
6076 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6077
6078 if (tep->te_closing) {
6079 freemsg(mp);
6080 return;
6081 }
6082
6083 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6084 SL_TRACE | SL_ERROR,
6085 "tl_merror: tep=%p, err=%d", (void *)tep, error));
6086
6087 /*
6088 * flush all messages on queue. we are shutting
6089 * the stream down on fatal error
6090 */
6091 flushq(wq, FLUSHALL);
6092 if (IS_COTS(tep)) {
6093 /* connection oriented - unconnect endpoints */
6094 tl_co_unconnect(tep);
6095 }
6096 if (mp->b_cont) {
6097 freemsg(mp->b_cont);
6098 mp->b_cont = NULL;
6099 }
6100
6101 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6102 freemsg(mp);
6103 mp = allocb(1, BPRI_HI);
6104 if (mp == NULL) {
6105 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6106 SL_TRACE | SL_ERROR,
6107 "tl_merror:M_PROTO: out of memory"));
6108 return;
6109 }
6110 }
6111 if (mp) {
6112 DB_TYPE(mp) = M_ERROR;
6113 mp->b_rptr = DB_BASE(mp);
6114 *mp->b_rptr = (char)error;
6115 mp->b_wptr = mp->b_rptr + sizeof (char);
6116 qreply(wq, mp);
6117 } else {
6118 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6119 }
6120 }
6121
6122 static void
tl_fill_option(uchar_t * buf,cred_t * cr,pid_t cpid,int flag,cred_t * pcr)6123 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6124 {
6125 ASSERT(cr != NULL);
6126
6127 if (flag & TL_SETCRED) {
6128 struct opthdr *opt = (struct opthdr *)buf;
6129 tl_credopt_t *tlcred;
6130
6131 opt->level = TL_PROT_LEVEL;
6132 opt->name = TL_OPT_PEER_CRED;
6133 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6134
6135 tlcred = (tl_credopt_t *)(opt + 1);
6136 tlcred->tc_uid = crgetuid(cr);
6137 tlcred->tc_gid = crgetgid(cr);
6138 tlcred->tc_ruid = crgetruid(cr);
6139 tlcred->tc_rgid = crgetrgid(cr);
6140 tlcred->tc_suid = crgetsuid(cr);
6141 tlcred->tc_sgid = crgetsgid(cr);
6142 tlcred->tc_ngroups = crgetngroups(cr);
6143 } else if (flag & TL_SETUCRED) {
6144 struct opthdr *opt = (struct opthdr *)buf;
6145
6146 opt->level = TL_PROT_LEVEL;
6147 opt->name = TL_OPT_PEER_UCRED;
6148 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6149
6150 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6151 } else {
6152 struct T_opthdr *topt = (struct T_opthdr *)buf;
6153 ASSERT(flag & TL_SOCKUCRED);
6154
6155 topt->level = SOL_SOCKET;
6156 topt->name = SCM_UCRED;
6157 topt->len = ucredminsize(cr) + sizeof (*topt);
6158 topt->status = 0;
6159 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6160 }
6161 }
6162
6163 /* ARGSUSED */
6164 static int
tl_default_opt(queue_t * wq,int level,int name,uchar_t * ptr)6165 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6166 {
6167 /* no default value processed in protocol specific code currently */
6168 return (-1);
6169 }
6170
6171 /* ARGSUSED */
6172 static int
tl_get_opt(queue_t * wq,int level,int name,uchar_t * ptr)6173 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6174 {
6175 int len;
6176 tl_endpt_t *tep;
6177 int *valp;
6178
6179 tep = (tl_endpt_t *)wq->q_ptr;
6180
6181 len = 0;
6182
6183 /*
6184 * Assumes: option level and name sanity check done elsewhere
6185 */
6186
6187 switch (level) {
6188 case SOL_SOCKET:
6189 if (!IS_SOCKET(tep))
6190 break;
6191 switch (name) {
6192 case SO_RECVUCRED:
6193 len = sizeof (int);
6194 valp = (int *)ptr;
6195 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6196 break;
6197 default:
6198 break;
6199 }
6200 break;
6201 case TL_PROT_LEVEL:
6202 switch (name) {
6203 case TL_OPT_PEER_CRED:
6204 case TL_OPT_PEER_UCRED:
6205 /*
6206 * option not supposed to retrieved directly
6207 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6208 * when some internal flags set by other options
6209 * Direct retrieval always designed to fail(ignored)
6210 * for this option.
6211 */
6212 break;
6213 }
6214 }
6215 return (len);
6216 }
6217
6218 /* ARGSUSED */
6219 static int
tl_set_opt(queue_t * wq,uint_t mgmt_flags,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)6220 tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
6221 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
6222 cred_t *cr)
6223 {
6224 int error;
6225 tl_endpt_t *tep;
6226
6227 tep = (tl_endpt_t *)wq->q_ptr;
6228
6229 error = 0; /* NOERROR */
6230
6231 /*
6232 * Assumes: option level and name sanity checks done elsewhere
6233 */
6234
6235 switch (level) {
6236 case SOL_SOCKET:
6237 if (!IS_SOCKET(tep)) {
6238 error = EINVAL;
6239 break;
6240 }
6241 /*
6242 * TBD: fill in other AF_UNIX socket options and then stop
6243 * returning error.
6244 */
6245 switch (name) {
6246 case SO_RECVUCRED:
6247 /*
6248 * We only support this for datagram sockets;
6249 * getpeerucred handles the connection oriented
6250 * transports.
6251 */
6252 if (!IS_CLTS(tep)) {
6253 error = EINVAL;
6254 break;
6255 }
6256 if (*(int *)invalp == 0)
6257 tep->te_flag &= ~TL_SOCKUCRED;
6258 else
6259 tep->te_flag |= TL_SOCKUCRED;
6260 break;
6261 default:
6262 error = EINVAL;
6263 break;
6264 }
6265 break;
6266 case TL_PROT_LEVEL:
6267 switch (name) {
6268 case TL_OPT_PEER_CRED:
6269 case TL_OPT_PEER_UCRED:
6270 /*
6271 * option not supposed to be set directly
6272 * Its value in initialized for each endpoint at
6273 * driver open time.
6274 * Direct setting always designed to fail for this
6275 * option.
6276 */
6277 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6278 SL_TRACE | SL_ERROR,
6279 "tl_set_opt: option is not supported"));
6280 error = EPROTO;
6281 break;
6282 }
6283 }
6284 return (error);
6285 }
6286
6287
6288 static void
tl_timer(void * arg)6289 tl_timer(void *arg)
6290 {
6291 queue_t *wq = arg;
6292 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6293
6294 ASSERT(tep);
6295
6296 tep->te_timoutid = 0;
6297
6298 enableok(wq);
6299 /*
6300 * Note: can call wsrv directly here and save context switch
6301 * Consider change when qtimeout (not timeout) is active
6302 */
6303 qenable(wq);
6304 }
6305
6306 static void
tl_buffer(void * arg)6307 tl_buffer(void *arg)
6308 {
6309 queue_t *wq = arg;
6310 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6311
6312 ASSERT(tep);
6313
6314 tep->te_bufcid = 0;
6315 tep->te_nowsrv = B_FALSE;
6316
6317 enableok(wq);
6318 /*
6319 * Note: can call wsrv directly here and save context switch
6320 * Consider change when qbufcall (not bufcall) is active
6321 */
6322 qenable(wq);
6323 }
6324
6325 static void
tl_memrecover(queue_t * wq,mblk_t * mp,size_t size)6326 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6327 {
6328 tl_endpt_t *tep;
6329
6330 tep = (tl_endpt_t *)wq->q_ptr;
6331
6332 if (tep->te_closing) {
6333 freemsg(mp);
6334 return;
6335 }
6336 noenable(wq);
6337
6338 (void) insq(wq, wq->q_first, mp);
6339
6340 if (tep->te_bufcid || tep->te_timoutid) {
6341 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
6342 "tl_memrecover:recover %p pending", (void *)wq));
6343 return;
6344 }
6345
6346 tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
6347 if (tep->te_bufcid == NULL) {
6348 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6349 drv_usectohz(TL_BUFWAIT));
6350 }
6351 }
6352
6353 static void
tl_freetip(tl_endpt_t * tep,tl_icon_t * tip)6354 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6355 {
6356 ASSERT(tip->ti_seqno != 0);
6357
6358 if (tip->ti_mp != NULL) {
6359 tl_icon_freemsgs(&tip->ti_mp);
6360 tip->ti_mp = NULL;
6361 }
6362 if (tip->ti_tep != NULL) {
6363 tl_refrele(tip->ti_tep);
6364 tip->ti_tep = NULL;
6365 }
6366 list_remove(&tep->te_iconp, tip);
6367 kmem_free(tip, sizeof (tl_icon_t));
6368 tep->te_nicon--;
6369 }
6370
6371 /*
6372 * Remove address from address hash.
6373 */
6374 static void
tl_addr_unbind(tl_endpt_t * tep)6375 tl_addr_unbind(tl_endpt_t *tep)
6376 {
6377 tl_endpt_t *elp;
6378
6379 if (tep->te_flag & TL_ADDRHASHED) {
6380 if (IS_SOCKET(tep)) {
6381 (void) mod_hash_remove(tep->te_addrhash,
6382 (mod_hash_key_t)tep->te_vp,
6383 (mod_hash_val_t *)&elp);
6384 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6385 tep->te_magic = SOU_MAGIC_IMPLICIT;
6386 } else {
6387 (void) mod_hash_remove(tep->te_addrhash,
6388 (mod_hash_key_t)&tep->te_ap,
6389 (mod_hash_val_t *)&elp);
6390 (void) kmem_free(tep->te_abuf, tep->te_alen);
6391 tep->te_alen = -1;
6392 tep->te_abuf = NULL;
6393 }
6394 tep->te_flag &= ~TL_ADDRHASHED;
6395 }
6396 }
6397