1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multithreaded STREAMS Local Transport Provider. 30 * 31 * OVERVIEW 32 * ======== 33 * 34 * This driver provides TLI as well as socket semantics. It provides 35 * connectionless, connection oriented, and connection oriented with orderly 36 * release transports for TLI and sockets. Each transport type has separate name 37 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 38 * this removes any name space conflicts when binding to socket style transport 39 * addresses. 40 * 41 * NOTE: There is one exception: Socket ticots and ticotsord transports share 42 * the same namespace. In fact, sockets always use ticotsord type transport. 43 * 44 * The driver mode is specified during open() by the minor number used for 45 * open. 46 * 47 * The sockets in addition have the following semantic differences: 48 * No support for passing up credentials (TL_SET[U]CRED). 
49 * 50 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, 51 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to 52 * T_OPTDATA_IND. 53 * 54 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before 55 * a T_CONN_RES is received from the acceptor. This means that a socket 56 * connect will complete before the peer has called accept. 57 * 58 * 59 * MULTITHREADING 60 * ============== 61 * 62 * The driver does not use STREAMS protection mechanisms. Instead it uses a 63 * generic "serializer" abstraction. Most of the operations are executed behind 64 * the serializer and are essentially single-threaded. All functions executed 65 * behind the same serializer are strictly serialized. So if one thread calls 66 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls 67 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one 68 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or 69 * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the 70 * same time. 71 * 72 * Connectionless transports use a single serializer per transport type (one for 73 * TLI and one for sockets). Connection-oriented transports use finer-grained 74 * serializers. 75 * 76 * All COTS-type endpoints start their life with private serializers. During 77 * connection request processing the endpoint serializer is switched to the 78 * listener's serializer and the rest of T_CONN_REQ processing is done on the 79 * listener serializer. During T_CONN_RES processing the eager serializer is 80 * switched from listener to acceptor serializer and after that point all 81 * processing for eager and acceptor happens on this serializer. To avoid races 82 * with endpoint closes while its serializer may be changing closes are blocked 83 * while serializers are manipulated. 
84 * 85 * References accounting 86 * --------------------- 87 * 88 * Endpoints are reference counted and freed when the last reference is 89 * dropped. Functions within the serializer may access an endpoint state even 90 * after an endpoint closed. The te_closing being set on the endpoint indicates 91 * that the endpoint entered its close routine. 92 * 93 * One reference is held for each opened endpoint instance. The reference 94 * counter is incremented when the endpoint is linked to another endpoint and 95 * decremented when the link disappears. It is also incremented when the 96 * endpoint is found by the hash table lookup. This increment is atomic with the 97 * lookup itself and happens while the hash table read lock is held. 98 * 99 * Close synchronization 100 * --------------------- 101 * 102 * During close the endpoint is marked as closing using the te_closing flag. It is 103 * usually enough to check for te_closing flag since all other state changes 104 * happen after this flag is set and the close entered serializer. Immediately 105 * after setting te_closing flag tl_close() enters serializer and waits until 106 * the callback finishes. This allows all functions called within serializer to 107 * simply check te_closing without any locks. 108 * 109 * Serializer management. 110 * --------------------- 111 * 112 * For COTS transports serializers are created when the endpoint is constructed 113 * and destroyed when the endpoint is destructed. CLTS transports use global 114 * serializers - one for sockets and one for TLI. 115 * 116 * COTS serializers have separate reference counts to deal with several 117 * endpoints sharing the same serializer. There is a subtle problem related to 118 * the serializer destruction. The serializer should never be destroyed by any 119 * function executed inside serializer. 
This means that close has to wait till 120 * all serializer activity for this endpoint is finished before it can drop the 121 * last reference on the endpoint (which may as well free the serializer). This 122 * is only relevant for COTS transports which manage serializers 123 * dynamically. For CLTS transports close may complete without waiting for all 124 * serializer activity to finish since serializer is only destroyed at driver 125 * detach time. 126 * 127 * COTS endpoints keep track of the number of outstanding requests on the 128 * serializer for the endpoint. The code handling accept() avoids changing 129 * client serializer if it has any pending messages on the serializer and 130 * instead moves acceptor to listener's serializer. 131 * 132 * 133 * Use of hash tables 134 * ------------------ 135 * 136 * The driver uses modhash hash table implementation. Each transport uses two 137 * hash tables - one for finding endpoints by acceptor ID and another one for 138 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same 139 * pair of hash tables since sockets only use TICOTSORD. 140 * 141 * All hash tables lookups increment a reference count for returned endpoints, 142 * so we may safely check the endpoint state even when the endpoint is removed 143 * from the hash by another thread immediately after it is found. 144 * 145 * 146 * CLOSE processing 147 * ================ 148 * 149 * The driver enters serializer twice on close(). The close sequence is the 150 * following: 151 * 152 * 1) Wait until closing is safe (te_closewait becomes zero) 153 * This step is needed to prevent close during serializer switches. In most 154 * cases (close happening after connection establishment) te_closewait is 155 * zero. 156 * 2) Set te_closing. 157 * 3) Call tl_close_ser() within serializer and wait for it to complete. 158 * 159 * tl_close_ser() simply marks the endpoint and wakes up waiting tl_close(). 
160 * It also needs to clear write-side q_next pointers - this should be done 161 * before qprocsoff(). 162 * 163 * This synchronous serializer entry during close is needed to ensure that 164 * the queue is valid everywhere inside the serializer. 165 * 166 * Note that in many cases close will execute tl_close_ser() synchronously, 167 * so it will not wait at all. 168 * 169 * 4) Calls qprocsoff(). 170 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to 171 * complete (for COTS transports). For CLTS transport there is no wait. 172 * 173 * tl_close_finish_ser() Finishes the close process and wakes up waiting 174 * close if there is any. 175 * 176 * Note that in most cases close will enter tl_close_finish_ser() 177 * synchronously and will not wait at all. 178 * 179 * 180 * Flow Control 181 * ============ 182 * 183 * The driver implements both read and write side service routines. No one calls 184 * putq() on the read queue. The read side service routine tl_rsrv() is called 185 * when the read side stream is back-enabled. It enters serializer synchronously 186 * (waits till serializer processing is complete). Within serializer it 187 * back-enables all endpoints blocked by the queue for connection-less 188 * transports and enables write side service processing for the peer for 189 * connection-oriented transports. 190 * 191 * Read and write side service routines use special mblk_sized space in the 192 * endpoint structure to enter perimeter. 193 * 194 * Write-side flow control 195 * ----------------------- 196 * 197 * Write side flow control is a bit tricky. The driver needs to deal with two 198 * message queues - the explicit STREAMS message queue maintained by 199 * putq()/getq()/putbq() and the implicit queue within the serializer. These two 200 * queues should be synchronized to preserve message ordering and should 201 * maintain a single order determined by the order in which messages enter 202 * tl_wput(). 
In order to maintain the ordering between these two queues the 203 * STREAMS queue is only manipulated within the serializer, so the ordering is 204 * provided by the serializer. 205 * 206 * Functions called from the tl_wsrv() sometimes may call putbq(). To 207 * immediately stop any further processing of the STREAMS message queues the 208 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 209 * side service processing stops when the flag is set. 210 * 211 * The tl_wsrv() function enters serializer synchronously and waits for it to 212 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 213 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 214 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 215 * always bounded by the amount of messages on the STREAMS queue at the time 216 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 217 * queue from another serialized entry which can't happen in parallel. This 218 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 219 * of it draining forever while writer places new messages on the STREAMS 220 * queue). 221 * 222 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 223 * 224 * 225 * Unix Domain Sockets 226 * =================== 227 * 228 * The driver knows the structure of Unix Domain sockets addresses and treats 229 * them differently from generic TLI addresses. For sockets implicit binds are 230 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 231 * instead of using address length of zero. Explicit binds specify 232 * SOU_MAGIC_EXPLICIT as magic. 233 * 234 * For implicit binds we always use minor number as soua_vp part of the address 235 * and avoid any hash table lookups. This saves two hash tables lookups per 236 * anonymous bind. 
237 * 238 * For explicit address we hash the vnode pointer instead of hashing the 239 * full-scale address+zone+length. Hashing by pointer is more efficient than 240 * hashing by the full address. 241 * 242 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the 243 * tep structure, so it should be never freed. 244 * 245 * Also for sockets the driver always uses minor number as acceptor id. 246 * 247 * TPI VIOLATIONS 248 * -------------- 249 * 250 * This driver violates TPI in several respects for Unix Domain Sockets: 251 * 252 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind 253 * is requested and the endpoint is already in use. There is no point in 254 * generating an unused address since this address will be rejected by 255 * sockfs anyway. For implicit binds it always generates a new address 256 * (sets soua_vp to its minor number). 257 * 258 * 2) It always uses minor number as acceptor ID and never uses queue 259 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ 260 * message and they do not use the queue pointer. 261 * 262 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog 263 * followed by listen(). The listen() should be issued with non-zero 264 * backlog, so sotpi_listen() issues unbind request followed by bind 265 * request to the same address but with a non-zero qlen value. Both 266 * tl_bind() and tl_unbind() require write lock on the hash table to 267 * insert/remove the address. The driver does not remove the address from 268 * the hash for endpoints that are bound to the explicit address and have 269 * backlog of zero. During T_BIND_REQ processing if the address requested 270 * is equal to the address the endpoint already has it updates the backlog 271 * without reinserting the address in the hash table. This optimization 272 * avoids two hash table updates for each listener created. 
It also 273 * avoids the problem of a "stolen" address when another listener may use 274 * the same address between the unbind and bind and suddenly listen() fails 275 * because address is in use even though the bind() succeeded. 276 * 277 * 278 * CONNECTIONLESS TRANSPORTS 279 * ========================= 280 * 281 * Connectionless transports all share the same serializer (one for TLI and one 282 * for Sockets). Functions executing behind serializer can check or modify state 283 * of any endpoint. 284 * 285 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the 286 * te_lastep field. The next time X talks to some address A it checks whether A 287 * is the same as Y's address and if it is there is no need to lookup Y. If the 288 * address is different or the state of Y is not appropriate (e.g. closed or not 289 * idle) X does a lookup using tl_find_peer() and caches the new address. 290 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold 291 * on the endpoint found. 292 * 293 * During close of endpoint Y it doesn't try to remove itself from other 294 * endpoints caches. They will detect that Y is gone and will search the peer 295 * endpoint again. 296 * 297 * Flow Control Handling. 298 * ---------------------- 299 * 300 * Each connectionless endpoint keeps a list of endpoints which are 301 * flow-controlled by its queue. It also keeps a pointer to the queue which 302 * flow-controls itself. Whenever flow control releases for endpoint X it 303 * enables all queues from the list. During close it also back-enables everyone 304 * in the list. If X is flow-controlled when it is closing it removes itself from 305 * the peer's list. 306 * 307 * DATA STRUCTURES 308 * =============== 309 * 310 * Each endpoint is represented by the tl_endpt_t structure which keeps all the 311 * endpoint state. For connection-oriented transports it keeps a list 312 * of pending connections (tl_icon_t). 
For connectionless transports it keeps a 313 * list of endpoints flow controlled by this one. 314 * 315 * Each transport type is represented by a per-transport data structure 316 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 317 * endpoint address hash tables for each transport. It also contains pointer to 318 * transport serializer for connectionless transports. 319 * 320 * Each endpoint keeps a link to its transport structure, so the code can find 321 * all per-transport information quickly. 322 */ 323 324 #include <sys/types.h> 325 #include <sys/inttypes.h> 326 #include <sys/stream.h> 327 #include <sys/stropts.h> 328 #define _SUN_TPI_VERSION 2 329 #include <sys/tihdr.h> 330 #include <sys/strlog.h> 331 #include <sys/debug.h> 332 #include <sys/cred.h> 333 #include <sys/errno.h> 334 #include <sys/kmem.h> 335 #include <sys/id_space.h> 336 #include <sys/modhash.h> 337 #include <sys/mkdev.h> 338 #include <sys/tl.h> 339 #include <sys/stat.h> 340 #include <sys/conf.h> 341 #include <sys/modctl.h> 342 #include <sys/strsun.h> 343 #include <sys/socket.h> 344 #include <sys/socketvar.h> 345 #include <sys/sysmacros.h> 346 #include <sys/xti_xtiopt.h> 347 #include <sys/ddi.h> 348 #include <sys/sunddi.h> 349 #include <sys/zone.h> 350 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 351 #include <inet/optcom.h> 352 #include <sys/strsubr.h> 353 #include <sys/ucred.h> 354 #include <sys/suntpi.h> 355 #include <sys/list.h> 356 #include <sys/serializer.h> 357 358 /* 359 * TBD List 360 * 14 Eliminate state changes through table 361 * 16. AF_UNIX socket options 362 * 17. connect() for ticlts 363 * 18. support for "netstat" to show AF_UNIX plus TLI local 364 * transport connections 365 * 21. 
sanity check to flushing on sending M_ERROR 366 */ 367 368 /* 369 * CONSTANT DECLARATIONS 370 * -------------------- 371 */ 372 373 /* 374 * Local declarations 375 */ 376 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 377 378 #define TL_MAXQLEN 128 /* Max conn indications allowed. */ 379 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 380 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 381 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 382 /* 383 * Hash tables size. 384 */ 385 #define TL_HASH_SIZE 311 386 387 /* 388 * Definitions for module_info 389 */ 390 #define TL_ID (104) /* module ID number */ 391 #define TL_NAME "tl" /* module name */ 392 #define TL_MINPSZ (0) /* min packet size */ 393 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 394 #define TL_HIWAT (16*1024) /* hi water mark */ 395 #define TL_LOWAT (256) /* lo water mark */ 396 /* 397 * Definition of minor numbers/modes for new transport provider modes. 398 * We view the socket use as a separate mode to get a separate name space. 
399 */ 400 #define TL_TICOTS 0 /* connection oriented transport */ 401 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 402 #define TL_TICLTS 2 /* connectionless transport */ 403 #define TL_UNUSED 3 404 #define TL_SOCKET 4 /* Socket */ 405 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 406 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 407 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 408 409 #define TL_MINOR_MASK 0x7 410 #define TL_MINOR_START (TL_TICLTS + 1) 411 412 /* 413 * LOCAL MACROS 414 */ 415 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 416 417 /* 418 * EXTERNAL VARIABLE DECLARATIONS 419 * ----------------------------- 420 */ 421 /* 422 * state table defined in the OS space.c 423 */ 424 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 425 426 /* 427 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 428 */ 429 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 430 static int tl_close(queue_t *, int, cred_t *); 431 static void tl_wput(queue_t *, mblk_t *); 432 static void tl_wsrv(queue_t *); 433 static void tl_rsrv(queue_t *); 434 435 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 436 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 437 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 438 439 440 /* 441 * GLOBAL DATA STRUCTURES AND VARIABLES 442 * ----------------------------------- 443 */ 444 445 /* 446 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 447 * For now, we only manage the SO_RECVUCRED option but we also have 448 * harmless dummy options to make things work with some common code we access. 
449 */ 450 opdes_t tl_opt_arr[] = { 451 /* The SO_TYPE is needed for the hack below */ 452 { 453 SO_TYPE, 454 SOL_SOCKET, 455 OA_R, 456 OA_R, 457 OP_NP, 458 OP_PASSNEXT, 459 sizeof (t_scalar_t), 460 0 461 }, 462 { 463 SO_RECVUCRED, 464 SOL_SOCKET, 465 OA_RW, 466 OA_RW, 467 OP_NP, 468 OP_PASSNEXT, 469 sizeof (int), 470 0 471 } 472 }; 473 474 /* 475 * Table of all supported levels 476 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 477 * any supported options so we need this info separately. 478 * 479 * This is needed only for topmost tpi providers. 480 */ 481 optlevel_t tl_valid_levels_arr[] = { 482 XTI_GENERIC, 483 SOL_SOCKET, 484 TL_PROT_LEVEL 485 }; 486 487 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 488 /* 489 * Current upper bound on the amount of space needed to return all options. 490 * Additional options with data size of sizeof(long) are handled automatically. 491 * Others need hand job. 492 */ 493 #define TL_MAX_OPT_BUF_LEN \ 494 ((A_CNT(tl_opt_arr) << 2) + \ 495 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 496 + 64 + sizeof (struct T_optmgmt_ack)) 497 498 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 499 500 /* 501 * transport addr structure 502 */ 503 typedef struct tl_addr { 504 zoneid_t ta_zoneid; /* Zone scope of address */ 505 t_scalar_t ta_alen; /* length of abuf */ 506 void *ta_abuf; /* the addr itself */ 507 } tl_addr_t; 508 509 /* 510 * Refcounted version of serializer. 511 */ 512 typedef struct tl_serializer { 513 uint_t ts_refcnt; 514 serializer_t *ts_serializer; 515 } tl_serializer_t; 516 517 /* 518 * Each transport type has a separate state. 519 * Per-transport state. 
520 */ 521 typedef struct tl_transport_state { 522 char *tr_name; 523 minor_t tr_minor; 524 uint32_t tr_defaddr; 525 mod_hash_t *tr_ai_hash; 526 mod_hash_t *tr_addr_hash; 527 tl_serializer_t *tr_serializer; 528 } tl_transport_state_t; 529 530 #define TL_DFADDR 0x1000 531 532 static tl_transport_state_t tl_transports[] = { 533 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 534 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 535 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 536 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 537 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 538 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 539 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 540 }; 541 542 #define TL_MAXTRANSPORT A_CNT(tl_transports) 543 544 struct tl_endpt; 545 typedef struct tl_endpt tl_endpt_t; 546 547 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 548 549 /* 550 * Data structure used to represent pending connects. 551 * Records enough information so that the connecting peer can close 552 * before the connection gets accepted. 
553 */ 554 typedef struct tl_icon { 555 list_node_t ti_node; 556 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 557 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 558 t_scalar_t ti_seqno; /* Sequence number */ 559 } tl_icon_t; 560 561 typedef struct so_ux_addr soux_addr_t; 562 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 563 564 /* 565 * transport endpoint structure 566 */ 567 struct tl_endpt { 568 queue_t *te_rq; /* stream read queue */ 569 queue_t *te_wq; /* stream write queue */ 570 uint32_t te_refcnt; 571 int32_t te_state; /* TPI state of endpoint */ 572 minor_t te_minor; /* minor number */ 573 #define te_seqno te_minor 574 uint_t te_flag; /* flag field */ 575 boolean_t te_nowsrv; 576 tl_serializer_t *te_ser; /* Serializer to use */ 577 #define te_serializer te_ser->ts_serializer 578 579 soux_addr_t te_uxaddr; /* Socket address */ 580 #define te_magic te_uxaddr.soua_magic 581 #define te_vp te_uxaddr.soua_vp 582 tl_addr_t te_ap; /* addr bound to this endpt */ 583 #define te_zoneid te_ap.ta_zoneid 584 #define te_alen te_ap.ta_alen 585 #define te_abuf te_ap.ta_abuf 586 587 tl_transport_state_t *te_transport; 588 #define te_addrhash te_transport->tr_addr_hash 589 #define te_aihash te_transport->tr_ai_hash 590 #define te_defaddr te_transport->tr_defaddr 591 cred_t *te_credp; /* endpoint user credentials */ 592 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 593 594 /* 595 * State specific for connection-oriented and connectionless transports. 596 */ 597 union { 598 /* Connection-oriented state. */ 599 struct { 600 t_uscalar_t _te_nicon; /* count of conn requests */ 601 t_uscalar_t _te_qlen; /* max conn requests */ 602 tl_endpt_t *_te_oconp; /* conn request pending */ 603 tl_endpt_t *_te_conp; /* connected endpt */ 604 #ifndef _ILP32 605 void *_te_pad; 606 #endif 607 list_t _te_iconp; /* list of conn ind. pending */ 608 } _te_cots_state; 609 /* Connection-less state. */ 610 struct { 611 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 612 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 613 list_node_t _te_flows; /* lists of connections */ 614 list_t _te_flowlist; /* Who flowcontrols on me */ 615 } _te_clts_state; 616 } _te_transport_state; 617 #define te_nicon _te_transport_state._te_cots_state._te_nicon 618 #define te_qlen _te_transport_state._te_cots_state._te_qlen 619 #define te_oconp _te_transport_state._te_cots_state._te_oconp 620 #define te_conp _te_transport_state._te_cots_state._te_conp 621 #define te_iconp _te_transport_state._te_cots_state._te_iconp 622 #define te_lastep _te_transport_state._te_clts_state._te_lastep 623 #define te_flowq _te_transport_state._te_clts_state._te_flowq 624 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 625 #define te_flows _te_transport_state._te_clts_state._te_flows 626 627 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 628 timeout_id_t te_timoutid; /* outstanding timeout id */ 629 pid_t te_cpid; /* cached pid of endpoint */ 630 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 631 /* 632 * Pieces of the endpoint state needed for closing. 633 */ 634 kmutex_t te_closelock; 635 kcondvar_t te_closecv; 636 uint8_t te_closing; /* The endpoint started closing */ 637 uint8_t te_closewait; /* Wait in close until zero */ 638 mblk_t te_closemp; /* for entering serializer on close */ 639 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 640 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 641 kmutex_t te_srv_lock; 642 kcondvar_t te_srv_cv; 643 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 644 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 645 /* 646 * Pieces of the endpoint state needed for serializer transitions. 647 */ 648 kmutex_t te_ser_lock; /* Protects the count below */ 649 uint_t te_ser_count; /* Number of messages on serializer */ 650 }; 651 652 /* 653 * Flag values. Lower 4 bits specify that transport used. 
654 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 655 * they allow to identify the endpoint more easily. 656 */ 657 #define TL_LISTENER 0x00010 /* the listener endpoint */ 658 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 659 #define TL_EAGER 0x00040 /* connecting endpoint */ 660 #define TL_ACCEPTED 0x00080 /* accepted connection */ 661 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 662 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 663 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 664 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 665 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 666 /* 667 * Boolean checks for the endpoint type. 668 */ 669 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 670 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 671 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 672 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 673 674 #define TLPID(mp, tep) (DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp)) 675 676 /* 677 * Certain operations are always used together. These macros reduce the chance 678 * of missing a part of a combination. 679 */ 680 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 681 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 682 683 #define TL_PUTBQ(x, mp) { \ 684 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 685 (x)->te_nowsrv = B_TRUE; \ 686 (void) putbq((x)->te_wq, mp); \ 687 } 688 689 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 690 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 691 692 /* 693 * STREAMS driver glue data structures. 
694 */ 695 static struct module_info tl_minfo = { 696 TL_ID, /* mi_idnum */ 697 TL_NAME, /* mi_idname */ 698 TL_MINPSZ, /* mi_minpsz */ 699 TL_MAXPSZ, /* mi_maxpsz */ 700 TL_HIWAT, /* mi_hiwat */ 701 TL_LOWAT /* mi_lowat */ 702 }; 703 704 static struct qinit tl_rinit = { 705 NULL, /* qi_putp */ 706 (int (*)())tl_rsrv, /* qi_srvp */ 707 tl_open, /* qi_qopen */ 708 tl_close, /* qi_qclose */ 709 NULL, /* qi_qadmin */ 710 &tl_minfo, /* qi_minfo */ 711 NULL /* qi_mstat */ 712 }; 713 714 static struct qinit tl_winit = { 715 (int (*)())tl_wput, /* qi_putp */ 716 (int (*)())tl_wsrv, /* qi_srvp */ 717 NULL, /* qi_qopen */ 718 NULL, /* qi_qclose */ 719 NULL, /* qi_qadmin */ 720 &tl_minfo, /* qi_minfo */ 721 NULL /* qi_mstat */ 722 }; 723 724 static struct streamtab tlinfo = { 725 &tl_rinit, /* st_rdinit */ 726 &tl_winit, /* st_wrinit */ 727 NULL, /* st_muxrinit */ 728 NULL /* st_muxwrinit */ 729 }; 730 731 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 732 nulldev, tl_info, D_MP, &tlinfo); 733 734 static struct modldrv modldrv = { 735 &mod_driverops, /* Type of module -- pseudo driver here */ 736 "TPI Local Transport (tl) %I%", 737 &tl_devops, /* driver ops */ 738 }; 739 740 /* 741 * Module linkage information for the kernel. 742 */ 743 static struct modlinkage modlinkage = { 744 MODREV_1, 745 &modldrv, 746 NULL 747 }; 748 749 /* 750 * Templates for response to info request 751 * Check sanity of unlimited connect data etc. 
 */

/*
 * PROVIDER_flag bits placed in the T_INFO_ACK templates below.  The
 * connectionless and connection-oriented templates currently use the same
 * set of bits.
 */
#define	TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
#define	TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)

/*
 * T_INFO_ACK template for connection-oriented endpoints.  Fields marked
 * "fill at run time" are placeholders in this template.
 */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/*
 * T_INFO_ACK template for connectionless endpoints.  Fields marked
 * "fill at run time" are placeholders in this template.
 */
static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size  -2 => not supported */
		-1,		/* ADDR_size -1 => unlimited */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
	};

/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/* NOTE(review): not referenced in this part of the file - presumably debug. */
static int tl_client_closing_when_accepting;

/* NOTE(review): not referenced in this part of the file - presumably debug. */
static int tl_serializer_noswitch;

/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *, mblk_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void
tl_putq_ser(mblk_t *, tl_endpt_t *); 892 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 893 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 895 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 896 static void tl_addr_unbind(tl_endpt_t *); 897 898 /* 899 * Intialize option database object for TL 900 */ 901 902 optdb_obj_t tl_opt_obj = { 903 tl_default_opt, /* TL default value function pointer */ 904 tl_get_opt, /* TL get function pointer */ 905 tl_set_opt, /* TL set function pointer */ 906 B_TRUE, /* TL is tpi provider */ 907 TL_OPT_ARR_CNT, /* TL option database count of entries */ 908 tl_opt_arr, /* TL option database */ 909 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 910 tl_valid_levels_arr /* TL valid level array */ 911 }; 912 913 /* 914 * Logical operations. 915 * 916 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y 917 * should also be true. 918 * 919 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or falce at 920 * the same time. 921 */ 922 #define IMPLY(X, Y) (!(X) || (Y)) 923 #define EQUIV(X, Y) (IMPLY(X, Y) && IMPLY(Y, X)) 924 925 /* 926 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 927 * --------------------------------------- 928 */ 929 930 /* 931 * Loadable module routines 932 */ 933 int 934 _init(void) 935 { 936 return (mod_install(&modlinkage)); 937 } 938 939 int 940 _fini(void) 941 { 942 return (mod_remove(&modlinkage)); 943 } 944 945 int 946 _info(struct modinfo *modinfop) 947 { 948 return (mod_info(&modlinkage, modinfop)); 949 } 950 951 /* 952 * Driver Entry Points and Other routines 953 */ 954 static int 955 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 956 { 957 int i; 958 char name[32]; 959 960 /* 961 * Resume from a checkpoint state. 962 */ 963 if (cmd == DDI_RESUME) 964 return (DDI_SUCCESS); 965 966 if (cmd != DDI_ATTACH) 967 return (DDI_FAILURE); 968 969 /* 970 * Deduce TIDU size to use. 
Note: "strmsgsz" being 0 has semantics that 971 * streams message sizes can be unlimited. We use a defined constant 972 * instead. 973 */ 974 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 975 976 /* 977 * Create subdevices for each transport. 978 */ 979 for (i = 0; i < TL_UNUSED; i++) { 980 if (ddi_create_minor_node(devi, 981 tl_transports[i].tr_name, 982 S_IFCHR, tl_transports[i].tr_minor, 983 DDI_PSEUDO, NULL) == DDI_FAILURE) { 984 ddi_remove_minor_node(devi, NULL); 985 return (DDI_FAILURE); 986 } 987 } 988 989 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 990 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 991 992 if (tl_cache == NULL) { 993 ddi_remove_minor_node(devi, NULL); 994 return (DDI_FAILURE); 995 } 996 997 tl_minors = id_space_create("tl_minor_space", 998 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 999 1000 /* 1001 * Create ID space for minor numbers 1002 */ 1003 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1004 tl_transport_state_t *t = &tl_transports[i]; 1005 1006 if (i == TL_UNUSED) 1007 continue; 1008 1009 /* Socket COTSORD shares namespace with COTS */ 1010 if (i == TL_SOCK_COTSORD) { 1011 t->tr_ai_hash = 1012 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1013 ASSERT(t->tr_ai_hash != NULL); 1014 t->tr_addr_hash = 1015 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1016 ASSERT(t->tr_addr_hash != NULL); 1017 continue; 1018 } 1019 1020 /* 1021 * Create hash tables. 
1022 */ 1023 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1024 t->tr_name); 1025 #ifdef _ILP32 1026 if (i & TL_SOCKET) 1027 t->tr_ai_hash = 1028 mod_hash_create_idhash(name, tl_hash_size - 1, 1029 mod_hash_null_valdtor); 1030 else 1031 t->tr_ai_hash = 1032 mod_hash_create_ptrhash(name, tl_hash_size, 1033 mod_hash_null_valdtor, sizeof (queue_t)); 1034 #else 1035 t->tr_ai_hash = 1036 mod_hash_create_idhash(name, tl_hash_size - 1, 1037 mod_hash_null_valdtor); 1038 #endif /* _ILP32 */ 1039 1040 if (i & TL_SOCKET) { 1041 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1042 t->tr_name); 1043 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1044 tl_hash_size, mod_hash_null_valdtor, 1045 sizeof (uintptr_t)); 1046 } else { 1047 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1048 t->tr_name); 1049 t->tr_addr_hash = mod_hash_create_extended(name, 1050 tl_hash_size, mod_hash_null_keydtor, 1051 mod_hash_null_valdtor, 1052 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1053 } 1054 1055 /* Create serializer for connectionless transports. */ 1056 if (i & TL_TICLTS) 1057 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1058 } 1059 1060 tl_dip = devi; 1061 1062 return (DDI_SUCCESS); 1063 } 1064 1065 static int 1066 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1067 { 1068 int i; 1069 1070 if (cmd == DDI_SUSPEND) 1071 return (DDI_SUCCESS); 1072 1073 if (cmd != DDI_DETACH) 1074 return (DDI_FAILURE); 1075 1076 /* 1077 * Destroy arenas and hash tables. 
1078 */ 1079 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1080 tl_transport_state_t *t = &tl_transports[i]; 1081 1082 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1083 continue; 1084 1085 ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL)); 1086 if (t->tr_serializer != NULL) { 1087 tl_serializer_refrele(t->tr_serializer); 1088 t->tr_serializer = NULL; 1089 } 1090 1091 #ifdef _ILP32 1092 if (i & TL_SOCKET) 1093 mod_hash_destroy_idhash(t->tr_ai_hash); 1094 else 1095 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1096 #else 1097 mod_hash_destroy_idhash(t->tr_ai_hash); 1098 #endif /* _ILP32 */ 1099 t->tr_ai_hash = NULL; 1100 if (i & TL_SOCKET) 1101 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1102 else 1103 mod_hash_destroy_hash(t->tr_addr_hash); 1104 t->tr_addr_hash = NULL; 1105 } 1106 1107 kmem_cache_destroy(tl_cache); 1108 tl_cache = NULL; 1109 id_space_destroy(tl_minors); 1110 tl_minors = NULL; 1111 ddi_remove_minor_node(devi, NULL); 1112 return (DDI_SUCCESS); 1113 } 1114 1115 /* ARGSUSED */ 1116 static int 1117 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1118 { 1119 1120 int retcode = DDI_FAILURE; 1121 1122 switch (infocmd) { 1123 1124 case DDI_INFO_DEVT2DEVINFO: 1125 if (tl_dip != NULL) { 1126 *result = (void *)tl_dip; 1127 retcode = DDI_SUCCESS; 1128 } 1129 break; 1130 1131 case DDI_INFO_DEVT2INSTANCE: 1132 *result = (void *)0; 1133 retcode = DDI_SUCCESS; 1134 break; 1135 1136 default: 1137 break; 1138 } 1139 return (retcode); 1140 } 1141 1142 /* 1143 * Endpoint reference management. 
1144 */ 1145 static void 1146 tl_refhold(tl_endpt_t *tep) 1147 { 1148 atomic_add_32(&tep->te_refcnt, 1); 1149 } 1150 1151 static void 1152 tl_refrele(tl_endpt_t *tep) 1153 { 1154 ASSERT(tep->te_refcnt != 0); 1155 1156 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1157 tl_free(tep); 1158 } 1159 1160 /*ARGSUSED*/ 1161 static int 1162 tl_constructor(void *buf, void *cdrarg, int kmflags) 1163 { 1164 tl_endpt_t *tep = buf; 1165 1166 bzero(tep, sizeof (tl_endpt_t)); 1167 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1168 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1169 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1170 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1171 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1172 1173 return (0); 1174 } 1175 1176 /*ARGSUSED*/ 1177 static void 1178 tl_destructor(void *buf, void *cdrarg) 1179 { 1180 tl_endpt_t *tep = buf; 1181 1182 mutex_destroy(&tep->te_closelock); 1183 cv_destroy(&tep->te_closecv); 1184 mutex_destroy(&tep->te_srv_lock); 1185 cv_destroy(&tep->te_srv_cv); 1186 mutex_destroy(&tep->te_ser_lock); 1187 } 1188 1189 static void 1190 tl_free(tl_endpt_t *tep) 1191 { 1192 ASSERT(tep->te_refcnt == 0); 1193 ASSERT(tep->te_transport != NULL); 1194 ASSERT(tep->te_rq == NULL); 1195 ASSERT(tep->te_wq == NULL); 1196 ASSERT(tep->te_ser != NULL); 1197 ASSERT(tep->te_ser_count == 0); 1198 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1199 1200 if (IS_SOCKET(tep)) { 1201 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1202 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1203 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1204 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1205 } else if (tep->te_abuf != NULL) { 1206 kmem_free(tep->te_abuf, tep->te_alen); 1207 tep->te_alen = -1; /* uninitialized */ 1208 tep->te_abuf = NULL; 1209 } else { 1210 ASSERT(tep->te_alen == -1); 1211 } 1212 1213 id_free(tl_minors, tep->te_minor); 1214 ASSERT(tep->te_credp == NULL); 1215 1216 if (tep->te_hash_hndl != NULL) 1217 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1218 1219 if (IS_COTS(tep)) { 1220 TL_REMOVE_PEER(tep->te_conp); 1221 TL_REMOVE_PEER(tep->te_oconp); 1222 tl_serializer_refrele(tep->te_ser); 1223 tep->te_ser = NULL; 1224 ASSERT(tep->te_nicon == 0); 1225 ASSERT(list_head(&tep->te_iconp) == NULL); 1226 } else { 1227 ASSERT(tep->te_lastep == NULL); 1228 ASSERT(list_head(&tep->te_flowlist) == NULL); 1229 ASSERT(tep->te_flowq == NULL); 1230 } 1231 1232 ASSERT(tep->te_bufcid == 0); 1233 ASSERT(tep->te_timoutid == 0); 1234 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1235 tep->te_acceptor_id = 0; 1236 1237 ASSERT(tep->te_closewait == 0); 1238 ASSERT(!tep->te_rsrv_active); 1239 ASSERT(!tep->te_wsrv_active); 1240 tep->te_closing = 0; 1241 tep->te_nowsrv = B_FALSE; 1242 tep->te_flag = 0; 1243 1244 kmem_cache_free(tl_cache, tep); 1245 } 1246 1247 /* 1248 * Allocate/free reference-counted wrappers for serializers. 
1249 */ 1250 static tl_serializer_t * 1251 tl_serializer_alloc(int flags) 1252 { 1253 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1254 serializer_t *ser; 1255 1256 if (s == NULL) 1257 return (NULL); 1258 1259 ser = serializer_create(flags); 1260 1261 if (ser == NULL) { 1262 kmem_free(s, sizeof (tl_serializer_t)); 1263 return (NULL); 1264 } 1265 1266 s->ts_refcnt = 1; 1267 s->ts_serializer = ser; 1268 return (s); 1269 } 1270 1271 static void 1272 tl_serializer_refhold(tl_serializer_t *s) 1273 { 1274 atomic_add_32(&s->ts_refcnt, 1); 1275 } 1276 1277 static void 1278 tl_serializer_refrele(tl_serializer_t *s) 1279 { 1280 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1281 serializer_destroy(s->ts_serializer); 1282 kmem_free(s, sizeof (tl_serializer_t)); 1283 } 1284 } 1285 1286 /* 1287 * Post a request on the endpoint serializer. For COTS transports keep track of 1288 * the number of pending requests. 1289 */ 1290 static void 1291 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1292 { 1293 if (IS_COTS(tep)) { 1294 mutex_enter(&tep->te_ser_lock); 1295 tep->te_ser_count++; 1296 mutex_exit(&tep->te_ser_lock); 1297 } 1298 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1299 } 1300 1301 /* 1302 * Complete processing the request on the serializer. Decrement the counter for 1303 * pending requests for COTS transports. 1304 */ 1305 static void 1306 tl_serializer_exit(tl_endpt_t *tep) 1307 { 1308 if (IS_COTS(tep)) { 1309 mutex_enter(&tep->te_ser_lock); 1310 ASSERT(tep->te_ser_count != 0); 1311 tep->te_ser_count--; 1312 mutex_exit(&tep->te_ser_lock); 1313 } 1314 } 1315 1316 /* 1317 * Hash management functions. 1318 */ 1319 1320 /* 1321 * Return TRUE if two addresses are equal, false otherwise. 
1322 */ 1323 static boolean_t 1324 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1325 { 1326 return ((ap1->ta_alen > 0) && 1327 (ap1->ta_alen == ap2->ta_alen) && 1328 (ap1->ta_zoneid == ap2->ta_zoneid) && 1329 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1330 } 1331 1332 /* 1333 * This function is called whenever an endpoint is found in the hash table. 1334 */ 1335 /* ARGSUSED0 */ 1336 static void 1337 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1338 { 1339 tl_refhold((tl_endpt_t *)val); 1340 } 1341 1342 /* 1343 * Address hash function. 1344 */ 1345 /* ARGSUSED */ 1346 static uint_t 1347 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1348 { 1349 tl_addr_t *ap = (tl_addr_t *)key; 1350 size_t len = ap->ta_alen; 1351 uchar_t *p = ap->ta_abuf; 1352 uint_t i, g; 1353 1354 ASSERT((len > 0) && (p != NULL)); 1355 1356 for (i = ap->ta_zoneid; len -- != 0; p++) { 1357 i = (i << 4) + (*p); 1358 if ((g = (i & 0xf0000000U)) != 0) { 1359 i ^= (g >> 24); 1360 i ^= g; 1361 } 1362 } 1363 return (i); 1364 } 1365 1366 /* 1367 * This function is used by hash lookups. It compares two generic addresses. 1368 */ 1369 static int 1370 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1371 { 1372 #ifdef DEBUG 1373 tl_addr_t *ap1 = (tl_addr_t *)key1; 1374 tl_addr_t *ap2 = (tl_addr_t *)key2; 1375 1376 ASSERT(key1 != NULL); 1377 ASSERT(key2 != NULL); 1378 1379 ASSERT(ap1->ta_abuf != NULL); 1380 ASSERT(ap2->ta_abuf != NULL); 1381 ASSERT(ap1->ta_alen > 0); 1382 ASSERT(ap2->ta_alen > 0); 1383 #endif 1384 1385 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1386 } 1387 1388 /* 1389 * Prevent endpoint from closing if possible. 1390 * Return B_TRUE on success, B_FALSE on failure. 1391 */ 1392 static boolean_t 1393 tl_noclose(tl_endpt_t *tep) 1394 { 1395 boolean_t rc = B_FALSE; 1396 1397 mutex_enter(&tep->te_closelock); 1398 if (! 
tep->te_closing) { 1399 ASSERT(tep->te_closewait == 0); 1400 tep->te_closewait++; 1401 rc = B_TRUE; 1402 } 1403 mutex_exit(&tep->te_closelock); 1404 return (rc); 1405 } 1406 1407 /* 1408 * Allow endpoint to close if needed. 1409 */ 1410 static void 1411 tl_closeok(tl_endpt_t *tep) 1412 { 1413 ASSERT(tep->te_closewait > 0); 1414 mutex_enter(&tep->te_closelock); 1415 ASSERT(tep->te_closewait == 1); 1416 tep->te_closewait--; 1417 cv_signal(&tep->te_closecv); 1418 mutex_exit(&tep->te_closelock); 1419 } 1420 1421 /* 1422 * STREAMS open entry point. 1423 */ 1424 /* ARGSUSED */ 1425 static int 1426 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1427 { 1428 tl_endpt_t *tep; 1429 minor_t minor = getminor(*devp); 1430 1431 /* 1432 * Driver is called directly. Both CLONEOPEN and MODOPEN 1433 * are illegal 1434 */ 1435 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1436 return (ENXIO); 1437 1438 if (rq->q_ptr != NULL) 1439 return (0); 1440 1441 /* Minor number should specify the mode used for the driver. */ 1442 if ((minor >= TL_UNUSED)) 1443 return (ENXIO); 1444 1445 if (oflag & SO_SOCKSTR) { 1446 minor |= TL_SOCKET; 1447 } 1448 1449 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1450 tep->te_refcnt = 1; 1451 tep->te_cpid = curproc->p_pid; 1452 rq->q_ptr = WR(rq)->q_ptr = tep; 1453 tep->te_state = TS_UNBND; 1454 tep->te_credp = credp; 1455 crhold(credp); 1456 tep->te_zoneid = getzoneid(); 1457 1458 tep->te_flag = minor & TL_MINOR_MASK; 1459 tep->te_transport = &tl_transports[minor]; 1460 1461 /* Allocate a unique minor number for this instance. */ 1462 tep->te_minor = (minor_t)id_alloc(tl_minors); 1463 1464 /* Reserve hash handle for bind(). 
*/ 1465 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1466 1467 /* Transport-specific initialization */ 1468 if (IS_COTS(tep)) { 1469 /* Use private serializer */ 1470 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1471 1472 /* Create list for pending connections */ 1473 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1474 offsetof(tl_icon_t, ti_node)); 1475 tep->te_qlen = 0; 1476 tep->te_nicon = 0; 1477 tep->te_oconp = NULL; 1478 tep->te_conp = NULL; 1479 } else { 1480 /* Use shared serializer */ 1481 tep->te_ser = tep->te_transport->tr_serializer; 1482 bzero(&tep->te_flows, sizeof (list_node_t)); 1483 /* Create list for flow control */ 1484 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1485 offsetof(tl_endpt_t, te_flows)); 1486 tep->te_flowq = NULL; 1487 tep->te_lastep = NULL; 1488 1489 } 1490 1491 /* Initialize endpoint address */ 1492 if (IS_SOCKET(tep)) { 1493 /* Socket-specific address handling. */ 1494 tep->te_alen = TL_SOUX_ADDRLEN; 1495 tep->te_abuf = &tep->te_uxaddr; 1496 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1497 tep->te_magic = SOU_MAGIC_IMPLICIT; 1498 } else { 1499 tep->te_alen = -1; 1500 tep->te_abuf = NULL; 1501 } 1502 1503 /* clone the driver */ 1504 *devp = makedevice(getmajor(*devp), tep->te_minor); 1505 1506 tep->te_rq = rq; 1507 tep->te_wq = WR(rq); 1508 1509 #ifdef _ILP32 1510 if (IS_SOCKET(tep)) 1511 tep->te_acceptor_id = tep->te_minor; 1512 else 1513 tep->te_acceptor_id = (t_uscalar_t)rq; 1514 #else 1515 tep->te_acceptor_id = tep->te_minor; 1516 #endif /* _ILP32 */ 1517 1518 1519 qprocson(rq); 1520 1521 /* 1522 * Insert acceptor ID in the hash. The AI hash always sleeps on 1523 * insertion so insertion can't fail. 
1524 */ 1525 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1526 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1527 (mod_hash_val_t)tep); 1528 1529 return (0); 1530 } 1531 1532 /* ARGSUSED1 */ 1533 static int 1534 tl_close(queue_t *rq, int flag, cred_t *credp) 1535 { 1536 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1537 tl_endpt_t *elp = NULL; 1538 queue_t *wq = tep->te_wq; 1539 int rc; 1540 1541 ASSERT(wq == WR(rq)); 1542 1543 /* 1544 * Remove the endpoint from acceptor hash. 1545 */ 1546 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1547 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1548 (mod_hash_val_t *)&elp); 1549 ASSERT(rc == 0 && tep == elp); 1550 if ((rc != 0) || (tep != elp)) { 1551 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1552 SL_TRACE|SL_ERROR, 1553 "tl_close:inconsistency in AI hash")); 1554 } 1555 1556 /* 1557 * Wait till close is safe, then mark endpoint as closing. 1558 */ 1559 mutex_enter(&tep->te_closelock); 1560 while (tep->te_closewait) 1561 cv_wait(&tep->te_closecv, &tep->te_closelock); 1562 tep->te_closing = B_TRUE; 1563 /* 1564 * Will wait for the serializer part of the close to finish, so set 1565 * te_closewait now. 1566 */ 1567 tep->te_closewait = 1; 1568 tep->te_nowsrv = B_FALSE; 1569 mutex_exit(&tep->te_closelock); 1570 1571 /* 1572 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1573 * It is safe because close will wait for tl_close_ser to finish. 1574 */ 1575 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1576 1577 /* 1578 * Wait for the first phase of close to complete before qprocsoff(). 
1579 */ 1580 mutex_enter(&tep->te_closelock); 1581 while (tep->te_closewait) 1582 cv_wait(&tep->te_closecv, &tep->te_closelock); 1583 mutex_exit(&tep->te_closelock); 1584 1585 qprocsoff(rq); 1586 1587 if (tep->te_bufcid) { 1588 qunbufcall(rq, tep->te_bufcid); 1589 tep->te_bufcid = 0; 1590 } 1591 if (tep->te_timoutid) { 1592 (void) quntimeout(rq, tep->te_timoutid); 1593 tep->te_timoutid = 0; 1594 } 1595 1596 /* 1597 * Finish close behind serializer. 1598 * 1599 * For a CLTS endpoint increase a refcount and continue close processing 1600 * with serializer protection. This processing may happen asynchronously 1601 * with the completion of tl_close(). 1602 * 1603 * Fot a COTS endpoint wait before destroying tep since the serializer 1604 * may go away together with tep and we need to destroy serializer 1605 * outside of serializer context. 1606 */ 1607 ASSERT(tep->te_closewait == 0); 1608 if (IS_COTS(tep)) 1609 tep->te_closewait = 1; 1610 else 1611 tl_refhold(tep); 1612 1613 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1614 1615 /* 1616 * For connection-oriented transports wait for all serializer activity 1617 * to settle down. 1618 */ 1619 if (IS_COTS(tep)) { 1620 mutex_enter(&tep->te_closelock); 1621 while (tep->te_closewait) 1622 cv_wait(&tep->te_closecv, &tep->te_closelock); 1623 mutex_exit(&tep->te_closelock); 1624 } 1625 1626 crfree(tep->te_credp); 1627 tep->te_credp = NULL; 1628 tep->te_wq = NULL; 1629 tl_refrele(tep); 1630 /* 1631 * tep is likely to be destroyed now, so can't reference it any more. 1632 */ 1633 1634 rq->q_ptr = wq->q_ptr = NULL; 1635 return (0); 1636 } 1637 1638 /* 1639 * First phase of close processing done behind the serializer. 1640 * 1641 * Do not drop the reference in the end - tl_close() wants this reference to 1642 * stay. 
1643 */ 1644 /* ARGSUSED0 */ 1645 static void 1646 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1647 { 1648 ASSERT(tep->te_closing); 1649 ASSERT(tep->te_closewait == 1); 1650 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1651 1652 tep->te_flag |= TL_CLOSE_SER; 1653 1654 /* 1655 * Drain out all messages on queue except for TL_TICOTS where the 1656 * abortive release semantics permit discarding of data on close 1657 */ 1658 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1659 tl_wsrv_ser(NULL, tep); 1660 } 1661 1662 /* Remove address from hash table. */ 1663 tl_addr_unbind(tep); 1664 /* 1665 * qprocsoff() gets confused when q->q_next is not NULL on the write 1666 * queue of the driver, so clear these before qprocsoff() is called. 1667 * Also clear q_next for the peer since this queue is going away. 1668 */ 1669 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1670 tl_endpt_t *peer_tep = tep->te_conp; 1671 1672 tep->te_wq->q_next = NULL; 1673 if ((peer_tep != NULL) && !peer_tep->te_closing) 1674 peer_tep->te_wq->q_next = NULL; 1675 } 1676 1677 tep->te_rq = NULL; 1678 1679 /* wake up tl_close() */ 1680 tl_closeok(tep); 1681 tl_serializer_exit(tep); 1682 } 1683 1684 /* 1685 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1686 * the reference for CLTS. 1687 * 1688 * Called from serializer. Should drop reference count for CLTS only. 1689 */ 1690 /* ARGSUSED0 */ 1691 static void 1692 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1693 { 1694 ASSERT(tep->te_closing); 1695 ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0)); 1696 ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1)); 1697 1698 tep->te_state = -1; /* Uninitialized */ 1699 if (IS_COTS(tep)) { 1700 tl_co_unconnect(tep); 1701 } else { 1702 /* Connectionless specific cleanup */ 1703 TL_REMOVE_PEER(tep->te_lastep); 1704 /* 1705 * Backenable anybody that is flow controlled waiting for 1706 * this endpoint. 
1707 */ 1708 tl_cl_backenable(tep); 1709 if (tep->te_flowq != NULL) { 1710 list_remove(&(tep->te_flowq->te_flowlist), tep); 1711 tep->te_flowq = NULL; 1712 } 1713 } 1714 1715 tl_serializer_exit(tep); 1716 if (IS_COTS(tep)) 1717 tl_closeok(tep); 1718 else 1719 tl_refrele(tep); 1720 } 1721 1722 /* 1723 * STREAMS write-side put procedure. 1724 * Enter serializer for most of the processing. 1725 * 1726 * The T_CONN_REQ is processed outside of serializer. 1727 */ 1728 static void 1729 tl_wput(queue_t *wq, mblk_t *mp) 1730 { 1731 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1732 ssize_t msz = MBLKL(mp); 1733 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1734 tlproc_t *tl_proc = NULL; 1735 1736 switch (DB_TYPE(mp)) { 1737 case M_DATA: 1738 /* Only valid for connection-oriented transports */ 1739 if (IS_CLTS(tep)) { 1740 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1741 SL_TRACE|SL_ERROR, 1742 "tl_wput:M_DATA invalid for ticlts driver")); 1743 tl_merror(wq, mp, EPROTO); 1744 return; 1745 } 1746 tl_proc = tl_wput_data_ser; 1747 break; 1748 1749 case M_IOCTL: 1750 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1751 case TL_IOC_CREDOPT: 1752 /* FALLTHROUGH */ 1753 case TL_IOC_UCREDOPT: 1754 /* 1755 * Serialize endpoint state change. 
1756 */ 1757 tl_proc = tl_do_ioctl_ser; 1758 break; 1759 1760 default: 1761 miocnak(wq, mp, 0, EINVAL); 1762 return; 1763 } 1764 break; 1765 1766 case M_FLUSH: 1767 /* 1768 * do canonical M_FLUSH processing 1769 */ 1770 if (*mp->b_rptr & FLUSHW) { 1771 flushq(wq, FLUSHALL); 1772 *mp->b_rptr &= ~FLUSHW; 1773 } 1774 if (*mp->b_rptr & FLUSHR) { 1775 flushq(RD(wq), FLUSHALL); 1776 qreply(wq, mp); 1777 } else { 1778 freemsg(mp); 1779 } 1780 return; 1781 1782 case M_PROTO: 1783 if (msz < sizeof (prim->type)) { 1784 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1785 SL_TRACE|SL_ERROR, 1786 "tl_wput:M_PROTO data too short")); 1787 tl_merror(wq, mp, EPROTO); 1788 return; 1789 } 1790 switch (prim->type) { 1791 case T_OPTMGMT_REQ: 1792 case T_SVR4_OPTMGMT_REQ: 1793 /* 1794 * Process TPI option management requests immediately 1795 * in put procedure regardless of in-order processing 1796 * of already queued messages. 1797 * (Note: This driver supports AF_UNIX socket 1798 * implementation. Unless we implement this processing, 1799 * setsockopt() on socket endpoint will block on flow 1800 * controlled endpoints which it should not. That is 1801 * required for successful execution of VSU socket tests 1802 * and is consistent with BSD socket behavior). 
1803 */ 1804 tl_optmgmt(wq, mp); 1805 return; 1806 case O_T_BIND_REQ: 1807 case T_BIND_REQ: 1808 tl_proc = tl_bind_ser; 1809 break; 1810 case T_CONN_REQ: 1811 if (IS_CLTS(tep)) { 1812 tl_merror(wq, mp, EPROTO); 1813 return; 1814 } 1815 tl_conn_req(wq, mp); 1816 return; 1817 case T_DATA_REQ: 1818 case T_OPTDATA_REQ: 1819 case T_EXDATA_REQ: 1820 case T_ORDREL_REQ: 1821 tl_proc = tl_putq_ser; 1822 break; 1823 case T_UNITDATA_REQ: 1824 if (IS_COTS(tep) || 1825 (msz < sizeof (struct T_unitdata_req))) { 1826 tl_merror(wq, mp, EPROTO); 1827 return; 1828 } 1829 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1830 tl_proc = tl_unitdata_ser; 1831 } else { 1832 tl_proc = tl_putq_ser; 1833 } 1834 break; 1835 default: 1836 /* 1837 * process in service procedure if message already 1838 * queued (maintain in-order processing) 1839 */ 1840 if (wq->q_first != NULL) { 1841 tl_proc = tl_putq_ser; 1842 } else { 1843 tl_proc = tl_wput_ser; 1844 } 1845 break; 1846 } 1847 break; 1848 1849 case M_PCPROTO: 1850 /* 1851 * Check that the message has enough data to figure out TPI 1852 * primitive. 1853 */ 1854 if (msz < sizeof (prim->type)) { 1855 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1856 SL_TRACE|SL_ERROR, 1857 "tl_wput:M_PCROTO data too short")); 1858 tl_merror(wq, mp, EPROTO); 1859 return; 1860 } 1861 switch (prim->type) { 1862 case T_CAPABILITY_REQ: 1863 tl_capability_req(mp, tep); 1864 return; 1865 case T_INFO_REQ: 1866 tl_proc = tl_info_req_ser; 1867 break; 1868 default: 1869 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1870 SL_TRACE|SL_ERROR, 1871 "tl_wput:unknown TPI msg primitive")); 1872 tl_merror(wq, mp, EPROTO); 1873 return; 1874 } 1875 break; 1876 default: 1877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1878 "tl_wput:default:unexpected Streams message")); 1879 freemsg(mp); 1880 return; 1881 } 1882 1883 /* 1884 * Continue processing via serializer. 
1885 */ 1886 ASSERT(tl_proc != NULL); 1887 tl_refhold(tep); 1888 tl_serializer_enter(tep, tl_proc, mp); 1889 } 1890 1891 /* 1892 * Place message on the queue while preserving order. 1893 */ 1894 static void 1895 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1896 { 1897 if (tep->te_closing) { 1898 tl_wput_ser(mp, tep); 1899 } else { 1900 TL_PUTQ(tep, mp); 1901 tl_serializer_exit(tep); 1902 tl_refrele(tep); 1903 } 1904 1905 } 1906 1907 static void 1908 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1909 { 1910 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1911 1912 switch (DB_TYPE(mp)) { 1913 case M_DATA: 1914 tl_data(mp, tep); 1915 break; 1916 case M_PROTO: 1917 tl_do_proto(mp, tep); 1918 break; 1919 default: 1920 freemsg(mp); 1921 break; 1922 } 1923 } 1924 1925 /* 1926 * Write side put procedure called from serializer. 1927 */ 1928 static void 1929 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1930 { 1931 tl_wput_common_ser(mp, tep); 1932 tl_serializer_exit(tep); 1933 tl_refrele(tep); 1934 } 1935 1936 /* 1937 * M_DATA processing. Called from serializer. 1938 */ 1939 static void 1940 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1941 { 1942 tl_endpt_t *peer_tep = tep->te_conp; 1943 queue_t *peer_rq; 1944 1945 ASSERT(DB_TYPE(mp) == M_DATA); 1946 ASSERT(IS_COTS(tep)); 1947 1948 ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer)); 1949 1950 /* 1951 * fastpath for data. Ignore flow control if tep is closing. 
1952 */ 1953 if ((peer_tep != NULL) && 1954 !peer_tep->te_closing && 1955 ((tep->te_state == TS_DATA_XFER) || 1956 (tep->te_state == TS_WREQ_ORDREL)) && 1957 (tep->te_wq != NULL) && 1958 (tep->te_wq->q_first == NULL) && 1959 ((peer_tep->te_state == TS_DATA_XFER) || 1960 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1961 ((peer_rq = peer_tep->te_rq) != NULL) && 1962 (canputnext(peer_rq) || tep->te_closing)) { 1963 putnext(peer_rq, mp); 1964 } else if (tep->te_closing) { 1965 /* 1966 * It is possible that by the time we got here tep started to 1967 * close. If the write queue is not empty, and the state is 1968 * TS_DATA_XFER the data should be delivered in order, so we 1969 * call putq() instead of freeing the data. 1970 */ 1971 if ((tep->te_wq != NULL) && 1972 ((tep->te_state == TS_DATA_XFER) || 1973 (tep->te_state == TS_WREQ_ORDREL))) { 1974 TL_PUTQ(tep, mp); 1975 } else { 1976 freemsg(mp); 1977 } 1978 } else { 1979 TL_PUTQ(tep, mp); 1980 } 1981 1982 tl_serializer_exit(tep); 1983 tl_refrele(tep); 1984 } 1985 1986 /* 1987 * Write side service routine. 1988 * 1989 * All actual processing happens within serializer which is entered 1990 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1991 * messages that need processing may have arrived, so tl_wsrv repeats until 1992 * queue is empty or te_nowsrv is set. 1993 */ 1994 static void 1995 tl_wsrv(queue_t *wq) 1996 { 1997 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1998 1999 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 2000 mutex_enter(&tep->te_srv_lock); 2001 ASSERT(tep->te_wsrv_active == B_FALSE); 2002 tep->te_wsrv_active = B_TRUE; 2003 mutex_exit(&tep->te_srv_lock); 2004 2005 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2006 2007 /* 2008 * Wait for serializer job to complete. 
2009 */ 2010 mutex_enter(&tep->te_srv_lock); 2011 while (tep->te_wsrv_active) { 2012 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2013 } 2014 cv_signal(&tep->te_srv_cv); 2015 mutex_exit(&tep->te_srv_lock); 2016 } 2017 } 2018 2019 /* 2020 * Serialized write side processing of the STREAMS queue. 2021 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2022 * is NULL. 2023 */ 2024 static void 2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2026 { 2027 mblk_t *mp; 2028 queue_t *wq = tep->te_wq; 2029 2030 ASSERT(wq != NULL); 2031 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2032 tl_wput_common_ser(mp, tep); 2033 } 2034 2035 /* 2036 * Wakeup service routine unless called from close. 2037 * If ser_mp is specified, the caller is tl_wsrv(). 2038 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2039 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2040 * be no matching tl_serializer_exit() in this case. 2041 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2042 * waiting on te_srv_cv. 2043 */ 2044 if (ser_mp != NULL) { 2045 /* 2046 * We are called from tl_wsrv. 2047 */ 2048 mutex_enter(&tep->te_srv_lock); 2049 ASSERT(tep->te_wsrv_active); 2050 tep->te_wsrv_active = B_FALSE; 2051 cv_signal(&tep->te_srv_cv); 2052 mutex_exit(&tep->te_srv_lock); 2053 tl_serializer_exit(tep); 2054 } 2055 } 2056 2057 /* 2058 * Called when the stream is backenabled. Enter serializer and qenable everyone 2059 * flow controlled by tep. 2060 * 2061 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2062 * is possible that two instances of tl_rsrv will be running reusing the same 2063 * rsrv mblk. 
2064 */ 2065 static void 2066 tl_rsrv(queue_t *rq) 2067 { 2068 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2069 2070 ASSERT(rq->q_first == NULL); 2071 ASSERT(tep->te_rsrv_active == 0); 2072 2073 tep->te_rsrv_active = B_TRUE; 2074 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2075 /* 2076 * Wait for serializer job to complete. 2077 */ 2078 mutex_enter(&tep->te_srv_lock); 2079 while (tep->te_rsrv_active) { 2080 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2081 } 2082 cv_signal(&tep->te_srv_cv); 2083 mutex_exit(&tep->te_srv_lock); 2084 } 2085 2086 /* ARGSUSED */ 2087 static void 2088 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2089 { 2090 tl_endpt_t *peer_tep; 2091 2092 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2093 tl_cl_backenable(tep); 2094 } else if ( 2095 IS_COTS(tep) && 2096 ((peer_tep = tep->te_conp) != NULL) && 2097 !peer_tep->te_closing && 2098 ((tep->te_state == TS_DATA_XFER) || 2099 (tep->te_state == TS_WIND_ORDREL)|| 2100 (tep->te_state == TS_WREQ_ORDREL))) { 2101 TL_QENABLE(peer_tep); 2102 } 2103 2104 /* 2105 * Wakeup read side service routine. 2106 */ 2107 mutex_enter(&tep->te_srv_lock); 2108 ASSERT(tep->te_rsrv_active); 2109 tep->te_rsrv_active = B_FALSE; 2110 cv_signal(&tep->te_srv_cv); 2111 mutex_exit(&tep->te_srv_lock); 2112 tl_serializer_exit(tep); 2113 } 2114 2115 /* 2116 * process M_PROTO messages. Always called from serializer. 2117 */ 2118 static void 2119 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2120 { 2121 ssize_t msz = MBLKL(mp); 2122 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2123 2124 /* Message size was validated by tl_wput(). 
*/ 2125 ASSERT(msz >= sizeof (prim->type)); 2126 2127 switch (prim->type) { 2128 case T_UNBIND_REQ: 2129 tl_unbind(mp, tep); 2130 break; 2131 2132 case T_ADDR_REQ: 2133 tl_addr_req(mp, tep); 2134 break; 2135 2136 case O_T_CONN_RES: 2137 case T_CONN_RES: 2138 if (IS_CLTS(tep)) { 2139 tl_merror(tep->te_wq, mp, EPROTO); 2140 break; 2141 } 2142 tl_conn_res(mp, tep); 2143 break; 2144 2145 case T_DISCON_REQ: 2146 if (IS_CLTS(tep)) { 2147 tl_merror(tep->te_wq, mp, EPROTO); 2148 break; 2149 } 2150 tl_discon_req(mp, tep); 2151 break; 2152 2153 case T_DATA_REQ: 2154 if (IS_CLTS(tep)) { 2155 tl_merror(tep->te_wq, mp, EPROTO); 2156 break; 2157 } 2158 tl_data(mp, tep); 2159 break; 2160 2161 case T_OPTDATA_REQ: 2162 if (IS_CLTS(tep)) { 2163 tl_merror(tep->te_wq, mp, EPROTO); 2164 break; 2165 } 2166 tl_data(mp, tep); 2167 break; 2168 2169 case T_EXDATA_REQ: 2170 if (IS_CLTS(tep)) { 2171 tl_merror(tep->te_wq, mp, EPROTO); 2172 break; 2173 } 2174 tl_exdata(mp, tep); 2175 break; 2176 2177 case T_ORDREL_REQ: 2178 if (! IS_COTSORD(tep)) { 2179 tl_merror(tep->te_wq, mp, EPROTO); 2180 break; 2181 } 2182 tl_ordrel(mp, tep); 2183 break; 2184 2185 case T_UNITDATA_REQ: 2186 if (IS_COTS(tep)) { 2187 tl_merror(tep->te_wq, mp, EPROTO); 2188 break; 2189 } 2190 tl_unitdata(mp, tep); 2191 break; 2192 2193 default: 2194 tl_merror(tep->te_wq, mp, EPROTO); 2195 break; 2196 } 2197 } 2198 2199 /* 2200 * Process ioctl from serializer. 2201 * This is a wrapper around tl_do_ioctl(). 2202 */ 2203 static void 2204 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2205 { 2206 if (! 
tep->te_closing) 2207 tl_do_ioctl(mp, tep); 2208 else 2209 freemsg(mp); 2210 2211 tl_serializer_exit(tep); 2212 tl_refrele(tep); 2213 } 2214 2215 static void 2216 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2217 { 2218 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2219 int cmd = iocbp->ioc_cmd; 2220 queue_t *wq = tep->te_wq; 2221 int error; 2222 int thisopt, otheropt; 2223 2224 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2225 2226 switch (cmd) { 2227 case TL_IOC_CREDOPT: 2228 if (cmd == TL_IOC_CREDOPT) { 2229 thisopt = TL_SETCRED; 2230 otheropt = TL_SETUCRED; 2231 } else { 2232 /* FALLTHROUGH */ 2233 case TL_IOC_UCREDOPT: 2234 thisopt = TL_SETUCRED; 2235 otheropt = TL_SETCRED; 2236 } 2237 /* 2238 * The credentials passing does not apply to sockets. 2239 * Only one of the cred options can be set at a given time. 2240 */ 2241 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2242 miocnak(wq, mp, 0, EINVAL); 2243 return; 2244 } 2245 2246 /* 2247 * Turn on generation of credential options for 2248 * T_conn_req, T_conn_con, T_unidata_ind. 
2249 */ 2250 error = miocpullup(mp, sizeof (uint32_t)); 2251 if (error != 0) { 2252 miocnak(wq, mp, 0, error); 2253 return; 2254 } 2255 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2256 miocnak(wq, mp, 0, EINVAL); 2257 return; 2258 } 2259 2260 if (*(uint32_t *)mp->b_cont->b_rptr) 2261 tep->te_flag |= thisopt; 2262 else 2263 tep->te_flag &= ~thisopt; 2264 2265 miocack(wq, mp, 0, 0); 2266 break; 2267 2268 default: 2269 /* Should not be here */ 2270 miocnak(wq, mp, 0, EINVAL); 2271 break; 2272 } 2273 } 2274 2275 2276 /* 2277 * send T_ERROR_ACK 2278 * Note: assumes enough memory or caller passed big enough mp 2279 * - no recovery from allocb failures 2280 */ 2281 2282 static void 2283 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2284 t_scalar_t unix_err, t_scalar_t type) 2285 { 2286 struct T_error_ack *err_ack; 2287 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2288 M_PCPROTO, T_ERROR_ACK); 2289 2290 if (ackmp == NULL) { 2291 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2292 "tl_error_ack:out of mblk memory")); 2293 tl_merror(wq, NULL, ENOSR); 2294 return; 2295 } 2296 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2297 err_ack->ERROR_prim = type; 2298 err_ack->TLI_error = tli_err; 2299 err_ack->UNIX_error = unix_err; 2300 2301 /* 2302 * send error ack message 2303 */ 2304 qreply(wq, ackmp); 2305 } 2306 2307 2308 2309 /* 2310 * send T_OK_ACK 2311 * Note: assumes enough memory or caller passed big enough mp 2312 * - no recovery from allocb failures 2313 */ 2314 static void 2315 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2316 { 2317 struct T_ok_ack *ok_ack; 2318 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2319 M_PCPROTO, T_OK_ACK); 2320 2321 if (ackmp == NULL) { 2322 tl_merror(wq, NULL, ENOMEM); 2323 return; 2324 } 2325 2326 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2327 ok_ack->CORRECT_prim = type; 2328 2329 (void) qreply(wq, ackmp); 2330 } 2331 2332 /* 2333 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2334 * This is a wrapper around tl_bind(). 2335 */ 2336 static void 2337 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2338 { 2339 if (! tep->te_closing) 2340 tl_bind(mp, tep); 2341 else 2342 freemsg(mp); 2343 2344 tl_serializer_exit(tep); 2345 tl_refrele(tep); 2346 } 2347 2348 /* 2349 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2350 * Assumes that the endpoint is in the unbound. 2351 */ 2352 static void 2353 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2354 { 2355 queue_t *wq = tep->te_wq; 2356 struct T_bind_ack *b_ack; 2357 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2358 mblk_t *ackmp, *bamp; 2359 soux_addr_t ux_addr; 2360 t_uscalar_t qlen = 0; 2361 t_scalar_t alen, aoff; 2362 tl_addr_t addr_req; 2363 void *addr_startp; 2364 ssize_t msz = MBLKL(mp), basize; 2365 t_scalar_t tli_err = 0, unix_err = 0; 2366 t_scalar_t save_prim_type = bind->PRIM_type; 2367 t_scalar_t save_state = tep->te_state; 2368 2369 if (tep->te_state != TS_UNBND) { 2370 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2371 SL_TRACE|SL_ERROR, 2372 "tl_wput:bind_request:out of state, state=%d", 2373 tep->te_state)); 2374 tli_err = TOUTSTATE; 2375 goto error; 2376 } 2377 2378 if (msz < sizeof (struct T_bind_req)) { 2379 tli_err = TSYSERR; unix_err = EINVAL; 2380 goto error; 2381 } 2382 2383 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2384 2385 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2386 (bind->PRIM_type == T_BIND_REQ)); 2387 2388 alen = bind->ADDR_length; 2389 aoff = bind->ADDR_offset; 2390 2391 /* negotiate max conn req pending */ 2392 if (IS_COTS(tep)) { 2393 qlen = bind->CONIND_number; 2394 if (qlen > TL_MAXQLEN) 2395 qlen = TL_MAXQLEN; 2396 } 2397 2398 /* 2399 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2400 * and bound again. 
2401 */ 2402 if ((tep->te_hash_hndl == NULL) && 2403 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2404 mod_hash_reserve_nosleep(tep->te_addrhash, 2405 &tep->te_hash_hndl) != 0) { 2406 tli_err = TSYSERR; unix_err = ENOSR; 2407 goto error; 2408 } 2409 2410 /* 2411 * Verify address correctness. 2412 */ 2413 if (IS_SOCKET(tep)) { 2414 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2415 2416 if ((alen != TL_SOUX_ADDRLEN) || 2417 (aoff < 0) || 2418 (aoff + alen > msz)) { 2419 (void) (STRLOG(TL_ID, tep->te_minor, 2420 1, SL_TRACE|SL_ERROR, 2421 "tl_bind: invalid socket addr")); 2422 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2423 tli_err = TSYSERR; unix_err = EINVAL; 2424 goto error; 2425 } 2426 /* Copy address from message to local buffer. */ 2427 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2428 /* 2429 * Check that we got correct address from sockets 2430 */ 2431 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2432 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2433 (void) (STRLOG(TL_ID, tep->te_minor, 2434 1, SL_TRACE|SL_ERROR, 2435 "tl_bind: invalid socket magic")); 2436 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2437 tli_err = TSYSERR; unix_err = EINVAL; 2438 goto error; 2439 } 2440 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2441 (ux_addr.soua_vp != NULL)) { 2442 (void) (STRLOG(TL_ID, tep->te_minor, 2443 1, SL_TRACE|SL_ERROR, 2444 "tl_bind: implicit addr non-empty")); 2445 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2446 tli_err = TSYSERR; unix_err = EINVAL; 2447 goto error; 2448 } 2449 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2450 (ux_addr.soua_vp == NULL)) { 2451 (void) (STRLOG(TL_ID, tep->te_minor, 2452 1, SL_TRACE|SL_ERROR, 2453 "tl_bind: explicit addr empty")); 2454 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2455 tli_err = TSYSERR; unix_err = EINVAL; 2456 goto error; 2457 } 2458 } else { 2459 if ((alen > 0) && ((aoff < 0) || 2460 ((ssize_t)(aoff + alen) > msz) || 2461 ((aoff + alen) < 0))) { 
2462 (void) (STRLOG(TL_ID, tep->te_minor, 2463 1, SL_TRACE|SL_ERROR, 2464 "tl_bind: invalid message")); 2465 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2466 tli_err = TSYSERR; unix_err = EINVAL; 2467 goto error; 2468 } 2469 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2470 (void) (STRLOG(TL_ID, tep->te_minor, 2471 1, SL_TRACE|SL_ERROR, 2472 "tl_bind: bad addr in message")); 2473 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2474 tli_err = TBADADDR; 2475 goto error; 2476 } 2477 #ifdef DEBUG 2478 /* 2479 * Mild form of ASSERT()ion to detect broken TPI apps. 2480 * if (! assertion) 2481 * log warning; 2482 */ 2483 if (! ((alen == 0 && aoff == 0) || 2484 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2485 (void) (STRLOG(TL_ID, tep->te_minor, 2486 3, SL_TRACE|SL_ERROR, 2487 "tl_bind: addr overlaps TPI message")); 2488 } 2489 #endif 2490 } 2491 2492 /* 2493 * Bind the address provided or allocate one if requested. 2494 * Allow rebinds with a new qlen value. 2495 */ 2496 if (IS_SOCKET(tep)) { 2497 /* 2498 * For anonymous requests the te_ap is already set up properly 2499 * so use minor number as an address. 2500 * For explicit requests need to check whether the address is 2501 * already in use. 2502 */ 2503 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2504 int rc; 2505 2506 if (tep->te_flag & TL_ADDRHASHED) { 2507 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2508 if (tep->te_vp == ux_addr.soua_vp) 2509 goto skip_addr_bind; 2510 else /* Rebind to a new address. */ 2511 tl_addr_unbind(tep); 2512 } 2513 /* 2514 * Insert address in the hash if it is not already 2515 * there. Since we use preallocated handle, the insert 2516 * can fail only if the key is already present. 
2517 */ 2518 rc = mod_hash_insert_reserve(tep->te_addrhash, 2519 (mod_hash_key_t)ux_addr.soua_vp, 2520 (mod_hash_val_t)tep, tep->te_hash_hndl); 2521 2522 if (rc != 0) { 2523 ASSERT(rc == MH_ERR_DUPLICATE); 2524 /* 2525 * Violate O_T_BIND_REQ semantics and fail with 2526 * TADDRBUSY - sockets will not use any address 2527 * other than supplied one for explicit binds. 2528 */ 2529 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2530 SL_TRACE|SL_ERROR, 2531 "tl_bind:requested addr %p is busy", 2532 ux_addr.soua_vp)); 2533 tli_err = TADDRBUSY; unix_err = 0; 2534 goto error; 2535 } 2536 tep->te_uxaddr = ux_addr; 2537 tep->te_flag |= TL_ADDRHASHED; 2538 tep->te_hash_hndl = NULL; 2539 } 2540 } else if (alen == 0) { 2541 /* 2542 * assign any free address 2543 */ 2544 if (! tl_get_any_addr(tep, NULL)) { 2545 (void) (STRLOG(TL_ID, tep->te_minor, 2546 1, SL_TRACE|SL_ERROR, 2547 "tl_bind:failed to get buffer for any " 2548 "address")); 2549 tli_err = TSYSERR; unix_err = ENOSR; 2550 goto error; 2551 } 2552 } else { 2553 addr_req.ta_alen = alen; 2554 addr_req.ta_abuf = (mp->b_rptr + aoff); 2555 addr_req.ta_zoneid = tep->te_zoneid; 2556 2557 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2558 if (tep->te_abuf == NULL) { 2559 tli_err = TSYSERR; unix_err = ENOSR; 2560 goto error; 2561 } 2562 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2563 tep->te_alen = alen; 2564 2565 if (mod_hash_insert_reserve(tep->te_addrhash, 2566 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2567 tep->te_hash_hndl) != 0) { 2568 if (save_prim_type == T_BIND_REQ) { 2569 /* 2570 * The bind semantics for this primitive 2571 * require a failure if the exact address 2572 * requested is busy 2573 */ 2574 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2575 SL_TRACE|SL_ERROR, 2576 "tl_bind:requested addr is busy")); 2577 tli_err = TADDRBUSY; unix_err = 0; 2578 goto error; 2579 } 2580 2581 /* 2582 * O_T_BIND_REQ semantics say if address if requested 2583 * address is busy, bind to any available free address 
2584 */ 2585 if (! tl_get_any_addr(tep, &addr_req)) { 2586 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2587 SL_TRACE|SL_ERROR, 2588 "tl_bind:unable to get any addr buf")); 2589 tli_err = TSYSERR; unix_err = ENOMEM; 2590 goto error; 2591 } 2592 } else { 2593 tep->te_flag |= TL_ADDRHASHED; 2594 tep->te_hash_hndl = NULL; 2595 } 2596 } 2597 2598 ASSERT(tep->te_alen >= 0); 2599 2600 skip_addr_bind: 2601 /* 2602 * prepare T_BIND_ACK TPI message 2603 */ 2604 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2605 bamp = reallocb(mp, basize, 0); 2606 if (bamp == NULL) { 2607 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2608 "tl_wput:tl_bind: allocb failed")); 2609 /* 2610 * roll back state changes 2611 */ 2612 tl_addr_unbind(tep); 2613 tep->te_state = TS_UNBND; 2614 tl_memrecover(wq, mp, basize); 2615 return; 2616 } 2617 2618 DB_TYPE(bamp) = M_PCPROTO; 2619 bamp->b_wptr = bamp->b_rptr + basize; 2620 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2621 b_ack->PRIM_type = T_BIND_ACK; 2622 b_ack->CONIND_number = qlen; 2623 b_ack->ADDR_length = tep->te_alen; 2624 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2625 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2626 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2627 2628 if (IS_COTS(tep)) { 2629 tep->te_qlen = qlen; 2630 if (qlen > 0) 2631 tep->te_flag |= TL_LISTENER; 2632 } 2633 2634 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2635 /* 2636 * send T_BIND_ACK message 2637 */ 2638 (void) qreply(wq, bamp); 2639 return; 2640 2641 error: 2642 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2643 if (ackmp == NULL) { 2644 /* 2645 * roll back state changes 2646 */ 2647 tep->te_state = save_state; 2648 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2649 return; 2650 } 2651 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2652 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2653 } 2654 2655 /* 2656 * Process T_UNBIND_REQ. 2657 * Called from serializer. 
2658 */ 2659 static void 2660 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2661 { 2662 queue_t *wq; 2663 mblk_t *ackmp; 2664 2665 if (tep->te_closing) { 2666 freemsg(mp); 2667 return; 2668 } 2669 2670 wq = tep->te_wq; 2671 2672 /* 2673 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2674 * ==> allocate for T_ERROR_ACK (known max) 2675 */ 2676 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2677 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2678 return; 2679 } 2680 /* 2681 * memory resources committed 2682 * Note: no message validation. T_UNBIND_REQ message is 2683 * same size as PRIM_type field so already verified earlier. 2684 */ 2685 2686 /* 2687 * validate state 2688 */ 2689 if (tep->te_state != TS_IDLE) { 2690 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2691 SL_TRACE|SL_ERROR, 2692 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2693 tep->te_state)); 2694 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2695 return; 2696 } 2697 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2698 2699 /* 2700 * TPI says on T_UNBIND_REQ: 2701 * send up a M_FLUSH to flush both 2702 * read and write queues 2703 */ 2704 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2705 2706 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2707 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2708 2709 /* 2710 * Sockets use bind with qlen==0 followed by bind() to 2711 * the same address with qlen > 0 for listeners. 2712 * We allow rebind with a new qlen value. 2713 */ 2714 tl_addr_unbind(tep); 2715 } 2716 2717 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2718 /* 2719 * send T_OK_ACK 2720 */ 2721 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2722 } 2723 2724 2725 /* 2726 * Option management code from drv/ip is used here 2727 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2728 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2729 * However, that is what we want as that option is 'unorthodox' 2730 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2731 * and not in T_SVR4_OPTMGMT_REQ/ACK 2732 * Note2: use of optcom_req means this routine is an exception to 2733 * recovery from allocb() failures. 2734 */ 2735 2736 static void 2737 tl_optmgmt(queue_t *wq, mblk_t *mp) 2738 { 2739 tl_endpt_t *tep; 2740 mblk_t *ackmp; 2741 union T_primitives *prim; 2742 2743 tep = (tl_endpt_t *)wq->q_ptr; 2744 prim = (union T_primitives *)mp->b_rptr; 2745 2746 /* all states OK for AF_UNIX options ? */ 2747 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2748 prim->type == T_SVR4_OPTMGMT_REQ) { 2749 /* 2750 * Broken TLI semantics that options can only be managed 2751 * in TS_IDLE state. Needed for Sparc ABI test suite that 2752 * tests this TLI (mis)feature using this device driver. 2753 */ 2754 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2755 SL_TRACE|SL_ERROR, 2756 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2757 tep->te_state)); 2758 /* 2759 * preallocate memory for T_ERROR_ACK 2760 */ 2761 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2762 if (! ackmp) { 2763 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2764 return; 2765 } 2766 2767 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2768 freemsg(mp); 2769 return; 2770 } 2771 2772 /* 2773 * call common option management routine from drv/ip 2774 */ 2775 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2776 (void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj); 2777 } else { 2778 ASSERT(prim->type == T_OPTMGMT_REQ); 2779 (void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj); 2780 } 2781 } 2782 2783 /* 2784 * Handle T_conn_req - the driver part of accept(). 2785 * If TL_SET[U]CRED generate the credentials options. 2786 * If this is a socket pass through options unmodified. 2787 * For sockets generate the T_CONN_CON here instead of 2788 * waiting for the T_CONN_RES. 
2789 */ 2790 static void 2791 tl_conn_req(queue_t *wq, mblk_t *mp) 2792 { 2793 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2794 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2795 ssize_t msz = MBLKL(mp); 2796 t_scalar_t alen, aoff, olen, ooff, err = 0; 2797 tl_endpt_t *peer_tep = NULL; 2798 mblk_t *ackmp; 2799 mblk_t *dimp; 2800 struct T_discon_ind *di; 2801 soux_addr_t ux_addr; 2802 tl_addr_t dst; 2803 2804 ASSERT(IS_COTS(tep)); 2805 2806 if (tep->te_closing) { 2807 freemsg(mp); 2808 return; 2809 } 2810 2811 /* 2812 * preallocate memory for: 2813 * 1. max of T_ERROR_ACK and T_OK_ACK 2814 * ==> known max T_ERROR_ACK 2815 * 2. max of T_DISCON_IND and T_CONN_IND 2816 */ 2817 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2818 if (! ackmp) { 2819 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2820 return; 2821 } 2822 /* 2823 * memory committed for T_OK_ACK/T_ERROR_ACK now 2824 * will be committed for T_DISCON_IND/T_CONN_IND later 2825 */ 2826 2827 if (tep->te_state != TS_IDLE) { 2828 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2829 SL_TRACE|SL_ERROR, 2830 "tl_wput:T_CONN_REQ:out of state, state=%d", 2831 tep->te_state)); 2832 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2833 freemsg(mp); 2834 return; 2835 } 2836 2837 /* 2838 * validate the message 2839 * Note: dereference fields in struct inside message only 2840 * after validating the message length. 
2841 */ 2842 if (msz < sizeof (struct T_conn_req)) { 2843 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2844 "tl_conn_req:invalid message length")); 2845 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2846 freemsg(mp); 2847 return; 2848 } 2849 alen = creq->DEST_length; 2850 aoff = creq->DEST_offset; 2851 olen = creq->OPT_length; 2852 ooff = creq->OPT_offset; 2853 if (olen == 0) 2854 ooff = 0; 2855 2856 if (IS_SOCKET(tep)) { 2857 if ((alen != TL_SOUX_ADDRLEN) || 2858 (aoff < 0) || 2859 (aoff + alen > msz) || 2860 (alen > msz - sizeof (struct T_conn_req))) { 2861 (void) (STRLOG(TL_ID, tep->te_minor, 2862 1, SL_TRACE|SL_ERROR, 2863 "tl_conn_req: invalid socket addr")); 2864 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2865 freemsg(mp); 2866 return; 2867 } 2868 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2869 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2870 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2871 (void) (STRLOG(TL_ID, tep->te_minor, 2872 1, SL_TRACE|SL_ERROR, 2873 "tl_conn_req: invalid socket magic")); 2874 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2875 freemsg(mp); 2876 return; 2877 } 2878 } else { 2879 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2880 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2881 ooff + olen < 0)) || 2882 olen < 0 || ooff < 0) { 2883 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2884 SL_TRACE|SL_ERROR, 2885 "tl_conn_req:invalid message")); 2886 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2887 freemsg(mp); 2888 return; 2889 } 2890 2891 if (alen <= 0 || aoff < 0 || 2892 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2893 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2894 SL_TRACE|SL_ERROR, 2895 "tl_conn_req:bad addr in message, " 2896 "alen=%d, msz=%ld", 2897 alen, msz)); 2898 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2899 freemsg(mp); 2900 return; 2901 } 2902 #ifdef DEBUG 2903 /* 2904 * Mild form of ASSERT()ion to detect broken TPI apps. 2905 * if (! 
assertion) 2906 * log warning; 2907 */ 2908 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2909 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2910 SL_TRACE|SL_ERROR, 2911 "tl_conn_req: addr overlaps TPI message")); 2912 } 2913 #endif 2914 if (olen) { 2915 /* 2916 * no opts in connect req 2917 * supported in this provider except for sockets. 2918 */ 2919 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2920 SL_TRACE|SL_ERROR, 2921 "tl_conn_req:options not supported " 2922 "in message")); 2923 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2924 freemsg(mp); 2925 return; 2926 } 2927 } 2928 2929 /* 2930 * Prevent tep from closing on us. 2931 */ 2932 if (! tl_noclose(tep)) { 2933 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2934 "tl_conn_req:endpoint is closing")); 2935 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2936 freemsg(mp); 2937 return; 2938 } 2939 2940 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2941 /* 2942 * get endpoint to connect to 2943 * check that peer with DEST addr is bound to addr 2944 * and has CONIND_number > 0 2945 */ 2946 dst.ta_alen = alen; 2947 dst.ta_abuf = mp->b_rptr + aoff; 2948 dst.ta_zoneid = tep->te_zoneid; 2949 2950 /* 2951 * Verify if remote addr is in use 2952 */ 2953 peer_tep = (IS_SOCKET(tep) ? 
2954 tl_sock_find_peer(tep, &ux_addr) : 2955 tl_find_peer(tep, &dst)); 2956 2957 if (peer_tep == NULL) { 2958 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2959 "tl_conn_req:no one at connect address")); 2960 err = ECONNREFUSED; 2961 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2962 /* 2963 * validate that number of incoming connection is 2964 * not to capacity on destination endpoint 2965 */ 2966 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2967 "tl_conn_req: qlen overflow connection refused")); 2968 err = ECONNREFUSED; 2969 } else if (!((peer_tep->te_state == TS_IDLE) || 2970 (peer_tep->te_state == TS_WRES_CIND))) { 2971 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2972 "tl_conn_req:peer in bad state")); 2973 err = ECONNREFUSED; 2974 } 2975 2976 /* 2977 * preallocate now for T_DISCON_IND or T_CONN_IND 2978 */ 2979 if (err != 0) { 2980 if (peer_tep != NULL) 2981 tl_refrele(peer_tep); 2982 /* We are still expected to send T_OK_ACK */ 2983 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2984 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2985 tl_closeok(tep); 2986 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2987 M_PROTO, T_DISCON_IND); 2988 if (dimp == NULL) { 2989 tl_merror(wq, NULL, ENOSR); 2990 return; 2991 } 2992 di = (struct T_discon_ind *)dimp->b_rptr; 2993 di->DISCON_reason = err; 2994 di->SEQ_number = BADSEQNUM; 2995 2996 tep->te_state = TS_IDLE; 2997 /* 2998 * send T_DISCON_IND message 2999 */ 3000 putnext(tep->te_rq, dimp); 3001 return; 3002 } 3003 3004 ASSERT(IS_COTS(peer_tep)); 3005 3006 /* 3007 * Found the listener. At this point processing will continue on 3008 * listener serializer. Close of the endpoint should be blocked while we 3009 * switch serializers. 3010 */ 3011 tl_serializer_refhold(peer_tep->te_ser); 3012 tl_serializer_refrele(tep->te_ser); 3013 tep->te_ser = peer_tep->te_ser; 3014 ASSERT(tep->te_oconp == NULL); 3015 tep->te_oconp = peer_tep; 3016 3017 /* 3018 * It is safe to close now. 
Close may continue on listener serializer. 3019 */ 3020 tl_closeok(tep); 3021 3022 /* 3023 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3024 * data, so we link mp to ackmp. 3025 */ 3026 ackmp->b_cont = mp; 3027 mp = ackmp; 3028 3029 tl_refhold(tep); 3030 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3031 } 3032 3033 /* 3034 * Finish T_CONN_REQ processing on listener serializer. 3035 */ 3036 static void 3037 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3038 { 3039 queue_t *wq; 3040 tl_endpt_t *peer_tep = tep->te_oconp; 3041 mblk_t *confmp, *cimp, *indmp; 3042 void *opts = NULL; 3043 mblk_t *ackmp = mp; 3044 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3045 struct T_conn_ind *ci; 3046 tl_icon_t *tip; 3047 void *addr_startp; 3048 t_scalar_t olen = creq->OPT_length; 3049 t_scalar_t ooff = creq->OPT_offset; 3050 size_t ci_msz; 3051 size_t size; 3052 3053 if (tep->te_closing) { 3054 TL_UNCONNECT(tep->te_oconp); 3055 tl_serializer_exit(tep); 3056 tl_refrele(tep); 3057 freemsg(mp); 3058 return; 3059 } 3060 3061 wq = tep->te_wq; 3062 tep->te_flag |= TL_EAGER; 3063 3064 /* 3065 * Extract preallocated ackmp from mp. 
3066 */ 3067 mp = mp->b_cont; 3068 ackmp->b_cont = NULL; 3069 3070 if (olen == 0) 3071 ooff = 0; 3072 3073 if (peer_tep->te_closing || 3074 !((peer_tep->te_state == TS_IDLE) || 3075 (peer_tep->te_state == TS_WRES_CIND))) { 3076 TL_UNCONNECT(tep->te_oconp); 3077 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3078 freemsg(ackmp); 3079 tl_serializer_exit(tep); 3080 tl_refrele(tep); 3081 return; 3082 } 3083 3084 /* 3085 * preallocate now for T_DISCON_IND or T_CONN_IND 3086 */ 3087 /* 3088 * calculate length of T_CONN_IND message 3089 */ 3090 if (peer_tep->te_flag & TL_SETCRED) { 3091 ooff = 0; 3092 olen = (t_scalar_t) sizeof (struct opthdr) + 3093 OPTLEN(sizeof (tl_credopt_t)); 3094 /* 1 option only */ 3095 } else if (peer_tep->te_flag & TL_SETUCRED) { 3096 ooff = 0; 3097 olen = (t_scalar_t)sizeof (struct opthdr) + 3098 OPTLEN(ucredsize); 3099 /* 1 option only */ 3100 } 3101 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3102 ci_msz = T_ALIGN(ci_msz) + olen; 3103 size = max(ci_msz, sizeof (struct T_discon_ind)); 3104 3105 /* 3106 * Save options from mp - we'll need them for T_CONN_IND. 3107 */ 3108 if (ooff != 0) { 3109 opts = kmem_alloc(olen, KM_NOSLEEP); 3110 if (opts == NULL) { 3111 /* 3112 * roll back state changes 3113 */ 3114 tep->te_state = TS_IDLE; 3115 tl_memrecover(wq, mp, size); 3116 freemsg(ackmp); 3117 TL_UNCONNECT(tep->te_oconp); 3118 tl_serializer_exit(tep); 3119 tl_refrele(tep); 3120 return; 3121 } 3122 /* Copy options to a temp buffer */ 3123 bcopy(mp->b_rptr + ooff, opts, olen); 3124 } 3125 3126 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3127 /* 3128 * Generate a T_CONN_CON that has the identical address 3129 * (and options) as the T_CONN_REQ. 3130 * NOTE: assumes that the T_conn_req and T_conn_con structures 3131 * are isomorphic. 3132 */ 3133 confmp = copyb(mp); 3134 if (! 
confmp) { 3135 /* 3136 * roll back state changes 3137 */ 3138 tep->te_state = TS_IDLE; 3139 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3140 freemsg(ackmp); 3141 if (opts != NULL) 3142 kmem_free(opts, olen); 3143 TL_UNCONNECT(tep->te_oconp); 3144 tl_serializer_exit(tep); 3145 tl_refrele(tep); 3146 return; 3147 } 3148 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3149 T_CONN_CON; 3150 } else { 3151 confmp = NULL; 3152 } 3153 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3154 /* 3155 * roll back state changes 3156 */ 3157 tep->te_state = TS_IDLE; 3158 tl_memrecover(wq, mp, size); 3159 freemsg(ackmp); 3160 if (opts != NULL) 3161 kmem_free(opts, olen); 3162 freemsg(confmp); 3163 TL_UNCONNECT(tep->te_oconp); 3164 tl_serializer_exit(tep); 3165 tl_refrele(tep); 3166 return; 3167 } 3168 3169 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3170 if (tip == NULL) { 3171 /* 3172 * roll back state changes 3173 */ 3174 tep->te_state = TS_IDLE; 3175 tl_memrecover(wq, indmp, sizeof (*tip)); 3176 freemsg(ackmp); 3177 if (opts != NULL) 3178 kmem_free(opts, olen); 3179 freemsg(confmp); 3180 TL_UNCONNECT(tep->te_oconp); 3181 tl_serializer_exit(tep); 3182 tl_refrele(tep); 3183 return; 3184 } 3185 tip->ti_mp = NULL; 3186 3187 /* 3188 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3189 * and tl_icon_t cell. 3190 */ 3191 3192 /* 3193 * ack validity of request and send the peer credential in the ACK. 3194 */ 3195 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3196 3197 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3198 confmp != NULL) { 3199 mblk_setcred(confmp, peer_tep->te_credp); 3200 DB_CPID(confmp) = peer_tep->te_cpid; 3201 } 3202 3203 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3204 3205 /* 3206 * prepare message to send T_CONN_IND 3207 */ 3208 /* 3209 * allocate the message - original data blocks retained 3210 * in the returned mblk 3211 */ 3212 cimp = tl_resizemp(indmp, size); 3213 if (! 
cimp) { 3214 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3215 "tl_conn_req:con_ind:allocb failure")); 3216 tl_merror(wq, indmp, ENOMEM); 3217 TL_UNCONNECT(tep->te_oconp); 3218 tl_serializer_exit(tep); 3219 tl_refrele(tep); 3220 if (opts != NULL) 3221 kmem_free(opts, olen); 3222 freemsg(confmp); 3223 ASSERT(tip->ti_mp == NULL); 3224 kmem_free(tip, sizeof (*tip)); 3225 return; 3226 } 3227 3228 DB_TYPE(cimp) = M_PROTO; 3229 ci = (struct T_conn_ind *)cimp->b_rptr; 3230 ci->PRIM_type = T_CONN_IND; 3231 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3232 ci->SRC_length = tep->te_alen; 3233 ci->SEQ_number = tep->te_seqno; 3234 3235 addr_startp = cimp->b_rptr + ci->SRC_offset; 3236 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3237 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3238 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3239 ci->SRC_length); 3240 ci->OPT_length = olen; /* because only 1 option */ 3241 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3242 DB_CREDDEF(cimp, tep->te_credp), 3243 TLPID(cimp, tep), 3244 peer_tep->te_flag, peer_tep->te_credp); 3245 } else if (ooff != 0) { 3246 /* Copy option from T_CONN_REQ */ 3247 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3248 ci->SRC_length); 3249 ci->OPT_length = olen; 3250 ASSERT(opts != NULL); 3251 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3252 } else { 3253 ci->OPT_offset = 0; 3254 ci->OPT_length = 0; 3255 } 3256 if (opts != NULL) 3257 kmem_free(opts, olen); 3258 3259 /* 3260 * register connection request with server peer 3261 * append to list of incoming connections 3262 * increment references for both peer_tep and tep: peer_tep is placed on 3263 * te_oconp and tep is placed on listeners queue. 
3264 */ 3265 tip->ti_tep = tep; 3266 tip->ti_seqno = tep->te_seqno; 3267 list_insert_tail(&peer_tep->te_iconp, tip); 3268 peer_tep->te_nicon++; 3269 3270 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3271 /* 3272 * send the T_CONN_IND message 3273 */ 3274 putnext(peer_tep->te_rq, cimp); 3275 3276 /* 3277 * Send a T_CONN_CON message for sockets. 3278 * Disable the queues until we have reached the correct state! 3279 */ 3280 if (confmp != NULL) { 3281 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3282 noenable(wq); 3283 putnext(tep->te_rq, confmp); 3284 } 3285 /* 3286 * Now we need to increment tep reference because tep is referenced by 3287 * server list of pending connections. We also need to decrement 3288 * reference before exiting serializer. Two operations void each other 3289 * so we don't modify reference at all. 3290 */ 3291 ASSERT(tep->te_refcnt >= 2); 3292 ASSERT(peer_tep->te_refcnt >= 2); 3293 tl_serializer_exit(tep); 3294 } 3295 3296 3297 3298 /* 3299 * Handle T_conn_res on listener stream. Called on listener serializer. 3300 * tl_conn_req has already generated the T_CONN_CON. 3301 * tl_conn_res is called on listener serializer. 3302 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3303 * Switch eager serializer to acceptor's. 3304 * 3305 * If TL_SET[U]CRED generate the credentials options. 3306 * For sockets tl_conn_req has already generated the T_CONN_CON. 
3307 */ 3308 static void 3309 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3310 { 3311 queue_t *wq; 3312 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3313 ssize_t msz = MBLKL(mp); 3314 t_scalar_t olen, ooff, err = 0; 3315 t_scalar_t prim = cres->PRIM_type; 3316 uchar_t *addr_startp; 3317 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3318 tl_icon_t *tip; 3319 size_t size; 3320 mblk_t *ackmp, *respmp; 3321 mblk_t *dimp, *ccmp = NULL; 3322 struct T_discon_ind *di; 3323 struct T_conn_con *cc; 3324 boolean_t client_noclose_set = B_FALSE; 3325 boolean_t switch_client_serializer = B_TRUE; 3326 3327 ASSERT(IS_COTS(tep)); 3328 3329 if (tep->te_closing) { 3330 freemsg(mp); 3331 return; 3332 } 3333 3334 wq = tep->te_wq; 3335 3336 /* 3337 * preallocate memory for: 3338 * 1. max of T_ERROR_ACK and T_OK_ACK 3339 * ==> known max T_ERROR_ACK 3340 * 2. max of T_DISCON_IND and T_CONN_CON 3341 */ 3342 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3343 if (! ackmp) { 3344 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3345 return; 3346 } 3347 /* 3348 * memory committed for T_OK_ACK/T_ERROR_ACK now 3349 * will be committed for T_DISCON_IND/T_CONN_CON later 3350 */ 3351 3352 3353 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3354 3355 /* 3356 * validate state 3357 */ 3358 if (tep->te_state != TS_WRES_CIND) { 3359 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3360 SL_TRACE|SL_ERROR, 3361 "tl_wput:T_CONN_RES:out of state, state=%d", 3362 tep->te_state)); 3363 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3364 freemsg(mp); 3365 return; 3366 } 3367 3368 /* 3369 * validate the message 3370 * Note: dereference fields in struct inside message only 3371 * after validating the message length. 
3372 */ 3373 if (msz < sizeof (struct T_conn_res)) { 3374 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3375 "tl_conn_res:invalid message length")); 3376 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3377 freemsg(mp); 3378 return; 3379 } 3380 olen = cres->OPT_length; 3381 ooff = cres->OPT_offset; 3382 if (((olen > 0) && ((ooff + olen) > msz))) { 3383 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3384 "tl_conn_res:invalid message")); 3385 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3386 freemsg(mp); 3387 return; 3388 } 3389 if (olen) { 3390 /* 3391 * no opts in connect res 3392 * supported in this provider 3393 */ 3394 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3395 "tl_conn_res:options not supported in message")); 3396 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3397 freemsg(mp); 3398 return; 3399 } 3400 3401 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3402 ASSERT(tep->te_state == TS_WACK_CRES); 3403 3404 if (cres->SEQ_number < TL_MINOR_START && 3405 cres->SEQ_number >= BADSEQNUM) { 3406 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3407 "tl_conn_res:remote endpoint sequence number bad")); 3408 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3409 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3410 freemsg(mp); 3411 return; 3412 } 3413 3414 /* 3415 * find accepting endpoint. Will have extra reference if found. 3416 */ 3417 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, 3418 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, 3419 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { 3420 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3421 "tl_conn_res:bad accepting endpoint")); 3422 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3423 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3424 freemsg(mp); 3425 return; 3426 } 3427 3428 /* 3429 * Prevent acceptor from closing. 3430 */ 3431 if (! 
tl_noclose(acc_ep)) { 3432 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3433 "tl_conn_res:bad accepting endpoint")); 3434 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3435 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3436 tl_refrele(acc_ep); 3437 freemsg(mp); 3438 return; 3439 } 3440 3441 acc_ep->te_flag |= TL_ACCEPTOR; 3442 3443 /* 3444 * validate that accepting endpoint, if different from listening 3445 * has address bound => state is TS_IDLE 3446 * TROUBLE in XPG4 !!? 3447 */ 3448 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { 3449 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3450 "tl_conn_res:accepting endpoint has no address bound," 3451 "state=%d", acc_ep->te_state)); 3452 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3453 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3454 freemsg(mp); 3455 tl_closeok(acc_ep); 3456 tl_refrele(acc_ep); 3457 return; 3458 } 3459 3460 /* 3461 * validate if accepting endpt same as listening, then 3462 * no other incoming connection should be on the queue 3463 */ 3464 3465 if ((tep == acc_ep) && (tep->te_nicon > 1)) { 3466 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3467 "tl_conn_res: > 1 conn_ind on listener-acceptor")); 3468 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3469 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3470 freemsg(mp); 3471 tl_closeok(acc_ep); 3472 tl_refrele(acc_ep); 3473 return; 3474 } 3475 3476 /* 3477 * Mark for deletion, the entry corresponding to client 3478 * on list of pending connections made by the listener 3479 * search list to see if client is one of the 3480 * recorded as a listener. 
3481 */ 3482 tip = tl_icon_find(tep, cres->SEQ_number); 3483 if (tip == NULL) { 3484 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3485 "tl_conn_res:no client in listener list")); 3486 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3487 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3488 freemsg(mp); 3489 tl_closeok(acc_ep); 3490 tl_refrele(acc_ep); 3491 return; 3492 } 3493 3494 /* 3495 * If ti_tep is NULL the client has already closed. In this case 3496 * the code below will avoid any action on the client side 3497 * but complete the server and acceptor state transitions. 3498 */ 3499 ASSERT(tip->ti_tep == NULL || 3500 tip->ti_tep->te_seqno == cres->SEQ_number); 3501 cl_ep = tip->ti_tep; 3502 3503 /* 3504 * If the client is present it is switched from listener's to acceptor's 3505 * serializer. We should block client closes while serializers are 3506 * being switched. 3507 * 3508 * It is possible that the client is present but is currently being 3509 * closed. There are two possible cases: 3510 * 3511 * 1) The client has already entered tl_close_finish_ser() and sent 3512 * T_ORDREL_IND. In this case we can just ignore the client (but we 3513 * still need to send all messages from tip->ti_mp to the acceptor). 3514 * 3515 * 2) The client started the close but has not entered 3516 * tl_close_finish_ser() yet. In this case, the client is already 3517 * proceeding asynchronously on the listener's serializer, so we're 3518 * forced to change the acceptor to use the listener's serializer to 3519 * ensure that any operations on the acceptor are serialized with 3520 * respect to the close that's in-progress. 3521 */ 3522 if (cl_ep != NULL) { 3523 if (tl_noclose(cl_ep)) { 3524 client_noclose_set = B_TRUE; 3525 } else { 3526 /* 3527 * Client is closing. If it it has sent the 3528 * T_ORDREL_IND, we can simply ignore it - otherwise, 3529 * we have to let let the client continue until it is 3530 * sent. 
3531 * 3532 * If we do continue using the client, acceptor will 3533 * switch to client's serializer which is used by client 3534 * for its close. 3535 */ 3536 tl_client_closing_when_accepting++; 3537 switch_client_serializer = B_FALSE; 3538 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect || 3539 cl_ep->te_state == -1) 3540 cl_ep = NULL; 3541 } 3542 } 3543 3544 if (cl_ep != NULL) { 3545 /* 3546 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER 3547 * (latter for sockets only) 3548 */ 3549 if (cl_ep->te_state != TS_WCON_CREQ && 3550 (cl_ep->te_state != TS_DATA_XFER && 3551 IS_SOCKET(cl_ep))) { 3552 err = ECONNREFUSED; 3553 /* 3554 * T_DISCON_IND sent later after committing memory 3555 * and acking validity of request 3556 */ 3557 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 3558 "tl_conn_res:peer in bad state")); 3559 } 3560 3561 /* 3562 * preallocate now for T_DISCON_IND or T_CONN_CONN 3563 * ack validity of request (T_OK_ACK) after memory committed 3564 */ 3565 3566 if (err) 3567 size = sizeof (struct T_discon_ind); 3568 else { 3569 /* 3570 * calculate length of T_CONN_CON message 3571 */ 3572 olen = 0; 3573 if (cl_ep->te_flag & TL_SETCRED) { 3574 olen = (t_scalar_t)sizeof (struct opthdr) + 3575 OPTLEN(sizeof (tl_credopt_t)); 3576 } else if (cl_ep->te_flag & TL_SETUCRED) { 3577 olen = (t_scalar_t)sizeof (struct opthdr) + 3578 OPTLEN(ucredsize); 3579 } 3580 size = T_ALIGN(sizeof (struct T_conn_con) + 3581 acc_ep->te_alen) + olen; 3582 } 3583 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3584 /* 3585 * roll back state changes 3586 */ 3587 tep->te_state = TS_WRES_CIND; 3588 tl_memrecover(wq, mp, size); 3589 freemsg(ackmp); 3590 if (client_noclose_set) 3591 tl_closeok(cl_ep); 3592 tl_closeok(acc_ep); 3593 tl_refrele(acc_ep); 3594 return; 3595 } 3596 mp = NULL; 3597 } 3598 3599 /* 3600 * Now ack validity of request 3601 */ 3602 if (tep->te_nicon == 1) { 3603 if (tep == acc_ep) 3604 tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state); 3605 else 3606 
tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state); 3607 } else 3608 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state); 3609 3610 /* 3611 * send T_DISCON_IND now if client state validation failed earlier 3612 */ 3613 if (err) { 3614 tl_ok_ack(wq, ackmp, prim); 3615 /* 3616 * flush the queues - why always ? 3617 */ 3618 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3619 3620 dimp = tl_resizemp(respmp, size); 3621 if (! dimp) { 3622 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3623 SL_TRACE|SL_ERROR, 3624 "tl_conn_res:con_ind:allocb failure")); 3625 tl_merror(wq, respmp, ENOMEM); 3626 tl_closeok(acc_ep); 3627 if (client_noclose_set) 3628 tl_closeok(cl_ep); 3629 tl_refrele(acc_ep); 3630 return; 3631 } 3632 if (dimp->b_cont) { 3633 /* no user data in provider generated discon ind */ 3634 freemsg(dimp->b_cont); 3635 dimp->b_cont = NULL; 3636 } 3637 3638 DB_TYPE(dimp) = M_PROTO; 3639 di = (struct T_discon_ind *)dimp->b_rptr; 3640 di->PRIM_type = T_DISCON_IND; 3641 di->DISCON_reason = err; 3642 di->SEQ_number = BADSEQNUM; 3643 3644 tep->te_state = TS_IDLE; 3645 /* 3646 * send T_DISCON_IND message 3647 */ 3648 putnext(acc_ep->te_rq, dimp); 3649 if (client_noclose_set) 3650 tl_closeok(cl_ep); 3651 tl_closeok(acc_ep); 3652 tl_refrele(acc_ep); 3653 return; 3654 } 3655 3656 /* 3657 * now start connecting the accepting endpoint 3658 */ 3659 if (tep != acc_ep) 3660 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3661 3662 if (cl_ep == NULL) { 3663 /* 3664 * The client has already closed. Send up any queued messages 3665 * and change the state accordingly. 
3666 */ 3667 tl_ok_ack(wq, ackmp, prim); 3668 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3669 3670 /* 3671 * remove endpoint from incoming connection 3672 * delete client from list of incoming connections 3673 */ 3674 tl_freetip(tep, tip); 3675 freemsg(mp); 3676 tl_closeok(acc_ep); 3677 tl_refrele(acc_ep); 3678 return; 3679 } else if (tip->ti_mp != NULL) { 3680 /* 3681 * The client could have queued a T_DISCON_IND which needs 3682 * to be sent up. 3683 * Note that t_discon_req can not operate the same as 3684 * t_data_req since it is not possible for it to putbq 3685 * the message and return -1 due to the use of qwriter. 3686 */ 3687 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3688 } 3689 3690 /* 3691 * prepare connect confirm T_CONN_CON message 3692 */ 3693 3694 /* 3695 * allocate the message - original data blocks 3696 * retained in the returned mblk 3697 */ 3698 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) { 3699 ccmp = tl_resizemp(respmp, size); 3700 if (ccmp == NULL) { 3701 tl_ok_ack(wq, ackmp, prim); 3702 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3703 SL_TRACE|SL_ERROR, 3704 "tl_conn_res:conn_con:allocb failure")); 3705 tl_merror(wq, respmp, ENOMEM); 3706 tl_closeok(acc_ep); 3707 if (client_noclose_set) 3708 tl_closeok(cl_ep); 3709 tl_refrele(acc_ep); 3710 return; 3711 } 3712 3713 DB_TYPE(ccmp) = M_PROTO; 3714 cc = (struct T_conn_con *)ccmp->b_rptr; 3715 cc->PRIM_type = T_CONN_CON; 3716 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con); 3717 cc->RES_length = acc_ep->te_alen; 3718 addr_startp = ccmp->b_rptr + cc->RES_offset; 3719 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen); 3720 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3721 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset + 3722 cc->RES_length); 3723 cc->OPT_length = olen; 3724 tl_fill_option(ccmp->b_rptr + cc->OPT_offset, 3725 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag, 3726 cl_ep->te_credp); 3727 } else { 3728 cc->OPT_offset = 0; 3729 cc->OPT_length = 0; 3730 } 3731 /* 3732 * 
Forward the credential in the packet so it can be picked up 3733 * at the higher layers for more complete credential processing 3734 */ 3735 mblk_setcred(ccmp, acc_ep->te_credp); 3736 DB_CPID(ccmp) = acc_ep->te_cpid; 3737 } else { 3738 freemsg(respmp); 3739 respmp = NULL; 3740 } 3741 3742 /* 3743 * make connection linking 3744 * accepting and client endpoints 3745 * No need to increment references: 3746 * on client: it should already have one from tip->ti_tep linkage. 3747 * on acceptor is should already have one from the table lookup. 3748 * 3749 * At this point both client and acceptor can't close. Set client 3750 * serializer to acceptor's. 3751 */ 3752 ASSERT(cl_ep->te_refcnt >= 2); 3753 ASSERT(acc_ep->te_refcnt >= 2); 3754 ASSERT(cl_ep->te_conp == NULL); 3755 ASSERT(acc_ep->te_conp == NULL); 3756 cl_ep->te_conp = acc_ep; 3757 acc_ep->te_conp = cl_ep; 3758 ASSERT(cl_ep->te_ser == tep->te_ser); 3759 if (switch_client_serializer) { 3760 mutex_enter(&cl_ep->te_ser_lock); 3761 if (cl_ep->te_ser_count > 0) { 3762 switch_client_serializer = B_FALSE; 3763 tl_serializer_noswitch++; 3764 } else { 3765 /* 3766 * Move client to the acceptor's serializer. 3767 */ 3768 tl_serializer_refhold(acc_ep->te_ser); 3769 tl_serializer_refrele(cl_ep->te_ser); 3770 cl_ep->te_ser = acc_ep->te_ser; 3771 } 3772 mutex_exit(&cl_ep->te_ser_lock); 3773 } 3774 if (!switch_client_serializer) { 3775 /* 3776 * It is not possible to switch client to use acceptor's. 3777 * Move acceptor to client's serializer (which is the same as 3778 * listener's). 
3779 */ 3780 tl_serializer_refhold(cl_ep->te_ser); 3781 tl_serializer_refrele(acc_ep->te_ser); 3782 acc_ep->te_ser = cl_ep->te_ser; 3783 } 3784 3785 TL_REMOVE_PEER(cl_ep->te_oconp); 3786 TL_REMOVE_PEER(acc_ep->te_oconp); 3787 3788 /* 3789 * remove endpoint from incoming connection 3790 * delete client from list of incoming connections 3791 */ 3792 tip->ti_tep = NULL; 3793 tl_freetip(tep, tip); 3794 tl_ok_ack(wq, ackmp, prim); 3795 3796 /* 3797 * data blocks already linked in reallocb() 3798 */ 3799 3800 /* 3801 * link queues so that I_SENDFD will work 3802 */ 3803 if (! IS_SOCKET(tep)) { 3804 acc_ep->te_wq->q_next = cl_ep->te_rq; 3805 cl_ep->te_wq->q_next = acc_ep->te_rq; 3806 } 3807 3808 /* 3809 * send T_CONN_CON up on client side unless it was already 3810 * done (for a socket). In cases any data or ordrel req has been 3811 * queued make sure that the service procedure runs. 3812 */ 3813 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) { 3814 enableok(cl_ep->te_wq); 3815 TL_QENABLE(cl_ep); 3816 if (ccmp != NULL) 3817 freemsg(ccmp); 3818 } else { 3819 /* 3820 * change client state on TE_CONN_CON event 3821 */ 3822 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state); 3823 putnext(cl_ep->te_rq, ccmp); 3824 } 3825 3826 /* Mark the both endpoints as accepted */ 3827 cl_ep->te_flag |= TL_ACCEPTED; 3828 acc_ep->te_flag |= TL_ACCEPTED; 3829 3830 /* 3831 * Allow client and acceptor to close. 
3832 */ 3833 tl_closeok(acc_ep); 3834 if (client_noclose_set) 3835 tl_closeok(cl_ep); 3836 } 3837 3838 3839 3840 3841 static void 3842 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3843 { 3844 queue_t *wq; 3845 struct T_discon_req *dr; 3846 ssize_t msz; 3847 tl_endpt_t *peer_tep = tep->te_conp; 3848 tl_endpt_t *srv_tep = tep->te_oconp; 3849 tl_icon_t *tip; 3850 size_t size; 3851 mblk_t *ackmp, *dimp, *respmp; 3852 struct T_discon_ind *di; 3853 t_scalar_t save_state, new_state; 3854 3855 if (tep->te_closing) { 3856 freemsg(mp); 3857 return; 3858 } 3859 3860 if ((peer_tep != NULL) && peer_tep->te_closing) { 3861 TL_UNCONNECT(tep->te_conp); 3862 peer_tep = NULL; 3863 } 3864 if ((srv_tep != NULL) && srv_tep->te_closing) { 3865 TL_UNCONNECT(tep->te_oconp); 3866 srv_tep = NULL; 3867 } 3868 3869 wq = tep->te_wq; 3870 3871 /* 3872 * preallocate memory for: 3873 * 1. max of T_ERROR_ACK and T_OK_ACK 3874 * ==> known max T_ERROR_ACK 3875 * 2. for T_DISCON_IND 3876 */ 3877 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3878 if (! ackmp) { 3879 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3880 return; 3881 } 3882 /* 3883 * memory committed for T_OK_ACK/T_ERROR_ACK now 3884 * will be committed for T_DISCON_IND later 3885 */ 3886 3887 dr = (struct T_discon_req *)mp->b_rptr; 3888 msz = MBLKL(mp); 3889 3890 /* 3891 * validate the state 3892 */ 3893 save_state = new_state = tep->te_state; 3894 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3895 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3896 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3897 SL_TRACE|SL_ERROR, 3898 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3899 tep->te_state)); 3900 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3901 freemsg(mp); 3902 return; 3903 } 3904 /* 3905 * Defer committing the state change until it is determined if 3906 * the message will be queued with the tl_icon or not. 
3907 */ 3908 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3909 3910 /* validate the message */ 3911 if (msz < sizeof (struct T_discon_req)) { 3912 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3913 "tl_discon_req:invalid message")); 3914 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3915 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3916 freemsg(mp); 3917 return; 3918 } 3919 3920 /* 3921 * if server, then validate that client exists 3922 * by connection sequence number etc. 3923 */ 3924 if (tep->te_nicon > 0) { /* server */ 3925 3926 /* 3927 * search server list for disconnect client 3928 */ 3929 tip = tl_icon_find(tep, dr->SEQ_number); 3930 if (tip == NULL) { 3931 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3932 SL_TRACE|SL_ERROR, 3933 "tl_discon_req:no disconnect endpoint")); 3934 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3935 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3936 freemsg(mp); 3937 return; 3938 } 3939 /* 3940 * If ti_tep is NULL the client has already closed. In this case 3941 * the code below will avoid any action on the client side. 3942 */ 3943 3944 ASSERT(IMPLY(tip->ti_tep != NULL, 3945 tip->ti_tep->te_seqno == dr->SEQ_number)); 3946 peer_tep = tip->ti_tep; 3947 } 3948 3949 /* 3950 * preallocate now for T_DISCON_IND 3951 * ack validity of request (T_OK_ACK) after memory committed 3952 */ 3953 size = sizeof (struct T_discon_ind); 3954 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3955 tl_memrecover(wq, mp, size); 3956 freemsg(ackmp); 3957 return; 3958 } 3959 3960 /* 3961 * prepare message to ack validity of request 3962 */ 3963 if (tep->te_nicon == 0) 3964 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3965 else 3966 if (tep->te_nicon == 1) 3967 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3968 else 3969 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3970 3971 /* 3972 * Flushing queues according to TPI. Using the old state. 
3973 */ 3974 if ((tep->te_nicon <= 1) && 3975 ((save_state == TS_DATA_XFER) || 3976 (save_state == TS_WIND_ORDREL) || 3977 (save_state == TS_WREQ_ORDREL))) 3978 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3979 3980 /* send T_OK_ACK up */ 3981 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3982 3983 /* 3984 * now do disconnect business 3985 */ 3986 if (tep->te_nicon > 0) { /* listener */ 3987 if (peer_tep != NULL && !peer_tep->te_closing) { 3988 /* 3989 * disconnect incoming connect request pending to tep 3990 */ 3991 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 3992 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3993 SL_TRACE|SL_ERROR, 3994 "tl_discon_req: reallocb failed")); 3995 tep->te_state = new_state; 3996 tl_merror(wq, respmp, ENOMEM); 3997 return; 3998 } 3999 di = (struct T_discon_ind *)dimp->b_rptr; 4000 di->SEQ_number = BADSEQNUM; 4001 save_state = peer_tep->te_state; 4002 peer_tep->te_state = TS_IDLE; 4003 4004 TL_REMOVE_PEER(peer_tep->te_oconp); 4005 enableok(peer_tep->te_wq); 4006 TL_QENABLE(peer_tep); 4007 } else { 4008 freemsg(respmp); 4009 dimp = NULL; 4010 } 4011 4012 /* 4013 * remove endpoint from incoming connection list 4014 * - remove disconnect client from list on server 4015 */ 4016 tl_freetip(tep, tip); 4017 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4018 /* 4019 * disconnect an outgoing request pending from tep 4020 */ 4021 4022 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4023 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4024 SL_TRACE|SL_ERROR, 4025 "tl_discon_req: reallocb failed")); 4026 tep->te_state = new_state; 4027 tl_merror(wq, respmp, ENOMEM); 4028 return; 4029 } 4030 di = (struct T_discon_ind *)dimp->b_rptr; 4031 DB_TYPE(dimp) = M_PROTO; 4032 di->PRIM_type = T_DISCON_IND; 4033 di->DISCON_reason = ECONNRESET; 4034 di->SEQ_number = tep->te_seqno; 4035 4036 /* 4037 * If this is a socket the T_DISCON_IND is queued with 4038 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4039 * from the list of pending connections. 
4040 * Note that when te_oconp is set the peer better have 4041 * a t_connind_t for the client. 4042 */ 4043 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4044 /* 4045 * No need to check that 4046 * ti_tep == NULL since the T_DISCON_IND 4047 * takes precedence over other queued 4048 * messages. 4049 */ 4050 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4051 peer_tep = NULL; 4052 dimp = NULL; 4053 /* 4054 * Can't clear te_oconp since tl_co_unconnect needs 4055 * it as a hint not to free the tep. 4056 * Keep the state unchanged since tl_conn_res inspects 4057 * it. 4058 */ 4059 new_state = tep->te_state; 4060 } else { 4061 /* Found - delete it */ 4062 tip = tl_icon_find(peer_tep, tep->te_seqno); 4063 if (tip != NULL) { 4064 ASSERT(tep == tip->ti_tep); 4065 save_state = peer_tep->te_state; 4066 if (peer_tep->te_nicon == 1) 4067 peer_tep->te_state = 4068 NEXTSTATE(TE_DISCON_IND2, 4069 peer_tep->te_state); 4070 else 4071 peer_tep->te_state = 4072 NEXTSTATE(TE_DISCON_IND3, 4073 peer_tep->te_state); 4074 tl_freetip(peer_tep, tip); 4075 } 4076 ASSERT(tep->te_oconp != NULL); 4077 TL_UNCONNECT(tep->te_oconp); 4078 } 4079 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4080 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4081 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4082 SL_TRACE|SL_ERROR, 4083 "tl_discon_req: reallocb failed")); 4084 tep->te_state = new_state; 4085 tl_merror(wq, respmp, ENOMEM); 4086 return; 4087 } 4088 di = (struct T_discon_ind *)dimp->b_rptr; 4089 di->SEQ_number = BADSEQNUM; 4090 4091 save_state = peer_tep->te_state; 4092 peer_tep->te_state = TS_IDLE; 4093 } else { 4094 /* Not connected */ 4095 tep->te_state = new_state; 4096 freemsg(respmp); 4097 return; 4098 } 4099 4100 /* Commit state changes */ 4101 tep->te_state = new_state; 4102 4103 if (peer_tep == NULL) { 4104 ASSERT(dimp == NULL); 4105 goto done; 4106 } 4107 /* 4108 * Flush queues on peer before sending up 4109 * T_DISCON_IND according to TPI 4110 */ 4111 4112 if ((save_state == TS_DATA_XFER) || 4113 (save_state == TS_WIND_ORDREL) || 4114 (save_state == TS_WREQ_ORDREL)) 4115 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4116 4117 DB_TYPE(dimp) = M_PROTO; 4118 di->PRIM_type = T_DISCON_IND; 4119 di->DISCON_reason = ECONNRESET; 4120 4121 /* 4122 * data blocks already linked into dimp by reallocb() 4123 */ 4124 /* 4125 * send indication message to peer user module 4126 */ 4127 ASSERT(dimp != NULL); 4128 putnext(peer_tep->te_rq, dimp); 4129 done: 4130 if (tep->te_conp) { /* disconnect pointers if connected */ 4131 ASSERT(! peer_tep->te_closing); 4132 4133 /* 4134 * Messages may be queued on peer's write queue 4135 * waiting to be processed by its write service 4136 * procedure. Before the pointer to the peer transport 4137 * structure is set to NULL, qenable the peer's write 4138 * queue so that the queued up messages are processed. 4139 */ 4140 if ((save_state == TS_DATA_XFER) || 4141 (save_state == TS_WIND_ORDREL) || 4142 (save_state == TS_WREQ_ORDREL)) 4143 TL_QENABLE(peer_tep); 4144 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4145 TL_UNCONNECT(peer_tep->te_conp); 4146 if (! 
IS_SOCKET(tep)) { 4147 /* 4148 * unlink the streams 4149 */ 4150 tep->te_wq->q_next = NULL; 4151 peer_tep->te_wq->q_next = NULL; 4152 } 4153 TL_UNCONNECT(tep->te_conp); 4154 } 4155 } 4156 4157 4158 static void 4159 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4160 { 4161 queue_t *wq; 4162 size_t ack_sz; 4163 mblk_t *ackmp; 4164 struct T_addr_ack *taa; 4165 4166 if (tep->te_closing) { 4167 freemsg(mp); 4168 return; 4169 } 4170 4171 wq = tep->te_wq; 4172 4173 /* 4174 * Note: T_ADDR_REQ message has only PRIM_type field 4175 * so it is already validated earlier. 4176 */ 4177 4178 if (IS_CLTS(tep) || 4179 (tep->te_state > TS_WREQ_ORDREL) || 4180 (tep->te_state < TS_DATA_XFER)) { 4181 /* 4182 * Either connectionless or connection oriented but not 4183 * in connected data transfer state or half-closed states. 4184 */ 4185 ack_sz = sizeof (struct T_addr_ack); 4186 if (tep->te_state >= TS_IDLE) 4187 /* is bound */ 4188 ack_sz += tep->te_alen; 4189 ackmp = reallocb(mp, ack_sz, 0); 4190 if (ackmp == NULL) { 4191 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4192 SL_TRACE|SL_ERROR, 4193 "tl_addr_req: reallocb failed")); 4194 tl_memrecover(wq, mp, ack_sz); 4195 return; 4196 } 4197 4198 taa = (struct T_addr_ack *)ackmp->b_rptr; 4199 4200 bzero(taa, sizeof (struct T_addr_ack)); 4201 4202 taa->PRIM_type = T_ADDR_ACK; 4203 ackmp->b_datap->db_type = M_PCPROTO; 4204 ackmp->b_wptr = (uchar_t *)&taa[1]; 4205 4206 if (tep->te_state >= TS_IDLE) { 4207 /* endpoint is bound */ 4208 taa->LOCADDR_length = tep->te_alen; 4209 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4210 4211 bcopy(tep->te_abuf, ackmp->b_wptr, 4212 tep->te_alen); 4213 ackmp->b_wptr += tep->te_alen; 4214 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4215 } 4216 4217 (void) qreply(wq, ackmp); 4218 } else { 4219 ASSERT(tep->te_state == TS_DATA_XFER || 4220 tep->te_state == TS_WIND_ORDREL || 4221 tep->te_state == TS_WREQ_ORDREL); 4222 /* connection oriented in data transfer */ 4223 tl_connected_cots_addr_req(mp, tep); 4224 } 4225 
} 4226 4227 4228 static void 4229 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4230 { 4231 tl_endpt_t *peer_tep; 4232 size_t ack_sz; 4233 mblk_t *ackmp; 4234 struct T_addr_ack *taa; 4235 uchar_t *addr_startp; 4236 4237 if (tep->te_closing) { 4238 freemsg(mp); 4239 return; 4240 } 4241 4242 ASSERT(tep->te_state >= TS_IDLE); 4243 4244 ack_sz = sizeof (struct T_addr_ack); 4245 ack_sz += T_ALIGN(tep->te_alen); 4246 peer_tep = tep->te_conp; 4247 ack_sz += peer_tep->te_alen; 4248 4249 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4250 if (ackmp == NULL) { 4251 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4252 "tl_connected_cots_addr_req: reallocb failed")); 4253 tl_memrecover(tep->te_wq, mp, ack_sz); 4254 return; 4255 } 4256 4257 taa = (struct T_addr_ack *)ackmp->b_rptr; 4258 4259 /* endpoint is bound */ 4260 taa->LOCADDR_length = tep->te_alen; 4261 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4262 4263 addr_startp = (uchar_t *)&taa[1]; 4264 4265 bcopy(tep->te_abuf, addr_startp, 4266 tep->te_alen); 4267 4268 taa->REMADDR_length = peer_tep->te_alen; 4269 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4270 taa->LOCADDR_length); 4271 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4272 bcopy(peer_tep->te_abuf, addr_startp, 4273 peer_tep->te_alen); 4274 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4275 taa->REMADDR_offset + peer_tep->te_alen; 4276 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4277 4278 putnext(tep->te_rq, ackmp); 4279 } 4280 4281 static void 4282 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4283 { 4284 if (IS_CLTS(tep)) { 4285 *ia = tl_clts_info_ack; 4286 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4287 } else { 4288 *ia = tl_cots_info_ack; 4289 if (IS_COTSORD(tep)) 4290 ia->SERV_type = T_COTS_ORD; 4291 } 4292 ia->TIDU_size = tl_tidusz; 4293 ia->CURRENT_state = tep->te_state; 4294 } 4295 4296 /* 4297 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4298 * tl_wput. 4299 */ 4300 static void 4301 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4302 { 4303 mblk_t *ackmp; 4304 t_uscalar_t cap_bits1; 4305 struct T_capability_ack *tcap; 4306 4307 if (tep->te_closing) { 4308 freemsg(mp); 4309 return; 4310 } 4311 4312 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4313 4314 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4315 M_PCPROTO, T_CAPABILITY_ACK); 4316 if (ackmp == NULL) { 4317 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4318 "tl_capability_req: reallocb failed")); 4319 tl_memrecover(tep->te_wq, mp, 4320 sizeof (struct T_capability_ack)); 4321 return; 4322 } 4323 4324 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4325 tcap->CAP_bits1 = 0; 4326 4327 if (cap_bits1 & TC1_INFO) { 4328 tl_copy_info(&tcap->INFO_ack, tep); 4329 tcap->CAP_bits1 |= TC1_INFO; 4330 } 4331 4332 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4333 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4334 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4335 } 4336 4337 putnext(tep->te_rq, ackmp); 4338 } 4339 4340 static void 4341 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4342 { 4343 if (! tep->te_closing) 4344 tl_info_req(mp, tep); 4345 else 4346 freemsg(mp); 4347 4348 tl_serializer_exit(tep); 4349 tl_refrele(tep); 4350 } 4351 4352 static void 4353 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4354 { 4355 mblk_t *ackmp; 4356 4357 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4358 M_PCPROTO, T_INFO_ACK); 4359 if (ackmp == NULL) { 4360 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4361 "tl_info_req: reallocb failed")); 4362 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4363 return; 4364 } 4365 4366 /* 4367 * fill in T_INFO_ACK contents 4368 */ 4369 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4370 4371 /* 4372 * send ack message 4373 */ 4374 putnext(tep->te_rq, ackmp); 4375 } 4376 4377 /* 4378 * Handle M_DATA, T_data_req and T_optdata_req. 
4379 * If this is a socket pass through T_optdata_req options unmodified. 4380 */ 4381 static void 4382 tl_data(mblk_t *mp, tl_endpt_t *tep) 4383 { 4384 queue_t *wq = tep->te_wq; 4385 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4386 ssize_t msz = MBLKL(mp); 4387 tl_endpt_t *peer_tep; 4388 queue_t *peer_rq; 4389 boolean_t closing = tep->te_closing; 4390 4391 if (IS_CLTS(tep)) { 4392 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4393 SL_TRACE|SL_ERROR, 4394 "tl_wput:clts:unattached M_DATA")); 4395 if (!closing) { 4396 tl_merror(wq, mp, EPROTO); 4397 } else { 4398 freemsg(mp); 4399 } 4400 return; 4401 } 4402 4403 /* 4404 * If the endpoint is closing it should still forward any data to the 4405 * peer (if it has one). If it is not allowed to forward it can just 4406 * free the message. 4407 */ 4408 if (closing && 4409 (tep->te_state != TS_DATA_XFER) && 4410 (tep->te_state != TS_WREQ_ORDREL)) { 4411 freemsg(mp); 4412 return; 4413 } 4414 4415 if (DB_TYPE(mp) == M_PROTO) { 4416 if (prim->type == T_DATA_REQ && 4417 msz < sizeof (struct T_data_req)) { 4418 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4419 SL_TRACE|SL_ERROR, 4420 "tl_data:T_DATA_REQ:invalid message")); 4421 if (!closing) { 4422 tl_merror(wq, mp, EPROTO); 4423 } else { 4424 freemsg(mp); 4425 } 4426 return; 4427 } else if (prim->type == T_OPTDATA_REQ && 4428 (msz < sizeof (struct T_optdata_req) || 4429 !IS_SOCKET(tep))) { 4430 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4431 SL_TRACE|SL_ERROR, 4432 "tl_data:T_OPTDATA_REQ:invalid message")); 4433 if (!closing) { 4434 tl_merror(wq, mp, EPROTO); 4435 } else { 4436 freemsg(mp); 4437 } 4438 return; 4439 } 4440 } 4441 4442 /* 4443 * connection oriented provider 4444 */ 4445 switch (tep->te_state) { 4446 case TS_IDLE: 4447 /* 4448 * Other end not here - do nothing. 
4449 */ 4450 freemsg(mp); 4451 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4452 "tl_data:cots with endpoint idle")); 4453 return; 4454 4455 case TS_DATA_XFER: 4456 /* valid states */ 4457 if (tep->te_conp != NULL) 4458 break; 4459 4460 if (tep->te_oconp == NULL) { 4461 if (!closing) { 4462 tl_merror(wq, mp, EPROTO); 4463 } else { 4464 freemsg(mp); 4465 } 4466 return; 4467 } 4468 /* 4469 * For a socket the T_CONN_CON is sent early thus 4470 * the peer might not yet have accepted the connection. 4471 * If we are closing queue the packet with the T_CONN_IND. 4472 * Otherwise defer processing the packet until the peer 4473 * accepts the connection. 4474 * Note that the queue is noenabled when we go into this 4475 * state. 4476 */ 4477 if (!closing) { 4478 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4479 SL_TRACE|SL_ERROR, 4480 "tl_data: ocon")); 4481 TL_PUTBQ(tep, mp); 4482 return; 4483 } 4484 if (DB_TYPE(mp) == M_PROTO) { 4485 if (msz < sizeof (t_scalar_t)) { 4486 freemsg(mp); 4487 return; 4488 } 4489 /* reuse message block - just change REQ to IND */ 4490 if (prim->type == T_DATA_REQ) 4491 prim->type = T_DATA_IND; 4492 else 4493 prim->type = T_OPTDATA_IND; 4494 } 4495 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4496 return; 4497 4498 case TS_WREQ_ORDREL: 4499 if (tep->te_conp == NULL) { 4500 /* 4501 * Other end closed - generate discon_ind 4502 * with reason 0 to cause an EPIPE but no 4503 * read side error on AF_UNIX sockets. 
4504 */ 4505 freemsg(mp); 4506 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4507 SL_TRACE|SL_ERROR, 4508 "tl_data: WREQ_ORDREL and no peer")); 4509 tl_discon_ind(tep, 0); 4510 return; 4511 } 4512 break; 4513 4514 default: 4515 /* invalid state for event TE_DATA_REQ */ 4516 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4517 "tl_data:cots:out of state")); 4518 tl_merror(wq, mp, EPROTO); 4519 return; 4520 } 4521 /* 4522 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4523 * (State stays same on this event) 4524 */ 4525 4526 /* 4527 * get connected endpoint 4528 */ 4529 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4530 freemsg(mp); 4531 /* Peer closed */ 4532 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4533 "tl_data: peer gone")); 4534 return; 4535 } 4536 4537 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4538 peer_rq = peer_tep->te_rq; 4539 4540 /* 4541 * Put it back if flow controlled 4542 * Note: Messages already on queue when we are closing is bounded 4543 * so we can ignore flow control. 
4544 */ 4545 if (!canputnext(peer_rq) && !closing) { 4546 TL_PUTBQ(tep, mp); 4547 return; 4548 } 4549 4550 /* 4551 * validate peer state 4552 */ 4553 switch (peer_tep->te_state) { 4554 case TS_DATA_XFER: 4555 case TS_WIND_ORDREL: 4556 /* valid states */ 4557 break; 4558 default: 4559 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4560 "tl_data:rx side:invalid state")); 4561 tl_merror(peer_tep->te_wq, mp, EPROTO); 4562 return; 4563 } 4564 if (DB_TYPE(mp) == M_PROTO) { 4565 /* reuse message block - just change REQ to IND */ 4566 if (prim->type == T_DATA_REQ) 4567 prim->type = T_DATA_IND; 4568 else 4569 prim->type = T_OPTDATA_IND; 4570 } 4571 /* 4572 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4573 * (peer state stays same on this event) 4574 */ 4575 /* 4576 * send data to connected peer 4577 */ 4578 putnext(peer_rq, mp); 4579 } 4580 4581 4582 4583 static void 4584 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4585 { 4586 queue_t *wq = tep->te_wq; 4587 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4588 ssize_t msz = MBLKL(mp); 4589 tl_endpt_t *peer_tep; 4590 queue_t *peer_rq; 4591 boolean_t closing = tep->te_closing; 4592 4593 if (msz < sizeof (struct T_exdata_req)) { 4594 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4595 "tl_exdata:invalid message")); 4596 if (!closing) { 4597 tl_merror(wq, mp, EPROTO); 4598 } else { 4599 freemsg(mp); 4600 } 4601 return; 4602 } 4603 4604 /* 4605 * If the endpoint is closing it should still forward any data to the 4606 * peer (if it has one). If it is not allowed to forward it can just 4607 * free the message. 4608 */ 4609 if (closing && 4610 (tep->te_state != TS_DATA_XFER) && 4611 (tep->te_state != TS_WREQ_ORDREL)) { 4612 freemsg(mp); 4613 return; 4614 } 4615 4616 /* 4617 * validate state 4618 */ 4619 switch (tep->te_state) { 4620 case TS_IDLE: 4621 /* 4622 * Other end not here - do nothing. 
4623 */ 4624 freemsg(mp); 4625 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4626 "tl_exdata:cots with endpoint idle")); 4627 return; 4628 4629 case TS_DATA_XFER: 4630 /* valid states */ 4631 if (tep->te_conp != NULL) 4632 break; 4633 4634 if (tep->te_oconp == NULL) { 4635 if (!closing) { 4636 tl_merror(wq, mp, EPROTO); 4637 } else { 4638 freemsg(mp); 4639 } 4640 return; 4641 } 4642 /* 4643 * For a socket the T_CONN_CON is sent early thus 4644 * the peer might not yet have accepted the connection. 4645 * If we are closing queue the packet with the T_CONN_IND. 4646 * Otherwise defer processing the packet until the peer 4647 * accepts the connection. 4648 * Note that the queue is noenabled when we go into this 4649 * state. 4650 */ 4651 if (!closing) { 4652 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4653 SL_TRACE|SL_ERROR, 4654 "tl_exdata: ocon")); 4655 TL_PUTBQ(tep, mp); 4656 return; 4657 } 4658 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4659 "tl_exdata: closing socket ocon")); 4660 prim->type = T_EXDATA_IND; 4661 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4662 return; 4663 4664 case TS_WREQ_ORDREL: 4665 if (tep->te_conp == NULL) { 4666 /* 4667 * Other end closed - generate discon_ind 4668 * with reason 0 to cause an EPIPE but no 4669 * read side error on AF_UNIX sockets. 
4670 */ 4671 freemsg(mp); 4672 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4673 SL_TRACE|SL_ERROR, 4674 "tl_exdata: WREQ_ORDREL and no peer")); 4675 tl_discon_ind(tep, 0); 4676 return; 4677 } 4678 break; 4679 4680 default: 4681 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4682 SL_TRACE|SL_ERROR, 4683 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4684 tep->te_state)); 4685 tl_merror(wq, mp, EPROTO); 4686 return; 4687 } 4688 /* 4689 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4690 * (state stays same on this event) 4691 */ 4692 4693 /* 4694 * get connected endpoint 4695 */ 4696 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4697 freemsg(mp); 4698 /* Peer closed */ 4699 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4700 "tl_exdata: peer gone")); 4701 return; 4702 } 4703 4704 peer_rq = peer_tep->te_rq; 4705 4706 /* 4707 * Put it back if flow controlled 4708 * Note: Messages already on queue when we are closing is bounded 4709 * so we can ignore flow control. 4710 */ 4711 if (!canputnext(peer_rq) && !closing) { 4712 TL_PUTBQ(tep, mp); 4713 return; 4714 } 4715 4716 /* 4717 * validate state on peer 4718 */ 4719 switch (peer_tep->te_state) { 4720 case TS_DATA_XFER: 4721 case TS_WIND_ORDREL: 4722 /* valid states */ 4723 break; 4724 default: 4725 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4726 "tl_exdata:rx side:invalid state")); 4727 tl_merror(peer_tep->te_wq, mp, EPROTO); 4728 return; 4729 } 4730 /* 4731 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4732 * (peer state stays same on this event) 4733 */ 4734 /* 4735 * reuse message block 4736 */ 4737 prim->type = T_EXDATA_IND; 4738 4739 /* 4740 * send data to connected peer 4741 */ 4742 putnext(peer_rq, mp); 4743 } 4744 4745 4746 4747 static void 4748 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4749 { 4750 queue_t *wq = tep->te_wq; 4751 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4752 ssize_t msz = MBLKL(mp); 4753 tl_endpt_t *peer_tep; 4754 
queue_t *peer_rq; 4755 boolean_t closing = tep->te_closing; 4756 4757 if (msz < sizeof (struct T_ordrel_req)) { 4758 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4759 "tl_ordrel:invalid message")); 4760 if (!closing) { 4761 tl_merror(wq, mp, EPROTO); 4762 } else { 4763 freemsg(mp); 4764 } 4765 return; 4766 } 4767 4768 /* 4769 * validate state 4770 */ 4771 switch (tep->te_state) { 4772 case TS_DATA_XFER: 4773 case TS_WREQ_ORDREL: 4774 /* valid states */ 4775 if (tep->te_conp != NULL) 4776 break; 4777 4778 if (tep->te_oconp == NULL) 4779 break; 4780 4781 /* 4782 * For a socket the T_CONN_CON is sent early thus 4783 * the peer might not yet have accepted the connection. 4784 * If we are closing queue the packet with the T_CONN_IND. 4785 * Otherwise defer processing the packet until the peer 4786 * accepts the connection. 4787 * Note that the queue is noenabled when we go into this 4788 * state. 4789 */ 4790 if (!closing) { 4791 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4792 SL_TRACE|SL_ERROR, 4793 "tl_ordlrel: ocon")); 4794 TL_PUTBQ(tep, mp); 4795 return; 4796 } 4797 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4798 "tl_ordlrel: closing socket ocon")); 4799 prim->type = T_ORDREL_IND; 4800 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4801 return; 4802 4803 default: 4804 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4805 SL_TRACE|SL_ERROR, 4806 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4807 tep->te_state)); 4808 if (!closing) { 4809 tl_merror(wq, mp, EPROTO); 4810 } else { 4811 freemsg(mp); 4812 } 4813 return; 4814 } 4815 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4816 4817 /* 4818 * get connected endpoint 4819 */ 4820 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4821 /* Peer closed */ 4822 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4823 "tl_ordrel: peer gone")); 4824 freemsg(mp); 4825 return; 4826 } 4827 4828 peer_rq = peer_tep->te_rq; 4829 4830 /* 4831 * Put it back if flow controlled 
except when we are closing. 4832 * Note: Messages already on queue when we are closing is bounded 4833 * so we can ignore flow control. 4834 */ 4835 if (! canputnext(peer_rq) && !closing) { 4836 TL_PUTBQ(tep, mp); 4837 return; 4838 } 4839 4840 /* 4841 * validate state on peer 4842 */ 4843 switch (peer_tep->te_state) { 4844 case TS_DATA_XFER: 4845 case TS_WIND_ORDREL: 4846 /* valid states */ 4847 break; 4848 default: 4849 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4850 "tl_ordrel:rx side:invalid state")); 4851 tl_merror(peer_tep->te_wq, mp, EPROTO); 4852 return; 4853 } 4854 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4855 4856 /* 4857 * reuse message block 4858 */ 4859 prim->type = T_ORDREL_IND; 4860 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4861 "tl_ordrel: send ordrel_ind")); 4862 4863 /* 4864 * send data to connected peer 4865 */ 4866 putnext(peer_rq, mp); 4867 } 4868 4869 4870 /* 4871 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 4872 */ 4873 static void 4874 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4875 { 4876 size_t err_sz; 4877 tl_endpt_t *tep; 4878 struct T_unitdata_req *udreq; 4879 mblk_t *err_mp; 4880 t_scalar_t alen; 4881 t_scalar_t olen; 4882 struct T_uderror_ind *uderr; 4883 uchar_t *addr_startp; 4884 4885 err_sz = sizeof (struct T_uderror_ind); 4886 tep = (tl_endpt_t *)wq->q_ptr; 4887 udreq = (struct T_unitdata_req *)mp->b_rptr; 4888 alen = udreq->DEST_length; 4889 olen = udreq->OPT_length; 4890 4891 if (alen > 0) 4892 err_sz = T_ALIGN(err_sz + alen); 4893 if (olen > 0) 4894 err_sz += olen; 4895 4896 err_mp = allocb(err_sz, BPRI_MED); 4897 if (! 
err_mp) { 4898 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4899 "tl_uderr:allocb failure")); 4900 /* 4901 * Note: no rollback of state needed as it does 4902 * not change in connectionless transport 4903 */ 4904 tl_memrecover(wq, mp, err_sz); 4905 return; 4906 } 4907 4908 DB_TYPE(err_mp) = M_PROTO; 4909 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4910 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4911 uderr->PRIM_type = T_UDERROR_IND; 4912 uderr->ERROR_type = err; 4913 uderr->DEST_length = alen; 4914 uderr->OPT_length = olen; 4915 if (alen <= 0) { 4916 uderr->DEST_offset = 0; 4917 } else { 4918 uderr->DEST_offset = 4919 (t_scalar_t)sizeof (struct T_uderror_ind); 4920 addr_startp = mp->b_rptr + udreq->DEST_offset; 4921 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4922 (size_t)alen); 4923 } 4924 if (olen <= 0) { 4925 uderr->OPT_offset = 0; 4926 } else { 4927 uderr->OPT_offset = 4928 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4929 uderr->DEST_length); 4930 addr_startp = mp->b_rptr + udreq->OPT_offset; 4931 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4932 (size_t)olen); 4933 } 4934 freemsg(mp); 4935 4936 /* 4937 * send indication message 4938 */ 4939 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4940 4941 qreply(wq, err_mp); 4942 } 4943 4944 static void 4945 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4946 { 4947 queue_t *wq = tep->te_wq; 4948 4949 if (!tep->te_closing && (wq->q_first != NULL)) { 4950 TL_PUTQ(tep, mp); 4951 } else if (tep->te_rq != NULL) 4952 tl_unitdata(mp, tep); 4953 else 4954 freemsg(mp); 4955 4956 tl_serializer_exit(tep); 4957 tl_refrele(tep); 4958 } 4959 4960 /* 4961 * Handle T_unitdata_req. 4962 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 4963 * If this is a socket pass through options unmodified. 
4964 */ 4965 static void 4966 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4967 { 4968 queue_t *wq = tep->te_wq; 4969 soux_addr_t ux_addr; 4970 tl_addr_t destaddr; 4971 uchar_t *addr_startp; 4972 tl_endpt_t *peer_tep; 4973 struct T_unitdata_ind *udind; 4974 struct T_unitdata_req *udreq; 4975 ssize_t msz, ui_sz; 4976 t_scalar_t alen, aoff, olen, ooff; 4977 t_scalar_t oldolen = 0; 4978 4979 udreq = (struct T_unitdata_req *)mp->b_rptr; 4980 msz = MBLKL(mp); 4981 4982 /* 4983 * validate the state 4984 */ 4985 if (tep->te_state != TS_IDLE) { 4986 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4987 SL_TRACE|SL_ERROR, 4988 "tl_wput:T_CONN_REQ:out of state")); 4989 tl_merror(wq, mp, EPROTO); 4990 return; 4991 } 4992 /* 4993 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 4994 * (state does not change on this event) 4995 */ 4996 4997 /* 4998 * validate the message 4999 * Note: dereference fields in struct inside message only 5000 * after validating the message length. 5001 */ 5002 if (msz < sizeof (struct T_unitdata_req)) { 5003 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5004 "tl_unitdata:invalid message length")); 5005 tl_merror(wq, mp, EINVAL); 5006 return; 5007 } 5008 alen = udreq->DEST_length; 5009 aoff = udreq->DEST_offset; 5010 oldolen = olen = udreq->OPT_length; 5011 ooff = udreq->OPT_offset; 5012 if (olen == 0) 5013 ooff = 0; 5014 5015 if (IS_SOCKET(tep)) { 5016 if ((alen != TL_SOUX_ADDRLEN) || 5017 (aoff < 0) || 5018 (aoff + alen > msz) || 5019 (olen < 0) || (ooff < 0) || 5020 ((olen > 0) && ((ooff + olen) > msz))) { 5021 (void) (STRLOG(TL_ID, tep->te_minor, 5022 1, SL_TRACE|SL_ERROR, 5023 "tl_unitdata_req: invalid socket addr " 5024 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5025 (int)msz, alen, aoff, olen, ooff)); 5026 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5027 return; 5028 } 5029 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5030 5031 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5032 (ux_addr.soua_magic != 
SOU_MAGIC_EXPLICIT)) { 5033 (void) (STRLOG(TL_ID, tep->te_minor, 5034 1, SL_TRACE|SL_ERROR, 5035 "tl_conn_req: invalid socket magic")); 5036 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5037 return; 5038 } 5039 } else { 5040 if ((alen < 0) || 5041 (aoff < 0) || 5042 ((alen > 0) && ((aoff + alen) > msz)) || 5043 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5044 ((aoff + alen) < 0) || 5045 ((olen > 0) && ((ooff + olen) > msz)) || 5046 (olen < 0) || 5047 (ooff < 0) || 5048 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5049 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5050 SL_TRACE|SL_ERROR, 5051 "tl_unitdata:invalid unit data message")); 5052 tl_merror(wq, mp, EINVAL); 5053 return; 5054 } 5055 } 5056 5057 /* Options not supported unless it's a socket */ 5058 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5059 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5060 "tl_unitdata:option use(unsupported) or zero len addr")); 5061 tl_uderr(wq, mp, EPROTO); 5062 return; 5063 } 5064 #ifdef DEBUG 5065 /* 5066 * Mild form of ASSERT()ion to detect broken TPI apps. 5067 * if (! assertion) 5068 * log warning; 5069 */ 5070 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5071 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5072 "tl_unitdata:addr overlaps TPI message")); 5073 } 5074 #endif 5075 /* 5076 * get destination endpoint 5077 */ 5078 destaddr.ta_alen = alen; 5079 destaddr.ta_abuf = mp->b_rptr + aoff; 5080 destaddr.ta_zoneid = tep->te_zoneid; 5081 5082 /* 5083 * Check whether the destination is the same that was used previously 5084 * and the destination endpoint is in the right state. If something is 5085 * wrong, find destination again and cache it. 
5086 */ 5087 peer_tep = tep->te_lastep; 5088 5089 if ((peer_tep == NULL) || peer_tep->te_closing || 5090 (peer_tep->te_state != TS_IDLE) || 5091 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5092 /* 5093 * Not the same as cached destination , need to find the right 5094 * destination. 5095 */ 5096 peer_tep = (IS_SOCKET(tep) ? 5097 tl_sock_find_peer(tep, &ux_addr) : 5098 tl_find_peer(tep, &destaddr)); 5099 5100 if (peer_tep == NULL) { 5101 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5102 SL_TRACE|SL_ERROR, 5103 "tl_unitdata:no one at destination address")); 5104 tl_uderr(wq, mp, ECONNRESET); 5105 return; 5106 } 5107 5108 /* 5109 * Cache the new peer. 5110 */ 5111 if (tep->te_lastep != NULL) 5112 tl_refrele(tep->te_lastep); 5113 5114 tep->te_lastep = peer_tep; 5115 } 5116 5117 if (peer_tep->te_state != TS_IDLE) { 5118 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5119 "tl_unitdata:provider in invalid state")); 5120 tl_uderr(wq, mp, EPROTO); 5121 return; 5122 } 5123 5124 ASSERT(peer_tep->te_rq != NULL); 5125 5126 /* 5127 * Put it back if flow controlled except when we are closing. 5128 * Note: Messages already on queue when we are closing is bounded 5129 * so we can ignore flow control. 
5130 */ 5131 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5132 /* record what we are flow controlled on */ 5133 if (tep->te_flowq != NULL) { 5134 list_remove(&tep->te_flowq->te_flowlist, tep); 5135 } 5136 list_insert_head(&peer_tep->te_flowlist, tep); 5137 tep->te_flowq = peer_tep; 5138 TL_PUTBQ(tep, mp); 5139 return; 5140 } 5141 /* 5142 * prepare indication message 5143 */ 5144 5145 /* 5146 * calculate length of message 5147 */ 5148 if (peer_tep->te_flag & TL_SETCRED) { 5149 ASSERT(olen == 0); 5150 olen = (t_scalar_t)sizeof (struct opthdr) + 5151 OPTLEN(sizeof (tl_credopt_t)); 5152 /* 1 option only */ 5153 } else if (peer_tep->te_flag & TL_SETUCRED) { 5154 ASSERT(olen == 0); 5155 olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize); 5156 /* 1 option only */ 5157 } else if (peer_tep->te_flag & TL_SOCKUCRED) { 5158 /* Possibly more than one option */ 5159 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5160 OPTLEN(ucredsize); 5161 } 5162 5163 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5164 olen; 5165 /* 5166 * If the unitdata_ind fits and we are not adding options 5167 * reuse the udreq mblk. 5168 */ 5169 if (msz >= ui_sz && alen >= tep->te_alen && 5170 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5171 /* 5172 * Reuse the original mblk. Leave options in place. 5173 */ 5174 udind = (struct T_unitdata_ind *)mp->b_rptr; 5175 udind->PRIM_type = T_UNITDATA_IND; 5176 udind->SRC_length = tep->te_alen; 5177 addr_startp = mp->b_rptr + udind->SRC_offset; 5178 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5179 } else { 5180 /* Allocate a new T_unidata_ind message */ 5181 mblk_t *ui_mp; 5182 5183 ui_mp = allocb(ui_sz, BPRI_MED); 5184 if (! 
ui_mp) { 5185 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5186 "tl_unitdata:allocb failure:message queued")); 5187 tl_memrecover(wq, mp, ui_sz); 5188 return; 5189 } 5190 5191 /* 5192 * fill in T_UNITDATA_IND contents 5193 */ 5194 DB_TYPE(ui_mp) = M_PROTO; 5195 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5196 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5197 udind->PRIM_type = T_UNITDATA_IND; 5198 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5199 udind->SRC_length = tep->te_alen; 5200 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5201 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5202 udind->OPT_offset = 5203 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5204 udind->OPT_length = olen; 5205 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5206 if (oldolen != 0) { 5207 bcopy((void *)((uintptr_t)udreq + ooff), 5208 (void *)((uintptr_t)udind + 5209 udind->OPT_offset), 5210 oldolen); 5211 } 5212 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5213 oldolen, 5214 DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep), 5215 peer_tep->te_flag, peer_tep->te_credp); 5216 } else { 5217 bcopy((void *)((uintptr_t)udreq + ooff), 5218 (void *)((uintptr_t)udind + udind->OPT_offset), 5219 olen); 5220 } 5221 5222 /* 5223 * relink data blocks from mp to ui_mp 5224 */ 5225 ui_mp->b_cont = mp->b_cont; 5226 freeb(mp); 5227 mp = ui_mp; 5228 } 5229 /* 5230 * send indication message 5231 */ 5232 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5233 putnext(peer_tep->te_rq, mp); 5234 } 5235 5236 5237 5238 /* 5239 * Check if a given addr is in use. 5240 * Endpoint ptr returned or NULL if not found. 5241 * The name space is separate for each mode. This implies that 5242 * sockets get their own name space. 
5243 */ 5244 static tl_endpt_t * 5245 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5246 { 5247 tl_endpt_t *peer_tep = NULL; 5248 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5249 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5250 5251 ASSERT(! IS_SOCKET(tep)); 5252 5253 ASSERT(ap != NULL && ap->ta_alen > 0); 5254 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5255 ASSERT(ap->ta_abuf != NULL); 5256 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5257 ASSERT(IMPLY(rc == 0, 5258 (tep->te_zoneid == peer_tep->te_zoneid) && 5259 (tep->te_transport == peer_tep->te_transport))); 5260 5261 if ((rc == 0) && (peer_tep->te_closing)) { 5262 tl_refrele(peer_tep); 5263 peer_tep = NULL; 5264 } 5265 5266 return (peer_tep); 5267 } 5268 5269 /* 5270 * Find peer for a socket based on unix domain address. 5271 * For implicit addresses our peer can be found by minor number in ai hash. For 5272 * explici binds we look vnode address at addr_hash. 5273 */ 5274 static tl_endpt_t * 5275 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5276 { 5277 tl_endpt_t *peer_tep = NULL; 5278 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5279 tep->te_aihash : tep->te_addrhash; 5280 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5281 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5282 5283 ASSERT(IS_SOCKET(tep)); 5284 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5285 ASSERT(IMPLY(rc == 0, 5286 (tep->te_zoneid == peer_tep->te_zoneid) && 5287 (tep->te_transport == peer_tep->te_transport))); 5288 /* 5289 * Don't attempt to use closing peer. 5290 */ 5291 if ((peer_tep != NULL) && 5292 (peer_tep->te_closing || 5293 (peer_tep->te_zoneid != tep->te_zoneid))) { 5294 tl_refrele(peer_tep); 5295 peer_tep = NULL; 5296 } 5297 5298 return (peer_tep); 5299 } 5300 5301 /* 5302 * Generate a free addr and return it in struct pointed by ap 5303 * but allocating space for address buffer. 
5304 * The generated address will be at least 4 bytes long and, if req->ta_alen 5305 * exceeds 4 bytes, be req->ta_alen bytes long. 5306 * 5307 * If address is found it will be inserted in the hash. 5308 * 5309 * If req->ta_alen is larger than the default alen (4 bytes) the last 5310 * alen-4 bytes will always be the same as in req. 5311 * 5312 * Return 0 for failure. 5313 * Return non-zero for success. 5314 */ 5315 static boolean_t 5316 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5317 { 5318 t_scalar_t alen; 5319 uint32_t loopcnt; /* Limit loop to 2^32 */ 5320 5321 ASSERT(tep->te_hash_hndl != NULL); 5322 ASSERT(! IS_SOCKET(tep)); 5323 5324 if (tep->te_hash_hndl == NULL) 5325 return (B_FALSE); 5326 5327 /* 5328 * check if default addr is in use 5329 * if it is - bump it and try again 5330 */ 5331 if (req == NULL) { 5332 alen = sizeof (uint32_t); 5333 } else { 5334 alen = max(req->ta_alen, sizeof (uint32_t)); 5335 ASSERT(tep->te_zoneid == req->ta_zoneid); 5336 } 5337 5338 if (tep->te_alen < alen) { 5339 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5340 5341 /* 5342 * Not enough space in tep->ta_ap to hold the address, 5343 * allocate a bigger space. 5344 */ 5345 if (abuf == NULL) 5346 return (B_FALSE); 5347 5348 if (tep->te_alen > 0) 5349 kmem_free(tep->te_abuf, tep->te_alen); 5350 5351 tep->te_alen = alen; 5352 tep->te_abuf = abuf; 5353 } 5354 5355 /* Copy in the address in req */ 5356 if (req != NULL) { 5357 ASSERT(alen >= req->ta_alen); 5358 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5359 } 5360 5361 /* 5362 * First try minor number then try default addresses. 
5363 */ 5364 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5365 5366 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5367 if (mod_hash_insert_reserve(tep->te_addrhash, 5368 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5369 tep->te_hash_hndl) == 0) { 5370 /* 5371 * found free address 5372 */ 5373 tep->te_flag |= TL_ADDRHASHED; 5374 tep->te_hash_hndl = NULL; 5375 5376 return (B_TRUE); /* successful return */ 5377 } 5378 /* 5379 * Use default address. 5380 */ 5381 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5382 atomic_add_32(&tep->te_defaddr, 1); 5383 } 5384 5385 /* 5386 * Failed to find anything. 5387 */ 5388 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5389 "tl_get_any_addr:looped 2^32 times")); 5390 return (B_FALSE); 5391 } 5392 5393 /* 5394 * reallocb + set r/w ptrs to reflect size. 5395 */ 5396 static mblk_t * 5397 tl_resizemp(mblk_t *mp, ssize_t new_size) 5398 { 5399 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5400 return (NULL); 5401 5402 mp->b_rptr = DB_BASE(mp); 5403 mp->b_wptr = mp->b_rptr + new_size; 5404 return (mp); 5405 } 5406 5407 static void 5408 tl_cl_backenable(tl_endpt_t *tep) 5409 { 5410 list_t *l = &tep->te_flowlist; 5411 tl_endpt_t *elp; 5412 5413 ASSERT(IS_CLTS(tep)); 5414 5415 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5416 ASSERT(tep->te_ser == elp->te_ser); 5417 ASSERT(elp->te_flowq == tep); 5418 if (! elp->te_closing) 5419 TL_QENABLE(elp); 5420 elp->te_flowq = NULL; 5421 list_remove(l, elp); 5422 } 5423 } 5424 5425 /* 5426 * Unconnect endpoints. 5427 */ 5428 static void 5429 tl_co_unconnect(tl_endpt_t *tep) 5430 { 5431 tl_endpt_t *peer_tep = tep->te_conp; 5432 tl_endpt_t *srv_tep = tep->te_oconp; 5433 list_t *l; 5434 tl_icon_t *tip; 5435 tl_endpt_t *cl_tep; 5436 mblk_t *d_mp; 5437 5438 ASSERT(IS_COTS(tep)); 5439 /* 5440 * If our peer is closing, don't use it. 
5441 */ 5442 if ((peer_tep != NULL) && peer_tep->te_closing) { 5443 TL_UNCONNECT(tep->te_conp); 5444 peer_tep = NULL; 5445 } 5446 if ((srv_tep != NULL) && srv_tep->te_closing) { 5447 TL_UNCONNECT(tep->te_oconp); 5448 srv_tep = NULL; 5449 } 5450 5451 if (tep->te_nicon > 0) { 5452 l = &tep->te_iconp; 5453 /* 5454 * If incoming requests pending, change state 5455 * of clients on disconnect ind event and send 5456 * discon_ind pdu to modules above them 5457 * for server: all clients get disconnect 5458 */ 5459 5460 while (tep->te_nicon > 0) { 5461 tip = list_head(l); 5462 cl_tep = tip->ti_tep; 5463 5464 if (cl_tep == NULL) { 5465 tl_freetip(tep, tip); 5466 continue; 5467 } 5468 5469 if (cl_tep->te_oconp != NULL) { 5470 ASSERT(cl_tep != cl_tep->te_oconp); 5471 TL_UNCONNECT(cl_tep->te_oconp); 5472 } 5473 5474 if (cl_tep->te_closing) { 5475 tl_freetip(tep, tip); 5476 continue; 5477 } 5478 5479 enableok(cl_tep->te_wq); 5480 TL_QENABLE(cl_tep); 5481 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5482 if (d_mp != NULL) { 5483 cl_tep->te_state = TS_IDLE; 5484 putnext(cl_tep->te_rq, d_mp); 5485 } else { 5486 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5487 SL_TRACE|SL_ERROR, 5488 "tl_co_unconnect:icmng: " 5489 "allocb failure")); 5490 } 5491 tl_freetip(tep, tip); 5492 } 5493 } else if (srv_tep != NULL) { 5494 /* 5495 * If outgoing request pending, change state 5496 * of server on discon ind event 5497 */ 5498 5499 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5500 IS_COTSORD(srv_tep) && 5501 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5502 /* 5503 * Queue ordrel_ind for server to be picked up 5504 * when the connection is accepted. 
5505 */ 5506 d_mp = tl_ordrel_ind_alloc(); 5507 } else { 5508 /* 5509 * send discon_ind to server 5510 */ 5511 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5512 } 5513 if (d_mp == NULL) { 5514 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5515 SL_TRACE|SL_ERROR, 5516 "tl_co_unconnect:outgoing:allocb failure")); 5517 TL_UNCONNECT(tep->te_oconp); 5518 goto discon_peer; 5519 } 5520 5521 /* 5522 * If this is a socket the T_DISCON_IND is queued with 5523 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5524 * from the list of pending connections. 5525 * Note that when te_oconp is set the peer better have 5526 * a t_connind_t for the client. 5527 */ 5528 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5529 /* 5530 * Queue the disconnection message. 5531 */ 5532 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5533 } else { 5534 tip = tl_icon_find(srv_tep, tep->te_seqno); 5535 if (tip == NULL) { 5536 freemsg(d_mp); 5537 } else { 5538 ASSERT(tep == tip->ti_tep); 5539 ASSERT(tep->te_ser == srv_tep->te_ser); 5540 /* 5541 * Delete tip from the server list. 
5542 */ 5543 if (srv_tep->te_nicon == 1) { 5544 srv_tep->te_state = 5545 NEXTSTATE(TE_DISCON_IND2, 5546 srv_tep->te_state); 5547 } else { 5548 srv_tep->te_state = 5549 NEXTSTATE(TE_DISCON_IND3, 5550 srv_tep->te_state); 5551 } 5552 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5553 T_DISCON_IND); 5554 putnext(srv_tep->te_rq, d_mp); 5555 tl_freetip(srv_tep, tip); 5556 } 5557 TL_UNCONNECT(tep->te_oconp); 5558 srv_tep = NULL; 5559 } 5560 } else if (peer_tep != NULL) { 5561 /* 5562 * unconnect existing connection 5563 * If connected, change state of peer on 5564 * discon ind event and send discon ind pdu 5565 * to module above it 5566 */ 5567 5568 ASSERT(tep->te_ser == peer_tep->te_ser); 5569 if (IS_COTSORD(peer_tep) && 5570 (peer_tep->te_state == TS_WIND_ORDREL || 5571 peer_tep->te_state == TS_DATA_XFER)) { 5572 /* 5573 * send ordrel ind 5574 */ 5575 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5576 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5577 peer_tep->te_state, 5578 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5579 d_mp = tl_ordrel_ind_alloc(); 5580 if (! d_mp) { 5581 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5582 SL_TRACE|SL_ERROR, 5583 "tl_co_unconnect:connected:" 5584 "allocb failure")); 5585 /* 5586 * Continue with cleaning up peer as 5587 * this side may go away with the close 5588 */ 5589 TL_QENABLE(peer_tep); 5590 goto discon_peer; 5591 } 5592 peer_tep->te_state = 5593 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5594 5595 putnext(peer_tep->te_rq, d_mp); 5596 /* 5597 * Handle flow control case. This will generate 5598 * a t_discon_ind message with reason 0 if there 5599 * is data queued on the write side. 5600 */ 5601 TL_QENABLE(peer_tep); 5602 } else if (IS_COTSORD(peer_tep) && 5603 peer_tep->te_state == TS_WREQ_ORDREL) { 5604 /* 5605 * Sent an ordrel_ind. We send a discon with 5606 * with error 0 to inform that the peer is gone. 
5607 */ 5608 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5609 SL_TRACE|SL_ERROR, 5610 "tl_co_unconnect: discon in state %d", 5611 tep->te_state)); 5612 tl_discon_ind(peer_tep, 0); 5613 } else { 5614 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5615 SL_TRACE|SL_ERROR, 5616 "tl_co_unconnect: state %d", tep->te_state)); 5617 tl_discon_ind(peer_tep, ECONNRESET); 5618 } 5619 5620 discon_peer: 5621 /* 5622 * Disconnect cross-pointers only for close 5623 */ 5624 if (tep->te_closing) { 5625 peer_tep = tep->te_conp; 5626 TL_REMOVE_PEER(peer_tep->te_conp); 5627 TL_REMOVE_PEER(tep->te_conp); 5628 } 5629 } 5630 } 5631 5632 /* 5633 * Note: The following routine does not recover from allocb() 5634 * failures 5635 * The reason should be from the <sys/errno.h> space. 5636 */ 5637 static void 5638 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5639 { 5640 mblk_t *d_mp; 5641 5642 if (tep->te_closing) 5643 return; 5644 5645 /* 5646 * flush the queues. 5647 */ 5648 flushq(tep->te_rq, FLUSHDATA); 5649 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5650 5651 /* 5652 * send discon ind 5653 */ 5654 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5655 if (! d_mp) { 5656 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5657 "tl_discon_ind:allocb failure")); 5658 return; 5659 } 5660 tep->te_state = TS_IDLE; 5661 putnext(tep->te_rq, d_mp); 5662 } 5663 5664 /* 5665 * Note: The following routine does not recover from allocb() 5666 * failures 5667 * The reason should be from the <sys/errno.h> space. 
5668 */ 5669 static mblk_t * 5670 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5671 { 5672 mblk_t *mp; 5673 struct T_discon_ind *tdi; 5674 5675 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5676 DB_TYPE(mp) = M_PROTO; 5677 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5678 tdi = (struct T_discon_ind *)mp->b_rptr; 5679 tdi->PRIM_type = T_DISCON_IND; 5680 tdi->DISCON_reason = reason; 5681 tdi->SEQ_number = seqnum; 5682 } 5683 return (mp); 5684 } 5685 5686 5687 /* 5688 * Note: The following routine does not recover from allocb() 5689 * failures 5690 */ 5691 static mblk_t * 5692 tl_ordrel_ind_alloc(void) 5693 { 5694 mblk_t *mp; 5695 struct T_ordrel_ind *toi; 5696 5697 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5698 DB_TYPE(mp) = M_PROTO; 5699 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5700 toi = (struct T_ordrel_ind *)mp->b_rptr; 5701 toi->PRIM_type = T_ORDREL_IND; 5702 } 5703 return (mp); 5704 } 5705 5706 5707 /* 5708 * Lookup the seqno in the list of queued connections. 5709 */ 5710 static tl_icon_t * 5711 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5712 { 5713 list_t *l = &tep->te_iconp; 5714 tl_icon_t *tip = list_head(l); 5715 5716 ASSERT(seqno != 0); 5717 5718 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5719 ; 5720 5721 return (tip); 5722 } 5723 5724 /* 5725 * Queue data for a given T_CONN_IND while verifying that redundant 5726 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5727 * Used when the originator of the connection closes. 
5728 */ 5729 static void 5730 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5731 { 5732 tl_icon_t *tip; 5733 mblk_t **mpp, *mp; 5734 int prim, nprim; 5735 5736 if (nmp->b_datap->db_type == M_PROTO) 5737 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5738 else 5739 nprim = -1; /* M_DATA */ 5740 5741 tip = tl_icon_find(tep, seqno); 5742 if (tip == NULL) { 5743 freemsg(nmp); 5744 return; 5745 } 5746 5747 ASSERT(tip->ti_seqno != 0); 5748 mpp = &tip->ti_mp; 5749 while (*mpp != NULL) { 5750 mp = *mpp; 5751 5752 if (mp->b_datap->db_type == M_PROTO) 5753 prim = ((union T_primitives *)mp->b_rptr)->type; 5754 else 5755 prim = -1; /* M_DATA */ 5756 5757 /* 5758 * Allow nothing after a T_DISCON_IND 5759 */ 5760 if (prim == T_DISCON_IND) { 5761 freemsg(nmp); 5762 return; 5763 } 5764 /* 5765 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5766 */ 5767 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5768 freemsg(nmp); 5769 return; 5770 } 5771 mpp = &(mp->b_next); 5772 } 5773 *mpp = nmp; 5774 } 5775 5776 /* 5777 * Verify if a certain TPI primitive exists on the connind queue. 5778 * Use prim -1 for M_DATA. 5779 * Return non-zero if found. 5780 */ 5781 static boolean_t 5782 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5783 { 5784 tl_icon_t *tip = tl_icon_find(tep, seqno); 5785 boolean_t found = B_FALSE; 5786 5787 if (tip != NULL) { 5788 mblk_t *mp; 5789 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5790 found = (DB_TYPE(mp) == M_PROTO && 5791 ((union T_primitives *)mp->b_rptr)->type == prim); 5792 } 5793 } 5794 return (found); 5795 } 5796 5797 /* 5798 * Send the b_next mblk chain that has accumulated before the connection 5799 * was accepted. Perform the necessary state transitions. 
5800 */ 5801 static void 5802 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5803 { 5804 mblk_t *mp; 5805 union T_primitives *primp; 5806 5807 if (tep->te_closing) { 5808 tl_icon_freemsgs(mpp); 5809 return; 5810 } 5811 5812 ASSERT(tep->te_state == TS_DATA_XFER); 5813 ASSERT(tep->te_rq->q_first == NULL); 5814 5815 while ((mp = *mpp) != NULL) { 5816 *mpp = mp->b_next; 5817 mp->b_next = NULL; 5818 5819 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5820 switch (DB_TYPE(mp)) { 5821 default: 5822 freemsg(mp); 5823 break; 5824 case M_DATA: 5825 putnext(tep->te_rq, mp); 5826 break; 5827 case M_PROTO: 5828 primp = (union T_primitives *)mp->b_rptr; 5829 switch (primp->type) { 5830 case T_UNITDATA_IND: 5831 case T_DATA_IND: 5832 case T_OPTDATA_IND: 5833 case T_EXDATA_IND: 5834 putnext(tep->te_rq, mp); 5835 break; 5836 case T_ORDREL_IND: 5837 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5838 tep->te_state); 5839 putnext(tep->te_rq, mp); 5840 break; 5841 case T_DISCON_IND: 5842 tep->te_state = TS_IDLE; 5843 putnext(tep->te_rq, mp); 5844 break; 5845 default: 5846 #ifdef DEBUG 5847 cmn_err(CE_PANIC, 5848 "tl_icon_sendmsgs: unknown primitive"); 5849 #endif /* DEBUG */ 5850 freemsg(mp); 5851 break; 5852 } 5853 break; 5854 } 5855 } 5856 } 5857 5858 /* 5859 * Free the b_next mblk chain that has accumulated before the connection 5860 * was accepted. 5861 */ 5862 static void 5863 tl_icon_freemsgs(mblk_t **mpp) 5864 { 5865 mblk_t *mp; 5866 5867 while ((mp = *mpp) != NULL) { 5868 *mpp = mp->b_next; 5869 mp->b_next = NULL; 5870 freemsg(mp); 5871 } 5872 } 5873 5874 /* 5875 * Send M_ERROR 5876 * Note: assumes caller ensured enough space in mp or enough 5877 * memory available. 
Does not attempt recovery from allocb() 5878 * failures 5879 */ 5880 5881 static void 5882 tl_merror(queue_t *wq, mblk_t *mp, int error) 5883 { 5884 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5885 5886 if (tep->te_closing) { 5887 freemsg(mp); 5888 return; 5889 } 5890 5891 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5892 SL_TRACE|SL_ERROR, 5893 "tl_merror: tep=%p, err=%d", tep, error)); 5894 5895 /* 5896 * flush all messages on queue. we are shutting 5897 * the stream down on fatal error 5898 */ 5899 flushq(wq, FLUSHALL); 5900 if (IS_COTS(tep)) { 5901 /* connection oriented - unconnect endpoints */ 5902 tl_co_unconnect(tep); 5903 } 5904 if (mp->b_cont) { 5905 freemsg(mp->b_cont); 5906 mp->b_cont = NULL; 5907 } 5908 5909 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5910 freemsg(mp); 5911 mp = allocb(1, BPRI_HI); 5912 if (!mp) { 5913 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5914 SL_TRACE|SL_ERROR, 5915 "tl_merror:M_PROTO: out of memory")); 5916 return; 5917 } 5918 } 5919 if (mp) { 5920 DB_TYPE(mp) = M_ERROR; 5921 mp->b_rptr = DB_BASE(mp); 5922 *mp->b_rptr = (char)error; 5923 mp->b_wptr = mp->b_rptr + sizeof (char); 5924 qreply(wq, mp); 5925 } else { 5926 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5927 } 5928 } 5929 5930 static void 5931 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5932 { 5933 if (flag & TL_SETCRED) { 5934 struct opthdr *opt = (struct opthdr *)buf; 5935 tl_credopt_t *tlcred; 5936 5937 opt->level = TL_PROT_LEVEL; 5938 opt->name = TL_OPT_PEER_CRED; 5939 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5940 5941 tlcred = (tl_credopt_t *)(opt + 1); 5942 tlcred->tc_uid = crgetuid(cr); 5943 tlcred->tc_gid = crgetgid(cr); 5944 tlcred->tc_ruid = crgetruid(cr); 5945 tlcred->tc_rgid = crgetrgid(cr); 5946 tlcred->tc_suid = crgetsuid(cr); 5947 tlcred->tc_sgid = crgetsgid(cr); 5948 tlcred->tc_ngroups = crgetngroups(cr); 5949 } else if (flag & TL_SETUCRED) { 5950 struct opthdr *opt = (struct opthdr *)buf; 5951 5952 opt->level = 
TL_PROT_LEVEL; 5953 opt->name = TL_OPT_PEER_UCRED; 5954 opt->len = (t_uscalar_t)OPTLEN(ucredsize); 5955 5956 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5957 } else { 5958 struct T_opthdr *topt = (struct T_opthdr *)buf; 5959 ASSERT(flag & TL_SOCKUCRED); 5960 5961 topt->level = SOL_SOCKET; 5962 topt->name = SCM_UCRED; 5963 topt->len = ucredsize + sizeof (*topt); 5964 topt->status = 0; 5965 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 5966 } 5967 } 5968 5969 /* ARGSUSED */ 5970 static int 5971 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5972 { 5973 /* no default value processed in protocol specific code currently */ 5974 return (-1); 5975 } 5976 5977 /* ARGSUSED */ 5978 static int 5979 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5980 { 5981 int len; 5982 tl_endpt_t *tep; 5983 int *valp; 5984 5985 tep = (tl_endpt_t *)wq->q_ptr; 5986 5987 len = 0; 5988 5989 /* 5990 * Assumes: option level and name sanity check done elsewhere 5991 */ 5992 5993 switch (level) { 5994 case SOL_SOCKET: 5995 if (! IS_SOCKET(tep)) 5996 break; 5997 switch (name) { 5998 case SO_RECVUCRED: 5999 len = sizeof (int); 6000 valp = (int *)ptr; 6001 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6002 break; 6003 default: 6004 break; 6005 } 6006 break; 6007 case TL_PROT_LEVEL: 6008 switch (name) { 6009 case TL_OPT_PEER_CRED: 6010 case TL_OPT_PEER_UCRED: 6011 /* 6012 * option not supposed to retrieved directly 6013 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6014 * when some internal flags set by other options 6015 * Direct retrieval always designed to fail(ignored) 6016 * for this option. 
6017 */ 6018 break; 6019 } 6020 } 6021 return (len); 6022 } 6023 6024 /* ARGSUSED */ 6025 static int 6026 tl_set_opt( 6027 queue_t *wq, 6028 uint_t mgmt_flags, 6029 int level, 6030 int name, 6031 uint_t inlen, 6032 uchar_t *invalp, 6033 uint_t *outlenp, 6034 uchar_t *outvalp, 6035 void *thisdg_attrs, 6036 cred_t *cr, 6037 mblk_t *mblk) 6038 { 6039 int error; 6040 tl_endpt_t *tep; 6041 6042 tep = (tl_endpt_t *)wq->q_ptr; 6043 6044 error = 0; /* NOERROR */ 6045 6046 /* 6047 * Assumes: option level and name sanity checks done elsewhere 6048 */ 6049 6050 switch (level) { 6051 case SOL_SOCKET: 6052 if (! IS_SOCKET(tep)) { 6053 error = EINVAL; 6054 break; 6055 } 6056 /* 6057 * TBD: fill in other AF_UNIX socket options and then stop 6058 * returning error. 6059 */ 6060 switch (name) { 6061 case SO_RECVUCRED: 6062 /* 6063 * We only support this for datagram sockets; 6064 * getpeerucred handles the connection oriented 6065 * transports. 6066 */ 6067 if (! IS_CLTS(tep)) { 6068 error = EINVAL; 6069 break; 6070 } 6071 if (*(int *)invalp == 0) 6072 tep->te_flag &= ~TL_SOCKUCRED; 6073 else 6074 tep->te_flag |= TL_SOCKUCRED; 6075 break; 6076 default: 6077 error = EINVAL; 6078 break; 6079 } 6080 break; 6081 case TL_PROT_LEVEL: 6082 switch (name) { 6083 case TL_OPT_PEER_CRED: 6084 case TL_OPT_PEER_UCRED: 6085 /* 6086 * option not supposed to be set directly 6087 * Its value in initialized for each endpoint at 6088 * driver open time. 6089 * Direct setting always designed to fail for this 6090 * option. 
6091 */ 6092 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6093 SL_TRACE|SL_ERROR, 6094 "tl_set_opt: option is not supported")); 6095 error = EPROTO; 6096 break; 6097 } 6098 } 6099 return (error); 6100 } 6101 6102 6103 static void 6104 tl_timer(void *arg) 6105 { 6106 queue_t *wq = arg; 6107 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6108 6109 ASSERT(tep); 6110 6111 tep->te_timoutid = 0; 6112 6113 enableok(wq); 6114 /* 6115 * Note: can call wsrv directly here and save context switch 6116 * Consider change when qtimeout (not timeout) is active 6117 */ 6118 qenable(wq); 6119 } 6120 6121 static void 6122 tl_buffer(void *arg) 6123 { 6124 queue_t *wq = arg; 6125 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6126 6127 ASSERT(tep); 6128 6129 tep->te_bufcid = 0; 6130 tep->te_nowsrv = B_FALSE; 6131 6132 enableok(wq); 6133 /* 6134 * Note: can call wsrv directly here and save context switch 6135 * Consider change when qbufcall (not bufcall) is active 6136 */ 6137 qenable(wq); 6138 } 6139 6140 static void 6141 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6142 { 6143 tl_endpt_t *tep; 6144 6145 tep = (tl_endpt_t *)wq->q_ptr; 6146 6147 if (tep->te_closing) { 6148 freemsg(mp); 6149 return; 6150 } 6151 noenable(wq); 6152 6153 (void) insq(wq, wq->q_first, mp); 6154 6155 if (tep->te_bufcid || tep->te_timoutid) { 6156 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6157 "tl_memrecover:recover %p pending", (void *)wq)); 6158 return; 6159 } 6160 6161 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6162 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6163 drv_usectohz(TL_BUFWAIT)); 6164 } 6165 } 6166 6167 static void 6168 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6169 { 6170 ASSERT(tip->ti_seqno != 0); 6171 6172 if (tip->ti_mp != NULL) { 6173 tl_icon_freemsgs(&tip->ti_mp); 6174 tip->ti_mp = NULL; 6175 } 6176 if (tip->ti_tep != NULL) { 6177 tl_refrele(tip->ti_tep); 6178 tip->ti_tep = NULL; 6179 } 6180 list_remove(&tep->te_iconp, tip); 6181 kmem_free(tip, 
sizeof (tl_icon_t)); 6182 tep->te_nicon--; 6183 } 6184 6185 /* 6186 * Remove address from address hash. 6187 */ 6188 static void 6189 tl_addr_unbind(tl_endpt_t *tep) 6190 { 6191 tl_endpt_t *elp; 6192 6193 if (tep->te_flag & TL_ADDRHASHED) { 6194 if (IS_SOCKET(tep)) { 6195 (void) mod_hash_remove(tep->te_addrhash, 6196 (mod_hash_key_t)tep->te_vp, 6197 (mod_hash_val_t *)&elp); 6198 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6199 tep->te_magic = SOU_MAGIC_IMPLICIT; 6200 } else { 6201 (void) mod_hash_remove(tep->te_addrhash, 6202 (mod_hash_key_t)&tep->te_ap, 6203 (mod_hash_val_t *)&elp); 6204 (void) kmem_free(tep->te_abuf, tep->te_alen); 6205 tep->te_alen = -1; 6206 tep->te_abuf = NULL; 6207 } 6208 tep->te_flag &= ~TL_ADDRHASHED; 6209 } 6210 } 6211