1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multithreaded STREAMS Local Transport Provider. 28 * 29 * OVERVIEW 30 * ======== 31 * 32 * This driver provides TLI as well as socket semantics. It provides 33 * connectionless, connection oriented, and connection oriented with orderly 34 * release transports for TLI and sockets. Each transport type has separate name 35 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 36 * this removes any name space conflicts when binding to socket style transport 37 * addresses. 38 * 39 * NOTE: There is one exception: Socket ticots and ticotsord transports share 40 * the same namespace. In fact, sockets always use ticotsord type transport. 41 * 42 * The driver mode is specified during open() by the minor number used for 43 * open. 44 * 45 * The sockets in addition have the following semantic differences: 46 * No support for passing up credentials (TL_SET[U]CRED). 
 *
 *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *	T_OPTDATA_IND.
 *
 *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
 *	a T_CONN_RES is received from the acceptor. This means that a socket
 *	connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed behind
 * the serializer and are, essentially, single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
 * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
 *
 * Connectionless transports use a single serializer per transport type (one for
 * TLI and one for sockets). Connection-oriented transports use finer-grained
 * serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener serializer. During T_CONN_RES processing the eager serializer is
 * switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer. To avoid races
 * with endpoint closes while its serializer may be changing closes are blocked
 * while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after an endpoint closed. The te_closing being set on the endpoint indicates
 * that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with the
 * lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using te_closing flag. It is
 * usually enough to check for te_closing flag since all other state changes
 * happen after this flag is set and the close entered serializer. Immediately
 * after setting te_closing flag tl_close() enters serializer and waits until
 * the callback finishes. This allows all functions called within serializer to
 * simply check te_closing without any locks.
 *
 * Serializer management
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is constructed
 * and destroyed when the endpoint is destructed. CLTS transports use global
 * serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * the serializer destruction. The serializer should never be destroyed by any
 * function executed inside serializer.
 * This means that close has to wait till
 * all serializer activity for this endpoint is finished before it can drop the
 * last reference on the endpoint (which may as well free the serializer). This
 * is only relevant for COTS transports which manage serializers
 * dynamically. For CLTS transports close may complete without waiting for all
 * serializer activity to finish since serializer is only destroyed at driver
 * detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing
 * client serializer if it has any pending messages on the serializer and
 * instead moves acceptor to listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses modhash hash table implementation. Each transport uses two
 * hash tables - one for finding endpoints by acceptor ID and another one for
 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 * pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash tables lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters serializer twice on close(). The close sequence is the
 * following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 *	This step is needed to prevent close during serializer switches. In most
 *	cases (close happening after connection establishment) te_closewait is
 *	zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *	It also needs to clear write-side q_next pointers - this should be done
 *	before qprocsoff().
 *
 *	This synchronous serializer entry during close is needed to ensure that
 *	the queue is valid everywhere inside the serializer.
 *
 *	Note that in many cases close will execute tl_close_ser() synchronously,
 *	so it will not wait at all.
 *
 * 4) Calls qprocsoff().
 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 *	complete (for COTS transports). For CLTS transport there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up waiting
 *	close if there is any.
 *
 *	Note that in most cases close will enter tl_close_finish_ser()
 *	synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one calls
 * putq() on the read queue. The read side service routine tl_rsrv() is called
 * when the read side stream is back-enabled. It enters serializer synchronously
 * (waits till serializer processing is complete). Within serializer it
 * back-enables all endpoints blocked by the queue for connection-less
 * transports and enables write side service processing for the peer for
 * connection-oriented transports.
 *
 * Read and write side service routines use special mblk_sized space in the
 * endpoint structure to enter perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 * queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput().
In order to maintain the ordering between these two queues the 201 * STREAMS queue is only manipulated within the serializer, so the ordering is 202 * provided by the serializer. 203 * 204 * Functions called from the tl_wsrv() sometimes may call putbq(). To 205 * immediately stop any further processing of the STREAMS message queues the 206 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 207 * side service processing stops when the flag is set. 208 * 209 * The tl_wsrv() function enters serializer synchronously and waits for it to 210 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 211 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 212 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 213 * always bounded by the amount of messages on the STREAMS queue at the time 214 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 215 * queue from another serialized entry which can't happen in parallel. This 216 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 217 * of it draining forever while writer places new messages on the STREAMS 218 * queue). 219 * 220 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 221 * 222 * 223 * Unix Domain Sockets 224 * =================== 225 * 226 * The driver knows the structure of Unix Domain sockets addresses and treats 227 * them differently from generic TLI addresses. For sockets implicit binds are 228 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 229 * instead of using address length of zero. Explicit binds specify 230 * SOU_MAGIC_EXPLICIT as magic. 231 * 232 * For implicit binds we always use minor number as soua_vp part of the address 233 * and avoid any hash table lookups. This saves two hash tables lookups per 234 * anonymous bind. 
 *
 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 * tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses minor number as acceptor id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 *	is requested and the endpoint is already in use. There is no point in
 *	generating an unused address since this address will be rejected by
 *	sockfs anyway. For implicit binds it always generates a new address
 *	(sets soua_vp to its minor number).
 *
 * 2) It always uses minor number as acceptor ID and never uses queue
 *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 *	message and they do not use the queue pointer.
 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *	backlog followed by listen(). The listen() should be issued with
 *	non-zero backlog, so sotpi_listen() issues unbind request followed by
 *	bind request to the same address but with a non-zero qlen value. Both
 *	tl_bind() and tl_unbind() require write lock on the hash table to
 *	insert/remove the address. The driver does not remove the address from
 *	the hash for endpoints that are bound to the explicit address and have
 *	backlog of zero. During T_BIND_REQ processing if the address requested
 *	is equal to the address the endpoint already has it updates the backlog
 *	without reinserting the address in the hash table. This optimization
 *	avoids two hash table updates for each listener created.
 *	It always avoids the problem of a "stolen" address when another
 *	listener may use the same address between the unbind and bind and
 *	suddenly listen() fails because address is in use even though the
 *	bind() succeeded.
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and one
 * for Sockets). Functions executing behind serializer can check or modify state
 * of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 * te_lastep field. The next time X talks to some address A it checks whether A
 * is the same as Y's address and if it is there is no need to lookup Y. If the
 * address is different or the state of Y is not appropriate (e.g. closed or not
 * idle) X does a lookup using tl_find_peer() and caches the new address.
 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 * on the endpoint found.
 *
 * During close of endpoint Y it doesn't try to remove itself from other
 * endpoints' caches. They will detect that Y is gone and will search the peer
 * endpoint again.
 *
 * Flow Control Handling.
 * ----------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t).
For connectionless transports it keeps a 311 * list of endpoints flow controlled by this one. 312 * 313 * Each transport type is represented by a per-transport data structure 314 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 315 * endpoint address hash tables for each transport. It also contains pointer to 316 * transport serializer for connectionless transports. 317 * 318 * Each endpoint keeps a link to its transport structure, so the code can find 319 * all per-transport information quickly. 320 */ 321 322 #include <sys/types.h> 323 #include <sys/inttypes.h> 324 #include <sys/stream.h> 325 #include <sys/stropts.h> 326 #define _SUN_TPI_VERSION 2 327 #include <sys/tihdr.h> 328 #include <sys/strlog.h> 329 #include <sys/debug.h> 330 #include <sys/cred.h> 331 #include <sys/errno.h> 332 #include <sys/kmem.h> 333 #include <sys/id_space.h> 334 #include <sys/modhash.h> 335 #include <sys/mkdev.h> 336 #include <sys/tl.h> 337 #include <sys/stat.h> 338 #include <sys/conf.h> 339 #include <sys/modctl.h> 340 #include <sys/strsun.h> 341 #include <sys/socket.h> 342 #include <sys/socketvar.h> 343 #include <sys/sysmacros.h> 344 #include <sys/xti_xtiopt.h> 345 #include <sys/ddi.h> 346 #include <sys/sunddi.h> 347 #include <sys/zone.h> 348 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 349 #include <inet/optcom.h> 350 #include <sys/strsubr.h> 351 #include <sys/ucred.h> 352 #include <sys/suntpi.h> 353 #include <sys/list.h> 354 #include <sys/serializer.h> 355 356 /* 357 * TBD List 358 * 14 Eliminate state changes through table 359 * 16. AF_UNIX socket options 360 * 17. connect() for ticlts 361 * 18. support for "netstat" to show AF_UNIX plus TLI local 362 * transport connections 363 * 21. 
sanity check to flushing on sending M_ERROR 364 */ 365 366 /* 367 * CONSTANT DECLARATIONS 368 * -------------------- 369 */ 370 371 /* 372 * Local declarations 373 */ 374 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 375 376 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 377 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 378 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 379 /* 380 * Hash tables size. 381 */ 382 #define TL_HASH_SIZE 311 383 384 /* 385 * Definitions for module_info 386 */ 387 #define TL_ID (104) /* module ID number */ 388 #define TL_NAME "tl" /* module name */ 389 #define TL_MINPSZ (0) /* min packet size */ 390 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 391 #define TL_HIWAT (16*1024) /* hi water mark */ 392 #define TL_LOWAT (256) /* lo water mark */ 393 /* 394 * Definition of minor numbers/modes for new transport provider modes. 395 * We view the socket use as a separate mode to get a separate name space. 
396 */ 397 #define TL_TICOTS 0 /* connection oriented transport */ 398 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 399 #define TL_TICLTS 2 /* connectionless transport */ 400 #define TL_UNUSED 3 401 #define TL_SOCKET 4 /* Socket */ 402 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 403 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 404 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 405 406 #define TL_MINOR_MASK 0x7 407 #define TL_MINOR_START (TL_TICLTS + 1) 408 409 /* 410 * LOCAL MACROS 411 */ 412 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 413 414 /* 415 * EXTERNAL VARIABLE DECLARATIONS 416 * ----------------------------- 417 */ 418 /* 419 * state table defined in the OS space.c 420 */ 421 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 422 423 /* 424 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 425 */ 426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 427 static int tl_close(queue_t *, int, cred_t *); 428 static void tl_wput(queue_t *, mblk_t *); 429 static void tl_wsrv(queue_t *); 430 static void tl_rsrv(queue_t *); 431 432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 435 436 437 /* 438 * GLOBAL DATA STRUCTURES AND VARIABLES 439 * ----------------------------------- 440 */ 441 442 /* 443 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 444 * For now, we only manage the SO_RECVUCRED option but we also have 445 * harmless dummy options to make things work with some common code we access. 446 */ 447 opdes_t tl_opt_arr[] = { 448 /* The SO_TYPE is needed for the hack below */ 449 { 450 SO_TYPE, 451 SOL_SOCKET, 452 OA_R, 453 OA_R, 454 OP_NP, 455 0, 456 sizeof (t_scalar_t), 457 0 458 }, 459 { 460 SO_RECVUCRED, 461 SOL_SOCKET, 462 OA_RW, 463 OA_RW, 464 OP_NP, 465 0, 466 sizeof (int), 467 0 468 } 469 }; 470 471 /* 472 * Table of all supported levels 473 * Note: Some levels (e.g. 
XTI_GENERIC) may be valid but may not have 474 * any supported options so we need this info separately. 475 * 476 * This is needed only for topmost tpi providers. 477 */ 478 optlevel_t tl_valid_levels_arr[] = { 479 XTI_GENERIC, 480 SOL_SOCKET, 481 TL_PROT_LEVEL 482 }; 483 484 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 485 /* 486 * Current upper bound on the amount of space needed to return all options. 487 * Additional options with data size of sizeof(long) are handled automatically. 488 * Others need hand job. 489 */ 490 #define TL_MAX_OPT_BUF_LEN \ 491 ((A_CNT(tl_opt_arr) << 2) + \ 492 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 493 + 64 + sizeof (struct T_optmgmt_ack)) 494 495 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 496 497 /* 498 * transport addr structure 499 */ 500 typedef struct tl_addr { 501 zoneid_t ta_zoneid; /* Zone scope of address */ 502 t_scalar_t ta_alen; /* length of abuf */ 503 void *ta_abuf; /* the addr itself */ 504 } tl_addr_t; 505 506 /* 507 * Refcounted version of serializer. 508 */ 509 typedef struct tl_serializer { 510 uint_t ts_refcnt; 511 serializer_t *ts_serializer; 512 } tl_serializer_t; 513 514 /* 515 * Each transport type has a separate state. 516 * Per-transport state. 
517 */ 518 typedef struct tl_transport_state { 519 char *tr_name; 520 minor_t tr_minor; 521 uint32_t tr_defaddr; 522 mod_hash_t *tr_ai_hash; 523 mod_hash_t *tr_addr_hash; 524 tl_serializer_t *tr_serializer; 525 } tl_transport_state_t; 526 527 #define TL_DFADDR 0x1000 528 529 static tl_transport_state_t tl_transports[] = { 530 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 531 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 532 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 533 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 534 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 536 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 537 }; 538 539 #define TL_MAXTRANSPORT A_CNT(tl_transports) 540 541 struct tl_endpt; 542 typedef struct tl_endpt tl_endpt_t; 543 544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 545 546 /* 547 * Data structure used to represent pending connects. 548 * Records enough information so that the connecting peer can close 549 * before the connection gets accepted. 550 */ 551 typedef struct tl_icon { 552 list_node_t ti_node; 553 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 554 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 555 t_scalar_t ti_seqno; /* Sequence number */ 556 } tl_icon_t; 557 558 typedef struct so_ux_addr soux_addr_t; 559 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 560 561 /* 562 * Maximum number of unaccepted connection indications allowed per listener. 
563 */ 564 #define TL_MAXQLEN 4096 565 int tl_maxqlen = TL_MAXQLEN; 566 567 /* 568 * transport endpoint structure 569 */ 570 struct tl_endpt { 571 queue_t *te_rq; /* stream read queue */ 572 queue_t *te_wq; /* stream write queue */ 573 uint32_t te_refcnt; 574 int32_t te_state; /* TPI state of endpoint */ 575 minor_t te_minor; /* minor number */ 576 #define te_seqno te_minor 577 uint_t te_flag; /* flag field */ 578 boolean_t te_nowsrv; 579 tl_serializer_t *te_ser; /* Serializer to use */ 580 #define te_serializer te_ser->ts_serializer 581 582 soux_addr_t te_uxaddr; /* Socket address */ 583 #define te_magic te_uxaddr.soua_magic 584 #define te_vp te_uxaddr.soua_vp 585 tl_addr_t te_ap; /* addr bound to this endpt */ 586 #define te_zoneid te_ap.ta_zoneid 587 #define te_alen te_ap.ta_alen 588 #define te_abuf te_ap.ta_abuf 589 590 tl_transport_state_t *te_transport; 591 #define te_addrhash te_transport->tr_addr_hash 592 #define te_aihash te_transport->tr_ai_hash 593 #define te_defaddr te_transport->tr_defaddr 594 cred_t *te_credp; /* endpoint user credentials */ 595 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 596 597 /* 598 * State specific for connection-oriented and connectionless transports. 599 */ 600 union { 601 /* Connection-oriented state. */ 602 struct { 603 t_uscalar_t _te_nicon; /* count of conn requests */ 604 t_uscalar_t _te_qlen; /* max conn requests */ 605 tl_endpt_t *_te_oconp; /* conn request pending */ 606 tl_endpt_t *_te_conp; /* connected endpt */ 607 #ifndef _ILP32 608 void *_te_pad; 609 #endif 610 list_t _te_iconp; /* list of conn ind. pending */ 611 } _te_cots_state; 612 /* Connection-less state. */ 613 struct { 614 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 615 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 616 list_node_t _te_flows; /* lists of connections */ 617 list_t _te_flowlist; /* Who flowcontrols on me */ 618 } _te_clts_state; 619 } _te_transport_state; 620 #define te_nicon _te_transport_state._te_cots_state._te_nicon 621 #define te_qlen _te_transport_state._te_cots_state._te_qlen 622 #define te_oconp _te_transport_state._te_cots_state._te_oconp 623 #define te_conp _te_transport_state._te_cots_state._te_conp 624 #define te_iconp _te_transport_state._te_cots_state._te_iconp 625 #define te_lastep _te_transport_state._te_clts_state._te_lastep 626 #define te_flowq _te_transport_state._te_clts_state._te_flowq 627 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 628 #define te_flows _te_transport_state._te_clts_state._te_flows 629 630 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 631 timeout_id_t te_timoutid; /* outstanding timeout id */ 632 pid_t te_cpid; /* cached pid of endpoint */ 633 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 634 /* 635 * Pieces of the endpoint state needed for closing. 636 */ 637 kmutex_t te_closelock; 638 kcondvar_t te_closecv; 639 uint8_t te_closing; /* The endpoint started closing */ 640 uint8_t te_closewait; /* Wait in close until zero */ 641 mblk_t te_closemp; /* for entering serializer on close */ 642 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 643 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 644 kmutex_t te_srv_lock; 645 kcondvar_t te_srv_cv; 646 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 647 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 648 /* 649 * Pieces of the endpoint state needed for serializer transitions. 650 */ 651 kmutex_t te_ser_lock; /* Protects the count below */ 652 uint_t te_ser_count; /* Number of messages on serializer */ 653 }; 654 655 /* 656 * Flag values. Lower 4 bits specify that transport used. 
657 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 658 * they allow to identify the endpoint more easily. 659 */ 660 #define TL_LISTENER 0x00010 /* the listener endpoint */ 661 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 662 #define TL_EAGER 0x00040 /* connecting endpoint */ 663 #define TL_ACCEPTED 0x00080 /* accepted connection */ 664 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 665 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 666 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 667 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 668 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 669 /* 670 * Boolean checks for the endpoint type. 671 */ 672 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 673 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 674 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 675 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 676 677 /* 678 * Certain operations are always used together. These macros reduce the chance 679 * of missing a part of a combination. 680 */ 681 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 682 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 683 684 #define TL_PUTBQ(x, mp) { \ 685 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 686 (x)->te_nowsrv = B_TRUE; \ 687 (void) putbq((x)->te_wq, mp); \ 688 } 689 690 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 691 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 692 693 /* 694 * STREAMS driver glue data structures. 
695 */ 696 static struct module_info tl_minfo = { 697 TL_ID, /* mi_idnum */ 698 TL_NAME, /* mi_idname */ 699 TL_MINPSZ, /* mi_minpsz */ 700 TL_MAXPSZ, /* mi_maxpsz */ 701 TL_HIWAT, /* mi_hiwat */ 702 TL_LOWAT /* mi_lowat */ 703 }; 704 705 static struct qinit tl_rinit = { 706 NULL, /* qi_putp */ 707 (int (*)())tl_rsrv, /* qi_srvp */ 708 tl_open, /* qi_qopen */ 709 tl_close, /* qi_qclose */ 710 NULL, /* qi_qadmin */ 711 &tl_minfo, /* qi_minfo */ 712 NULL /* qi_mstat */ 713 }; 714 715 static struct qinit tl_winit = { 716 (int (*)())tl_wput, /* qi_putp */ 717 (int (*)())tl_wsrv, /* qi_srvp */ 718 NULL, /* qi_qopen */ 719 NULL, /* qi_qclose */ 720 NULL, /* qi_qadmin */ 721 &tl_minfo, /* qi_minfo */ 722 NULL /* qi_mstat */ 723 }; 724 725 static struct streamtab tlinfo = { 726 &tl_rinit, /* st_rdinit */ 727 &tl_winit, /* st_wrinit */ 728 NULL, /* st_muxrinit */ 729 NULL /* st_muxwrinit */ 730 }; 731 732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 733 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); 734 735 static struct modldrv modldrv = { 736 &mod_driverops, /* Type of module -- pseudo driver here */ 737 "TPI Local Transport (tl)", 738 &tl_devops, /* driver ops */ 739 }; 740 741 /* 742 * Module linkage information for the kernel. 743 */ 744 static struct modlinkage modlinkage = { 745 MODREV_1, 746 &modldrv, 747 NULL 748 }; 749 750 /* 751 * Templates for response to info request 752 * Check sanity of unlimited connect data etc. 
753 */ 754 755 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 756 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 757 758 static struct T_info_ack tl_cots_info_ack = 759 { 760 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 761 T_INFINITE, /* TSDU size */ 762 T_INFINITE, /* ETSDU size */ 763 T_INFINITE, /* CDATA_size */ 764 T_INFINITE, /* DDATA_size */ 765 T_INFINITE, /* ADDR_size */ 766 T_INFINITE, /* OPT_size */ 767 0, /* TIDU_size - fill at run time */ 768 T_COTS, /* SERV_type */ 769 -1, /* CURRENT_state */ 770 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 771 }; 772 773 static struct T_info_ack tl_clts_info_ack = 774 { 775 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 776 0, /* TSDU_size - fill at run time */ 777 -2, /* ETSDU_size -2 => not supported */ 778 -2, /* CDATA_size -2 => not supported */ 779 -2, /* DDATA_size -2 => not supported */ 780 -1, /* ADDR_size -1 => unlimited */ 781 -1, /* OPT_size */ 782 0, /* TIDU_size - fill at run time */ 783 T_CLTS, /* SERV_type */ 784 -1, /* CURRENT_state */ 785 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 786 }; 787 788 /* 789 * private copy of devinfo pointer used in tl_info 790 */ 791 static dev_info_t *tl_dip; 792 793 /* 794 * Endpoints cache. 795 */ 796 static kmem_cache_t *tl_cache; 797 /* 798 * Minor number space. 799 */ 800 static id_space_t *tl_minors; 801 802 /* 803 * Default Data Unit size. 804 */ 805 static t_scalar_t tl_tidusz; 806 807 /* 808 * Size of hash tables. 809 */ 810 static size_t tl_hash_size = TL_HASH_SIZE; 811 812 /* 813 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 814 * for sockets. 
815 */ 816 static int tl_disable_early_connect = 0; 817 static int tl_client_closing_when_accepting; 818 819 static int tl_serializer_noswitch; 820 821 /* 822 * LOCAL FUNCTION PROTOTYPES 823 * ------------------------- 824 */ 825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 826 static void tl_do_proto(mblk_t *, tl_endpt_t *); 827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 830 t_scalar_t); 831 static void tl_bind(mblk_t *, tl_endpt_t *); 832 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 833 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 834 static void tl_unbind(mblk_t *, tl_endpt_t *); 835 static void tl_optmgmt(queue_t *, mblk_t *); 836 static void tl_conn_req(queue_t *, mblk_t *); 837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 838 static void tl_conn_res(mblk_t *, tl_endpt_t *); 839 static void tl_discon_req(mblk_t *, tl_endpt_t *); 840 static void tl_capability_req(mblk_t *, tl_endpt_t *); 841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 842 static void tl_info_req(mblk_t *, tl_endpt_t *); 843 static void tl_addr_req(mblk_t *, tl_endpt_t *); 844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 845 static void tl_data(mblk_t *, tl_endpt_t *); 846 static void tl_exdata(mblk_t *, tl_endpt_t *); 847 static void tl_ordrel(mblk_t *, tl_endpt_t *); 848 static void tl_unitdata(mblk_t *, tl_endpt_t *); 849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 854 static void tl_cl_backenable(tl_endpt_t *); 855 static void tl_co_unconnect(tl_endpt_t *); 856 static mblk_t *tl_resizemp(mblk_t *, ssize_t); 857 static void 
tl_discon_ind(tl_endpt_t *, uint32_t); 858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 859 static mblk_t *tl_ordrel_ind_alloc(void); 860 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 864 static void tl_icon_freemsgs(mblk_t **); 865 static void tl_merror(queue_t *, mblk_t *, int); 866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 867 static int tl_default_opt(queue_t *, int, int, uchar_t *); 868 static int tl_get_opt(queue_t *, int, int, uchar_t *); 869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 870 uchar_t *, void *, cred_t *); 871 static void tl_memrecover(queue_t *, mblk_t *, size_t); 872 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 873 static void tl_free(tl_endpt_t *); 874 static int tl_constructor(void *, void *, int); 875 static void tl_destructor(void *, void *); 876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 877 static tl_serializer_t *tl_serializer_alloc(int); 878 static void tl_serializer_refhold(tl_serializer_t *); 879 static void tl_serializer_refrele(tl_serializer_t *); 880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 881 static void tl_serializer_exit(tl_endpt_t *); 882 static boolean_t tl_noclose(tl_endpt_t *); 883 static void tl_closeok(tl_endpt_t *); 884 static void tl_refhold(tl_endpt_t *); 885 static void tl_refrele(tl_endpt_t *); 886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 888 static void tl_close_ser(mblk_t *, tl_endpt_t *); 889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 891 static void tl_proto_ser(mblk_t *, tl_endpt_t *); 892 static void tl_putq_ser(mblk_t *, 
tl_endpt_t *); 893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 897 static void tl_addr_unbind(tl_endpt_t *); 898 899 /* 900 * Intialize option database object for TL 901 */ 902 903 optdb_obj_t tl_opt_obj = { 904 tl_default_opt, /* TL default value function pointer */ 905 tl_get_opt, /* TL get function pointer */ 906 tl_set_opt, /* TL set function pointer */ 907 TL_OPT_ARR_CNT, /* TL option database count of entries */ 908 tl_opt_arr, /* TL option database */ 909 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 910 tl_valid_levels_arr /* TL valid level array */ 911 }; 912 913 /* 914 * Logical operations. 915 * 916 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y 917 * should also be true. 918 * 919 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or false at 920 * the same time. 921 */ 922 #define IMPLY(X, Y) (!(X) || (Y)) 923 #define EQUIV(X, Y) (IMPLY(X, Y) && IMPLY(Y, X)) 924 925 /* 926 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 927 * --------------------------------------- 928 */ 929 930 /* 931 * Loadable module routines 932 */ 933 int 934 _init(void) 935 { 936 return (mod_install(&modlinkage)); 937 } 938 939 int 940 _fini(void) 941 { 942 return (mod_remove(&modlinkage)); 943 } 944 945 int 946 _info(struct modinfo *modinfop) 947 { 948 return (mod_info(&modlinkage, modinfop)); 949 } 950 951 /* 952 * Driver Entry Points and Other routines 953 */ 954 static int 955 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 956 { 957 int i; 958 char name[32]; 959 960 /* 961 * Resume from a checkpoint state. 962 */ 963 if (cmd == DDI_RESUME) 964 return (DDI_SUCCESS); 965 966 if (cmd != DDI_ATTACH) 967 return (DDI_FAILURE); 968 969 /* 970 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that 971 * streams message sizes can be unlimited. 
We use a defined constant 972 * instead. 973 */ 974 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 975 976 /* 977 * Create subdevices for each transport. 978 */ 979 for (i = 0; i < TL_UNUSED; i++) { 980 if (ddi_create_minor_node(devi, 981 tl_transports[i].tr_name, 982 S_IFCHR, tl_transports[i].tr_minor, 983 DDI_PSEUDO, NULL) == DDI_FAILURE) { 984 ddi_remove_minor_node(devi, NULL); 985 return (DDI_FAILURE); 986 } 987 } 988 989 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 990 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 991 992 if (tl_cache == NULL) { 993 ddi_remove_minor_node(devi, NULL); 994 return (DDI_FAILURE); 995 } 996 997 tl_minors = id_space_create("tl_minor_space", 998 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 999 1000 /* 1001 * Create ID space for minor numbers 1002 */ 1003 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1004 tl_transport_state_t *t = &tl_transports[i]; 1005 1006 if (i == TL_UNUSED) 1007 continue; 1008 1009 /* Socket COTSORD shares namespace with COTS */ 1010 if (i == TL_SOCK_COTSORD) { 1011 t->tr_ai_hash = 1012 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1013 ASSERT(t->tr_ai_hash != NULL); 1014 t->tr_addr_hash = 1015 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1016 ASSERT(t->tr_addr_hash != NULL); 1017 continue; 1018 } 1019 1020 /* 1021 * Create hash tables. 
1022 */ 1023 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1024 t->tr_name); 1025 #ifdef _ILP32 1026 if (i & TL_SOCKET) 1027 t->tr_ai_hash = 1028 mod_hash_create_idhash(name, tl_hash_size - 1, 1029 mod_hash_null_valdtor); 1030 else 1031 t->tr_ai_hash = 1032 mod_hash_create_ptrhash(name, tl_hash_size, 1033 mod_hash_null_valdtor, sizeof (queue_t)); 1034 #else 1035 t->tr_ai_hash = 1036 mod_hash_create_idhash(name, tl_hash_size - 1, 1037 mod_hash_null_valdtor); 1038 #endif /* _ILP32 */ 1039 1040 if (i & TL_SOCKET) { 1041 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1042 t->tr_name); 1043 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1044 tl_hash_size, mod_hash_null_valdtor, 1045 sizeof (uintptr_t)); 1046 } else { 1047 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1048 t->tr_name); 1049 t->tr_addr_hash = mod_hash_create_extended(name, 1050 tl_hash_size, mod_hash_null_keydtor, 1051 mod_hash_null_valdtor, 1052 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1053 } 1054 1055 /* Create serializer for connectionless transports. */ 1056 if (i & TL_TICLTS) 1057 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1058 } 1059 1060 tl_dip = devi; 1061 1062 return (DDI_SUCCESS); 1063 } 1064 1065 static int 1066 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1067 { 1068 int i; 1069 1070 if (cmd == DDI_SUSPEND) 1071 return (DDI_SUCCESS); 1072 1073 if (cmd != DDI_DETACH) 1074 return (DDI_FAILURE); 1075 1076 /* 1077 * Destroy arenas and hash tables. 
1078 */ 1079 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1080 tl_transport_state_t *t = &tl_transports[i]; 1081 1082 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1083 continue; 1084 1085 ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL)); 1086 if (t->tr_serializer != NULL) { 1087 tl_serializer_refrele(t->tr_serializer); 1088 t->tr_serializer = NULL; 1089 } 1090 1091 #ifdef _ILP32 1092 if (i & TL_SOCKET) 1093 mod_hash_destroy_idhash(t->tr_ai_hash); 1094 else 1095 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1096 #else 1097 mod_hash_destroy_idhash(t->tr_ai_hash); 1098 #endif /* _ILP32 */ 1099 t->tr_ai_hash = NULL; 1100 if (i & TL_SOCKET) 1101 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1102 else 1103 mod_hash_destroy_hash(t->tr_addr_hash); 1104 t->tr_addr_hash = NULL; 1105 } 1106 1107 kmem_cache_destroy(tl_cache); 1108 tl_cache = NULL; 1109 id_space_destroy(tl_minors); 1110 tl_minors = NULL; 1111 ddi_remove_minor_node(devi, NULL); 1112 return (DDI_SUCCESS); 1113 } 1114 1115 /* ARGSUSED */ 1116 static int 1117 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1118 { 1119 1120 int retcode = DDI_FAILURE; 1121 1122 switch (infocmd) { 1123 1124 case DDI_INFO_DEVT2DEVINFO: 1125 if (tl_dip != NULL) { 1126 *result = (void *)tl_dip; 1127 retcode = DDI_SUCCESS; 1128 } 1129 break; 1130 1131 case DDI_INFO_DEVT2INSTANCE: 1132 *result = (void *)0; 1133 retcode = DDI_SUCCESS; 1134 break; 1135 1136 default: 1137 break; 1138 } 1139 return (retcode); 1140 } 1141 1142 /* 1143 * Endpoint reference management. 
1144 */ 1145 static void 1146 tl_refhold(tl_endpt_t *tep) 1147 { 1148 atomic_add_32(&tep->te_refcnt, 1); 1149 } 1150 1151 static void 1152 tl_refrele(tl_endpt_t *tep) 1153 { 1154 ASSERT(tep->te_refcnt != 0); 1155 1156 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1157 tl_free(tep); 1158 } 1159 1160 /*ARGSUSED*/ 1161 static int 1162 tl_constructor(void *buf, void *cdrarg, int kmflags) 1163 { 1164 tl_endpt_t *tep = buf; 1165 1166 bzero(tep, sizeof (tl_endpt_t)); 1167 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1168 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1169 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1170 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1171 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1172 1173 return (0); 1174 } 1175 1176 /*ARGSUSED*/ 1177 static void 1178 tl_destructor(void *buf, void *cdrarg) 1179 { 1180 tl_endpt_t *tep = buf; 1181 1182 mutex_destroy(&tep->te_closelock); 1183 cv_destroy(&tep->te_closecv); 1184 mutex_destroy(&tep->te_srv_lock); 1185 cv_destroy(&tep->te_srv_cv); 1186 mutex_destroy(&tep->te_ser_lock); 1187 } 1188 1189 static void 1190 tl_free(tl_endpt_t *tep) 1191 { 1192 ASSERT(tep->te_refcnt == 0); 1193 ASSERT(tep->te_transport != NULL); 1194 ASSERT(tep->te_rq == NULL); 1195 ASSERT(tep->te_wq == NULL); 1196 ASSERT(tep->te_ser != NULL); 1197 ASSERT(tep->te_ser_count == 0); 1198 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1199 1200 if (IS_SOCKET(tep)) { 1201 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1202 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1203 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1204 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1205 } else if (tep->te_abuf != NULL) { 1206 kmem_free(tep->te_abuf, tep->te_alen); 1207 tep->te_alen = -1; /* uninitialized */ 1208 tep->te_abuf = NULL; 1209 } else { 1210 ASSERT(tep->te_alen == -1); 1211 } 1212 1213 id_free(tl_minors, tep->te_minor); 1214 ASSERT(tep->te_credp == NULL); 1215 1216 if (tep->te_hash_hndl != NULL) 1217 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1218 1219 if (IS_COTS(tep)) { 1220 TL_REMOVE_PEER(tep->te_conp); 1221 TL_REMOVE_PEER(tep->te_oconp); 1222 tl_serializer_refrele(tep->te_ser); 1223 tep->te_ser = NULL; 1224 ASSERT(tep->te_nicon == 0); 1225 ASSERT(list_head(&tep->te_iconp) == NULL); 1226 } else { 1227 ASSERT(tep->te_lastep == NULL); 1228 ASSERT(list_head(&tep->te_flowlist) == NULL); 1229 ASSERT(tep->te_flowq == NULL); 1230 } 1231 1232 ASSERT(tep->te_bufcid == 0); 1233 ASSERT(tep->te_timoutid == 0); 1234 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1235 tep->te_acceptor_id = 0; 1236 1237 ASSERT(tep->te_closewait == 0); 1238 ASSERT(!tep->te_rsrv_active); 1239 ASSERT(!tep->te_wsrv_active); 1240 tep->te_closing = 0; 1241 tep->te_nowsrv = B_FALSE; 1242 tep->te_flag = 0; 1243 1244 kmem_cache_free(tl_cache, tep); 1245 } 1246 1247 /* 1248 * Allocate/free reference-counted wrappers for serializers. 
1249 */ 1250 static tl_serializer_t * 1251 tl_serializer_alloc(int flags) 1252 { 1253 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1254 serializer_t *ser; 1255 1256 if (s == NULL) 1257 return (NULL); 1258 1259 ser = serializer_create(flags); 1260 1261 if (ser == NULL) { 1262 kmem_free(s, sizeof (tl_serializer_t)); 1263 return (NULL); 1264 } 1265 1266 s->ts_refcnt = 1; 1267 s->ts_serializer = ser; 1268 return (s); 1269 } 1270 1271 static void 1272 tl_serializer_refhold(tl_serializer_t *s) 1273 { 1274 atomic_add_32(&s->ts_refcnt, 1); 1275 } 1276 1277 static void 1278 tl_serializer_refrele(tl_serializer_t *s) 1279 { 1280 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1281 serializer_destroy(s->ts_serializer); 1282 kmem_free(s, sizeof (tl_serializer_t)); 1283 } 1284 } 1285 1286 /* 1287 * Post a request on the endpoint serializer. For COTS transports keep track of 1288 * the number of pending requests. 1289 */ 1290 static void 1291 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1292 { 1293 if (IS_COTS(tep)) { 1294 mutex_enter(&tep->te_ser_lock); 1295 tep->te_ser_count++; 1296 mutex_exit(&tep->te_ser_lock); 1297 } 1298 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1299 } 1300 1301 /* 1302 * Complete processing the request on the serializer. Decrement the counter for 1303 * pending requests for COTS transports. 1304 */ 1305 static void 1306 tl_serializer_exit(tl_endpt_t *tep) 1307 { 1308 if (IS_COTS(tep)) { 1309 mutex_enter(&tep->te_ser_lock); 1310 ASSERT(tep->te_ser_count != 0); 1311 tep->te_ser_count--; 1312 mutex_exit(&tep->te_ser_lock); 1313 } 1314 } 1315 1316 /* 1317 * Hash management functions. 1318 */ 1319 1320 /* 1321 * Return TRUE if two addresses are equal, false otherwise. 
1322 */ 1323 static boolean_t 1324 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1325 { 1326 return ((ap1->ta_alen > 0) && 1327 (ap1->ta_alen == ap2->ta_alen) && 1328 (ap1->ta_zoneid == ap2->ta_zoneid) && 1329 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1330 } 1331 1332 /* 1333 * This function is called whenever an endpoint is found in the hash table. 1334 */ 1335 /* ARGSUSED0 */ 1336 static void 1337 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1338 { 1339 tl_refhold((tl_endpt_t *)val); 1340 } 1341 1342 /* 1343 * Address hash function. 1344 */ 1345 /* ARGSUSED */ 1346 static uint_t 1347 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1348 { 1349 tl_addr_t *ap = (tl_addr_t *)key; 1350 size_t len = ap->ta_alen; 1351 uchar_t *p = ap->ta_abuf; 1352 uint_t i, g; 1353 1354 ASSERT((len > 0) && (p != NULL)); 1355 1356 for (i = ap->ta_zoneid; len -- != 0; p++) { 1357 i = (i << 4) + (*p); 1358 if ((g = (i & 0xf0000000U)) != 0) { 1359 i ^= (g >> 24); 1360 i ^= g; 1361 } 1362 } 1363 return (i); 1364 } 1365 1366 /* 1367 * This function is used by hash lookups. It compares two generic addresses. 1368 */ 1369 static int 1370 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1371 { 1372 #ifdef DEBUG 1373 tl_addr_t *ap1 = (tl_addr_t *)key1; 1374 tl_addr_t *ap2 = (tl_addr_t *)key2; 1375 1376 ASSERT(key1 != NULL); 1377 ASSERT(key2 != NULL); 1378 1379 ASSERT(ap1->ta_abuf != NULL); 1380 ASSERT(ap2->ta_abuf != NULL); 1381 ASSERT(ap1->ta_alen > 0); 1382 ASSERT(ap2->ta_alen > 0); 1383 #endif 1384 1385 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1386 } 1387 1388 /* 1389 * Prevent endpoint from closing if possible. 1390 * Return B_TRUE on success, B_FALSE on failure. 1391 */ 1392 static boolean_t 1393 tl_noclose(tl_endpt_t *tep) 1394 { 1395 boolean_t rc = B_FALSE; 1396 1397 mutex_enter(&tep->te_closelock); 1398 if (! 
tep->te_closing) { 1399 ASSERT(tep->te_closewait == 0); 1400 tep->te_closewait++; 1401 rc = B_TRUE; 1402 } 1403 mutex_exit(&tep->te_closelock); 1404 return (rc); 1405 } 1406 1407 /* 1408 * Allow endpoint to close if needed. 1409 */ 1410 static void 1411 tl_closeok(tl_endpt_t *tep) 1412 { 1413 ASSERT(tep->te_closewait > 0); 1414 mutex_enter(&tep->te_closelock); 1415 ASSERT(tep->te_closewait == 1); 1416 tep->te_closewait--; 1417 cv_signal(&tep->te_closecv); 1418 mutex_exit(&tep->te_closelock); 1419 } 1420 1421 /* 1422 * STREAMS open entry point. 1423 */ 1424 /* ARGSUSED */ 1425 static int 1426 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1427 { 1428 tl_endpt_t *tep; 1429 minor_t minor = getminor(*devp); 1430 1431 /* 1432 * Driver is called directly. Both CLONEOPEN and MODOPEN 1433 * are illegal 1434 */ 1435 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1436 return (ENXIO); 1437 1438 if (rq->q_ptr != NULL) 1439 return (0); 1440 1441 /* Minor number should specify the mode used for the driver. */ 1442 if ((minor >= TL_UNUSED)) 1443 return (ENXIO); 1444 1445 if (oflag & SO_SOCKSTR) { 1446 minor |= TL_SOCKET; 1447 } 1448 1449 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1450 tep->te_refcnt = 1; 1451 tep->te_cpid = curproc->p_pid; 1452 rq->q_ptr = WR(rq)->q_ptr = tep; 1453 tep->te_state = TS_UNBND; 1454 tep->te_credp = credp; 1455 crhold(credp); 1456 tep->te_zoneid = getzoneid(); 1457 1458 tep->te_flag = minor & TL_MINOR_MASK; 1459 tep->te_transport = &tl_transports[minor]; 1460 1461 /* Allocate a unique minor number for this instance. */ 1462 tep->te_minor = (minor_t)id_alloc(tl_minors); 1463 1464 /* Reserve hash handle for bind(). 
*/ 1465 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1466 1467 /* Transport-specific initialization */ 1468 if (IS_COTS(tep)) { 1469 /* Use private serializer */ 1470 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1471 1472 /* Create list for pending connections */ 1473 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1474 offsetof(tl_icon_t, ti_node)); 1475 tep->te_qlen = 0; 1476 tep->te_nicon = 0; 1477 tep->te_oconp = NULL; 1478 tep->te_conp = NULL; 1479 } else { 1480 /* Use shared serializer */ 1481 tep->te_ser = tep->te_transport->tr_serializer; 1482 bzero(&tep->te_flows, sizeof (list_node_t)); 1483 /* Create list for flow control */ 1484 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1485 offsetof(tl_endpt_t, te_flows)); 1486 tep->te_flowq = NULL; 1487 tep->te_lastep = NULL; 1488 1489 } 1490 1491 /* Initialize endpoint address */ 1492 if (IS_SOCKET(tep)) { 1493 /* Socket-specific address handling. */ 1494 tep->te_alen = TL_SOUX_ADDRLEN; 1495 tep->te_abuf = &tep->te_uxaddr; 1496 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1497 tep->te_magic = SOU_MAGIC_IMPLICIT; 1498 } else { 1499 tep->te_alen = -1; 1500 tep->te_abuf = NULL; 1501 } 1502 1503 /* clone the driver */ 1504 *devp = makedevice(getmajor(*devp), tep->te_minor); 1505 1506 tep->te_rq = rq; 1507 tep->te_wq = WR(rq); 1508 1509 #ifdef _ILP32 1510 if (IS_SOCKET(tep)) 1511 tep->te_acceptor_id = tep->te_minor; 1512 else 1513 tep->te_acceptor_id = (t_uscalar_t)rq; 1514 #else 1515 tep->te_acceptor_id = tep->te_minor; 1516 #endif /* _ILP32 */ 1517 1518 1519 qprocson(rq); 1520 1521 /* 1522 * Insert acceptor ID in the hash. The AI hash always sleeps on 1523 * insertion so insertion can't fail. 
1524 */ 1525 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1526 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1527 (mod_hash_val_t)tep); 1528 1529 return (0); 1530 } 1531 1532 /* ARGSUSED1 */ 1533 static int 1534 tl_close(queue_t *rq, int flag, cred_t *credp) 1535 { 1536 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1537 tl_endpt_t *elp = NULL; 1538 queue_t *wq = tep->te_wq; 1539 int rc; 1540 1541 ASSERT(wq == WR(rq)); 1542 1543 /* 1544 * Remove the endpoint from acceptor hash. 1545 */ 1546 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1547 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1548 (mod_hash_val_t *)&elp); 1549 ASSERT(rc == 0 && tep == elp); 1550 if ((rc != 0) || (tep != elp)) { 1551 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1552 SL_TRACE|SL_ERROR, 1553 "tl_close:inconsistency in AI hash")); 1554 } 1555 1556 /* 1557 * Wait till close is safe, then mark endpoint as closing. 1558 */ 1559 mutex_enter(&tep->te_closelock); 1560 while (tep->te_closewait) 1561 cv_wait(&tep->te_closecv, &tep->te_closelock); 1562 tep->te_closing = B_TRUE; 1563 /* 1564 * Will wait for the serializer part of the close to finish, so set 1565 * te_closewait now. 1566 */ 1567 tep->te_closewait = 1; 1568 tep->te_nowsrv = B_FALSE; 1569 mutex_exit(&tep->te_closelock); 1570 1571 /* 1572 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1573 * It is safe because close will wait for tl_close_ser to finish. 1574 */ 1575 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1576 1577 /* 1578 * Wait for the first phase of close to complete before qprocsoff(). 
1579 */ 1580 mutex_enter(&tep->te_closelock); 1581 while (tep->te_closewait) 1582 cv_wait(&tep->te_closecv, &tep->te_closelock); 1583 mutex_exit(&tep->te_closelock); 1584 1585 qprocsoff(rq); 1586 1587 if (tep->te_bufcid) { 1588 qunbufcall(rq, tep->te_bufcid); 1589 tep->te_bufcid = 0; 1590 } 1591 if (tep->te_timoutid) { 1592 (void) quntimeout(rq, tep->te_timoutid); 1593 tep->te_timoutid = 0; 1594 } 1595 1596 /* 1597 * Finish close behind serializer. 1598 * 1599 * For a CLTS endpoint increase a refcount and continue close processing 1600 * with serializer protection. This processing may happen asynchronously 1601 * with the completion of tl_close(). 1602 * 1603 * Fot a COTS endpoint wait before destroying tep since the serializer 1604 * may go away together with tep and we need to destroy serializer 1605 * outside of serializer context. 1606 */ 1607 ASSERT(tep->te_closewait == 0); 1608 if (IS_COTS(tep)) 1609 tep->te_closewait = 1; 1610 else 1611 tl_refhold(tep); 1612 1613 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1614 1615 /* 1616 * For connection-oriented transports wait for all serializer activity 1617 * to settle down. 1618 */ 1619 if (IS_COTS(tep)) { 1620 mutex_enter(&tep->te_closelock); 1621 while (tep->te_closewait) 1622 cv_wait(&tep->te_closecv, &tep->te_closelock); 1623 mutex_exit(&tep->te_closelock); 1624 } 1625 1626 crfree(tep->te_credp); 1627 tep->te_credp = NULL; 1628 tep->te_wq = NULL; 1629 tl_refrele(tep); 1630 /* 1631 * tep is likely to be destroyed now, so can't reference it any more. 1632 */ 1633 1634 rq->q_ptr = wq->q_ptr = NULL; 1635 return (0); 1636 } 1637 1638 /* 1639 * First phase of close processing done behind the serializer. 1640 * 1641 * Do not drop the reference in the end - tl_close() wants this reference to 1642 * stay. 
1643 */ 1644 /* ARGSUSED0 */ 1645 static void 1646 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1647 { 1648 ASSERT(tep->te_closing); 1649 ASSERT(tep->te_closewait == 1); 1650 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1651 1652 tep->te_flag |= TL_CLOSE_SER; 1653 1654 /* 1655 * Drain out all messages on queue except for TL_TICOTS where the 1656 * abortive release semantics permit discarding of data on close 1657 */ 1658 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1659 tl_wsrv_ser(NULL, tep); 1660 } 1661 1662 /* Remove address from hash table. */ 1663 tl_addr_unbind(tep); 1664 /* 1665 * qprocsoff() gets confused when q->q_next is not NULL on the write 1666 * queue of the driver, so clear these before qprocsoff() is called. 1667 * Also clear q_next for the peer since this queue is going away. 1668 */ 1669 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1670 tl_endpt_t *peer_tep = tep->te_conp; 1671 1672 tep->te_wq->q_next = NULL; 1673 if ((peer_tep != NULL) && !peer_tep->te_closing) 1674 peer_tep->te_wq->q_next = NULL; 1675 } 1676 1677 tep->te_rq = NULL; 1678 1679 /* wake up tl_close() */ 1680 tl_closeok(tep); 1681 tl_serializer_exit(tep); 1682 } 1683 1684 /* 1685 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1686 * the reference for CLTS. 1687 * 1688 * Called from serializer. Should drop reference count for CLTS only. 1689 */ 1690 /* ARGSUSED0 */ 1691 static void 1692 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1693 { 1694 ASSERT(tep->te_closing); 1695 ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0)); 1696 ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1)); 1697 1698 tep->te_state = -1; /* Uninitialized */ 1699 if (IS_COTS(tep)) { 1700 tl_co_unconnect(tep); 1701 } else { 1702 /* Connectionless specific cleanup */ 1703 TL_REMOVE_PEER(tep->te_lastep); 1704 /* 1705 * Backenable anybody that is flow controlled waiting for 1706 * this endpoint. 
1707 */ 1708 tl_cl_backenable(tep); 1709 if (tep->te_flowq != NULL) { 1710 list_remove(&(tep->te_flowq->te_flowlist), tep); 1711 tep->te_flowq = NULL; 1712 } 1713 } 1714 1715 tl_serializer_exit(tep); 1716 if (IS_COTS(tep)) 1717 tl_closeok(tep); 1718 else 1719 tl_refrele(tep); 1720 } 1721 1722 /* 1723 * STREAMS write-side put procedure. 1724 * Enter serializer for most of the processing. 1725 * 1726 * The T_CONN_REQ is processed outside of serializer. 1727 */ 1728 static void 1729 tl_wput(queue_t *wq, mblk_t *mp) 1730 { 1731 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1732 ssize_t msz = MBLKL(mp); 1733 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1734 tlproc_t *tl_proc = NULL; 1735 1736 switch (DB_TYPE(mp)) { 1737 case M_DATA: 1738 /* Only valid for connection-oriented transports */ 1739 if (IS_CLTS(tep)) { 1740 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1741 SL_TRACE|SL_ERROR, 1742 "tl_wput:M_DATA invalid for ticlts driver")); 1743 tl_merror(wq, mp, EPROTO); 1744 return; 1745 } 1746 tl_proc = tl_wput_data_ser; 1747 break; 1748 1749 case M_IOCTL: 1750 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1751 case TL_IOC_CREDOPT: 1752 /* FALLTHROUGH */ 1753 case TL_IOC_UCREDOPT: 1754 /* 1755 * Serialize endpoint state change. 
1756 */ 1757 tl_proc = tl_do_ioctl_ser; 1758 break; 1759 1760 default: 1761 miocnak(wq, mp, 0, EINVAL); 1762 return; 1763 } 1764 break; 1765 1766 case M_FLUSH: 1767 /* 1768 * do canonical M_FLUSH processing 1769 */ 1770 if (*mp->b_rptr & FLUSHW) { 1771 flushq(wq, FLUSHALL); 1772 *mp->b_rptr &= ~FLUSHW; 1773 } 1774 if (*mp->b_rptr & FLUSHR) { 1775 flushq(RD(wq), FLUSHALL); 1776 qreply(wq, mp); 1777 } else { 1778 freemsg(mp); 1779 } 1780 return; 1781 1782 case M_PROTO: 1783 if (msz < sizeof (prim->type)) { 1784 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1785 SL_TRACE|SL_ERROR, 1786 "tl_wput:M_PROTO data too short")); 1787 tl_merror(wq, mp, EPROTO); 1788 return; 1789 } 1790 switch (prim->type) { 1791 case T_OPTMGMT_REQ: 1792 case T_SVR4_OPTMGMT_REQ: 1793 /* 1794 * Process TPI option management requests immediately 1795 * in put procedure regardless of in-order processing 1796 * of already queued messages. 1797 * (Note: This driver supports AF_UNIX socket 1798 * implementation. Unless we implement this processing, 1799 * setsockopt() on socket endpoint will block on flow 1800 * controlled endpoints which it should not. That is 1801 * required for successful execution of VSU socket tests 1802 * and is consistent with BSD socket behavior). 
1803 */ 1804 tl_optmgmt(wq, mp); 1805 return; 1806 case O_T_BIND_REQ: 1807 case T_BIND_REQ: 1808 tl_proc = tl_bind_ser; 1809 break; 1810 case T_CONN_REQ: 1811 if (IS_CLTS(tep)) { 1812 tl_merror(wq, mp, EPROTO); 1813 return; 1814 } 1815 tl_conn_req(wq, mp); 1816 return; 1817 case T_DATA_REQ: 1818 case T_OPTDATA_REQ: 1819 case T_EXDATA_REQ: 1820 case T_ORDREL_REQ: 1821 tl_proc = tl_putq_ser; 1822 break; 1823 case T_UNITDATA_REQ: 1824 if (IS_COTS(tep) || 1825 (msz < sizeof (struct T_unitdata_req))) { 1826 tl_merror(wq, mp, EPROTO); 1827 return; 1828 } 1829 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1830 tl_proc = tl_unitdata_ser; 1831 } else { 1832 tl_proc = tl_putq_ser; 1833 } 1834 break; 1835 default: 1836 /* 1837 * process in service procedure if message already 1838 * queued (maintain in-order processing) 1839 */ 1840 if (wq->q_first != NULL) { 1841 tl_proc = tl_putq_ser; 1842 } else { 1843 tl_proc = tl_wput_ser; 1844 } 1845 break; 1846 } 1847 break; 1848 1849 case M_PCPROTO: 1850 /* 1851 * Check that the message has enough data to figure out TPI 1852 * primitive. 1853 */ 1854 if (msz < sizeof (prim->type)) { 1855 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1856 SL_TRACE|SL_ERROR, 1857 "tl_wput:M_PCROTO data too short")); 1858 tl_merror(wq, mp, EPROTO); 1859 return; 1860 } 1861 switch (prim->type) { 1862 case T_CAPABILITY_REQ: 1863 tl_capability_req(mp, tep); 1864 return; 1865 case T_INFO_REQ: 1866 tl_proc = tl_info_req_ser; 1867 break; 1868 default: 1869 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1870 SL_TRACE|SL_ERROR, 1871 "tl_wput:unknown TPI msg primitive")); 1872 tl_merror(wq, mp, EPROTO); 1873 return; 1874 } 1875 break; 1876 default: 1877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1878 "tl_wput:default:unexpected Streams message")); 1879 freemsg(mp); 1880 return; 1881 } 1882 1883 /* 1884 * Continue processing via serializer. 
1885 */ 1886 ASSERT(tl_proc != NULL); 1887 tl_refhold(tep); 1888 tl_serializer_enter(tep, tl_proc, mp); 1889 } 1890 1891 /* 1892 * Place message on the queue while preserving order. 1893 */ 1894 static void 1895 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1896 { 1897 if (tep->te_closing) { 1898 tl_wput_ser(mp, tep); 1899 } else { 1900 TL_PUTQ(tep, mp); 1901 tl_serializer_exit(tep); 1902 tl_refrele(tep); 1903 } 1904 1905 } 1906 1907 static void 1908 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1909 { 1910 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1911 1912 switch (DB_TYPE(mp)) { 1913 case M_DATA: 1914 tl_data(mp, tep); 1915 break; 1916 case M_PROTO: 1917 tl_do_proto(mp, tep); 1918 break; 1919 default: 1920 freemsg(mp); 1921 break; 1922 } 1923 } 1924 1925 /* 1926 * Write side put procedure called from serializer. 1927 */ 1928 static void 1929 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1930 { 1931 tl_wput_common_ser(mp, tep); 1932 tl_serializer_exit(tep); 1933 tl_refrele(tep); 1934 } 1935 1936 /* 1937 * M_DATA processing. Called from serializer. 1938 */ 1939 static void 1940 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1941 { 1942 tl_endpt_t *peer_tep = tep->te_conp; 1943 queue_t *peer_rq; 1944 1945 ASSERT(DB_TYPE(mp) == M_DATA); 1946 ASSERT(IS_COTS(tep)); 1947 1948 ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer)); 1949 1950 /* 1951 * fastpath for data. Ignore flow control if tep is closing. 
1952 */ 1953 if ((peer_tep != NULL) && 1954 !peer_tep->te_closing && 1955 ((tep->te_state == TS_DATA_XFER) || 1956 (tep->te_state == TS_WREQ_ORDREL)) && 1957 (tep->te_wq != NULL) && 1958 (tep->te_wq->q_first == NULL) && 1959 ((peer_tep->te_state == TS_DATA_XFER) || 1960 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1961 ((peer_rq = peer_tep->te_rq) != NULL) && 1962 (canputnext(peer_rq) || tep->te_closing)) { 1963 putnext(peer_rq, mp); 1964 } else if (tep->te_closing) { 1965 /* 1966 * It is possible that by the time we got here tep started to 1967 * close. If the write queue is not empty, and the state is 1968 * TS_DATA_XFER the data should be delivered in order, so we 1969 * call putq() instead of freeing the data. 1970 */ 1971 if ((tep->te_wq != NULL) && 1972 ((tep->te_state == TS_DATA_XFER) || 1973 (tep->te_state == TS_WREQ_ORDREL))) { 1974 TL_PUTQ(tep, mp); 1975 } else { 1976 freemsg(mp); 1977 } 1978 } else { 1979 TL_PUTQ(tep, mp); 1980 } 1981 1982 tl_serializer_exit(tep); 1983 tl_refrele(tep); 1984 } 1985 1986 /* 1987 * Write side service routine. 1988 * 1989 * All actual processing happens within serializer which is entered 1990 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1991 * messages that need processing may have arrived, so tl_wsrv repeats until 1992 * queue is empty or te_nowsrv is set. 1993 */ 1994 static void 1995 tl_wsrv(queue_t *wq) 1996 { 1997 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1998 1999 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 2000 mutex_enter(&tep->te_srv_lock); 2001 ASSERT(tep->te_wsrv_active == B_FALSE); 2002 tep->te_wsrv_active = B_TRUE; 2003 mutex_exit(&tep->te_srv_lock); 2004 2005 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2006 2007 /* 2008 * Wait for serializer job to complete. 
2009 */ 2010 mutex_enter(&tep->te_srv_lock); 2011 while (tep->te_wsrv_active) { 2012 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2013 } 2014 cv_signal(&tep->te_srv_cv); 2015 mutex_exit(&tep->te_srv_lock); 2016 } 2017 } 2018 2019 /* 2020 * Serialized write side processing of the STREAMS queue. 2021 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2022 * is NULL. 2023 */ 2024 static void 2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2026 { 2027 mblk_t *mp; 2028 queue_t *wq = tep->te_wq; 2029 2030 ASSERT(wq != NULL); 2031 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2032 tl_wput_common_ser(mp, tep); 2033 } 2034 2035 /* 2036 * Wakeup service routine unless called from close. 2037 * If ser_mp is specified, the caller is tl_wsrv(). 2038 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2039 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2040 * be no matching tl_serializer_exit() in this case. 2041 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2042 * waiting on te_srv_cv. 2043 */ 2044 if (ser_mp != NULL) { 2045 /* 2046 * We are called from tl_wsrv. 2047 */ 2048 mutex_enter(&tep->te_srv_lock); 2049 ASSERT(tep->te_wsrv_active); 2050 tep->te_wsrv_active = B_FALSE; 2051 cv_signal(&tep->te_srv_cv); 2052 mutex_exit(&tep->te_srv_lock); 2053 tl_serializer_exit(tep); 2054 } 2055 } 2056 2057 /* 2058 * Called when the stream is backenabled. Enter serializer and qenable everyone 2059 * flow controlled by tep. 2060 * 2061 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2062 * is possible that two instances of tl_rsrv will be running reusing the same 2063 * rsrv mblk. 
2064 */ 2065 static void 2066 tl_rsrv(queue_t *rq) 2067 { 2068 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2069 2070 ASSERT(rq->q_first == NULL); 2071 ASSERT(tep->te_rsrv_active == 0); 2072 2073 tep->te_rsrv_active = B_TRUE; 2074 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2075 /* 2076 * Wait for serializer job to complete. 2077 */ 2078 mutex_enter(&tep->te_srv_lock); 2079 while (tep->te_rsrv_active) { 2080 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2081 } 2082 cv_signal(&tep->te_srv_cv); 2083 mutex_exit(&tep->te_srv_lock); 2084 } 2085 2086 /* ARGSUSED */ 2087 static void 2088 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2089 { 2090 tl_endpt_t *peer_tep; 2091 2092 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2093 tl_cl_backenable(tep); 2094 } else if ( 2095 IS_COTS(tep) && 2096 ((peer_tep = tep->te_conp) != NULL) && 2097 !peer_tep->te_closing && 2098 ((tep->te_state == TS_DATA_XFER) || 2099 (tep->te_state == TS_WIND_ORDREL)|| 2100 (tep->te_state == TS_WREQ_ORDREL))) { 2101 TL_QENABLE(peer_tep); 2102 } 2103 2104 /* 2105 * Wakeup read side service routine. 2106 */ 2107 mutex_enter(&tep->te_srv_lock); 2108 ASSERT(tep->te_rsrv_active); 2109 tep->te_rsrv_active = B_FALSE; 2110 cv_signal(&tep->te_srv_cv); 2111 mutex_exit(&tep->te_srv_lock); 2112 tl_serializer_exit(tep); 2113 } 2114 2115 /* 2116 * process M_PROTO messages. Always called from serializer. 2117 */ 2118 static void 2119 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2120 { 2121 ssize_t msz = MBLKL(mp); 2122 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2123 2124 /* Message size was validated by tl_wput(). 
*/ 2125 ASSERT(msz >= sizeof (prim->type)); 2126 2127 switch (prim->type) { 2128 case T_UNBIND_REQ: 2129 tl_unbind(mp, tep); 2130 break; 2131 2132 case T_ADDR_REQ: 2133 tl_addr_req(mp, tep); 2134 break; 2135 2136 case O_T_CONN_RES: 2137 case T_CONN_RES: 2138 if (IS_CLTS(tep)) { 2139 tl_merror(tep->te_wq, mp, EPROTO); 2140 break; 2141 } 2142 tl_conn_res(mp, tep); 2143 break; 2144 2145 case T_DISCON_REQ: 2146 if (IS_CLTS(tep)) { 2147 tl_merror(tep->te_wq, mp, EPROTO); 2148 break; 2149 } 2150 tl_discon_req(mp, tep); 2151 break; 2152 2153 case T_DATA_REQ: 2154 if (IS_CLTS(tep)) { 2155 tl_merror(tep->te_wq, mp, EPROTO); 2156 break; 2157 } 2158 tl_data(mp, tep); 2159 break; 2160 2161 case T_OPTDATA_REQ: 2162 if (IS_CLTS(tep)) { 2163 tl_merror(tep->te_wq, mp, EPROTO); 2164 break; 2165 } 2166 tl_data(mp, tep); 2167 break; 2168 2169 case T_EXDATA_REQ: 2170 if (IS_CLTS(tep)) { 2171 tl_merror(tep->te_wq, mp, EPROTO); 2172 break; 2173 } 2174 tl_exdata(mp, tep); 2175 break; 2176 2177 case T_ORDREL_REQ: 2178 if (! IS_COTSORD(tep)) { 2179 tl_merror(tep->te_wq, mp, EPROTO); 2180 break; 2181 } 2182 tl_ordrel(mp, tep); 2183 break; 2184 2185 case T_UNITDATA_REQ: 2186 if (IS_COTS(tep)) { 2187 tl_merror(tep->te_wq, mp, EPROTO); 2188 break; 2189 } 2190 tl_unitdata(mp, tep); 2191 break; 2192 2193 default: 2194 tl_merror(tep->te_wq, mp, EPROTO); 2195 break; 2196 } 2197 } 2198 2199 /* 2200 * Process ioctl from serializer. 2201 * This is a wrapper around tl_do_ioctl(). 2202 */ 2203 static void 2204 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2205 { 2206 if (! 
tep->te_closing) 2207 tl_do_ioctl(mp, tep); 2208 else 2209 freemsg(mp); 2210 2211 tl_serializer_exit(tep); 2212 tl_refrele(tep); 2213 } 2214 2215 static void 2216 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2217 { 2218 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2219 int cmd = iocbp->ioc_cmd; 2220 queue_t *wq = tep->te_wq; 2221 int error; 2222 int thisopt, otheropt; 2223 2224 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2225 2226 switch (cmd) { 2227 case TL_IOC_CREDOPT: 2228 if (cmd == TL_IOC_CREDOPT) { 2229 thisopt = TL_SETCRED; 2230 otheropt = TL_SETUCRED; 2231 } else { 2232 /* FALLTHROUGH */ 2233 case TL_IOC_UCREDOPT: 2234 thisopt = TL_SETUCRED; 2235 otheropt = TL_SETCRED; 2236 } 2237 /* 2238 * The credentials passing does not apply to sockets. 2239 * Only one of the cred options can be set at a given time. 2240 */ 2241 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2242 miocnak(wq, mp, 0, EINVAL); 2243 return; 2244 } 2245 2246 /* 2247 * Turn on generation of credential options for 2248 * T_conn_req, T_conn_con, T_unidata_ind. 
2249 */ 2250 error = miocpullup(mp, sizeof (uint32_t)); 2251 if (error != 0) { 2252 miocnak(wq, mp, 0, error); 2253 return; 2254 } 2255 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2256 miocnak(wq, mp, 0, EINVAL); 2257 return; 2258 } 2259 2260 if (*(uint32_t *)mp->b_cont->b_rptr) 2261 tep->te_flag |= thisopt; 2262 else 2263 tep->te_flag &= ~thisopt; 2264 2265 miocack(wq, mp, 0, 0); 2266 break; 2267 2268 default: 2269 /* Should not be here */ 2270 miocnak(wq, mp, 0, EINVAL); 2271 break; 2272 } 2273 } 2274 2275 2276 /* 2277 * send T_ERROR_ACK 2278 * Note: assumes enough memory or caller passed big enough mp 2279 * - no recovery from allocb failures 2280 */ 2281 2282 static void 2283 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2284 t_scalar_t unix_err, t_scalar_t type) 2285 { 2286 struct T_error_ack *err_ack; 2287 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2288 M_PCPROTO, T_ERROR_ACK); 2289 2290 if (ackmp == NULL) { 2291 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2292 "tl_error_ack:out of mblk memory")); 2293 tl_merror(wq, NULL, ENOSR); 2294 return; 2295 } 2296 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2297 err_ack->ERROR_prim = type; 2298 err_ack->TLI_error = tli_err; 2299 err_ack->UNIX_error = unix_err; 2300 2301 /* 2302 * send error ack message 2303 */ 2304 qreply(wq, ackmp); 2305 } 2306 2307 2308 2309 /* 2310 * send T_OK_ACK 2311 * Note: assumes enough memory or caller passed big enough mp 2312 * - no recovery from allocb failures 2313 */ 2314 static void 2315 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2316 { 2317 struct T_ok_ack *ok_ack; 2318 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2319 M_PCPROTO, T_OK_ACK); 2320 2321 if (ackmp == NULL) { 2322 tl_merror(wq, NULL, ENOMEM); 2323 return; 2324 } 2325 2326 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2327 ok_ack->CORRECT_prim = type; 2328 2329 (void) qreply(wq, ackmp); 2330 } 2331 2332 /* 2333 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2334 * This is a wrapper around tl_bind(). 2335 */ 2336 static void 2337 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2338 { 2339 if (! tep->te_closing) 2340 tl_bind(mp, tep); 2341 else 2342 freemsg(mp); 2343 2344 tl_serializer_exit(tep); 2345 tl_refrele(tep); 2346 } 2347 2348 /* 2349 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2350 * Assumes that the endpoint is in the unbound. 2351 */ 2352 static void 2353 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2354 { 2355 queue_t *wq = tep->te_wq; 2356 struct T_bind_ack *b_ack; 2357 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2358 mblk_t *ackmp, *bamp; 2359 soux_addr_t ux_addr; 2360 t_uscalar_t qlen = 0; 2361 t_scalar_t alen, aoff; 2362 tl_addr_t addr_req; 2363 void *addr_startp; 2364 ssize_t msz = MBLKL(mp), basize; 2365 t_scalar_t tli_err = 0, unix_err = 0; 2366 t_scalar_t save_prim_type = bind->PRIM_type; 2367 t_scalar_t save_state = tep->te_state; 2368 2369 if (tep->te_state != TS_UNBND) { 2370 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2371 SL_TRACE|SL_ERROR, 2372 "tl_wput:bind_request:out of state, state=%d", 2373 tep->te_state)); 2374 tli_err = TOUTSTATE; 2375 goto error; 2376 } 2377 2378 if (msz < sizeof (struct T_bind_req)) { 2379 tli_err = TSYSERR; unix_err = EINVAL; 2380 goto error; 2381 } 2382 2383 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2384 2385 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2386 (bind->PRIM_type == T_BIND_REQ)); 2387 2388 alen = bind->ADDR_length; 2389 aoff = bind->ADDR_offset; 2390 2391 /* negotiate max conn req pending */ 2392 if (IS_COTS(tep)) { 2393 qlen = bind->CONIND_number; 2394 if (qlen > tl_maxqlen) 2395 qlen = tl_maxqlen; 2396 } 2397 2398 /* 2399 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2400 * and bound again. 
2401 */ 2402 if ((tep->te_hash_hndl == NULL) && 2403 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2404 mod_hash_reserve_nosleep(tep->te_addrhash, 2405 &tep->te_hash_hndl) != 0) { 2406 tli_err = TSYSERR; unix_err = ENOSR; 2407 goto error; 2408 } 2409 2410 /* 2411 * Verify address correctness. 2412 */ 2413 if (IS_SOCKET(tep)) { 2414 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2415 2416 if ((alen != TL_SOUX_ADDRLEN) || 2417 (aoff < 0) || 2418 (aoff + alen > msz)) { 2419 (void) (STRLOG(TL_ID, tep->te_minor, 2420 1, SL_TRACE|SL_ERROR, 2421 "tl_bind: invalid socket addr")); 2422 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2423 tli_err = TSYSERR; unix_err = EINVAL; 2424 goto error; 2425 } 2426 /* Copy address from message to local buffer. */ 2427 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2428 /* 2429 * Check that we got correct address from sockets 2430 */ 2431 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2432 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2433 (void) (STRLOG(TL_ID, tep->te_minor, 2434 1, SL_TRACE|SL_ERROR, 2435 "tl_bind: invalid socket magic")); 2436 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2437 tli_err = TSYSERR; unix_err = EINVAL; 2438 goto error; 2439 } 2440 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2441 (ux_addr.soua_vp != NULL)) { 2442 (void) (STRLOG(TL_ID, tep->te_minor, 2443 1, SL_TRACE|SL_ERROR, 2444 "tl_bind: implicit addr non-empty")); 2445 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2446 tli_err = TSYSERR; unix_err = EINVAL; 2447 goto error; 2448 } 2449 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2450 (ux_addr.soua_vp == NULL)) { 2451 (void) (STRLOG(TL_ID, tep->te_minor, 2452 1, SL_TRACE|SL_ERROR, 2453 "tl_bind: explicit addr empty")); 2454 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2455 tli_err = TSYSERR; unix_err = EINVAL; 2456 goto error; 2457 } 2458 } else { 2459 if ((alen > 0) && ((aoff < 0) || 2460 ((ssize_t)(aoff + alen) > msz) || 2461 ((aoff + alen) < 0))) { 
2462 (void) (STRLOG(TL_ID, tep->te_minor, 2463 1, SL_TRACE|SL_ERROR, 2464 "tl_bind: invalid message")); 2465 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2466 tli_err = TSYSERR; unix_err = EINVAL; 2467 goto error; 2468 } 2469 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2470 (void) (STRLOG(TL_ID, tep->te_minor, 2471 1, SL_TRACE|SL_ERROR, 2472 "tl_bind: bad addr in message")); 2473 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2474 tli_err = TBADADDR; 2475 goto error; 2476 } 2477 #ifdef DEBUG 2478 /* 2479 * Mild form of ASSERT()ion to detect broken TPI apps. 2480 * if (! assertion) 2481 * log warning; 2482 */ 2483 if (! ((alen == 0 && aoff == 0) || 2484 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2485 (void) (STRLOG(TL_ID, tep->te_minor, 2486 3, SL_TRACE|SL_ERROR, 2487 "tl_bind: addr overlaps TPI message")); 2488 } 2489 #endif 2490 } 2491 2492 /* 2493 * Bind the address provided or allocate one if requested. 2494 * Allow rebinds with a new qlen value. 2495 */ 2496 if (IS_SOCKET(tep)) { 2497 /* 2498 * For anonymous requests the te_ap is already set up properly 2499 * so use minor number as an address. 2500 * For explicit requests need to check whether the address is 2501 * already in use. 2502 */ 2503 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2504 int rc; 2505 2506 if (tep->te_flag & TL_ADDRHASHED) { 2507 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2508 if (tep->te_vp == ux_addr.soua_vp) 2509 goto skip_addr_bind; 2510 else /* Rebind to a new address. */ 2511 tl_addr_unbind(tep); 2512 } 2513 /* 2514 * Insert address in the hash if it is not already 2515 * there. Since we use preallocated handle, the insert 2516 * can fail only if the key is already present. 
2517 */ 2518 rc = mod_hash_insert_reserve(tep->te_addrhash, 2519 (mod_hash_key_t)ux_addr.soua_vp, 2520 (mod_hash_val_t)tep, tep->te_hash_hndl); 2521 2522 if (rc != 0) { 2523 ASSERT(rc == MH_ERR_DUPLICATE); 2524 /* 2525 * Violate O_T_BIND_REQ semantics and fail with 2526 * TADDRBUSY - sockets will not use any address 2527 * other than supplied one for explicit binds. 2528 */ 2529 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2530 SL_TRACE|SL_ERROR, 2531 "tl_bind:requested addr %p is busy", 2532 ux_addr.soua_vp)); 2533 tli_err = TADDRBUSY; unix_err = 0; 2534 goto error; 2535 } 2536 tep->te_uxaddr = ux_addr; 2537 tep->te_flag |= TL_ADDRHASHED; 2538 tep->te_hash_hndl = NULL; 2539 } 2540 } else if (alen == 0) { 2541 /* 2542 * assign any free address 2543 */ 2544 if (! tl_get_any_addr(tep, NULL)) { 2545 (void) (STRLOG(TL_ID, tep->te_minor, 2546 1, SL_TRACE|SL_ERROR, 2547 "tl_bind:failed to get buffer for any " 2548 "address")); 2549 tli_err = TSYSERR; unix_err = ENOSR; 2550 goto error; 2551 } 2552 } else { 2553 addr_req.ta_alen = alen; 2554 addr_req.ta_abuf = (mp->b_rptr + aoff); 2555 addr_req.ta_zoneid = tep->te_zoneid; 2556 2557 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2558 if (tep->te_abuf == NULL) { 2559 tli_err = TSYSERR; unix_err = ENOSR; 2560 goto error; 2561 } 2562 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2563 tep->te_alen = alen; 2564 2565 if (mod_hash_insert_reserve(tep->te_addrhash, 2566 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2567 tep->te_hash_hndl) != 0) { 2568 if (save_prim_type == T_BIND_REQ) { 2569 /* 2570 * The bind semantics for this primitive 2571 * require a failure if the exact address 2572 * requested is busy 2573 */ 2574 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2575 SL_TRACE|SL_ERROR, 2576 "tl_bind:requested addr is busy")); 2577 tli_err = TADDRBUSY; unix_err = 0; 2578 goto error; 2579 } 2580 2581 /* 2582 * O_T_BIND_REQ semantics say if address if requested 2583 * address is busy, bind to any available free address 
2584 */ 2585 if (! tl_get_any_addr(tep, &addr_req)) { 2586 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2587 SL_TRACE|SL_ERROR, 2588 "tl_bind:unable to get any addr buf")); 2589 tli_err = TSYSERR; unix_err = ENOMEM; 2590 goto error; 2591 } 2592 } else { 2593 tep->te_flag |= TL_ADDRHASHED; 2594 tep->te_hash_hndl = NULL; 2595 } 2596 } 2597 2598 ASSERT(tep->te_alen >= 0); 2599 2600 skip_addr_bind: 2601 /* 2602 * prepare T_BIND_ACK TPI message 2603 */ 2604 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2605 bamp = reallocb(mp, basize, 0); 2606 if (bamp == NULL) { 2607 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2608 "tl_wput:tl_bind: allocb failed")); 2609 /* 2610 * roll back state changes 2611 */ 2612 tl_addr_unbind(tep); 2613 tep->te_state = TS_UNBND; 2614 tl_memrecover(wq, mp, basize); 2615 return; 2616 } 2617 2618 DB_TYPE(bamp) = M_PCPROTO; 2619 bamp->b_wptr = bamp->b_rptr + basize; 2620 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2621 b_ack->PRIM_type = T_BIND_ACK; 2622 b_ack->CONIND_number = qlen; 2623 b_ack->ADDR_length = tep->te_alen; 2624 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2625 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2626 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2627 2628 if (IS_COTS(tep)) { 2629 tep->te_qlen = qlen; 2630 if (qlen > 0) 2631 tep->te_flag |= TL_LISTENER; 2632 } 2633 2634 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2635 /* 2636 * send T_BIND_ACK message 2637 */ 2638 (void) qreply(wq, bamp); 2639 return; 2640 2641 error: 2642 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2643 if (ackmp == NULL) { 2644 /* 2645 * roll back state changes 2646 */ 2647 tep->te_state = save_state; 2648 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2649 return; 2650 } 2651 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2652 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2653 } 2654 2655 /* 2656 * Process T_UNBIND_REQ. 2657 * Called from serializer. 
2658 */ 2659 static void 2660 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2661 { 2662 queue_t *wq; 2663 mblk_t *ackmp; 2664 2665 if (tep->te_closing) { 2666 freemsg(mp); 2667 return; 2668 } 2669 2670 wq = tep->te_wq; 2671 2672 /* 2673 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2674 * ==> allocate for T_ERROR_ACK (known max) 2675 */ 2676 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2677 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2678 return; 2679 } 2680 /* 2681 * memory resources committed 2682 * Note: no message validation. T_UNBIND_REQ message is 2683 * same size as PRIM_type field so already verified earlier. 2684 */ 2685 2686 /* 2687 * validate state 2688 */ 2689 if (tep->te_state != TS_IDLE) { 2690 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2691 SL_TRACE|SL_ERROR, 2692 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2693 tep->te_state)); 2694 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2695 return; 2696 } 2697 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2698 2699 /* 2700 * TPI says on T_UNBIND_REQ: 2701 * send up a M_FLUSH to flush both 2702 * read and write queues 2703 */ 2704 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2705 2706 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2707 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2708 2709 /* 2710 * Sockets use bind with qlen==0 followed by bind() to 2711 * the same address with qlen > 0 for listeners. 2712 * We allow rebind with a new qlen value. 2713 */ 2714 tl_addr_unbind(tep); 2715 } 2716 2717 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2718 /* 2719 * send T_OK_ACK 2720 */ 2721 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2722 } 2723 2724 2725 /* 2726 * Option management code from drv/ip is used here 2727 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2728 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2729 * However, that is what we want as that option is 'unorthodox' 2730 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2731 * and not in T_SVR4_OPTMGMT_REQ/ACK 2732 * Note2: use of optcom_req means this routine is an exception to 2733 * recovery from allocb() failures. 2734 */ 2735 2736 static void 2737 tl_optmgmt(queue_t *wq, mblk_t *mp) 2738 { 2739 tl_endpt_t *tep; 2740 mblk_t *ackmp; 2741 union T_primitives *prim; 2742 cred_t *cr; 2743 2744 tep = (tl_endpt_t *)wq->q_ptr; 2745 prim = (union T_primitives *)mp->b_rptr; 2746 2747 /* 2748 * All Solaris components should pass a db_credp 2749 * for this TPI message, hence we ASSERT. 2750 * But in case there is some other M_PROTO that looks 2751 * like a TPI message sent by some other kernel 2752 * component, we check and return an error. 2753 */ 2754 cr = msg_getcred(mp, NULL); 2755 ASSERT(cr != NULL); 2756 if (cr == NULL) { 2757 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2758 return; 2759 } 2760 2761 /* all states OK for AF_UNIX options ? */ 2762 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2763 prim->type == T_SVR4_OPTMGMT_REQ) { 2764 /* 2765 * Broken TLI semantics that options can only be managed 2766 * in TS_IDLE state. Needed for Sparc ABI test suite that 2767 * tests this TLI (mis)feature using this device driver. 2768 */ 2769 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2770 SL_TRACE|SL_ERROR, 2771 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2772 tep->te_state)); 2773 /* 2774 * preallocate memory for T_ERROR_ACK 2775 */ 2776 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2777 if (! 
ackmp) { 2778 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2779 return; 2780 } 2781 2782 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2783 freemsg(mp); 2784 return; 2785 } 2786 2787 /* 2788 * call common option management routine from drv/ip 2789 */ 2790 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2791 svr4_optcom_req(wq, mp, cr, &tl_opt_obj); 2792 } else { 2793 ASSERT(prim->type == T_OPTMGMT_REQ); 2794 tpi_optcom_req(wq, mp, cr, &tl_opt_obj); 2795 } 2796 } 2797 2798 /* 2799 * Handle T_conn_req - the driver part of accept(). 2800 * If TL_SET[U]CRED generate the credentials options. 2801 * If this is a socket pass through options unmodified. 2802 * For sockets generate the T_CONN_CON here instead of 2803 * waiting for the T_CONN_RES. 2804 */ 2805 static void 2806 tl_conn_req(queue_t *wq, mblk_t *mp) 2807 { 2808 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2809 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2810 ssize_t msz = MBLKL(mp); 2811 t_scalar_t alen, aoff, olen, ooff, err = 0; 2812 tl_endpt_t *peer_tep = NULL; 2813 mblk_t *ackmp; 2814 mblk_t *dimp; 2815 struct T_discon_ind *di; 2816 soux_addr_t ux_addr; 2817 tl_addr_t dst; 2818 2819 ASSERT(IS_COTS(tep)); 2820 2821 if (tep->te_closing) { 2822 freemsg(mp); 2823 return; 2824 } 2825 2826 /* 2827 * preallocate memory for: 2828 * 1. max of T_ERROR_ACK and T_OK_ACK 2829 * ==> known max T_ERROR_ACK 2830 * 2. max of T_DISCON_IND and T_CONN_IND 2831 */ 2832 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2833 if (! 
ackmp) { 2834 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2835 return; 2836 } 2837 /* 2838 * memory committed for T_OK_ACK/T_ERROR_ACK now 2839 * will be committed for T_DISCON_IND/T_CONN_IND later 2840 */ 2841 2842 if (tep->te_state != TS_IDLE) { 2843 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2844 SL_TRACE|SL_ERROR, 2845 "tl_wput:T_CONN_REQ:out of state, state=%d", 2846 tep->te_state)); 2847 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2848 freemsg(mp); 2849 return; 2850 } 2851 2852 /* 2853 * validate the message 2854 * Note: dereference fields in struct inside message only 2855 * after validating the message length. 2856 */ 2857 if (msz < sizeof (struct T_conn_req)) { 2858 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2859 "tl_conn_req:invalid message length")); 2860 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2861 freemsg(mp); 2862 return; 2863 } 2864 alen = creq->DEST_length; 2865 aoff = creq->DEST_offset; 2866 olen = creq->OPT_length; 2867 ooff = creq->OPT_offset; 2868 if (olen == 0) 2869 ooff = 0; 2870 2871 if (IS_SOCKET(tep)) { 2872 if ((alen != TL_SOUX_ADDRLEN) || 2873 (aoff < 0) || 2874 (aoff + alen > msz) || 2875 (alen > msz - sizeof (struct T_conn_req))) { 2876 (void) (STRLOG(TL_ID, tep->te_minor, 2877 1, SL_TRACE|SL_ERROR, 2878 "tl_conn_req: invalid socket addr")); 2879 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2880 freemsg(mp); 2881 return; 2882 } 2883 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2884 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2885 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2886 (void) (STRLOG(TL_ID, tep->te_minor, 2887 1, SL_TRACE|SL_ERROR, 2888 "tl_conn_req: invalid socket magic")); 2889 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2890 freemsg(mp); 2891 return; 2892 } 2893 } else { 2894 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2895 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2896 ooff + olen < 0)) || 2897 olen < 0 || ooff < 0) { 2898 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2899 SL_TRACE|SL_ERROR, 2900 "tl_conn_req:invalid message")); 2901 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2902 freemsg(mp); 2903 return; 2904 } 2905 2906 if (alen <= 0 || aoff < 0 || 2907 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2908 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2909 SL_TRACE|SL_ERROR, 2910 "tl_conn_req:bad addr in message, " 2911 "alen=%d, msz=%ld", 2912 alen, msz)); 2913 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2914 freemsg(mp); 2915 return; 2916 } 2917 #ifdef DEBUG 2918 /* 2919 * Mild form of ASSERT()ion to detect broken TPI apps. 2920 * if (! assertion) 2921 * log warning; 2922 */ 2923 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2924 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2925 SL_TRACE|SL_ERROR, 2926 "tl_conn_req: addr overlaps TPI message")); 2927 } 2928 #endif 2929 if (olen) { 2930 /* 2931 * no opts in connect req 2932 * supported in this provider except for sockets. 2933 */ 2934 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2935 SL_TRACE|SL_ERROR, 2936 "tl_conn_req:options not supported " 2937 "in message")); 2938 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2939 freemsg(mp); 2940 return; 2941 } 2942 } 2943 2944 /* 2945 * Prevent tep from closing on us. 2946 */ 2947 if (! tl_noclose(tep)) { 2948 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2949 "tl_conn_req:endpoint is closing")); 2950 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2951 freemsg(mp); 2952 return; 2953 } 2954 2955 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2956 /* 2957 * get endpoint to connect to 2958 * check that peer with DEST addr is bound to addr 2959 * and has CONIND_number > 0 2960 */ 2961 dst.ta_alen = alen; 2962 dst.ta_abuf = mp->b_rptr + aoff; 2963 dst.ta_zoneid = tep->te_zoneid; 2964 2965 /* 2966 * Verify if remote addr is in use 2967 */ 2968 peer_tep = (IS_SOCKET(tep) ? 
2969 tl_sock_find_peer(tep, &ux_addr) : 2970 tl_find_peer(tep, &dst)); 2971 2972 if (peer_tep == NULL) { 2973 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2974 "tl_conn_req:no one at connect address")); 2975 err = ECONNREFUSED; 2976 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2977 /* 2978 * validate that number of incoming connection is 2979 * not to capacity on destination endpoint 2980 */ 2981 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2982 "tl_conn_req: qlen overflow connection refused")); 2983 err = ECONNREFUSED; 2984 } 2985 2986 /* 2987 * Send T_DISCON_IND in case of error 2988 */ 2989 if (err != 0) { 2990 if (peer_tep != NULL) 2991 tl_refrele(peer_tep); 2992 /* We are still expected to send T_OK_ACK */ 2993 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2994 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2995 tl_closeok(tep); 2996 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2997 M_PROTO, T_DISCON_IND); 2998 if (dimp == NULL) { 2999 tl_merror(wq, NULL, ENOSR); 3000 return; 3001 } 3002 di = (struct T_discon_ind *)dimp->b_rptr; 3003 di->DISCON_reason = err; 3004 di->SEQ_number = BADSEQNUM; 3005 3006 tep->te_state = TS_IDLE; 3007 /* 3008 * send T_DISCON_IND message 3009 */ 3010 putnext(tep->te_rq, dimp); 3011 return; 3012 } 3013 3014 ASSERT(IS_COTS(peer_tep)); 3015 3016 /* 3017 * Found the listener. At this point processing will continue on 3018 * listener serializer. Close of the endpoint should be blocked while we 3019 * switch serializers. 3020 */ 3021 tl_serializer_refhold(peer_tep->te_ser); 3022 tl_serializer_refrele(tep->te_ser); 3023 tep->te_ser = peer_tep->te_ser; 3024 ASSERT(tep->te_oconp == NULL); 3025 tep->te_oconp = peer_tep; 3026 3027 /* 3028 * It is safe to close now. Close may continue on listener serializer. 3029 */ 3030 tl_closeok(tep); 3031 3032 /* 3033 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3034 * data, so we link mp to ackmp. 
3035 */ 3036 ackmp->b_cont = mp; 3037 mp = ackmp; 3038 3039 tl_refhold(tep); 3040 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3041 } 3042 3043 /* 3044 * Finish T_CONN_REQ processing on listener serializer. 3045 */ 3046 static void 3047 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3048 { 3049 queue_t *wq; 3050 tl_endpt_t *peer_tep = tep->te_oconp; 3051 mblk_t *confmp, *cimp, *indmp; 3052 void *opts = NULL; 3053 mblk_t *ackmp = mp; 3054 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3055 struct T_conn_ind *ci; 3056 tl_icon_t *tip; 3057 void *addr_startp; 3058 t_scalar_t olen = creq->OPT_length; 3059 t_scalar_t ooff = creq->OPT_offset; 3060 size_t ci_msz; 3061 size_t size; 3062 cred_t *cr = NULL; 3063 pid_t cpid; 3064 3065 if (tep->te_closing) { 3066 TL_UNCONNECT(tep->te_oconp); 3067 tl_serializer_exit(tep); 3068 tl_refrele(tep); 3069 freemsg(mp); 3070 return; 3071 } 3072 3073 wq = tep->te_wq; 3074 tep->te_flag |= TL_EAGER; 3075 3076 /* 3077 * Extract preallocated ackmp from mp. 
3078 */ 3079 mp = mp->b_cont; 3080 ackmp->b_cont = NULL; 3081 3082 if (olen == 0) 3083 ooff = 0; 3084 3085 if (peer_tep->te_closing || 3086 !((peer_tep->te_state == TS_IDLE) || 3087 (peer_tep->te_state == TS_WRES_CIND))) { 3088 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3089 "tl_conn_req:peer in bad state (%d)", 3090 peer_tep->te_state)); 3091 TL_UNCONNECT(tep->te_oconp); 3092 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3093 freemsg(ackmp); 3094 tl_serializer_exit(tep); 3095 tl_refrele(tep); 3096 return; 3097 } 3098 3099 /* 3100 * preallocate now for T_DISCON_IND or T_CONN_IND 3101 */ 3102 /* 3103 * calculate length of T_CONN_IND message 3104 */ 3105 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3106 cr = msg_getcred(mp, &cpid); 3107 ASSERT(cr != NULL); 3108 if (peer_tep->te_flag & TL_SETCRED) { 3109 ooff = 0; 3110 olen = (t_scalar_t) sizeof (struct opthdr) + 3111 OPTLEN(sizeof (tl_credopt_t)); 3112 /* 1 option only */ 3113 } else { 3114 ooff = 0; 3115 olen = (t_scalar_t)sizeof (struct opthdr) + 3116 OPTLEN(ucredminsize(cr)); 3117 /* 1 option only */ 3118 } 3119 } 3120 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3121 ci_msz = T_ALIGN(ci_msz) + olen; 3122 size = max(ci_msz, sizeof (struct T_discon_ind)); 3123 3124 /* 3125 * Save options from mp - we'll need them for T_CONN_IND. 3126 */ 3127 if (ooff != 0) { 3128 opts = kmem_alloc(olen, KM_NOSLEEP); 3129 if (opts == NULL) { 3130 /* 3131 * roll back state changes 3132 */ 3133 tep->te_state = TS_IDLE; 3134 tl_memrecover(wq, mp, size); 3135 freemsg(ackmp); 3136 TL_UNCONNECT(tep->te_oconp); 3137 tl_serializer_exit(tep); 3138 tl_refrele(tep); 3139 return; 3140 } 3141 /* Copy options to a temp buffer */ 3142 bcopy(mp->b_rptr + ooff, opts, olen); 3143 } 3144 3145 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3146 /* 3147 * Generate a T_CONN_CON that has the identical address 3148 * (and options) as the T_CONN_REQ. 
3149 * NOTE: assumes that the T_conn_req and T_conn_con structures 3150 * are isomorphic. 3151 */ 3152 confmp = copyb(mp); 3153 if (! confmp) { 3154 /* 3155 * roll back state changes 3156 */ 3157 tep->te_state = TS_IDLE; 3158 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3159 freemsg(ackmp); 3160 if (opts != NULL) 3161 kmem_free(opts, olen); 3162 TL_UNCONNECT(tep->te_oconp); 3163 tl_serializer_exit(tep); 3164 tl_refrele(tep); 3165 return; 3166 } 3167 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3168 T_CONN_CON; 3169 } else { 3170 confmp = NULL; 3171 } 3172 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3173 /* 3174 * roll back state changes 3175 */ 3176 tep->te_state = TS_IDLE; 3177 tl_memrecover(wq, mp, size); 3178 freemsg(ackmp); 3179 if (opts != NULL) 3180 kmem_free(opts, olen); 3181 freemsg(confmp); 3182 TL_UNCONNECT(tep->te_oconp); 3183 tl_serializer_exit(tep); 3184 tl_refrele(tep); 3185 return; 3186 } 3187 3188 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3189 if (tip == NULL) { 3190 /* 3191 * roll back state changes 3192 */ 3193 tep->te_state = TS_IDLE; 3194 tl_memrecover(wq, indmp, sizeof (*tip)); 3195 freemsg(ackmp); 3196 if (opts != NULL) 3197 kmem_free(opts, olen); 3198 freemsg(confmp); 3199 TL_UNCONNECT(tep->te_oconp); 3200 tl_serializer_exit(tep); 3201 tl_refrele(tep); 3202 return; 3203 } 3204 tip->ti_mp = NULL; 3205 3206 /* 3207 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3208 * and tl_icon_t cell. 3209 */ 3210 3211 /* 3212 * ack validity of request and send the peer credential in the ACK. 
3213 */ 3214 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3215 3216 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3217 confmp != NULL) { 3218 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3219 } 3220 3221 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3222 3223 /* 3224 * prepare message to send T_CONN_IND 3225 */ 3226 /* 3227 * allocate the message - original data blocks retained 3228 * in the returned mblk 3229 */ 3230 cimp = tl_resizemp(indmp, size); 3231 if (! cimp) { 3232 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3233 "tl_conn_req:con_ind:allocb failure")); 3234 tl_merror(wq, indmp, ENOMEM); 3235 TL_UNCONNECT(tep->te_oconp); 3236 tl_serializer_exit(tep); 3237 tl_refrele(tep); 3238 if (opts != NULL) 3239 kmem_free(opts, olen); 3240 freemsg(confmp); 3241 ASSERT(tip->ti_mp == NULL); 3242 kmem_free(tip, sizeof (*tip)); 3243 return; 3244 } 3245 3246 DB_TYPE(cimp) = M_PROTO; 3247 ci = (struct T_conn_ind *)cimp->b_rptr; 3248 ci->PRIM_type = T_CONN_IND; 3249 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3250 ci->SRC_length = tep->te_alen; 3251 ci->SEQ_number = tep->te_seqno; 3252 3253 addr_startp = cimp->b_rptr + ci->SRC_offset; 3254 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3255 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3256 3257 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3258 ci->SRC_length); 3259 ci->OPT_length = olen; /* because only 1 option */ 3260 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3261 cr, cpid, 3262 peer_tep->te_flag, peer_tep->te_credp); 3263 } else if (ooff != 0) { 3264 /* Copy option from T_CONN_REQ */ 3265 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3266 ci->SRC_length); 3267 ci->OPT_length = olen; 3268 ASSERT(opts != NULL); 3269 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3270 } else { 3271 ci->OPT_offset = 0; 3272 ci->OPT_length = 0; 3273 } 3274 if (opts != NULL) 3275 kmem_free(opts, olen); 3276 3277 /* 3278 * register connection request with 
server peer 3279 * append to list of incoming connections 3280 * increment references for both peer_tep and tep: peer_tep is placed on 3281 * te_oconp and tep is placed on listeners queue. 3282 */ 3283 tip->ti_tep = tep; 3284 tip->ti_seqno = tep->te_seqno; 3285 list_insert_tail(&peer_tep->te_iconp, tip); 3286 peer_tep->te_nicon++; 3287 3288 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3289 /* 3290 * send the T_CONN_IND message 3291 */ 3292 putnext(peer_tep->te_rq, cimp); 3293 3294 /* 3295 * Send a T_CONN_CON message for sockets. 3296 * Disable the queues until we have reached the correct state! 3297 */ 3298 if (confmp != NULL) { 3299 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3300 noenable(wq); 3301 putnext(tep->te_rq, confmp); 3302 } 3303 /* 3304 * Now we need to increment tep reference because tep is referenced by 3305 * server list of pending connections. We also need to decrement 3306 * reference before exiting serializer. Two operations void each other 3307 * so we don't modify reference at all. 3308 */ 3309 ASSERT(tep->te_refcnt >= 2); 3310 ASSERT(peer_tep->te_refcnt >= 2); 3311 tl_serializer_exit(tep); 3312 } 3313 3314 3315 3316 /* 3317 * Handle T_conn_res on listener stream. Called on listener serializer. 3318 * tl_conn_req has already generated the T_CONN_CON. 3319 * tl_conn_res is called on listener serializer. 3320 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3321 * Switch eager serializer to acceptor's. 3322 * 3323 * If TL_SET[U]CRED generate the credentials options. 3324 * For sockets tl_conn_req has already generated the T_CONN_CON. 
 *
 * mp is the T_CONN_RES/O_T_CONN_RES message.  It is either freed, handed to
 * tl_memrecover(), or reused (via reallocb) for the T_DISCON_IND/T_CONN_CON
 * that is generated here.
 * tep is the listening endpoint the response arrived on.
 *
 * Every return path taken after the acceptor has been looked up must drop
 * the reference obtained by the lookup (tl_refrele) and, once tl_noclose()
 * has succeeded, re-allow closes on the acceptor (tl_closeok); the same
 * applies to the client when client_noclose_set is set.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	boolean_t		client_noclose_set = B_FALSE;
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * From here on the listener is in TS_WACK_CRES, so every failure
	 * below has to move the state on with TE_ERROR_ACK (or roll it back
	 * explicitly) before returning.
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Look up the pending-connection entry (tl_icon_t) that the
	 * listener recorded for the client identified by SEQ_number.
	 * The entry is released further down once the response has been
	 * processed.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written this test can only fail for a
		 * socket client (IS_SOCKET() is and-ed into the condition);
		 * a non-socket client is never refused here whatever its
		 * state.  Confirm that this matches the intent stated above.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp; mp must not be reused. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 *
	 * Only built when the T_CONN_CON was not already sent early by
	 * tl_conn_req (i.e. non-socket client, or early connect disabled);
	 * otherwise the preallocated message is simply discarded.
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type  = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client is not
			 * moved; the acceptor is moved to the client's
			 * serializer below instead.
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}




/*
 * Handle T_DISCON_REQ.
 *
 * The request is acknowledged with T_OK_ACK (or T_ERROR_ACK on a bad state,
 * short message or unknown sequence number) and then one of three cases is
 * handled, selected in this order:
 *
 *   - tep has pending incoming connections (te_nicon > 0): the connection
 *     identified by SEQ_number is rejected and its tl_icon_t is freed.
 *   - tep has an outgoing connection request pending (te_oconp != NULL):
 *     the request is withdrawn.  For sockets with early connect enabled
 *     the T_DISCON_IND is queued behind the T_CONN_IND on the server
 *     instead of being delivered directly.
 *   - tep is connected (te_conp != NULL): the peer is sent a T_DISCON_IND
 *     and the two endpoints are unlinked.
 *
 * mp is the T_DISCON_REQ message; it is consumed on every path (freed,
 * handed to tl_memrecover(), or reused for the T_DISCON_IND).
 */
static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_discon_req	*dr;
	ssize_t			msz;
	tl_endpt_t		*peer_tep = tep->te_conp;
	tl_endpt_t		*srv_tep = tep->te_oconp;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *dimp, *respmp;
	struct T_discon_ind	*di;
	t_scalar_t		save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/* A peer or server that is already closing is treated as absent. */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for  T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND  later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this case
		 * the code below will avoid any action on the client side.
		 */

		ASSERT(IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number));
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request
	 */
	if (tep->te_nicon == 0)
		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
	else
		if (tep->te_nicon == 1)
			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
		else
			new_state = NEXTSTATE(TE_OK_ACK4, new_state);

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up  */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 *
	 * From here on save_state is reused to hold the *peer's* previous
	 * state; it drives the flush/qenable decisions further down.
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE|SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that
			 * ti_tep == NULL since the T_DISCON_IND
			 * takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    peer_tep->te_state);
				else
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    peer_tep->te_state);
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	/*
	 * No peer to notify: either the client behind the tl_icon_t was
	 * already gone, or the T_DISCON_IND was queued with the T_CONN_IND.
	 */
	if (peer_tep == NULL) {
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type  = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {	/* disconnect pointers if connected */
		ASSERT(! peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (! IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}


/*
 * Handle T_ADDR_REQ.
 *
 * For a connectionless endpoint, or a connection-oriented one that is not
 * in TS_DATA_XFER..TS_WREQ_ORDREL, the reply is a T_ADDR_ACK built in place
 * from mp; it carries the local address only when the endpoint is bound
 * (te_state >= TS_IDLE).  Otherwise the request is passed on to
 * tl_connected_cots_addr_req(), which also reports the peer's address.
 */
static void
tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	size_t			ack_sz;
	mblk_t			*ackmp;
	struct T_addr_ack	*taa;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * Note: T_ADDR_REQ message has only PRIM_type field
	 * so it is already validated earlier.
	 */

	if (IS_CLTS(tep) ||
	    (tep->te_state > TS_WREQ_ORDREL) ||
	    (tep->te_state < TS_DATA_XFER)) {
		/*
		 * Either connectionless or connection oriented but not
		 * in connected data transfer state or half-closed states.
		 */
		ack_sz = sizeof (struct T_addr_ack);
		if (tep->te_state >= TS_IDLE)
			/* is bound */
			ack_sz += tep->te_alen;
		ackmp = reallocb(mp, ack_sz, 0);
		if (ackmp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_addr_req: reallocb failed"));
			tl_memrecover(wq, mp, ack_sz);
			return;
		}

		taa = (struct T_addr_ack *)ackmp->b_rptr;

		/* Zeroing leaves the address lengths/offsets at 0. */
		bzero(taa, sizeof (struct T_addr_ack));

		taa->PRIM_type = T_ADDR_ACK;
		ackmp->b_datap->db_type = M_PCPROTO;
		ackmp->b_wptr = (uchar_t *)&taa[1];

		if (tep->te_state >= TS_IDLE) {
			/* endpoint is bound */
			taa->LOCADDR_length = tep->te_alen;
			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);

			bcopy(tep->te_abuf, ackmp->b_wptr,
			    tep->te_alen);
			ackmp->b_wptr += tep->te_alen;
			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
		}

		(void) qreply(wq, ackmp);
	} else {
		ASSERT(tep->te_state == TS_DATA_XFER ||
		    tep->te_state == TS_WIND_ORDREL ||
		    tep->te_state == TS_WREQ_ORDREL);
		/* connection oriented in data transfer */
		tl_connected_cots_addr_req(mp, tep);
	}
} 4243 4244 4245 static void 4246 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4247 { 4248 tl_endpt_t *peer_tep; 4249 size_t ack_sz; 4250 mblk_t *ackmp; 4251 struct T_addr_ack *taa; 4252 uchar_t *addr_startp; 4253 4254 if (tep->te_closing) { 4255 freemsg(mp); 4256 return; 4257 } 4258 4259 ASSERT(tep->te_state >= TS_IDLE); 4260 4261 ack_sz = sizeof (struct T_addr_ack); 4262 ack_sz += T_ALIGN(tep->te_alen); 4263 peer_tep = tep->te_conp; 4264 ack_sz += peer_tep->te_alen; 4265 4266 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4267 if (ackmp == NULL) { 4268 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4269 "tl_connected_cots_addr_req: reallocb failed")); 4270 tl_memrecover(tep->te_wq, mp, ack_sz); 4271 return; 4272 } 4273 4274 taa = (struct T_addr_ack *)ackmp->b_rptr; 4275 4276 /* endpoint is bound */ 4277 taa->LOCADDR_length = tep->te_alen; 4278 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4279 4280 addr_startp = (uchar_t *)&taa[1]; 4281 4282 bcopy(tep->te_abuf, addr_startp, 4283 tep->te_alen); 4284 4285 taa->REMADDR_length = peer_tep->te_alen; 4286 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4287 taa->LOCADDR_length); 4288 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4289 bcopy(peer_tep->te_abuf, addr_startp, 4290 peer_tep->te_alen); 4291 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4292 taa->REMADDR_offset + peer_tep->te_alen; 4293 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4294 4295 putnext(tep->te_rq, ackmp); 4296 } 4297 4298 static void 4299 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4300 { 4301 if (IS_CLTS(tep)) { 4302 *ia = tl_clts_info_ack; 4303 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4304 } else { 4305 *ia = tl_cots_info_ack; 4306 if (IS_COTSORD(tep)) 4307 ia->SERV_type = T_COTS_ORD; 4308 } 4309 ia->TIDU_size = tl_tidusz; 4310 ia->CURRENT_state = tep->te_state; 4311 } 4312 4313 /* 4314 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4315 * tl_wput. 4316 */ 4317 static void 4318 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4319 { 4320 mblk_t *ackmp; 4321 t_uscalar_t cap_bits1; 4322 struct T_capability_ack *tcap; 4323 4324 if (tep->te_closing) { 4325 freemsg(mp); 4326 return; 4327 } 4328 4329 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4330 4331 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4332 M_PCPROTO, T_CAPABILITY_ACK); 4333 if (ackmp == NULL) { 4334 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4335 "tl_capability_req: reallocb failed")); 4336 tl_memrecover(tep->te_wq, mp, 4337 sizeof (struct T_capability_ack)); 4338 return; 4339 } 4340 4341 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4342 tcap->CAP_bits1 = 0; 4343 4344 if (cap_bits1 & TC1_INFO) { 4345 tl_copy_info(&tcap->INFO_ack, tep); 4346 tcap->CAP_bits1 |= TC1_INFO; 4347 } 4348 4349 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4350 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4351 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4352 } 4353 4354 putnext(tep->te_rq, ackmp); 4355 } 4356 4357 static void 4358 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4359 { 4360 if (! tep->te_closing) 4361 tl_info_req(mp, tep); 4362 else 4363 freemsg(mp); 4364 4365 tl_serializer_exit(tep); 4366 tl_refrele(tep); 4367 } 4368 4369 static void 4370 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4371 { 4372 mblk_t *ackmp; 4373 4374 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4375 M_PCPROTO, T_INFO_ACK); 4376 if (ackmp == NULL) { 4377 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4378 "tl_info_req: reallocb failed")); 4379 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4380 return; 4381 } 4382 4383 /* 4384 * fill in T_INFO_ACK contents 4385 */ 4386 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4387 4388 /* 4389 * send ack message 4390 */ 4391 putnext(tep->te_rq, ackmp); 4392 } 4393 4394 /* 4395 * Handle M_DATA, T_data_req and T_optdata_req. 
/*
 * Handle M_DATA, T_data_req and T_optdata_req.
 * If this is a socket pass through T_optdata_req options unmodified.
 *
 * The message is consumed on every path: it is freed, turned into an M_ERROR
 * by tl_merror(), put back on the write queue (TL_PUTBQ), queued with the
 * pending connection indication (tl_icon_queuemsg) or passed to the peer's
 * read side with its primitive type rewritten from REQ to IND.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* Data primitives are not valid on a connectionless endpoint. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * For M_PROTO messages check that the header is large enough for the
	 * primitive, and that T_OPTDATA_REQ is only used by sockets.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a connected peer nor a pending outgoing connect. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* Too short to even hold the primitive type. */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_EXDATA_REQ.
 *
 * Follows the same pattern as tl_data(): validate the message length and the
 * local state, locate the connected peer, honor flow control unless this
 * endpoint is closing, validate the peer state, then rewrite the primitive to
 * T_EXDATA_IND and pass the message to the peer's read side.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a connected peer nor a pending outgoing connect. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_ORDREL_REQ.
 *
 * Unlike the data paths this event changes state: the local endpoint moves to
 * NEXTSTATE(TE_ORDREL_REQ) and, when the request is delivered, the peer moves
 * to NEXTSTATE(TE_ORDREL_IND).  The message block is reused as the
 * T_ORDREL_IND sent to the peer (or queued with the pending connection
 * indication when a closing endpoint's peer has not accepted yet).
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer and no pending outgoing connect: fall out of the
		 * switch; the "peer gone" check below frees the message.
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
 *
 * mp is the offending T_UNITDATA_REQ; its destination address and options are
 * copied into the indication using DEST_offset/OPT_offset from the request,
 * so callers are expected to have validated those fields against the message
 * length.  On success mp is freed and the indication is sent back up the
 * stream with qreply(); on allocation failure mp is handed to
 * tl_memrecover().
 */
static void
tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
{
	size_t			err_sz;
	tl_endpt_t		*tep;
	struct T_unitdata_req	*udreq;
	mblk_t			*err_mp;
	t_scalar_t		alen;
	t_scalar_t		olen;
	struct T_uderror_ind	*uderr;
	uchar_t			*addr_startp;

	err_sz = sizeof (struct T_uderror_ind);
	tep = (tl_endpt_t *)wq->q_ptr;
	udreq = (struct T_unitdata_req *)mp->b_rptr;
	alen = udreq->DEST_length;
	olen = udreq->OPT_length;

	/* header [+ address, padded to alignment] [+ options] */
	if (alen > 0)
		err_sz = T_ALIGN(err_sz + alen);
	if (olen > 0)
		err_sz += olen;

	err_mp = allocb(err_sz, BPRI_MED);
	if (! err_mp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_uderr:allocb failure"));
		/*
		 * Note: no rollback of state needed as it does
		 * not change in connectionless transport
		 */
		tl_memrecover(wq, mp, err_sz);
		return;
	}

	DB_TYPE(err_mp) = M_PROTO;
	err_mp->b_wptr = err_mp->b_rptr + err_sz;
	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
	uderr->PRIM_type = T_UDERROR_IND;
	uderr->ERROR_type = err;
	uderr->DEST_length = alen;
	uderr->OPT_length = olen;
	if (alen <= 0) {
		uderr->DEST_offset = 0;
	} else {
		/* address goes right after the fixed header */
		uderr->DEST_offset =
		    (t_scalar_t)sizeof (struct T_uderror_ind);
		addr_startp = mp->b_rptr + udreq->DEST_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
		    (size_t)alen);
	}
	if (olen <= 0) {
		uderr->OPT_offset = 0;
	} else {
		/* options follow the address at an aligned offset */
		uderr->OPT_offset =
		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
		    uderr->DEST_length);
		addr_startp = mp->b_rptr + udreq->OPT_offset;
		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
		    (size_t)olen);
	}
	freemsg(mp);

	/*
	 * send indication message
	 */
	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);

	qreply(wq, err_mp);
}

/*
 * Variant of tl_unitdata() that also calls tl_serializer_exit() and
 * tl_refrele() on the endpoint when done.
 *
 * If the endpoint is not closing and messages are already waiting on its
 * write queue, the new message is queued behind them (TL_PUTQ).  Otherwise
 * it is processed immediately, provided the endpoint still has a read queue;
 * if not, the message is freed.
 */
static void
tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;

	if (!tep->te_closing && (wq->q_first != NULL)) {
		TL_PUTQ(tep, mp);
	} else if (tep->te_rq != NULL)
		tl_unitdata(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}
4980 */ 4981 static void 4982 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4983 { 4984 queue_t *wq = tep->te_wq; 4985 soux_addr_t ux_addr; 4986 tl_addr_t destaddr; 4987 uchar_t *addr_startp; 4988 tl_endpt_t *peer_tep; 4989 struct T_unitdata_ind *udind; 4990 struct T_unitdata_req *udreq; 4991 ssize_t msz, ui_sz; 4992 t_scalar_t alen, aoff, olen, ooff; 4993 t_scalar_t oldolen = 0; 4994 cred_t *cr = NULL; 4995 pid_t cpid; 4996 4997 udreq = (struct T_unitdata_req *)mp->b_rptr; 4998 msz = MBLKL(mp); 4999 5000 /* 5001 * validate the state 5002 */ 5003 if (tep->te_state != TS_IDLE) { 5004 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5005 SL_TRACE|SL_ERROR, 5006 "tl_wput:T_CONN_REQ:out of state")); 5007 tl_merror(wq, mp, EPROTO); 5008 return; 5009 } 5010 /* 5011 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5012 * (state does not change on this event) 5013 */ 5014 5015 /* 5016 * validate the message 5017 * Note: dereference fields in struct inside message only 5018 * after validating the message length. 
5019 */ 5020 if (msz < sizeof (struct T_unitdata_req)) { 5021 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5022 "tl_unitdata:invalid message length")); 5023 tl_merror(wq, mp, EINVAL); 5024 return; 5025 } 5026 alen = udreq->DEST_length; 5027 aoff = udreq->DEST_offset; 5028 oldolen = olen = udreq->OPT_length; 5029 ooff = udreq->OPT_offset; 5030 if (olen == 0) 5031 ooff = 0; 5032 5033 if (IS_SOCKET(tep)) { 5034 if ((alen != TL_SOUX_ADDRLEN) || 5035 (aoff < 0) || 5036 (aoff + alen > msz) || 5037 (olen < 0) || (ooff < 0) || 5038 ((olen > 0) && ((ooff + olen) > msz))) { 5039 (void) (STRLOG(TL_ID, tep->te_minor, 5040 1, SL_TRACE|SL_ERROR, 5041 "tl_unitdata_req: invalid socket addr " 5042 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5043 (int)msz, alen, aoff, olen, ooff)); 5044 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5045 return; 5046 } 5047 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5048 5049 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5050 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5051 (void) (STRLOG(TL_ID, tep->te_minor, 5052 1, SL_TRACE|SL_ERROR, 5053 "tl_conn_req: invalid socket magic")); 5054 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5055 return; 5056 } 5057 } else { 5058 if ((alen < 0) || 5059 (aoff < 0) || 5060 ((alen > 0) && ((aoff + alen) > msz)) || 5061 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5062 ((aoff + alen) < 0) || 5063 ((olen > 0) && ((ooff + olen) > msz)) || 5064 (olen < 0) || 5065 (ooff < 0) || 5066 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5067 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5068 SL_TRACE|SL_ERROR, 5069 "tl_unitdata:invalid unit data message")); 5070 tl_merror(wq, mp, EINVAL); 5071 return; 5072 } 5073 } 5074 5075 /* Options not supported unless it's a socket */ 5076 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5077 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5078 "tl_unitdata:option use(unsupported) or zero len addr")); 5079 
tl_uderr(wq, mp, EPROTO); 5080 return; 5081 } 5082 #ifdef DEBUG 5083 /* 5084 * Mild form of ASSERT()ion to detect broken TPI apps. 5085 * if (! assertion) 5086 * log warning; 5087 */ 5088 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5089 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5090 "tl_unitdata:addr overlaps TPI message")); 5091 } 5092 #endif 5093 /* 5094 * get destination endpoint 5095 */ 5096 destaddr.ta_alen = alen; 5097 destaddr.ta_abuf = mp->b_rptr + aoff; 5098 destaddr.ta_zoneid = tep->te_zoneid; 5099 5100 /* 5101 * Check whether the destination is the same that was used previously 5102 * and the destination endpoint is in the right state. If something is 5103 * wrong, find destination again and cache it. 5104 */ 5105 peer_tep = tep->te_lastep; 5106 5107 if ((peer_tep == NULL) || peer_tep->te_closing || 5108 (peer_tep->te_state != TS_IDLE) || 5109 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5110 /* 5111 * Not the same as cached destination , need to find the right 5112 * destination. 5113 */ 5114 peer_tep = (IS_SOCKET(tep) ? 5115 tl_sock_find_peer(tep, &ux_addr) : 5116 tl_find_peer(tep, &destaddr)); 5117 5118 if (peer_tep == NULL) { 5119 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5120 SL_TRACE|SL_ERROR, 5121 "tl_unitdata:no one at destination address")); 5122 tl_uderr(wq, mp, ECONNRESET); 5123 return; 5124 } 5125 5126 /* 5127 * Cache the new peer. 5128 */ 5129 if (tep->te_lastep != NULL) 5130 tl_refrele(tep->te_lastep); 5131 5132 tep->te_lastep = peer_tep; 5133 } 5134 5135 if (peer_tep->te_state != TS_IDLE) { 5136 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5137 "tl_unitdata:provider in invalid state")); 5138 tl_uderr(wq, mp, EPROTO); 5139 return; 5140 } 5141 5142 ASSERT(peer_tep->te_rq != NULL); 5143 5144 /* 5145 * Put it back if flow controlled except when we are closing. 5146 * Note: Messages already on queue when we are closing is bounded 5147 * so we can ignore flow control. 
5148 */ 5149 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5150 /* record what we are flow controlled on */ 5151 if (tep->te_flowq != NULL) { 5152 list_remove(&tep->te_flowq->te_flowlist, tep); 5153 } 5154 list_insert_head(&peer_tep->te_flowlist, tep); 5155 tep->te_flowq = peer_tep; 5156 TL_PUTBQ(tep, mp); 5157 return; 5158 } 5159 /* 5160 * prepare indication message 5161 */ 5162 5163 /* 5164 * calculate length of message 5165 */ 5166 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5167 cr = msg_getcred(mp, &cpid); 5168 ASSERT(cr != NULL); 5169 5170 if (peer_tep->te_flag & TL_SETCRED) { 5171 ASSERT(olen == 0); 5172 olen = (t_scalar_t)sizeof (struct opthdr) + 5173 OPTLEN(sizeof (tl_credopt_t)); 5174 /* 1 option only */ 5175 } else if (peer_tep->te_flag & TL_SETUCRED) { 5176 ASSERT(olen == 0); 5177 olen = (t_scalar_t)sizeof (struct opthdr) + 5178 OPTLEN(ucredminsize(cr)); 5179 /* 1 option only */ 5180 } else { 5181 /* Possibly more than one option */ 5182 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5183 OPTLEN(ucredminsize(cr)); 5184 } 5185 } 5186 5187 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5188 olen; 5189 /* 5190 * If the unitdata_ind fits and we are not adding options 5191 * reuse the udreq mblk. 5192 */ 5193 if (msz >= ui_sz && alen >= tep->te_alen && 5194 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5195 /* 5196 * Reuse the original mblk. Leave options in place. 5197 */ 5198 udind = (struct T_unitdata_ind *)mp->b_rptr; 5199 udind->PRIM_type = T_UNITDATA_IND; 5200 udind->SRC_length = tep->te_alen; 5201 addr_startp = mp->b_rptr + udind->SRC_offset; 5202 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5203 } else { 5204 /* Allocate a new T_unidata_ind message */ 5205 mblk_t *ui_mp; 5206 5207 ui_mp = allocb(ui_sz, BPRI_MED); 5208 if (! 
ui_mp) { 5209 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5210 "tl_unitdata:allocb failure:message queued")); 5211 tl_memrecover(wq, mp, ui_sz); 5212 return; 5213 } 5214 5215 /* 5216 * fill in T_UNITDATA_IND contents 5217 */ 5218 DB_TYPE(ui_mp) = M_PROTO; 5219 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5220 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5221 udind->PRIM_type = T_UNITDATA_IND; 5222 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5223 udind->SRC_length = tep->te_alen; 5224 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5225 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5226 udind->OPT_offset = 5227 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5228 udind->OPT_length = olen; 5229 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5230 5231 if (oldolen != 0) { 5232 bcopy((void *)((uintptr_t)udreq + ooff), 5233 (void *)((uintptr_t)udind + 5234 udind->OPT_offset), 5235 oldolen); 5236 } 5237 ASSERT(cr != NULL); 5238 5239 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5240 oldolen, cr, cpid, 5241 peer_tep->te_flag, peer_tep->te_credp); 5242 } else { 5243 bcopy((void *)((uintptr_t)udreq + ooff), 5244 (void *)((uintptr_t)udind + udind->OPT_offset), 5245 olen); 5246 } 5247 5248 /* 5249 * relink data blocks from mp to ui_mp 5250 */ 5251 ui_mp->b_cont = mp->b_cont; 5252 freeb(mp); 5253 mp = ui_mp; 5254 } 5255 /* 5256 * send indication message 5257 */ 5258 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5259 putnext(peer_tep->te_rq, mp); 5260 } 5261 5262 5263 5264 /* 5265 * Check if a given addr is in use. 5266 * Endpoint ptr returned or NULL if not found. 5267 * The name space is separate for each mode. This implies that 5268 * sockets get their own name space. 
5269 */ 5270 static tl_endpt_t * 5271 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5272 { 5273 tl_endpt_t *peer_tep = NULL; 5274 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5275 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5276 5277 ASSERT(! IS_SOCKET(tep)); 5278 5279 ASSERT(ap != NULL && ap->ta_alen > 0); 5280 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5281 ASSERT(ap->ta_abuf != NULL); 5282 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5283 ASSERT(IMPLY(rc == 0, 5284 (tep->te_zoneid == peer_tep->te_zoneid) && 5285 (tep->te_transport == peer_tep->te_transport))); 5286 5287 if ((rc == 0) && (peer_tep->te_closing)) { 5288 tl_refrele(peer_tep); 5289 peer_tep = NULL; 5290 } 5291 5292 return (peer_tep); 5293 } 5294 5295 /* 5296 * Find peer for a socket based on unix domain address. 5297 * For implicit addresses our peer can be found by minor number in ai hash. For 5298 * explicit binds we look vnode address at addr_hash. 5299 */ 5300 static tl_endpt_t * 5301 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5302 { 5303 tl_endpt_t *peer_tep = NULL; 5304 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5305 tep->te_aihash : tep->te_addrhash; 5306 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5307 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5308 5309 ASSERT(IS_SOCKET(tep)); 5310 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5311 ASSERT(IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport))); 5312 5313 if (peer_tep != NULL) { 5314 /* Don't attempt to use closing peer. */ 5315 if (peer_tep->te_closing) 5316 goto errout; 5317 5318 /* 5319 * Cross-zone unix sockets are permitted, but for Trusted 5320 * Extensions only, the "server" for these must be in the 5321 * global zone. 
5322 */ 5323 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5324 is_system_labeled() && 5325 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5326 goto errout; 5327 } 5328 5329 return (peer_tep); 5330 5331 errout: 5332 tl_refrele(peer_tep); 5333 return (NULL); 5334 } 5335 5336 /* 5337 * Generate a free addr and return it in struct pointed by ap 5338 * but allocating space for address buffer. 5339 * The generated address will be at least 4 bytes long and, if req->ta_alen 5340 * exceeds 4 bytes, be req->ta_alen bytes long. 5341 * 5342 * If address is found it will be inserted in the hash. 5343 * 5344 * If req->ta_alen is larger than the default alen (4 bytes) the last 5345 * alen-4 bytes will always be the same as in req. 5346 * 5347 * Return 0 for failure. 5348 * Return non-zero for success. 5349 */ 5350 static boolean_t 5351 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5352 { 5353 t_scalar_t alen; 5354 uint32_t loopcnt; /* Limit loop to 2^32 */ 5355 5356 ASSERT(tep->te_hash_hndl != NULL); 5357 ASSERT(! IS_SOCKET(tep)); 5358 5359 if (tep->te_hash_hndl == NULL) 5360 return (B_FALSE); 5361 5362 /* 5363 * check if default addr is in use 5364 * if it is - bump it and try again 5365 */ 5366 if (req == NULL) { 5367 alen = sizeof (uint32_t); 5368 } else { 5369 alen = max(req->ta_alen, sizeof (uint32_t)); 5370 ASSERT(tep->te_zoneid == req->ta_zoneid); 5371 } 5372 5373 if (tep->te_alen < alen) { 5374 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5375 5376 /* 5377 * Not enough space in tep->ta_ap to hold the address, 5378 * allocate a bigger space. 
5379 */ 5380 if (abuf == NULL) 5381 return (B_FALSE); 5382 5383 if (tep->te_alen > 0) 5384 kmem_free(tep->te_abuf, tep->te_alen); 5385 5386 tep->te_alen = alen; 5387 tep->te_abuf = abuf; 5388 } 5389 5390 /* Copy in the address in req */ 5391 if (req != NULL) { 5392 ASSERT(alen >= req->ta_alen); 5393 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5394 } 5395 5396 /* 5397 * First try minor number then try default addresses. 5398 */ 5399 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5400 5401 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5402 if (mod_hash_insert_reserve(tep->te_addrhash, 5403 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5404 tep->te_hash_hndl) == 0) { 5405 /* 5406 * found free address 5407 */ 5408 tep->te_flag |= TL_ADDRHASHED; 5409 tep->te_hash_hndl = NULL; 5410 5411 return (B_TRUE); /* successful return */ 5412 } 5413 /* 5414 * Use default address. 5415 */ 5416 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5417 atomic_add_32(&tep->te_defaddr, 1); 5418 } 5419 5420 /* 5421 * Failed to find anything. 5422 */ 5423 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5424 "tl_get_any_addr:looped 2^32 times")); 5425 return (B_FALSE); 5426 } 5427 5428 /* 5429 * reallocb + set r/w ptrs to reflect size. 5430 */ 5431 static mblk_t * 5432 tl_resizemp(mblk_t *mp, ssize_t new_size) 5433 { 5434 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5435 return (NULL); 5436 5437 mp->b_rptr = DB_BASE(mp); 5438 mp->b_wptr = mp->b_rptr + new_size; 5439 return (mp); 5440 } 5441 5442 static void 5443 tl_cl_backenable(tl_endpt_t *tep) 5444 { 5445 list_t *l = &tep->te_flowlist; 5446 tl_endpt_t *elp; 5447 5448 ASSERT(IS_CLTS(tep)); 5449 5450 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5451 ASSERT(tep->te_ser == elp->te_ser); 5452 ASSERT(elp->te_flowq == tep); 5453 if (! elp->te_closing) 5454 TL_QENABLE(elp); 5455 elp->te_flowq = NULL; 5456 list_remove(l, elp); 5457 } 5458 } 5459 5460 /* 5461 * Unconnect endpoints. 
5462 */ 5463 static void 5464 tl_co_unconnect(tl_endpt_t *tep) 5465 { 5466 tl_endpt_t *peer_tep = tep->te_conp; 5467 tl_endpt_t *srv_tep = tep->te_oconp; 5468 list_t *l; 5469 tl_icon_t *tip; 5470 tl_endpt_t *cl_tep; 5471 mblk_t *d_mp; 5472 5473 ASSERT(IS_COTS(tep)); 5474 /* 5475 * If our peer is closing, don't use it. 5476 */ 5477 if ((peer_tep != NULL) && peer_tep->te_closing) { 5478 TL_UNCONNECT(tep->te_conp); 5479 peer_tep = NULL; 5480 } 5481 if ((srv_tep != NULL) && srv_tep->te_closing) { 5482 TL_UNCONNECT(tep->te_oconp); 5483 srv_tep = NULL; 5484 } 5485 5486 if (tep->te_nicon > 0) { 5487 l = &tep->te_iconp; 5488 /* 5489 * If incoming requests pending, change state 5490 * of clients on disconnect ind event and send 5491 * discon_ind pdu to modules above them 5492 * for server: all clients get disconnect 5493 */ 5494 5495 while (tep->te_nicon > 0) { 5496 tip = list_head(l); 5497 cl_tep = tip->ti_tep; 5498 5499 if (cl_tep == NULL) { 5500 tl_freetip(tep, tip); 5501 continue; 5502 } 5503 5504 if (cl_tep->te_oconp != NULL) { 5505 ASSERT(cl_tep != cl_tep->te_oconp); 5506 TL_UNCONNECT(cl_tep->te_oconp); 5507 } 5508 5509 if (cl_tep->te_closing) { 5510 tl_freetip(tep, tip); 5511 continue; 5512 } 5513 5514 enableok(cl_tep->te_wq); 5515 TL_QENABLE(cl_tep); 5516 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5517 if (d_mp != NULL) { 5518 cl_tep->te_state = TS_IDLE; 5519 putnext(cl_tep->te_rq, d_mp); 5520 } else { 5521 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5522 SL_TRACE|SL_ERROR, 5523 "tl_co_unconnect:icmng: " 5524 "allocb failure")); 5525 } 5526 tl_freetip(tep, tip); 5527 } 5528 } else if (srv_tep != NULL) { 5529 /* 5530 * If outgoing request pending, change state 5531 * of server on discon ind event 5532 */ 5533 5534 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5535 IS_COTSORD(srv_tep) && 5536 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5537 /* 5538 * Queue ordrel_ind for server to be picked up 5539 * when the connection is accepted. 
5540 */ 5541 d_mp = tl_ordrel_ind_alloc(); 5542 } else { 5543 /* 5544 * send discon_ind to server 5545 */ 5546 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5547 } 5548 if (d_mp == NULL) { 5549 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5550 SL_TRACE|SL_ERROR, 5551 "tl_co_unconnect:outgoing:allocb failure")); 5552 TL_UNCONNECT(tep->te_oconp); 5553 goto discon_peer; 5554 } 5555 5556 /* 5557 * If this is a socket the T_DISCON_IND is queued with 5558 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5559 * from the list of pending connections. 5560 * Note that when te_oconp is set the peer better have 5561 * a t_connind_t for the client. 5562 */ 5563 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5564 /* 5565 * Queue the disconnection message. 5566 */ 5567 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5568 } else { 5569 tip = tl_icon_find(srv_tep, tep->te_seqno); 5570 if (tip == NULL) { 5571 freemsg(d_mp); 5572 } else { 5573 ASSERT(tep == tip->ti_tep); 5574 ASSERT(tep->te_ser == srv_tep->te_ser); 5575 /* 5576 * Delete tip from the server list. 
5577 */ 5578 if (srv_tep->te_nicon == 1) { 5579 srv_tep->te_state = 5580 NEXTSTATE(TE_DISCON_IND2, 5581 srv_tep->te_state); 5582 } else { 5583 srv_tep->te_state = 5584 NEXTSTATE(TE_DISCON_IND3, 5585 srv_tep->te_state); 5586 } 5587 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5588 T_DISCON_IND); 5589 putnext(srv_tep->te_rq, d_mp); 5590 tl_freetip(srv_tep, tip); 5591 } 5592 TL_UNCONNECT(tep->te_oconp); 5593 srv_tep = NULL; 5594 } 5595 } else if (peer_tep != NULL) { 5596 /* 5597 * unconnect existing connection 5598 * If connected, change state of peer on 5599 * discon ind event and send discon ind pdu 5600 * to module above it 5601 */ 5602 5603 ASSERT(tep->te_ser == peer_tep->te_ser); 5604 if (IS_COTSORD(peer_tep) && 5605 (peer_tep->te_state == TS_WIND_ORDREL || 5606 peer_tep->te_state == TS_DATA_XFER)) { 5607 /* 5608 * send ordrel ind 5609 */ 5610 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5611 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5612 peer_tep->te_state, 5613 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5614 d_mp = tl_ordrel_ind_alloc(); 5615 if (! d_mp) { 5616 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5617 SL_TRACE|SL_ERROR, 5618 "tl_co_unconnect:connected:" 5619 "allocb failure")); 5620 /* 5621 * Continue with cleaning up peer as 5622 * this side may go away with the close 5623 */ 5624 TL_QENABLE(peer_tep); 5625 goto discon_peer; 5626 } 5627 peer_tep->te_state = 5628 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5629 5630 putnext(peer_tep->te_rq, d_mp); 5631 /* 5632 * Handle flow control case. This will generate 5633 * a t_discon_ind message with reason 0 if there 5634 * is data queued on the write side. 5635 */ 5636 TL_QENABLE(peer_tep); 5637 } else if (IS_COTSORD(peer_tep) && 5638 peer_tep->te_state == TS_WREQ_ORDREL) { 5639 /* 5640 * Sent an ordrel_ind. We send a discon with 5641 * with error 0 to inform that the peer is gone. 
5642 */ 5643 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5644 SL_TRACE|SL_ERROR, 5645 "tl_co_unconnect: discon in state %d", 5646 tep->te_state)); 5647 tl_discon_ind(peer_tep, 0); 5648 } else { 5649 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5650 SL_TRACE|SL_ERROR, 5651 "tl_co_unconnect: state %d", tep->te_state)); 5652 tl_discon_ind(peer_tep, ECONNRESET); 5653 } 5654 5655 discon_peer: 5656 /* 5657 * Disconnect cross-pointers only for close 5658 */ 5659 if (tep->te_closing) { 5660 peer_tep = tep->te_conp; 5661 TL_REMOVE_PEER(peer_tep->te_conp); 5662 TL_REMOVE_PEER(tep->te_conp); 5663 } 5664 } 5665 } 5666 5667 /* 5668 * Note: The following routine does not recover from allocb() 5669 * failures 5670 * The reason should be from the <sys/errno.h> space. 5671 */ 5672 static void 5673 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5674 { 5675 mblk_t *d_mp; 5676 5677 if (tep->te_closing) 5678 return; 5679 5680 /* 5681 * flush the queues. 5682 */ 5683 flushq(tep->te_rq, FLUSHDATA); 5684 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5685 5686 /* 5687 * send discon ind 5688 */ 5689 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5690 if (! d_mp) { 5691 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5692 "tl_discon_ind:allocb failure")); 5693 return; 5694 } 5695 tep->te_state = TS_IDLE; 5696 putnext(tep->te_rq, d_mp); 5697 } 5698 5699 /* 5700 * Note: The following routine does not recover from allocb() 5701 * failures 5702 * The reason should be from the <sys/errno.h> space. 
5703 */ 5704 static mblk_t * 5705 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5706 { 5707 mblk_t *mp; 5708 struct T_discon_ind *tdi; 5709 5710 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5711 DB_TYPE(mp) = M_PROTO; 5712 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5713 tdi = (struct T_discon_ind *)mp->b_rptr; 5714 tdi->PRIM_type = T_DISCON_IND; 5715 tdi->DISCON_reason = reason; 5716 tdi->SEQ_number = seqnum; 5717 } 5718 return (mp); 5719 } 5720 5721 5722 /* 5723 * Note: The following routine does not recover from allocb() 5724 * failures 5725 */ 5726 static mblk_t * 5727 tl_ordrel_ind_alloc(void) 5728 { 5729 mblk_t *mp; 5730 struct T_ordrel_ind *toi; 5731 5732 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5733 DB_TYPE(mp) = M_PROTO; 5734 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5735 toi = (struct T_ordrel_ind *)mp->b_rptr; 5736 toi->PRIM_type = T_ORDREL_IND; 5737 } 5738 return (mp); 5739 } 5740 5741 5742 /* 5743 * Lookup the seqno in the list of queued connections. 5744 */ 5745 static tl_icon_t * 5746 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5747 { 5748 list_t *l = &tep->te_iconp; 5749 tl_icon_t *tip = list_head(l); 5750 5751 ASSERT(seqno != 0); 5752 5753 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5754 ; 5755 5756 return (tip); 5757 } 5758 5759 /* 5760 * Queue data for a given T_CONN_IND while verifying that redundant 5761 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5762 * Used when the originator of the connection closes. 
5763 */ 5764 static void 5765 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5766 { 5767 tl_icon_t *tip; 5768 mblk_t **mpp, *mp; 5769 int prim, nprim; 5770 5771 if (nmp->b_datap->db_type == M_PROTO) 5772 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5773 else 5774 nprim = -1; /* M_DATA */ 5775 5776 tip = tl_icon_find(tep, seqno); 5777 if (tip == NULL) { 5778 freemsg(nmp); 5779 return; 5780 } 5781 5782 ASSERT(tip->ti_seqno != 0); 5783 mpp = &tip->ti_mp; 5784 while (*mpp != NULL) { 5785 mp = *mpp; 5786 5787 if (mp->b_datap->db_type == M_PROTO) 5788 prim = ((union T_primitives *)mp->b_rptr)->type; 5789 else 5790 prim = -1; /* M_DATA */ 5791 5792 /* 5793 * Allow nothing after a T_DISCON_IND 5794 */ 5795 if (prim == T_DISCON_IND) { 5796 freemsg(nmp); 5797 return; 5798 } 5799 /* 5800 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5801 */ 5802 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5803 freemsg(nmp); 5804 return; 5805 } 5806 mpp = &(mp->b_next); 5807 } 5808 *mpp = nmp; 5809 } 5810 5811 /* 5812 * Verify if a certain TPI primitive exists on the connind queue. 5813 * Use prim -1 for M_DATA. 5814 * Return non-zero if found. 5815 */ 5816 static boolean_t 5817 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5818 { 5819 tl_icon_t *tip = tl_icon_find(tep, seqno); 5820 boolean_t found = B_FALSE; 5821 5822 if (tip != NULL) { 5823 mblk_t *mp; 5824 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5825 found = (DB_TYPE(mp) == M_PROTO && 5826 ((union T_primitives *)mp->b_rptr)->type == prim); 5827 } 5828 } 5829 return (found); 5830 } 5831 5832 /* 5833 * Send the b_next mblk chain that has accumulated before the connection 5834 * was accepted. Perform the necessary state transitions. 
5835 */ 5836 static void 5837 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5838 { 5839 mblk_t *mp; 5840 union T_primitives *primp; 5841 5842 if (tep->te_closing) { 5843 tl_icon_freemsgs(mpp); 5844 return; 5845 } 5846 5847 ASSERT(tep->te_state == TS_DATA_XFER); 5848 ASSERT(tep->te_rq->q_first == NULL); 5849 5850 while ((mp = *mpp) != NULL) { 5851 *mpp = mp->b_next; 5852 mp->b_next = NULL; 5853 5854 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5855 switch (DB_TYPE(mp)) { 5856 default: 5857 freemsg(mp); 5858 break; 5859 case M_DATA: 5860 putnext(tep->te_rq, mp); 5861 break; 5862 case M_PROTO: 5863 primp = (union T_primitives *)mp->b_rptr; 5864 switch (primp->type) { 5865 case T_UNITDATA_IND: 5866 case T_DATA_IND: 5867 case T_OPTDATA_IND: 5868 case T_EXDATA_IND: 5869 putnext(tep->te_rq, mp); 5870 break; 5871 case T_ORDREL_IND: 5872 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5873 tep->te_state); 5874 putnext(tep->te_rq, mp); 5875 break; 5876 case T_DISCON_IND: 5877 tep->te_state = TS_IDLE; 5878 putnext(tep->te_rq, mp); 5879 break; 5880 default: 5881 #ifdef DEBUG 5882 cmn_err(CE_PANIC, 5883 "tl_icon_sendmsgs: unknown primitive"); 5884 #endif /* DEBUG */ 5885 freemsg(mp); 5886 break; 5887 } 5888 break; 5889 } 5890 } 5891 } 5892 5893 /* 5894 * Free the b_next mblk chain that has accumulated before the connection 5895 * was accepted. 5896 */ 5897 static void 5898 tl_icon_freemsgs(mblk_t **mpp) 5899 { 5900 mblk_t *mp; 5901 5902 while ((mp = *mpp) != NULL) { 5903 *mpp = mp->b_next; 5904 mp->b_next = NULL; 5905 freemsg(mp); 5906 } 5907 } 5908 5909 /* 5910 * Send M_ERROR 5911 * Note: assumes caller ensured enough space in mp or enough 5912 * memory available. 
Does not attempt recovery from allocb() 5913 * failures 5914 */ 5915 5916 static void 5917 tl_merror(queue_t *wq, mblk_t *mp, int error) 5918 { 5919 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5920 5921 if (tep->te_closing) { 5922 freemsg(mp); 5923 return; 5924 } 5925 5926 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5927 SL_TRACE|SL_ERROR, 5928 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5929 5930 /* 5931 * flush all messages on queue. we are shutting 5932 * the stream down on fatal error 5933 */ 5934 flushq(wq, FLUSHALL); 5935 if (IS_COTS(tep)) { 5936 /* connection oriented - unconnect endpoints */ 5937 tl_co_unconnect(tep); 5938 } 5939 if (mp->b_cont) { 5940 freemsg(mp->b_cont); 5941 mp->b_cont = NULL; 5942 } 5943 5944 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5945 freemsg(mp); 5946 mp = allocb(1, BPRI_HI); 5947 if (!mp) { 5948 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5949 SL_TRACE|SL_ERROR, 5950 "tl_merror:M_PROTO: out of memory")); 5951 return; 5952 } 5953 } 5954 if (mp) { 5955 DB_TYPE(mp) = M_ERROR; 5956 mp->b_rptr = DB_BASE(mp); 5957 *mp->b_rptr = (char)error; 5958 mp->b_wptr = mp->b_rptr + sizeof (char); 5959 qreply(wq, mp); 5960 } else { 5961 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5962 } 5963 } 5964 5965 static void 5966 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5967 { 5968 ASSERT(cr != NULL); 5969 5970 if (flag & TL_SETCRED) { 5971 struct opthdr *opt = (struct opthdr *)buf; 5972 tl_credopt_t *tlcred; 5973 5974 opt->level = TL_PROT_LEVEL; 5975 opt->name = TL_OPT_PEER_CRED; 5976 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5977 5978 tlcred = (tl_credopt_t *)(opt + 1); 5979 tlcred->tc_uid = crgetuid(cr); 5980 tlcred->tc_gid = crgetgid(cr); 5981 tlcred->tc_ruid = crgetruid(cr); 5982 tlcred->tc_rgid = crgetrgid(cr); 5983 tlcred->tc_suid = crgetsuid(cr); 5984 tlcred->tc_sgid = crgetsgid(cr); 5985 tlcred->tc_ngroups = crgetngroups(cr); 5986 } else if (flag & TL_SETUCRED) { 5987 struct opthdr *opt = (struct 
opthdr *)buf; 5988 5989 opt->level = TL_PROT_LEVEL; 5990 opt->name = TL_OPT_PEER_UCRED; 5991 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); 5992 5993 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5994 } else { 5995 struct T_opthdr *topt = (struct T_opthdr *)buf; 5996 ASSERT(flag & TL_SOCKUCRED); 5997 5998 topt->level = SOL_SOCKET; 5999 topt->name = SCM_UCRED; 6000 topt->len = ucredminsize(cr) + sizeof (*topt); 6001 topt->status = 0; 6002 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 6003 } 6004 } 6005 6006 /* ARGSUSED */ 6007 static int 6008 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6009 { 6010 /* no default value processed in protocol specific code currently */ 6011 return (-1); 6012 } 6013 6014 /* ARGSUSED */ 6015 static int 6016 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6017 { 6018 int len; 6019 tl_endpt_t *tep; 6020 int *valp; 6021 6022 tep = (tl_endpt_t *)wq->q_ptr; 6023 6024 len = 0; 6025 6026 /* 6027 * Assumes: option level and name sanity check done elsewhere 6028 */ 6029 6030 switch (level) { 6031 case SOL_SOCKET: 6032 if (! IS_SOCKET(tep)) 6033 break; 6034 switch (name) { 6035 case SO_RECVUCRED: 6036 len = sizeof (int); 6037 valp = (int *)ptr; 6038 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6039 break; 6040 default: 6041 break; 6042 } 6043 break; 6044 case TL_PROT_LEVEL: 6045 switch (name) { 6046 case TL_OPT_PEER_CRED: 6047 case TL_OPT_PEER_UCRED: 6048 /* 6049 * option not supposed to retrieved directly 6050 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6051 * when some internal flags set by other options 6052 * Direct retrieval always designed to fail(ignored) 6053 * for this option. 
6054 */ 6055 break; 6056 } 6057 } 6058 return (len); 6059 } 6060 6061 /* ARGSUSED */ 6062 static int 6063 tl_set_opt( 6064 queue_t *wq, 6065 uint_t mgmt_flags, 6066 int level, 6067 int name, 6068 uint_t inlen, 6069 uchar_t *invalp, 6070 uint_t *outlenp, 6071 uchar_t *outvalp, 6072 void *thisdg_attrs, 6073 cred_t *cr) 6074 { 6075 int error; 6076 tl_endpt_t *tep; 6077 6078 tep = (tl_endpt_t *)wq->q_ptr; 6079 6080 error = 0; /* NOERROR */ 6081 6082 /* 6083 * Assumes: option level and name sanity checks done elsewhere 6084 */ 6085 6086 switch (level) { 6087 case SOL_SOCKET: 6088 if (! IS_SOCKET(tep)) { 6089 error = EINVAL; 6090 break; 6091 } 6092 /* 6093 * TBD: fill in other AF_UNIX socket options and then stop 6094 * returning error. 6095 */ 6096 switch (name) { 6097 case SO_RECVUCRED: 6098 /* 6099 * We only support this for datagram sockets; 6100 * getpeerucred handles the connection oriented 6101 * transports. 6102 */ 6103 if (! IS_CLTS(tep)) { 6104 error = EINVAL; 6105 break; 6106 } 6107 if (*(int *)invalp == 0) 6108 tep->te_flag &= ~TL_SOCKUCRED; 6109 else 6110 tep->te_flag |= TL_SOCKUCRED; 6111 break; 6112 default: 6113 error = EINVAL; 6114 break; 6115 } 6116 break; 6117 case TL_PROT_LEVEL: 6118 switch (name) { 6119 case TL_OPT_PEER_CRED: 6120 case TL_OPT_PEER_UCRED: 6121 /* 6122 * option not supposed to be set directly 6123 * Its value in initialized for each endpoint at 6124 * driver open time. 6125 * Direct setting always designed to fail for this 6126 * option. 
6127 */ 6128 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6129 SL_TRACE|SL_ERROR, 6130 "tl_set_opt: option is not supported")); 6131 error = EPROTO; 6132 break; 6133 } 6134 } 6135 return (error); 6136 } 6137 6138 6139 static void 6140 tl_timer(void *arg) 6141 { 6142 queue_t *wq = arg; 6143 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6144 6145 ASSERT(tep); 6146 6147 tep->te_timoutid = 0; 6148 6149 enableok(wq); 6150 /* 6151 * Note: can call wsrv directly here and save context switch 6152 * Consider change when qtimeout (not timeout) is active 6153 */ 6154 qenable(wq); 6155 } 6156 6157 static void 6158 tl_buffer(void *arg) 6159 { 6160 queue_t *wq = arg; 6161 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6162 6163 ASSERT(tep); 6164 6165 tep->te_bufcid = 0; 6166 tep->te_nowsrv = B_FALSE; 6167 6168 enableok(wq); 6169 /* 6170 * Note: can call wsrv directly here and save context switch 6171 * Consider change when qbufcall (not bufcall) is active 6172 */ 6173 qenable(wq); 6174 } 6175 6176 static void 6177 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6178 { 6179 tl_endpt_t *tep; 6180 6181 tep = (tl_endpt_t *)wq->q_ptr; 6182 6183 if (tep->te_closing) { 6184 freemsg(mp); 6185 return; 6186 } 6187 noenable(wq); 6188 6189 (void) insq(wq, wq->q_first, mp); 6190 6191 if (tep->te_bufcid || tep->te_timoutid) { 6192 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6193 "tl_memrecover:recover %p pending", (void *)wq)); 6194 return; 6195 } 6196 6197 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6198 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6199 drv_usectohz(TL_BUFWAIT)); 6200 } 6201 } 6202 6203 static void 6204 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6205 { 6206 ASSERT(tip->ti_seqno != 0); 6207 6208 if (tip->ti_mp != NULL) { 6209 tl_icon_freemsgs(&tip->ti_mp); 6210 tip->ti_mp = NULL; 6211 } 6212 if (tip->ti_tep != NULL) { 6213 tl_refrele(tip->ti_tep); 6214 tip->ti_tep = NULL; 6215 } 6216 list_remove(&tep->te_iconp, tip); 6217 kmem_free(tip, 
sizeof (tl_icon_t)); 6218 tep->te_nicon--; 6219 } 6220 6221 /* 6222 * Remove address from address hash. 6223 */ 6224 static void 6225 tl_addr_unbind(tl_endpt_t *tep) 6226 { 6227 tl_endpt_t *elp; 6228 6229 if (tep->te_flag & TL_ADDRHASHED) { 6230 if (IS_SOCKET(tep)) { 6231 (void) mod_hash_remove(tep->te_addrhash, 6232 (mod_hash_key_t)tep->te_vp, 6233 (mod_hash_val_t *)&elp); 6234 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6235 tep->te_magic = SOU_MAGIC_IMPLICIT; 6236 } else { 6237 (void) mod_hash_remove(tep->te_addrhash, 6238 (mod_hash_key_t)&tep->te_ap, 6239 (mod_hash_val_t *)&elp); 6240 (void) kmem_free(tep->te_abuf, tep->te_alen); 6241 tep->te_alen = -1; 6242 tep->te_abuf = NULL; 6243 } 6244 tep->te_flag &= ~TL_ADDRHASHED; 6245 } 6246 } 6247