1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multithreaded STREAMS Local Transport Provider. 28 * 29 * OVERVIEW 30 * ======== 31 * 32 * This driver provides TLI as well as socket semantics. It provides 33 * connectionless, connection oriented, and connection oriented with orderly 34 * release transports for TLI and sockets. Each transport type has separate name 35 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 36 * this removes any name space conflicts when binding to socket style transport 37 * addresses. 38 * 39 * NOTE: There is one exception: Socket ticots and ticotsord transports share 40 * the same namespace. In fact, sockets always use ticotsord type transport. 41 * 42 * The driver mode is specified during open() by the minor number used for 43 * open. 44 * 45 * The sockets in addition have the following semantic differences: 46 * No support for passing up credentials (TL_SET[U]CRED). 
47 * 48 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, 49 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to 50 * T_OPTDATA_IND. 51 * 52 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before 53 * a T_CONN_RES is received from the acceptor. This means that a socket 54 * connect will complete before the peer has called accept. 55 * 56 * 57 * MULTITHREADING 58 * ============== 59 * 60 * The driver does not use STREAMS protection mechanisms. Instead it uses a 61 * generic "serializer" abstraction. Most of the operations are executed behind 62 * the serializer and are essentially single-threaded. All functions executed 63 * behind the same serializer are strictly serialized. So if one thread calls 64 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls 65 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one 66 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or 67 * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the 68 * same time. 69 * 70 * Connectionless transports use a single serializer per transport type (one for 71 * TLI and one for sockets). Connection-oriented transports use finer-grained 72 * serializers. 73 * 74 * All COTS-type endpoints start their life with private serializers. During 75 * connection request processing the endpoint serializer is switched to the 76 * listener's serializer and the rest of T_CONN_REQ processing is done on the 77 * listener serializer. During T_CONN_RES processing the eager serializer is 78 * switched from listener to acceptor serializer and after that point all 79 * processing for eager and acceptor happens on this serializer. To avoid races 80 * with endpoint closes while its serializer may be changing, closes are blocked 81 * while serializers are manipulated. 
82 * 83 * Reference accounting 84 * --------------------- 85 * 86 * Endpoints are reference counted and freed when the last reference is 87 * dropped. Functions within the serializer may access an endpoint state even 88 * after the endpoint has closed. The te_closing flag being set on the endpoint indicates 89 * that the endpoint entered its close routine. 90 * 91 * One reference is held for each opened endpoint instance. The reference 92 * counter is incremented when the endpoint is linked to another endpoint and 93 * decremented when the link disappears. It is also incremented when the 94 * endpoint is found by the hash table lookup. This increment is atomic with the 95 * lookup itself and happens while the hash table read lock is held. 96 * 97 * Close synchronization 98 * --------------------- 99 * 100 * During close the endpoint is marked as closing using the te_closing flag. It is 101 * usually enough to check for the te_closing flag since all other state changes 102 * happen after this flag is set and the close entered serializer. Immediately 103 * after setting the te_closing flag tl_close() enters serializer and waits until 104 * the callback finishes. This allows all functions called within serializer to 105 * simply check te_closing without any locks. 106 * 107 * Serializer management. 108 * --------------------- 109 * 110 * For COTS transports serializers are created when the endpoint is constructed 111 * and destroyed when the endpoint is destructed. CLTS transports use global 112 * serializers - one for sockets and one for TLI. 113 * 114 * COTS serializers have separate reference counts to deal with several 115 * endpoints sharing the same serializer. There is a subtle problem related to 116 * the serializer destruction. The serializer should never be destroyed by any 117 * function executed inside serializer. 
This means that close has to wait till 118 * all serializer activity for this endpoint is finished before it can drop the 119 * last reference on the endpoint (which may as well free the serializer). This 120 * is only relevant for COTS transports which manage serializers 121 * dynamically. For CLTS transports close may complete without waiting for all 122 * serializer activity to finish since serializer is only destroyed at driver 123 * detach time. 124 * 125 * COTS endpoints keep track of the number of outstanding requests on the 126 * serializer for the endpoint. The code handling accept() avoids changing 127 * client serializer if it has any pending messages on the serializer and 128 * instead moves acceptor to listener's serializer. 129 * 130 * 131 * Use of hash tables 132 * ------------------ 133 * 134 * The driver uses the modhash hash table implementation. Each transport uses two 135 * hash tables - one for finding endpoints by acceptor ID and another one for 136 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same 137 * pair of hash tables since sockets only use TICOTSORD. 138 * 139 * All hash table lookups increment a reference count for returned endpoints, 140 * so we may safely check the endpoint state even when the endpoint is removed 141 * from the hash by another thread immediately after it is found. 142 * 143 * 144 * CLOSE processing 145 * ================ 146 * 147 * The driver enters serializer twice on close(). The close sequence is the 148 * following: 149 * 150 * 0) Wait until closing is safe (te_closewait becomes zero) 151 * This step is needed to prevent close during serializer switches. In most 152 * cases (close happening after connection establishment) te_closewait is 153 * zero. 154 * 1) Set te_closing. 155 * 2) Call tl_close_ser() within serializer and wait for it to complete. 156 * 157 * tl_close_ser() simply marks the endpoint and wakes up waiting tl_close(). 
158 * It also needs to clear write-side q_next pointers - this should be done 159 * before qprocsoff(). 160 * 161 * This synchronous serializer entry during close is needed to ensure that 162 * the queue is valid everywhere inside the serializer. 163 * 164 * Note that in many cases close will execute tl_close_ser() synchronously, 165 * so it will not wait at all. 166 * 167 * 3) Calls qprocsoff(). 168 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to 169 * complete (for COTS transports). For CLTS transports there is no wait. 170 * 171 * tl_close_finish_ser() finishes the close process and wakes up waiting 172 * close if there is any. 173 * 174 * Note that in most cases close will enter tl_close_finish_ser() 175 * synchronously and will not wait at all. 176 * 177 * 178 * Flow Control 179 * ============ 180 * 181 * The driver implements both read and write side service routines. No one calls 182 * putq() on the read queue. The read side service routine tl_rsrv() is called 183 * when the read side stream is back-enabled. It enters serializer synchronously 184 * (waits till serializer processing is complete). Within serializer it 185 * back-enables all endpoints blocked by the queue for connection-less 186 * transports and enables write side service processing for the peer for 187 * connection-oriented transports. 188 * 189 * Read and write side service routines use special mblk-sized space in the 190 * endpoint structure to enter the serializer. 191 * 192 * Write-side flow control 193 * ----------------------- 194 * 195 * Write side flow control is a bit tricky. The driver needs to deal with two 196 * message queues - the explicit STREAMS message queue maintained by 197 * putq()/getq()/putbq() and the implicit queue within the serializer. These two 198 * queues should be synchronized to preserve message ordering and should 199 * maintain a single order determined by the order in which messages enter 200 * tl_wput(). 
In order to maintain the ordering between these two queues the 201 * STREAMS queue is only manipulated within the serializer, so the ordering is 202 * provided by the serializer. 203 * 204 * Functions called from the tl_wsrv() sometimes may call putbq(). To 205 * immediately stop any further processing of the STREAMS message queues the 206 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 207 * side service processing stops when the flag is set. 208 * 209 * The tl_wsrv() function enters serializer synchronously and waits for it to 210 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 211 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 212 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 213 * always bounded by the amount of messages on the STREAMS queue at the time 214 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 215 * queue from another serialized entry which can't happen in parallel. This 216 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 217 * of it draining forever while writer places new messages on the STREAMS 218 * queue). 219 * 220 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 221 * 222 * 223 * Unix Domain Sockets 224 * =================== 225 * 226 * The driver knows the structure of Unix Domain sockets addresses and treats 227 * them differently from generic TLI addresses. For sockets implicit binds are 228 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 229 * instead of using address length of zero. Explicit binds specify 230 * SOU_MAGIC_EXPLICIT as magic. 231 * 232 * For implicit binds we always use minor number as soua_vp part of the address 233 * and avoid any hash table lookups. This saves two hash tables lookups per 234 * anonymous bind. 
235 * 236 * For explicit addresses we hash the vnode pointer instead of hashing the 237 * full-scale address+zone+length. Hashing by pointer is more efficient than 238 * hashing by the full address. 239 * 240 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the 241 * tep structure, so it should never be freed. 242 * 243 * Also for sockets the driver always uses minor number as acceptor id. 244 * 245 * TPI VIOLATIONS 246 * -------------- 247 * 248 * This driver violates TPI in several respects for Unix Domain Sockets: 249 * 250 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind 251 * is requested and the endpoint is already in use. There is no point in 252 * generating an unused address since this address will be rejected by 253 * sockfs anyway. For implicit binds it always generates a new address 254 * (sets soua_vp to its minor number). 255 * 256 * 2) It always uses minor number as acceptor ID and never uses queue 257 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ 258 * message and they do not use the queue pointer. 259 * 260 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog 261 * followed by listen(). The listen() should be issued with non-zero 262 * backlog, so sotpi_listen() issues unbind request followed by bind 263 * request to the same address but with a non-zero qlen value. Both 264 * tl_bind() and tl_unbind() require write lock on the hash table to 265 * insert/remove the address. The driver does not remove the address from 266 * the hash for endpoints that are bound to the explicit address and have 267 * backlog of zero. During T_BIND_REQ processing if the address requested 268 * is equal to the address the endpoint already has it updates the backlog 269 * without reinserting the address in the hash table. This optimization 270 * avoids two hash table updates for each listener created. 
It also 271 * avoids the problem of a "stolen" address when another listener may use 272 * the same address between the unbind and bind and suddenly listen() fails 273 * because the address is in use even though the bind() succeeded. 274 * 275 * 276 * CONNECTIONLESS TRANSPORTS 277 * ========================= 278 * 279 * Connectionless transports all share the same serializer (one for TLI and one 280 * for Sockets). Functions executing behind serializer can check or modify state 281 * of any endpoint. 282 * 283 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the 284 * te_lastep field. The next time X talks to some address A it checks whether A 285 * is the same as Y's address and if it is there is no need to look up Y. If the 286 * address is different or the state of Y is not appropriate (e.g. closed or not 287 * idle) X does a lookup using tl_find_peer() and caches the new address. 288 * NOTE: tl_find_peer() never returns a closing endpoint and it places a refhold 289 * on the endpoint found. 290 * 291 * During close of endpoint Y it doesn't try to remove itself from other 292 * endpoints' caches. They will detect that Y is gone and will search the peer 293 * endpoint again. 294 * 295 * Flow Control Handling. 296 * ---------------------- 297 * 298 * Each connectionless endpoint keeps a list of endpoints which are 299 * flow-controlled by its queue. It also keeps a pointer to the queue which 300 * flow-controls itself. Whenever flow control releases for endpoint X it 301 * enables all queues from the list. During close it also back-enables everyone 302 * in the list. If X is flow-controlled when it is closing it removes itself from 303 * the peer's list. 304 * 305 * DATA STRUCTURES 306 * =============== 307 * 308 * Each endpoint is represented by the tl_endpt_t structure which keeps all the 309 * endpoint state. For connection-oriented transports it keeps a list 310 * of pending connections (tl_icon_t). 
For connectionless transports it keeps a 311 * list of endpoints flow controlled by this one. 312 * 313 * Each transport type is represented by a per-transport data structure 314 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 315 * endpoint address hash tables for each transport. It also contains pointer to 316 * transport serializer for connectionless transports. 317 * 318 * Each endpoint keeps a link to its transport structure, so the code can find 319 * all per-transport information quickly. 320 */ 321 322 #include <sys/types.h> 323 #include <sys/inttypes.h> 324 #include <sys/stream.h> 325 #include <sys/stropts.h> 326 #define _SUN_TPI_VERSION 2 327 #include <sys/tihdr.h> 328 #include <sys/strlog.h> 329 #include <sys/debug.h> 330 #include <sys/cred.h> 331 #include <sys/errno.h> 332 #include <sys/kmem.h> 333 #include <sys/id_space.h> 334 #include <sys/modhash.h> 335 #include <sys/mkdev.h> 336 #include <sys/tl.h> 337 #include <sys/stat.h> 338 #include <sys/conf.h> 339 #include <sys/modctl.h> 340 #include <sys/strsun.h> 341 #include <sys/socket.h> 342 #include <sys/socketvar.h> 343 #include <sys/sysmacros.h> 344 #include <sys/xti_xtiopt.h> 345 #include <sys/ddi.h> 346 #include <sys/sunddi.h> 347 #include <sys/zone.h> 348 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 349 #include <inet/optcom.h> 350 #include <sys/strsubr.h> 351 #include <sys/ucred.h> 352 #include <sys/suntpi.h> 353 #include <sys/list.h> 354 #include <sys/serializer.h> 355 356 /* 357 * TBD List 358 * 14 Eliminate state changes through table 359 * 16. AF_UNIX socket options 360 * 17. connect() for ticlts 361 * 18. support for "netstat" to show AF_UNIX plus TLI local 362 * transport connections 363 * 21. 
sanity check to flushing on sending M_ERROR 364 */ 365 366 /* 367 * CONSTANT DECLARATIONS 368 * -------------------- 369 */ 370 371 /* 372 * Local declarations 373 * NEXTSTATE(EV, ST) indexes the TPI state table ti_statetbl by event EV and state ST. */ 374 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 375 376 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 377 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 378 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 379 /* 380 * Size of the hash tables (initial value of tl_hash_size). 381 */ 382 #define TL_HASH_SIZE 311 383 384 /* 385 * Definitions for module_info (used to fill in tl_minfo) 386 */ 387 #define TL_ID (104) /* module ID number */ 388 #define TL_NAME "tl" /* module name */ 389 #define TL_MINPSZ (0) /* min packet size */ 390 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 391 #define TL_HIWAT (16*1024) /* high water mark */ 392 #define TL_LOWAT (256) /* low water mark */ 393 /* 394 * Definition of minor numbers/modes for new transport provider modes. 395 * We view the socket use as a separate mode to get a separate name space. 
396 */ 397 #define TL_TICOTS 0 /* connection oriented transport */ 398 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 399 #define TL_TICLTS 2 /* connectionless transport */ 400 #define TL_UNUSED 3 /* unused mode; the "undefined" slot in tl_transports */ 401 #define TL_SOCKET 4 /* Socket flag, OR-ed with one of the transport types above */ 402 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) /* socket mode 4 */ 403 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) /* socket mode 5 */ 404 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) /* socket mode 6 */ 405 406 #define TL_MINOR_MASK 0x7 /* covers the TL_SOCKET bit and the transport type bits */ 407 #define TL_MINOR_START (TL_TICLTS + 1) /* NOTE(review): presumably the first minor available for clone opens - confirm against tl_attach()/tl_open() */ 408 409 /* 410 * LOCAL MACROS 411 * T_ALIGN(p) rounds p up to a multiple of sizeof (t_scalar_t). */ 412 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 413 414 /* 415 * EXTERNAL VARIABLE DECLARATIONS 416 * ----------------------------- 417 */ 418 /* 419 * state table defined in the OS space.c; indexed by [event][state], see NEXTSTATE() 420 */ 421 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 422 423 /* 424 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 425 * Note that the put and service routines are declared as returning void. */ 426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 427 static int tl_close(queue_t *, int, cred_t *); 428 static void tl_wput(queue_t *, mblk_t *); 429 static void tl_wsrv(queue_t *); 430 static void tl_rsrv(queue_t *); 431 432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 435 436 437 /* 438 * GLOBAL DATA STRUCTURES AND VARIABLES 439 * ----------------------------------- 440 */ 441 442 /* 443 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 444 * For now, we only manage the SO_RECVUCRED option but we also have 445 * harmless dummy options to make things work with some common code we access. 
446 */ 447 opdes_t tl_opt_arr[] = { 448 /* The SO_TYPE is needed for the hack below */ 449 { 450 SO_TYPE, 451 SOL_SOCKET, 452 OA_R, 453 OA_R, 454 OP_NP, 455 OP_PASSNEXT, 456 sizeof (t_scalar_t), 457 0 458 }, 459 { 460 SO_RECVUCRED, 461 SOL_SOCKET, 462 OA_RW, 463 OA_RW, 464 OP_NP, 465 OP_PASSNEXT, 466 sizeof (int), 467 0 468 } 469 }; 470 471 /* 472 * Table of all supported levels 473 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 474 * any supported options so we need this info separately. 475 * 476 * This is needed only for topmost tpi providers. 477 */ 478 optlevel_t tl_valid_levels_arr[] = { 479 XTI_GENERIC, 480 SOL_SOCKET, 481 TL_PROT_LEVEL 482 }; 483 484 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 485 /* 486 * Current upper bound on the amount of space needed to return all options: 4 bytes of data plus one struct opthdr per tl_opt_arr entry, plus 64 bytes of slack and the T_optmgmt_ack header. 487 * Additional options with data size of sizeof(long) are handled automatically. 488 * Others must be accounted for by hand. 489 */ 490 #define TL_MAX_OPT_BUF_LEN \ 491 ((A_CNT(tl_opt_arr) << 2) + \ 492 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 493 64 + sizeof (struct T_optmgmt_ack)) 494 495 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 496 497 /* 498 * transport addr structure 499 */ 500 typedef struct tl_addr { 501 zoneid_t ta_zoneid; /* Zone scope of address */ 502 t_scalar_t ta_alen; /* length of abuf */ 503 void *ta_abuf; /* the addr itself */ 504 } tl_addr_t; 505 506 /* 507 * Refcounted version of serializer. 508 */ 509 typedef struct tl_serializer { 510 uint_t ts_refcnt; /* reference count */ 511 serializer_t *ts_serializer; /* the underlying serializer */ 512 } tl_serializer_t; 513 514 /* 515 * Each transport type has a separate state. 516 * Per-transport state. 
517 */ 518 typedef struct tl_transport_state { 519 char *tr_name; 520 minor_t tr_minor; 521 uint32_t tr_defaddr; 522 mod_hash_t *tr_ai_hash; 523 mod_hash_t *tr_addr_hash; 524 tl_serializer_t *tr_serializer; 525 } tl_transport_state_t; 526 527 #define TL_DFADDR 0x1000 528 529 static tl_transport_state_t tl_transports[] = { 530 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 531 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 532 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 533 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 534 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 536 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 537 }; 538 539 #define TL_MAXTRANSPORT A_CNT(tl_transports) 540 541 struct tl_endpt; 542 typedef struct tl_endpt tl_endpt_t; 543 544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 545 546 /* 547 * Data structure used to represent pending connects. 548 * Records enough information so that the connecting peer can close 549 * before the connection gets accepted. 550 */ 551 typedef struct tl_icon { 552 list_node_t ti_node; 553 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 554 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 555 t_scalar_t ti_seqno; /* Sequence number */ 556 } tl_icon_t; 557 558 typedef struct so_ux_addr soux_addr_t; 559 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 560 561 /* 562 * Maximum number of unaccepted connection indications allowed per listener. 
563 */ 564 #define TL_MAXQLEN 4096 565 int tl_maxqlen = TL_MAXQLEN; 566 567 /* 568 * transport endpoint structure 569 * Members that are #defined inside the structure are shorthand aliases for nested fields. */ 570 struct tl_endpt { 571 queue_t *te_rq; /* stream read queue */ 572 queue_t *te_wq; /* stream write queue */ 573 uint32_t te_refcnt; /* endpoint reference count */ 574 int32_t te_state; /* TPI state of endpoint */ 575 minor_t te_minor; /* minor number */ 576 #define te_seqno te_minor /* te_minor doubles as te_seqno */ 577 uint_t te_flag; /* flag field */ 578 boolean_t te_nowsrv; /* set by TL_PUTBQ(), cleared by TL_QENABLE()/TL_PUTQ() */ 579 tl_serializer_t *te_ser; /* Serializer to use */ 580 #define te_serializer te_ser->ts_serializer 581 582 soux_addr_t te_uxaddr; /* Socket address */ 583 #define te_magic te_uxaddr.soua_magic 584 #define te_vp te_uxaddr.soua_vp 585 tl_addr_t te_ap; /* addr bound to this endpt */ 586 #define te_zoneid te_ap.ta_zoneid 587 #define te_alen te_ap.ta_alen 588 #define te_abuf te_ap.ta_abuf 589 590 tl_transport_state_t *te_transport; /* per-transport state */ 591 #define te_addrhash te_transport->tr_addr_hash 592 #define te_aihash te_transport->tr_ai_hash 593 #define te_defaddr te_transport->tr_defaddr 594 cred_t *te_credp; /* endpoint user credentials */ 595 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 596 597 /* 598 * State specific for connection-oriented and connectionless transports. 599 * The two cases share storage, so only the members matching the endpoint type are meaningful. */ 600 union { 601 /* Connection-oriented state. */ 602 struct { 603 t_uscalar_t _te_nicon; /* count of conn requests */ 604 t_uscalar_t _te_qlen; /* max conn requests */ 605 tl_endpt_t *_te_oconp; /* conn request pending */ 606 tl_endpt_t *_te_conp; /* connected endpt */ 607 #ifndef _ILP32 608 void *_te_pad; 609 #endif 610 list_t _te_iconp; /* list of conn ind. pending */ 611 } _te_cots_state; 612 /* Connection-less state. */ 613 struct { 614 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 615 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 616 list_node_t _te_flows; /* lists of connections */ 617 list_t _te_flowlist; /* Who flowcontrols on me */ 618 } _te_clts_state; 619 } _te_transport_state; 620 #define te_nicon _te_transport_state._te_cots_state._te_nicon 621 #define te_qlen _te_transport_state._te_cots_state._te_qlen 622 #define te_oconp _te_transport_state._te_cots_state._te_oconp 623 #define te_conp _te_transport_state._te_cots_state._te_conp 624 #define te_iconp _te_transport_state._te_cots_state._te_iconp 625 #define te_lastep _te_transport_state._te_clts_state._te_lastep 626 #define te_flowq _te_transport_state._te_clts_state._te_flowq 627 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 628 #define te_flows _te_transport_state._te_clts_state._te_flows 629 630 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 631 timeout_id_t te_timoutid; /* outstanding timeout id */ 632 pid_t te_cpid; /* cached pid of endpoint */ 633 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 634 /* 635 * Pieces of the endpoint state needed for closing. 636 * The three mblk_t members are embedded in the endpoint, so no allocation is needed to enter the serializer. */ 637 kmutex_t te_closelock; 638 kcondvar_t te_closecv; 639 uint8_t te_closing; /* The endpoint started closing */ 640 uint8_t te_closewait; /* Wait in close until zero */ 641 mblk_t te_closemp; /* for entering serializer on close */ 642 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 643 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 644 kmutex_t te_srv_lock; 645 kcondvar_t te_srv_cv; 646 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 647 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 648 /* 649 * Pieces of the endpoint state needed for serializer transitions. 650 */ 651 kmutex_t te_ser_lock; /* Protects the count below */ 652 uint_t te_ser_count; /* Number of messages on serializer */ 653 }; 654 655 /* 656 * Flag values. Lower 4 bits specify the transport used. 
657 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 658 * they make it easier to identify the endpoint. 659 */ 660 #define TL_LISTENER 0x00010 /* the listener endpoint */ 661 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 662 #define TL_EAGER 0x00040 /* connecting endpoint */ 663 #define TL_ACCEPTED 0x00080 /* accepted connection */ 664 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 665 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 666 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 667 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 668 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 669 /* 670 * Boolean checks for the endpoint type. 671 * They test the mode bits kept in te_flag: IS_COTS() is true whenever the TL_TICLTS bit is clear. */ 672 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 673 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 674 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 675 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 676 677 /* 678 * Certain operations are always used together. These macros reduce the chance 679 * of missing a part of a combination. 680 * TL_UNCONNECT(x) drops the reference on x and sets x to NULL, so x must be an lvalue and is evaluated more than once. TL_REMOVE_PEER(x) does the same only when x is not NULL. TL_PUTBQ(x, mp) sets te_nowsrv and puts mp back on the write queue; it asserts that close has not entered the serializer. TL_QENABLE(x) and TL_PUTQ(x, mp) clear te_nowsrv before enabling the write queue or queueing mp on it. All of these expand to a bare brace block rather than do { } while (0), so they must not be used as the body of an if that is followed by else. */ 681 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 682 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 683 684 #define TL_PUTBQ(x, mp) { \ 685 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 686 (x)->te_nowsrv = B_TRUE; \ 687 (void) putbq((x)->te_wq, mp); \ 688 } 689 690 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 691 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 692 693 /* 694 * STREAMS driver glue data structures. 
695 * NOTE(review): tl_rsrv(), tl_wput() and tl_wsrv() are prototyped as returning void and are cast to int (*)() for the qinit tables below. */ 696 static struct module_info tl_minfo = { 697 TL_ID, /* mi_idnum */ 698 TL_NAME, /* mi_idname */ 699 TL_MINPSZ, /* mi_minpsz */ 700 TL_MAXPSZ, /* mi_maxpsz */ 701 TL_HIWAT, /* mi_hiwat */ 702 TL_LOWAT /* mi_lowat */ 703 }; 704 705 /* Read side: service, open and close routines only; there is no read-side put procedure. */ static struct qinit tl_rinit = { 706 NULL, /* qi_putp */ 707 (int (*)())tl_rsrv, /* qi_srvp */ 708 tl_open, /* qi_qopen */ 709 tl_close, /* qi_qclose */ 710 NULL, /* qi_qadmin */ 711 &tl_minfo, /* qi_minfo */ 712 NULL /* qi_mstat */ 713 }; 714 715 /* Write side: put and service routines; open/close are supplied by the read side. */ static struct qinit tl_winit = { 716 (int (*)())tl_wput, /* qi_putp */ 717 (int (*)())tl_wsrv, /* qi_srvp */ 718 NULL, /* qi_qopen */ 719 NULL, /* qi_qclose */ 720 NULL, /* qi_qadmin */ 721 &tl_minfo, /* qi_minfo */ 722 NULL /* qi_mstat */ 723 }; 724 725 /* Not a multiplexor: both mux entries are NULL. */ static struct streamtab tlinfo = { 726 &tl_rinit, /* st_rdinit */ 727 &tl_winit, /* st_wrinit */ 728 NULL, /* st_muxrinit */ 729 NULL /* st_muxwrinit */ 730 }; 731 732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 733 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); 734 735 static struct modldrv modldrv = { 736 &mod_driverops, /* Type of module -- pseudo driver here */ 737 "TPI Local Transport (tl)", /* module description */ 738 &tl_devops, /* driver ops */ 739 }; 740 741 /* 742 * Module linkage information for the kernel. 743 * Passed to mod_install(), mod_remove() and mod_info() by _init(), _fini() and _info(). */ 744 static struct modlinkage modlinkage = { 745 MODREV_1, 746 &modldrv, 747 NULL 748 }; 749 750 /* 751 * Templates for response to info request 752 * Check sanity of unlimited connect data etc. 
753 */ 754 755 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 756 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 757 758 /* Template for connection-oriented endpoints. */ static struct T_info_ack tl_cots_info_ack = 759 { 760 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 761 T_INFINITE, /* TSDU_size */ 762 T_INFINITE, /* ETSDU_size */ 763 T_INFINITE, /* CDATA_size */ 764 T_INFINITE, /* DDATA_size */ 765 T_INFINITE, /* ADDR_size */ 766 T_INFINITE, /* OPT_size */ 767 0, /* TIDU_size - fill at run time */ 768 T_COTS, /* SERV_type */ 769 -1, /* CURRENT_state - placeholder; NOTE(review): presumably filled at run time, confirm */ 770 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 771 }; 772 773 /* Template for connectionless endpoints. */ static struct T_info_ack tl_clts_info_ack = 774 { 775 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 776 0, /* TSDU_size - fill at run time */ 777 -2, /* ETSDU_size -2 => not supported */ 778 -2, /* CDATA_size -2 => not supported */ 779 -2, /* DDATA_size -2 => not supported */ 780 -1, /* ADDR_size -1 => unlimited */ 781 -1, /* OPT_size -1 => unlimited */ 782 0, /* TIDU_size - fill at run time */ 783 T_CLTS, /* SERV_type */ 784 -1, /* CURRENT_state - placeholder; NOTE(review): presumably filled at run time, confirm */ 785 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 786 }; 787 788 /* 789 * private copy of devinfo pointer used in tl_info 790 */ 791 static dev_info_t *tl_dip; 792 793 /* 794 * Endpoints cache. 795 */ 796 static kmem_cache_t *tl_cache; 797 /* 798 * Minor number space. 799 */ 800 static id_space_t *tl_minors; 801 802 /* 803 * Default Data Unit size. 804 */ 805 static t_scalar_t tl_tidusz; 806 807 /* 808 * Size of hash tables. 809 */ 810 static size_t tl_hash_size = TL_HASH_SIZE; 811 812 /* 813 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 814 * for sockets. 
815 */ 816 static int tl_disable_early_connect = 0; 817 /* NOTE(review): the next two variables carry no description here; presumably debug counters or switches - confirm against their uses. */ static int tl_client_closing_when_accepting; 818 819 static int tl_serializer_noswitch; 820 821 /* 822 * LOCAL FUNCTION PROTOTYPES 823 * ------------------------- 824 * Functions declared as void f(mblk_t *, tl_endpt_t *) match the tlproc_t signature. */ 825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 826 static void tl_do_proto(mblk_t *, tl_endpt_t *); 827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 830 t_scalar_t); 831 static void tl_bind(mblk_t *, tl_endpt_t *); 832 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 833 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 834 static void tl_unbind(mblk_t *, tl_endpt_t *); 835 static void tl_optmgmt(queue_t *, mblk_t *); 836 static void tl_conn_req(queue_t *, mblk_t *); 837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 838 static void tl_conn_res(mblk_t *, tl_endpt_t *); 839 static void tl_discon_req(mblk_t *, tl_endpt_t *); 840 static void tl_capability_req(mblk_t *, tl_endpt_t *); 841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 842 static void tl_info_req(mblk_t *, tl_endpt_t *); 843 static void tl_addr_req(mblk_t *, tl_endpt_t *); 844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 845 static void tl_data(mblk_t *, tl_endpt_t *); 846 static void tl_exdata(mblk_t *, tl_endpt_t *); 847 static void tl_ordrel(mblk_t *, tl_endpt_t *); 848 static void tl_unitdata(mblk_t *, tl_endpt_t *); 849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 854 static void tl_cl_backenable(tl_endpt_t *); 855 static void tl_co_unconnect(tl_endpt_t *); 856 static mblk_t *tl_resizemp(mblk_t *, ssize_t); 857 static void 
tl_discon_ind(tl_endpt_t *, uint32_t); 858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 859 static mblk_t *tl_ordrel_ind_alloc(void); 860 /* Pending connection indication (tl_icon_t) helpers. */ static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 864 static void tl_icon_freemsgs(mblk_t **); 865 static void tl_merror(queue_t *, mblk_t *, int); 866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 867 /* Option callbacks referenced from tl_opt_obj. */ static int tl_default_opt(queue_t *, int, int, uchar_t *); 868 static int tl_get_opt(queue_t *, int, int, uchar_t *); 869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 870 uchar_t *, void *, cred_t *, mblk_t *); 871 static void tl_memrecover(queue_t *, mblk_t *, size_t); 872 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 873 static void tl_free(tl_endpt_t *); 874 static int tl_constructor(void *, void *, int); 875 static void tl_destructor(void *, void *); 876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 877 /* Serializer management and reference counting. */ static tl_serializer_t *tl_serializer_alloc(int); 878 static void tl_serializer_refhold(tl_serializer_t *); 879 static void tl_serializer_refrele(tl_serializer_t *); 880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 881 static void tl_serializer_exit(tl_endpt_t *); 882 static boolean_t tl_noclose(tl_endpt_t *); 883 static void tl_closeok(tl_endpt_t *); 884 static void tl_refhold(tl_endpt_t *); 885 static void tl_refrele(tl_endpt_t *); 886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 888 static void tl_close_ser(mblk_t *, tl_endpt_t *); 889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 891 static void tl_proto_ser(mblk_t *, tl_endpt_t *); 892 static void 
tl_putq_ser(mblk_t *, tl_endpt_t *); 893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 897 static void tl_addr_unbind(tl_endpt_t *); 898 899 /* 900 * Intialize option database object for TL 901 */ 902 903 optdb_obj_t tl_opt_obj = { 904 tl_default_opt, /* TL default value function pointer */ 905 tl_get_opt, /* TL get function pointer */ 906 tl_set_opt, /* TL set function pointer */ 907 B_TRUE, /* TL is tpi provider */ 908 TL_OPT_ARR_CNT, /* TL option database count of entries */ 909 tl_opt_arr, /* TL option database */ 910 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 911 tl_valid_levels_arr /* TL valid level array */ 912 }; 913 914 /* 915 * Logical operations. 916 * 917 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y 918 * should also be true. 919 * 920 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or false at 921 * the same time. 922 */ 923 #define IMPLY(X, Y) (!(X) || (Y)) 924 #define EQUIV(X, Y) (IMPLY(X, Y) && IMPLY(Y, X)) 925 926 /* 927 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 928 * --------------------------------------- 929 */ 930 931 /* 932 * Loadable module routines 933 */ 934 int 935 _init(void) 936 { 937 return (mod_install(&modlinkage)); 938 } 939 940 int 941 _fini(void) 942 { 943 return (mod_remove(&modlinkage)); 944 } 945 946 int 947 _info(struct modinfo *modinfop) 948 { 949 return (mod_info(&modlinkage, modinfop)); 950 } 951 952 /* 953 * Driver Entry Points and Other routines 954 */ 955 static int 956 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 957 { 958 int i; 959 char name[32]; 960 961 /* 962 * Resume from a checkpoint state. 963 */ 964 if (cmd == DDI_RESUME) 965 return (DDI_SUCCESS); 966 967 if (cmd != DDI_ATTACH) 968 return (DDI_FAILURE); 969 970 /* 971 * Deduce TIDU size to use. 
Note: "strmsgsz" being 0 has semantics that 972 * streams message sizes can be unlimited. We use a defined constant 973 * instead. 974 */ 975 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 976 977 /* 978 * Create subdevices for each transport. 979 */ 980 for (i = 0; i < TL_UNUSED; i++) { 981 if (ddi_create_minor_node(devi, 982 tl_transports[i].tr_name, 983 S_IFCHR, tl_transports[i].tr_minor, 984 DDI_PSEUDO, NULL) == DDI_FAILURE) { 985 ddi_remove_minor_node(devi, NULL); 986 return (DDI_FAILURE); 987 } 988 } 989 990 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 991 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 992 993 if (tl_cache == NULL) { 994 ddi_remove_minor_node(devi, NULL); 995 return (DDI_FAILURE); 996 } 997 998 tl_minors = id_space_create("tl_minor_space", 999 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 1000 1001 /* 1002 * Create ID space for minor numbers 1003 */ 1004 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1005 tl_transport_state_t *t = &tl_transports[i]; 1006 1007 if (i == TL_UNUSED) 1008 continue; 1009 1010 /* Socket COTSORD shares namespace with COTS */ 1011 if (i == TL_SOCK_COTSORD) { 1012 t->tr_ai_hash = 1013 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1014 ASSERT(t->tr_ai_hash != NULL); 1015 t->tr_addr_hash = 1016 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1017 ASSERT(t->tr_addr_hash != NULL); 1018 continue; 1019 } 1020 1021 /* 1022 * Create hash tables. 
1023 */ 1024 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1025 t->tr_name); 1026 #ifdef _ILP32 1027 if (i & TL_SOCKET) 1028 t->tr_ai_hash = 1029 mod_hash_create_idhash(name, tl_hash_size - 1, 1030 mod_hash_null_valdtor); 1031 else 1032 t->tr_ai_hash = 1033 mod_hash_create_ptrhash(name, tl_hash_size, 1034 mod_hash_null_valdtor, sizeof (queue_t)); 1035 #else 1036 t->tr_ai_hash = 1037 mod_hash_create_idhash(name, tl_hash_size - 1, 1038 mod_hash_null_valdtor); 1039 #endif /* _ILP32 */ 1040 1041 if (i & TL_SOCKET) { 1042 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1043 t->tr_name); 1044 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1045 tl_hash_size, mod_hash_null_valdtor, 1046 sizeof (uintptr_t)); 1047 } else { 1048 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1049 t->tr_name); 1050 t->tr_addr_hash = mod_hash_create_extended(name, 1051 tl_hash_size, mod_hash_null_keydtor, 1052 mod_hash_null_valdtor, 1053 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1054 } 1055 1056 /* Create serializer for connectionless transports. */ 1057 if (i & TL_TICLTS) 1058 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1059 } 1060 1061 tl_dip = devi; 1062 1063 return (DDI_SUCCESS); 1064 } 1065 1066 static int 1067 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1068 { 1069 int i; 1070 1071 if (cmd == DDI_SUSPEND) 1072 return (DDI_SUCCESS); 1073 1074 if (cmd != DDI_DETACH) 1075 return (DDI_FAILURE); 1076 1077 /* 1078 * Destroy arenas and hash tables. 
1079 */ 1080 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1081 tl_transport_state_t *t = &tl_transports[i]; 1082 1083 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1084 continue; 1085 1086 ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL)); 1087 if (t->tr_serializer != NULL) { 1088 tl_serializer_refrele(t->tr_serializer); 1089 t->tr_serializer = NULL; 1090 } 1091 1092 #ifdef _ILP32 1093 if (i & TL_SOCKET) 1094 mod_hash_destroy_idhash(t->tr_ai_hash); 1095 else 1096 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1097 #else 1098 mod_hash_destroy_idhash(t->tr_ai_hash); 1099 #endif /* _ILP32 */ 1100 t->tr_ai_hash = NULL; 1101 if (i & TL_SOCKET) 1102 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1103 else 1104 mod_hash_destroy_hash(t->tr_addr_hash); 1105 t->tr_addr_hash = NULL; 1106 } 1107 1108 kmem_cache_destroy(tl_cache); 1109 tl_cache = NULL; 1110 id_space_destroy(tl_minors); 1111 tl_minors = NULL; 1112 ddi_remove_minor_node(devi, NULL); 1113 return (DDI_SUCCESS); 1114 } 1115 1116 /* ARGSUSED */ 1117 static int 1118 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1119 { 1120 1121 int retcode = DDI_FAILURE; 1122 1123 switch (infocmd) { 1124 1125 case DDI_INFO_DEVT2DEVINFO: 1126 if (tl_dip != NULL) { 1127 *result = (void *)tl_dip; 1128 retcode = DDI_SUCCESS; 1129 } 1130 break; 1131 1132 case DDI_INFO_DEVT2INSTANCE: 1133 *result = (void *)0; 1134 retcode = DDI_SUCCESS; 1135 break; 1136 1137 default: 1138 break; 1139 } 1140 return (retcode); 1141 } 1142 1143 /* 1144 * Endpoint reference management. 
1145 */ 1146 static void 1147 tl_refhold(tl_endpt_t *tep) 1148 { 1149 atomic_add_32(&tep->te_refcnt, 1); 1150 } 1151 1152 static void 1153 tl_refrele(tl_endpt_t *tep) 1154 { 1155 ASSERT(tep->te_refcnt != 0); 1156 1157 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1158 tl_free(tep); 1159 } 1160 1161 /*ARGSUSED*/ 1162 static int 1163 tl_constructor(void *buf, void *cdrarg, int kmflags) 1164 { 1165 tl_endpt_t *tep = buf; 1166 1167 bzero(tep, sizeof (tl_endpt_t)); 1168 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1169 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1170 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1171 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1172 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1173 1174 return (0); 1175 } 1176 1177 /*ARGSUSED*/ 1178 static void 1179 tl_destructor(void *buf, void *cdrarg) 1180 { 1181 tl_endpt_t *tep = buf; 1182 1183 mutex_destroy(&tep->te_closelock); 1184 cv_destroy(&tep->te_closecv); 1185 mutex_destroy(&tep->te_srv_lock); 1186 cv_destroy(&tep->te_srv_cv); 1187 mutex_destroy(&tep->te_ser_lock); 1188 } 1189 1190 static void 1191 tl_free(tl_endpt_t *tep) 1192 { 1193 ASSERT(tep->te_refcnt == 0); 1194 ASSERT(tep->te_transport != NULL); 1195 ASSERT(tep->te_rq == NULL); 1196 ASSERT(tep->te_wq == NULL); 1197 ASSERT(tep->te_ser != NULL); 1198 ASSERT(tep->te_ser_count == 0); 1199 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1200 1201 if (IS_SOCKET(tep)) { 1202 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1203 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1204 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1205 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1206 } else if (tep->te_abuf != NULL) { 1207 kmem_free(tep->te_abuf, tep->te_alen); 1208 tep->te_alen = -1; /* uninitialized */ 1209 tep->te_abuf = NULL; 1210 } else { 1211 ASSERT(tep->te_alen == -1); 1212 } 1213 1214 id_free(tl_minors, tep->te_minor); 1215 ASSERT(tep->te_credp == NULL); 1216 1217 if (tep->te_hash_hndl != NULL) 1218 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1219 1220 if (IS_COTS(tep)) { 1221 TL_REMOVE_PEER(tep->te_conp); 1222 TL_REMOVE_PEER(tep->te_oconp); 1223 tl_serializer_refrele(tep->te_ser); 1224 tep->te_ser = NULL; 1225 ASSERT(tep->te_nicon == 0); 1226 ASSERT(list_head(&tep->te_iconp) == NULL); 1227 } else { 1228 ASSERT(tep->te_lastep == NULL); 1229 ASSERT(list_head(&tep->te_flowlist) == NULL); 1230 ASSERT(tep->te_flowq == NULL); 1231 } 1232 1233 ASSERT(tep->te_bufcid == 0); 1234 ASSERT(tep->te_timoutid == 0); 1235 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1236 tep->te_acceptor_id = 0; 1237 1238 ASSERT(tep->te_closewait == 0); 1239 ASSERT(!tep->te_rsrv_active); 1240 ASSERT(!tep->te_wsrv_active); 1241 tep->te_closing = 0; 1242 tep->te_nowsrv = B_FALSE; 1243 tep->te_flag = 0; 1244 1245 kmem_cache_free(tl_cache, tep); 1246 } 1247 1248 /* 1249 * Allocate/free reference-counted wrappers for serializers. 
1250 */ 1251 static tl_serializer_t * 1252 tl_serializer_alloc(int flags) 1253 { 1254 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1255 serializer_t *ser; 1256 1257 if (s == NULL) 1258 return (NULL); 1259 1260 ser = serializer_create(flags); 1261 1262 if (ser == NULL) { 1263 kmem_free(s, sizeof (tl_serializer_t)); 1264 return (NULL); 1265 } 1266 1267 s->ts_refcnt = 1; 1268 s->ts_serializer = ser; 1269 return (s); 1270 } 1271 1272 static void 1273 tl_serializer_refhold(tl_serializer_t *s) 1274 { 1275 atomic_add_32(&s->ts_refcnt, 1); 1276 } 1277 1278 static void 1279 tl_serializer_refrele(tl_serializer_t *s) 1280 { 1281 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1282 serializer_destroy(s->ts_serializer); 1283 kmem_free(s, sizeof (tl_serializer_t)); 1284 } 1285 } 1286 1287 /* 1288 * Post a request on the endpoint serializer. For COTS transports keep track of 1289 * the number of pending requests. 1290 */ 1291 static void 1292 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1293 { 1294 if (IS_COTS(tep)) { 1295 mutex_enter(&tep->te_ser_lock); 1296 tep->te_ser_count++; 1297 mutex_exit(&tep->te_ser_lock); 1298 } 1299 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1300 } 1301 1302 /* 1303 * Complete processing the request on the serializer. Decrement the counter for 1304 * pending requests for COTS transports. 1305 */ 1306 static void 1307 tl_serializer_exit(tl_endpt_t *tep) 1308 { 1309 if (IS_COTS(tep)) { 1310 mutex_enter(&tep->te_ser_lock); 1311 ASSERT(tep->te_ser_count != 0); 1312 tep->te_ser_count--; 1313 mutex_exit(&tep->te_ser_lock); 1314 } 1315 } 1316 1317 /* 1318 * Hash management functions. 1319 */ 1320 1321 /* 1322 * Return TRUE if two addresses are equal, false otherwise. 
1323 */ 1324 static boolean_t 1325 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1326 { 1327 return ((ap1->ta_alen > 0) && 1328 (ap1->ta_alen == ap2->ta_alen) && 1329 (ap1->ta_zoneid == ap2->ta_zoneid) && 1330 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1331 } 1332 1333 /* 1334 * This function is called whenever an endpoint is found in the hash table. 1335 */ 1336 /* ARGSUSED0 */ 1337 static void 1338 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1339 { 1340 tl_refhold((tl_endpt_t *)val); 1341 } 1342 1343 /* 1344 * Address hash function. 1345 */ 1346 /* ARGSUSED */ 1347 static uint_t 1348 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1349 { 1350 tl_addr_t *ap = (tl_addr_t *)key; 1351 size_t len = ap->ta_alen; 1352 uchar_t *p = ap->ta_abuf; 1353 uint_t i, g; 1354 1355 ASSERT((len > 0) && (p != NULL)); 1356 1357 for (i = ap->ta_zoneid; len -- != 0; p++) { 1358 i = (i << 4) + (*p); 1359 if ((g = (i & 0xf0000000U)) != 0) { 1360 i ^= (g >> 24); 1361 i ^= g; 1362 } 1363 } 1364 return (i); 1365 } 1366 1367 /* 1368 * This function is used by hash lookups. It compares two generic addresses. 1369 */ 1370 static int 1371 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1372 { 1373 #ifdef DEBUG 1374 tl_addr_t *ap1 = (tl_addr_t *)key1; 1375 tl_addr_t *ap2 = (tl_addr_t *)key2; 1376 1377 ASSERT(key1 != NULL); 1378 ASSERT(key2 != NULL); 1379 1380 ASSERT(ap1->ta_abuf != NULL); 1381 ASSERT(ap2->ta_abuf != NULL); 1382 ASSERT(ap1->ta_alen > 0); 1383 ASSERT(ap2->ta_alen > 0); 1384 #endif 1385 1386 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1387 } 1388 1389 /* 1390 * Prevent endpoint from closing if possible. 1391 * Return B_TRUE on success, B_FALSE on failure. 1392 */ 1393 static boolean_t 1394 tl_noclose(tl_endpt_t *tep) 1395 { 1396 boolean_t rc = B_FALSE; 1397 1398 mutex_enter(&tep->te_closelock); 1399 if (! 
tep->te_closing) { 1400 ASSERT(tep->te_closewait == 0); 1401 tep->te_closewait++; 1402 rc = B_TRUE; 1403 } 1404 mutex_exit(&tep->te_closelock); 1405 return (rc); 1406 } 1407 1408 /* 1409 * Allow endpoint to close if needed. 1410 */ 1411 static void 1412 tl_closeok(tl_endpt_t *tep) 1413 { 1414 ASSERT(tep->te_closewait > 0); 1415 mutex_enter(&tep->te_closelock); 1416 ASSERT(tep->te_closewait == 1); 1417 tep->te_closewait--; 1418 cv_signal(&tep->te_closecv); 1419 mutex_exit(&tep->te_closelock); 1420 } 1421 1422 /* 1423 * STREAMS open entry point. 1424 */ 1425 /* ARGSUSED */ 1426 static int 1427 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1428 { 1429 tl_endpt_t *tep; 1430 minor_t minor = getminor(*devp); 1431 1432 /* 1433 * Driver is called directly. Both CLONEOPEN and MODOPEN 1434 * are illegal 1435 */ 1436 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1437 return (ENXIO); 1438 1439 if (rq->q_ptr != NULL) 1440 return (0); 1441 1442 /* Minor number should specify the mode used for the driver. */ 1443 if ((minor >= TL_UNUSED)) 1444 return (ENXIO); 1445 1446 if (oflag & SO_SOCKSTR) { 1447 minor |= TL_SOCKET; 1448 } 1449 1450 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1451 tep->te_refcnt = 1; 1452 tep->te_cpid = curproc->p_pid; 1453 rq->q_ptr = WR(rq)->q_ptr = tep; 1454 tep->te_state = TS_UNBND; 1455 tep->te_credp = credp; 1456 crhold(credp); 1457 tep->te_zoneid = getzoneid(); 1458 1459 tep->te_flag = minor & TL_MINOR_MASK; 1460 tep->te_transport = &tl_transports[minor]; 1461 1462 /* Allocate a unique minor number for this instance. */ 1463 tep->te_minor = (minor_t)id_alloc(tl_minors); 1464 1465 /* Reserve hash handle for bind(). 
*/ 1466 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1467 1468 /* Transport-specific initialization */ 1469 if (IS_COTS(tep)) { 1470 /* Use private serializer */ 1471 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1472 1473 /* Create list for pending connections */ 1474 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1475 offsetof(tl_icon_t, ti_node)); 1476 tep->te_qlen = 0; 1477 tep->te_nicon = 0; 1478 tep->te_oconp = NULL; 1479 tep->te_conp = NULL; 1480 } else { 1481 /* Use shared serializer */ 1482 tep->te_ser = tep->te_transport->tr_serializer; 1483 bzero(&tep->te_flows, sizeof (list_node_t)); 1484 /* Create list for flow control */ 1485 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1486 offsetof(tl_endpt_t, te_flows)); 1487 tep->te_flowq = NULL; 1488 tep->te_lastep = NULL; 1489 1490 } 1491 1492 /* Initialize endpoint address */ 1493 if (IS_SOCKET(tep)) { 1494 /* Socket-specific address handling. */ 1495 tep->te_alen = TL_SOUX_ADDRLEN; 1496 tep->te_abuf = &tep->te_uxaddr; 1497 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1498 tep->te_magic = SOU_MAGIC_IMPLICIT; 1499 } else { 1500 tep->te_alen = -1; 1501 tep->te_abuf = NULL; 1502 } 1503 1504 /* clone the driver */ 1505 *devp = makedevice(getmajor(*devp), tep->te_minor); 1506 1507 tep->te_rq = rq; 1508 tep->te_wq = WR(rq); 1509 1510 #ifdef _ILP32 1511 if (IS_SOCKET(tep)) 1512 tep->te_acceptor_id = tep->te_minor; 1513 else 1514 tep->te_acceptor_id = (t_uscalar_t)rq; 1515 #else 1516 tep->te_acceptor_id = tep->te_minor; 1517 #endif /* _ILP32 */ 1518 1519 1520 qprocson(rq); 1521 1522 /* 1523 * Insert acceptor ID in the hash. The AI hash always sleeps on 1524 * insertion so insertion can't fail. 
1525 */ 1526 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1527 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1528 (mod_hash_val_t)tep); 1529 1530 return (0); 1531 } 1532 1533 /* ARGSUSED1 */ 1534 static int 1535 tl_close(queue_t *rq, int flag, cred_t *credp) 1536 { 1537 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1538 tl_endpt_t *elp = NULL; 1539 queue_t *wq = tep->te_wq; 1540 int rc; 1541 1542 ASSERT(wq == WR(rq)); 1543 1544 /* 1545 * Remove the endpoint from acceptor hash. 1546 */ 1547 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1548 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1549 (mod_hash_val_t *)&elp); 1550 ASSERT(rc == 0 && tep == elp); 1551 if ((rc != 0) || (tep != elp)) { 1552 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1553 SL_TRACE|SL_ERROR, 1554 "tl_close:inconsistency in AI hash")); 1555 } 1556 1557 /* 1558 * Wait till close is safe, then mark endpoint as closing. 1559 */ 1560 mutex_enter(&tep->te_closelock); 1561 while (tep->te_closewait) 1562 cv_wait(&tep->te_closecv, &tep->te_closelock); 1563 tep->te_closing = B_TRUE; 1564 /* 1565 * Will wait for the serializer part of the close to finish, so set 1566 * te_closewait now. 1567 */ 1568 tep->te_closewait = 1; 1569 tep->te_nowsrv = B_FALSE; 1570 mutex_exit(&tep->te_closelock); 1571 1572 /* 1573 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1574 * It is safe because close will wait for tl_close_ser to finish. 1575 */ 1576 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1577 1578 /* 1579 * Wait for the first phase of close to complete before qprocsoff(). 
1580 */ 1581 mutex_enter(&tep->te_closelock); 1582 while (tep->te_closewait) 1583 cv_wait(&tep->te_closecv, &tep->te_closelock); 1584 mutex_exit(&tep->te_closelock); 1585 1586 qprocsoff(rq); 1587 1588 if (tep->te_bufcid) { 1589 qunbufcall(rq, tep->te_bufcid); 1590 tep->te_bufcid = 0; 1591 } 1592 if (tep->te_timoutid) { 1593 (void) quntimeout(rq, tep->te_timoutid); 1594 tep->te_timoutid = 0; 1595 } 1596 1597 /* 1598 * Finish close behind serializer. 1599 * 1600 * For a CLTS endpoint increase a refcount and continue close processing 1601 * with serializer protection. This processing may happen asynchronously 1602 * with the completion of tl_close(). 1603 * 1604 * Fot a COTS endpoint wait before destroying tep since the serializer 1605 * may go away together with tep and we need to destroy serializer 1606 * outside of serializer context. 1607 */ 1608 ASSERT(tep->te_closewait == 0); 1609 if (IS_COTS(tep)) 1610 tep->te_closewait = 1; 1611 else 1612 tl_refhold(tep); 1613 1614 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1615 1616 /* 1617 * For connection-oriented transports wait for all serializer activity 1618 * to settle down. 1619 */ 1620 if (IS_COTS(tep)) { 1621 mutex_enter(&tep->te_closelock); 1622 while (tep->te_closewait) 1623 cv_wait(&tep->te_closecv, &tep->te_closelock); 1624 mutex_exit(&tep->te_closelock); 1625 } 1626 1627 crfree(tep->te_credp); 1628 tep->te_credp = NULL; 1629 tep->te_wq = NULL; 1630 tl_refrele(tep); 1631 /* 1632 * tep is likely to be destroyed now, so can't reference it any more. 1633 */ 1634 1635 rq->q_ptr = wq->q_ptr = NULL; 1636 return (0); 1637 } 1638 1639 /* 1640 * First phase of close processing done behind the serializer. 1641 * 1642 * Do not drop the reference in the end - tl_close() wants this reference to 1643 * stay. 
1644 */ 1645 /* ARGSUSED0 */ 1646 static void 1647 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1648 { 1649 ASSERT(tep->te_closing); 1650 ASSERT(tep->te_closewait == 1); 1651 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1652 1653 tep->te_flag |= TL_CLOSE_SER; 1654 1655 /* 1656 * Drain out all messages on queue except for TL_TICOTS where the 1657 * abortive release semantics permit discarding of data on close 1658 */ 1659 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1660 tl_wsrv_ser(NULL, tep); 1661 } 1662 1663 /* Remove address from hash table. */ 1664 tl_addr_unbind(tep); 1665 /* 1666 * qprocsoff() gets confused when q->q_next is not NULL on the write 1667 * queue of the driver, so clear these before qprocsoff() is called. 1668 * Also clear q_next for the peer since this queue is going away. 1669 */ 1670 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1671 tl_endpt_t *peer_tep = tep->te_conp; 1672 1673 tep->te_wq->q_next = NULL; 1674 if ((peer_tep != NULL) && !peer_tep->te_closing) 1675 peer_tep->te_wq->q_next = NULL; 1676 } 1677 1678 tep->te_rq = NULL; 1679 1680 /* wake up tl_close() */ 1681 tl_closeok(tep); 1682 tl_serializer_exit(tep); 1683 } 1684 1685 /* 1686 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1687 * the reference for CLTS. 1688 * 1689 * Called from serializer. Should drop reference count for CLTS only. 1690 */ 1691 /* ARGSUSED0 */ 1692 static void 1693 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1694 { 1695 ASSERT(tep->te_closing); 1696 ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0)); 1697 ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1)); 1698 1699 tep->te_state = -1; /* Uninitialized */ 1700 if (IS_COTS(tep)) { 1701 tl_co_unconnect(tep); 1702 } else { 1703 /* Connectionless specific cleanup */ 1704 TL_REMOVE_PEER(tep->te_lastep); 1705 /* 1706 * Backenable anybody that is flow controlled waiting for 1707 * this endpoint. 
1708 */ 1709 tl_cl_backenable(tep); 1710 if (tep->te_flowq != NULL) { 1711 list_remove(&(tep->te_flowq->te_flowlist), tep); 1712 tep->te_flowq = NULL; 1713 } 1714 } 1715 1716 tl_serializer_exit(tep); 1717 if (IS_COTS(tep)) 1718 tl_closeok(tep); 1719 else 1720 tl_refrele(tep); 1721 } 1722 1723 /* 1724 * STREAMS write-side put procedure. 1725 * Enter serializer for most of the processing. 1726 * 1727 * The T_CONN_REQ is processed outside of serializer. 1728 */ 1729 static void 1730 tl_wput(queue_t *wq, mblk_t *mp) 1731 { 1732 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1733 ssize_t msz = MBLKL(mp); 1734 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1735 tlproc_t *tl_proc = NULL; 1736 1737 switch (DB_TYPE(mp)) { 1738 case M_DATA: 1739 /* Only valid for connection-oriented transports */ 1740 if (IS_CLTS(tep)) { 1741 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1742 SL_TRACE|SL_ERROR, 1743 "tl_wput:M_DATA invalid for ticlts driver")); 1744 tl_merror(wq, mp, EPROTO); 1745 return; 1746 } 1747 tl_proc = tl_wput_data_ser; 1748 break; 1749 1750 case M_IOCTL: 1751 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1752 case TL_IOC_CREDOPT: 1753 /* FALLTHROUGH */ 1754 case TL_IOC_UCREDOPT: 1755 /* 1756 * Serialize endpoint state change. 
1757 */ 1758 tl_proc = tl_do_ioctl_ser; 1759 break; 1760 1761 default: 1762 miocnak(wq, mp, 0, EINVAL); 1763 return; 1764 } 1765 break; 1766 1767 case M_FLUSH: 1768 /* 1769 * do canonical M_FLUSH processing 1770 */ 1771 if (*mp->b_rptr & FLUSHW) { 1772 flushq(wq, FLUSHALL); 1773 *mp->b_rptr &= ~FLUSHW; 1774 } 1775 if (*mp->b_rptr & FLUSHR) { 1776 flushq(RD(wq), FLUSHALL); 1777 qreply(wq, mp); 1778 } else { 1779 freemsg(mp); 1780 } 1781 return; 1782 1783 case M_PROTO: 1784 if (msz < sizeof (prim->type)) { 1785 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1786 SL_TRACE|SL_ERROR, 1787 "tl_wput:M_PROTO data too short")); 1788 tl_merror(wq, mp, EPROTO); 1789 return; 1790 } 1791 switch (prim->type) { 1792 case T_OPTMGMT_REQ: 1793 case T_SVR4_OPTMGMT_REQ: 1794 /* 1795 * Process TPI option management requests immediately 1796 * in put procedure regardless of in-order processing 1797 * of already queued messages. 1798 * (Note: This driver supports AF_UNIX socket 1799 * implementation. Unless we implement this processing, 1800 * setsockopt() on socket endpoint will block on flow 1801 * controlled endpoints which it should not. That is 1802 * required for successful execution of VSU socket tests 1803 * and is consistent with BSD socket behavior). 
1804 */ 1805 tl_optmgmt(wq, mp); 1806 return; 1807 case O_T_BIND_REQ: 1808 case T_BIND_REQ: 1809 tl_proc = tl_bind_ser; 1810 break; 1811 case T_CONN_REQ: 1812 if (IS_CLTS(tep)) { 1813 tl_merror(wq, mp, EPROTO); 1814 return; 1815 } 1816 tl_conn_req(wq, mp); 1817 return; 1818 case T_DATA_REQ: 1819 case T_OPTDATA_REQ: 1820 case T_EXDATA_REQ: 1821 case T_ORDREL_REQ: 1822 tl_proc = tl_putq_ser; 1823 break; 1824 case T_UNITDATA_REQ: 1825 if (IS_COTS(tep) || 1826 (msz < sizeof (struct T_unitdata_req))) { 1827 tl_merror(wq, mp, EPROTO); 1828 return; 1829 } 1830 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1831 tl_proc = tl_unitdata_ser; 1832 } else { 1833 tl_proc = tl_putq_ser; 1834 } 1835 break; 1836 default: 1837 /* 1838 * process in service procedure if message already 1839 * queued (maintain in-order processing) 1840 */ 1841 if (wq->q_first != NULL) { 1842 tl_proc = tl_putq_ser; 1843 } else { 1844 tl_proc = tl_wput_ser; 1845 } 1846 break; 1847 } 1848 break; 1849 1850 case M_PCPROTO: 1851 /* 1852 * Check that the message has enough data to figure out TPI 1853 * primitive. 1854 */ 1855 if (msz < sizeof (prim->type)) { 1856 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1857 SL_TRACE|SL_ERROR, 1858 "tl_wput:M_PCROTO data too short")); 1859 tl_merror(wq, mp, EPROTO); 1860 return; 1861 } 1862 switch (prim->type) { 1863 case T_CAPABILITY_REQ: 1864 tl_capability_req(mp, tep); 1865 return; 1866 case T_INFO_REQ: 1867 tl_proc = tl_info_req_ser; 1868 break; 1869 default: 1870 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1871 SL_TRACE|SL_ERROR, 1872 "tl_wput:unknown TPI msg primitive")); 1873 tl_merror(wq, mp, EPROTO); 1874 return; 1875 } 1876 break; 1877 default: 1878 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1879 "tl_wput:default:unexpected Streams message")); 1880 freemsg(mp); 1881 return; 1882 } 1883 1884 /* 1885 * Continue processing via serializer. 
1886 */ 1887 ASSERT(tl_proc != NULL); 1888 tl_refhold(tep); 1889 tl_serializer_enter(tep, tl_proc, mp); 1890 } 1891 1892 /* 1893 * Place message on the queue while preserving order. 1894 */ 1895 static void 1896 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1897 { 1898 if (tep->te_closing) { 1899 tl_wput_ser(mp, tep); 1900 } else { 1901 TL_PUTQ(tep, mp); 1902 tl_serializer_exit(tep); 1903 tl_refrele(tep); 1904 } 1905 1906 } 1907 1908 static void 1909 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1910 { 1911 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1912 1913 switch (DB_TYPE(mp)) { 1914 case M_DATA: 1915 tl_data(mp, tep); 1916 break; 1917 case M_PROTO: 1918 tl_do_proto(mp, tep); 1919 break; 1920 default: 1921 freemsg(mp); 1922 break; 1923 } 1924 } 1925 1926 /* 1927 * Write side put procedure called from serializer. 1928 */ 1929 static void 1930 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1931 { 1932 tl_wput_common_ser(mp, tep); 1933 tl_serializer_exit(tep); 1934 tl_refrele(tep); 1935 } 1936 1937 /* 1938 * M_DATA processing. Called from serializer. 1939 */ 1940 static void 1941 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1942 { 1943 tl_endpt_t *peer_tep = tep->te_conp; 1944 queue_t *peer_rq; 1945 1946 ASSERT(DB_TYPE(mp) == M_DATA); 1947 ASSERT(IS_COTS(tep)); 1948 1949 ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer)); 1950 1951 /* 1952 * fastpath for data. Ignore flow control if tep is closing. 
1953 */ 1954 if ((peer_tep != NULL) && 1955 !peer_tep->te_closing && 1956 ((tep->te_state == TS_DATA_XFER) || 1957 (tep->te_state == TS_WREQ_ORDREL)) && 1958 (tep->te_wq != NULL) && 1959 (tep->te_wq->q_first == NULL) && 1960 ((peer_tep->te_state == TS_DATA_XFER) || 1961 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1962 ((peer_rq = peer_tep->te_rq) != NULL) && 1963 (canputnext(peer_rq) || tep->te_closing)) { 1964 putnext(peer_rq, mp); 1965 } else if (tep->te_closing) { 1966 /* 1967 * It is possible that by the time we got here tep started to 1968 * close. If the write queue is not empty, and the state is 1969 * TS_DATA_XFER the data should be delivered in order, so we 1970 * call putq() instead of freeing the data. 1971 */ 1972 if ((tep->te_wq != NULL) && 1973 ((tep->te_state == TS_DATA_XFER) || 1974 (tep->te_state == TS_WREQ_ORDREL))) { 1975 TL_PUTQ(tep, mp); 1976 } else { 1977 freemsg(mp); 1978 } 1979 } else { 1980 TL_PUTQ(tep, mp); 1981 } 1982 1983 tl_serializer_exit(tep); 1984 tl_refrele(tep); 1985 } 1986 1987 /* 1988 * Write side service routine. 1989 * 1990 * All actual processing happens within serializer which is entered 1991 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1992 * messages that need processing may have arrived, so tl_wsrv repeats until 1993 * queue is empty or te_nowsrv is set. 1994 */ 1995 static void 1996 tl_wsrv(queue_t *wq) 1997 { 1998 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1999 2000 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 2001 mutex_enter(&tep->te_srv_lock); 2002 ASSERT(tep->te_wsrv_active == B_FALSE); 2003 tep->te_wsrv_active = B_TRUE; 2004 mutex_exit(&tep->te_srv_lock); 2005 2006 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2007 2008 /* 2009 * Wait for serializer job to complete. 
2010 */ 2011 mutex_enter(&tep->te_srv_lock); 2012 while (tep->te_wsrv_active) { 2013 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2014 } 2015 cv_signal(&tep->te_srv_cv); 2016 mutex_exit(&tep->te_srv_lock); 2017 } 2018 } 2019 2020 /* 2021 * Serialized write side processing of the STREAMS queue. 2022 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2023 * is NULL. 2024 */ 2025 static void 2026 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2027 { 2028 mblk_t *mp; 2029 queue_t *wq = tep->te_wq; 2030 2031 ASSERT(wq != NULL); 2032 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2033 tl_wput_common_ser(mp, tep); 2034 } 2035 2036 /* 2037 * Wakeup service routine unless called from close. 2038 * If ser_mp is specified, the caller is tl_wsrv(). 2039 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2040 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2041 * be no matching tl_serializer_exit() in this case. 2042 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2043 * waiting on te_srv_cv. 2044 */ 2045 if (ser_mp != NULL) { 2046 /* 2047 * We are called from tl_wsrv. 2048 */ 2049 mutex_enter(&tep->te_srv_lock); 2050 ASSERT(tep->te_wsrv_active); 2051 tep->te_wsrv_active = B_FALSE; 2052 cv_signal(&tep->te_srv_cv); 2053 mutex_exit(&tep->te_srv_lock); 2054 tl_serializer_exit(tep); 2055 } 2056 } 2057 2058 /* 2059 * Called when the stream is backenabled. Enter serializer and qenable everyone 2060 * flow controlled by tep. 2061 * 2062 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2063 * is possible that two instances of tl_rsrv will be running reusing the same 2064 * rsrv mblk. 
2065 */ 2066 static void 2067 tl_rsrv(queue_t *rq) 2068 { 2069 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2070 2071 ASSERT(rq->q_first == NULL); 2072 ASSERT(tep->te_rsrv_active == 0); 2073 2074 tep->te_rsrv_active = B_TRUE; 2075 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2076 /* 2077 * Wait for serializer job to complete. 2078 */ 2079 mutex_enter(&tep->te_srv_lock); 2080 while (tep->te_rsrv_active) { 2081 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2082 } 2083 cv_signal(&tep->te_srv_cv); 2084 mutex_exit(&tep->te_srv_lock); 2085 } 2086 2087 /* ARGSUSED */ 2088 static void 2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2090 { 2091 tl_endpt_t *peer_tep; 2092 2093 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2094 tl_cl_backenable(tep); 2095 } else if ( 2096 IS_COTS(tep) && 2097 ((peer_tep = tep->te_conp) != NULL) && 2098 !peer_tep->te_closing && 2099 ((tep->te_state == TS_DATA_XFER) || 2100 (tep->te_state == TS_WIND_ORDREL)|| 2101 (tep->te_state == TS_WREQ_ORDREL))) { 2102 TL_QENABLE(peer_tep); 2103 } 2104 2105 /* 2106 * Wakeup read side service routine. 2107 */ 2108 mutex_enter(&tep->te_srv_lock); 2109 ASSERT(tep->te_rsrv_active); 2110 tep->te_rsrv_active = B_FALSE; 2111 cv_signal(&tep->te_srv_cv); 2112 mutex_exit(&tep->te_srv_lock); 2113 tl_serializer_exit(tep); 2114 } 2115 2116 /* 2117 * process M_PROTO messages. Always called from serializer. 2118 */ 2119 static void 2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2121 { 2122 ssize_t msz = MBLKL(mp); 2123 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2124 2125 /* Message size was validated by tl_wput(). 
*/ 2126 ASSERT(msz >= sizeof (prim->type)); 2127 2128 switch (prim->type) { 2129 case T_UNBIND_REQ: 2130 tl_unbind(mp, tep); 2131 break; 2132 2133 case T_ADDR_REQ: 2134 tl_addr_req(mp, tep); 2135 break; 2136 2137 case O_T_CONN_RES: 2138 case T_CONN_RES: 2139 if (IS_CLTS(tep)) { 2140 tl_merror(tep->te_wq, mp, EPROTO); 2141 break; 2142 } 2143 tl_conn_res(mp, tep); 2144 break; 2145 2146 case T_DISCON_REQ: 2147 if (IS_CLTS(tep)) { 2148 tl_merror(tep->te_wq, mp, EPROTO); 2149 break; 2150 } 2151 tl_discon_req(mp, tep); 2152 break; 2153 2154 case T_DATA_REQ: 2155 if (IS_CLTS(tep)) { 2156 tl_merror(tep->te_wq, mp, EPROTO); 2157 break; 2158 } 2159 tl_data(mp, tep); 2160 break; 2161 2162 case T_OPTDATA_REQ: 2163 if (IS_CLTS(tep)) { 2164 tl_merror(tep->te_wq, mp, EPROTO); 2165 break; 2166 } 2167 tl_data(mp, tep); 2168 break; 2169 2170 case T_EXDATA_REQ: 2171 if (IS_CLTS(tep)) { 2172 tl_merror(tep->te_wq, mp, EPROTO); 2173 break; 2174 } 2175 tl_exdata(mp, tep); 2176 break; 2177 2178 case T_ORDREL_REQ: 2179 if (! IS_COTSORD(tep)) { 2180 tl_merror(tep->te_wq, mp, EPROTO); 2181 break; 2182 } 2183 tl_ordrel(mp, tep); 2184 break; 2185 2186 case T_UNITDATA_REQ: 2187 if (IS_COTS(tep)) { 2188 tl_merror(tep->te_wq, mp, EPROTO); 2189 break; 2190 } 2191 tl_unitdata(mp, tep); 2192 break; 2193 2194 default: 2195 tl_merror(tep->te_wq, mp, EPROTO); 2196 break; 2197 } 2198 } 2199 2200 /* 2201 * Process ioctl from serializer. 2202 * This is a wrapper around tl_do_ioctl(). 2203 */ 2204 static void 2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2206 { 2207 if (! 
tep->te_closing) 2208 tl_do_ioctl(mp, tep); 2209 else 2210 freemsg(mp); 2211 2212 tl_serializer_exit(tep); 2213 tl_refrele(tep); 2214 } 2215 2216 static void 2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2218 { 2219 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2220 int cmd = iocbp->ioc_cmd; 2221 queue_t *wq = tep->te_wq; 2222 int error; 2223 int thisopt, otheropt; 2224 2225 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2226 2227 switch (cmd) { 2228 case TL_IOC_CREDOPT: 2229 if (cmd == TL_IOC_CREDOPT) { 2230 thisopt = TL_SETCRED; 2231 otheropt = TL_SETUCRED; 2232 } else { 2233 /* FALLTHROUGH */ 2234 case TL_IOC_UCREDOPT: 2235 thisopt = TL_SETUCRED; 2236 otheropt = TL_SETCRED; 2237 } 2238 /* 2239 * The credentials passing does not apply to sockets. 2240 * Only one of the cred options can be set at a given time. 2241 */ 2242 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2243 miocnak(wq, mp, 0, EINVAL); 2244 return; 2245 } 2246 2247 /* 2248 * Turn on generation of credential options for 2249 * T_conn_req, T_conn_con, T_unidata_ind. 
2250 */ 2251 error = miocpullup(mp, sizeof (uint32_t)); 2252 if (error != 0) { 2253 miocnak(wq, mp, 0, error); 2254 return; 2255 } 2256 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2257 miocnak(wq, mp, 0, EINVAL); 2258 return; 2259 } 2260 2261 if (*(uint32_t *)mp->b_cont->b_rptr) 2262 tep->te_flag |= thisopt; 2263 else 2264 tep->te_flag &= ~thisopt; 2265 2266 miocack(wq, mp, 0, 0); 2267 break; 2268 2269 default: 2270 /* Should not be here */ 2271 miocnak(wq, mp, 0, EINVAL); 2272 break; 2273 } 2274 } 2275 2276 2277 /* 2278 * send T_ERROR_ACK 2279 * Note: assumes enough memory or caller passed big enough mp 2280 * - no recovery from allocb failures 2281 */ 2282 2283 static void 2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2285 t_scalar_t unix_err, t_scalar_t type) 2286 { 2287 struct T_error_ack *err_ack; 2288 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2289 M_PCPROTO, T_ERROR_ACK); 2290 2291 if (ackmp == NULL) { 2292 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2293 "tl_error_ack:out of mblk memory")); 2294 tl_merror(wq, NULL, ENOSR); 2295 return; 2296 } 2297 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2298 err_ack->ERROR_prim = type; 2299 err_ack->TLI_error = tli_err; 2300 err_ack->UNIX_error = unix_err; 2301 2302 /* 2303 * send error ack message 2304 */ 2305 qreply(wq, ackmp); 2306 } 2307 2308 2309 2310 /* 2311 * send T_OK_ACK 2312 * Note: assumes enough memory or caller passed big enough mp 2313 * - no recovery from allocb failures 2314 */ 2315 static void 2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2317 { 2318 struct T_ok_ack *ok_ack; 2319 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2320 M_PCPROTO, T_OK_ACK); 2321 2322 if (ackmp == NULL) { 2323 tl_merror(wq, NULL, ENOMEM); 2324 return; 2325 } 2326 2327 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2328 ok_ack->CORRECT_prim = type; 2329 2330 (void) qreply(wq, ackmp); 2331 } 2332 2333 /* 2334 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2335 * This is a wrapper around tl_bind(). 2336 */ 2337 static void 2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2339 { 2340 if (! tep->te_closing) 2341 tl_bind(mp, tep); 2342 else 2343 freemsg(mp); 2344 2345 tl_serializer_exit(tep); 2346 tl_refrele(tep); 2347 } 2348 2349 /* 2350 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2351 * Assumes that the endpoint is in the unbound. 2352 */ 2353 static void 2354 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2355 { 2356 queue_t *wq = tep->te_wq; 2357 struct T_bind_ack *b_ack; 2358 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2359 mblk_t *ackmp, *bamp; 2360 soux_addr_t ux_addr; 2361 t_uscalar_t qlen = 0; 2362 t_scalar_t alen, aoff; 2363 tl_addr_t addr_req; 2364 void *addr_startp; 2365 ssize_t msz = MBLKL(mp), basize; 2366 t_scalar_t tli_err = 0, unix_err = 0; 2367 t_scalar_t save_prim_type = bind->PRIM_type; 2368 t_scalar_t save_state = tep->te_state; 2369 2370 if (tep->te_state != TS_UNBND) { 2371 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2372 SL_TRACE|SL_ERROR, 2373 "tl_wput:bind_request:out of state, state=%d", 2374 tep->te_state)); 2375 tli_err = TOUTSTATE; 2376 goto error; 2377 } 2378 2379 if (msz < sizeof (struct T_bind_req)) { 2380 tli_err = TSYSERR; unix_err = EINVAL; 2381 goto error; 2382 } 2383 2384 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2385 2386 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2387 (bind->PRIM_type == T_BIND_REQ)); 2388 2389 alen = bind->ADDR_length; 2390 aoff = bind->ADDR_offset; 2391 2392 /* negotiate max conn req pending */ 2393 if (IS_COTS(tep)) { 2394 qlen = bind->CONIND_number; 2395 if (qlen > tl_maxqlen) 2396 qlen = tl_maxqlen; 2397 } 2398 2399 /* 2400 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2401 * and bound again. 
2402 */ 2403 if ((tep->te_hash_hndl == NULL) && 2404 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2405 mod_hash_reserve_nosleep(tep->te_addrhash, 2406 &tep->te_hash_hndl) != 0) { 2407 tli_err = TSYSERR; unix_err = ENOSR; 2408 goto error; 2409 } 2410 2411 /* 2412 * Verify address correctness. 2413 */ 2414 if (IS_SOCKET(tep)) { 2415 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2416 2417 if ((alen != TL_SOUX_ADDRLEN) || 2418 (aoff < 0) || 2419 (aoff + alen > msz)) { 2420 (void) (STRLOG(TL_ID, tep->te_minor, 2421 1, SL_TRACE|SL_ERROR, 2422 "tl_bind: invalid socket addr")); 2423 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2424 tli_err = TSYSERR; unix_err = EINVAL; 2425 goto error; 2426 } 2427 /* Copy address from message to local buffer. */ 2428 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2429 /* 2430 * Check that we got correct address from sockets 2431 */ 2432 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2433 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2434 (void) (STRLOG(TL_ID, tep->te_minor, 2435 1, SL_TRACE|SL_ERROR, 2436 "tl_bind: invalid socket magic")); 2437 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2438 tli_err = TSYSERR; unix_err = EINVAL; 2439 goto error; 2440 } 2441 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2442 (ux_addr.soua_vp != NULL)) { 2443 (void) (STRLOG(TL_ID, tep->te_minor, 2444 1, SL_TRACE|SL_ERROR, 2445 "tl_bind: implicit addr non-empty")); 2446 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2447 tli_err = TSYSERR; unix_err = EINVAL; 2448 goto error; 2449 } 2450 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2451 (ux_addr.soua_vp == NULL)) { 2452 (void) (STRLOG(TL_ID, tep->te_minor, 2453 1, SL_TRACE|SL_ERROR, 2454 "tl_bind: explicit addr empty")); 2455 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2456 tli_err = TSYSERR; unix_err = EINVAL; 2457 goto error; 2458 } 2459 } else { 2460 if ((alen > 0) && ((aoff < 0) || 2461 ((ssize_t)(aoff + alen) > msz) || 2462 ((aoff + alen) < 0))) { 
2463 (void) (STRLOG(TL_ID, tep->te_minor, 2464 1, SL_TRACE|SL_ERROR, 2465 "tl_bind: invalid message")); 2466 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2467 tli_err = TSYSERR; unix_err = EINVAL; 2468 goto error; 2469 } 2470 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2471 (void) (STRLOG(TL_ID, tep->te_minor, 2472 1, SL_TRACE|SL_ERROR, 2473 "tl_bind: bad addr in message")); 2474 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2475 tli_err = TBADADDR; 2476 goto error; 2477 } 2478 #ifdef DEBUG 2479 /* 2480 * Mild form of ASSERT()ion to detect broken TPI apps. 2481 * if (! assertion) 2482 * log warning; 2483 */ 2484 if (! ((alen == 0 && aoff == 0) || 2485 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2486 (void) (STRLOG(TL_ID, tep->te_minor, 2487 3, SL_TRACE|SL_ERROR, 2488 "tl_bind: addr overlaps TPI message")); 2489 } 2490 #endif 2491 } 2492 2493 /* 2494 * Bind the address provided or allocate one if requested. 2495 * Allow rebinds with a new qlen value. 2496 */ 2497 if (IS_SOCKET(tep)) { 2498 /* 2499 * For anonymous requests the te_ap is already set up properly 2500 * so use minor number as an address. 2501 * For explicit requests need to check whether the address is 2502 * already in use. 2503 */ 2504 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2505 int rc; 2506 2507 if (tep->te_flag & TL_ADDRHASHED) { 2508 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2509 if (tep->te_vp == ux_addr.soua_vp) 2510 goto skip_addr_bind; 2511 else /* Rebind to a new address. */ 2512 tl_addr_unbind(tep); 2513 } 2514 /* 2515 * Insert address in the hash if it is not already 2516 * there. Since we use preallocated handle, the insert 2517 * can fail only if the key is already present. 
2518 */ 2519 rc = mod_hash_insert_reserve(tep->te_addrhash, 2520 (mod_hash_key_t)ux_addr.soua_vp, 2521 (mod_hash_val_t)tep, tep->te_hash_hndl); 2522 2523 if (rc != 0) { 2524 ASSERT(rc == MH_ERR_DUPLICATE); 2525 /* 2526 * Violate O_T_BIND_REQ semantics and fail with 2527 * TADDRBUSY - sockets will not use any address 2528 * other than supplied one for explicit binds. 2529 */ 2530 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2531 SL_TRACE|SL_ERROR, 2532 "tl_bind:requested addr %p is busy", 2533 ux_addr.soua_vp)); 2534 tli_err = TADDRBUSY; unix_err = 0; 2535 goto error; 2536 } 2537 tep->te_uxaddr = ux_addr; 2538 tep->te_flag |= TL_ADDRHASHED; 2539 tep->te_hash_hndl = NULL; 2540 } 2541 } else if (alen == 0) { 2542 /* 2543 * assign any free address 2544 */ 2545 if (! tl_get_any_addr(tep, NULL)) { 2546 (void) (STRLOG(TL_ID, tep->te_minor, 2547 1, SL_TRACE|SL_ERROR, 2548 "tl_bind:failed to get buffer for any " 2549 "address")); 2550 tli_err = TSYSERR; unix_err = ENOSR; 2551 goto error; 2552 } 2553 } else { 2554 addr_req.ta_alen = alen; 2555 addr_req.ta_abuf = (mp->b_rptr + aoff); 2556 addr_req.ta_zoneid = tep->te_zoneid; 2557 2558 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2559 if (tep->te_abuf == NULL) { 2560 tli_err = TSYSERR; unix_err = ENOSR; 2561 goto error; 2562 } 2563 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2564 tep->te_alen = alen; 2565 2566 if (mod_hash_insert_reserve(tep->te_addrhash, 2567 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2568 tep->te_hash_hndl) != 0) { 2569 if (save_prim_type == T_BIND_REQ) { 2570 /* 2571 * The bind semantics for this primitive 2572 * require a failure if the exact address 2573 * requested is busy 2574 */ 2575 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2576 SL_TRACE|SL_ERROR, 2577 "tl_bind:requested addr is busy")); 2578 tli_err = TADDRBUSY; unix_err = 0; 2579 goto error; 2580 } 2581 2582 /* 2583 * O_T_BIND_REQ semantics say if address if requested 2584 * address is busy, bind to any available free address 
2585 */ 2586 if (! tl_get_any_addr(tep, &addr_req)) { 2587 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2588 SL_TRACE|SL_ERROR, 2589 "tl_bind:unable to get any addr buf")); 2590 tli_err = TSYSERR; unix_err = ENOMEM; 2591 goto error; 2592 } 2593 } else { 2594 tep->te_flag |= TL_ADDRHASHED; 2595 tep->te_hash_hndl = NULL; 2596 } 2597 } 2598 2599 ASSERT(tep->te_alen >= 0); 2600 2601 skip_addr_bind: 2602 /* 2603 * prepare T_BIND_ACK TPI message 2604 */ 2605 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2606 bamp = reallocb(mp, basize, 0); 2607 if (bamp == NULL) { 2608 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2609 "tl_wput:tl_bind: allocb failed")); 2610 /* 2611 * roll back state changes 2612 */ 2613 tl_addr_unbind(tep); 2614 tep->te_state = TS_UNBND; 2615 tl_memrecover(wq, mp, basize); 2616 return; 2617 } 2618 2619 DB_TYPE(bamp) = M_PCPROTO; 2620 bamp->b_wptr = bamp->b_rptr + basize; 2621 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2622 b_ack->PRIM_type = T_BIND_ACK; 2623 b_ack->CONIND_number = qlen; 2624 b_ack->ADDR_length = tep->te_alen; 2625 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2626 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2627 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2628 2629 if (IS_COTS(tep)) { 2630 tep->te_qlen = qlen; 2631 if (qlen > 0) 2632 tep->te_flag |= TL_LISTENER; 2633 } 2634 2635 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2636 /* 2637 * send T_BIND_ACK message 2638 */ 2639 (void) qreply(wq, bamp); 2640 return; 2641 2642 error: 2643 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2644 if (ackmp == NULL) { 2645 /* 2646 * roll back state changes 2647 */ 2648 tep->te_state = save_state; 2649 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2650 return; 2651 } 2652 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2653 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2654 } 2655 2656 /* 2657 * Process T_UNBIND_REQ. 2658 * Called from serializer. 
2659 */ 2660 static void 2661 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2662 { 2663 queue_t *wq; 2664 mblk_t *ackmp; 2665 2666 if (tep->te_closing) { 2667 freemsg(mp); 2668 return; 2669 } 2670 2671 wq = tep->te_wq; 2672 2673 /* 2674 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2675 * ==> allocate for T_ERROR_ACK (known max) 2676 */ 2677 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2678 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2679 return; 2680 } 2681 /* 2682 * memory resources committed 2683 * Note: no message validation. T_UNBIND_REQ message is 2684 * same size as PRIM_type field so already verified earlier. 2685 */ 2686 2687 /* 2688 * validate state 2689 */ 2690 if (tep->te_state != TS_IDLE) { 2691 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2692 SL_TRACE|SL_ERROR, 2693 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2694 tep->te_state)); 2695 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2696 return; 2697 } 2698 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2699 2700 /* 2701 * TPI says on T_UNBIND_REQ: 2702 * send up a M_FLUSH to flush both 2703 * read and write queues 2704 */ 2705 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2706 2707 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2708 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2709 2710 /* 2711 * Sockets use bind with qlen==0 followed by bind() to 2712 * the same address with qlen > 0 for listeners. 2713 * We allow rebind with a new qlen value. 2714 */ 2715 tl_addr_unbind(tep); 2716 } 2717 2718 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2719 /* 2720 * send T_OK_ACK 2721 */ 2722 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2723 } 2724 2725 2726 /* 2727 * Option management code from drv/ip is used here 2728 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2729 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2730 * However, that is what we want as that option is 'unorthodox' 2731 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2732 * and not in T_SVR4_OPTMGMT_REQ/ACK 2733 * Note2: use of optcom_req means this routine is an exception to 2734 * recovery from allocb() failures. 2735 */ 2736 2737 static void 2738 tl_optmgmt(queue_t *wq, mblk_t *mp) 2739 { 2740 tl_endpt_t *tep; 2741 mblk_t *ackmp; 2742 union T_primitives *prim; 2743 cred_t *cr; 2744 2745 tep = (tl_endpt_t *)wq->q_ptr; 2746 prim = (union T_primitives *)mp->b_rptr; 2747 2748 /* 2749 * All Solaris components should pass a db_credp 2750 * for this TPI message, hence we ASSERT. 2751 * But in case there is some other M_PROTO that looks 2752 * like a TPI message sent by some other kernel 2753 * component, we check and return an error. 2754 */ 2755 cr = msg_getcred(mp, NULL); 2756 ASSERT(cr != NULL); 2757 if (cr == NULL) { 2758 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2759 return; 2760 } 2761 2762 /* all states OK for AF_UNIX options ? */ 2763 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2764 prim->type == T_SVR4_OPTMGMT_REQ) { 2765 /* 2766 * Broken TLI semantics that options can only be managed 2767 * in TS_IDLE state. Needed for Sparc ABI test suite that 2768 * tests this TLI (mis)feature using this device driver. 2769 */ 2770 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2771 SL_TRACE|SL_ERROR, 2772 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2773 tep->te_state)); 2774 /* 2775 * preallocate memory for T_ERROR_ACK 2776 */ 2777 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2778 if (! 
ackmp) { 2779 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2780 return; 2781 } 2782 2783 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2784 freemsg(mp); 2785 return; 2786 } 2787 2788 /* 2789 * call common option management routine from drv/ip 2790 */ 2791 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2792 (void) svr4_optcom_req(wq, mp, cr, &tl_opt_obj, 2793 B_FALSE); 2794 } else { 2795 ASSERT(prim->type == T_OPTMGMT_REQ); 2796 (void) tpi_optcom_req(wq, mp, cr, &tl_opt_obj, 2797 B_FALSE); 2798 } 2799 } 2800 2801 /* 2802 * Handle T_conn_req - the driver part of accept(). 2803 * If TL_SET[U]CRED generate the credentials options. 2804 * If this is a socket pass through options unmodified. 2805 * For sockets generate the T_CONN_CON here instead of 2806 * waiting for the T_CONN_RES. 2807 */ 2808 static void 2809 tl_conn_req(queue_t *wq, mblk_t *mp) 2810 { 2811 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2812 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2813 ssize_t msz = MBLKL(mp); 2814 t_scalar_t alen, aoff, olen, ooff, err = 0; 2815 tl_endpt_t *peer_tep = NULL; 2816 mblk_t *ackmp; 2817 mblk_t *dimp; 2818 struct T_discon_ind *di; 2819 soux_addr_t ux_addr; 2820 tl_addr_t dst; 2821 2822 ASSERT(IS_COTS(tep)); 2823 2824 if (tep->te_closing) { 2825 freemsg(mp); 2826 return; 2827 } 2828 2829 /* 2830 * preallocate memory for: 2831 * 1. max of T_ERROR_ACK and T_OK_ACK 2832 * ==> known max T_ERROR_ACK 2833 * 2. max of T_DISCON_IND and T_CONN_IND 2834 */ 2835 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2836 if (! 
ackmp) { 2837 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2838 return; 2839 } 2840 /* 2841 * memory committed for T_OK_ACK/T_ERROR_ACK now 2842 * will be committed for T_DISCON_IND/T_CONN_IND later 2843 */ 2844 2845 if (tep->te_state != TS_IDLE) { 2846 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2847 SL_TRACE|SL_ERROR, 2848 "tl_wput:T_CONN_REQ:out of state, state=%d", 2849 tep->te_state)); 2850 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2851 freemsg(mp); 2852 return; 2853 } 2854 2855 /* 2856 * validate the message 2857 * Note: dereference fields in struct inside message only 2858 * after validating the message length. 2859 */ 2860 if (msz < sizeof (struct T_conn_req)) { 2861 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2862 "tl_conn_req:invalid message length")); 2863 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2864 freemsg(mp); 2865 return; 2866 } 2867 alen = creq->DEST_length; 2868 aoff = creq->DEST_offset; 2869 olen = creq->OPT_length; 2870 ooff = creq->OPT_offset; 2871 if (olen == 0) 2872 ooff = 0; 2873 2874 if (IS_SOCKET(tep)) { 2875 if ((alen != TL_SOUX_ADDRLEN) || 2876 (aoff < 0) || 2877 (aoff + alen > msz) || 2878 (alen > msz - sizeof (struct T_conn_req))) { 2879 (void) (STRLOG(TL_ID, tep->te_minor, 2880 1, SL_TRACE|SL_ERROR, 2881 "tl_conn_req: invalid socket addr")); 2882 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2883 freemsg(mp); 2884 return; 2885 } 2886 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2887 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2888 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2889 (void) (STRLOG(TL_ID, tep->te_minor, 2890 1, SL_TRACE|SL_ERROR, 2891 "tl_conn_req: invalid socket magic")); 2892 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2893 freemsg(mp); 2894 return; 2895 } 2896 } else { 2897 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2898 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2899 ooff + olen < 0)) || 2900 olen < 0 || ooff < 0) { 2901 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2902 SL_TRACE|SL_ERROR, 2903 "tl_conn_req:invalid message")); 2904 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2905 freemsg(mp); 2906 return; 2907 } 2908 2909 if (alen <= 0 || aoff < 0 || 2910 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2911 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2912 SL_TRACE|SL_ERROR, 2913 "tl_conn_req:bad addr in message, " 2914 "alen=%d, msz=%ld", 2915 alen, msz)); 2916 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2917 freemsg(mp); 2918 return; 2919 } 2920 #ifdef DEBUG 2921 /* 2922 * Mild form of ASSERT()ion to detect broken TPI apps. 2923 * if (! assertion) 2924 * log warning; 2925 */ 2926 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2927 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2928 SL_TRACE|SL_ERROR, 2929 "tl_conn_req: addr overlaps TPI message")); 2930 } 2931 #endif 2932 if (olen) { 2933 /* 2934 * no opts in connect req 2935 * supported in this provider except for sockets. 2936 */ 2937 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2938 SL_TRACE|SL_ERROR, 2939 "tl_conn_req:options not supported " 2940 "in message")); 2941 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2942 freemsg(mp); 2943 return; 2944 } 2945 } 2946 2947 /* 2948 * Prevent tep from closing on us. 2949 */ 2950 if (! tl_noclose(tep)) { 2951 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2952 "tl_conn_req:endpoint is closing")); 2953 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2954 freemsg(mp); 2955 return; 2956 } 2957 2958 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2959 /* 2960 * get endpoint to connect to 2961 * check that peer with DEST addr is bound to addr 2962 * and has CONIND_number > 0 2963 */ 2964 dst.ta_alen = alen; 2965 dst.ta_abuf = mp->b_rptr + aoff; 2966 dst.ta_zoneid = tep->te_zoneid; 2967 2968 /* 2969 * Verify if remote addr is in use 2970 */ 2971 peer_tep = (IS_SOCKET(tep) ? 
2972 tl_sock_find_peer(tep, &ux_addr) : 2973 tl_find_peer(tep, &dst)); 2974 2975 if (peer_tep == NULL) { 2976 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2977 "tl_conn_req:no one at connect address")); 2978 err = ECONNREFUSED; 2979 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2980 /* 2981 * validate that number of incoming connection is 2982 * not to capacity on destination endpoint 2983 */ 2984 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2985 "tl_conn_req: qlen overflow connection refused")); 2986 err = ECONNREFUSED; 2987 } 2988 2989 /* 2990 * Send T_DISCON_IND in case of error 2991 */ 2992 if (err != 0) { 2993 if (peer_tep != NULL) 2994 tl_refrele(peer_tep); 2995 /* We are still expected to send T_OK_ACK */ 2996 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2997 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2998 tl_closeok(tep); 2999 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 3000 M_PROTO, T_DISCON_IND); 3001 if (dimp == NULL) { 3002 tl_merror(wq, NULL, ENOSR); 3003 return; 3004 } 3005 di = (struct T_discon_ind *)dimp->b_rptr; 3006 di->DISCON_reason = err; 3007 di->SEQ_number = BADSEQNUM; 3008 3009 tep->te_state = TS_IDLE; 3010 /* 3011 * send T_DISCON_IND message 3012 */ 3013 putnext(tep->te_rq, dimp); 3014 return; 3015 } 3016 3017 ASSERT(IS_COTS(peer_tep)); 3018 3019 /* 3020 * Found the listener. At this point processing will continue on 3021 * listener serializer. Close of the endpoint should be blocked while we 3022 * switch serializers. 3023 */ 3024 tl_serializer_refhold(peer_tep->te_ser); 3025 tl_serializer_refrele(tep->te_ser); 3026 tep->te_ser = peer_tep->te_ser; 3027 ASSERT(tep->te_oconp == NULL); 3028 tep->te_oconp = peer_tep; 3029 3030 /* 3031 * It is safe to close now. Close may continue on listener serializer. 3032 */ 3033 tl_closeok(tep); 3034 3035 /* 3036 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3037 * data, so we link mp to ackmp. 
3038 */ 3039 ackmp->b_cont = mp; 3040 mp = ackmp; 3041 3042 tl_refhold(tep); 3043 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3044 } 3045 3046 /* 3047 * Finish T_CONN_REQ processing on listener serializer. 3048 */ 3049 static void 3050 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3051 { 3052 queue_t *wq; 3053 tl_endpt_t *peer_tep = tep->te_oconp; 3054 mblk_t *confmp, *cimp, *indmp; 3055 void *opts = NULL; 3056 mblk_t *ackmp = mp; 3057 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3058 struct T_conn_ind *ci; 3059 tl_icon_t *tip; 3060 void *addr_startp; 3061 t_scalar_t olen = creq->OPT_length; 3062 t_scalar_t ooff = creq->OPT_offset; 3063 size_t ci_msz; 3064 size_t size; 3065 3066 if (tep->te_closing) { 3067 TL_UNCONNECT(tep->te_oconp); 3068 tl_serializer_exit(tep); 3069 tl_refrele(tep); 3070 freemsg(mp); 3071 return; 3072 } 3073 3074 wq = tep->te_wq; 3075 tep->te_flag |= TL_EAGER; 3076 3077 /* 3078 * Extract preallocated ackmp from mp. 3079 */ 3080 mp = mp->b_cont; 3081 ackmp->b_cont = NULL; 3082 3083 if (olen == 0) 3084 ooff = 0; 3085 3086 if (peer_tep->te_closing || 3087 !((peer_tep->te_state == TS_IDLE) || 3088 (peer_tep->te_state == TS_WRES_CIND))) { 3089 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3090 "tl_conn_req:peer in bad state (%d)", 3091 peer_tep->te_state)); 3092 TL_UNCONNECT(tep->te_oconp); 3093 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3094 freemsg(ackmp); 3095 tl_serializer_exit(tep); 3096 tl_refrele(tep); 3097 return; 3098 } 3099 3100 /* 3101 * preallocate now for T_DISCON_IND or T_CONN_IND 3102 */ 3103 /* 3104 * calculate length of T_CONN_IND message 3105 */ 3106 if (peer_tep->te_flag & TL_SETCRED) { 3107 ooff = 0; 3108 olen = (t_scalar_t) sizeof (struct opthdr) + 3109 OPTLEN(sizeof (tl_credopt_t)); 3110 /* 1 option only */ 3111 } else if (peer_tep->te_flag & TL_SETUCRED) { 3112 ooff = 0; 3113 olen = (t_scalar_t)sizeof (struct opthdr) + 3114 OPTLEN(ucredsize); 3115 /* 1 option only */ 3116 } 
3117 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3118 ci_msz = T_ALIGN(ci_msz) + olen; 3119 size = max(ci_msz, sizeof (struct T_discon_ind)); 3120 3121 /* 3122 * Save options from mp - we'll need them for T_CONN_IND. 3123 */ 3124 if (ooff != 0) { 3125 opts = kmem_alloc(olen, KM_NOSLEEP); 3126 if (opts == NULL) { 3127 /* 3128 * roll back state changes 3129 */ 3130 tep->te_state = TS_IDLE; 3131 tl_memrecover(wq, mp, size); 3132 freemsg(ackmp); 3133 TL_UNCONNECT(tep->te_oconp); 3134 tl_serializer_exit(tep); 3135 tl_refrele(tep); 3136 return; 3137 } 3138 /* Copy options to a temp buffer */ 3139 bcopy(mp->b_rptr + ooff, opts, olen); 3140 } 3141 3142 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3143 /* 3144 * Generate a T_CONN_CON that has the identical address 3145 * (and options) as the T_CONN_REQ. 3146 * NOTE: assumes that the T_conn_req and T_conn_con structures 3147 * are isomorphic. 3148 */ 3149 confmp = copyb(mp); 3150 if (! confmp) { 3151 /* 3152 * roll back state changes 3153 */ 3154 tep->te_state = TS_IDLE; 3155 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3156 freemsg(ackmp); 3157 if (opts != NULL) 3158 kmem_free(opts, olen); 3159 TL_UNCONNECT(tep->te_oconp); 3160 tl_serializer_exit(tep); 3161 tl_refrele(tep); 3162 return; 3163 } 3164 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3165 T_CONN_CON; 3166 } else { 3167 confmp = NULL; 3168 } 3169 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3170 /* 3171 * roll back state changes 3172 */ 3173 tep->te_state = TS_IDLE; 3174 tl_memrecover(wq, mp, size); 3175 freemsg(ackmp); 3176 if (opts != NULL) 3177 kmem_free(opts, olen); 3178 freemsg(confmp); 3179 TL_UNCONNECT(tep->te_oconp); 3180 tl_serializer_exit(tep); 3181 tl_refrele(tep); 3182 return; 3183 } 3184 3185 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3186 if (tip == NULL) { 3187 /* 3188 * roll back state changes 3189 */ 3190 tep->te_state = TS_IDLE; 3191 tl_memrecover(wq, indmp, sizeof (*tip)); 3192 freemsg(ackmp); 3193 if (opts != NULL) 
3194 kmem_free(opts, olen); 3195 freemsg(confmp); 3196 TL_UNCONNECT(tep->te_oconp); 3197 tl_serializer_exit(tep); 3198 tl_refrele(tep); 3199 return; 3200 } 3201 tip->ti_mp = NULL; 3202 3203 /* 3204 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3205 * and tl_icon_t cell. 3206 */ 3207 3208 /* 3209 * ack validity of request and send the peer credential in the ACK. 3210 */ 3211 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3212 3213 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3214 confmp != NULL) { 3215 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3216 } 3217 3218 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3219 3220 /* 3221 * prepare message to send T_CONN_IND 3222 */ 3223 /* 3224 * allocate the message - original data blocks retained 3225 * in the returned mblk 3226 */ 3227 cimp = tl_resizemp(indmp, size); 3228 if (! cimp) { 3229 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3230 "tl_conn_req:con_ind:allocb failure")); 3231 tl_merror(wq, indmp, ENOMEM); 3232 TL_UNCONNECT(tep->te_oconp); 3233 tl_serializer_exit(tep); 3234 tl_refrele(tep); 3235 if (opts != NULL) 3236 kmem_free(opts, olen); 3237 freemsg(confmp); 3238 ASSERT(tip->ti_mp == NULL); 3239 kmem_free(tip, sizeof (*tip)); 3240 return; 3241 } 3242 3243 DB_TYPE(cimp) = M_PROTO; 3244 ci = (struct T_conn_ind *)cimp->b_rptr; 3245 ci->PRIM_type = T_CONN_IND; 3246 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3247 ci->SRC_length = tep->te_alen; 3248 ci->SEQ_number = tep->te_seqno; 3249 3250 addr_startp = cimp->b_rptr + ci->SRC_offset; 3251 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3252 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3253 cred_t *cr; 3254 pid_t cpid; 3255 3256 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3257 ci->SRC_length); 3258 ci->OPT_length = olen; /* because only 1 option */ 3259 cr = msg_getcred(cimp, &cpid); 3260 ASSERT(cr != NULL); 3261 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3262 cr, cpid, 3263 
peer_tep->te_flag, peer_tep->te_credp); 3264 } else if (ooff != 0) { 3265 /* Copy option from T_CONN_REQ */ 3266 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3267 ci->SRC_length); 3268 ci->OPT_length = olen; 3269 ASSERT(opts != NULL); 3270 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3271 } else { 3272 ci->OPT_offset = 0; 3273 ci->OPT_length = 0; 3274 } 3275 if (opts != NULL) 3276 kmem_free(opts, olen); 3277 3278 /* 3279 * register connection request with server peer 3280 * append to list of incoming connections 3281 * increment references for both peer_tep and tep: peer_tep is placed on 3282 * te_oconp and tep is placed on listeners queue. 3283 */ 3284 tip->ti_tep = tep; 3285 tip->ti_seqno = tep->te_seqno; 3286 list_insert_tail(&peer_tep->te_iconp, tip); 3287 peer_tep->te_nicon++; 3288 3289 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3290 /* 3291 * send the T_CONN_IND message 3292 */ 3293 putnext(peer_tep->te_rq, cimp); 3294 3295 /* 3296 * Send a T_CONN_CON message for sockets. 3297 * Disable the queues until we have reached the correct state! 3298 */ 3299 if (confmp != NULL) { 3300 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3301 noenable(wq); 3302 putnext(tep->te_rq, confmp); 3303 } 3304 /* 3305 * Now we need to increment tep reference because tep is referenced by 3306 * server list of pending connections. We also need to decrement 3307 * reference before exiting serializer. Two operations void each other 3308 * so we don't modify reference at all. 3309 */ 3310 ASSERT(tep->te_refcnt >= 2); 3311 ASSERT(peer_tep->te_refcnt >= 2); 3312 tl_serializer_exit(tep); 3313 } 3314 3315 3316 3317 /* 3318 * Handle T_conn_res on listener stream. Called on listener serializer. 3319 * tl_conn_req has already generated the T_CONN_CON. 3320 * tl_conn_res is called on listener serializer. 3321 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3322 * Switch eager serializer to acceptor's. 
 *
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES / O_T_CONN_RES message. It is consumed on every
 *	      path: freed, handed to tl_memrecover(), or reused (via
 *	      reallocb()) as the T_CONN_CON / T_DISCON_IND that is sent out.
 *	tep - the listening endpoint the response arrived on.
 *
 * Outline:
 *	1. Preallocate the T_OK_ACK/T_ERROR_ACK mblk, validate listener state
 *	   and message layout.
 *	2. Look up the acceptor by ACCEPTOR_id (gains a reference) and block
 *	   it from closing.
 *	3. Find the pending connection (tl_icon_t) by SEQ_number and, if the
 *	   client still exists, block it from closing as well.
 *	4. Ack the request, then either send T_DISCON_IND to the acceptor
 *	   (bad client state), just drain queued messages (client gone), or
 *	   link client and acceptor, unify their serializers and confirm the
 *	   connection.
 *
 * Every early return after step 2 must undo the tl_noclose()/reference
 * taken so far; the tl_closeok()/tl_refrele() calls on each path do that.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	/*
	 * NOTE(review): PRIM_type is read here, before the msz check below;
	 * presumably the caller has already validated at least the primitive
	 * type field - confirm against tl_wput.
	 */
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone */
	boolean_t		client_noclose_set = B_FALSE;
	/* B_FALSE when the acceptor must adopt the client's serializer */
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * From here on the listener is in TS_WACK_CRES; every failure below
	 * must move it on with TE_ERROR_ACK (or roll it back explicitly).
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/* Reject sequence numbers in the range [BADSEQNUM, TL_MINOR_START) */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	/*
	 * NOTE(review): the flag is set before the remaining validation and
	 * is not cleared on the error returns below.
	 */
	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Mark for deletion, the entry corresponding to client
	 * on list of pending connections made by the listener
	 *  search list to see if client is one of the
	 * recorded as a listener.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written the test can only fail for
		 * sockets (the inner clause requires IS_SOCKET()); a
		 * non-socket client in any state passes. That does not match
		 * the sentence above - confirm whether non-socket clients can
		 * ever be in a state other than TS_WCON_CREQ here.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredsize);
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* mp now lives on as respmp; make sure it is not freed twice */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * NOTE(review): this path returns without tl_freetip(), so the
	 * tl_icon_t stays on the listener's list, and it forces the listener
	 * to TS_IDLE regardless of the state computed just above.
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/*
		 * mp is still the original request here unless it was turned
		 * into respmp above, in which case it was set to NULL.
		 */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type  = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		/* The T_CONN_CON was already sent; the buffer is not needed */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor: it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client's
			 * serializer is left alone and the acceptor is moved
			 * instead (below).
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark the both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}




/*
 * Handle T_DISCON_REQ.
 *
 * The request is acknowledged with T_OK_ACK and then acted on according to
 * the role of tep:
 *	- listener (te_nicon > 0): reject the pending connection identified
 *	  by SEQ_number and, if that client still exists, send it a
 *	  T_DISCON_IND;
 *	- client with an outstanding connect (te_oconp != NULL): withdraw the
 *	  request; the T_DISCON_IND is either queued with the pending
 *	  T_CONN_IND (sockets with early connect) or sent to the listener;
 *	- connected endpoint (te_conp != NULL): send T_DISCON_IND to the peer
 *	  and unlink the two endpoints.
 *
 * mp is consumed on every path (freed, handed to tl_memrecover(), or reused
 * as the T_DISCON_IND).
 */
static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_discon_req	*dr;
	ssize_t			msz;
	tl_endpt_t		*peer_tep = tep->te_conp;
	tl_endpt_t		*srv_tep = tep->te_oconp;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *dimp, *respmp;
	struct T_discon_ind	*di;
	t_scalar_t		save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/* A peer or server that is already closing is treated as gone */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for  T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND  later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this case
		 * the code below will avoid any action on the client side.
		 */

		ASSERT(IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number));
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		/* te_state was never committed, so there is nothing to undo */
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request
	 */
	if (tep->te_nicon == 0)
		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
	else
		if (tep->te_nicon == 1)
			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
		else
			new_state = NEXTSTATE(TE_OK_ACK4, new_state);

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up  */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 *
	 * From here on save_state is reused to hold the PEER's previous
	 * state (where a peer is involved); it drives the peer-side flush
	 * and qenable decisions further down.
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE|SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			/*
			 * Only SEQ_number is filled in here; the message type,
			 * PRIM_type and DISCON_reason are set in the common
			 * code before the message is sent.
			 */
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		/*
		 * The indication is completed here because, for sockets, it
		 * is queued below rather than passing through the common
		 * send code.
		 */
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that
			 * ti_tep == NULL since the T_DISCON_IND
			 * takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    peer_tep->te_state);
				else
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    peer_tep->te_state);
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	/* No peer to notify: the indication was queued or is not needed */
	if (peer_tep == NULL) {
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type  = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {	/* disconnect pointers if connected */
		/*
		 * NOTE(review): peer_tep is dereferenced by this ASSERT
		 * before the peer_tep != NULL ASSERT a few lines below; the
		 * code relies on te_conp being NULL whenever the "goto done"
		 * path is taken with peer_tep == NULL.
		 */
		ASSERT(! peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (! IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}


/*
 * Handle T_ADDR_REQ.
 *
 * For a connectionless endpoint, or a connection-oriented endpoint that is
 * not in TS_DATA_XFER / TS_WIND_ORDREL / TS_WREQ_ORDREL, reply with a
 * T_ADDR_ACK that carries the local address if the endpoint is bound
 * (te_state >= TS_IDLE) and no remote address. Otherwise hand the request
 * to tl_connected_cots_addr_req(), which also reports the peer's address.
 *
 * mp is reused for the reply (or handed to tl_memrecover() if it cannot be
 * resized).
 */
static void
tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	size_t			ack_sz;
	mblk_t			*ackmp;
	struct T_addr_ack	*taa;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * Note: T_ADDR_REQ message has only PRIM_type field
	 * so it is already validated earlier.
	 */

	if (IS_CLTS(tep) ||
	    (tep->te_state > TS_WREQ_ORDREL) ||
	    (tep->te_state < TS_DATA_XFER)) {
		/*
		 * Either connectionless or connection oriented but not
		 * in connected data transfer state or half-closed states.
		 */
		ack_sz = sizeof (struct T_addr_ack);
		if (tep->te_state >= TS_IDLE)
			/* is bound */
			ack_sz += tep->te_alen;
		ackmp = reallocb(mp, ack_sz, 0);
		if (ackmp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_addr_req: reallocb failed"));
			tl_memrecover(wq, mp, ack_sz);
			return;
		}

		taa = (struct T_addr_ack *)ackmp->b_rptr;

		/* Zeroing leaves the REMADDR_* fields (and, if unbound, */
		/* the LOCADDR_* fields) at 0 */
		bzero(taa, sizeof (struct T_addr_ack));

		taa->PRIM_type = T_ADDR_ACK;
		ackmp->b_datap->db_type = M_PCPROTO;
		ackmp->b_wptr = (uchar_t *)&taa[1];

		if (tep->te_state >= TS_IDLE) {
			/* endpoint is bound */
			taa->LOCADDR_length = tep->te_alen;
			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);

			bcopy(tep->te_abuf, ackmp->b_wptr,
			    tep->te_alen);
			ackmp->b_wptr += tep->te_alen;
			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
		}

		(void) qreply(wq, ackmp);
	} else {
		ASSERT(tep->te_state == TS_DATA_XFER ||
		    tep->te_state == TS_WIND_ORDREL ||
		    tep->te_state == TS_WREQ_ORDREL);
		/* connection oriented in data transfer */
		tl_connected_cots_addr_req(mp, tep);
	}
} 4244 4245 4246 static void 4247 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4248 { 4249 tl_endpt_t *peer_tep; 4250 size_t ack_sz; 4251 mblk_t *ackmp; 4252 struct T_addr_ack *taa; 4253 uchar_t *addr_startp; 4254 4255 if (tep->te_closing) { 4256 freemsg(mp); 4257 return; 4258 } 4259 4260 ASSERT(tep->te_state >= TS_IDLE); 4261 4262 ack_sz = sizeof (struct T_addr_ack); 4263 ack_sz += T_ALIGN(tep->te_alen); 4264 peer_tep = tep->te_conp; 4265 ack_sz += peer_tep->te_alen; 4266 4267 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4268 if (ackmp == NULL) { 4269 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4270 "tl_connected_cots_addr_req: reallocb failed")); 4271 tl_memrecover(tep->te_wq, mp, ack_sz); 4272 return; 4273 } 4274 4275 taa = (struct T_addr_ack *)ackmp->b_rptr; 4276 4277 /* endpoint is bound */ 4278 taa->LOCADDR_length = tep->te_alen; 4279 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4280 4281 addr_startp = (uchar_t *)&taa[1]; 4282 4283 bcopy(tep->te_abuf, addr_startp, 4284 tep->te_alen); 4285 4286 taa->REMADDR_length = peer_tep->te_alen; 4287 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4288 taa->LOCADDR_length); 4289 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4290 bcopy(peer_tep->te_abuf, addr_startp, 4291 peer_tep->te_alen); 4292 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4293 taa->REMADDR_offset + peer_tep->te_alen; 4294 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4295 4296 putnext(tep->te_rq, ackmp); 4297 } 4298 4299 static void 4300 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4301 { 4302 if (IS_CLTS(tep)) { 4303 *ia = tl_clts_info_ack; 4304 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4305 } else { 4306 *ia = tl_cots_info_ack; 4307 if (IS_COTSORD(tep)) 4308 ia->SERV_type = T_COTS_ORD; 4309 } 4310 ia->TIDU_size = tl_tidusz; 4311 ia->CURRENT_state = tep->te_state; 4312 } 4313 4314 /* 4315 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4316 * tl_wput. 4317 */ 4318 static void 4319 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4320 { 4321 mblk_t *ackmp; 4322 t_uscalar_t cap_bits1; 4323 struct T_capability_ack *tcap; 4324 4325 if (tep->te_closing) { 4326 freemsg(mp); 4327 return; 4328 } 4329 4330 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4331 4332 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4333 M_PCPROTO, T_CAPABILITY_ACK); 4334 if (ackmp == NULL) { 4335 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4336 "tl_capability_req: reallocb failed")); 4337 tl_memrecover(tep->te_wq, mp, 4338 sizeof (struct T_capability_ack)); 4339 return; 4340 } 4341 4342 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4343 tcap->CAP_bits1 = 0; 4344 4345 if (cap_bits1 & TC1_INFO) { 4346 tl_copy_info(&tcap->INFO_ack, tep); 4347 tcap->CAP_bits1 |= TC1_INFO; 4348 } 4349 4350 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4351 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4352 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4353 } 4354 4355 putnext(tep->te_rq, ackmp); 4356 } 4357 4358 static void 4359 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4360 { 4361 if (! tep->te_closing) 4362 tl_info_req(mp, tep); 4363 else 4364 freemsg(mp); 4365 4366 tl_serializer_exit(tep); 4367 tl_refrele(tep); 4368 } 4369 4370 static void 4371 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4372 { 4373 mblk_t *ackmp; 4374 4375 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4376 M_PCPROTO, T_INFO_ACK); 4377 if (ackmp == NULL) { 4378 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4379 "tl_info_req: reallocb failed")); 4380 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4381 return; 4382 } 4383 4384 /* 4385 * fill in T_INFO_ACK contents 4386 */ 4387 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4388 4389 /* 4390 * send ack message 4391 */ 4392 putnext(tep->te_rq, ackmp); 4393 } 4394 4395 /* 4396 * Handle M_DATA, T_data_req and T_optdata_req. 
4397 * If this is a socket pass through T_optdata_req options unmodified. 4398 */ 4399 static void 4400 tl_data(mblk_t *mp, tl_endpt_t *tep) 4401 { 4402 queue_t *wq = tep->te_wq; 4403 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4404 ssize_t msz = MBLKL(mp); 4405 tl_endpt_t *peer_tep; 4406 queue_t *peer_rq; 4407 boolean_t closing = tep->te_closing; 4408 4409 if (IS_CLTS(tep)) { 4410 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4411 SL_TRACE|SL_ERROR, 4412 "tl_wput:clts:unattached M_DATA")); 4413 if (!closing) { 4414 tl_merror(wq, mp, EPROTO); 4415 } else { 4416 freemsg(mp); 4417 } 4418 return; 4419 } 4420 4421 /* 4422 * If the endpoint is closing it should still forward any data to the 4423 * peer (if it has one). If it is not allowed to forward it can just 4424 * free the message. 4425 */ 4426 if (closing && 4427 (tep->te_state != TS_DATA_XFER) && 4428 (tep->te_state != TS_WREQ_ORDREL)) { 4429 freemsg(mp); 4430 return; 4431 } 4432 4433 if (DB_TYPE(mp) == M_PROTO) { 4434 if (prim->type == T_DATA_REQ && 4435 msz < sizeof (struct T_data_req)) { 4436 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4437 SL_TRACE|SL_ERROR, 4438 "tl_data:T_DATA_REQ:invalid message")); 4439 if (!closing) { 4440 tl_merror(wq, mp, EPROTO); 4441 } else { 4442 freemsg(mp); 4443 } 4444 return; 4445 } else if (prim->type == T_OPTDATA_REQ && 4446 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) { 4447 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4448 SL_TRACE|SL_ERROR, 4449 "tl_data:T_OPTDATA_REQ:invalid message")); 4450 if (!closing) { 4451 tl_merror(wq, mp, EPROTO); 4452 } else { 4453 freemsg(mp); 4454 } 4455 return; 4456 } 4457 } 4458 4459 /* 4460 * connection oriented provider 4461 */ 4462 switch (tep->te_state) { 4463 case TS_IDLE: 4464 /* 4465 * Other end not here - do nothing. 
4466 */ 4467 freemsg(mp); 4468 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4469 "tl_data:cots with endpoint idle")); 4470 return; 4471 4472 case TS_DATA_XFER: 4473 /* valid states */ 4474 if (tep->te_conp != NULL) 4475 break; 4476 4477 if (tep->te_oconp == NULL) { 4478 if (!closing) { 4479 tl_merror(wq, mp, EPROTO); 4480 } else { 4481 freemsg(mp); 4482 } 4483 return; 4484 } 4485 /* 4486 * For a socket the T_CONN_CON is sent early thus 4487 * the peer might not yet have accepted the connection. 4488 * If we are closing queue the packet with the T_CONN_IND. 4489 * Otherwise defer processing the packet until the peer 4490 * accepts the connection. 4491 * Note that the queue is noenabled when we go into this 4492 * state. 4493 */ 4494 if (!closing) { 4495 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4496 SL_TRACE|SL_ERROR, 4497 "tl_data: ocon")); 4498 TL_PUTBQ(tep, mp); 4499 return; 4500 } 4501 if (DB_TYPE(mp) == M_PROTO) { 4502 if (msz < sizeof (t_scalar_t)) { 4503 freemsg(mp); 4504 return; 4505 } 4506 /* reuse message block - just change REQ to IND */ 4507 if (prim->type == T_DATA_REQ) 4508 prim->type = T_DATA_IND; 4509 else 4510 prim->type = T_OPTDATA_IND; 4511 } 4512 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4513 return; 4514 4515 case TS_WREQ_ORDREL: 4516 if (tep->te_conp == NULL) { 4517 /* 4518 * Other end closed - generate discon_ind 4519 * with reason 0 to cause an EPIPE but no 4520 * read side error on AF_UNIX sockets. 
4521 */ 4522 freemsg(mp); 4523 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4524 SL_TRACE|SL_ERROR, 4525 "tl_data: WREQ_ORDREL and no peer")); 4526 tl_discon_ind(tep, 0); 4527 return; 4528 } 4529 break; 4530 4531 default: 4532 /* invalid state for event TE_DATA_REQ */ 4533 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4534 "tl_data:cots:out of state")); 4535 tl_merror(wq, mp, EPROTO); 4536 return; 4537 } 4538 /* 4539 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4540 * (State stays same on this event) 4541 */ 4542 4543 /* 4544 * get connected endpoint 4545 */ 4546 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4547 freemsg(mp); 4548 /* Peer closed */ 4549 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4550 "tl_data: peer gone")); 4551 return; 4552 } 4553 4554 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4555 peer_rq = peer_tep->te_rq; 4556 4557 /* 4558 * Put it back if flow controlled 4559 * Note: Messages already on queue when we are closing is bounded 4560 * so we can ignore flow control. 
4561 */ 4562 if (!canputnext(peer_rq) && !closing) { 4563 TL_PUTBQ(tep, mp); 4564 return; 4565 } 4566 4567 /* 4568 * validate peer state 4569 */ 4570 switch (peer_tep->te_state) { 4571 case TS_DATA_XFER: 4572 case TS_WIND_ORDREL: 4573 /* valid states */ 4574 break; 4575 default: 4576 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4577 "tl_data:rx side:invalid state")); 4578 tl_merror(peer_tep->te_wq, mp, EPROTO); 4579 return; 4580 } 4581 if (DB_TYPE(mp) == M_PROTO) { 4582 /* reuse message block - just change REQ to IND */ 4583 if (prim->type == T_DATA_REQ) 4584 prim->type = T_DATA_IND; 4585 else 4586 prim->type = T_OPTDATA_IND; 4587 } 4588 /* 4589 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4590 * (peer state stays same on this event) 4591 */ 4592 /* 4593 * send data to connected peer 4594 */ 4595 putnext(peer_rq, mp); 4596 } 4597 4598 4599 4600 static void 4601 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4602 { 4603 queue_t *wq = tep->te_wq; 4604 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4605 ssize_t msz = MBLKL(mp); 4606 tl_endpt_t *peer_tep; 4607 queue_t *peer_rq; 4608 boolean_t closing = tep->te_closing; 4609 4610 if (msz < sizeof (struct T_exdata_req)) { 4611 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4612 "tl_exdata:invalid message")); 4613 if (!closing) { 4614 tl_merror(wq, mp, EPROTO); 4615 } else { 4616 freemsg(mp); 4617 } 4618 return; 4619 } 4620 4621 /* 4622 * If the endpoint is closing it should still forward any data to the 4623 * peer (if it has one). If it is not allowed to forward it can just 4624 * free the message. 4625 */ 4626 if (closing && 4627 (tep->te_state != TS_DATA_XFER) && 4628 (tep->te_state != TS_WREQ_ORDREL)) { 4629 freemsg(mp); 4630 return; 4631 } 4632 4633 /* 4634 * validate state 4635 */ 4636 switch (tep->te_state) { 4637 case TS_IDLE: 4638 /* 4639 * Other end not here - do nothing. 
4640 */ 4641 freemsg(mp); 4642 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4643 "tl_exdata:cots with endpoint idle")); 4644 return; 4645 4646 case TS_DATA_XFER: 4647 /* valid states */ 4648 if (tep->te_conp != NULL) 4649 break; 4650 4651 if (tep->te_oconp == NULL) { 4652 if (!closing) { 4653 tl_merror(wq, mp, EPROTO); 4654 } else { 4655 freemsg(mp); 4656 } 4657 return; 4658 } 4659 /* 4660 * For a socket the T_CONN_CON is sent early thus 4661 * the peer might not yet have accepted the connection. 4662 * If we are closing queue the packet with the T_CONN_IND. 4663 * Otherwise defer processing the packet until the peer 4664 * accepts the connection. 4665 * Note that the queue is noenabled when we go into this 4666 * state. 4667 */ 4668 if (!closing) { 4669 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4670 SL_TRACE|SL_ERROR, 4671 "tl_exdata: ocon")); 4672 TL_PUTBQ(tep, mp); 4673 return; 4674 } 4675 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4676 "tl_exdata: closing socket ocon")); 4677 prim->type = T_EXDATA_IND; 4678 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4679 return; 4680 4681 case TS_WREQ_ORDREL: 4682 if (tep->te_conp == NULL) { 4683 /* 4684 * Other end closed - generate discon_ind 4685 * with reason 0 to cause an EPIPE but no 4686 * read side error on AF_UNIX sockets. 
4687 */ 4688 freemsg(mp); 4689 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4690 SL_TRACE|SL_ERROR, 4691 "tl_exdata: WREQ_ORDREL and no peer")); 4692 tl_discon_ind(tep, 0); 4693 return; 4694 } 4695 break; 4696 4697 default: 4698 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4699 SL_TRACE|SL_ERROR, 4700 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4701 tep->te_state)); 4702 tl_merror(wq, mp, EPROTO); 4703 return; 4704 } 4705 /* 4706 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4707 * (state stays same on this event) 4708 */ 4709 4710 /* 4711 * get connected endpoint 4712 */ 4713 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4714 freemsg(mp); 4715 /* Peer closed */ 4716 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4717 "tl_exdata: peer gone")); 4718 return; 4719 } 4720 4721 peer_rq = peer_tep->te_rq; 4722 4723 /* 4724 * Put it back if flow controlled 4725 * Note: Messages already on queue when we are closing is bounded 4726 * so we can ignore flow control. 4727 */ 4728 if (!canputnext(peer_rq) && !closing) { 4729 TL_PUTBQ(tep, mp); 4730 return; 4731 } 4732 4733 /* 4734 * validate state on peer 4735 */ 4736 switch (peer_tep->te_state) { 4737 case TS_DATA_XFER: 4738 case TS_WIND_ORDREL: 4739 /* valid states */ 4740 break; 4741 default: 4742 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4743 "tl_exdata:rx side:invalid state")); 4744 tl_merror(peer_tep->te_wq, mp, EPROTO); 4745 return; 4746 } 4747 /* 4748 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4749 * (peer state stays same on this event) 4750 */ 4751 /* 4752 * reuse message block 4753 */ 4754 prim->type = T_EXDATA_IND; 4755 4756 /* 4757 * send data to connected peer 4758 */ 4759 putnext(peer_rq, mp); 4760 } 4761 4762 4763 4764 static void 4765 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4766 { 4767 queue_t *wq = tep->te_wq; 4768 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4769 ssize_t msz = MBLKL(mp); 4770 tl_endpt_t *peer_tep; 4771 
queue_t *peer_rq; 4772 boolean_t closing = tep->te_closing; 4773 4774 if (msz < sizeof (struct T_ordrel_req)) { 4775 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4776 "tl_ordrel:invalid message")); 4777 if (!closing) { 4778 tl_merror(wq, mp, EPROTO); 4779 } else { 4780 freemsg(mp); 4781 } 4782 return; 4783 } 4784 4785 /* 4786 * validate state 4787 */ 4788 switch (tep->te_state) { 4789 case TS_DATA_XFER: 4790 case TS_WREQ_ORDREL: 4791 /* valid states */ 4792 if (tep->te_conp != NULL) 4793 break; 4794 4795 if (tep->te_oconp == NULL) 4796 break; 4797 4798 /* 4799 * For a socket the T_CONN_CON is sent early thus 4800 * the peer might not yet have accepted the connection. 4801 * If we are closing queue the packet with the T_CONN_IND. 4802 * Otherwise defer processing the packet until the peer 4803 * accepts the connection. 4804 * Note that the queue is noenabled when we go into this 4805 * state. 4806 */ 4807 if (!closing) { 4808 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4809 SL_TRACE|SL_ERROR, 4810 "tl_ordlrel: ocon")); 4811 TL_PUTBQ(tep, mp); 4812 return; 4813 } 4814 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4815 "tl_ordlrel: closing socket ocon")); 4816 prim->type = T_ORDREL_IND; 4817 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4818 return; 4819 4820 default: 4821 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4822 SL_TRACE|SL_ERROR, 4823 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4824 tep->te_state)); 4825 if (!closing) { 4826 tl_merror(wq, mp, EPROTO); 4827 } else { 4828 freemsg(mp); 4829 } 4830 return; 4831 } 4832 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4833 4834 /* 4835 * get connected endpoint 4836 */ 4837 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4838 /* Peer closed */ 4839 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4840 "tl_ordrel: peer gone")); 4841 freemsg(mp); 4842 return; 4843 } 4844 4845 peer_rq = peer_tep->te_rq; 4846 4847 /* 4848 * Put it back if flow controlled 
except when we are closing. 4849 * Note: Messages already on queue when we are closing is bounded 4850 * so we can ignore flow control. 4851 */ 4852 if (! canputnext(peer_rq) && !closing) { 4853 TL_PUTBQ(tep, mp); 4854 return; 4855 } 4856 4857 /* 4858 * validate state on peer 4859 */ 4860 switch (peer_tep->te_state) { 4861 case TS_DATA_XFER: 4862 case TS_WIND_ORDREL: 4863 /* valid states */ 4864 break; 4865 default: 4866 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4867 "tl_ordrel:rx side:invalid state")); 4868 tl_merror(peer_tep->te_wq, mp, EPROTO); 4869 return; 4870 } 4871 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4872 4873 /* 4874 * reuse message block 4875 */ 4876 prim->type = T_ORDREL_IND; 4877 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4878 "tl_ordrel: send ordrel_ind")); 4879 4880 /* 4881 * send data to connected peer 4882 */ 4883 putnext(peer_rq, mp); 4884 } 4885 4886 4887 /* 4888 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 4889 */ 4890 static void 4891 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4892 { 4893 size_t err_sz; 4894 tl_endpt_t *tep; 4895 struct T_unitdata_req *udreq; 4896 mblk_t *err_mp; 4897 t_scalar_t alen; 4898 t_scalar_t olen; 4899 struct T_uderror_ind *uderr; 4900 uchar_t *addr_startp; 4901 4902 err_sz = sizeof (struct T_uderror_ind); 4903 tep = (tl_endpt_t *)wq->q_ptr; 4904 udreq = (struct T_unitdata_req *)mp->b_rptr; 4905 alen = udreq->DEST_length; 4906 olen = udreq->OPT_length; 4907 4908 if (alen > 0) 4909 err_sz = T_ALIGN(err_sz + alen); 4910 if (olen > 0) 4911 err_sz += olen; 4912 4913 err_mp = allocb(err_sz, BPRI_MED); 4914 if (! 
err_mp) { 4915 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4916 "tl_uderr:allocb failure")); 4917 /* 4918 * Note: no rollback of state needed as it does 4919 * not change in connectionless transport 4920 */ 4921 tl_memrecover(wq, mp, err_sz); 4922 return; 4923 } 4924 4925 DB_TYPE(err_mp) = M_PROTO; 4926 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4927 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4928 uderr->PRIM_type = T_UDERROR_IND; 4929 uderr->ERROR_type = err; 4930 uderr->DEST_length = alen; 4931 uderr->OPT_length = olen; 4932 if (alen <= 0) { 4933 uderr->DEST_offset = 0; 4934 } else { 4935 uderr->DEST_offset = 4936 (t_scalar_t)sizeof (struct T_uderror_ind); 4937 addr_startp = mp->b_rptr + udreq->DEST_offset; 4938 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4939 (size_t)alen); 4940 } 4941 if (olen <= 0) { 4942 uderr->OPT_offset = 0; 4943 } else { 4944 uderr->OPT_offset = 4945 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4946 uderr->DEST_length); 4947 addr_startp = mp->b_rptr + udreq->OPT_offset; 4948 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4949 (size_t)olen); 4950 } 4951 freemsg(mp); 4952 4953 /* 4954 * send indication message 4955 */ 4956 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4957 4958 qreply(wq, err_mp); 4959 } 4960 4961 static void 4962 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4963 { 4964 queue_t *wq = tep->te_wq; 4965 4966 if (!tep->te_closing && (wq->q_first != NULL)) { 4967 TL_PUTQ(tep, mp); 4968 } else if (tep->te_rq != NULL) 4969 tl_unitdata(mp, tep); 4970 else 4971 freemsg(mp); 4972 4973 tl_serializer_exit(tep); 4974 tl_refrele(tep); 4975 } 4976 4977 /* 4978 * Handle T_unitdata_req. 4979 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 4980 * If this is a socket pass through options unmodified. 
4981 */ 4982 static void 4983 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4984 { 4985 queue_t *wq = tep->te_wq; 4986 soux_addr_t ux_addr; 4987 tl_addr_t destaddr; 4988 uchar_t *addr_startp; 4989 tl_endpt_t *peer_tep; 4990 struct T_unitdata_ind *udind; 4991 struct T_unitdata_req *udreq; 4992 ssize_t msz, ui_sz; 4993 t_scalar_t alen, aoff, olen, ooff; 4994 t_scalar_t oldolen = 0; 4995 4996 udreq = (struct T_unitdata_req *)mp->b_rptr; 4997 msz = MBLKL(mp); 4998 4999 /* 5000 * validate the state 5001 */ 5002 if (tep->te_state != TS_IDLE) { 5003 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5004 SL_TRACE|SL_ERROR, 5005 "tl_wput:T_CONN_REQ:out of state")); 5006 tl_merror(wq, mp, EPROTO); 5007 return; 5008 } 5009 /* 5010 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5011 * (state does not change on this event) 5012 */ 5013 5014 /* 5015 * validate the message 5016 * Note: dereference fields in struct inside message only 5017 * after validating the message length. 5018 */ 5019 if (msz < sizeof (struct T_unitdata_req)) { 5020 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5021 "tl_unitdata:invalid message length")); 5022 tl_merror(wq, mp, EINVAL); 5023 return; 5024 } 5025 alen = udreq->DEST_length; 5026 aoff = udreq->DEST_offset; 5027 oldolen = olen = udreq->OPT_length; 5028 ooff = udreq->OPT_offset; 5029 if (olen == 0) 5030 ooff = 0; 5031 5032 if (IS_SOCKET(tep)) { 5033 if ((alen != TL_SOUX_ADDRLEN) || 5034 (aoff < 0) || 5035 (aoff + alen > msz) || 5036 (olen < 0) || (ooff < 0) || 5037 ((olen > 0) && ((ooff + olen) > msz))) { 5038 (void) (STRLOG(TL_ID, tep->te_minor, 5039 1, SL_TRACE|SL_ERROR, 5040 "tl_unitdata_req: invalid socket addr " 5041 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5042 (int)msz, alen, aoff, olen, ooff)); 5043 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5044 return; 5045 } 5046 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5047 5048 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5049 (ux_addr.soua_magic != 
SOU_MAGIC_EXPLICIT)) { 5050 (void) (STRLOG(TL_ID, tep->te_minor, 5051 1, SL_TRACE|SL_ERROR, 5052 "tl_conn_req: invalid socket magic")); 5053 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5054 return; 5055 } 5056 } else { 5057 if ((alen < 0) || 5058 (aoff < 0) || 5059 ((alen > 0) && ((aoff + alen) > msz)) || 5060 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5061 ((aoff + alen) < 0) || 5062 ((olen > 0) && ((ooff + olen) > msz)) || 5063 (olen < 0) || 5064 (ooff < 0) || 5065 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5066 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5067 SL_TRACE|SL_ERROR, 5068 "tl_unitdata:invalid unit data message")); 5069 tl_merror(wq, mp, EINVAL); 5070 return; 5071 } 5072 } 5073 5074 /* Options not supported unless it's a socket */ 5075 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5076 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5077 "tl_unitdata:option use(unsupported) or zero len addr")); 5078 tl_uderr(wq, mp, EPROTO); 5079 return; 5080 } 5081 #ifdef DEBUG 5082 /* 5083 * Mild form of ASSERT()ion to detect broken TPI apps. 5084 * if (! assertion) 5085 * log warning; 5086 */ 5087 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5088 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5089 "tl_unitdata:addr overlaps TPI message")); 5090 } 5091 #endif 5092 /* 5093 * get destination endpoint 5094 */ 5095 destaddr.ta_alen = alen; 5096 destaddr.ta_abuf = mp->b_rptr + aoff; 5097 destaddr.ta_zoneid = tep->te_zoneid; 5098 5099 /* 5100 * Check whether the destination is the same that was used previously 5101 * and the destination endpoint is in the right state. If something is 5102 * wrong, find destination again and cache it. 
5103 */ 5104 peer_tep = tep->te_lastep; 5105 5106 if ((peer_tep == NULL) || peer_tep->te_closing || 5107 (peer_tep->te_state != TS_IDLE) || 5108 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5109 /* 5110 * Not the same as cached destination , need to find the right 5111 * destination. 5112 */ 5113 peer_tep = (IS_SOCKET(tep) ? 5114 tl_sock_find_peer(tep, &ux_addr) : 5115 tl_find_peer(tep, &destaddr)); 5116 5117 if (peer_tep == NULL) { 5118 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5119 SL_TRACE|SL_ERROR, 5120 "tl_unitdata:no one at destination address")); 5121 tl_uderr(wq, mp, ECONNRESET); 5122 return; 5123 } 5124 5125 /* 5126 * Cache the new peer. 5127 */ 5128 if (tep->te_lastep != NULL) 5129 tl_refrele(tep->te_lastep); 5130 5131 tep->te_lastep = peer_tep; 5132 } 5133 5134 if (peer_tep->te_state != TS_IDLE) { 5135 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5136 "tl_unitdata:provider in invalid state")); 5137 tl_uderr(wq, mp, EPROTO); 5138 return; 5139 } 5140 5141 ASSERT(peer_tep->te_rq != NULL); 5142 5143 /* 5144 * Put it back if flow controlled except when we are closing. 5145 * Note: Messages already on queue when we are closing is bounded 5146 * so we can ignore flow control. 
5147 */ 5148 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5149 /* record what we are flow controlled on */ 5150 if (tep->te_flowq != NULL) { 5151 list_remove(&tep->te_flowq->te_flowlist, tep); 5152 } 5153 list_insert_head(&peer_tep->te_flowlist, tep); 5154 tep->te_flowq = peer_tep; 5155 TL_PUTBQ(tep, mp); 5156 return; 5157 } 5158 /* 5159 * prepare indication message 5160 */ 5161 5162 /* 5163 * calculate length of message 5164 */ 5165 if (peer_tep->te_flag & TL_SETCRED) { 5166 ASSERT(olen == 0); 5167 olen = (t_scalar_t)sizeof (struct opthdr) + 5168 OPTLEN(sizeof (tl_credopt_t)); 5169 /* 1 option only */ 5170 } else if (peer_tep->te_flag & TL_SETUCRED) { 5171 ASSERT(olen == 0); 5172 olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize); 5173 /* 1 option only */ 5174 } else if (peer_tep->te_flag & TL_SOCKUCRED) { 5175 /* Possibly more than one option */ 5176 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5177 OPTLEN(ucredsize); 5178 } 5179 5180 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5181 olen; 5182 /* 5183 * If the unitdata_ind fits and we are not adding options 5184 * reuse the udreq mblk. 5185 */ 5186 if (msz >= ui_sz && alen >= tep->te_alen && 5187 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5188 /* 5189 * Reuse the original mblk. Leave options in place. 5190 */ 5191 udind = (struct T_unitdata_ind *)mp->b_rptr; 5192 udind->PRIM_type = T_UNITDATA_IND; 5193 udind->SRC_length = tep->te_alen; 5194 addr_startp = mp->b_rptr + udind->SRC_offset; 5195 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5196 } else { 5197 /* Allocate a new T_unidata_ind message */ 5198 mblk_t *ui_mp; 5199 5200 ui_mp = allocb(ui_sz, BPRI_MED); 5201 if (! 
ui_mp) { 5202 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5203 "tl_unitdata:allocb failure:message queued")); 5204 tl_memrecover(wq, mp, ui_sz); 5205 return; 5206 } 5207 5208 /* 5209 * fill in T_UNITDATA_IND contents 5210 */ 5211 DB_TYPE(ui_mp) = M_PROTO; 5212 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5213 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5214 udind->PRIM_type = T_UNITDATA_IND; 5215 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5216 udind->SRC_length = tep->te_alen; 5217 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5218 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5219 udind->OPT_offset = 5220 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5221 udind->OPT_length = olen; 5222 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5223 cred_t *cr; 5224 pid_t cpid; 5225 5226 if (oldolen != 0) { 5227 bcopy((void *)((uintptr_t)udreq + ooff), 5228 (void *)((uintptr_t)udind + 5229 udind->OPT_offset), 5230 oldolen); 5231 } 5232 cr = msg_getcred(mp, &cpid); 5233 ASSERT(cr != NULL); 5234 5235 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5236 oldolen, cr, cpid, 5237 peer_tep->te_flag, peer_tep->te_credp); 5238 } else { 5239 bcopy((void *)((uintptr_t)udreq + ooff), 5240 (void *)((uintptr_t)udind + udind->OPT_offset), 5241 olen); 5242 } 5243 5244 /* 5245 * relink data blocks from mp to ui_mp 5246 */ 5247 ui_mp->b_cont = mp->b_cont; 5248 freeb(mp); 5249 mp = ui_mp; 5250 } 5251 /* 5252 * send indication message 5253 */ 5254 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5255 putnext(peer_tep->te_rq, mp); 5256 } 5257 5258 5259 5260 /* 5261 * Check if a given addr is in use. 5262 * Endpoint ptr returned or NULL if not found. 5263 * The name space is separate for each mode. This implies that 5264 * sockets get their own name space. 
5265 */ 5266 static tl_endpt_t * 5267 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5268 { 5269 tl_endpt_t *peer_tep = NULL; 5270 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5271 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5272 5273 ASSERT(! IS_SOCKET(tep)); 5274 5275 ASSERT(ap != NULL && ap->ta_alen > 0); 5276 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5277 ASSERT(ap->ta_abuf != NULL); 5278 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5279 ASSERT(IMPLY(rc == 0, 5280 (tep->te_zoneid == peer_tep->te_zoneid) && 5281 (tep->te_transport == peer_tep->te_transport))); 5282 5283 if ((rc == 0) && (peer_tep->te_closing)) { 5284 tl_refrele(peer_tep); 5285 peer_tep = NULL; 5286 } 5287 5288 return (peer_tep); 5289 } 5290 5291 /* 5292 * Find peer for a socket based on unix domain address. 5293 * For implicit addresses our peer can be found by minor number in ai hash. For 5294 * explicit binds we look vnode address at addr_hash. 5295 */ 5296 static tl_endpt_t * 5297 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5298 { 5299 tl_endpt_t *peer_tep = NULL; 5300 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5301 tep->te_aihash : tep->te_addrhash; 5302 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5303 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5304 5305 ASSERT(IS_SOCKET(tep)); 5306 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5307 ASSERT(IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport))); 5308 5309 if (peer_tep != NULL) { 5310 /* Don't attempt to use closing peer. */ 5311 if (peer_tep->te_closing) 5312 goto errout; 5313 5314 /* 5315 * Cross-zone unix sockets are permitted, but for Trusted 5316 * Extensions only, the "server" for these must be in the 5317 * global zone. 
5318 */ 5319 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5320 is_system_labeled() && 5321 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5322 goto errout; 5323 } 5324 5325 return (peer_tep); 5326 5327 errout: 5328 tl_refrele(peer_tep); 5329 return (NULL); 5330 } 5331 5332 /* 5333 * Generate a free addr and return it in struct pointed by ap 5334 * but allocating space for address buffer. 5335 * The generated address will be at least 4 bytes long and, if req->ta_alen 5336 * exceeds 4 bytes, be req->ta_alen bytes long. 5337 * 5338 * If address is found it will be inserted in the hash. 5339 * 5340 * If req->ta_alen is larger than the default alen (4 bytes) the last 5341 * alen-4 bytes will always be the same as in req. 5342 * 5343 * Return 0 for failure. 5344 * Return non-zero for success. 5345 */ 5346 static boolean_t 5347 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5348 { 5349 t_scalar_t alen; 5350 uint32_t loopcnt; /* Limit loop to 2^32 */ 5351 5352 ASSERT(tep->te_hash_hndl != NULL); 5353 ASSERT(! IS_SOCKET(tep)); 5354 5355 if (tep->te_hash_hndl == NULL) 5356 return (B_FALSE); 5357 5358 /* 5359 * check if default addr is in use 5360 * if it is - bump it and try again 5361 */ 5362 if (req == NULL) { 5363 alen = sizeof (uint32_t); 5364 } else { 5365 alen = max(req->ta_alen, sizeof (uint32_t)); 5366 ASSERT(tep->te_zoneid == req->ta_zoneid); 5367 } 5368 5369 if (tep->te_alen < alen) { 5370 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5371 5372 /* 5373 * Not enough space in tep->ta_ap to hold the address, 5374 * allocate a bigger space. 
5375 */ 5376 if (abuf == NULL) 5377 return (B_FALSE); 5378 5379 if (tep->te_alen > 0) 5380 kmem_free(tep->te_abuf, tep->te_alen); 5381 5382 tep->te_alen = alen; 5383 tep->te_abuf = abuf; 5384 } 5385 5386 /* Copy in the address in req */ 5387 if (req != NULL) { 5388 ASSERT(alen >= req->ta_alen); 5389 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5390 } 5391 5392 /* 5393 * First try minor number then try default addresses. 5394 */ 5395 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5396 5397 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5398 if (mod_hash_insert_reserve(tep->te_addrhash, 5399 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5400 tep->te_hash_hndl) == 0) { 5401 /* 5402 * found free address 5403 */ 5404 tep->te_flag |= TL_ADDRHASHED; 5405 tep->te_hash_hndl = NULL; 5406 5407 return (B_TRUE); /* successful return */ 5408 } 5409 /* 5410 * Use default address. 5411 */ 5412 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5413 atomic_add_32(&tep->te_defaddr, 1); 5414 } 5415 5416 /* 5417 * Failed to find anything. 5418 */ 5419 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5420 "tl_get_any_addr:looped 2^32 times")); 5421 return (B_FALSE); 5422 } 5423 5424 /* 5425 * reallocb + set r/w ptrs to reflect size. 5426 */ 5427 static mblk_t * 5428 tl_resizemp(mblk_t *mp, ssize_t new_size) 5429 { 5430 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5431 return (NULL); 5432 5433 mp->b_rptr = DB_BASE(mp); 5434 mp->b_wptr = mp->b_rptr + new_size; 5435 return (mp); 5436 } 5437 5438 static void 5439 tl_cl_backenable(tl_endpt_t *tep) 5440 { 5441 list_t *l = &tep->te_flowlist; 5442 tl_endpt_t *elp; 5443 5444 ASSERT(IS_CLTS(tep)); 5445 5446 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5447 ASSERT(tep->te_ser == elp->te_ser); 5448 ASSERT(elp->te_flowq == tep); 5449 if (! elp->te_closing) 5450 TL_QENABLE(elp); 5451 elp->te_flowq = NULL; 5452 list_remove(l, elp); 5453 } 5454 } 5455 5456 /* 5457 * Unconnect endpoints. 
5458 */ 5459 static void 5460 tl_co_unconnect(tl_endpt_t *tep) 5461 { 5462 tl_endpt_t *peer_tep = tep->te_conp; 5463 tl_endpt_t *srv_tep = tep->te_oconp; 5464 list_t *l; 5465 tl_icon_t *tip; 5466 tl_endpt_t *cl_tep; 5467 mblk_t *d_mp; 5468 5469 ASSERT(IS_COTS(tep)); 5470 /* 5471 * If our peer is closing, don't use it. 5472 */ 5473 if ((peer_tep != NULL) && peer_tep->te_closing) { 5474 TL_UNCONNECT(tep->te_conp); 5475 peer_tep = NULL; 5476 } 5477 if ((srv_tep != NULL) && srv_tep->te_closing) { 5478 TL_UNCONNECT(tep->te_oconp); 5479 srv_tep = NULL; 5480 } 5481 5482 if (tep->te_nicon > 0) { 5483 l = &tep->te_iconp; 5484 /* 5485 * If incoming requests pending, change state 5486 * of clients on disconnect ind event and send 5487 * discon_ind pdu to modules above them 5488 * for server: all clients get disconnect 5489 */ 5490 5491 while (tep->te_nicon > 0) { 5492 tip = list_head(l); 5493 cl_tep = tip->ti_tep; 5494 5495 if (cl_tep == NULL) { 5496 tl_freetip(tep, tip); 5497 continue; 5498 } 5499 5500 if (cl_tep->te_oconp != NULL) { 5501 ASSERT(cl_tep != cl_tep->te_oconp); 5502 TL_UNCONNECT(cl_tep->te_oconp); 5503 } 5504 5505 if (cl_tep->te_closing) { 5506 tl_freetip(tep, tip); 5507 continue; 5508 } 5509 5510 enableok(cl_tep->te_wq); 5511 TL_QENABLE(cl_tep); 5512 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5513 if (d_mp != NULL) { 5514 cl_tep->te_state = TS_IDLE; 5515 putnext(cl_tep->te_rq, d_mp); 5516 } else { 5517 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5518 SL_TRACE|SL_ERROR, 5519 "tl_co_unconnect:icmng: " 5520 "allocb failure")); 5521 } 5522 tl_freetip(tep, tip); 5523 } 5524 } else if (srv_tep != NULL) { 5525 /* 5526 * If outgoing request pending, change state 5527 * of server on discon ind event 5528 */ 5529 5530 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5531 IS_COTSORD(srv_tep) && 5532 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5533 /* 5534 * Queue ordrel_ind for server to be picked up 5535 * when the connection is accepted. 
5536 */ 5537 d_mp = tl_ordrel_ind_alloc(); 5538 } else { 5539 /* 5540 * send discon_ind to server 5541 */ 5542 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5543 } 5544 if (d_mp == NULL) { 5545 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5546 SL_TRACE|SL_ERROR, 5547 "tl_co_unconnect:outgoing:allocb failure")); 5548 TL_UNCONNECT(tep->te_oconp); 5549 goto discon_peer; 5550 } 5551 5552 /* 5553 * If this is a socket the T_DISCON_IND is queued with 5554 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5555 * from the list of pending connections. 5556 * Note that when te_oconp is set the peer better have 5557 * a t_connind_t for the client. 5558 */ 5559 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5560 /* 5561 * Queue the disconnection message. 5562 */ 5563 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5564 } else { 5565 tip = tl_icon_find(srv_tep, tep->te_seqno); 5566 if (tip == NULL) { 5567 freemsg(d_mp); 5568 } else { 5569 ASSERT(tep == tip->ti_tep); 5570 ASSERT(tep->te_ser == srv_tep->te_ser); 5571 /* 5572 * Delete tip from the server list. 
5573 */ 5574 if (srv_tep->te_nicon == 1) { 5575 srv_tep->te_state = 5576 NEXTSTATE(TE_DISCON_IND2, 5577 srv_tep->te_state); 5578 } else { 5579 srv_tep->te_state = 5580 NEXTSTATE(TE_DISCON_IND3, 5581 srv_tep->te_state); 5582 } 5583 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5584 T_DISCON_IND); 5585 putnext(srv_tep->te_rq, d_mp); 5586 tl_freetip(srv_tep, tip); 5587 } 5588 TL_UNCONNECT(tep->te_oconp); 5589 srv_tep = NULL; 5590 } 5591 } else if (peer_tep != NULL) { 5592 /* 5593 * unconnect existing connection 5594 * If connected, change state of peer on 5595 * discon ind event and send discon ind pdu 5596 * to module above it 5597 */ 5598 5599 ASSERT(tep->te_ser == peer_tep->te_ser); 5600 if (IS_COTSORD(peer_tep) && 5601 (peer_tep->te_state == TS_WIND_ORDREL || 5602 peer_tep->te_state == TS_DATA_XFER)) { 5603 /* 5604 * send ordrel ind 5605 */ 5606 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5607 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5608 peer_tep->te_state, 5609 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5610 d_mp = tl_ordrel_ind_alloc(); 5611 if (! d_mp) { 5612 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5613 SL_TRACE|SL_ERROR, 5614 "tl_co_unconnect:connected:" 5615 "allocb failure")); 5616 /* 5617 * Continue with cleaning up peer as 5618 * this side may go away with the close 5619 */ 5620 TL_QENABLE(peer_tep); 5621 goto discon_peer; 5622 } 5623 peer_tep->te_state = 5624 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5625 5626 putnext(peer_tep->te_rq, d_mp); 5627 /* 5628 * Handle flow control case. This will generate 5629 * a t_discon_ind message with reason 0 if there 5630 * is data queued on the write side. 5631 */ 5632 TL_QENABLE(peer_tep); 5633 } else if (IS_COTSORD(peer_tep) && 5634 peer_tep->te_state == TS_WREQ_ORDREL) { 5635 /* 5636 * Sent an ordrel_ind. We send a discon with 5637 * with error 0 to inform that the peer is gone. 
5638 */ 5639 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5640 SL_TRACE|SL_ERROR, 5641 "tl_co_unconnect: discon in state %d", 5642 tep->te_state)); 5643 tl_discon_ind(peer_tep, 0); 5644 } else { 5645 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5646 SL_TRACE|SL_ERROR, 5647 "tl_co_unconnect: state %d", tep->te_state)); 5648 tl_discon_ind(peer_tep, ECONNRESET); 5649 } 5650 5651 discon_peer: 5652 /* 5653 * Disconnect cross-pointers only for close 5654 */ 5655 if (tep->te_closing) { 5656 peer_tep = tep->te_conp; 5657 TL_REMOVE_PEER(peer_tep->te_conp); 5658 TL_REMOVE_PEER(tep->te_conp); 5659 } 5660 } 5661 } 5662 5663 /* 5664 * Note: The following routine does not recover from allocb() 5665 * failures 5666 * The reason should be from the <sys/errno.h> space. 5667 */ 5668 static void 5669 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5670 { 5671 mblk_t *d_mp; 5672 5673 if (tep->te_closing) 5674 return; 5675 5676 /* 5677 * flush the queues. 5678 */ 5679 flushq(tep->te_rq, FLUSHDATA); 5680 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5681 5682 /* 5683 * send discon ind 5684 */ 5685 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5686 if (! d_mp) { 5687 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5688 "tl_discon_ind:allocb failure")); 5689 return; 5690 } 5691 tep->te_state = TS_IDLE; 5692 putnext(tep->te_rq, d_mp); 5693 } 5694 5695 /* 5696 * Note: The following routine does not recover from allocb() 5697 * failures 5698 * The reason should be from the <sys/errno.h> space. 
5699 */ 5700 static mblk_t * 5701 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5702 { 5703 mblk_t *mp; 5704 struct T_discon_ind *tdi; 5705 5706 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5707 DB_TYPE(mp) = M_PROTO; 5708 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5709 tdi = (struct T_discon_ind *)mp->b_rptr; 5710 tdi->PRIM_type = T_DISCON_IND; 5711 tdi->DISCON_reason = reason; 5712 tdi->SEQ_number = seqnum; 5713 } 5714 return (mp); 5715 } 5716 5717 5718 /* 5719 * Note: The following routine does not recover from allocb() 5720 * failures 5721 */ 5722 static mblk_t * 5723 tl_ordrel_ind_alloc(void) 5724 { 5725 mblk_t *mp; 5726 struct T_ordrel_ind *toi; 5727 5728 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5729 DB_TYPE(mp) = M_PROTO; 5730 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5731 toi = (struct T_ordrel_ind *)mp->b_rptr; 5732 toi->PRIM_type = T_ORDREL_IND; 5733 } 5734 return (mp); 5735 } 5736 5737 5738 /* 5739 * Lookup the seqno in the list of queued connections. 5740 */ 5741 static tl_icon_t * 5742 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5743 { 5744 list_t *l = &tep->te_iconp; 5745 tl_icon_t *tip = list_head(l); 5746 5747 ASSERT(seqno != 0); 5748 5749 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5750 ; 5751 5752 return (tip); 5753 } 5754 5755 /* 5756 * Queue data for a given T_CONN_IND while verifying that redundant 5757 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5758 * Used when the originator of the connection closes. 
5759 */ 5760 static void 5761 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5762 { 5763 tl_icon_t *tip; 5764 mblk_t **mpp, *mp; 5765 int prim, nprim; 5766 5767 if (nmp->b_datap->db_type == M_PROTO) 5768 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5769 else 5770 nprim = -1; /* M_DATA */ 5771 5772 tip = tl_icon_find(tep, seqno); 5773 if (tip == NULL) { 5774 freemsg(nmp); 5775 return; 5776 } 5777 5778 ASSERT(tip->ti_seqno != 0); 5779 mpp = &tip->ti_mp; 5780 while (*mpp != NULL) { 5781 mp = *mpp; 5782 5783 if (mp->b_datap->db_type == M_PROTO) 5784 prim = ((union T_primitives *)mp->b_rptr)->type; 5785 else 5786 prim = -1; /* M_DATA */ 5787 5788 /* 5789 * Allow nothing after a T_DISCON_IND 5790 */ 5791 if (prim == T_DISCON_IND) { 5792 freemsg(nmp); 5793 return; 5794 } 5795 /* 5796 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5797 */ 5798 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5799 freemsg(nmp); 5800 return; 5801 } 5802 mpp = &(mp->b_next); 5803 } 5804 *mpp = nmp; 5805 } 5806 5807 /* 5808 * Verify if a certain TPI primitive exists on the connind queue. 5809 * Use prim -1 for M_DATA. 5810 * Return non-zero if found. 5811 */ 5812 static boolean_t 5813 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5814 { 5815 tl_icon_t *tip = tl_icon_find(tep, seqno); 5816 boolean_t found = B_FALSE; 5817 5818 if (tip != NULL) { 5819 mblk_t *mp; 5820 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5821 found = (DB_TYPE(mp) == M_PROTO && 5822 ((union T_primitives *)mp->b_rptr)->type == prim); 5823 } 5824 } 5825 return (found); 5826 } 5827 5828 /* 5829 * Send the b_next mblk chain that has accumulated before the connection 5830 * was accepted. Perform the necessary state transitions. 
5831 */ 5832 static void 5833 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5834 { 5835 mblk_t *mp; 5836 union T_primitives *primp; 5837 5838 if (tep->te_closing) { 5839 tl_icon_freemsgs(mpp); 5840 return; 5841 } 5842 5843 ASSERT(tep->te_state == TS_DATA_XFER); 5844 ASSERT(tep->te_rq->q_first == NULL); 5845 5846 while ((mp = *mpp) != NULL) { 5847 *mpp = mp->b_next; 5848 mp->b_next = NULL; 5849 5850 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5851 switch (DB_TYPE(mp)) { 5852 default: 5853 freemsg(mp); 5854 break; 5855 case M_DATA: 5856 putnext(tep->te_rq, mp); 5857 break; 5858 case M_PROTO: 5859 primp = (union T_primitives *)mp->b_rptr; 5860 switch (primp->type) { 5861 case T_UNITDATA_IND: 5862 case T_DATA_IND: 5863 case T_OPTDATA_IND: 5864 case T_EXDATA_IND: 5865 putnext(tep->te_rq, mp); 5866 break; 5867 case T_ORDREL_IND: 5868 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5869 tep->te_state); 5870 putnext(tep->te_rq, mp); 5871 break; 5872 case T_DISCON_IND: 5873 tep->te_state = TS_IDLE; 5874 putnext(tep->te_rq, mp); 5875 break; 5876 default: 5877 #ifdef DEBUG 5878 cmn_err(CE_PANIC, 5879 "tl_icon_sendmsgs: unknown primitive"); 5880 #endif /* DEBUG */ 5881 freemsg(mp); 5882 break; 5883 } 5884 break; 5885 } 5886 } 5887 } 5888 5889 /* 5890 * Free the b_next mblk chain that has accumulated before the connection 5891 * was accepted. 5892 */ 5893 static void 5894 tl_icon_freemsgs(mblk_t **mpp) 5895 { 5896 mblk_t *mp; 5897 5898 while ((mp = *mpp) != NULL) { 5899 *mpp = mp->b_next; 5900 mp->b_next = NULL; 5901 freemsg(mp); 5902 } 5903 } 5904 5905 /* 5906 * Send M_ERROR 5907 * Note: assumes caller ensured enough space in mp or enough 5908 * memory available. 
Does not attempt recovery from allocb() 5909 * failures 5910 */ 5911 5912 static void 5913 tl_merror(queue_t *wq, mblk_t *mp, int error) 5914 { 5915 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5916 5917 if (tep->te_closing) { 5918 freemsg(mp); 5919 return; 5920 } 5921 5922 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5923 SL_TRACE|SL_ERROR, 5924 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5925 5926 /* 5927 * flush all messages on queue. we are shutting 5928 * the stream down on fatal error 5929 */ 5930 flushq(wq, FLUSHALL); 5931 if (IS_COTS(tep)) { 5932 /* connection oriented - unconnect endpoints */ 5933 tl_co_unconnect(tep); 5934 } 5935 if (mp->b_cont) { 5936 freemsg(mp->b_cont); 5937 mp->b_cont = NULL; 5938 } 5939 5940 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5941 freemsg(mp); 5942 mp = allocb(1, BPRI_HI); 5943 if (!mp) { 5944 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5945 SL_TRACE|SL_ERROR, 5946 "tl_merror:M_PROTO: out of memory")); 5947 return; 5948 } 5949 } 5950 if (mp) { 5951 DB_TYPE(mp) = M_ERROR; 5952 mp->b_rptr = DB_BASE(mp); 5953 *mp->b_rptr = (char)error; 5954 mp->b_wptr = mp->b_rptr + sizeof (char); 5955 qreply(wq, mp); 5956 } else { 5957 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5958 } 5959 } 5960 5961 static void 5962 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5963 { 5964 ASSERT(cr != NULL); 5965 5966 if (flag & TL_SETCRED) { 5967 struct opthdr *opt = (struct opthdr *)buf; 5968 tl_credopt_t *tlcred; 5969 5970 opt->level = TL_PROT_LEVEL; 5971 opt->name = TL_OPT_PEER_CRED; 5972 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5973 5974 tlcred = (tl_credopt_t *)(opt + 1); 5975 tlcred->tc_uid = crgetuid(cr); 5976 tlcred->tc_gid = crgetgid(cr); 5977 tlcred->tc_ruid = crgetruid(cr); 5978 tlcred->tc_rgid = crgetrgid(cr); 5979 tlcred->tc_suid = crgetsuid(cr); 5980 tlcred->tc_sgid = crgetsgid(cr); 5981 tlcred->tc_ngroups = crgetngroups(cr); 5982 } else if (flag & TL_SETUCRED) { 5983 struct opthdr *opt = (struct 
opthdr *)buf; 5984 5985 opt->level = TL_PROT_LEVEL; 5986 opt->name = TL_OPT_PEER_UCRED; 5987 opt->len = (t_uscalar_t)OPTLEN(ucredsize); 5988 5989 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5990 } else { 5991 struct T_opthdr *topt = (struct T_opthdr *)buf; 5992 ASSERT(flag & TL_SOCKUCRED); 5993 5994 topt->level = SOL_SOCKET; 5995 topt->name = SCM_UCRED; 5996 topt->len = ucredsize + sizeof (*topt); 5997 topt->status = 0; 5998 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 5999 } 6000 } 6001 6002 /* ARGSUSED */ 6003 static int 6004 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6005 { 6006 /* no default value processed in protocol specific code currently */ 6007 return (-1); 6008 } 6009 6010 /* ARGSUSED */ 6011 static int 6012 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6013 { 6014 int len; 6015 tl_endpt_t *tep; 6016 int *valp; 6017 6018 tep = (tl_endpt_t *)wq->q_ptr; 6019 6020 len = 0; 6021 6022 /* 6023 * Assumes: option level and name sanity check done elsewhere 6024 */ 6025 6026 switch (level) { 6027 case SOL_SOCKET: 6028 if (! IS_SOCKET(tep)) 6029 break; 6030 switch (name) { 6031 case SO_RECVUCRED: 6032 len = sizeof (int); 6033 valp = (int *)ptr; 6034 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6035 break; 6036 default: 6037 break; 6038 } 6039 break; 6040 case TL_PROT_LEVEL: 6041 switch (name) { 6042 case TL_OPT_PEER_CRED: 6043 case TL_OPT_PEER_UCRED: 6044 /* 6045 * option not supposed to retrieved directly 6046 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6047 * when some internal flags set by other options 6048 * Direct retrieval always designed to fail(ignored) 6049 * for this option. 
6050 */ 6051 break; 6052 } 6053 } 6054 return (len); 6055 } 6056 6057 /* ARGSUSED */ 6058 static int 6059 tl_set_opt( 6060 queue_t *wq, 6061 uint_t mgmt_flags, 6062 int level, 6063 int name, 6064 uint_t inlen, 6065 uchar_t *invalp, 6066 uint_t *outlenp, 6067 uchar_t *outvalp, 6068 void *thisdg_attrs, 6069 cred_t *cr, 6070 mblk_t *mblk) 6071 { 6072 int error; 6073 tl_endpt_t *tep; 6074 6075 tep = (tl_endpt_t *)wq->q_ptr; 6076 6077 error = 0; /* NOERROR */ 6078 6079 /* 6080 * Assumes: option level and name sanity checks done elsewhere 6081 */ 6082 6083 switch (level) { 6084 case SOL_SOCKET: 6085 if (! IS_SOCKET(tep)) { 6086 error = EINVAL; 6087 break; 6088 } 6089 /* 6090 * TBD: fill in other AF_UNIX socket options and then stop 6091 * returning error. 6092 */ 6093 switch (name) { 6094 case SO_RECVUCRED: 6095 /* 6096 * We only support this for datagram sockets; 6097 * getpeerucred handles the connection oriented 6098 * transports. 6099 */ 6100 if (! IS_CLTS(tep)) { 6101 error = EINVAL; 6102 break; 6103 } 6104 if (*(int *)invalp == 0) 6105 tep->te_flag &= ~TL_SOCKUCRED; 6106 else 6107 tep->te_flag |= TL_SOCKUCRED; 6108 break; 6109 default: 6110 error = EINVAL; 6111 break; 6112 } 6113 break; 6114 case TL_PROT_LEVEL: 6115 switch (name) { 6116 case TL_OPT_PEER_CRED: 6117 case TL_OPT_PEER_UCRED: 6118 /* 6119 * option not supposed to be set directly 6120 * Its value in initialized for each endpoint at 6121 * driver open time. 6122 * Direct setting always designed to fail for this 6123 * option. 
6124 */ 6125 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6126 SL_TRACE|SL_ERROR, 6127 "tl_set_opt: option is not supported")); 6128 error = EPROTO; 6129 break; 6130 } 6131 } 6132 return (error); 6133 } 6134 6135 6136 static void 6137 tl_timer(void *arg) 6138 { 6139 queue_t *wq = arg; 6140 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6141 6142 ASSERT(tep); 6143 6144 tep->te_timoutid = 0; 6145 6146 enableok(wq); 6147 /* 6148 * Note: can call wsrv directly here and save context switch 6149 * Consider change when qtimeout (not timeout) is active 6150 */ 6151 qenable(wq); 6152 } 6153 6154 static void 6155 tl_buffer(void *arg) 6156 { 6157 queue_t *wq = arg; 6158 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6159 6160 ASSERT(tep); 6161 6162 tep->te_bufcid = 0; 6163 tep->te_nowsrv = B_FALSE; 6164 6165 enableok(wq); 6166 /* 6167 * Note: can call wsrv directly here and save context switch 6168 * Consider change when qbufcall (not bufcall) is active 6169 */ 6170 qenable(wq); 6171 } 6172 6173 static void 6174 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6175 { 6176 tl_endpt_t *tep; 6177 6178 tep = (tl_endpt_t *)wq->q_ptr; 6179 6180 if (tep->te_closing) { 6181 freemsg(mp); 6182 return; 6183 } 6184 noenable(wq); 6185 6186 (void) insq(wq, wq->q_first, mp); 6187 6188 if (tep->te_bufcid || tep->te_timoutid) { 6189 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6190 "tl_memrecover:recover %p pending", (void *)wq)); 6191 return; 6192 } 6193 6194 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6195 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6196 drv_usectohz(TL_BUFWAIT)); 6197 } 6198 } 6199 6200 static void 6201 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6202 { 6203 ASSERT(tip->ti_seqno != 0); 6204 6205 if (tip->ti_mp != NULL) { 6206 tl_icon_freemsgs(&tip->ti_mp); 6207 tip->ti_mp = NULL; 6208 } 6209 if (tip->ti_tep != NULL) { 6210 tl_refrele(tip->ti_tep); 6211 tip->ti_tep = NULL; 6212 } 6213 list_remove(&tep->te_iconp, tip); 6214 kmem_free(tip, 
sizeof (tl_icon_t)); 6215 tep->te_nicon--; 6216 } 6217 6218 /* 6219 * Remove address from address hash. 6220 */ 6221 static void 6222 tl_addr_unbind(tl_endpt_t *tep) 6223 { 6224 tl_endpt_t *elp; 6225 6226 if (tep->te_flag & TL_ADDRHASHED) { 6227 if (IS_SOCKET(tep)) { 6228 (void) mod_hash_remove(tep->te_addrhash, 6229 (mod_hash_key_t)tep->te_vp, 6230 (mod_hash_val_t *)&elp); 6231 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6232 tep->te_magic = SOU_MAGIC_IMPLICIT; 6233 } else { 6234 (void) mod_hash_remove(tep->te_addrhash, 6235 (mod_hash_key_t)&tep->te_ap, 6236 (mod_hash_val_t *)&elp); 6237 (void) kmem_free(tep->te_abuf, tep->te_alen); 6238 tep->te_alen = -1; 6239 tep->te_abuf = NULL; 6240 } 6241 tep->te_flag &= ~TL_ADDRHASHED; 6242 } 6243 } 6244