1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multithreaded STREAMS Local Transport Provider. 28 * 29 * OVERVIEW 30 * ======== 31 * 32 * This driver provides TLI as well as socket semantics. It provides 33 * connectionless, connection oriented, and connection oriented with orderly 34 * release transports for TLI and sockets. Each transport type has separate name 35 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 36 * this removes any name space conflicts when binding to socket style transport 37 * addresses. 38 * 39 * NOTE: There is one exception: Socket ticots and ticotsord transports share 40 * the same namespace. In fact, sockets always use ticotsord type transport. 41 * 42 * The driver mode is specified during open() by the minor number used for 43 * open. 44 * 45 * The sockets in addition have the following semantic differences: 46 * No support for passing up credentials (TL_SET[U]CRED). 
 *
 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 * T_OPTDATA_IND.
 *
 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
 * a T_CONN_RES is received from the acceptor. This means that a socket
 * connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed behind
 * the serializer and are, essentially, single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
 * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener serializer. During T_CONN_RES processing the eager serializer is
 * switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer. To avoid races
 * with endpoint closes while its serializer may be changing, closes are
 * blocked while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after an endpoint closed. The te_closing being set on the endpoint indicates
 * that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with the
 * lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using te_closing flag. It is
 * usually enough to check for te_closing flag since all other state changes
 * happen after this flag is set and the close entered serializer. Immediately
 * after setting te_closing flag tl_close() enters serializer and waits until
 * the callback finishes. This allows all functions called within serializer to
 * simply check te_closing without any locks.
 *
 * Serializer management.
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is constructed
 * and destroyed when the endpoint is destructed. CLTS transports use global
 * serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * the serializer destruction. The serializer should never be destroyed by any
 * function executed inside serializer.
This means that close has to wait till
 * all serializer activity for this endpoint is finished before it can drop the
 * last reference on the endpoint (which may as well free the serializer). This
 * is only relevant for COTS transports which manage serializers
 * dynamically. For CLTS transports close may complete without waiting for all
 * serializer activity to finish since serializer is only destroyed at driver
 * detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing
 * client serializer if it has any pending messages on the serializer and
 * instead moves acceptor to listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses modhash hash table implementation. Each transport uses two
 * hash tables - one for finding endpoints by acceptor ID and another one for
 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 * pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash table lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters serializer twice on close(). The close sequence is the
 * following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 *    This step is needed to prevent close during serializer switches. In most
 *    cases (close happening after connection establishment) te_closewait is
 *    zero.
 * 1) Set te_closing.
 * 2) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *    tl_close_ser() simply marks the endpoint and wakes up waiting tl_close().
 *    It also needs to clear write-side q_next pointers - this should be done
 *    before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 3) Calls qprocsoff().
 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to
 *    complete (for COTS transports). For CLTS transport there is no wait.
 *
 *    tl_close_finish_ser() finishes the close process and wakes up waiting
 *    close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one calls
 * putq() on the read queue. The read side service routine tl_rsrv() is called
 * when the read side stream is back-enabled. It enters serializer synchronously
 * (waits till serializer processing is complete). Within serializer it
 * back-enables all endpoints blocked by the queue for connection-less
 * transports and enables write side service processing for the peer for
 * connection-oriented transports.
 *
 * Read and write side service routines use special mblk_sized space in the
 * endpoint structure to enter perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 * queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput().
In order to maintain the ordering between these two queues the 201 * STREAMS queue is only manipulated within the serializer, so the ordering is 202 * provided by the serializer. 203 * 204 * Functions called from the tl_wsrv() sometimes may call putbq(). To 205 * immediately stop any further processing of the STREAMS message queues the 206 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 207 * side service processing stops when the flag is set. 208 * 209 * The tl_wsrv() function enters serializer synchronously and waits for it to 210 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 211 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 212 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 213 * always bounded by the amount of messages on the STREAMS queue at the time 214 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 215 * queue from another serialized entry which can't happen in parallel. This 216 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 217 * of it draining forever while writer places new messages on the STREAMS 218 * queue). 219 * 220 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 221 * 222 * 223 * Unix Domain Sockets 224 * =================== 225 * 226 * The driver knows the structure of Unix Domain sockets addresses and treats 227 * them differently from generic TLI addresses. For sockets implicit binds are 228 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 229 * instead of using address length of zero. Explicit binds specify 230 * SOU_MAGIC_EXPLICIT as magic. 231 * 232 * For implicit binds we always use minor number as soua_vp part of the address 233 * and avoid any hash table lookups. This saves two hash tables lookups per 234 * anonymous bind. 
 *
 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 * tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses minor number as acceptor id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 *    is requested and the endpoint is already in use. There is no point in
 *    generating an unused address since this address will be rejected by
 *    sockfs anyway. For implicit binds it always generates a new address
 *    (sets soua_vp to its minor number).
 *
 * 2) It always uses minor number as acceptor ID and never uses queue
 *    pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 *    message and they do not use the queue pointer.
 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with non-zero
 *    backlog, so sotpi_listen() issues unbind request followed by bind
 *    request to the same address but with a non-zero qlen value. Both
 *    tl_bind() and tl_unbind() require write lock on the hash table to
 *    insert/remove the address. The driver does not remove the address from
 *    the hash for endpoints that are bound to the explicit address and have
 *    backlog of zero. During T_BIND_REQ processing if the address requested
 *    is equal to the address the endpoint already has it updates the backlog
 *    without reinserting the address in the hash table. This optimization
 *    avoids two hash table updates for each listener created.
It also
 *    avoids the problem of a "stolen" address when another listener may use
 *    the same address between the unbind and bind and suddenly listen() fails
 *    because address is in use even though the bind() succeeded.
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and one
 * for Sockets). Functions executing behind serializer can check or modify state
 * of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 * te_lastep field. The next time X talks to some address A it checks whether A
 * is the same as Y's address and if it is there is no need to lookup Y. If the
 * address is different or the state of Y is not appropriate (e.g. closed or not
 * idle) X does a lookup using tl_find_peer() and caches the new address.
 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 * on the endpoint found.
 *
 * During close of endpoint Y it doesn't try to remove itself from other
 * endpoints' caches. They will detect that Y is gone and will search the peer
 * endpoint again.
 *
 * Flow Control Handling.
 * ----------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t).
For connectionless transports it keeps a 311 * list of endpoints flow controlled by this one. 312 * 313 * Each transport type is represented by a per-transport data structure 314 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 315 * endpoint address hash tables for each transport. It also contains pointer to 316 * transport serializer for connectionless transports. 317 * 318 * Each endpoint keeps a link to its transport structure, so the code can find 319 * all per-transport information quickly. 320 */ 321 322 #include <sys/types.h> 323 #include <sys/inttypes.h> 324 #include <sys/stream.h> 325 #include <sys/stropts.h> 326 #define _SUN_TPI_VERSION 2 327 #include <sys/tihdr.h> 328 #include <sys/strlog.h> 329 #include <sys/debug.h> 330 #include <sys/cred.h> 331 #include <sys/errno.h> 332 #include <sys/kmem.h> 333 #include <sys/id_space.h> 334 #include <sys/modhash.h> 335 #include <sys/mkdev.h> 336 #include <sys/tl.h> 337 #include <sys/stat.h> 338 #include <sys/conf.h> 339 #include <sys/modctl.h> 340 #include <sys/strsun.h> 341 #include <sys/socket.h> 342 #include <sys/socketvar.h> 343 #include <sys/sysmacros.h> 344 #include <sys/xti_xtiopt.h> 345 #include <sys/ddi.h> 346 #include <sys/sunddi.h> 347 #include <sys/zone.h> 348 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 349 #include <inet/optcom.h> 350 #include <sys/strsubr.h> 351 #include <sys/ucred.h> 352 #include <sys/suntpi.h> 353 #include <sys/list.h> 354 #include <sys/serializer.h> 355 356 /* 357 * TBD List 358 * 14 Eliminate state changes through table 359 * 16. AF_UNIX socket options 360 * 17. connect() for ticlts 361 * 18. support for "netstat" to show AF_UNIX plus TLI local 362 * transport connections 363 * 21. 
sanity check to flushing on sending M_ERROR 364 */ 365 366 /* 367 * CONSTANT DECLARATIONS 368 * -------------------- 369 */ 370 371 /* 372 * Local declarations 373 */ 374 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 375 376 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 377 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 378 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 379 /* 380 * Hash tables size. 381 */ 382 #define TL_HASH_SIZE 311 383 384 /* 385 * Definitions for module_info 386 */ 387 #define TL_ID (104) /* module ID number */ 388 #define TL_NAME "tl" /* module name */ 389 #define TL_MINPSZ (0) /* min packet size */ 390 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 391 #define TL_HIWAT (16*1024) /* hi water mark */ 392 #define TL_LOWAT (256) /* lo water mark */ 393 /* 394 * Definition of minor numbers/modes for new transport provider modes. 395 * We view the socket use as a separate mode to get a separate name space. 
396 */ 397 #define TL_TICOTS 0 /* connection oriented transport */ 398 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 399 #define TL_TICLTS 2 /* connectionless transport */ 400 #define TL_UNUSED 3 401 #define TL_SOCKET 4 /* Socket */ 402 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 403 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 404 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 405 406 #define TL_MINOR_MASK 0x7 407 #define TL_MINOR_START (TL_TICLTS + 1) 408 409 /* 410 * LOCAL MACROS 411 */ 412 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 413 414 /* 415 * EXTERNAL VARIABLE DECLARATIONS 416 * ----------------------------- 417 */ 418 /* 419 * state table defined in the OS space.c 420 */ 421 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 422 423 /* 424 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 425 */ 426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 427 static int tl_close(queue_t *, int, cred_t *); 428 static void tl_wput(queue_t *, mblk_t *); 429 static void tl_wsrv(queue_t *); 430 static void tl_rsrv(queue_t *); 431 432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 435 436 437 /* 438 * GLOBAL DATA STRUCTURES AND VARIABLES 439 * ----------------------------------- 440 */ 441 442 /* 443 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 444 * For now, we only manage the SO_RECVUCRED option but we also have 445 * harmless dummy options to make things work with some common code we access. 
446 */ 447 opdes_t tl_opt_arr[] = { 448 /* The SO_TYPE is needed for the hack below */ 449 { 450 SO_TYPE, 451 SOL_SOCKET, 452 OA_R, 453 OA_R, 454 OP_NP, 455 OP_PASSNEXT, 456 sizeof (t_scalar_t), 457 0 458 }, 459 { 460 SO_RECVUCRED, 461 SOL_SOCKET, 462 OA_RW, 463 OA_RW, 464 OP_NP, 465 OP_PASSNEXT, 466 sizeof (int), 467 0 468 } 469 }; 470 471 /* 472 * Table of all supported levels 473 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 474 * any supported options so we need this info separately. 475 * 476 * This is needed only for topmost tpi providers. 477 */ 478 optlevel_t tl_valid_levels_arr[] = { 479 XTI_GENERIC, 480 SOL_SOCKET, 481 TL_PROT_LEVEL 482 }; 483 484 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 485 /* 486 * Current upper bound on the amount of space needed to return all options. 487 * Additional options with data size of sizeof(long) are handled automatically. 488 * Others need hand job. 489 */ 490 #define TL_MAX_OPT_BUF_LEN \ 491 ((A_CNT(tl_opt_arr) << 2) + \ 492 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 493 + 64 + sizeof (struct T_optmgmt_ack)) 494 495 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 496 497 /* 498 * transport addr structure 499 */ 500 typedef struct tl_addr { 501 zoneid_t ta_zoneid; /* Zone scope of address */ 502 t_scalar_t ta_alen; /* length of abuf */ 503 void *ta_abuf; /* the addr itself */ 504 } tl_addr_t; 505 506 /* 507 * Refcounted version of serializer. 508 */ 509 typedef struct tl_serializer { 510 uint_t ts_refcnt; 511 serializer_t *ts_serializer; 512 } tl_serializer_t; 513 514 /* 515 * Each transport type has a separate state. 516 * Per-transport state. 
517 */ 518 typedef struct tl_transport_state { 519 char *tr_name; 520 minor_t tr_minor; 521 uint32_t tr_defaddr; 522 mod_hash_t *tr_ai_hash; 523 mod_hash_t *tr_addr_hash; 524 tl_serializer_t *tr_serializer; 525 } tl_transport_state_t; 526 527 #define TL_DFADDR 0x1000 528 529 static tl_transport_state_t tl_transports[] = { 530 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 531 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 532 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 533 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 534 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 536 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 537 }; 538 539 #define TL_MAXTRANSPORT A_CNT(tl_transports) 540 541 struct tl_endpt; 542 typedef struct tl_endpt tl_endpt_t; 543 544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 545 546 /* 547 * Data structure used to represent pending connects. 548 * Records enough information so that the connecting peer can close 549 * before the connection gets accepted. 550 */ 551 typedef struct tl_icon { 552 list_node_t ti_node; 553 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 554 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 555 t_scalar_t ti_seqno; /* Sequence number */ 556 } tl_icon_t; 557 558 typedef struct so_ux_addr soux_addr_t; 559 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 560 561 /* 562 * Maximum number of unaccepted connection indications allowed per listener. 
563 */ 564 #define TL_MAXQLEN 4096 565 int tl_maxqlen = TL_MAXQLEN; 566 567 /* 568 * transport endpoint structure 569 */ 570 struct tl_endpt { 571 queue_t *te_rq; /* stream read queue */ 572 queue_t *te_wq; /* stream write queue */ 573 uint32_t te_refcnt; 574 int32_t te_state; /* TPI state of endpoint */ 575 minor_t te_minor; /* minor number */ 576 #define te_seqno te_minor 577 uint_t te_flag; /* flag field */ 578 boolean_t te_nowsrv; 579 tl_serializer_t *te_ser; /* Serializer to use */ 580 #define te_serializer te_ser->ts_serializer 581 582 soux_addr_t te_uxaddr; /* Socket address */ 583 #define te_magic te_uxaddr.soua_magic 584 #define te_vp te_uxaddr.soua_vp 585 tl_addr_t te_ap; /* addr bound to this endpt */ 586 #define te_zoneid te_ap.ta_zoneid 587 #define te_alen te_ap.ta_alen 588 #define te_abuf te_ap.ta_abuf 589 590 tl_transport_state_t *te_transport; 591 #define te_addrhash te_transport->tr_addr_hash 592 #define te_aihash te_transport->tr_ai_hash 593 #define te_defaddr te_transport->tr_defaddr 594 cred_t *te_credp; /* endpoint user credentials */ 595 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 596 597 /* 598 * State specific for connection-oriented and connectionless transports. 599 */ 600 union { 601 /* Connection-oriented state. */ 602 struct { 603 t_uscalar_t _te_nicon; /* count of conn requests */ 604 t_uscalar_t _te_qlen; /* max conn requests */ 605 tl_endpt_t *_te_oconp; /* conn request pending */ 606 tl_endpt_t *_te_conp; /* connected endpt */ 607 #ifndef _ILP32 608 void *_te_pad; 609 #endif 610 list_t _te_iconp; /* list of conn ind. pending */ 611 } _te_cots_state; 612 /* Connection-less state. */ 613 struct { 614 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 615 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 616 list_node_t _te_flows; /* lists of connections */ 617 list_t _te_flowlist; /* Who flowcontrols on me */ 618 } _te_clts_state; 619 } _te_transport_state; 620 #define te_nicon _te_transport_state._te_cots_state._te_nicon 621 #define te_qlen _te_transport_state._te_cots_state._te_qlen 622 #define te_oconp _te_transport_state._te_cots_state._te_oconp 623 #define te_conp _te_transport_state._te_cots_state._te_conp 624 #define te_iconp _te_transport_state._te_cots_state._te_iconp 625 #define te_lastep _te_transport_state._te_clts_state._te_lastep 626 #define te_flowq _te_transport_state._te_clts_state._te_flowq 627 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 628 #define te_flows _te_transport_state._te_clts_state._te_flows 629 630 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 631 timeout_id_t te_timoutid; /* outstanding timeout id */ 632 pid_t te_cpid; /* cached pid of endpoint */ 633 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 634 /* 635 * Pieces of the endpoint state needed for closing. 636 */ 637 kmutex_t te_closelock; 638 kcondvar_t te_closecv; 639 uint8_t te_closing; /* The endpoint started closing */ 640 uint8_t te_closewait; /* Wait in close until zero */ 641 mblk_t te_closemp; /* for entering serializer on close */ 642 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 643 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 644 kmutex_t te_srv_lock; 645 kcondvar_t te_srv_cv; 646 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 647 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 648 /* 649 * Pieces of the endpoint state needed for serializer transitions. 650 */ 651 kmutex_t te_ser_lock; /* Protects the count below */ 652 uint_t te_ser_count; /* Number of messages on serializer */ 653 }; 654 655 /* 656 * Flag values. Lower 4 bits specify that transport used. 
657 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 658 * they allow to identify the endpoint more easily. 659 */ 660 #define TL_LISTENER 0x00010 /* the listener endpoint */ 661 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 662 #define TL_EAGER 0x00040 /* connecting endpoint */ 663 #define TL_ACCEPTED 0x00080 /* accepted connection */ 664 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 665 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 666 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 667 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 668 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 669 /* 670 * Boolean checks for the endpoint type. 671 */ 672 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 673 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 674 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 675 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 676 677 #define TLPID(mp, tep) (DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp)) 678 679 /* 680 * Certain operations are always used together. These macros reduce the chance 681 * of missing a part of a combination. 682 */ 683 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 684 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 685 686 #define TL_PUTBQ(x, mp) { \ 687 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 688 (x)->te_nowsrv = B_TRUE; \ 689 (void) putbq((x)->te_wq, mp); \ 690 } 691 692 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 693 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 694 695 /* 696 * STREAMS driver glue data structures. 
697 */ 698 static struct module_info tl_minfo = { 699 TL_ID, /* mi_idnum */ 700 TL_NAME, /* mi_idname */ 701 TL_MINPSZ, /* mi_minpsz */ 702 TL_MAXPSZ, /* mi_maxpsz */ 703 TL_HIWAT, /* mi_hiwat */ 704 TL_LOWAT /* mi_lowat */ 705 }; 706 707 static struct qinit tl_rinit = { 708 NULL, /* qi_putp */ 709 (int (*)())tl_rsrv, /* qi_srvp */ 710 tl_open, /* qi_qopen */ 711 tl_close, /* qi_qclose */ 712 NULL, /* qi_qadmin */ 713 &tl_minfo, /* qi_minfo */ 714 NULL /* qi_mstat */ 715 }; 716 717 static struct qinit tl_winit = { 718 (int (*)())tl_wput, /* qi_putp */ 719 (int (*)())tl_wsrv, /* qi_srvp */ 720 NULL, /* qi_qopen */ 721 NULL, /* qi_qclose */ 722 NULL, /* qi_qadmin */ 723 &tl_minfo, /* qi_minfo */ 724 NULL /* qi_mstat */ 725 }; 726 727 static struct streamtab tlinfo = { 728 &tl_rinit, /* st_rdinit */ 729 &tl_winit, /* st_wrinit */ 730 NULL, /* st_muxrinit */ 731 NULL /* st_muxwrinit */ 732 }; 733 734 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 735 nulldev, tl_info, D_MP, &tlinfo); 736 737 static struct modldrv modldrv = { 738 &mod_driverops, /* Type of module -- pseudo driver here */ 739 "TPI Local Transport (tl)", 740 &tl_devops, /* driver ops */ 741 }; 742 743 /* 744 * Module linkage information for the kernel. 745 */ 746 static struct modlinkage modlinkage = { 747 MODREV_1, 748 &modldrv, 749 NULL 750 }; 751 752 /* 753 * Templates for response to info request 754 * Check sanity of unlimited connect data etc. 
755 */ 756 757 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 758 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 759 760 static struct T_info_ack tl_cots_info_ack = 761 { 762 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 763 T_INFINITE, /* TSDU size */ 764 T_INFINITE, /* ETSDU size */ 765 T_INFINITE, /* CDATA_size */ 766 T_INFINITE, /* DDATA_size */ 767 T_INFINITE, /* ADDR_size */ 768 T_INFINITE, /* OPT_size */ 769 0, /* TIDU_size - fill at run time */ 770 T_COTS, /* SERV_type */ 771 -1, /* CURRENT_state */ 772 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 773 }; 774 775 static struct T_info_ack tl_clts_info_ack = 776 { 777 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 778 0, /* TSDU_size - fill at run time */ 779 -2, /* ETSDU_size -2 => not supported */ 780 -2, /* CDATA_size -2 => not supported */ 781 -2, /* DDATA_size -2 => not supported */ 782 -1, /* ADDR_size -1 => unlimited */ 783 -1, /* OPT_size */ 784 0, /* TIDU_size - fill at run time */ 785 T_CLTS, /* SERV_type */ 786 -1, /* CURRENT_state */ 787 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 788 }; 789 790 /* 791 * private copy of devinfo pointer used in tl_info 792 */ 793 static dev_info_t *tl_dip; 794 795 /* 796 * Endpoints cache. 797 */ 798 static kmem_cache_t *tl_cache; 799 /* 800 * Minor number space. 801 */ 802 static id_space_t *tl_minors; 803 804 /* 805 * Default Data Unit size. 806 */ 807 static t_scalar_t tl_tidusz; 808 809 /* 810 * Size of hash tables. 811 */ 812 static size_t tl_hash_size = TL_HASH_SIZE; 813 814 /* 815 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 816 * for sockets. 
817 */ 818 static int tl_disable_early_connect = 0; 819 static int tl_client_closing_when_accepting; 820 821 static int tl_serializer_noswitch; 822 823 /* 824 * LOCAL FUNCTION PROTOTYPES 825 * ------------------------- 826 */ 827 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 828 static void tl_do_proto(mblk_t *, tl_endpt_t *); 829 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 830 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 831 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 832 t_scalar_t); 833 static void tl_bind(mblk_t *, tl_endpt_t *); 834 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 835 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 836 static void tl_unbind(mblk_t *, tl_endpt_t *); 837 static void tl_optmgmt(queue_t *, mblk_t *); 838 static void tl_conn_req(queue_t *, mblk_t *); 839 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 840 static void tl_conn_res(mblk_t *, tl_endpt_t *); 841 static void tl_discon_req(mblk_t *, tl_endpt_t *); 842 static void tl_capability_req(mblk_t *, tl_endpt_t *); 843 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 844 static void tl_info_req(mblk_t *, tl_endpt_t *); 845 static void tl_addr_req(mblk_t *, tl_endpt_t *); 846 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 847 static void tl_data(mblk_t *, tl_endpt_t *); 848 static void tl_exdata(mblk_t *, tl_endpt_t *); 849 static void tl_ordrel(mblk_t *, tl_endpt_t *); 850 static void tl_unitdata(mblk_t *, tl_endpt_t *); 851 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 852 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 853 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 854 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 855 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 856 static void tl_cl_backenable(tl_endpt_t *); 857 static void tl_co_unconnect(tl_endpt_t *); 858 static mblk_t *tl_resizemp(mblk_t *, ssize_t); 859 static void 
tl_discon_ind(tl_endpt_t *, uint32_t); 860 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 861 static mblk_t *tl_ordrel_ind_alloc(void); 862 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 863 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 864 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 865 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 866 static void tl_icon_freemsgs(mblk_t **); 867 static void tl_merror(queue_t *, mblk_t *, int); 868 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 869 static int tl_default_opt(queue_t *, int, int, uchar_t *); 870 static int tl_get_opt(queue_t *, int, int, uchar_t *); 871 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 872 uchar_t *, void *, cred_t *, mblk_t *); 873 static void tl_memrecover(queue_t *, mblk_t *, size_t); 874 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 875 static void tl_free(tl_endpt_t *); 876 static int tl_constructor(void *, void *, int); 877 static void tl_destructor(void *, void *); 878 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 879 static tl_serializer_t *tl_serializer_alloc(int); 880 static void tl_serializer_refhold(tl_serializer_t *); 881 static void tl_serializer_refrele(tl_serializer_t *); 882 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 883 static void tl_serializer_exit(tl_endpt_t *); 884 static boolean_t tl_noclose(tl_endpt_t *); 885 static void tl_closeok(tl_endpt_t *); 886 static void tl_refhold(tl_endpt_t *); 887 static void tl_refrele(tl_endpt_t *); 888 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 889 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 890 static void tl_close_ser(mblk_t *, tl_endpt_t *); 891 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 892 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 893 static void tl_proto_ser(mblk_t *, tl_endpt_t *); 894 static void 
tl_putq_ser(mblk_t *, tl_endpt_t *); 895 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 896 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 897 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 898 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 899 static void tl_addr_unbind(tl_endpt_t *); 900 901 /* 902 * Intialize option database object for TL 903 */ 904 905 optdb_obj_t tl_opt_obj = { 906 tl_default_opt, /* TL default value function pointer */ 907 tl_get_opt, /* TL get function pointer */ 908 tl_set_opt, /* TL set function pointer */ 909 B_TRUE, /* TL is tpi provider */ 910 TL_OPT_ARR_CNT, /* TL option database count of entries */ 911 tl_opt_arr, /* TL option database */ 912 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 913 tl_valid_levels_arr /* TL valid level array */ 914 }; 915 916 /* 917 * Logical operations. 918 * 919 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y 920 * should also be true. 921 * 922 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or false at 923 * the same time. 924 */ 925 #define IMPLY(X, Y) (!(X) || (Y)) 926 #define EQUIV(X, Y) (IMPLY(X, Y) && IMPLY(Y, X)) 927 928 /* 929 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 930 * --------------------------------------- 931 */ 932 933 /* 934 * Loadable module routines 935 */ 936 int 937 _init(void) 938 { 939 return (mod_install(&modlinkage)); 940 } 941 942 int 943 _fini(void) 944 { 945 return (mod_remove(&modlinkage)); 946 } 947 948 int 949 _info(struct modinfo *modinfop) 950 { 951 return (mod_info(&modlinkage, modinfop)); 952 } 953 954 /* 955 * Driver Entry Points and Other routines 956 */ 957 static int 958 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 959 { 960 int i; 961 char name[32]; 962 963 /* 964 * Resume from a checkpoint state. 965 */ 966 if (cmd == DDI_RESUME) 967 return (DDI_SUCCESS); 968 969 if (cmd != DDI_ATTACH) 970 return (DDI_FAILURE); 971 972 /* 973 * Deduce TIDU size to use. 
Note: "strmsgsz" being 0 has semantics that 974 * streams message sizes can be unlimited. We use a defined constant 975 * instead. 976 */ 977 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 978 979 /* 980 * Create subdevices for each transport. 981 */ 982 for (i = 0; i < TL_UNUSED; i++) { 983 if (ddi_create_minor_node(devi, 984 tl_transports[i].tr_name, 985 S_IFCHR, tl_transports[i].tr_minor, 986 DDI_PSEUDO, NULL) == DDI_FAILURE) { 987 ddi_remove_minor_node(devi, NULL); 988 return (DDI_FAILURE); 989 } 990 } 991 992 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 993 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 994 995 if (tl_cache == NULL) { 996 ddi_remove_minor_node(devi, NULL); 997 return (DDI_FAILURE); 998 } 999 1000 tl_minors = id_space_create("tl_minor_space", 1001 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 1002 1003 /* 1004 * Create ID space for minor numbers 1005 */ 1006 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1007 tl_transport_state_t *t = &tl_transports[i]; 1008 1009 if (i == TL_UNUSED) 1010 continue; 1011 1012 /* Socket COTSORD shares namespace with COTS */ 1013 if (i == TL_SOCK_COTSORD) { 1014 t->tr_ai_hash = 1015 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1016 ASSERT(t->tr_ai_hash != NULL); 1017 t->tr_addr_hash = 1018 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1019 ASSERT(t->tr_addr_hash != NULL); 1020 continue; 1021 } 1022 1023 /* 1024 * Create hash tables. 
1025 */ 1026 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1027 t->tr_name); 1028 #ifdef _ILP32 1029 if (i & TL_SOCKET) 1030 t->tr_ai_hash = 1031 mod_hash_create_idhash(name, tl_hash_size - 1, 1032 mod_hash_null_valdtor); 1033 else 1034 t->tr_ai_hash = 1035 mod_hash_create_ptrhash(name, tl_hash_size, 1036 mod_hash_null_valdtor, sizeof (queue_t)); 1037 #else 1038 t->tr_ai_hash = 1039 mod_hash_create_idhash(name, tl_hash_size - 1, 1040 mod_hash_null_valdtor); 1041 #endif /* _ILP32 */ 1042 1043 if (i & TL_SOCKET) { 1044 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1045 t->tr_name); 1046 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1047 tl_hash_size, mod_hash_null_valdtor, 1048 sizeof (uintptr_t)); 1049 } else { 1050 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1051 t->tr_name); 1052 t->tr_addr_hash = mod_hash_create_extended(name, 1053 tl_hash_size, mod_hash_null_keydtor, 1054 mod_hash_null_valdtor, 1055 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1056 } 1057 1058 /* Create serializer for connectionless transports. */ 1059 if (i & TL_TICLTS) 1060 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1061 } 1062 1063 tl_dip = devi; 1064 1065 return (DDI_SUCCESS); 1066 } 1067 1068 static int 1069 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1070 { 1071 int i; 1072 1073 if (cmd == DDI_SUSPEND) 1074 return (DDI_SUCCESS); 1075 1076 if (cmd != DDI_DETACH) 1077 return (DDI_FAILURE); 1078 1079 /* 1080 * Destroy arenas and hash tables. 
1081 */ 1082 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1083 tl_transport_state_t *t = &tl_transports[i]; 1084 1085 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1086 continue; 1087 1088 ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL)); 1089 if (t->tr_serializer != NULL) { 1090 tl_serializer_refrele(t->tr_serializer); 1091 t->tr_serializer = NULL; 1092 } 1093 1094 #ifdef _ILP32 1095 if (i & TL_SOCKET) 1096 mod_hash_destroy_idhash(t->tr_ai_hash); 1097 else 1098 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1099 #else 1100 mod_hash_destroy_idhash(t->tr_ai_hash); 1101 #endif /* _ILP32 */ 1102 t->tr_ai_hash = NULL; 1103 if (i & TL_SOCKET) 1104 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1105 else 1106 mod_hash_destroy_hash(t->tr_addr_hash); 1107 t->tr_addr_hash = NULL; 1108 } 1109 1110 kmem_cache_destroy(tl_cache); 1111 tl_cache = NULL; 1112 id_space_destroy(tl_minors); 1113 tl_minors = NULL; 1114 ddi_remove_minor_node(devi, NULL); 1115 return (DDI_SUCCESS); 1116 } 1117 1118 /* ARGSUSED */ 1119 static int 1120 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1121 { 1122 1123 int retcode = DDI_FAILURE; 1124 1125 switch (infocmd) { 1126 1127 case DDI_INFO_DEVT2DEVINFO: 1128 if (tl_dip != NULL) { 1129 *result = (void *)tl_dip; 1130 retcode = DDI_SUCCESS; 1131 } 1132 break; 1133 1134 case DDI_INFO_DEVT2INSTANCE: 1135 *result = (void *)0; 1136 retcode = DDI_SUCCESS; 1137 break; 1138 1139 default: 1140 break; 1141 } 1142 return (retcode); 1143 } 1144 1145 /* 1146 * Endpoint reference management. 
1147 */ 1148 static void 1149 tl_refhold(tl_endpt_t *tep) 1150 { 1151 atomic_add_32(&tep->te_refcnt, 1); 1152 } 1153 1154 static void 1155 tl_refrele(tl_endpt_t *tep) 1156 { 1157 ASSERT(tep->te_refcnt != 0); 1158 1159 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1160 tl_free(tep); 1161 } 1162 1163 /*ARGSUSED*/ 1164 static int 1165 tl_constructor(void *buf, void *cdrarg, int kmflags) 1166 { 1167 tl_endpt_t *tep = buf; 1168 1169 bzero(tep, sizeof (tl_endpt_t)); 1170 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1171 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1172 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1173 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1174 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1175 1176 return (0); 1177 } 1178 1179 /*ARGSUSED*/ 1180 static void 1181 tl_destructor(void *buf, void *cdrarg) 1182 { 1183 tl_endpt_t *tep = buf; 1184 1185 mutex_destroy(&tep->te_closelock); 1186 cv_destroy(&tep->te_closecv); 1187 mutex_destroy(&tep->te_srv_lock); 1188 cv_destroy(&tep->te_srv_cv); 1189 mutex_destroy(&tep->te_ser_lock); 1190 } 1191 1192 static void 1193 tl_free(tl_endpt_t *tep) 1194 { 1195 ASSERT(tep->te_refcnt == 0); 1196 ASSERT(tep->te_transport != NULL); 1197 ASSERT(tep->te_rq == NULL); 1198 ASSERT(tep->te_wq == NULL); 1199 ASSERT(tep->te_ser != NULL); 1200 ASSERT(tep->te_ser_count == 0); 1201 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1202 1203 if (IS_SOCKET(tep)) { 1204 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1205 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1206 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1207 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1208 } else if (tep->te_abuf != NULL) { 1209 kmem_free(tep->te_abuf, tep->te_alen); 1210 tep->te_alen = -1; /* uninitialized */ 1211 tep->te_abuf = NULL; 1212 } else { 1213 ASSERT(tep->te_alen == -1); 1214 } 1215 1216 id_free(tl_minors, tep->te_minor); 1217 ASSERT(tep->te_credp == NULL); 1218 1219 if (tep->te_hash_hndl != NULL) 1220 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1221 1222 if (IS_COTS(tep)) { 1223 TL_REMOVE_PEER(tep->te_conp); 1224 TL_REMOVE_PEER(tep->te_oconp); 1225 tl_serializer_refrele(tep->te_ser); 1226 tep->te_ser = NULL; 1227 ASSERT(tep->te_nicon == 0); 1228 ASSERT(list_head(&tep->te_iconp) == NULL); 1229 } else { 1230 ASSERT(tep->te_lastep == NULL); 1231 ASSERT(list_head(&tep->te_flowlist) == NULL); 1232 ASSERT(tep->te_flowq == NULL); 1233 } 1234 1235 ASSERT(tep->te_bufcid == 0); 1236 ASSERT(tep->te_timoutid == 0); 1237 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1238 tep->te_acceptor_id = 0; 1239 1240 ASSERT(tep->te_closewait == 0); 1241 ASSERT(!tep->te_rsrv_active); 1242 ASSERT(!tep->te_wsrv_active); 1243 tep->te_closing = 0; 1244 tep->te_nowsrv = B_FALSE; 1245 tep->te_flag = 0; 1246 1247 kmem_cache_free(tl_cache, tep); 1248 } 1249 1250 /* 1251 * Allocate/free reference-counted wrappers for serializers. 
1252 */ 1253 static tl_serializer_t * 1254 tl_serializer_alloc(int flags) 1255 { 1256 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1257 serializer_t *ser; 1258 1259 if (s == NULL) 1260 return (NULL); 1261 1262 ser = serializer_create(flags); 1263 1264 if (ser == NULL) { 1265 kmem_free(s, sizeof (tl_serializer_t)); 1266 return (NULL); 1267 } 1268 1269 s->ts_refcnt = 1; 1270 s->ts_serializer = ser; 1271 return (s); 1272 } 1273 1274 static void 1275 tl_serializer_refhold(tl_serializer_t *s) 1276 { 1277 atomic_add_32(&s->ts_refcnt, 1); 1278 } 1279 1280 static void 1281 tl_serializer_refrele(tl_serializer_t *s) 1282 { 1283 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1284 serializer_destroy(s->ts_serializer); 1285 kmem_free(s, sizeof (tl_serializer_t)); 1286 } 1287 } 1288 1289 /* 1290 * Post a request on the endpoint serializer. For COTS transports keep track of 1291 * the number of pending requests. 1292 */ 1293 static void 1294 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1295 { 1296 if (IS_COTS(tep)) { 1297 mutex_enter(&tep->te_ser_lock); 1298 tep->te_ser_count++; 1299 mutex_exit(&tep->te_ser_lock); 1300 } 1301 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1302 } 1303 1304 /* 1305 * Complete processing the request on the serializer. Decrement the counter for 1306 * pending requests for COTS transports. 1307 */ 1308 static void 1309 tl_serializer_exit(tl_endpt_t *tep) 1310 { 1311 if (IS_COTS(tep)) { 1312 mutex_enter(&tep->te_ser_lock); 1313 ASSERT(tep->te_ser_count != 0); 1314 tep->te_ser_count--; 1315 mutex_exit(&tep->te_ser_lock); 1316 } 1317 } 1318 1319 /* 1320 * Hash management functions. 1321 */ 1322 1323 /* 1324 * Return TRUE if two addresses are equal, false otherwise. 
1325 */ 1326 static boolean_t 1327 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1328 { 1329 return ((ap1->ta_alen > 0) && 1330 (ap1->ta_alen == ap2->ta_alen) && 1331 (ap1->ta_zoneid == ap2->ta_zoneid) && 1332 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1333 } 1334 1335 /* 1336 * This function is called whenever an endpoint is found in the hash table. 1337 */ 1338 /* ARGSUSED0 */ 1339 static void 1340 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1341 { 1342 tl_refhold((tl_endpt_t *)val); 1343 } 1344 1345 /* 1346 * Address hash function. 1347 */ 1348 /* ARGSUSED */ 1349 static uint_t 1350 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1351 { 1352 tl_addr_t *ap = (tl_addr_t *)key; 1353 size_t len = ap->ta_alen; 1354 uchar_t *p = ap->ta_abuf; 1355 uint_t i, g; 1356 1357 ASSERT((len > 0) && (p != NULL)); 1358 1359 for (i = ap->ta_zoneid; len -- != 0; p++) { 1360 i = (i << 4) + (*p); 1361 if ((g = (i & 0xf0000000U)) != 0) { 1362 i ^= (g >> 24); 1363 i ^= g; 1364 } 1365 } 1366 return (i); 1367 } 1368 1369 /* 1370 * This function is used by hash lookups. It compares two generic addresses. 1371 */ 1372 static int 1373 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1374 { 1375 #ifdef DEBUG 1376 tl_addr_t *ap1 = (tl_addr_t *)key1; 1377 tl_addr_t *ap2 = (tl_addr_t *)key2; 1378 1379 ASSERT(key1 != NULL); 1380 ASSERT(key2 != NULL); 1381 1382 ASSERT(ap1->ta_abuf != NULL); 1383 ASSERT(ap2->ta_abuf != NULL); 1384 ASSERT(ap1->ta_alen > 0); 1385 ASSERT(ap2->ta_alen > 0); 1386 #endif 1387 1388 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1389 } 1390 1391 /* 1392 * Prevent endpoint from closing if possible. 1393 * Return B_TRUE on success, B_FALSE on failure. 1394 */ 1395 static boolean_t 1396 tl_noclose(tl_endpt_t *tep) 1397 { 1398 boolean_t rc = B_FALSE; 1399 1400 mutex_enter(&tep->te_closelock); 1401 if (! 
tep->te_closing) { 1402 ASSERT(tep->te_closewait == 0); 1403 tep->te_closewait++; 1404 rc = B_TRUE; 1405 } 1406 mutex_exit(&tep->te_closelock); 1407 return (rc); 1408 } 1409 1410 /* 1411 * Allow endpoint to close if needed. 1412 */ 1413 static void 1414 tl_closeok(tl_endpt_t *tep) 1415 { 1416 ASSERT(tep->te_closewait > 0); 1417 mutex_enter(&tep->te_closelock); 1418 ASSERT(tep->te_closewait == 1); 1419 tep->te_closewait--; 1420 cv_signal(&tep->te_closecv); 1421 mutex_exit(&tep->te_closelock); 1422 } 1423 1424 /* 1425 * STREAMS open entry point. 1426 */ 1427 /* ARGSUSED */ 1428 static int 1429 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1430 { 1431 tl_endpt_t *tep; 1432 minor_t minor = getminor(*devp); 1433 1434 /* 1435 * Driver is called directly. Both CLONEOPEN and MODOPEN 1436 * are illegal 1437 */ 1438 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1439 return (ENXIO); 1440 1441 if (rq->q_ptr != NULL) 1442 return (0); 1443 1444 /* Minor number should specify the mode used for the driver. */ 1445 if ((minor >= TL_UNUSED)) 1446 return (ENXIO); 1447 1448 if (oflag & SO_SOCKSTR) { 1449 minor |= TL_SOCKET; 1450 } 1451 1452 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1453 tep->te_refcnt = 1; 1454 tep->te_cpid = curproc->p_pid; 1455 rq->q_ptr = WR(rq)->q_ptr = tep; 1456 tep->te_state = TS_UNBND; 1457 tep->te_credp = credp; 1458 crhold(credp); 1459 tep->te_zoneid = getzoneid(); 1460 1461 tep->te_flag = minor & TL_MINOR_MASK; 1462 tep->te_transport = &tl_transports[minor]; 1463 1464 /* Allocate a unique minor number for this instance. */ 1465 tep->te_minor = (minor_t)id_alloc(tl_minors); 1466 1467 /* Reserve hash handle for bind(). 
*/ 1468 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1469 1470 /* Transport-specific initialization */ 1471 if (IS_COTS(tep)) { 1472 /* Use private serializer */ 1473 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1474 1475 /* Create list for pending connections */ 1476 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1477 offsetof(tl_icon_t, ti_node)); 1478 tep->te_qlen = 0; 1479 tep->te_nicon = 0; 1480 tep->te_oconp = NULL; 1481 tep->te_conp = NULL; 1482 } else { 1483 /* Use shared serializer */ 1484 tep->te_ser = tep->te_transport->tr_serializer; 1485 bzero(&tep->te_flows, sizeof (list_node_t)); 1486 /* Create list for flow control */ 1487 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1488 offsetof(tl_endpt_t, te_flows)); 1489 tep->te_flowq = NULL; 1490 tep->te_lastep = NULL; 1491 1492 } 1493 1494 /* Initialize endpoint address */ 1495 if (IS_SOCKET(tep)) { 1496 /* Socket-specific address handling. */ 1497 tep->te_alen = TL_SOUX_ADDRLEN; 1498 tep->te_abuf = &tep->te_uxaddr; 1499 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1500 tep->te_magic = SOU_MAGIC_IMPLICIT; 1501 } else { 1502 tep->te_alen = -1; 1503 tep->te_abuf = NULL; 1504 } 1505 1506 /* clone the driver */ 1507 *devp = makedevice(getmajor(*devp), tep->te_minor); 1508 1509 tep->te_rq = rq; 1510 tep->te_wq = WR(rq); 1511 1512 #ifdef _ILP32 1513 if (IS_SOCKET(tep)) 1514 tep->te_acceptor_id = tep->te_minor; 1515 else 1516 tep->te_acceptor_id = (t_uscalar_t)rq; 1517 #else 1518 tep->te_acceptor_id = tep->te_minor; 1519 #endif /* _ILP32 */ 1520 1521 1522 qprocson(rq); 1523 1524 /* 1525 * Insert acceptor ID in the hash. The AI hash always sleeps on 1526 * insertion so insertion can't fail. 
1527 */ 1528 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1529 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1530 (mod_hash_val_t)tep); 1531 1532 return (0); 1533 } 1534 1535 /* ARGSUSED1 */ 1536 static int 1537 tl_close(queue_t *rq, int flag, cred_t *credp) 1538 { 1539 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1540 tl_endpt_t *elp = NULL; 1541 queue_t *wq = tep->te_wq; 1542 int rc; 1543 1544 ASSERT(wq == WR(rq)); 1545 1546 /* 1547 * Remove the endpoint from acceptor hash. 1548 */ 1549 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1550 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1551 (mod_hash_val_t *)&elp); 1552 ASSERT(rc == 0 && tep == elp); 1553 if ((rc != 0) || (tep != elp)) { 1554 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1555 SL_TRACE|SL_ERROR, 1556 "tl_close:inconsistency in AI hash")); 1557 } 1558 1559 /* 1560 * Wait till close is safe, then mark endpoint as closing. 1561 */ 1562 mutex_enter(&tep->te_closelock); 1563 while (tep->te_closewait) 1564 cv_wait(&tep->te_closecv, &tep->te_closelock); 1565 tep->te_closing = B_TRUE; 1566 /* 1567 * Will wait for the serializer part of the close to finish, so set 1568 * te_closewait now. 1569 */ 1570 tep->te_closewait = 1; 1571 tep->te_nowsrv = B_FALSE; 1572 mutex_exit(&tep->te_closelock); 1573 1574 /* 1575 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1576 * It is safe because close will wait for tl_close_ser to finish. 1577 */ 1578 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1579 1580 /* 1581 * Wait for the first phase of close to complete before qprocsoff(). 
1582 */ 1583 mutex_enter(&tep->te_closelock); 1584 while (tep->te_closewait) 1585 cv_wait(&tep->te_closecv, &tep->te_closelock); 1586 mutex_exit(&tep->te_closelock); 1587 1588 qprocsoff(rq); 1589 1590 if (tep->te_bufcid) { 1591 qunbufcall(rq, tep->te_bufcid); 1592 tep->te_bufcid = 0; 1593 } 1594 if (tep->te_timoutid) { 1595 (void) quntimeout(rq, tep->te_timoutid); 1596 tep->te_timoutid = 0; 1597 } 1598 1599 /* 1600 * Finish close behind serializer. 1601 * 1602 * For a CLTS endpoint increase a refcount and continue close processing 1603 * with serializer protection. This processing may happen asynchronously 1604 * with the completion of tl_close(). 1605 * 1606 * Fot a COTS endpoint wait before destroying tep since the serializer 1607 * may go away together with tep and we need to destroy serializer 1608 * outside of serializer context. 1609 */ 1610 ASSERT(tep->te_closewait == 0); 1611 if (IS_COTS(tep)) 1612 tep->te_closewait = 1; 1613 else 1614 tl_refhold(tep); 1615 1616 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1617 1618 /* 1619 * For connection-oriented transports wait for all serializer activity 1620 * to settle down. 1621 */ 1622 if (IS_COTS(tep)) { 1623 mutex_enter(&tep->te_closelock); 1624 while (tep->te_closewait) 1625 cv_wait(&tep->te_closecv, &tep->te_closelock); 1626 mutex_exit(&tep->te_closelock); 1627 } 1628 1629 crfree(tep->te_credp); 1630 tep->te_credp = NULL; 1631 tep->te_wq = NULL; 1632 tl_refrele(tep); 1633 /* 1634 * tep is likely to be destroyed now, so can't reference it any more. 1635 */ 1636 1637 rq->q_ptr = wq->q_ptr = NULL; 1638 return (0); 1639 } 1640 1641 /* 1642 * First phase of close processing done behind the serializer. 1643 * 1644 * Do not drop the reference in the end - tl_close() wants this reference to 1645 * stay. 
1646 */ 1647 /* ARGSUSED0 */ 1648 static void 1649 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1650 { 1651 ASSERT(tep->te_closing); 1652 ASSERT(tep->te_closewait == 1); 1653 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1654 1655 tep->te_flag |= TL_CLOSE_SER; 1656 1657 /* 1658 * Drain out all messages on queue except for TL_TICOTS where the 1659 * abortive release semantics permit discarding of data on close 1660 */ 1661 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1662 tl_wsrv_ser(NULL, tep); 1663 } 1664 1665 /* Remove address from hash table. */ 1666 tl_addr_unbind(tep); 1667 /* 1668 * qprocsoff() gets confused when q->q_next is not NULL on the write 1669 * queue of the driver, so clear these before qprocsoff() is called. 1670 * Also clear q_next for the peer since this queue is going away. 1671 */ 1672 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1673 tl_endpt_t *peer_tep = tep->te_conp; 1674 1675 tep->te_wq->q_next = NULL; 1676 if ((peer_tep != NULL) && !peer_tep->te_closing) 1677 peer_tep->te_wq->q_next = NULL; 1678 } 1679 1680 tep->te_rq = NULL; 1681 1682 /* wake up tl_close() */ 1683 tl_closeok(tep); 1684 tl_serializer_exit(tep); 1685 } 1686 1687 /* 1688 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1689 * the reference for CLTS. 1690 * 1691 * Called from serializer. Should drop reference count for CLTS only. 1692 */ 1693 /* ARGSUSED0 */ 1694 static void 1695 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1696 { 1697 ASSERT(tep->te_closing); 1698 ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0)); 1699 ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1)); 1700 1701 tep->te_state = -1; /* Uninitialized */ 1702 if (IS_COTS(tep)) { 1703 tl_co_unconnect(tep); 1704 } else { 1705 /* Connectionless specific cleanup */ 1706 TL_REMOVE_PEER(tep->te_lastep); 1707 /* 1708 * Backenable anybody that is flow controlled waiting for 1709 * this endpoint. 
1710 */ 1711 tl_cl_backenable(tep); 1712 if (tep->te_flowq != NULL) { 1713 list_remove(&(tep->te_flowq->te_flowlist), tep); 1714 tep->te_flowq = NULL; 1715 } 1716 } 1717 1718 tl_serializer_exit(tep); 1719 if (IS_COTS(tep)) 1720 tl_closeok(tep); 1721 else 1722 tl_refrele(tep); 1723 } 1724 1725 /* 1726 * STREAMS write-side put procedure. 1727 * Enter serializer for most of the processing. 1728 * 1729 * The T_CONN_REQ is processed outside of serializer. 1730 */ 1731 static void 1732 tl_wput(queue_t *wq, mblk_t *mp) 1733 { 1734 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1735 ssize_t msz = MBLKL(mp); 1736 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1737 tlproc_t *tl_proc = NULL; 1738 1739 switch (DB_TYPE(mp)) { 1740 case M_DATA: 1741 /* Only valid for connection-oriented transports */ 1742 if (IS_CLTS(tep)) { 1743 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1744 SL_TRACE|SL_ERROR, 1745 "tl_wput:M_DATA invalid for ticlts driver")); 1746 tl_merror(wq, mp, EPROTO); 1747 return; 1748 } 1749 tl_proc = tl_wput_data_ser; 1750 break; 1751 1752 case M_IOCTL: 1753 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1754 case TL_IOC_CREDOPT: 1755 /* FALLTHROUGH */ 1756 case TL_IOC_UCREDOPT: 1757 /* 1758 * Serialize endpoint state change. 
1759 */ 1760 tl_proc = tl_do_ioctl_ser; 1761 break; 1762 1763 default: 1764 miocnak(wq, mp, 0, EINVAL); 1765 return; 1766 } 1767 break; 1768 1769 case M_FLUSH: 1770 /* 1771 * do canonical M_FLUSH processing 1772 */ 1773 if (*mp->b_rptr & FLUSHW) { 1774 flushq(wq, FLUSHALL); 1775 *mp->b_rptr &= ~FLUSHW; 1776 } 1777 if (*mp->b_rptr & FLUSHR) { 1778 flushq(RD(wq), FLUSHALL); 1779 qreply(wq, mp); 1780 } else { 1781 freemsg(mp); 1782 } 1783 return; 1784 1785 case M_PROTO: 1786 if (msz < sizeof (prim->type)) { 1787 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1788 SL_TRACE|SL_ERROR, 1789 "tl_wput:M_PROTO data too short")); 1790 tl_merror(wq, mp, EPROTO); 1791 return; 1792 } 1793 switch (prim->type) { 1794 case T_OPTMGMT_REQ: 1795 case T_SVR4_OPTMGMT_REQ: 1796 /* 1797 * Process TPI option management requests immediately 1798 * in put procedure regardless of in-order processing 1799 * of already queued messages. 1800 * (Note: This driver supports AF_UNIX socket 1801 * implementation. Unless we implement this processing, 1802 * setsockopt() on socket endpoint will block on flow 1803 * controlled endpoints which it should not. That is 1804 * required for successful execution of VSU socket tests 1805 * and is consistent with BSD socket behavior). 
1806 */ 1807 tl_optmgmt(wq, mp); 1808 return; 1809 case O_T_BIND_REQ: 1810 case T_BIND_REQ: 1811 tl_proc = tl_bind_ser; 1812 break; 1813 case T_CONN_REQ: 1814 if (IS_CLTS(tep)) { 1815 tl_merror(wq, mp, EPROTO); 1816 return; 1817 } 1818 tl_conn_req(wq, mp); 1819 return; 1820 case T_DATA_REQ: 1821 case T_OPTDATA_REQ: 1822 case T_EXDATA_REQ: 1823 case T_ORDREL_REQ: 1824 tl_proc = tl_putq_ser; 1825 break; 1826 case T_UNITDATA_REQ: 1827 if (IS_COTS(tep) || 1828 (msz < sizeof (struct T_unitdata_req))) { 1829 tl_merror(wq, mp, EPROTO); 1830 return; 1831 } 1832 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1833 tl_proc = tl_unitdata_ser; 1834 } else { 1835 tl_proc = tl_putq_ser; 1836 } 1837 break; 1838 default: 1839 /* 1840 * process in service procedure if message already 1841 * queued (maintain in-order processing) 1842 */ 1843 if (wq->q_first != NULL) { 1844 tl_proc = tl_putq_ser; 1845 } else { 1846 tl_proc = tl_wput_ser; 1847 } 1848 break; 1849 } 1850 break; 1851 1852 case M_PCPROTO: 1853 /* 1854 * Check that the message has enough data to figure out TPI 1855 * primitive. 1856 */ 1857 if (msz < sizeof (prim->type)) { 1858 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1859 SL_TRACE|SL_ERROR, 1860 "tl_wput:M_PCROTO data too short")); 1861 tl_merror(wq, mp, EPROTO); 1862 return; 1863 } 1864 switch (prim->type) { 1865 case T_CAPABILITY_REQ: 1866 tl_capability_req(mp, tep); 1867 return; 1868 case T_INFO_REQ: 1869 tl_proc = tl_info_req_ser; 1870 break; 1871 default: 1872 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1873 SL_TRACE|SL_ERROR, 1874 "tl_wput:unknown TPI msg primitive")); 1875 tl_merror(wq, mp, EPROTO); 1876 return; 1877 } 1878 break; 1879 default: 1880 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1881 "tl_wput:default:unexpected Streams message")); 1882 freemsg(mp); 1883 return; 1884 } 1885 1886 /* 1887 * Continue processing via serializer. 
1888 */ 1889 ASSERT(tl_proc != NULL); 1890 tl_refhold(tep); 1891 tl_serializer_enter(tep, tl_proc, mp); 1892 } 1893 1894 /* 1895 * Place message on the queue while preserving order. 1896 */ 1897 static void 1898 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1899 { 1900 if (tep->te_closing) { 1901 tl_wput_ser(mp, tep); 1902 } else { 1903 TL_PUTQ(tep, mp); 1904 tl_serializer_exit(tep); 1905 tl_refrele(tep); 1906 } 1907 1908 } 1909 1910 static void 1911 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1912 { 1913 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1914 1915 switch (DB_TYPE(mp)) { 1916 case M_DATA: 1917 tl_data(mp, tep); 1918 break; 1919 case M_PROTO: 1920 tl_do_proto(mp, tep); 1921 break; 1922 default: 1923 freemsg(mp); 1924 break; 1925 } 1926 } 1927 1928 /* 1929 * Write side put procedure called from serializer. 1930 */ 1931 static void 1932 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1933 { 1934 tl_wput_common_ser(mp, tep); 1935 tl_serializer_exit(tep); 1936 tl_refrele(tep); 1937 } 1938 1939 /* 1940 * M_DATA processing. Called from serializer. 1941 */ 1942 static void 1943 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1944 { 1945 tl_endpt_t *peer_tep = tep->te_conp; 1946 queue_t *peer_rq; 1947 1948 ASSERT(DB_TYPE(mp) == M_DATA); 1949 ASSERT(IS_COTS(tep)); 1950 1951 ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer)); 1952 1953 /* 1954 * fastpath for data. Ignore flow control if tep is closing. 
1955 */ 1956 if ((peer_tep != NULL) && 1957 !peer_tep->te_closing && 1958 ((tep->te_state == TS_DATA_XFER) || 1959 (tep->te_state == TS_WREQ_ORDREL)) && 1960 (tep->te_wq != NULL) && 1961 (tep->te_wq->q_first == NULL) && 1962 ((peer_tep->te_state == TS_DATA_XFER) || 1963 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1964 ((peer_rq = peer_tep->te_rq) != NULL) && 1965 (canputnext(peer_rq) || tep->te_closing)) { 1966 putnext(peer_rq, mp); 1967 } else if (tep->te_closing) { 1968 /* 1969 * It is possible that by the time we got here tep started to 1970 * close. If the write queue is not empty, and the state is 1971 * TS_DATA_XFER the data should be delivered in order, so we 1972 * call putq() instead of freeing the data. 1973 */ 1974 if ((tep->te_wq != NULL) && 1975 ((tep->te_state == TS_DATA_XFER) || 1976 (tep->te_state == TS_WREQ_ORDREL))) { 1977 TL_PUTQ(tep, mp); 1978 } else { 1979 freemsg(mp); 1980 } 1981 } else { 1982 TL_PUTQ(tep, mp); 1983 } 1984 1985 tl_serializer_exit(tep); 1986 tl_refrele(tep); 1987 } 1988 1989 /* 1990 * Write side service routine. 1991 * 1992 * All actual processing happens within serializer which is entered 1993 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1994 * messages that need processing may have arrived, so tl_wsrv repeats until 1995 * queue is empty or te_nowsrv is set. 1996 */ 1997 static void 1998 tl_wsrv(queue_t *wq) 1999 { 2000 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2001 2002 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 2003 mutex_enter(&tep->te_srv_lock); 2004 ASSERT(tep->te_wsrv_active == B_FALSE); 2005 tep->te_wsrv_active = B_TRUE; 2006 mutex_exit(&tep->te_srv_lock); 2007 2008 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2009 2010 /* 2011 * Wait for serializer job to complete. 
2012 */ 2013 mutex_enter(&tep->te_srv_lock); 2014 while (tep->te_wsrv_active) { 2015 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2016 } 2017 cv_signal(&tep->te_srv_cv); 2018 mutex_exit(&tep->te_srv_lock); 2019 } 2020 } 2021 2022 /* 2023 * Serialized write side processing of the STREAMS queue. 2024 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2025 * is NULL. 2026 */ 2027 static void 2028 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2029 { 2030 mblk_t *mp; 2031 queue_t *wq = tep->te_wq; 2032 2033 ASSERT(wq != NULL); 2034 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2035 tl_wput_common_ser(mp, tep); 2036 } 2037 2038 /* 2039 * Wakeup service routine unless called from close. 2040 * If ser_mp is specified, the caller is tl_wsrv(). 2041 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2042 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2043 * be no matching tl_serializer_exit() in this case. 2044 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2045 * waiting on te_srv_cv. 2046 */ 2047 if (ser_mp != NULL) { 2048 /* 2049 * We are called from tl_wsrv. 2050 */ 2051 mutex_enter(&tep->te_srv_lock); 2052 ASSERT(tep->te_wsrv_active); 2053 tep->te_wsrv_active = B_FALSE; 2054 cv_signal(&tep->te_srv_cv); 2055 mutex_exit(&tep->te_srv_lock); 2056 tl_serializer_exit(tep); 2057 } 2058 } 2059 2060 /* 2061 * Called when the stream is backenabled. Enter serializer and qenable everyone 2062 * flow controlled by tep. 2063 * 2064 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2065 * is possible that two instances of tl_rsrv will be running reusing the same 2066 * rsrv mblk. 
2067 */ 2068 static void 2069 tl_rsrv(queue_t *rq) 2070 { 2071 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2072 2073 ASSERT(rq->q_first == NULL); 2074 ASSERT(tep->te_rsrv_active == 0); 2075 2076 tep->te_rsrv_active = B_TRUE; 2077 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2078 /* 2079 * Wait for serializer job to complete. 2080 */ 2081 mutex_enter(&tep->te_srv_lock); 2082 while (tep->te_rsrv_active) { 2083 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2084 } 2085 cv_signal(&tep->te_srv_cv); 2086 mutex_exit(&tep->te_srv_lock); 2087 } 2088 2089 /* ARGSUSED */ 2090 static void 2091 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2092 { 2093 tl_endpt_t *peer_tep; 2094 2095 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2096 tl_cl_backenable(tep); 2097 } else if ( 2098 IS_COTS(tep) && 2099 ((peer_tep = tep->te_conp) != NULL) && 2100 !peer_tep->te_closing && 2101 ((tep->te_state == TS_DATA_XFER) || 2102 (tep->te_state == TS_WIND_ORDREL)|| 2103 (tep->te_state == TS_WREQ_ORDREL))) { 2104 TL_QENABLE(peer_tep); 2105 } 2106 2107 /* 2108 * Wakeup read side service routine. 2109 */ 2110 mutex_enter(&tep->te_srv_lock); 2111 ASSERT(tep->te_rsrv_active); 2112 tep->te_rsrv_active = B_FALSE; 2113 cv_signal(&tep->te_srv_cv); 2114 mutex_exit(&tep->te_srv_lock); 2115 tl_serializer_exit(tep); 2116 } 2117 2118 /* 2119 * process M_PROTO messages. Always called from serializer. 2120 */ 2121 static void 2122 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2123 { 2124 ssize_t msz = MBLKL(mp); 2125 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2126 2127 /* Message size was validated by tl_wput(). 
*/ 2128 ASSERT(msz >= sizeof (prim->type)); 2129 2130 switch (prim->type) { 2131 case T_UNBIND_REQ: 2132 tl_unbind(mp, tep); 2133 break; 2134 2135 case T_ADDR_REQ: 2136 tl_addr_req(mp, tep); 2137 break; 2138 2139 case O_T_CONN_RES: 2140 case T_CONN_RES: 2141 if (IS_CLTS(tep)) { 2142 tl_merror(tep->te_wq, mp, EPROTO); 2143 break; 2144 } 2145 tl_conn_res(mp, tep); 2146 break; 2147 2148 case T_DISCON_REQ: 2149 if (IS_CLTS(tep)) { 2150 tl_merror(tep->te_wq, mp, EPROTO); 2151 break; 2152 } 2153 tl_discon_req(mp, tep); 2154 break; 2155 2156 case T_DATA_REQ: 2157 if (IS_CLTS(tep)) { 2158 tl_merror(tep->te_wq, mp, EPROTO); 2159 break; 2160 } 2161 tl_data(mp, tep); 2162 break; 2163 2164 case T_OPTDATA_REQ: 2165 if (IS_CLTS(tep)) { 2166 tl_merror(tep->te_wq, mp, EPROTO); 2167 break; 2168 } 2169 tl_data(mp, tep); 2170 break; 2171 2172 case T_EXDATA_REQ: 2173 if (IS_CLTS(tep)) { 2174 tl_merror(tep->te_wq, mp, EPROTO); 2175 break; 2176 } 2177 tl_exdata(mp, tep); 2178 break; 2179 2180 case T_ORDREL_REQ: 2181 if (! IS_COTSORD(tep)) { 2182 tl_merror(tep->te_wq, mp, EPROTO); 2183 break; 2184 } 2185 tl_ordrel(mp, tep); 2186 break; 2187 2188 case T_UNITDATA_REQ: 2189 if (IS_COTS(tep)) { 2190 tl_merror(tep->te_wq, mp, EPROTO); 2191 break; 2192 } 2193 tl_unitdata(mp, tep); 2194 break; 2195 2196 default: 2197 tl_merror(tep->te_wq, mp, EPROTO); 2198 break; 2199 } 2200 } 2201 2202 /* 2203 * Process ioctl from serializer. 2204 * This is a wrapper around tl_do_ioctl(). 2205 */ 2206 static void 2207 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2208 { 2209 if (! 
tep->te_closing) 2210 tl_do_ioctl(mp, tep); 2211 else 2212 freemsg(mp); 2213 2214 tl_serializer_exit(tep); 2215 tl_refrele(tep); 2216 } 2217 2218 static void 2219 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2220 { 2221 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2222 int cmd = iocbp->ioc_cmd; 2223 queue_t *wq = tep->te_wq; 2224 int error; 2225 int thisopt, otheropt; 2226 2227 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2228 2229 switch (cmd) { 2230 case TL_IOC_CREDOPT: 2231 if (cmd == TL_IOC_CREDOPT) { 2232 thisopt = TL_SETCRED; 2233 otheropt = TL_SETUCRED; 2234 } else { 2235 /* FALLTHROUGH */ 2236 case TL_IOC_UCREDOPT: 2237 thisopt = TL_SETUCRED; 2238 otheropt = TL_SETCRED; 2239 } 2240 /* 2241 * The credentials passing does not apply to sockets. 2242 * Only one of the cred options can be set at a given time. 2243 */ 2244 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2245 miocnak(wq, mp, 0, EINVAL); 2246 return; 2247 } 2248 2249 /* 2250 * Turn on generation of credential options for 2251 * T_conn_req, T_conn_con, T_unidata_ind. 
2252 */ 2253 error = miocpullup(mp, sizeof (uint32_t)); 2254 if (error != 0) { 2255 miocnak(wq, mp, 0, error); 2256 return; 2257 } 2258 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2259 miocnak(wq, mp, 0, EINVAL); 2260 return; 2261 } 2262 2263 if (*(uint32_t *)mp->b_cont->b_rptr) 2264 tep->te_flag |= thisopt; 2265 else 2266 tep->te_flag &= ~thisopt; 2267 2268 miocack(wq, mp, 0, 0); 2269 break; 2270 2271 default: 2272 /* Should not be here */ 2273 miocnak(wq, mp, 0, EINVAL); 2274 break; 2275 } 2276 } 2277 2278 2279 /* 2280 * send T_ERROR_ACK 2281 * Note: assumes enough memory or caller passed big enough mp 2282 * - no recovery from allocb failures 2283 */ 2284 2285 static void 2286 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2287 t_scalar_t unix_err, t_scalar_t type) 2288 { 2289 struct T_error_ack *err_ack; 2290 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2291 M_PCPROTO, T_ERROR_ACK); 2292 2293 if (ackmp == NULL) { 2294 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2295 "tl_error_ack:out of mblk memory")); 2296 tl_merror(wq, NULL, ENOSR); 2297 return; 2298 } 2299 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2300 err_ack->ERROR_prim = type; 2301 err_ack->TLI_error = tli_err; 2302 err_ack->UNIX_error = unix_err; 2303 2304 /* 2305 * send error ack message 2306 */ 2307 qreply(wq, ackmp); 2308 } 2309 2310 2311 2312 /* 2313 * send T_OK_ACK 2314 * Note: assumes enough memory or caller passed big enough mp 2315 * - no recovery from allocb failures 2316 */ 2317 static void 2318 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2319 { 2320 struct T_ok_ack *ok_ack; 2321 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2322 M_PCPROTO, T_OK_ACK); 2323 2324 if (ackmp == NULL) { 2325 tl_merror(wq, NULL, ENOMEM); 2326 return; 2327 } 2328 2329 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2330 ok_ack->CORRECT_prim = type; 2331 2332 (void) qreply(wq, ackmp); 2333 } 2334 2335 /* 2336 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2337 * This is a wrapper around tl_bind(). 2338 */ 2339 static void 2340 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2341 { 2342 if (! tep->te_closing) 2343 tl_bind(mp, tep); 2344 else 2345 freemsg(mp); 2346 2347 tl_serializer_exit(tep); 2348 tl_refrele(tep); 2349 } 2350 2351 /* 2352 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2353 * Assumes that the endpoint is in the unbound. 2354 */ 2355 static void 2356 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2357 { 2358 queue_t *wq = tep->te_wq; 2359 struct T_bind_ack *b_ack; 2360 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2361 mblk_t *ackmp, *bamp; 2362 soux_addr_t ux_addr; 2363 t_uscalar_t qlen = 0; 2364 t_scalar_t alen, aoff; 2365 tl_addr_t addr_req; 2366 void *addr_startp; 2367 ssize_t msz = MBLKL(mp), basize; 2368 t_scalar_t tli_err = 0, unix_err = 0; 2369 t_scalar_t save_prim_type = bind->PRIM_type; 2370 t_scalar_t save_state = tep->te_state; 2371 2372 if (tep->te_state != TS_UNBND) { 2373 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2374 SL_TRACE|SL_ERROR, 2375 "tl_wput:bind_request:out of state, state=%d", 2376 tep->te_state)); 2377 tli_err = TOUTSTATE; 2378 goto error; 2379 } 2380 2381 if (msz < sizeof (struct T_bind_req)) { 2382 tli_err = TSYSERR; unix_err = EINVAL; 2383 goto error; 2384 } 2385 2386 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2387 2388 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2389 (bind->PRIM_type == T_BIND_REQ)); 2390 2391 alen = bind->ADDR_length; 2392 aoff = bind->ADDR_offset; 2393 2394 /* negotiate max conn req pending */ 2395 if (IS_COTS(tep)) { 2396 qlen = bind->CONIND_number; 2397 if (qlen > tl_maxqlen) 2398 qlen = tl_maxqlen; 2399 } 2400 2401 /* 2402 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2403 * and bound again. 
2404 */ 2405 if ((tep->te_hash_hndl == NULL) && 2406 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2407 mod_hash_reserve_nosleep(tep->te_addrhash, 2408 &tep->te_hash_hndl) != 0) { 2409 tli_err = TSYSERR; unix_err = ENOSR; 2410 goto error; 2411 } 2412 2413 /* 2414 * Verify address correctness. 2415 */ 2416 if (IS_SOCKET(tep)) { 2417 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2418 2419 if ((alen != TL_SOUX_ADDRLEN) || 2420 (aoff < 0) || 2421 (aoff + alen > msz)) { 2422 (void) (STRLOG(TL_ID, tep->te_minor, 2423 1, SL_TRACE|SL_ERROR, 2424 "tl_bind: invalid socket addr")); 2425 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2426 tli_err = TSYSERR; unix_err = EINVAL; 2427 goto error; 2428 } 2429 /* Copy address from message to local buffer. */ 2430 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2431 /* 2432 * Check that we got correct address from sockets 2433 */ 2434 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2435 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2436 (void) (STRLOG(TL_ID, tep->te_minor, 2437 1, SL_TRACE|SL_ERROR, 2438 "tl_bind: invalid socket magic")); 2439 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2440 tli_err = TSYSERR; unix_err = EINVAL; 2441 goto error; 2442 } 2443 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2444 (ux_addr.soua_vp != NULL)) { 2445 (void) (STRLOG(TL_ID, tep->te_minor, 2446 1, SL_TRACE|SL_ERROR, 2447 "tl_bind: implicit addr non-empty")); 2448 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2449 tli_err = TSYSERR; unix_err = EINVAL; 2450 goto error; 2451 } 2452 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2453 (ux_addr.soua_vp == NULL)) { 2454 (void) (STRLOG(TL_ID, tep->te_minor, 2455 1, SL_TRACE|SL_ERROR, 2456 "tl_bind: explicit addr empty")); 2457 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2458 tli_err = TSYSERR; unix_err = EINVAL; 2459 goto error; 2460 } 2461 } else { 2462 if ((alen > 0) && ((aoff < 0) || 2463 ((ssize_t)(aoff + alen) > msz) || 2464 ((aoff + alen) < 0))) { 
2465 (void) (STRLOG(TL_ID, tep->te_minor, 2466 1, SL_TRACE|SL_ERROR, 2467 "tl_bind: invalid message")); 2468 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2469 tli_err = TSYSERR; unix_err = EINVAL; 2470 goto error; 2471 } 2472 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2473 (void) (STRLOG(TL_ID, tep->te_minor, 2474 1, SL_TRACE|SL_ERROR, 2475 "tl_bind: bad addr in message")); 2476 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2477 tli_err = TBADADDR; 2478 goto error; 2479 } 2480 #ifdef DEBUG 2481 /* 2482 * Mild form of ASSERT()ion to detect broken TPI apps. 2483 * if (! assertion) 2484 * log warning; 2485 */ 2486 if (! ((alen == 0 && aoff == 0) || 2487 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2488 (void) (STRLOG(TL_ID, tep->te_minor, 2489 3, SL_TRACE|SL_ERROR, 2490 "tl_bind: addr overlaps TPI message")); 2491 } 2492 #endif 2493 } 2494 2495 /* 2496 * Bind the address provided or allocate one if requested. 2497 * Allow rebinds with a new qlen value. 2498 */ 2499 if (IS_SOCKET(tep)) { 2500 /* 2501 * For anonymous requests the te_ap is already set up properly 2502 * so use minor number as an address. 2503 * For explicit requests need to check whether the address is 2504 * already in use. 2505 */ 2506 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2507 int rc; 2508 2509 if (tep->te_flag & TL_ADDRHASHED) { 2510 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2511 if (tep->te_vp == ux_addr.soua_vp) 2512 goto skip_addr_bind; 2513 else /* Rebind to a new address. */ 2514 tl_addr_unbind(tep); 2515 } 2516 /* 2517 * Insert address in the hash if it is not already 2518 * there. Since we use preallocated handle, the insert 2519 * can fail only if the key is already present. 
2520 */ 2521 rc = mod_hash_insert_reserve(tep->te_addrhash, 2522 (mod_hash_key_t)ux_addr.soua_vp, 2523 (mod_hash_val_t)tep, tep->te_hash_hndl); 2524 2525 if (rc != 0) { 2526 ASSERT(rc == MH_ERR_DUPLICATE); 2527 /* 2528 * Violate O_T_BIND_REQ semantics and fail with 2529 * TADDRBUSY - sockets will not use any address 2530 * other than supplied one for explicit binds. 2531 */ 2532 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2533 SL_TRACE|SL_ERROR, 2534 "tl_bind:requested addr %p is busy", 2535 ux_addr.soua_vp)); 2536 tli_err = TADDRBUSY; unix_err = 0; 2537 goto error; 2538 } 2539 tep->te_uxaddr = ux_addr; 2540 tep->te_flag |= TL_ADDRHASHED; 2541 tep->te_hash_hndl = NULL; 2542 } 2543 } else if (alen == 0) { 2544 /* 2545 * assign any free address 2546 */ 2547 if (! tl_get_any_addr(tep, NULL)) { 2548 (void) (STRLOG(TL_ID, tep->te_minor, 2549 1, SL_TRACE|SL_ERROR, 2550 "tl_bind:failed to get buffer for any " 2551 "address")); 2552 tli_err = TSYSERR; unix_err = ENOSR; 2553 goto error; 2554 } 2555 } else { 2556 addr_req.ta_alen = alen; 2557 addr_req.ta_abuf = (mp->b_rptr + aoff); 2558 addr_req.ta_zoneid = tep->te_zoneid; 2559 2560 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2561 if (tep->te_abuf == NULL) { 2562 tli_err = TSYSERR; unix_err = ENOSR; 2563 goto error; 2564 } 2565 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2566 tep->te_alen = alen; 2567 2568 if (mod_hash_insert_reserve(tep->te_addrhash, 2569 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2570 tep->te_hash_hndl) != 0) { 2571 if (save_prim_type == T_BIND_REQ) { 2572 /* 2573 * The bind semantics for this primitive 2574 * require a failure if the exact address 2575 * requested is busy 2576 */ 2577 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2578 SL_TRACE|SL_ERROR, 2579 "tl_bind:requested addr is busy")); 2580 tli_err = TADDRBUSY; unix_err = 0; 2581 goto error; 2582 } 2583 2584 /* 2585 * O_T_BIND_REQ semantics say if address if requested 2586 * address is busy, bind to any available free address 
2587 */ 2588 if (! tl_get_any_addr(tep, &addr_req)) { 2589 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2590 SL_TRACE|SL_ERROR, 2591 "tl_bind:unable to get any addr buf")); 2592 tli_err = TSYSERR; unix_err = ENOMEM; 2593 goto error; 2594 } 2595 } else { 2596 tep->te_flag |= TL_ADDRHASHED; 2597 tep->te_hash_hndl = NULL; 2598 } 2599 } 2600 2601 ASSERT(tep->te_alen >= 0); 2602 2603 skip_addr_bind: 2604 /* 2605 * prepare T_BIND_ACK TPI message 2606 */ 2607 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2608 bamp = reallocb(mp, basize, 0); 2609 if (bamp == NULL) { 2610 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2611 "tl_wput:tl_bind: allocb failed")); 2612 /* 2613 * roll back state changes 2614 */ 2615 tl_addr_unbind(tep); 2616 tep->te_state = TS_UNBND; 2617 tl_memrecover(wq, mp, basize); 2618 return; 2619 } 2620 2621 DB_TYPE(bamp) = M_PCPROTO; 2622 bamp->b_wptr = bamp->b_rptr + basize; 2623 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2624 b_ack->PRIM_type = T_BIND_ACK; 2625 b_ack->CONIND_number = qlen; 2626 b_ack->ADDR_length = tep->te_alen; 2627 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2628 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2629 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2630 2631 if (IS_COTS(tep)) { 2632 tep->te_qlen = qlen; 2633 if (qlen > 0) 2634 tep->te_flag |= TL_LISTENER; 2635 } 2636 2637 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2638 /* 2639 * send T_BIND_ACK message 2640 */ 2641 (void) qreply(wq, bamp); 2642 return; 2643 2644 error: 2645 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2646 if (ackmp == NULL) { 2647 /* 2648 * roll back state changes 2649 */ 2650 tep->te_state = save_state; 2651 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2652 return; 2653 } 2654 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2655 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2656 } 2657 2658 /* 2659 * Process T_UNBIND_REQ. 2660 * Called from serializer. 
2661 */ 2662 static void 2663 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2664 { 2665 queue_t *wq; 2666 mblk_t *ackmp; 2667 2668 if (tep->te_closing) { 2669 freemsg(mp); 2670 return; 2671 } 2672 2673 wq = tep->te_wq; 2674 2675 /* 2676 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2677 * ==> allocate for T_ERROR_ACK (known max) 2678 */ 2679 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2680 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2681 return; 2682 } 2683 /* 2684 * memory resources committed 2685 * Note: no message validation. T_UNBIND_REQ message is 2686 * same size as PRIM_type field so already verified earlier. 2687 */ 2688 2689 /* 2690 * validate state 2691 */ 2692 if (tep->te_state != TS_IDLE) { 2693 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2694 SL_TRACE|SL_ERROR, 2695 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2696 tep->te_state)); 2697 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2698 return; 2699 } 2700 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2701 2702 /* 2703 * TPI says on T_UNBIND_REQ: 2704 * send up a M_FLUSH to flush both 2705 * read and write queues 2706 */ 2707 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2708 2709 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2710 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2711 2712 /* 2713 * Sockets use bind with qlen==0 followed by bind() to 2714 * the same address with qlen > 0 for listeners. 2715 * We allow rebind with a new qlen value. 2716 */ 2717 tl_addr_unbind(tep); 2718 } 2719 2720 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2721 /* 2722 * send T_OK_ACK 2723 */ 2724 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2725 } 2726 2727 2728 /* 2729 * Option management code from drv/ip is used here 2730 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2731 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2732 * However, that is what we want as that option is 'unorthodox' 2733 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2734 * and not in T_SVR4_OPTMGMT_REQ/ACK 2735 * Note2: use of optcom_req means this routine is an exception to 2736 * recovery from allocb() failures. 2737 */ 2738 2739 static void 2740 tl_optmgmt(queue_t *wq, mblk_t *mp) 2741 { 2742 tl_endpt_t *tep; 2743 mblk_t *ackmp; 2744 union T_primitives *prim; 2745 2746 tep = (tl_endpt_t *)wq->q_ptr; 2747 prim = (union T_primitives *)mp->b_rptr; 2748 2749 /* all states OK for AF_UNIX options ? */ 2750 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2751 prim->type == T_SVR4_OPTMGMT_REQ) { 2752 /* 2753 * Broken TLI semantics that options can only be managed 2754 * in TS_IDLE state. Needed for Sparc ABI test suite that 2755 * tests this TLI (mis)feature using this device driver. 2756 */ 2757 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2758 SL_TRACE|SL_ERROR, 2759 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2760 tep->te_state)); 2761 /* 2762 * preallocate memory for T_ERROR_ACK 2763 */ 2764 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2765 if (! ackmp) { 2766 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2767 return; 2768 } 2769 2770 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2771 freemsg(mp); 2772 return; 2773 } 2774 2775 /* 2776 * call common option management routine from drv/ip 2777 */ 2778 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2779 (void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, 2780 B_FALSE); 2781 } else { 2782 ASSERT(prim->type == T_OPTMGMT_REQ); 2783 (void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, 2784 B_FALSE); 2785 } 2786 } 2787 2788 /* 2789 * Handle T_conn_req - the driver part of accept(). 2790 * If TL_SET[U]CRED generate the credentials options. 2791 * If this is a socket pass through options unmodified. 2792 * For sockets generate the T_CONN_CON here instead of 2793 * waiting for the T_CONN_RES. 
2794 */ 2795 static void 2796 tl_conn_req(queue_t *wq, mblk_t *mp) 2797 { 2798 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2799 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2800 ssize_t msz = MBLKL(mp); 2801 t_scalar_t alen, aoff, olen, ooff, err = 0; 2802 tl_endpt_t *peer_tep = NULL; 2803 mblk_t *ackmp; 2804 mblk_t *dimp; 2805 struct T_discon_ind *di; 2806 soux_addr_t ux_addr; 2807 tl_addr_t dst; 2808 2809 ASSERT(IS_COTS(tep)); 2810 2811 if (tep->te_closing) { 2812 freemsg(mp); 2813 return; 2814 } 2815 2816 /* 2817 * preallocate memory for: 2818 * 1. max of T_ERROR_ACK and T_OK_ACK 2819 * ==> known max T_ERROR_ACK 2820 * 2. max of T_DISCON_IND and T_CONN_IND 2821 */ 2822 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2823 if (! ackmp) { 2824 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2825 return; 2826 } 2827 /* 2828 * memory committed for T_OK_ACK/T_ERROR_ACK now 2829 * will be committed for T_DISCON_IND/T_CONN_IND later 2830 */ 2831 2832 if (tep->te_state != TS_IDLE) { 2833 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2834 SL_TRACE|SL_ERROR, 2835 "tl_wput:T_CONN_REQ:out of state, state=%d", 2836 tep->te_state)); 2837 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2838 freemsg(mp); 2839 return; 2840 } 2841 2842 /* 2843 * validate the message 2844 * Note: dereference fields in struct inside message only 2845 * after validating the message length. 
2846 */ 2847 if (msz < sizeof (struct T_conn_req)) { 2848 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2849 "tl_conn_req:invalid message length")); 2850 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2851 freemsg(mp); 2852 return; 2853 } 2854 alen = creq->DEST_length; 2855 aoff = creq->DEST_offset; 2856 olen = creq->OPT_length; 2857 ooff = creq->OPT_offset; 2858 if (olen == 0) 2859 ooff = 0; 2860 2861 if (IS_SOCKET(tep)) { 2862 if ((alen != TL_SOUX_ADDRLEN) || 2863 (aoff < 0) || 2864 (aoff + alen > msz) || 2865 (alen > msz - sizeof (struct T_conn_req))) { 2866 (void) (STRLOG(TL_ID, tep->te_minor, 2867 1, SL_TRACE|SL_ERROR, 2868 "tl_conn_req: invalid socket addr")); 2869 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2870 freemsg(mp); 2871 return; 2872 } 2873 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2874 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2875 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2876 (void) (STRLOG(TL_ID, tep->te_minor, 2877 1, SL_TRACE|SL_ERROR, 2878 "tl_conn_req: invalid socket magic")); 2879 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2880 freemsg(mp); 2881 return; 2882 } 2883 } else { 2884 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2885 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2886 ooff + olen < 0)) || 2887 olen < 0 || ooff < 0) { 2888 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2889 SL_TRACE|SL_ERROR, 2890 "tl_conn_req:invalid message")); 2891 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2892 freemsg(mp); 2893 return; 2894 } 2895 2896 if (alen <= 0 || aoff < 0 || 2897 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2898 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2899 SL_TRACE|SL_ERROR, 2900 "tl_conn_req:bad addr in message, " 2901 "alen=%d, msz=%ld", 2902 alen, msz)); 2903 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2904 freemsg(mp); 2905 return; 2906 } 2907 #ifdef DEBUG 2908 /* 2909 * Mild form of ASSERT()ion to detect broken TPI apps. 2910 * if (! 
assertion) 2911 * log warning; 2912 */ 2913 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2914 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2915 SL_TRACE|SL_ERROR, 2916 "tl_conn_req: addr overlaps TPI message")); 2917 } 2918 #endif 2919 if (olen) { 2920 /* 2921 * no opts in connect req 2922 * supported in this provider except for sockets. 2923 */ 2924 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2925 SL_TRACE|SL_ERROR, 2926 "tl_conn_req:options not supported " 2927 "in message")); 2928 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2929 freemsg(mp); 2930 return; 2931 } 2932 } 2933 2934 /* 2935 * Prevent tep from closing on us. 2936 */ 2937 if (! tl_noclose(tep)) { 2938 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2939 "tl_conn_req:endpoint is closing")); 2940 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2941 freemsg(mp); 2942 return; 2943 } 2944 2945 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2946 /* 2947 * get endpoint to connect to 2948 * check that peer with DEST addr is bound to addr 2949 * and has CONIND_number > 0 2950 */ 2951 dst.ta_alen = alen; 2952 dst.ta_abuf = mp->b_rptr + aoff; 2953 dst.ta_zoneid = tep->te_zoneid; 2954 2955 /* 2956 * Verify if remote addr is in use 2957 */ 2958 peer_tep = (IS_SOCKET(tep) ? 
2959 tl_sock_find_peer(tep, &ux_addr) : 2960 tl_find_peer(tep, &dst)); 2961 2962 if (peer_tep == NULL) { 2963 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2964 "tl_conn_req:no one at connect address")); 2965 err = ECONNREFUSED; 2966 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2967 /* 2968 * validate that number of incoming connection is 2969 * not to capacity on destination endpoint 2970 */ 2971 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2972 "tl_conn_req: qlen overflow connection refused")); 2973 err = ECONNREFUSED; 2974 } 2975 2976 /* 2977 * Send T_DISCON_IND in case of error 2978 */ 2979 if (err != 0) { 2980 if (peer_tep != NULL) 2981 tl_refrele(peer_tep); 2982 /* We are still expected to send T_OK_ACK */ 2983 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2984 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2985 tl_closeok(tep); 2986 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2987 M_PROTO, T_DISCON_IND); 2988 if (dimp == NULL) { 2989 tl_merror(wq, NULL, ENOSR); 2990 return; 2991 } 2992 di = (struct T_discon_ind *)dimp->b_rptr; 2993 di->DISCON_reason = err; 2994 di->SEQ_number = BADSEQNUM; 2995 2996 tep->te_state = TS_IDLE; 2997 /* 2998 * send T_DISCON_IND message 2999 */ 3000 putnext(tep->te_rq, dimp); 3001 return; 3002 } 3003 3004 ASSERT(IS_COTS(peer_tep)); 3005 3006 /* 3007 * Found the listener. At this point processing will continue on 3008 * listener serializer. Close of the endpoint should be blocked while we 3009 * switch serializers. 3010 */ 3011 tl_serializer_refhold(peer_tep->te_ser); 3012 tl_serializer_refrele(tep->te_ser); 3013 tep->te_ser = peer_tep->te_ser; 3014 ASSERT(tep->te_oconp == NULL); 3015 tep->te_oconp = peer_tep; 3016 3017 /* 3018 * It is safe to close now. Close may continue on listener serializer. 3019 */ 3020 tl_closeok(tep); 3021 3022 /* 3023 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3024 * data, so we link mp to ackmp. 
3025 */ 3026 ackmp->b_cont = mp; 3027 mp = ackmp; 3028 3029 tl_refhold(tep); 3030 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3031 } 3032 3033 /* 3034 * Finish T_CONN_REQ processing on listener serializer. 3035 */ 3036 static void 3037 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3038 { 3039 queue_t *wq; 3040 tl_endpt_t *peer_tep = tep->te_oconp; 3041 mblk_t *confmp, *cimp, *indmp; 3042 void *opts = NULL; 3043 mblk_t *ackmp = mp; 3044 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3045 struct T_conn_ind *ci; 3046 tl_icon_t *tip; 3047 void *addr_startp; 3048 t_scalar_t olen = creq->OPT_length; 3049 t_scalar_t ooff = creq->OPT_offset; 3050 size_t ci_msz; 3051 size_t size; 3052 3053 if (tep->te_closing) { 3054 TL_UNCONNECT(tep->te_oconp); 3055 tl_serializer_exit(tep); 3056 tl_refrele(tep); 3057 freemsg(mp); 3058 return; 3059 } 3060 3061 wq = tep->te_wq; 3062 tep->te_flag |= TL_EAGER; 3063 3064 /* 3065 * Extract preallocated ackmp from mp. 3066 */ 3067 mp = mp->b_cont; 3068 ackmp->b_cont = NULL; 3069 3070 if (olen == 0) 3071 ooff = 0; 3072 3073 if (peer_tep->te_closing || 3074 !((peer_tep->te_state == TS_IDLE) || 3075 (peer_tep->te_state == TS_WRES_CIND))) { 3076 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3077 "tl_conn_req:peer in bad state (%d)", 3078 peer_tep->te_state)); 3079 TL_UNCONNECT(tep->te_oconp); 3080 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3081 freemsg(ackmp); 3082 tl_serializer_exit(tep); 3083 tl_refrele(tep); 3084 return; 3085 } 3086 3087 /* 3088 * preallocate now for T_DISCON_IND or T_CONN_IND 3089 */ 3090 /* 3091 * calculate length of T_CONN_IND message 3092 */ 3093 if (peer_tep->te_flag & TL_SETCRED) { 3094 ooff = 0; 3095 olen = (t_scalar_t) sizeof (struct opthdr) + 3096 OPTLEN(sizeof (tl_credopt_t)); 3097 /* 1 option only */ 3098 } else if (peer_tep->te_flag & TL_SETUCRED) { 3099 ooff = 0; 3100 olen = (t_scalar_t)sizeof (struct opthdr) + 3101 OPTLEN(ucredsize); 3102 /* 1 option only */ 3103 } 
3104 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3105 ci_msz = T_ALIGN(ci_msz) + olen; 3106 size = max(ci_msz, sizeof (struct T_discon_ind)); 3107 3108 /* 3109 * Save options from mp - we'll need them for T_CONN_IND. 3110 */ 3111 if (ooff != 0) { 3112 opts = kmem_alloc(olen, KM_NOSLEEP); 3113 if (opts == NULL) { 3114 /* 3115 * roll back state changes 3116 */ 3117 tep->te_state = TS_IDLE; 3118 tl_memrecover(wq, mp, size); 3119 freemsg(ackmp); 3120 TL_UNCONNECT(tep->te_oconp); 3121 tl_serializer_exit(tep); 3122 tl_refrele(tep); 3123 return; 3124 } 3125 /* Copy options to a temp buffer */ 3126 bcopy(mp->b_rptr + ooff, opts, olen); 3127 } 3128 3129 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3130 /* 3131 * Generate a T_CONN_CON that has the identical address 3132 * (and options) as the T_CONN_REQ. 3133 * NOTE: assumes that the T_conn_req and T_conn_con structures 3134 * are isomorphic. 3135 */ 3136 confmp = copyb(mp); 3137 if (! confmp) { 3138 /* 3139 * roll back state changes 3140 */ 3141 tep->te_state = TS_IDLE; 3142 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3143 freemsg(ackmp); 3144 if (opts != NULL) 3145 kmem_free(opts, olen); 3146 TL_UNCONNECT(tep->te_oconp); 3147 tl_serializer_exit(tep); 3148 tl_refrele(tep); 3149 return; 3150 } 3151 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3152 T_CONN_CON; 3153 } else { 3154 confmp = NULL; 3155 } 3156 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3157 /* 3158 * roll back state changes 3159 */ 3160 tep->te_state = TS_IDLE; 3161 tl_memrecover(wq, mp, size); 3162 freemsg(ackmp); 3163 if (opts != NULL) 3164 kmem_free(opts, olen); 3165 freemsg(confmp); 3166 TL_UNCONNECT(tep->te_oconp); 3167 tl_serializer_exit(tep); 3168 tl_refrele(tep); 3169 return; 3170 } 3171 3172 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3173 if (tip == NULL) { 3174 /* 3175 * roll back state changes 3176 */ 3177 tep->te_state = TS_IDLE; 3178 tl_memrecover(wq, indmp, sizeof (*tip)); 3179 freemsg(ackmp); 3180 if (opts != NULL) 
3181 kmem_free(opts, olen); 3182 freemsg(confmp); 3183 TL_UNCONNECT(tep->te_oconp); 3184 tl_serializer_exit(tep); 3185 tl_refrele(tep); 3186 return; 3187 } 3188 tip->ti_mp = NULL; 3189 3190 /* 3191 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3192 * and tl_icon_t cell. 3193 */ 3194 3195 /* 3196 * ack validity of request and send the peer credential in the ACK. 3197 */ 3198 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3199 3200 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3201 confmp != NULL) { 3202 mblk_setcred(confmp, peer_tep->te_credp); 3203 DB_CPID(confmp) = peer_tep->te_cpid; 3204 } 3205 3206 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3207 3208 /* 3209 * prepare message to send T_CONN_IND 3210 */ 3211 /* 3212 * allocate the message - original data blocks retained 3213 * in the returned mblk 3214 */ 3215 cimp = tl_resizemp(indmp, size); 3216 if (! cimp) { 3217 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3218 "tl_conn_req:con_ind:allocb failure")); 3219 tl_merror(wq, indmp, ENOMEM); 3220 TL_UNCONNECT(tep->te_oconp); 3221 tl_serializer_exit(tep); 3222 tl_refrele(tep); 3223 if (opts != NULL) 3224 kmem_free(opts, olen); 3225 freemsg(confmp); 3226 ASSERT(tip->ti_mp == NULL); 3227 kmem_free(tip, sizeof (*tip)); 3228 return; 3229 } 3230 3231 DB_TYPE(cimp) = M_PROTO; 3232 ci = (struct T_conn_ind *)cimp->b_rptr; 3233 ci->PRIM_type = T_CONN_IND; 3234 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3235 ci->SRC_length = tep->te_alen; 3236 ci->SEQ_number = tep->te_seqno; 3237 3238 addr_startp = cimp->b_rptr + ci->SRC_offset; 3239 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3240 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3241 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3242 ci->SRC_length); 3243 ci->OPT_length = olen; /* because only 1 option */ 3244 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3245 DB_CREDDEF(cimp, tep->te_credp), 3246 TLPID(cimp, tep), 3247 peer_tep->te_flag, peer_tep->te_credp); 
3248 } else if (ooff != 0) { 3249 /* Copy option from T_CONN_REQ */ 3250 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3251 ci->SRC_length); 3252 ci->OPT_length = olen; 3253 ASSERT(opts != NULL); 3254 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3255 } else { 3256 ci->OPT_offset = 0; 3257 ci->OPT_length = 0; 3258 } 3259 if (opts != NULL) 3260 kmem_free(opts, olen); 3261 3262 /* 3263 * register connection request with server peer 3264 * append to list of incoming connections 3265 * increment references for both peer_tep and tep: peer_tep is placed on 3266 * te_oconp and tep is placed on listeners queue. 3267 */ 3268 tip->ti_tep = tep; 3269 tip->ti_seqno = tep->te_seqno; 3270 list_insert_tail(&peer_tep->te_iconp, tip); 3271 peer_tep->te_nicon++; 3272 3273 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3274 /* 3275 * send the T_CONN_IND message 3276 */ 3277 putnext(peer_tep->te_rq, cimp); 3278 3279 /* 3280 * Send a T_CONN_CON message for sockets. 3281 * Disable the queues until we have reached the correct state! 3282 */ 3283 if (confmp != NULL) { 3284 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3285 noenable(wq); 3286 putnext(tep->te_rq, confmp); 3287 } 3288 /* 3289 * Now we need to increment tep reference because tep is referenced by 3290 * server list of pending connections. We also need to decrement 3291 * reference before exiting serializer. Two operations void each other 3292 * so we don't modify reference at all. 3293 */ 3294 ASSERT(tep->te_refcnt >= 2); 3295 ASSERT(peer_tep->te_refcnt >= 2); 3296 tl_serializer_exit(tep); 3297 } 3298 3299 3300 3301 /* 3302 * Handle T_conn_res on listener stream. Called on listener serializer. 3303 * tl_conn_req has already generated the T_CONN_CON. 3304 * tl_conn_res is called on listener serializer. 3305 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3306 * Switch eager serializer to acceptor's. 
*
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES (or O_T_CONN_RES) message.  It is either freed
 *	      or, when a client endpoint is present, reallocated and reused
 *	      as the T_CONN_CON / T_DISCON_IND that is sent upstream.
 *	tep - the listener endpoint the response arrived on.
 *
 * Failures are reported to the listener with a T_ERROR_ACK carrying
 * TOUTSTATE, TSYSERR/EINVAL, TBADOPT, TBADSEQ or TBADF.  If the listener is
 * already closing the message is silently dropped.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t 		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	boolean_t		client_noclose_set = B_FALSE;
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		/* mp is handed to tl_memrecover(); it is not freed here. */
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 *
	 * NOTE(review): cres->PRIM_type is read in the declarations above,
	 * i.e. before this length check; presumably the caller has already
	 * verified that the message holds at least the primitive type -
	 * confirm.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * From here on the listener state has been advanced, so every error
	 * return below first applies TE_ERROR_ACK to move it back.
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	/*
	 * Every return path below must undo the two steps above with
	 * tl_closeok(acc_ep) and tl_refrele(acc_ep).
	 */
	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Mark for deletion, the entry corresponding to client
	 * on list of pending connections made by the listener
	 * search list to see if client is one of the
	 * recorded as a listener.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			/* Remember to re-allow the close on every exit path. */
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the condition below can only be
		 * true when IS_SOCKET(cl_ep) holds, so a non-socket client in
		 * an unexpected state is not rejected here - confirm that
		 * this is intended.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredsize);
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 * (err is only ever set while cl_ep != NULL, so respmp and size are
	 * valid here)
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/*
		 * mp is still the original request here: reallocb() above is
		 * only reached while a client endpoint is present.
		 */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp);
		DB_CPID(ccmp) = acc_ep->te_cpid;
	} else {
		/*
		 * Socket with early connect: the T_CONN_CON was already sent
		 * by tl_conn_req, so the preallocated message is not needed.
		 */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor: it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * The client's serializer count is non-zero, so the
			 * client is left where it is and the acceptor is
			 * moved instead (see below).
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In case any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
*/
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}




/*
 * Handle T_DISCON_REQ on endpoint tep.
 *
 * mp is the request message; it is either freed or reallocated and reused as
 * the T_DISCON_IND delivered to (or queued for) the peer.  The request is
 * silently dropped when tep is already closing.
 *
 * The request is only valid in states TS_WCON_CREQ..TS_WRES_CIND and
 * TS_DATA_XFER..TS_WREQ_ORDREL; otherwise a T_ERROR_ACK(TOUTSTATE) is sent.
 *
 * After the T_OK_ACK one of three cases is handled:
 *	listener  (te_nicon > 0): reject the pending connection identified by
 *		   SEQ_number and drop it from the incoming list;
 *	client    (te_oconp set): withdraw an outgoing connect request;
 *	connected (te_conp set):  tear down an established connection.
 *
 * The new state of tep is computed in new_state and only committed once it
 * is known whether the T_DISCON_IND gets queued with the tl_icon.
 */
static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_discon_req	*dr;
	ssize_t			msz;
	tl_endpt_t		*peer_tep = tep->te_conp;
	tl_endpt_t		*srv_tep = tep->te_oconp;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *dimp, *respmp;
	struct T_discon_ind	*di;
	t_scalar_t		save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/* A peer or server that is closing is treated as already gone. */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for  T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND  later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this case
		 * the code below will avoid any action on the client side.
		 */

		ASSERT(IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number));
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request
	 */
	if (tep->te_nicon == 0)
		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
	else
		if (tep->te_nicon == 1)
			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
		else
			new_state = NEXTSTATE(TE_OK_ACK4, new_state);

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up  */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 *
	 * From here on save_state is reused to hold the peer's state prior
	 * to the disconnect; it drives the flush/qenable decisions below.
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE|SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			/* Client is gone (or going): nobody to notify. */
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that
			 * ti_tep == NULL since the T_DISCON_IND
			 * takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			/* Nothing is sent directly; skip to "done" below. */
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    peer_tep->te_state);
				else
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    peer_tep->te_state);
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	if (peer_tep == NULL) {
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	/* Common T_DISCON_IND fields for all three cases above. */
	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type  = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {	/* disconnect pointers if connected */
		ASSERT(! peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (! IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}


/*
 * Handle T_ADDR_REQ on endpoint tep.
 *
 * For connectionless endpoints, and for connection-oriented endpoints that
 * are not in TS_DATA_XFER, TS_WIND_ORDREL or TS_WREQ_ORDREL, reply with a
 * T_ADDR_ACK that carries the local address when the endpoint is bound
 * (state >= TS_IDLE) and no address otherwise.  In the remaining states the
 * request is passed on to tl_connected_cots_addr_req().
 *
 * mp is reused for the reply; the request is dropped if tep is closing.
 */
static void
tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	size_t			ack_sz;
	mblk_t			*ackmp;
	struct T_addr_ack	*taa;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * Note: T_ADDR_REQ message has only PRIM_type field
	 * so it is already validated earlier.
	 */

	if (IS_CLTS(tep) ||
	    (tep->te_state > TS_WREQ_ORDREL) ||
	    (tep->te_state < TS_DATA_XFER)) {
		/*
		 * Either connectionless or connection oriented but not
		 * in connected data transfer state or half-closed states.
		 */
		ack_sz = sizeof (struct T_addr_ack);
		if (tep->te_state >= TS_IDLE)
			/* is bound */
			ack_sz += tep->te_alen;
		ackmp = reallocb(mp, ack_sz, 0);
		if (ackmp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_addr_req: reallocb failed"));
			tl_memrecover(wq, mp, ack_sz);
			return;
		}

		taa = (struct T_addr_ack *)ackmp->b_rptr;

		/* Zeroing leaves all address lengths and offsets at 0. */
		bzero(taa, sizeof (struct T_addr_ack));

		taa->PRIM_type = T_ADDR_ACK;
		ackmp->b_datap->db_type = M_PCPROTO;
		ackmp->b_wptr = (uchar_t *)&taa[1];

		if (tep->te_state >= TS_IDLE) {
			/* endpoint is bound */
			taa->LOCADDR_length = tep->te_alen;
			taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);

			/* The address goes right after the fixed header. */
			bcopy(tep->te_abuf, ackmp->b_wptr,
			    tep->te_alen);
			ackmp->b_wptr += tep->te_alen;
			ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
		}

		(void) qreply(wq, ackmp);
	} else {
		ASSERT(tep->te_state == TS_DATA_XFER ||
		    tep->te_state == TS_WIND_ORDREL ||
		    tep->te_state == TS_WREQ_ORDREL);
		/* connection oriented in data transfer */
		tl_connected_cots_addr_req(mp, tep);
	}
} 4229 4230 4231 static void 4232 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4233 { 4234 tl_endpt_t *peer_tep; 4235 size_t ack_sz; 4236 mblk_t *ackmp; 4237 struct T_addr_ack *taa; 4238 uchar_t *addr_startp; 4239 4240 if (tep->te_closing) { 4241 freemsg(mp); 4242 return; 4243 } 4244 4245 ASSERT(tep->te_state >= TS_IDLE); 4246 4247 ack_sz = sizeof (struct T_addr_ack); 4248 ack_sz += T_ALIGN(tep->te_alen); 4249 peer_tep = tep->te_conp; 4250 ack_sz += peer_tep->te_alen; 4251 4252 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4253 if (ackmp == NULL) { 4254 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4255 "tl_connected_cots_addr_req: reallocb failed")); 4256 tl_memrecover(tep->te_wq, mp, ack_sz); 4257 return; 4258 } 4259 4260 taa = (struct T_addr_ack *)ackmp->b_rptr; 4261 4262 /* endpoint is bound */ 4263 taa->LOCADDR_length = tep->te_alen; 4264 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4265 4266 addr_startp = (uchar_t *)&taa[1]; 4267 4268 bcopy(tep->te_abuf, addr_startp, 4269 tep->te_alen); 4270 4271 taa->REMADDR_length = peer_tep->te_alen; 4272 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4273 taa->LOCADDR_length); 4274 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4275 bcopy(peer_tep->te_abuf, addr_startp, 4276 peer_tep->te_alen); 4277 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4278 taa->REMADDR_offset + peer_tep->te_alen; 4279 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4280 4281 putnext(tep->te_rq, ackmp); 4282 } 4283 4284 static void 4285 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4286 { 4287 if (IS_CLTS(tep)) { 4288 *ia = tl_clts_info_ack; 4289 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4290 } else { 4291 *ia = tl_cots_info_ack; 4292 if (IS_COTSORD(tep)) 4293 ia->SERV_type = T_COTS_ORD; 4294 } 4295 ia->TIDU_size = tl_tidusz; 4296 ia->CURRENT_state = tep->te_state; 4297 } 4298 4299 /* 4300 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4301 * tl_wput. 4302 */ 4303 static void 4304 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4305 { 4306 mblk_t *ackmp; 4307 t_uscalar_t cap_bits1; 4308 struct T_capability_ack *tcap; 4309 4310 if (tep->te_closing) { 4311 freemsg(mp); 4312 return; 4313 } 4314 4315 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4316 4317 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4318 M_PCPROTO, T_CAPABILITY_ACK); 4319 if (ackmp == NULL) { 4320 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4321 "tl_capability_req: reallocb failed")); 4322 tl_memrecover(tep->te_wq, mp, 4323 sizeof (struct T_capability_ack)); 4324 return; 4325 } 4326 4327 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4328 tcap->CAP_bits1 = 0; 4329 4330 if (cap_bits1 & TC1_INFO) { 4331 tl_copy_info(&tcap->INFO_ack, tep); 4332 tcap->CAP_bits1 |= TC1_INFO; 4333 } 4334 4335 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4336 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4337 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4338 } 4339 4340 putnext(tep->te_rq, ackmp); 4341 } 4342 4343 static void 4344 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4345 { 4346 if (! tep->te_closing) 4347 tl_info_req(mp, tep); 4348 else 4349 freemsg(mp); 4350 4351 tl_serializer_exit(tep); 4352 tl_refrele(tep); 4353 } 4354 4355 static void 4356 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4357 { 4358 mblk_t *ackmp; 4359 4360 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4361 M_PCPROTO, T_INFO_ACK); 4362 if (ackmp == NULL) { 4363 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4364 "tl_info_req: reallocb failed")); 4365 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4366 return; 4367 } 4368 4369 /* 4370 * fill in T_INFO_ACK contents 4371 */ 4372 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4373 4374 /* 4375 * send ack message 4376 */ 4377 putnext(tep->te_rq, ackmp); 4378 } 4379 4380 /* 4381 * Handle M_DATA, T_data_req and T_optdata_req. 
* If this is a socket pass through T_optdata_req options unmodified.
 *
 * mp is the message to send and tep the sending endpoint.  Every path below
 * disposes of mp: it is freed, handed to tl_merror(), put back on the queue
 * with TL_PUTBQ(), queued on the pending connection with tl_icon_queuemsg(),
 * or passed to the peer's read side with putnext().
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* Data primitives are not accepted on a connectionless endpoint. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * Length-check the TPI header; T_OPTDATA_REQ is additionally rejected
	 * on anything that is not a socket endpoint.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) ||
		    !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a peer nor a pending outgoing connection. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		/* The error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_EXDATA_REQ: forward expedited data from tep to its connected
 * peer as a T_EXDATA_IND.
 *
 * The flow mirrors tl_data(): the header length is checked, the sender's
 * state is validated, data for a not-yet-accepted connection is either put
 * back on the queue or (when closing) queued with the pending connection,
 * and flow control on the peer's read side is honoured unless tep is closing.
 * mp is disposed of on every path.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a peer nor a pending outgoing connection. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		/* The error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_ORDREL_REQ: advance the sender's state for the orderly release
 * and deliver a T_ORDREL_IND to the connected peer, advancing the peer's
 * state as well.
 *
 * Unlike tl_data()/tl_exdata() both endpoint states change on this event.
 * mp is disposed of on every path (freed, passed to tl_merror(), put back
 * with TL_PUTBQ(), queued with tl_icon_queuemsg() or sent with putnext()).
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer and no pending outgoing connection: fall out of the
		 * switch; the "peer gone" check below frees the message.
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	/*
	 * NOTE(review): te_state is advanced here, before the peer and
	 * flow-control checks below.  If the message is then put back with
	 * TL_PUTBQ() it will presumably be re-processed against the already
	 * advanced state - confirm that this is intended.
	 */
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		/* The error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
 *
 * wq is the write queue the failed T_UNITDATA_REQ (mp) arrived on; the
 * endpoint is taken from wq->q_ptr.  The destination address and options of
 * the request are copied into the indication, which is sent back with
 * qreply().  mp is freed once the indication has been built; if the
 * indication cannot be allocated mp is handed to tl_memrecover() instead.
 *
 * The DEST and OPT offsets/lengths in mp are used as found, so they must
 * already have been validated by the caller.
 */
static void
tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
{
	size_t			err_sz;
	tl_endpt_t		*tep;
	struct T_unitdata_req	*udreq;
	mblk_t			*err_mp;
	t_scalar_t		alen;
	t_scalar_t		olen;
	struct T_uderror_ind	*uderr;
	uchar_t			*addr_startp;

	err_sz = sizeof (struct T_uderror_ind);
	tep = (tl_endpt_t *)wq->q_ptr;
	udreq = (struct T_unitdata_req *)mp->b_rptr;
	alen = udreq->DEST_length;
	olen = udreq->OPT_length;

	/* header, then the address (padded to alignment), then options */
	if (alen > 0)
		err_sz = T_ALIGN(err_sz + alen);
	if (olen > 0)
		err_sz += olen;

	err_mp = allocb(err_sz, BPRI_MED);
	if (! err_mp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_uderr:allocb failure"));
		/*
		 * Note: no rollback of state needed as it does
		 * not change in connectionless transport
		 */
		tl_memrecover(wq, mp, err_sz);
		return;
	}

	DB_TYPE(err_mp) = M_PROTO;
	err_mp->b_wptr = err_mp->b_rptr + err_sz;
	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
	uderr->PRIM_type = T_UDERROR_IND;
	uderr->ERROR_type = err;
	uderr->DEST_length = alen;
	uderr->OPT_length = olen;
	if (alen <= 0) {
		uderr->DEST_offset = 0;
	} else {
		uderr->DEST_offset =
		    (t_scalar_t)sizeof (struct T_uderror_ind);
		addr_startp = mp->b_rptr + udreq->DEST_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
		    (size_t)alen);
	}
	if (olen <= 0) {
		uderr->OPT_offset = 0;
	} else {
		uderr->OPT_offset =
		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
		    uderr->DEST_length);
		addr_startp = mp->b_rptr + udreq->OPT_offset;
		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
		    (size_t)olen);
	}
	freemsg(mp);

	/*
	 * send indication message
	 */
	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);

	qreply(wq, err_mp);
}

/*
 * Serializer entry point for a unitdata request.
 *
 * If the endpoint is not closing and its write queue already holds messages,
 * mp is queued behind them with TL_PUTQ().  Otherwise it is processed by
 * tl_unitdata() as long as the endpoint still has a read queue, and freed if
 * it does not.  The serializer is then left and the endpoint reference
 * dropped.
 */
static void
tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;

	if (!tep->te_closing && (wq->q_first != NULL)) {
		TL_PUTQ(tep, mp);
	} else if (tep->te_rq != NULL)
		tl_unitdata(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Handle T_unitdata_req.
 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
 * If this is a socket pass through options unmodified.
4967 */ 4968 static void 4969 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4970 { 4971 queue_t *wq = tep->te_wq; 4972 soux_addr_t ux_addr; 4973 tl_addr_t destaddr; 4974 uchar_t *addr_startp; 4975 tl_endpt_t *peer_tep; 4976 struct T_unitdata_ind *udind; 4977 struct T_unitdata_req *udreq; 4978 ssize_t msz, ui_sz; 4979 t_scalar_t alen, aoff, olen, ooff; 4980 t_scalar_t oldolen = 0; 4981 4982 udreq = (struct T_unitdata_req *)mp->b_rptr; 4983 msz = MBLKL(mp); 4984 4985 /* 4986 * validate the state 4987 */ 4988 if (tep->te_state != TS_IDLE) { 4989 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4990 SL_TRACE|SL_ERROR, 4991 "tl_wput:T_CONN_REQ:out of state")); 4992 tl_merror(wq, mp, EPROTO); 4993 return; 4994 } 4995 /* 4996 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 4997 * (state does not change on this event) 4998 */ 4999 5000 /* 5001 * validate the message 5002 * Note: dereference fields in struct inside message only 5003 * after validating the message length. 5004 */ 5005 if (msz < sizeof (struct T_unitdata_req)) { 5006 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5007 "tl_unitdata:invalid message length")); 5008 tl_merror(wq, mp, EINVAL); 5009 return; 5010 } 5011 alen = udreq->DEST_length; 5012 aoff = udreq->DEST_offset; 5013 oldolen = olen = udreq->OPT_length; 5014 ooff = udreq->OPT_offset; 5015 if (olen == 0) 5016 ooff = 0; 5017 5018 if (IS_SOCKET(tep)) { 5019 if ((alen != TL_SOUX_ADDRLEN) || 5020 (aoff < 0) || 5021 (aoff + alen > msz) || 5022 (olen < 0) || (ooff < 0) || 5023 ((olen > 0) && ((ooff + olen) > msz))) { 5024 (void) (STRLOG(TL_ID, tep->te_minor, 5025 1, SL_TRACE|SL_ERROR, 5026 "tl_unitdata_req: invalid socket addr " 5027 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5028 (int)msz, alen, aoff, olen, ooff)); 5029 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5030 return; 5031 } 5032 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5033 5034 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5035 (ux_addr.soua_magic != 
SOU_MAGIC_EXPLICIT)) { 5036 (void) (STRLOG(TL_ID, tep->te_minor, 5037 1, SL_TRACE|SL_ERROR, 5038 "tl_conn_req: invalid socket magic")); 5039 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5040 return; 5041 } 5042 } else { 5043 if ((alen < 0) || 5044 (aoff < 0) || 5045 ((alen > 0) && ((aoff + alen) > msz)) || 5046 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5047 ((aoff + alen) < 0) || 5048 ((olen > 0) && ((ooff + olen) > msz)) || 5049 (olen < 0) || 5050 (ooff < 0) || 5051 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5052 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5053 SL_TRACE|SL_ERROR, 5054 "tl_unitdata:invalid unit data message")); 5055 tl_merror(wq, mp, EINVAL); 5056 return; 5057 } 5058 } 5059 5060 /* Options not supported unless it's a socket */ 5061 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5062 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5063 "tl_unitdata:option use(unsupported) or zero len addr")); 5064 tl_uderr(wq, mp, EPROTO); 5065 return; 5066 } 5067 #ifdef DEBUG 5068 /* 5069 * Mild form of ASSERT()ion to detect broken TPI apps. 5070 * if (! assertion) 5071 * log warning; 5072 */ 5073 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5074 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5075 "tl_unitdata:addr overlaps TPI message")); 5076 } 5077 #endif 5078 /* 5079 * get destination endpoint 5080 */ 5081 destaddr.ta_alen = alen; 5082 destaddr.ta_abuf = mp->b_rptr + aoff; 5083 destaddr.ta_zoneid = tep->te_zoneid; 5084 5085 /* 5086 * Check whether the destination is the same that was used previously 5087 * and the destination endpoint is in the right state. If something is 5088 * wrong, find destination again and cache it. 
5089 */ 5090 peer_tep = tep->te_lastep; 5091 5092 if ((peer_tep == NULL) || peer_tep->te_closing || 5093 (peer_tep->te_state != TS_IDLE) || 5094 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5095 /* 5096 * Not the same as cached destination , need to find the right 5097 * destination. 5098 */ 5099 peer_tep = (IS_SOCKET(tep) ? 5100 tl_sock_find_peer(tep, &ux_addr) : 5101 tl_find_peer(tep, &destaddr)); 5102 5103 if (peer_tep == NULL) { 5104 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5105 SL_TRACE|SL_ERROR, 5106 "tl_unitdata:no one at destination address")); 5107 tl_uderr(wq, mp, ECONNRESET); 5108 return; 5109 } 5110 5111 /* 5112 * Cache the new peer. 5113 */ 5114 if (tep->te_lastep != NULL) 5115 tl_refrele(tep->te_lastep); 5116 5117 tep->te_lastep = peer_tep; 5118 } 5119 5120 if (peer_tep->te_state != TS_IDLE) { 5121 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5122 "tl_unitdata:provider in invalid state")); 5123 tl_uderr(wq, mp, EPROTO); 5124 return; 5125 } 5126 5127 ASSERT(peer_tep->te_rq != NULL); 5128 5129 /* 5130 * Put it back if flow controlled except when we are closing. 5131 * Note: Messages already on queue when we are closing is bounded 5132 * so we can ignore flow control. 
5133 */ 5134 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5135 /* record what we are flow controlled on */ 5136 if (tep->te_flowq != NULL) { 5137 list_remove(&tep->te_flowq->te_flowlist, tep); 5138 } 5139 list_insert_head(&peer_tep->te_flowlist, tep); 5140 tep->te_flowq = peer_tep; 5141 TL_PUTBQ(tep, mp); 5142 return; 5143 } 5144 /* 5145 * prepare indication message 5146 */ 5147 5148 /* 5149 * calculate length of message 5150 */ 5151 if (peer_tep->te_flag & TL_SETCRED) { 5152 ASSERT(olen == 0); 5153 olen = (t_scalar_t)sizeof (struct opthdr) + 5154 OPTLEN(sizeof (tl_credopt_t)); 5155 /* 1 option only */ 5156 } else if (peer_tep->te_flag & TL_SETUCRED) { 5157 ASSERT(olen == 0); 5158 olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize); 5159 /* 1 option only */ 5160 } else if (peer_tep->te_flag & TL_SOCKUCRED) { 5161 /* Possibly more than one option */ 5162 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5163 OPTLEN(ucredsize); 5164 } 5165 5166 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5167 olen; 5168 /* 5169 * If the unitdata_ind fits and we are not adding options 5170 * reuse the udreq mblk. 5171 */ 5172 if (msz >= ui_sz && alen >= tep->te_alen && 5173 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5174 /* 5175 * Reuse the original mblk. Leave options in place. 5176 */ 5177 udind = (struct T_unitdata_ind *)mp->b_rptr; 5178 udind->PRIM_type = T_UNITDATA_IND; 5179 udind->SRC_length = tep->te_alen; 5180 addr_startp = mp->b_rptr + udind->SRC_offset; 5181 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5182 } else { 5183 /* Allocate a new T_unidata_ind message */ 5184 mblk_t *ui_mp; 5185 5186 ui_mp = allocb(ui_sz, BPRI_MED); 5187 if (! 
ui_mp) { 5188 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5189 "tl_unitdata:allocb failure:message queued")); 5190 tl_memrecover(wq, mp, ui_sz); 5191 return; 5192 } 5193 5194 /* 5195 * fill in T_UNITDATA_IND contents 5196 */ 5197 DB_TYPE(ui_mp) = M_PROTO; 5198 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5199 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5200 udind->PRIM_type = T_UNITDATA_IND; 5201 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5202 udind->SRC_length = tep->te_alen; 5203 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5204 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5205 udind->OPT_offset = 5206 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5207 udind->OPT_length = olen; 5208 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5209 if (oldolen != 0) { 5210 bcopy((void *)((uintptr_t)udreq + ooff), 5211 (void *)((uintptr_t)udind + 5212 udind->OPT_offset), 5213 oldolen); 5214 } 5215 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5216 oldolen, 5217 DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep), 5218 peer_tep->te_flag, peer_tep->te_credp); 5219 } else { 5220 bcopy((void *)((uintptr_t)udreq + ooff), 5221 (void *)((uintptr_t)udind + udind->OPT_offset), 5222 olen); 5223 } 5224 5225 /* 5226 * relink data blocks from mp to ui_mp 5227 */ 5228 ui_mp->b_cont = mp->b_cont; 5229 freeb(mp); 5230 mp = ui_mp; 5231 } 5232 /* 5233 * send indication message 5234 */ 5235 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5236 putnext(peer_tep->te_rq, mp); 5237 } 5238 5239 5240 5241 /* 5242 * Check if a given addr is in use. 5243 * Endpoint ptr returned or NULL if not found. 5244 * The name space is separate for each mode. This implies that 5245 * sockets get their own name space. 
5246 */ 5247 static tl_endpt_t * 5248 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5249 { 5250 tl_endpt_t *peer_tep = NULL; 5251 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5252 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5253 5254 ASSERT(! IS_SOCKET(tep)); 5255 5256 ASSERT(ap != NULL && ap->ta_alen > 0); 5257 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5258 ASSERT(ap->ta_abuf != NULL); 5259 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5260 ASSERT(IMPLY(rc == 0, 5261 (tep->te_zoneid == peer_tep->te_zoneid) && 5262 (tep->te_transport == peer_tep->te_transport))); 5263 5264 if ((rc == 0) && (peer_tep->te_closing)) { 5265 tl_refrele(peer_tep); 5266 peer_tep = NULL; 5267 } 5268 5269 return (peer_tep); 5270 } 5271 5272 /* 5273 * Find peer for a socket based on unix domain address. 5274 * For implicit addresses our peer can be found by minor number in ai hash. For 5275 * explicit binds we look vnode address at addr_hash. 5276 */ 5277 static tl_endpt_t * 5278 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5279 { 5280 tl_endpt_t *peer_tep = NULL; 5281 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5282 tep->te_aihash : tep->te_addrhash; 5283 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5284 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5285 5286 ASSERT(IS_SOCKET(tep)); 5287 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5288 ASSERT(IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport))); 5289 5290 if (peer_tep != NULL) { 5291 /* Don't attempt to use closing peer. */ 5292 if (peer_tep->te_closing) 5293 goto errout; 5294 5295 /* 5296 * Cross-zone unix sockets are permitted, but for Trusted 5297 * Extensions only, the "server" for these must be in the 5298 * global zone. 
5299 */ 5300 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5301 is_system_labeled() && 5302 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5303 goto errout; 5304 } 5305 5306 return (peer_tep); 5307 5308 errout: 5309 tl_refrele(peer_tep); 5310 return (NULL); 5311 } 5312 5313 /* 5314 * Generate a free addr and return it in struct pointed by ap 5315 * but allocating space for address buffer. 5316 * The generated address will be at least 4 bytes long and, if req->ta_alen 5317 * exceeds 4 bytes, be req->ta_alen bytes long. 5318 * 5319 * If address is found it will be inserted in the hash. 5320 * 5321 * If req->ta_alen is larger than the default alen (4 bytes) the last 5322 * alen-4 bytes will always be the same as in req. 5323 * 5324 * Return 0 for failure. 5325 * Return non-zero for success. 5326 */ 5327 static boolean_t 5328 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5329 { 5330 t_scalar_t alen; 5331 uint32_t loopcnt; /* Limit loop to 2^32 */ 5332 5333 ASSERT(tep->te_hash_hndl != NULL); 5334 ASSERT(! IS_SOCKET(tep)); 5335 5336 if (tep->te_hash_hndl == NULL) 5337 return (B_FALSE); 5338 5339 /* 5340 * check if default addr is in use 5341 * if it is - bump it and try again 5342 */ 5343 if (req == NULL) { 5344 alen = sizeof (uint32_t); 5345 } else { 5346 alen = max(req->ta_alen, sizeof (uint32_t)); 5347 ASSERT(tep->te_zoneid == req->ta_zoneid); 5348 } 5349 5350 if (tep->te_alen < alen) { 5351 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5352 5353 /* 5354 * Not enough space in tep->ta_ap to hold the address, 5355 * allocate a bigger space. 
5356 */ 5357 if (abuf == NULL) 5358 return (B_FALSE); 5359 5360 if (tep->te_alen > 0) 5361 kmem_free(tep->te_abuf, tep->te_alen); 5362 5363 tep->te_alen = alen; 5364 tep->te_abuf = abuf; 5365 } 5366 5367 /* Copy in the address in req */ 5368 if (req != NULL) { 5369 ASSERT(alen >= req->ta_alen); 5370 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5371 } 5372 5373 /* 5374 * First try minor number then try default addresses. 5375 */ 5376 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5377 5378 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5379 if (mod_hash_insert_reserve(tep->te_addrhash, 5380 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5381 tep->te_hash_hndl) == 0) { 5382 /* 5383 * found free address 5384 */ 5385 tep->te_flag |= TL_ADDRHASHED; 5386 tep->te_hash_hndl = NULL; 5387 5388 return (B_TRUE); /* successful return */ 5389 } 5390 /* 5391 * Use default address. 5392 */ 5393 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5394 atomic_add_32(&tep->te_defaddr, 1); 5395 } 5396 5397 /* 5398 * Failed to find anything. 5399 */ 5400 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5401 "tl_get_any_addr:looped 2^32 times")); 5402 return (B_FALSE); 5403 } 5404 5405 /* 5406 * reallocb + set r/w ptrs to reflect size. 5407 */ 5408 static mblk_t * 5409 tl_resizemp(mblk_t *mp, ssize_t new_size) 5410 { 5411 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5412 return (NULL); 5413 5414 mp->b_rptr = DB_BASE(mp); 5415 mp->b_wptr = mp->b_rptr + new_size; 5416 return (mp); 5417 } 5418 5419 static void 5420 tl_cl_backenable(tl_endpt_t *tep) 5421 { 5422 list_t *l = &tep->te_flowlist; 5423 tl_endpt_t *elp; 5424 5425 ASSERT(IS_CLTS(tep)); 5426 5427 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5428 ASSERT(tep->te_ser == elp->te_ser); 5429 ASSERT(elp->te_flowq == tep); 5430 if (! elp->te_closing) 5431 TL_QENABLE(elp); 5432 elp->te_flowq = NULL; 5433 list_remove(l, elp); 5434 } 5435 } 5436 5437 /* 5438 * Unconnect endpoints. 
5439 */ 5440 static void 5441 tl_co_unconnect(tl_endpt_t *tep) 5442 { 5443 tl_endpt_t *peer_tep = tep->te_conp; 5444 tl_endpt_t *srv_tep = tep->te_oconp; 5445 list_t *l; 5446 tl_icon_t *tip; 5447 tl_endpt_t *cl_tep; 5448 mblk_t *d_mp; 5449 5450 ASSERT(IS_COTS(tep)); 5451 /* 5452 * If our peer is closing, don't use it. 5453 */ 5454 if ((peer_tep != NULL) && peer_tep->te_closing) { 5455 TL_UNCONNECT(tep->te_conp); 5456 peer_tep = NULL; 5457 } 5458 if ((srv_tep != NULL) && srv_tep->te_closing) { 5459 TL_UNCONNECT(tep->te_oconp); 5460 srv_tep = NULL; 5461 } 5462 5463 if (tep->te_nicon > 0) { 5464 l = &tep->te_iconp; 5465 /* 5466 * If incoming requests pending, change state 5467 * of clients on disconnect ind event and send 5468 * discon_ind pdu to modules above them 5469 * for server: all clients get disconnect 5470 */ 5471 5472 while (tep->te_nicon > 0) { 5473 tip = list_head(l); 5474 cl_tep = tip->ti_tep; 5475 5476 if (cl_tep == NULL) { 5477 tl_freetip(tep, tip); 5478 continue; 5479 } 5480 5481 if (cl_tep->te_oconp != NULL) { 5482 ASSERT(cl_tep != cl_tep->te_oconp); 5483 TL_UNCONNECT(cl_tep->te_oconp); 5484 } 5485 5486 if (cl_tep->te_closing) { 5487 tl_freetip(tep, tip); 5488 continue; 5489 } 5490 5491 enableok(cl_tep->te_wq); 5492 TL_QENABLE(cl_tep); 5493 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5494 if (d_mp != NULL) { 5495 cl_tep->te_state = TS_IDLE; 5496 putnext(cl_tep->te_rq, d_mp); 5497 } else { 5498 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5499 SL_TRACE|SL_ERROR, 5500 "tl_co_unconnect:icmng: " 5501 "allocb failure")); 5502 } 5503 tl_freetip(tep, tip); 5504 } 5505 } else if (srv_tep != NULL) { 5506 /* 5507 * If outgoing request pending, change state 5508 * of server on discon ind event 5509 */ 5510 5511 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5512 IS_COTSORD(srv_tep) && 5513 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5514 /* 5515 * Queue ordrel_ind for server to be picked up 5516 * when the connection is accepted. 
5517 */ 5518 d_mp = tl_ordrel_ind_alloc(); 5519 } else { 5520 /* 5521 * send discon_ind to server 5522 */ 5523 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5524 } 5525 if (d_mp == NULL) { 5526 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5527 SL_TRACE|SL_ERROR, 5528 "tl_co_unconnect:outgoing:allocb failure")); 5529 TL_UNCONNECT(tep->te_oconp); 5530 goto discon_peer; 5531 } 5532 5533 /* 5534 * If this is a socket the T_DISCON_IND is queued with 5535 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5536 * from the list of pending connections. 5537 * Note that when te_oconp is set the peer better have 5538 * a t_connind_t for the client. 5539 */ 5540 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5541 /* 5542 * Queue the disconnection message. 5543 */ 5544 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5545 } else { 5546 tip = tl_icon_find(srv_tep, tep->te_seqno); 5547 if (tip == NULL) { 5548 freemsg(d_mp); 5549 } else { 5550 ASSERT(tep == tip->ti_tep); 5551 ASSERT(tep->te_ser == srv_tep->te_ser); 5552 /* 5553 * Delete tip from the server list. 
5554 */ 5555 if (srv_tep->te_nicon == 1) { 5556 srv_tep->te_state = 5557 NEXTSTATE(TE_DISCON_IND2, 5558 srv_tep->te_state); 5559 } else { 5560 srv_tep->te_state = 5561 NEXTSTATE(TE_DISCON_IND3, 5562 srv_tep->te_state); 5563 } 5564 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5565 T_DISCON_IND); 5566 putnext(srv_tep->te_rq, d_mp); 5567 tl_freetip(srv_tep, tip); 5568 } 5569 TL_UNCONNECT(tep->te_oconp); 5570 srv_tep = NULL; 5571 } 5572 } else if (peer_tep != NULL) { 5573 /* 5574 * unconnect existing connection 5575 * If connected, change state of peer on 5576 * discon ind event and send discon ind pdu 5577 * to module above it 5578 */ 5579 5580 ASSERT(tep->te_ser == peer_tep->te_ser); 5581 if (IS_COTSORD(peer_tep) && 5582 (peer_tep->te_state == TS_WIND_ORDREL || 5583 peer_tep->te_state == TS_DATA_XFER)) { 5584 /* 5585 * send ordrel ind 5586 */ 5587 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5588 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5589 peer_tep->te_state, 5590 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5591 d_mp = tl_ordrel_ind_alloc(); 5592 if (! d_mp) { 5593 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5594 SL_TRACE|SL_ERROR, 5595 "tl_co_unconnect:connected:" 5596 "allocb failure")); 5597 /* 5598 * Continue with cleaning up peer as 5599 * this side may go away with the close 5600 */ 5601 TL_QENABLE(peer_tep); 5602 goto discon_peer; 5603 } 5604 peer_tep->te_state = 5605 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5606 5607 putnext(peer_tep->te_rq, d_mp); 5608 /* 5609 * Handle flow control case. This will generate 5610 * a t_discon_ind message with reason 0 if there 5611 * is data queued on the write side. 5612 */ 5613 TL_QENABLE(peer_tep); 5614 } else if (IS_COTSORD(peer_tep) && 5615 peer_tep->te_state == TS_WREQ_ORDREL) { 5616 /* 5617 * Sent an ordrel_ind. We send a discon with 5618 * with error 0 to inform that the peer is gone. 
5619 */ 5620 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5621 SL_TRACE|SL_ERROR, 5622 "tl_co_unconnect: discon in state %d", 5623 tep->te_state)); 5624 tl_discon_ind(peer_tep, 0); 5625 } else { 5626 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5627 SL_TRACE|SL_ERROR, 5628 "tl_co_unconnect: state %d", tep->te_state)); 5629 tl_discon_ind(peer_tep, ECONNRESET); 5630 } 5631 5632 discon_peer: 5633 /* 5634 * Disconnect cross-pointers only for close 5635 */ 5636 if (tep->te_closing) { 5637 peer_tep = tep->te_conp; 5638 TL_REMOVE_PEER(peer_tep->te_conp); 5639 TL_REMOVE_PEER(tep->te_conp); 5640 } 5641 } 5642 } 5643 5644 /* 5645 * Note: The following routine does not recover from allocb() 5646 * failures 5647 * The reason should be from the <sys/errno.h> space. 5648 */ 5649 static void 5650 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5651 { 5652 mblk_t *d_mp; 5653 5654 if (tep->te_closing) 5655 return; 5656 5657 /* 5658 * flush the queues. 5659 */ 5660 flushq(tep->te_rq, FLUSHDATA); 5661 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5662 5663 /* 5664 * send discon ind 5665 */ 5666 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5667 if (! d_mp) { 5668 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5669 "tl_discon_ind:allocb failure")); 5670 return; 5671 } 5672 tep->te_state = TS_IDLE; 5673 putnext(tep->te_rq, d_mp); 5674 } 5675 5676 /* 5677 * Note: The following routine does not recover from allocb() 5678 * failures 5679 * The reason should be from the <sys/errno.h> space. 
5680 */ 5681 static mblk_t * 5682 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5683 { 5684 mblk_t *mp; 5685 struct T_discon_ind *tdi; 5686 5687 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5688 DB_TYPE(mp) = M_PROTO; 5689 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5690 tdi = (struct T_discon_ind *)mp->b_rptr; 5691 tdi->PRIM_type = T_DISCON_IND; 5692 tdi->DISCON_reason = reason; 5693 tdi->SEQ_number = seqnum; 5694 } 5695 return (mp); 5696 } 5697 5698 5699 /* 5700 * Note: The following routine does not recover from allocb() 5701 * failures 5702 */ 5703 static mblk_t * 5704 tl_ordrel_ind_alloc(void) 5705 { 5706 mblk_t *mp; 5707 struct T_ordrel_ind *toi; 5708 5709 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5710 DB_TYPE(mp) = M_PROTO; 5711 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5712 toi = (struct T_ordrel_ind *)mp->b_rptr; 5713 toi->PRIM_type = T_ORDREL_IND; 5714 } 5715 return (mp); 5716 } 5717 5718 5719 /* 5720 * Lookup the seqno in the list of queued connections. 5721 */ 5722 static tl_icon_t * 5723 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5724 { 5725 list_t *l = &tep->te_iconp; 5726 tl_icon_t *tip = list_head(l); 5727 5728 ASSERT(seqno != 0); 5729 5730 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5731 ; 5732 5733 return (tip); 5734 } 5735 5736 /* 5737 * Queue data for a given T_CONN_IND while verifying that redundant 5738 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5739 * Used when the originator of the connection closes. 
5740 */ 5741 static void 5742 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5743 { 5744 tl_icon_t *tip; 5745 mblk_t **mpp, *mp; 5746 int prim, nprim; 5747 5748 if (nmp->b_datap->db_type == M_PROTO) 5749 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5750 else 5751 nprim = -1; /* M_DATA */ 5752 5753 tip = tl_icon_find(tep, seqno); 5754 if (tip == NULL) { 5755 freemsg(nmp); 5756 return; 5757 } 5758 5759 ASSERT(tip->ti_seqno != 0); 5760 mpp = &tip->ti_mp; 5761 while (*mpp != NULL) { 5762 mp = *mpp; 5763 5764 if (mp->b_datap->db_type == M_PROTO) 5765 prim = ((union T_primitives *)mp->b_rptr)->type; 5766 else 5767 prim = -1; /* M_DATA */ 5768 5769 /* 5770 * Allow nothing after a T_DISCON_IND 5771 */ 5772 if (prim == T_DISCON_IND) { 5773 freemsg(nmp); 5774 return; 5775 } 5776 /* 5777 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5778 */ 5779 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5780 freemsg(nmp); 5781 return; 5782 } 5783 mpp = &(mp->b_next); 5784 } 5785 *mpp = nmp; 5786 } 5787 5788 /* 5789 * Verify if a certain TPI primitive exists on the connind queue. 5790 * Use prim -1 for M_DATA. 5791 * Return non-zero if found. 5792 */ 5793 static boolean_t 5794 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5795 { 5796 tl_icon_t *tip = tl_icon_find(tep, seqno); 5797 boolean_t found = B_FALSE; 5798 5799 if (tip != NULL) { 5800 mblk_t *mp; 5801 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5802 found = (DB_TYPE(mp) == M_PROTO && 5803 ((union T_primitives *)mp->b_rptr)->type == prim); 5804 } 5805 } 5806 return (found); 5807 } 5808 5809 /* 5810 * Send the b_next mblk chain that has accumulated before the connection 5811 * was accepted. Perform the necessary state transitions. 
5812 */ 5813 static void 5814 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5815 { 5816 mblk_t *mp; 5817 union T_primitives *primp; 5818 5819 if (tep->te_closing) { 5820 tl_icon_freemsgs(mpp); 5821 return; 5822 } 5823 5824 ASSERT(tep->te_state == TS_DATA_XFER); 5825 ASSERT(tep->te_rq->q_first == NULL); 5826 5827 while ((mp = *mpp) != NULL) { 5828 *mpp = mp->b_next; 5829 mp->b_next = NULL; 5830 5831 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5832 switch (DB_TYPE(mp)) { 5833 default: 5834 freemsg(mp); 5835 break; 5836 case M_DATA: 5837 putnext(tep->te_rq, mp); 5838 break; 5839 case M_PROTO: 5840 primp = (union T_primitives *)mp->b_rptr; 5841 switch (primp->type) { 5842 case T_UNITDATA_IND: 5843 case T_DATA_IND: 5844 case T_OPTDATA_IND: 5845 case T_EXDATA_IND: 5846 putnext(tep->te_rq, mp); 5847 break; 5848 case T_ORDREL_IND: 5849 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5850 tep->te_state); 5851 putnext(tep->te_rq, mp); 5852 break; 5853 case T_DISCON_IND: 5854 tep->te_state = TS_IDLE; 5855 putnext(tep->te_rq, mp); 5856 break; 5857 default: 5858 #ifdef DEBUG 5859 cmn_err(CE_PANIC, 5860 "tl_icon_sendmsgs: unknown primitive"); 5861 #endif /* DEBUG */ 5862 freemsg(mp); 5863 break; 5864 } 5865 break; 5866 } 5867 } 5868 } 5869 5870 /* 5871 * Free the b_next mblk chain that has accumulated before the connection 5872 * was accepted. 5873 */ 5874 static void 5875 tl_icon_freemsgs(mblk_t **mpp) 5876 { 5877 mblk_t *mp; 5878 5879 while ((mp = *mpp) != NULL) { 5880 *mpp = mp->b_next; 5881 mp->b_next = NULL; 5882 freemsg(mp); 5883 } 5884 } 5885 5886 /* 5887 * Send M_ERROR 5888 * Note: assumes caller ensured enough space in mp or enough 5889 * memory available. 
Does not attempt recovery from allocb() 5890 * failures 5891 */ 5892 5893 static void 5894 tl_merror(queue_t *wq, mblk_t *mp, int error) 5895 { 5896 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5897 5898 if (tep->te_closing) { 5899 freemsg(mp); 5900 return; 5901 } 5902 5903 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5904 SL_TRACE|SL_ERROR, 5905 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5906 5907 /* 5908 * flush all messages on queue. we are shutting 5909 * the stream down on fatal error 5910 */ 5911 flushq(wq, FLUSHALL); 5912 if (IS_COTS(tep)) { 5913 /* connection oriented - unconnect endpoints */ 5914 tl_co_unconnect(tep); 5915 } 5916 if (mp->b_cont) { 5917 freemsg(mp->b_cont); 5918 mp->b_cont = NULL; 5919 } 5920 5921 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5922 freemsg(mp); 5923 mp = allocb(1, BPRI_HI); 5924 if (!mp) { 5925 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5926 SL_TRACE|SL_ERROR, 5927 "tl_merror:M_PROTO: out of memory")); 5928 return; 5929 } 5930 } 5931 if (mp) { 5932 DB_TYPE(mp) = M_ERROR; 5933 mp->b_rptr = DB_BASE(mp); 5934 *mp->b_rptr = (char)error; 5935 mp->b_wptr = mp->b_rptr + sizeof (char); 5936 qreply(wq, mp); 5937 } else { 5938 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5939 } 5940 } 5941 5942 static void 5943 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5944 { 5945 if (flag & TL_SETCRED) { 5946 struct opthdr *opt = (struct opthdr *)buf; 5947 tl_credopt_t *tlcred; 5948 5949 opt->level = TL_PROT_LEVEL; 5950 opt->name = TL_OPT_PEER_CRED; 5951 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5952 5953 tlcred = (tl_credopt_t *)(opt + 1); 5954 tlcred->tc_uid = crgetuid(cr); 5955 tlcred->tc_gid = crgetgid(cr); 5956 tlcred->tc_ruid = crgetruid(cr); 5957 tlcred->tc_rgid = crgetrgid(cr); 5958 tlcred->tc_suid = crgetsuid(cr); 5959 tlcred->tc_sgid = crgetsgid(cr); 5960 tlcred->tc_ngroups = crgetngroups(cr); 5961 } else if (flag & TL_SETUCRED) { 5962 struct opthdr *opt = (struct opthdr *)buf; 5963 5964 
opt->level = TL_PROT_LEVEL; 5965 opt->name = TL_OPT_PEER_UCRED; 5966 opt->len = (t_uscalar_t)OPTLEN(ucredsize); 5967 5968 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5969 } else { 5970 struct T_opthdr *topt = (struct T_opthdr *)buf; 5971 ASSERT(flag & TL_SOCKUCRED); 5972 5973 topt->level = SOL_SOCKET; 5974 topt->name = SCM_UCRED; 5975 topt->len = ucredsize + sizeof (*topt); 5976 topt->status = 0; 5977 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 5978 } 5979 } 5980 5981 /* ARGSUSED */ 5982 static int 5983 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5984 { 5985 /* no default value processed in protocol specific code currently */ 5986 return (-1); 5987 } 5988 5989 /* ARGSUSED */ 5990 static int 5991 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5992 { 5993 int len; 5994 tl_endpt_t *tep; 5995 int *valp; 5996 5997 tep = (tl_endpt_t *)wq->q_ptr; 5998 5999 len = 0; 6000 6001 /* 6002 * Assumes: option level and name sanity check done elsewhere 6003 */ 6004 6005 switch (level) { 6006 case SOL_SOCKET: 6007 if (! IS_SOCKET(tep)) 6008 break; 6009 switch (name) { 6010 case SO_RECVUCRED: 6011 len = sizeof (int); 6012 valp = (int *)ptr; 6013 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6014 break; 6015 default: 6016 break; 6017 } 6018 break; 6019 case TL_PROT_LEVEL: 6020 switch (name) { 6021 case TL_OPT_PEER_CRED: 6022 case TL_OPT_PEER_UCRED: 6023 /* 6024 * option not supposed to retrieved directly 6025 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6026 * when some internal flags set by other options 6027 * Direct retrieval always designed to fail(ignored) 6028 * for this option. 
6029 */ 6030 break; 6031 } 6032 } 6033 return (len); 6034 } 6035 6036 /* ARGSUSED */ 6037 static int 6038 tl_set_opt( 6039 queue_t *wq, 6040 uint_t mgmt_flags, 6041 int level, 6042 int name, 6043 uint_t inlen, 6044 uchar_t *invalp, 6045 uint_t *outlenp, 6046 uchar_t *outvalp, 6047 void *thisdg_attrs, 6048 cred_t *cr, 6049 mblk_t *mblk) 6050 { 6051 int error; 6052 tl_endpt_t *tep; 6053 6054 tep = (tl_endpt_t *)wq->q_ptr; 6055 6056 error = 0; /* NOERROR */ 6057 6058 /* 6059 * Assumes: option level and name sanity checks done elsewhere 6060 */ 6061 6062 switch (level) { 6063 case SOL_SOCKET: 6064 if (! IS_SOCKET(tep)) { 6065 error = EINVAL; 6066 break; 6067 } 6068 /* 6069 * TBD: fill in other AF_UNIX socket options and then stop 6070 * returning error. 6071 */ 6072 switch (name) { 6073 case SO_RECVUCRED: 6074 /* 6075 * We only support this for datagram sockets; 6076 * getpeerucred handles the connection oriented 6077 * transports. 6078 */ 6079 if (! IS_CLTS(tep)) { 6080 error = EINVAL; 6081 break; 6082 } 6083 if (*(int *)invalp == 0) 6084 tep->te_flag &= ~TL_SOCKUCRED; 6085 else 6086 tep->te_flag |= TL_SOCKUCRED; 6087 break; 6088 default: 6089 error = EINVAL; 6090 break; 6091 } 6092 break; 6093 case TL_PROT_LEVEL: 6094 switch (name) { 6095 case TL_OPT_PEER_CRED: 6096 case TL_OPT_PEER_UCRED: 6097 /* 6098 * option not supposed to be set directly 6099 * Its value in initialized for each endpoint at 6100 * driver open time. 6101 * Direct setting always designed to fail for this 6102 * option. 
6103 */ 6104 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6105 SL_TRACE|SL_ERROR, 6106 "tl_set_opt: option is not supported")); 6107 error = EPROTO; 6108 break; 6109 } 6110 } 6111 return (error); 6112 } 6113 6114 6115 static void 6116 tl_timer(void *arg) 6117 { 6118 queue_t *wq = arg; 6119 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6120 6121 ASSERT(tep); 6122 6123 tep->te_timoutid = 0; 6124 6125 enableok(wq); 6126 /* 6127 * Note: can call wsrv directly here and save context switch 6128 * Consider change when qtimeout (not timeout) is active 6129 */ 6130 qenable(wq); 6131 } 6132 6133 static void 6134 tl_buffer(void *arg) 6135 { 6136 queue_t *wq = arg; 6137 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6138 6139 ASSERT(tep); 6140 6141 tep->te_bufcid = 0; 6142 tep->te_nowsrv = B_FALSE; 6143 6144 enableok(wq); 6145 /* 6146 * Note: can call wsrv directly here and save context switch 6147 * Consider change when qbufcall (not bufcall) is active 6148 */ 6149 qenable(wq); 6150 } 6151 6152 static void 6153 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6154 { 6155 tl_endpt_t *tep; 6156 6157 tep = (tl_endpt_t *)wq->q_ptr; 6158 6159 if (tep->te_closing) { 6160 freemsg(mp); 6161 return; 6162 } 6163 noenable(wq); 6164 6165 (void) insq(wq, wq->q_first, mp); 6166 6167 if (tep->te_bufcid || tep->te_timoutid) { 6168 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6169 "tl_memrecover:recover %p pending", (void *)wq)); 6170 return; 6171 } 6172 6173 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6174 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6175 drv_usectohz(TL_BUFWAIT)); 6176 } 6177 } 6178 6179 static void 6180 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6181 { 6182 ASSERT(tip->ti_seqno != 0); 6183 6184 if (tip->ti_mp != NULL) { 6185 tl_icon_freemsgs(&tip->ti_mp); 6186 tip->ti_mp = NULL; 6187 } 6188 if (tip->ti_tep != NULL) { 6189 tl_refrele(tip->ti_tep); 6190 tip->ti_tep = NULL; 6191 } 6192 list_remove(&tep->te_iconp, tip); 6193 kmem_free(tip, 
sizeof (tl_icon_t)); 6194 tep->te_nicon--; 6195 } 6196 6197 /* 6198 * Remove address from address hash. 6199 */ 6200 static void 6201 tl_addr_unbind(tl_endpt_t *tep) 6202 { 6203 tl_endpt_t *elp; 6204 6205 if (tep->te_flag & TL_ADDRHASHED) { 6206 if (IS_SOCKET(tep)) { 6207 (void) mod_hash_remove(tep->te_addrhash, 6208 (mod_hash_key_t)tep->te_vp, 6209 (mod_hash_val_t *)&elp); 6210 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6211 tep->te_magic = SOU_MAGIC_IMPLICIT; 6212 } else { 6213 (void) mod_hash_remove(tep->te_addrhash, 6214 (mod_hash_key_t)&tep->te_ap, 6215 (mod_hash_val_t *)&elp); 6216 (void) kmem_free(tep->te_abuf, tep->te_alen); 6217 tep->te_alen = -1; 6218 tep->te_abuf = NULL; 6219 } 6220 tep->te_flag &= ~TL_ADDRHASHED; 6221 } 6222 } 6223