1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 27 */ 28 29 /* 30 * Multithreaded STREAMS Local Transport Provider. 31 * 32 * OVERVIEW 33 * ======== 34 * 35 * This driver provides TLI as well as socket semantics. It provides 36 * connectionless, connection oriented, and connection oriented with orderly 37 * release transports for TLI and sockets. Each transport type has separate name 38 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 39 * this removes any name space conflicts when binding to socket style transport 40 * addresses. 41 * 42 * NOTE: There is one exception: Socket ticots and ticotsord transports share 43 * the same namespace. In fact, sockets always use ticotsord type transport. 44 * 45 * The driver mode is specified during open() by the minor number used for 46 * open. 47 * 48 * The sockets in addition have the following semantic differences: 49 * No support for passing up credentials (TL_SET[U]CRED). 
50 * 51 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, 52 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to 53 * T_OPTDATA_IND. 54 * 55 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before 56 * a T_CONN_RES is received from the acceptor. This means that a socket 57 * connect will complete before the peer has called accept. 58 * 59 * 60 * MULTITHREADING 61 * ============== 62 * 63 * The driver does not use STREAMS protection mechanisms. Instead it uses a 64 * generic "serializer" abstraction. Most of the operations are executed behind 65 * the serializer and are essentially single-threaded. All functions executed 66 * behind the same serializer are strictly serialized. So if one thread calls 67 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls 68 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one 69 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or 70 * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the 71 * same time. 72 * 73 * Connectionless transports use a single serializer per transport type (one for 74 * TLI and one for sockets). Connection-oriented transports use finer-grained 75 * serializers. 76 * 77 * All COTS-type endpoints start their life with private serializers. During 78 * connection request processing the endpoint serializer is switched to the 79 * listener's serializer and the rest of T_CONN_REQ processing is done on the 80 * listener serializer. During T_CONN_RES processing the eager serializer is 81 * switched from listener to acceptor serializer and after that point all 82 * processing for eager and acceptor happens on this serializer. To avoid races 83 * with endpoint closes while its serializer may be changing closes are blocked 84 * while serializers are manipulated. 
85 * 86 * References accounting 87 * --------------------- 88 * 89 * Endpoints are reference counted and freed when the last reference is 90 * dropped. Functions within the serializer may access an endpoint state even 91 * after an endpoint closed. The te_closing being set on the endpoint indicates 92 * that the endpoint entered its close routine. 93 * 94 * One reference is held for each opened endpoint instance. The reference 95 * counter is incremented when the endpoint is linked to another endpoint and 96 * decremented when the link disappears. It is also incremented when the 97 * endpoint is found by the hash table lookup. This increment is atomic with the 98 * lookup itself and happens while the hash table read lock is held. 99 * 100 * Close synchronization 101 * --------------------- 102 * 103 * During close the endpoint is marked as closing using te_closing flag. It is 104 * usually enough to check for te_closing flag since all other state changes 105 * happen after this flag is set and the close entered serializer. Immediately 106 * after setting te_closing flag tl_close() enters serializer and waits until 107 * the callback finishes. This allows all functions called within serializer to 108 * simply check te_closing without any locks. 109 * 110 * Serializer management. 111 * --------------------- 112 * 113 * For COTS transports serializers are created when the endpoint is constructed 114 * and destroyed when the endpoint is destructed. CLTS transports use global 115 * serializers - one for sockets and one for TLI. 116 * 117 * COTS serializers have separate reference counts to deal with several 118 * endpoints sharing the same serializer. There is a subtle problem related to 119 * the serializer destruction. The serializer should never be destroyed by any 120 * function executed inside serializer. 
This means that close has to wait till 121 * all serializer activity for this endpoint is finished before it can drop the 122 * last reference on the endpoint (which may as well free the serializer). This 123 * is only relevant for COTS transports which manage serializers 124 * dynamically. For CLTS transports close may complete without waiting for all 125 * serializer activity to finish since serializer is only destroyed at driver 126 * detach time. 127 * 128 * COTS endpoints keep track of the number of outstanding requests on the 129 * serializer for the endpoint. The code handling accept() avoids changing 130 * client serializer if it has any pending messages on the serializer and 131 * instead moves acceptor to listener's serializer. 132 * 133 * 134 * Use of hash tables 135 * ------------------ 136 * 137 * The driver uses modhash hash table implementation. Each transport uses two 138 * hash tables - one for finding endpoints by acceptor ID and another one for 139 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same 140 * pair of hash tables since sockets only use TICOTSORD. 141 * 142 * All hash table lookups increment a reference count for returned endpoints, 143 * so we may safely check the endpoint state even when the endpoint is removed 144 * from the hash by another thread immediately after it is found. 145 * 146 * 147 * CLOSE processing 148 * ================ 149 * 150 * The driver enters serializer twice on close(). The close sequence is the 151 * following: 152 * 153 * 0) Wait until closing is safe (te_closewait becomes zero) 154 * This step is needed to prevent close during serializer switches. In most 155 * cases (close happening after connection establishment) te_closewait is 156 * zero. 157 * 1) Set te_closing. 158 * 2) Call tl_close_ser() within serializer and wait for it to complete. 159 * 160 * tl_close_ser() simply marks endpoint and wakes up waiting tl_close(). 
161 * It also needs to clear write-side q_next pointers - this should be done 162 * before qprocsoff(). 163 * 164 * This synchronous serializer entry during close is needed to ensure that 165 * the queue is valid everywhere inside the serializer. 166 * 167 * Note that in many cases close will execute tl_close_ser() synchronously, 168 * so it will not wait at all. 169 * 170 * 3) Calls qprocsoff(). 171 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to 172 * complete (for COTS transports). For CLTS transport there is no wait. 173 * 174 * tl_close_finish_ser() finishes the close process and wakes up waiting 175 * close if there is any. 176 * 177 * Note that in most cases close will enter tl_close_finish_ser() 178 * synchronously and will not wait at all. 179 * 180 * 181 * Flow Control 182 * ============ 183 * 184 * The driver implements both read and write side service routines. No one calls 185 * putq() on the read queue. The read side service routine tl_rsrv() is called 186 * when the read side stream is back-enabled. It enters serializer synchronously 187 * (waits till serializer processing is complete). Within serializer it 188 * back-enables all endpoints blocked by the queue for connection-less 189 * transports and enables write side service processing for the peer for 190 * connection-oriented transports. 191 * 192 * Read and write side service routines use special mblk-sized space in the 193 * endpoint structure to enter perimeter. 194 * 195 * Write-side flow control 196 * ----------------------- 197 * 198 * Write side flow control is a bit tricky. The driver needs to deal with two 199 * message queues - the explicit STREAMS message queue maintained by 200 * putq()/getq()/putbq() and the implicit queue within the serializer. These two 201 * queues should be synchronized to preserve message ordering and should 202 * maintain a single order determined by the order in which messages enter 203 * tl_wput(). 
In order to maintain the ordering between these two queues the 204 * STREAMS queue is only manipulated within the serializer, so the ordering is 205 * provided by the serializer. 206 * 207 * Functions called from the tl_wsrv() sometimes may call putbq(). To 208 * immediately stop any further processing of the STREAMS message queues the 209 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 210 * side service processing stops when the flag is set. 211 * 212 * The tl_wsrv() function enters serializer synchronously and waits for it to 213 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 214 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 215 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 216 * always bounded by the amount of messages on the STREAMS queue at the time 217 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 218 * queue from another serialized entry which can't happen in parallel. This 219 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 220 * of it draining forever while writer places new messages on the STREAMS 221 * queue). 222 * 223 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 224 * 225 * 226 * Unix Domain Sockets 227 * =================== 228 * 229 * The driver knows the structure of Unix Domain sockets addresses and treats 230 * them differently from generic TLI addresses. For sockets implicit binds are 231 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 232 * instead of using address length of zero. Explicit binds specify 233 * SOU_MAGIC_EXPLICIT as magic. 234 * 235 * For implicit binds we always use minor number as soua_vp part of the address 236 * and avoid any hash table lookups. This saves two hash tables lookups per 237 * anonymous bind. 
238 * 239 * For explicit address we hash the vnode pointer instead of hashing the 240 * full-scale address+zone+length. Hashing by pointer is more efficient than 241 * hashing by the full address. 242 * 243 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the 244 * tep structure, so it should never be freed. 245 * 246 * Also for sockets the driver always uses minor number as acceptor id. 247 * 248 * TPI VIOLATIONS 249 * -------------- 250 * 251 * This driver violates TPI in several respects for Unix Domain Sockets: 252 * 253 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind 254 * is requested and the endpoint is already in use. There is no point in 255 * generating an unused address since this address will be rejected by 256 * sockfs anyway. For implicit binds it always generates a new address 257 * (sets soua_vp to its minor number). 258 * 259 * 2) It always uses minor number as acceptor ID and never uses queue 260 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ 261 * message and they do not use the queue pointer. 262 * 263 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog 264 * followed by listen(). The listen() should be issued with non-zero 265 * backlog, so sotpi_listen() issues unbind request followed by bind 266 * request to the same address but with a non-zero qlen value. Both 267 * tl_bind() and tl_unbind() require write lock on the hash table to 268 * insert/remove the address. The driver does not remove the address from 269 * the hash for endpoints that are bound to the explicit address and have 270 * backlog of zero. During T_BIND_REQ processing if the address requested 271 * is equal to the address the endpoint already has it updates the backlog 272 * without reinserting the address in the hash table. This optimization 273 * avoids two hash table updates for each listener created. 
It also 274 * avoids the problem of a "stolen" address when another listener may use 275 * the same address between the unbind and bind and suddenly listen() fails 276 * because address is in use even though the bind() succeeded. 277 * 278 * 279 * CONNECTIONLESS TRANSPORTS 280 * ========================= 281 * 282 * Connectionless transports all share the same serializer (one for TLI and one 283 * for Sockets). Functions executing behind serializer can check or modify state 284 * of any endpoint. 285 * 286 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the 287 * te_lastep field. The next time X talks to some address A it checks whether A 288 * is the same as Y's address and if it is there is no need to lookup Y. If the 289 * address is different or the state of Y is not appropriate (e.g. closed or not 290 * idle) X does a lookup using tl_find_peer() and caches the new address. 291 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold 292 * on the endpoint found. 293 * 294 * During close of endpoint Y it doesn't try to remove itself from other 295 * endpoints caches. They will detect that Y is gone and will search the peer 296 * endpoint again. 297 * 298 * Flow Control Handling. 299 * ---------------------- 300 * 301 * Each connectionless endpoint keeps a list of endpoints which are 302 * flow-controlled by its queue. It also keeps a pointer to the queue which 303 * flow-controls itself. Whenever flow control releases for endpoint X it 304 * enables all queues from the list. During close it also back-enables everyone 305 * in the list. If X is flow-controlled when it is closing it removes itself from 306 * the peer's list. 307 * 308 * DATA STRUCTURES 309 * =============== 310 * 311 * Each endpoint is represented by the tl_endpt_t structure which keeps all the 312 * endpoint state. For connection-oriented transports it keeps a list 313 * of pending connections (tl_icon_t). 
For connectionless transports it keeps a 314 * list of endpoints flow controlled by this one. 315 * 316 * Each transport type is represented by a per-transport data structure 317 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 318 * endpoint address hash tables for each transport. It also contains pointer to 319 * transport serializer for connectionless transports. 320 * 321 * Each endpoint keeps a link to its transport structure, so the code can find 322 * all per-transport information quickly. 323 */ 324 325 #include <sys/types.h> 326 #include <sys/inttypes.h> 327 #include <sys/stream.h> 328 #include <sys/stropts.h> 329 #define _SUN_TPI_VERSION 2 330 #include <sys/tihdr.h> 331 #include <sys/strlog.h> 332 #include <sys/debug.h> 333 #include <sys/cred.h> 334 #include <sys/errno.h> 335 #include <sys/kmem.h> 336 #include <sys/id_space.h> 337 #include <sys/modhash.h> 338 #include <sys/mkdev.h> 339 #include <sys/tl.h> 340 #include <sys/stat.h> 341 #include <sys/conf.h> 342 #include <sys/modctl.h> 343 #include <sys/strsun.h> 344 #include <sys/socket.h> 345 #include <sys/socketvar.h> 346 #include <sys/sysmacros.h> 347 #include <sys/xti_xtiopt.h> 348 #include <sys/ddi.h> 349 #include <sys/sunddi.h> 350 #include <sys/zone.h> 351 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 352 #include <inet/optcom.h> 353 #include <sys/strsubr.h> 354 #include <sys/ucred.h> 355 #include <sys/suntpi.h> 356 #include <sys/list.h> 357 #include <sys/serializer.h> 358 359 /* 360 * TBD List 361 * 14 Eliminate state changes through table 362 * 16. AF_UNIX socket options 363 * 17. connect() for ticlts 364 * 18. support for "netstat" to show AF_UNIX plus TLI local 365 * transport connections 366 * 21. 
sanity check to flushing on sending M_ERROR 367 */ 368 369 /* 370 * CONSTANT DECLARATIONS 371 * -------------------- 372 */ 373 374 /* 375 * Local declarations 376 */ 377 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 378 379 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 380 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 381 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 382 /* 383 * Hash tables size. 384 */ 385 #define TL_HASH_SIZE 311 386 387 /* 388 * Definitions for module_info 389 */ 390 #define TL_ID (104) /* module ID number */ 391 #define TL_NAME "tl" /* module name */ 392 #define TL_MINPSZ (0) /* min packet size */ 393 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 394 #define TL_HIWAT (16*1024) /* hi water mark */ 395 #define TL_LOWAT (256) /* lo water mark */ 396 /* 397 * Definition of minor numbers/modes for new transport provider modes. 398 * We view the socket use as a separate mode to get a separate name space. 
399 */ 400 #define TL_TICOTS 0 /* connection oriented transport */ 401 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 402 #define TL_TICLTS 2 /* connectionless transport */ 403 #define TL_UNUSED 3 404 #define TL_SOCKET 4 /* Socket */ 405 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 406 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 407 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 408 409 #define TL_MINOR_MASK 0x7 410 #define TL_MINOR_START (TL_TICLTS + 1) 411 412 /* 413 * LOCAL MACROS 414 */ 415 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 416 417 /* 418 * EXTERNAL VARIABLE DECLARATIONS 419 * ----------------------------- 420 */ 421 /* 422 * state table defined in the OS space.c 423 */ 424 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 425 426 /* 427 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 428 */ 429 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 430 static int tl_close(queue_t *, int, cred_t *); 431 static void tl_wput(queue_t *, mblk_t *); 432 static void tl_wsrv(queue_t *); 433 static void tl_rsrv(queue_t *); 434 435 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 436 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 437 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 438 439 440 /* 441 * GLOBAL DATA STRUCTURES AND VARIABLES 442 * ----------------------------------- 443 */ 444 445 /* 446 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 447 * For now, we only manage the SO_RECVUCRED option but we also have 448 * harmless dummy options to make things work with some common code we access. 449 */ 450 opdes_t tl_opt_arr[] = { 451 /* The SO_TYPE is needed for the hack below */ 452 { 453 SO_TYPE, 454 SOL_SOCKET, 455 OA_R, 456 OA_R, 457 OP_NP, 458 0, 459 sizeof (t_scalar_t), 460 0 461 }, 462 { 463 SO_RECVUCRED, 464 SOL_SOCKET, 465 OA_RW, 466 OA_RW, 467 OP_NP, 468 0, 469 sizeof (int), 470 0 471 } 472 }; 473 474 /* 475 * Table of all supported levels 476 * Note: Some levels (e.g. 
XTI_GENERIC) may be valid but may not have 477 * any supported options so we need this info separately. 478 * 479 * This is needed only for topmost tpi providers. 480 */ 481 optlevel_t tl_valid_levels_arr[] = { 482 XTI_GENERIC, 483 SOL_SOCKET, 484 TL_PROT_LEVEL 485 }; 486 487 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 488 /* 489 * Current upper bound on the amount of space needed to return all options. 490 * Additional options with data size of sizeof(long) are handled automatically. 491 * Others need hand job. 492 */ 493 #define TL_MAX_OPT_BUF_LEN \ 494 ((A_CNT(tl_opt_arr) << 2) + \ 495 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 496 + 64 + sizeof (struct T_optmgmt_ack)) 497 498 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 499 500 /* 501 * transport addr structure 502 */ 503 typedef struct tl_addr { 504 zoneid_t ta_zoneid; /* Zone scope of address */ 505 t_scalar_t ta_alen; /* length of abuf */ 506 void *ta_abuf; /* the addr itself */ 507 } tl_addr_t; 508 509 /* 510 * Refcounted version of serializer. 511 */ 512 typedef struct tl_serializer { 513 uint_t ts_refcnt; 514 serializer_t *ts_serializer; 515 } tl_serializer_t; 516 517 /* 518 * Each transport type has a separate state. 519 * Per-transport state. 
520 */ 521 typedef struct tl_transport_state { 522 char *tr_name; 523 minor_t tr_minor; 524 uint32_t tr_defaddr; 525 mod_hash_t *tr_ai_hash; 526 mod_hash_t *tr_addr_hash; 527 tl_serializer_t *tr_serializer; 528 } tl_transport_state_t; 529 530 #define TL_DFADDR 0x1000 531 532 static tl_transport_state_t tl_transports[] = { 533 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 534 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 535 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 536 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 537 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 538 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 539 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 540 }; 541 542 #define TL_MAXTRANSPORT A_CNT(tl_transports) 543 544 struct tl_endpt; 545 typedef struct tl_endpt tl_endpt_t; 546 547 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 548 549 /* 550 * Data structure used to represent pending connects. 551 * Records enough information so that the connecting peer can close 552 * before the connection gets accepted. 553 */ 554 typedef struct tl_icon { 555 list_node_t ti_node; 556 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 557 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 558 t_scalar_t ti_seqno; /* Sequence number */ 559 } tl_icon_t; 560 561 typedef struct so_ux_addr soux_addr_t; 562 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 563 564 /* 565 * Maximum number of unaccepted connection indications allowed per listener. 
566 */ 567 #define TL_MAXQLEN 4096 568 int tl_maxqlen = TL_MAXQLEN; 569 570 /* 571 * transport endpoint structure 572 */ 573 struct tl_endpt { 574 queue_t *te_rq; /* stream read queue */ 575 queue_t *te_wq; /* stream write queue */ 576 uint32_t te_refcnt; 577 int32_t te_state; /* TPI state of endpoint */ 578 minor_t te_minor; /* minor number */ 579 #define te_seqno te_minor 580 uint_t te_flag; /* flag field */ 581 boolean_t te_nowsrv; 582 tl_serializer_t *te_ser; /* Serializer to use */ 583 #define te_serializer te_ser->ts_serializer 584 585 soux_addr_t te_uxaddr; /* Socket address */ 586 #define te_magic te_uxaddr.soua_magic 587 #define te_vp te_uxaddr.soua_vp 588 tl_addr_t te_ap; /* addr bound to this endpt */ 589 #define te_zoneid te_ap.ta_zoneid 590 #define te_alen te_ap.ta_alen 591 #define te_abuf te_ap.ta_abuf 592 593 tl_transport_state_t *te_transport; 594 #define te_addrhash te_transport->tr_addr_hash 595 #define te_aihash te_transport->tr_ai_hash 596 #define te_defaddr te_transport->tr_defaddr 597 cred_t *te_credp; /* endpoint user credentials */ 598 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 599 600 /* 601 * State specific for connection-oriented and connectionless transports. 602 */ 603 union { 604 /* Connection-oriented state. */ 605 struct { 606 t_uscalar_t _te_nicon; /* count of conn requests */ 607 t_uscalar_t _te_qlen; /* max conn requests */ 608 tl_endpt_t *_te_oconp; /* conn request pending */ 609 tl_endpt_t *_te_conp; /* connected endpt */ 610 #ifndef _ILP32 611 void *_te_pad; 612 #endif 613 list_t _te_iconp; /* list of conn ind. pending */ 614 } _te_cots_state; 615 /* Connection-less state. */ 616 struct { 617 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 618 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 619 list_node_t _te_flows; /* lists of connections */ 620 list_t _te_flowlist; /* Who flowcontrols on me */ 621 } _te_clts_state; 622 } _te_transport_state; 623 #define te_nicon _te_transport_state._te_cots_state._te_nicon 624 #define te_qlen _te_transport_state._te_cots_state._te_qlen 625 #define te_oconp _te_transport_state._te_cots_state._te_oconp 626 #define te_conp _te_transport_state._te_cots_state._te_conp 627 #define te_iconp _te_transport_state._te_cots_state._te_iconp 628 #define te_lastep _te_transport_state._te_clts_state._te_lastep 629 #define te_flowq _te_transport_state._te_clts_state._te_flowq 630 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 631 #define te_flows _te_transport_state._te_clts_state._te_flows 632 633 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 634 timeout_id_t te_timoutid; /* outstanding timeout id */ 635 pid_t te_cpid; /* cached pid of endpoint */ 636 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 637 /* 638 * Pieces of the endpoint state needed for closing. 639 */ 640 kmutex_t te_closelock; 641 kcondvar_t te_closecv; 642 uint8_t te_closing; /* The endpoint started closing */ 643 uint8_t te_closewait; /* Wait in close until zero */ 644 mblk_t te_closemp; /* for entering serializer on close */ 645 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 646 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 647 kmutex_t te_srv_lock; 648 kcondvar_t te_srv_cv; 649 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 650 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 651 /* 652 * Pieces of the endpoint state needed for serializer transitions. 653 */ 654 kmutex_t te_ser_lock; /* Protects the count below */ 655 uint_t te_ser_count; /* Number of messages on serializer */ 656 }; 657 658 /* 659 * Flag values. Lower 4 bits specify that transport used. 
660 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 661 * they allow to identify the endpoint more easily. 662 */ 663 #define TL_LISTENER 0x00010 /* the listener endpoint */ 664 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 665 #define TL_EAGER 0x00040 /* connecting endpoint */ 666 #define TL_ACCEPTED 0x00080 /* accepted connection */ 667 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 668 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 669 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 670 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 671 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 672 /* 673 * Boolean checks for the endpoint type. 674 */ 675 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 676 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 677 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 678 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 679 680 /* 681 * Certain operations are always used together. These macros reduce the chance 682 * of missing a part of a combination. 683 */ 684 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 685 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 686 687 #define TL_PUTBQ(x, mp) { \ 688 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 689 (x)->te_nowsrv = B_TRUE; \ 690 (void) putbq((x)->te_wq, mp); \ 691 } 692 693 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 694 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 695 696 /* 697 * STREAMS driver glue data structures. 
698 */ 699 static struct module_info tl_minfo = { 700 TL_ID, /* mi_idnum */ 701 TL_NAME, /* mi_idname */ 702 TL_MINPSZ, /* mi_minpsz */ 703 TL_MAXPSZ, /* mi_maxpsz */ 704 TL_HIWAT, /* mi_hiwat */ 705 TL_LOWAT /* mi_lowat */ 706 }; 707 708 static struct qinit tl_rinit = { 709 NULL, /* qi_putp */ 710 (int (*)())tl_rsrv, /* qi_srvp */ 711 tl_open, /* qi_qopen */ 712 tl_close, /* qi_qclose */ 713 NULL, /* qi_qadmin */ 714 &tl_minfo, /* qi_minfo */ 715 NULL /* qi_mstat */ 716 }; 717 718 static struct qinit tl_winit = { 719 (int (*)())tl_wput, /* qi_putp */ 720 (int (*)())tl_wsrv, /* qi_srvp */ 721 NULL, /* qi_qopen */ 722 NULL, /* qi_qclose */ 723 NULL, /* qi_qadmin */ 724 &tl_minfo, /* qi_minfo */ 725 NULL /* qi_mstat */ 726 }; 727 728 static struct streamtab tlinfo = { 729 &tl_rinit, /* st_rdinit */ 730 &tl_winit, /* st_wrinit */ 731 NULL, /* st_muxrinit */ 732 NULL /* st_muxwrinit */ 733 }; 734 735 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 736 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); 737 738 static struct modldrv modldrv = { 739 &mod_driverops, /* Type of module -- pseudo driver here */ 740 "TPI Local Transport (tl)", 741 &tl_devops, /* driver ops */ 742 }; 743 744 /* 745 * Module linkage information for the kernel. 746 */ 747 static struct modlinkage modlinkage = { 748 MODREV_1, 749 &modldrv, 750 NULL 751 }; 752 753 /* 754 * Templates for response to info request 755 * Check sanity of unlimited connect data etc. 
756 */ 757 758 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 759 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 760 761 static struct T_info_ack tl_cots_info_ack = 762 { 763 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 764 T_INFINITE, /* TSDU size */ 765 T_INFINITE, /* ETSDU size */ 766 T_INFINITE, /* CDATA_size */ 767 T_INFINITE, /* DDATA_size */ 768 T_INFINITE, /* ADDR_size */ 769 T_INFINITE, /* OPT_size */ 770 0, /* TIDU_size - fill at run time */ 771 T_COTS, /* SERV_type */ 772 -1, /* CURRENT_state */ 773 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 774 }; 775 776 static struct T_info_ack tl_clts_info_ack = 777 { 778 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 779 0, /* TSDU_size - fill at run time */ 780 -2, /* ETSDU_size -2 => not supported */ 781 -2, /* CDATA_size -2 => not supported */ 782 -2, /* DDATA_size -2 => not supported */ 783 -1, /* ADDR_size -1 => infinite */ 784 -1, /* OPT_size */ 785 0, /* TIDU_size - fill at run time */ 786 T_CLTS, /* SERV_type */ 787 -1, /* CURRENT_state */ 788 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 789 }; 790 791 /* 792 * private copy of devinfo pointer used in tl_info 793 */ 794 static dev_info_t *tl_dip; 795 796 /* 797 * Endpoints cache. 798 */ 799 static kmem_cache_t *tl_cache; 800 /* 801 * Minor number space. 802 */ 803 static id_space_t *tl_minors; 804 805 /* 806 * Default Data Unit size. 807 */ 808 static t_scalar_t tl_tidusz; 809 810 /* 811 * Size of hash tables. 812 */ 813 static size_t tl_hash_size = TL_HASH_SIZE; 814 815 /* 816 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 817 * for sockets. 
818 */ 819 static int tl_disable_early_connect = 0; 820 static int tl_client_closing_when_accepting; 821 822 static int tl_serializer_noswitch; 823 824 /* 825 * LOCAL FUNCTION PROTOTYPES 826 * ------------------------- 827 */ 828 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 829 static void tl_do_proto(mblk_t *, tl_endpt_t *); 830 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 831 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 832 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 833 t_scalar_t); 834 static void tl_bind(mblk_t *, tl_endpt_t *); 835 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 836 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 837 static void tl_unbind(mblk_t *, tl_endpt_t *); 838 static void tl_optmgmt(queue_t *, mblk_t *); 839 static void tl_conn_req(queue_t *, mblk_t *); 840 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 841 static void tl_conn_res(mblk_t *, tl_endpt_t *); 842 static void tl_discon_req(mblk_t *, tl_endpt_t *); 843 static void tl_capability_req(mblk_t *, tl_endpt_t *); 844 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 845 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *); 846 static void tl_info_req(mblk_t *, tl_endpt_t *); 847 static void tl_addr_req(mblk_t *, tl_endpt_t *); 848 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 849 static void tl_data(mblk_t *, tl_endpt_t *); 850 static void tl_exdata(mblk_t *, tl_endpt_t *); 851 static void tl_ordrel(mblk_t *, tl_endpt_t *); 852 static void tl_unitdata(mblk_t *, tl_endpt_t *); 853 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 854 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 855 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 856 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 857 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 858 static void tl_cl_backenable(tl_endpt_t *); 859 static void tl_co_unconnect(tl_endpt_t *); 860 static 
mblk_t *tl_resizemp(mblk_t *, ssize_t); 861 static void tl_discon_ind(tl_endpt_t *, uint32_t); 862 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 863 static mblk_t *tl_ordrel_ind_alloc(void); 864 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 865 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 866 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 867 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 868 static void tl_icon_freemsgs(mblk_t **); 869 static void tl_merror(queue_t *, mblk_t *, int); 870 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 871 static int tl_default_opt(queue_t *, int, int, uchar_t *); 872 static int tl_get_opt(queue_t *, int, int, uchar_t *); 873 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 874 uchar_t *, void *, cred_t *); 875 static void tl_memrecover(queue_t *, mblk_t *, size_t); 876 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 877 static void tl_free(tl_endpt_t *); 878 static int tl_constructor(void *, void *, int); 879 static void tl_destructor(void *, void *); 880 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 881 static tl_serializer_t *tl_serializer_alloc(int); 882 static void tl_serializer_refhold(tl_serializer_t *); 883 static void tl_serializer_refrele(tl_serializer_t *); 884 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 885 static void tl_serializer_exit(tl_endpt_t *); 886 static boolean_t tl_noclose(tl_endpt_t *); 887 static void tl_closeok(tl_endpt_t *); 888 static void tl_refhold(tl_endpt_t *); 889 static void tl_refrele(tl_endpt_t *); 890 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 891 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 892 static void tl_close_ser(mblk_t *, tl_endpt_t *); 893 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 895 static void tl_proto_ser(mblk_t 
*, tl_endpt_t *); 896 static void tl_putq_ser(mblk_t *, tl_endpt_t *); 897 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 898 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 899 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 900 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 901 static void tl_addr_unbind(tl_endpt_t *); 902 903 /* 904 * Intialize option database object for TL 905 */ 906 907 optdb_obj_t tl_opt_obj = { 908 tl_default_opt, /* TL default value function pointer */ 909 tl_get_opt, /* TL get function pointer */ 910 tl_set_opt, /* TL set function pointer */ 911 TL_OPT_ARR_CNT, /* TL option database count of entries */ 912 tl_opt_arr, /* TL option database */ 913 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 914 tl_valid_levels_arr /* TL valid level array */ 915 }; 916 917 /* 918 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 919 * --------------------------------------- 920 */ 921 922 /* 923 * Loadable module routines 924 */ 925 int 926 _init(void) 927 { 928 return (mod_install(&modlinkage)); 929 } 930 931 int 932 _fini(void) 933 { 934 return (mod_remove(&modlinkage)); 935 } 936 937 int 938 _info(struct modinfo *modinfop) 939 { 940 return (mod_info(&modlinkage, modinfop)); 941 } 942 943 /* 944 * Driver Entry Points and Other routines 945 */ 946 static int 947 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 948 { 949 int i; 950 char name[32]; 951 952 /* 953 * Resume from a checkpoint state. 954 */ 955 if (cmd == DDI_RESUME) 956 return (DDI_SUCCESS); 957 958 if (cmd != DDI_ATTACH) 959 return (DDI_FAILURE); 960 961 /* 962 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that 963 * streams message sizes can be unlimited. We use a defined constant 964 * instead. 965 */ 966 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 967 968 /* 969 * Create subdevices for each transport. 
970 */ 971 for (i = 0; i < TL_UNUSED; i++) { 972 if (ddi_create_minor_node(devi, 973 tl_transports[i].tr_name, 974 S_IFCHR, tl_transports[i].tr_minor, 975 DDI_PSEUDO, NULL) == DDI_FAILURE) { 976 ddi_remove_minor_node(devi, NULL); 977 return (DDI_FAILURE); 978 } 979 } 980 981 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 982 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 983 984 if (tl_cache == NULL) { 985 ddi_remove_minor_node(devi, NULL); 986 return (DDI_FAILURE); 987 } 988 989 tl_minors = id_space_create("tl_minor_space", 990 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 991 992 /* 993 * Create ID space for minor numbers 994 */ 995 for (i = 0; i < TL_MAXTRANSPORT; i++) { 996 tl_transport_state_t *t = &tl_transports[i]; 997 998 if (i == TL_UNUSED) 999 continue; 1000 1001 /* Socket COTSORD shares namespace with COTS */ 1002 if (i == TL_SOCK_COTSORD) { 1003 t->tr_ai_hash = 1004 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1005 ASSERT(t->tr_ai_hash != NULL); 1006 t->tr_addr_hash = 1007 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1008 ASSERT(t->tr_addr_hash != NULL); 1009 continue; 1010 } 1011 1012 /* 1013 * Create hash tables. 
1014 */ 1015 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1016 t->tr_name); 1017 #ifdef _ILP32 1018 if (i & TL_SOCKET) 1019 t->tr_ai_hash = 1020 mod_hash_create_idhash(name, tl_hash_size - 1, 1021 mod_hash_null_valdtor); 1022 else 1023 t->tr_ai_hash = 1024 mod_hash_create_ptrhash(name, tl_hash_size, 1025 mod_hash_null_valdtor, sizeof (queue_t)); 1026 #else 1027 t->tr_ai_hash = 1028 mod_hash_create_idhash(name, tl_hash_size - 1, 1029 mod_hash_null_valdtor); 1030 #endif /* _ILP32 */ 1031 1032 if (i & TL_SOCKET) { 1033 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1034 t->tr_name); 1035 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1036 tl_hash_size, mod_hash_null_valdtor, 1037 sizeof (uintptr_t)); 1038 } else { 1039 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1040 t->tr_name); 1041 t->tr_addr_hash = mod_hash_create_extended(name, 1042 tl_hash_size, mod_hash_null_keydtor, 1043 mod_hash_null_valdtor, 1044 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1045 } 1046 1047 /* Create serializer for connectionless transports. */ 1048 if (i & TL_TICLTS) 1049 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1050 } 1051 1052 tl_dip = devi; 1053 1054 return (DDI_SUCCESS); 1055 } 1056 1057 static int 1058 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1059 { 1060 int i; 1061 1062 if (cmd == DDI_SUSPEND) 1063 return (DDI_SUCCESS); 1064 1065 if (cmd != DDI_DETACH) 1066 return (DDI_FAILURE); 1067 1068 /* 1069 * Destroy arenas and hash tables. 
1070 */ 1071 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1072 tl_transport_state_t *t = &tl_transports[i]; 1073 1074 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1075 continue; 1076 1077 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL); 1078 if (t->tr_serializer != NULL) { 1079 tl_serializer_refrele(t->tr_serializer); 1080 t->tr_serializer = NULL; 1081 } 1082 1083 #ifdef _ILP32 1084 if (i & TL_SOCKET) 1085 mod_hash_destroy_idhash(t->tr_ai_hash); 1086 else 1087 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1088 #else 1089 mod_hash_destroy_idhash(t->tr_ai_hash); 1090 #endif /* _ILP32 */ 1091 t->tr_ai_hash = NULL; 1092 if (i & TL_SOCKET) 1093 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1094 else 1095 mod_hash_destroy_hash(t->tr_addr_hash); 1096 t->tr_addr_hash = NULL; 1097 } 1098 1099 kmem_cache_destroy(tl_cache); 1100 tl_cache = NULL; 1101 id_space_destroy(tl_minors); 1102 tl_minors = NULL; 1103 ddi_remove_minor_node(devi, NULL); 1104 return (DDI_SUCCESS); 1105 } 1106 1107 /* ARGSUSED */ 1108 static int 1109 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1110 { 1111 1112 int retcode = DDI_FAILURE; 1113 1114 switch (infocmd) { 1115 1116 case DDI_INFO_DEVT2DEVINFO: 1117 if (tl_dip != NULL) { 1118 *result = (void *)tl_dip; 1119 retcode = DDI_SUCCESS; 1120 } 1121 break; 1122 1123 case DDI_INFO_DEVT2INSTANCE: 1124 *result = (void *)0; 1125 retcode = DDI_SUCCESS; 1126 break; 1127 1128 default: 1129 break; 1130 } 1131 return (retcode); 1132 } 1133 1134 /* 1135 * Endpoint reference management. 
 */

/*
 * Take one additional reference on the endpoint (atomic increment of
 * te_refcnt).
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_add_32(&tep->te_refcnt, 1);
}

/*
 * Drop one reference on the endpoint. When the count reaches zero the
 * endpoint is released via tl_free().
 */
static void
tl_refrele(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt != 0);

	if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
		tl_free(tep);
}

/*
 * Constructor registered with the endpoint cache in tl_attach(): zeroes the
 * endpoint and initializes its mutexes and condition variables.
 * Always returns 0.
 */
/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
	tl_endpt_t *tep = buf;

	bzero(tep, sizeof (tl_endpt_t));
	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*
 * Destructor registered with the endpoint cache in tl_attach(): destroys the
 * synchronization objects set up by tl_constructor().
 */
/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
	tl_endpt_t *tep = buf;

	mutex_destroy(&tep->te_closelock);
	cv_destroy(&tep->te_closecv);
	mutex_destroy(&tep->te_srv_lock);
	cv_destroy(&tep->te_srv_cv);
	mutex_destroy(&tep->te_ser_lock);
}

/*
 * Release an endpoint whose reference count dropped to zero (called from
 * tl_refrele()).
 *
 * Frees the non-socket address buffer, returns the minor number to
 * tl_minors, cancels an unused hash reservation, drops peer references and
 * the serializer reference for COTS endpoints, resets the remaining state
 * fields and hands the endpoint back to tl_cache.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(! (tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/* Socket address lives inside the endpoint; nothing to free. */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Give back the hash handle reserved in tl_open() if still held. */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	/* Reset flags so the cached object is clean for the next tl_open(). */
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}

/*
 * Allocate/free reference-counted wrappers for serializers.
1241 */ 1242 static tl_serializer_t * 1243 tl_serializer_alloc(int flags) 1244 { 1245 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1246 serializer_t *ser; 1247 1248 if (s == NULL) 1249 return (NULL); 1250 1251 ser = serializer_create(flags); 1252 1253 if (ser == NULL) { 1254 kmem_free(s, sizeof (tl_serializer_t)); 1255 return (NULL); 1256 } 1257 1258 s->ts_refcnt = 1; 1259 s->ts_serializer = ser; 1260 return (s); 1261 } 1262 1263 static void 1264 tl_serializer_refhold(tl_serializer_t *s) 1265 { 1266 atomic_add_32(&s->ts_refcnt, 1); 1267 } 1268 1269 static void 1270 tl_serializer_refrele(tl_serializer_t *s) 1271 { 1272 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1273 serializer_destroy(s->ts_serializer); 1274 kmem_free(s, sizeof (tl_serializer_t)); 1275 } 1276 } 1277 1278 /* 1279 * Post a request on the endpoint serializer. For COTS transports keep track of 1280 * the number of pending requests. 1281 */ 1282 static void 1283 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1284 { 1285 if (IS_COTS(tep)) { 1286 mutex_enter(&tep->te_ser_lock); 1287 tep->te_ser_count++; 1288 mutex_exit(&tep->te_ser_lock); 1289 } 1290 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1291 } 1292 1293 /* 1294 * Complete processing the request on the serializer. Decrement the counter for 1295 * pending requests for COTS transports. 1296 */ 1297 static void 1298 tl_serializer_exit(tl_endpt_t *tep) 1299 { 1300 if (IS_COTS(tep)) { 1301 mutex_enter(&tep->te_ser_lock); 1302 ASSERT(tep->te_ser_count != 0); 1303 tep->te_ser_count--; 1304 mutex_exit(&tep->te_ser_lock); 1305 } 1306 } 1307 1308 /* 1309 * Hash management functions. 1310 */ 1311 1312 /* 1313 * Return TRUE if two addresses are equal, false otherwise. 
 */
static boolean_t
tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
{
	/* Equal means: non-empty, same length, same zone, same bytes. */
	return ((ap1->ta_alen > 0) &&
	    (ap1->ta_alen == ap2->ta_alen) &&
	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
}

/*
 * This function is called whenever an endpoint is found in the hash table.
 * It takes a reference on the endpoint stored as the hash value.
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}

/*
 * Address hash function.
 * The hash is seeded with the zone ID and folds in every address byte.
 */
/* ARGSUSED */
static uint_t
tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
{
	tl_addr_t *ap = (tl_addr_t *)key;
	size_t	len = ap->ta_alen;
	uchar_t *p = ap->ta_abuf;
	uint_t i, g;

	ASSERT((len > 0) && (p != NULL));

	for (i = ap->ta_zoneid; len -- != 0; p++) {
		i = (i << 4) + (*p);
		/* Fold the top four bits back into the low bits. */
		if ((g = (i & 0xf0000000U)) != 0) {
			i ^= (g >> 24);
			i ^= g;
		}
	}
	return (i);
}

/*
 * This function is used by hash lookups. It compares two generic addresses.
 * Returns 0 when the addresses are equal and non-zero otherwise.
 */
static int
tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
{
#ifdef	DEBUG
	tl_addr_t *ap1 = (tl_addr_t *)key1;
	tl_addr_t *ap2 = (tl_addr_t *)key2;

	ASSERT(key1 != NULL);
	ASSERT(key2 != NULL);

	ASSERT(ap1->ta_abuf != NULL);
	ASSERT(ap2->ta_abuf != NULL);
	ASSERT(ap1->ta_alen > 0);
	ASSERT(ap2->ta_alen > 0);
#endif

	return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
}

/*
 * Prevent endpoint from closing if possible.
 * Return B_TRUE on success, B_FALSE on failure.
 *
 * On success te_closewait is raised, which makes tl_close() wait until
 * tl_closeok() is called. Fails when the endpoint is already closing.
 */
static boolean_t
tl_noclose(tl_endpt_t *tep)
{
	boolean_t rc = B_FALSE;

	mutex_enter(&tep->te_closelock);
	if (! tep->te_closing) {
		ASSERT(tep->te_closewait == 0);
		tep->te_closewait++;
		rc = B_TRUE;
	}
	mutex_exit(&tep->te_closelock);
	return (rc);
}

/*
 * Allow endpoint to close if needed.
 * Drops te_closewait and signals te_closecv, on which tl_close() waits.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}

/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport; SO_SOCKSTR in oflag
 * selects the socket flavor of it. A new endpoint is taken from tl_cache,
 * initialized for its transport, given a freshly allocated minor number
 * (returned through *devp) and entered into the acceptor ID hash.
 *
 * Returns 0 on success (also when the queue is already open) and ENXIO for
 * CLONEOPEN/MODOPEN or an out-of-range minor number.
 */
/* ARGSUSED */
static int
tl_open(queue_t	*rq, dev_t *devp, int oflag, int sflag,	cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Pick the acceptor ID: the minor number, except for non-socket
	 * endpoints in ILP32 builds, which use the read queue pointer.
	 */
#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}

/*
 * STREAMS close entry point.
 *
 * Removes the endpoint from the acceptor ID hash, marks it as closing and
 * then runs the close in two serialized phases: tl_close_ser() before
 * qprocsoff() and tl_close_finish_ser() after it. Finally drops the
 * credential and the reference taken in tl_open(). Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag,	cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout for this queue. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}

/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}

/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}

/*
 * STREAMS write-side put procedure.
 * Enter serializer for most of the processing.
 *
 * The T_CONN_REQ is processed outside of serializer.
 *
 * The message type (and, for M_PROTO/M_PCPROTO, the TPI primitive) selects
 * the serialized handler stored in tl_proc. Messages that are answered,
 * rejected or fully handled here return early; everything else takes an
 * endpoint reference and is posted on the serializer.
 */
static void
tl_wput(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
	ssize_t			msz = MBLKL(mp);
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	tlproc_t		*tl_proc = NULL;

	switch (DB_TYPE(mp)) {
	case M_DATA:
		/* Only valid for connection-oriented transports */
		if (IS_CLTS(tep)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_DATA invalid for ticlts driver"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		tl_proc = tl_wput_data_ser;
		break;

	case M_IOCTL:
		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
		case TL_IOC_CREDOPT:
			/* FALLTHROUGH */
		case TL_IOC_UCREDOPT:
			/*
			 * Serialize endpoint state change.
			 */
			tl_proc = tl_do_ioctl_ser;
			break;

		default:
			miocnak(wq, mp, 0, EINVAL);
			return;
		}
		break;

	case M_FLUSH:
		/*
		 * do canonical M_FLUSH processing
		 */
		if (*mp->b_rptr & FLUSHW) {
			flushq(wq, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(wq), FLUSHALL);
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		return;

	case M_PROTO:
		/* Need at least the primitive type to dispatch on. */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_PROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_OPTMGMT_REQ:
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * Process TPI option management requests immediately
			 * in put procedure regardless of in-order processing
			 * of already queued messages.
			 * (Note: This driver supports AF_UNIX socket
			 * implementation.  Unless we implement this processing,
			 * setsockopt() on socket endpoint will block on flow
			 * controlled endpoints which it should not. That is
			 * required for successful execution of VSU socket tests
			 * and is consistent with BSD socket behavior).
			 */
			tl_optmgmt(wq, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			tl_proc = tl_bind_ser;
			break;
		case T_CONN_REQ:
			if (IS_CLTS(tep)) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			tl_conn_req(wq, mp);
			return;
		case T_DATA_REQ:
		case T_OPTDATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			tl_proc = tl_putq_ser;
			break;
		case T_UNITDATA_REQ:
			if (IS_COTS(tep) ||
			    (msz < sizeof (struct T_unitdata_req))) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			/*
			 * Send directly only when idle and nothing is queued
			 * ahead of this message; otherwise queue it.
			 */
			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
				tl_proc = tl_unitdata_ser;
			} else {
				tl_proc = tl_putq_ser;
			}
			break;
		default:
			/*
			 * process in service procedure if message already
			 * queued (maintain in-order processing)
			 */
			if (wq->q_first != NULL) {
				tl_proc = tl_putq_ser;
			} else {
				tl_proc = tl_wput_ser;
			}
			break;
		}
		break;

	case M_PCPROTO:
		/*
		 * Check that the message has enough data to figure out TPI
		 * primitive.
		 */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_PCROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_CAPABILITY_REQ:
			tl_capability_req(mp, tep);
			return;
		case T_INFO_REQ:
			tl_proc = tl_info_req_ser;
			break;
		case T_ADDR_REQ:
			tl_proc = tl_addr_req_ser;
			break;

		default:
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:unknown TPI msg primitive"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_wput:default:unexpected Streams message"));
		freemsg(mp);
		return;
	}

	/*
	 * Continue processing via serializer.
	 * The reference taken here is dropped by the serialized handlers
	 * visible in this file (e.g. tl_putq_ser(), tl_wput_ser(),
	 * tl_wput_data_ser()).
	 */
	ASSERT(tl_proc != NULL);
	tl_refhold(tep);
	tl_serializer_enter(tep, tl_proc, mp);
}

/*
 * Place message on the queue while preserving order.
 * If the endpoint is already closing the message is processed right away
 * through tl_wput_ser() instead of being queued.
 */
static void
tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
{
	if (tep->te_closing) {
		tl_wput_ser(mp, tep);
	} else {
		TL_PUTQ(tep, mp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
	}

}

/*
 * Dispatch one write-side message by type: M_DATA goes to tl_data() and
 * M_PROTO to tl_do_proto(). Any other type is freed (and trips the ASSERT
 * on DEBUG builds). Does not leave the serializer or drop a reference.
 */
static void
tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));

	switch (DB_TYPE(mp)) {
	case M_DATA:
		tl_data(mp, tep);
		break;
	case M_PROTO:
		tl_do_proto(mp, tep);
		break;
	default:
		freemsg(mp);
		break;
	}
}

/*
 * Write side put procedure called from serializer.
 * Processes the message, then leaves the serializer and drops one endpoint
 * reference.
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_wput_common_ser(mp, tep);
	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * M_DATA processing. Called from serializer.
 * Leaves the serializer and drops one endpoint reference when done.
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t	*peer_tep = tep->te_conp;
	queue_t		*peer_rq;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(IS_COTS(tep));

	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

	/*
	 * fastpath for data. Ignore flow control if tep is closing.
	 */
	if ((peer_tep != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WREQ_ORDREL)) &&
	    (tep->te_wq != NULL) &&
	    (tep->te_wq->q_first == NULL) &&
	    ((peer_tep->te_state == TS_DATA_XFER) ||
	    (peer_tep->te_state == TS_WREQ_ORDREL))  &&
	    ((peer_rq = peer_tep->te_rq) != NULL) &&
	    (canputnext(peer_rq) || tep->te_closing)) {
		putnext(peer_rq, mp);
	} else if (tep->te_closing) {
		/*
		 * It is possible that by the time we got here tep started to
		 * close. If the write queue is not empty, and the state is
		 * TS_DATA_XFER the data should be delivered in order, so we
		 * call putq() instead of freeing the data.
		 */
		if ((tep->te_wq != NULL) &&
		    ((tep->te_state == TS_DATA_XFER) ||
		    (tep->te_state == TS_WREQ_ORDREL))) {
			TL_PUTQ(tep, mp);
		} else {
			freemsg(mp);
		}
	} else {
		TL_PUTQ(tep, mp);
	}

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Write side service routine.
 *
 * All actual processing happens within serializer which is entered
 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
 * messages that need processing may have arrived, so tl_wsrv repeats until
 * queue is empty or te_nowsrv is set.
 */
static void
tl_wsrv(queue_t *wq)
{
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active == B_FALSE);
		tep->te_wsrv_active = B_TRUE;
		mutex_exit(&tep->te_srv_lock);

		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);

		/*
		 * Wait for serializer job to complete.
		 */
		mutex_enter(&tep->te_srv_lock);
		while (tep->te_wsrv_active) {
			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
		}
		/* Pass the wakeup on to any other waiter on te_srv_cv. */
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
	}
}

/*
 * Serialized write side processing of the STREAMS queue.
 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
 * is NULL.
 */
static void
tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
{
	mblk_t *mp;
	queue_t *wq = tep->te_wq;

	ASSERT(wq != NULL);
	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
		tl_wput_common_ser(mp, tep);
	}

	/*
	 * Wakeup service routine unless called from close.
	 * If ser_mp is specified, the caller is tl_wsrv().
	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
	 * be no matching tl_serializer_exit() in this case.
	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
	 * waiting on te_srv_cv.
	 */
	if (ser_mp != NULL) {
		/*
		 * We are called from tl_wsrv.
		 */
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active);
		tep->te_wsrv_active = B_FALSE;
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
		tl_serializer_exit(tep);
	}
}

/*
 * Called when the stream is backenabled. Enter serializer and qenable everyone
 * flow controlled by tep.
 *
 * NOTE: The service routine should enter serializer synchronously. Otherwise it
 * is possible that two instances of tl_rsrv will be running reusing the same
 * rsrv mblk.
2060 */ 2061 static void 2062 tl_rsrv(queue_t *rq) 2063 { 2064 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2065 2066 ASSERT(rq->q_first == NULL); 2067 ASSERT(tep->te_rsrv_active == 0); 2068 2069 tep->te_rsrv_active = B_TRUE; 2070 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2071 /* 2072 * Wait for serializer job to complete. 2073 */ 2074 mutex_enter(&tep->te_srv_lock); 2075 while (tep->te_rsrv_active) { 2076 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2077 } 2078 cv_signal(&tep->te_srv_cv); 2079 mutex_exit(&tep->te_srv_lock); 2080 } 2081 2082 /* ARGSUSED */ 2083 static void 2084 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2085 { 2086 tl_endpt_t *peer_tep; 2087 2088 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2089 tl_cl_backenable(tep); 2090 } else if ( 2091 IS_COTS(tep) && 2092 ((peer_tep = tep->te_conp) != NULL) && 2093 !peer_tep->te_closing && 2094 ((tep->te_state == TS_DATA_XFER) || 2095 (tep->te_state == TS_WIND_ORDREL)|| 2096 (tep->te_state == TS_WREQ_ORDREL))) { 2097 TL_QENABLE(peer_tep); 2098 } 2099 2100 /* 2101 * Wakeup read side service routine. 2102 */ 2103 mutex_enter(&tep->te_srv_lock); 2104 ASSERT(tep->te_rsrv_active); 2105 tep->te_rsrv_active = B_FALSE; 2106 cv_signal(&tep->te_srv_cv); 2107 mutex_exit(&tep->te_srv_lock); 2108 tl_serializer_exit(tep); 2109 } 2110 2111 /* 2112 * process M_PROTO messages. Always called from serializer. 2113 */ 2114 static void 2115 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2116 { 2117 ssize_t msz = MBLKL(mp); 2118 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2119 2120 /* Message size was validated by tl_wput(). 
*/ 2121 ASSERT(msz >= sizeof (prim->type)); 2122 2123 switch (prim->type) { 2124 case T_UNBIND_REQ: 2125 tl_unbind(mp, tep); 2126 break; 2127 2128 case T_ADDR_REQ: 2129 tl_addr_req(mp, tep); 2130 break; 2131 2132 case O_T_CONN_RES: 2133 case T_CONN_RES: 2134 if (IS_CLTS(tep)) { 2135 tl_merror(tep->te_wq, mp, EPROTO); 2136 break; 2137 } 2138 tl_conn_res(mp, tep); 2139 break; 2140 2141 case T_DISCON_REQ: 2142 if (IS_CLTS(tep)) { 2143 tl_merror(tep->te_wq, mp, EPROTO); 2144 break; 2145 } 2146 tl_discon_req(mp, tep); 2147 break; 2148 2149 case T_DATA_REQ: 2150 if (IS_CLTS(tep)) { 2151 tl_merror(tep->te_wq, mp, EPROTO); 2152 break; 2153 } 2154 tl_data(mp, tep); 2155 break; 2156 2157 case T_OPTDATA_REQ: 2158 if (IS_CLTS(tep)) { 2159 tl_merror(tep->te_wq, mp, EPROTO); 2160 break; 2161 } 2162 tl_data(mp, tep); 2163 break; 2164 2165 case T_EXDATA_REQ: 2166 if (IS_CLTS(tep)) { 2167 tl_merror(tep->te_wq, mp, EPROTO); 2168 break; 2169 } 2170 tl_exdata(mp, tep); 2171 break; 2172 2173 case T_ORDREL_REQ: 2174 if (! IS_COTSORD(tep)) { 2175 tl_merror(tep->te_wq, mp, EPROTO); 2176 break; 2177 } 2178 tl_ordrel(mp, tep); 2179 break; 2180 2181 case T_UNITDATA_REQ: 2182 if (IS_COTS(tep)) { 2183 tl_merror(tep->te_wq, mp, EPROTO); 2184 break; 2185 } 2186 tl_unitdata(mp, tep); 2187 break; 2188 2189 default: 2190 tl_merror(tep->te_wq, mp, EPROTO); 2191 break; 2192 } 2193 } 2194 2195 /* 2196 * Process ioctl from serializer. 2197 * This is a wrapper around tl_do_ioctl(). 2198 */ 2199 static void 2200 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2201 { 2202 if (! 
tep->te_closing) 2203 tl_do_ioctl(mp, tep); 2204 else 2205 freemsg(mp); 2206 2207 tl_serializer_exit(tep); 2208 tl_refrele(tep); 2209 } 2210 2211 static void 2212 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2213 { 2214 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2215 int cmd = iocbp->ioc_cmd; 2216 queue_t *wq = tep->te_wq; 2217 int error; 2218 int thisopt, otheropt; 2219 2220 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2221 2222 switch (cmd) { 2223 case TL_IOC_CREDOPT: 2224 if (cmd == TL_IOC_CREDOPT) { 2225 thisopt = TL_SETCRED; 2226 otheropt = TL_SETUCRED; 2227 } else { 2228 /* FALLTHROUGH */ 2229 case TL_IOC_UCREDOPT: 2230 thisopt = TL_SETUCRED; 2231 otheropt = TL_SETCRED; 2232 } 2233 /* 2234 * The credentials passing does not apply to sockets. 2235 * Only one of the cred options can be set at a given time. 2236 */ 2237 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2238 miocnak(wq, mp, 0, EINVAL); 2239 return; 2240 } 2241 2242 /* 2243 * Turn on generation of credential options for 2244 * T_conn_req, T_conn_con, T_unidata_ind. 
2245 */ 2246 error = miocpullup(mp, sizeof (uint32_t)); 2247 if (error != 0) { 2248 miocnak(wq, mp, 0, error); 2249 return; 2250 } 2251 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2252 miocnak(wq, mp, 0, EINVAL); 2253 return; 2254 } 2255 2256 if (*(uint32_t *)mp->b_cont->b_rptr) 2257 tep->te_flag |= thisopt; 2258 else 2259 tep->te_flag &= ~thisopt; 2260 2261 miocack(wq, mp, 0, 0); 2262 break; 2263 2264 default: 2265 /* Should not be here */ 2266 miocnak(wq, mp, 0, EINVAL); 2267 break; 2268 } 2269 } 2270 2271 2272 /* 2273 * send T_ERROR_ACK 2274 * Note: assumes enough memory or caller passed big enough mp 2275 * - no recovery from allocb failures 2276 */ 2277 2278 static void 2279 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2280 t_scalar_t unix_err, t_scalar_t type) 2281 { 2282 struct T_error_ack *err_ack; 2283 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2284 M_PCPROTO, T_ERROR_ACK); 2285 2286 if (ackmp == NULL) { 2287 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2288 "tl_error_ack:out of mblk memory")); 2289 tl_merror(wq, NULL, ENOSR); 2290 return; 2291 } 2292 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2293 err_ack->ERROR_prim = type; 2294 err_ack->TLI_error = tli_err; 2295 err_ack->UNIX_error = unix_err; 2296 2297 /* 2298 * send error ack message 2299 */ 2300 qreply(wq, ackmp); 2301 } 2302 2303 2304 2305 /* 2306 * send T_OK_ACK 2307 * Note: assumes enough memory or caller passed big enough mp 2308 * - no recovery from allocb failures 2309 */ 2310 static void 2311 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2312 { 2313 struct T_ok_ack *ok_ack; 2314 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2315 M_PCPROTO, T_OK_ACK); 2316 2317 if (ackmp == NULL) { 2318 tl_merror(wq, NULL, ENOMEM); 2319 return; 2320 } 2321 2322 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2323 ok_ack->CORRECT_prim = type; 2324 2325 (void) qreply(wq, ackmp); 2326 } 2327 2328 /* 2329 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2330 * This is a wrapper around tl_bind(). 2331 */ 2332 static void 2333 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2334 { 2335 if (! tep->te_closing) 2336 tl_bind(mp, tep); 2337 else 2338 freemsg(mp); 2339 2340 tl_serializer_exit(tep); 2341 tl_refrele(tep); 2342 } 2343 2344 /* 2345 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2346 * Assumes that the endpoint is in the unbound. 2347 */ 2348 static void 2349 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2350 { 2351 queue_t *wq = tep->te_wq; 2352 struct T_bind_ack *b_ack; 2353 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2354 mblk_t *ackmp, *bamp; 2355 soux_addr_t ux_addr; 2356 t_uscalar_t qlen = 0; 2357 t_scalar_t alen, aoff; 2358 tl_addr_t addr_req; 2359 void *addr_startp; 2360 ssize_t msz = MBLKL(mp), basize; 2361 t_scalar_t tli_err = 0, unix_err = 0; 2362 t_scalar_t save_prim_type = bind->PRIM_type; 2363 t_scalar_t save_state = tep->te_state; 2364 2365 if (tep->te_state != TS_UNBND) { 2366 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2367 SL_TRACE|SL_ERROR, 2368 "tl_wput:bind_request:out of state, state=%d", 2369 tep->te_state)); 2370 tli_err = TOUTSTATE; 2371 goto error; 2372 } 2373 2374 if (msz < sizeof (struct T_bind_req)) { 2375 tli_err = TSYSERR; unix_err = EINVAL; 2376 goto error; 2377 } 2378 2379 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2380 2381 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2382 (bind->PRIM_type == T_BIND_REQ)); 2383 2384 alen = bind->ADDR_length; 2385 aoff = bind->ADDR_offset; 2386 2387 /* negotiate max conn req pending */ 2388 if (IS_COTS(tep)) { 2389 qlen = bind->CONIND_number; 2390 if (qlen > tl_maxqlen) 2391 qlen = tl_maxqlen; 2392 } 2393 2394 /* 2395 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2396 * and bound again. 
2397 */ 2398 if ((tep->te_hash_hndl == NULL) && 2399 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2400 mod_hash_reserve_nosleep(tep->te_addrhash, 2401 &tep->te_hash_hndl) != 0) { 2402 tli_err = TSYSERR; unix_err = ENOSR; 2403 goto error; 2404 } 2405 2406 /* 2407 * Verify address correctness. 2408 */ 2409 if (IS_SOCKET(tep)) { 2410 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2411 2412 if ((alen != TL_SOUX_ADDRLEN) || 2413 (aoff < 0) || 2414 (aoff + alen > msz)) { 2415 (void) (STRLOG(TL_ID, tep->te_minor, 2416 1, SL_TRACE|SL_ERROR, 2417 "tl_bind: invalid socket addr")); 2418 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2419 tli_err = TSYSERR; unix_err = EINVAL; 2420 goto error; 2421 } 2422 /* Copy address from message to local buffer. */ 2423 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2424 /* 2425 * Check that we got correct address from sockets 2426 */ 2427 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2428 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2429 (void) (STRLOG(TL_ID, tep->te_minor, 2430 1, SL_TRACE|SL_ERROR, 2431 "tl_bind: invalid socket magic")); 2432 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2433 tli_err = TSYSERR; unix_err = EINVAL; 2434 goto error; 2435 } 2436 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2437 (ux_addr.soua_vp != NULL)) { 2438 (void) (STRLOG(TL_ID, tep->te_minor, 2439 1, SL_TRACE|SL_ERROR, 2440 "tl_bind: implicit addr non-empty")); 2441 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2442 tli_err = TSYSERR; unix_err = EINVAL; 2443 goto error; 2444 } 2445 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2446 (ux_addr.soua_vp == NULL)) { 2447 (void) (STRLOG(TL_ID, tep->te_minor, 2448 1, SL_TRACE|SL_ERROR, 2449 "tl_bind: explicit addr empty")); 2450 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2451 tli_err = TSYSERR; unix_err = EINVAL; 2452 goto error; 2453 } 2454 } else { 2455 if ((alen > 0) && ((aoff < 0) || 2456 ((ssize_t)(aoff + alen) > msz) || 2457 ((aoff + alen) < 0))) { 
2458 (void) (STRLOG(TL_ID, tep->te_minor, 2459 1, SL_TRACE|SL_ERROR, 2460 "tl_bind: invalid message")); 2461 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2462 tli_err = TSYSERR; unix_err = EINVAL; 2463 goto error; 2464 } 2465 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2466 (void) (STRLOG(TL_ID, tep->te_minor, 2467 1, SL_TRACE|SL_ERROR, 2468 "tl_bind: bad addr in message")); 2469 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2470 tli_err = TBADADDR; 2471 goto error; 2472 } 2473 #ifdef DEBUG 2474 /* 2475 * Mild form of ASSERT()ion to detect broken TPI apps. 2476 * if (! assertion) 2477 * log warning; 2478 */ 2479 if (! ((alen == 0 && aoff == 0) || 2480 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2481 (void) (STRLOG(TL_ID, tep->te_minor, 2482 3, SL_TRACE|SL_ERROR, 2483 "tl_bind: addr overlaps TPI message")); 2484 } 2485 #endif 2486 } 2487 2488 /* 2489 * Bind the address provided or allocate one if requested. 2490 * Allow rebinds with a new qlen value. 2491 */ 2492 if (IS_SOCKET(tep)) { 2493 /* 2494 * For anonymous requests the te_ap is already set up properly 2495 * so use minor number as an address. 2496 * For explicit requests need to check whether the address is 2497 * already in use. 2498 */ 2499 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2500 int rc; 2501 2502 if (tep->te_flag & TL_ADDRHASHED) { 2503 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2504 if (tep->te_vp == ux_addr.soua_vp) 2505 goto skip_addr_bind; 2506 else /* Rebind to a new address. */ 2507 tl_addr_unbind(tep); 2508 } 2509 /* 2510 * Insert address in the hash if it is not already 2511 * there. Since we use preallocated handle, the insert 2512 * can fail only if the key is already present. 
2513 */ 2514 rc = mod_hash_insert_reserve(tep->te_addrhash, 2515 (mod_hash_key_t)ux_addr.soua_vp, 2516 (mod_hash_val_t)tep, tep->te_hash_hndl); 2517 2518 if (rc != 0) { 2519 ASSERT(rc == MH_ERR_DUPLICATE); 2520 /* 2521 * Violate O_T_BIND_REQ semantics and fail with 2522 * TADDRBUSY - sockets will not use any address 2523 * other than supplied one for explicit binds. 2524 */ 2525 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2526 SL_TRACE|SL_ERROR, 2527 "tl_bind:requested addr %p is busy", 2528 ux_addr.soua_vp)); 2529 tli_err = TADDRBUSY; unix_err = 0; 2530 goto error; 2531 } 2532 tep->te_uxaddr = ux_addr; 2533 tep->te_flag |= TL_ADDRHASHED; 2534 tep->te_hash_hndl = NULL; 2535 } 2536 } else if (alen == 0) { 2537 /* 2538 * assign any free address 2539 */ 2540 if (! tl_get_any_addr(tep, NULL)) { 2541 (void) (STRLOG(TL_ID, tep->te_minor, 2542 1, SL_TRACE|SL_ERROR, 2543 "tl_bind:failed to get buffer for any " 2544 "address")); 2545 tli_err = TSYSERR; unix_err = ENOSR; 2546 goto error; 2547 } 2548 } else { 2549 addr_req.ta_alen = alen; 2550 addr_req.ta_abuf = (mp->b_rptr + aoff); 2551 addr_req.ta_zoneid = tep->te_zoneid; 2552 2553 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2554 if (tep->te_abuf == NULL) { 2555 tli_err = TSYSERR; unix_err = ENOSR; 2556 goto error; 2557 } 2558 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2559 tep->te_alen = alen; 2560 2561 if (mod_hash_insert_reserve(tep->te_addrhash, 2562 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2563 tep->te_hash_hndl) != 0) { 2564 if (save_prim_type == T_BIND_REQ) { 2565 /* 2566 * The bind semantics for this primitive 2567 * require a failure if the exact address 2568 * requested is busy 2569 */ 2570 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2571 SL_TRACE|SL_ERROR, 2572 "tl_bind:requested addr is busy")); 2573 tli_err = TADDRBUSY; unix_err = 0; 2574 goto error; 2575 } 2576 2577 /* 2578 * O_T_BIND_REQ semantics say if address if requested 2579 * address is busy, bind to any available free address 
2580 */ 2581 if (! tl_get_any_addr(tep, &addr_req)) { 2582 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2583 SL_TRACE|SL_ERROR, 2584 "tl_bind:unable to get any addr buf")); 2585 tli_err = TSYSERR; unix_err = ENOMEM; 2586 goto error; 2587 } 2588 } else { 2589 tep->te_flag |= TL_ADDRHASHED; 2590 tep->te_hash_hndl = NULL; 2591 } 2592 } 2593 2594 ASSERT(tep->te_alen >= 0); 2595 2596 skip_addr_bind: 2597 /* 2598 * prepare T_BIND_ACK TPI message 2599 */ 2600 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2601 bamp = reallocb(mp, basize, 0); 2602 if (bamp == NULL) { 2603 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2604 "tl_wput:tl_bind: allocb failed")); 2605 /* 2606 * roll back state changes 2607 */ 2608 tl_addr_unbind(tep); 2609 tep->te_state = TS_UNBND; 2610 tl_memrecover(wq, mp, basize); 2611 return; 2612 } 2613 2614 DB_TYPE(bamp) = M_PCPROTO; 2615 bamp->b_wptr = bamp->b_rptr + basize; 2616 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2617 b_ack->PRIM_type = T_BIND_ACK; 2618 b_ack->CONIND_number = qlen; 2619 b_ack->ADDR_length = tep->te_alen; 2620 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2621 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2622 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2623 2624 if (IS_COTS(tep)) { 2625 tep->te_qlen = qlen; 2626 if (qlen > 0) 2627 tep->te_flag |= TL_LISTENER; 2628 } 2629 2630 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2631 /* 2632 * send T_BIND_ACK message 2633 */ 2634 (void) qreply(wq, bamp); 2635 return; 2636 2637 error: 2638 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2639 if (ackmp == NULL) { 2640 /* 2641 * roll back state changes 2642 */ 2643 tep->te_state = save_state; 2644 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2645 return; 2646 } 2647 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2648 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2649 } 2650 2651 /* 2652 * Process T_UNBIND_REQ. 2653 * Called from serializer. 
2654 */ 2655 static void 2656 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2657 { 2658 queue_t *wq; 2659 mblk_t *ackmp; 2660 2661 if (tep->te_closing) { 2662 freemsg(mp); 2663 return; 2664 } 2665 2666 wq = tep->te_wq; 2667 2668 /* 2669 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2670 * ==> allocate for T_ERROR_ACK (known max) 2671 */ 2672 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2673 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2674 return; 2675 } 2676 /* 2677 * memory resources committed 2678 * Note: no message validation. T_UNBIND_REQ message is 2679 * same size as PRIM_type field so already verified earlier. 2680 */ 2681 2682 /* 2683 * validate state 2684 */ 2685 if (tep->te_state != TS_IDLE) { 2686 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2687 SL_TRACE|SL_ERROR, 2688 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2689 tep->te_state)); 2690 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2691 return; 2692 } 2693 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2694 2695 /* 2696 * TPI says on T_UNBIND_REQ: 2697 * send up a M_FLUSH to flush both 2698 * read and write queues 2699 */ 2700 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2701 2702 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2703 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2704 2705 /* 2706 * Sockets use bind with qlen==0 followed by bind() to 2707 * the same address with qlen > 0 for listeners. 2708 * We allow rebind with a new qlen value. 2709 */ 2710 tl_addr_unbind(tep); 2711 } 2712 2713 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2714 /* 2715 * send T_OK_ACK 2716 */ 2717 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2718 } 2719 2720 2721 /* 2722 * Option management code from drv/ip is used here 2723 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2724 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2725 * However, that is what we want as that option is 'unorthodox' 2726 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2727 * and not in T_SVR4_OPTMGMT_REQ/ACK 2728 * Note2: use of optcom_req means this routine is an exception to 2729 * recovery from allocb() failures. 2730 */ 2731 2732 static void 2733 tl_optmgmt(queue_t *wq, mblk_t *mp) 2734 { 2735 tl_endpt_t *tep; 2736 mblk_t *ackmp; 2737 union T_primitives *prim; 2738 cred_t *cr; 2739 2740 tep = (tl_endpt_t *)wq->q_ptr; 2741 prim = (union T_primitives *)mp->b_rptr; 2742 2743 /* 2744 * All Solaris components should pass a db_credp 2745 * for this TPI message, hence we ASSERT. 2746 * But in case there is some other M_PROTO that looks 2747 * like a TPI message sent by some other kernel 2748 * component, we check and return an error. 2749 */ 2750 cr = msg_getcred(mp, NULL); 2751 ASSERT(cr != NULL); 2752 if (cr == NULL) { 2753 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2754 return; 2755 } 2756 2757 /* all states OK for AF_UNIX options ? */ 2758 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2759 prim->type == T_SVR4_OPTMGMT_REQ) { 2760 /* 2761 * Broken TLI semantics that options can only be managed 2762 * in TS_IDLE state. Needed for Sparc ABI test suite that 2763 * tests this TLI (mis)feature using this device driver. 2764 */ 2765 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2766 SL_TRACE|SL_ERROR, 2767 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2768 tep->te_state)); 2769 /* 2770 * preallocate memory for T_ERROR_ACK 2771 */ 2772 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2773 if (! 
ackmp) { 2774 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2775 return; 2776 } 2777 2778 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2779 freemsg(mp); 2780 return; 2781 } 2782 2783 /* 2784 * call common option management routine from drv/ip 2785 */ 2786 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2787 svr4_optcom_req(wq, mp, cr, &tl_opt_obj); 2788 } else { 2789 ASSERT(prim->type == T_OPTMGMT_REQ); 2790 tpi_optcom_req(wq, mp, cr, &tl_opt_obj); 2791 } 2792 } 2793 2794 /* 2795 * Handle T_conn_req - the driver part of accept(). 2796 * If TL_SET[U]CRED generate the credentials options. 2797 * If this is a socket pass through options unmodified. 2798 * For sockets generate the T_CONN_CON here instead of 2799 * waiting for the T_CONN_RES. 2800 */ 2801 static void 2802 tl_conn_req(queue_t *wq, mblk_t *mp) 2803 { 2804 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2805 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2806 ssize_t msz = MBLKL(mp); 2807 t_scalar_t alen, aoff, olen, ooff, err = 0; 2808 tl_endpt_t *peer_tep = NULL; 2809 mblk_t *ackmp; 2810 mblk_t *dimp; 2811 struct T_discon_ind *di; 2812 soux_addr_t ux_addr; 2813 tl_addr_t dst; 2814 2815 ASSERT(IS_COTS(tep)); 2816 2817 if (tep->te_closing) { 2818 freemsg(mp); 2819 return; 2820 } 2821 2822 /* 2823 * preallocate memory for: 2824 * 1. max of T_ERROR_ACK and T_OK_ACK 2825 * ==> known max T_ERROR_ACK 2826 * 2. max of T_DISCON_IND and T_CONN_IND 2827 */ 2828 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2829 if (! 
ackmp) { 2830 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2831 return; 2832 } 2833 /* 2834 * memory committed for T_OK_ACK/T_ERROR_ACK now 2835 * will be committed for T_DISCON_IND/T_CONN_IND later 2836 */ 2837 2838 if (tep->te_state != TS_IDLE) { 2839 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2840 SL_TRACE|SL_ERROR, 2841 "tl_wput:T_CONN_REQ:out of state, state=%d", 2842 tep->te_state)); 2843 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2844 freemsg(mp); 2845 return; 2846 } 2847 2848 /* 2849 * validate the message 2850 * Note: dereference fields in struct inside message only 2851 * after validating the message length. 2852 */ 2853 if (msz < sizeof (struct T_conn_req)) { 2854 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2855 "tl_conn_req:invalid message length")); 2856 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2857 freemsg(mp); 2858 return; 2859 } 2860 alen = creq->DEST_length; 2861 aoff = creq->DEST_offset; 2862 olen = creq->OPT_length; 2863 ooff = creq->OPT_offset; 2864 if (olen == 0) 2865 ooff = 0; 2866 2867 if (IS_SOCKET(tep)) { 2868 if ((alen != TL_SOUX_ADDRLEN) || 2869 (aoff < 0) || 2870 (aoff + alen > msz) || 2871 (alen > msz - sizeof (struct T_conn_req))) { 2872 (void) (STRLOG(TL_ID, tep->te_minor, 2873 1, SL_TRACE|SL_ERROR, 2874 "tl_conn_req: invalid socket addr")); 2875 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2876 freemsg(mp); 2877 return; 2878 } 2879 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2880 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2881 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2882 (void) (STRLOG(TL_ID, tep->te_minor, 2883 1, SL_TRACE|SL_ERROR, 2884 "tl_conn_req: invalid socket magic")); 2885 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2886 freemsg(mp); 2887 return; 2888 } 2889 } else { 2890 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2891 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2892 ooff + olen < 0)) || 2893 olen < 0 || ooff < 0) { 2894 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2895 SL_TRACE|SL_ERROR, 2896 "tl_conn_req:invalid message")); 2897 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2898 freemsg(mp); 2899 return; 2900 } 2901 2902 if (alen <= 0 || aoff < 0 || 2903 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2904 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2905 SL_TRACE|SL_ERROR, 2906 "tl_conn_req:bad addr in message, " 2907 "alen=%d, msz=%ld", 2908 alen, msz)); 2909 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2910 freemsg(mp); 2911 return; 2912 } 2913 #ifdef DEBUG 2914 /* 2915 * Mild form of ASSERT()ion to detect broken TPI apps. 2916 * if (! assertion) 2917 * log warning; 2918 */ 2919 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2920 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2921 SL_TRACE|SL_ERROR, 2922 "tl_conn_req: addr overlaps TPI message")); 2923 } 2924 #endif 2925 if (olen) { 2926 /* 2927 * no opts in connect req 2928 * supported in this provider except for sockets. 2929 */ 2930 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2931 SL_TRACE|SL_ERROR, 2932 "tl_conn_req:options not supported " 2933 "in message")); 2934 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2935 freemsg(mp); 2936 return; 2937 } 2938 } 2939 2940 /* 2941 * Prevent tep from closing on us. 2942 */ 2943 if (! tl_noclose(tep)) { 2944 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2945 "tl_conn_req:endpoint is closing")); 2946 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2947 freemsg(mp); 2948 return; 2949 } 2950 2951 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2952 /* 2953 * get endpoint to connect to 2954 * check that peer with DEST addr is bound to addr 2955 * and has CONIND_number > 0 2956 */ 2957 dst.ta_alen = alen; 2958 dst.ta_abuf = mp->b_rptr + aoff; 2959 dst.ta_zoneid = tep->te_zoneid; 2960 2961 /* 2962 * Verify if remote addr is in use 2963 */ 2964 peer_tep = (IS_SOCKET(tep) ? 
2965 tl_sock_find_peer(tep, &ux_addr) : 2966 tl_find_peer(tep, &dst)); 2967 2968 if (peer_tep == NULL) { 2969 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2970 "tl_conn_req:no one at connect address")); 2971 err = ECONNREFUSED; 2972 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2973 /* 2974 * validate that number of incoming connection is 2975 * not to capacity on destination endpoint 2976 */ 2977 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2978 "tl_conn_req: qlen overflow connection refused")); 2979 err = ECONNREFUSED; 2980 } 2981 2982 /* 2983 * Send T_DISCON_IND in case of error 2984 */ 2985 if (err != 0) { 2986 if (peer_tep != NULL) 2987 tl_refrele(peer_tep); 2988 /* We are still expected to send T_OK_ACK */ 2989 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2990 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2991 tl_closeok(tep); 2992 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2993 M_PROTO, T_DISCON_IND); 2994 if (dimp == NULL) { 2995 tl_merror(wq, NULL, ENOSR); 2996 return; 2997 } 2998 di = (struct T_discon_ind *)dimp->b_rptr; 2999 di->DISCON_reason = err; 3000 di->SEQ_number = BADSEQNUM; 3001 3002 tep->te_state = TS_IDLE; 3003 /* 3004 * send T_DISCON_IND message 3005 */ 3006 putnext(tep->te_rq, dimp); 3007 return; 3008 } 3009 3010 ASSERT(IS_COTS(peer_tep)); 3011 3012 /* 3013 * Found the listener. At this point processing will continue on 3014 * listener serializer. Close of the endpoint should be blocked while we 3015 * switch serializers. 3016 */ 3017 tl_serializer_refhold(peer_tep->te_ser); 3018 tl_serializer_refrele(tep->te_ser); 3019 tep->te_ser = peer_tep->te_ser; 3020 ASSERT(tep->te_oconp == NULL); 3021 tep->te_oconp = peer_tep; 3022 3023 /* 3024 * It is safe to close now. Close may continue on listener serializer. 3025 */ 3026 tl_closeok(tep); 3027 3028 /* 3029 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3030 * data, so we link mp to ackmp. 
3031 */ 3032 ackmp->b_cont = mp; 3033 mp = ackmp; 3034 3035 tl_refhold(tep); 3036 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3037 } 3038 3039 /* 3040 * Finish T_CONN_REQ processing on listener serializer. 3041 */ 3042 static void 3043 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3044 { 3045 queue_t *wq; 3046 tl_endpt_t *peer_tep = tep->te_oconp; 3047 mblk_t *confmp, *cimp, *indmp; 3048 void *opts = NULL; 3049 mblk_t *ackmp = mp; 3050 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3051 struct T_conn_ind *ci; 3052 tl_icon_t *tip; 3053 void *addr_startp; 3054 t_scalar_t olen = creq->OPT_length; 3055 t_scalar_t ooff = creq->OPT_offset; 3056 size_t ci_msz; 3057 size_t size; 3058 cred_t *cr = NULL; 3059 pid_t cpid; 3060 3061 if (tep->te_closing) { 3062 TL_UNCONNECT(tep->te_oconp); 3063 tl_serializer_exit(tep); 3064 tl_refrele(tep); 3065 freemsg(mp); 3066 return; 3067 } 3068 3069 wq = tep->te_wq; 3070 tep->te_flag |= TL_EAGER; 3071 3072 /* 3073 * Extract preallocated ackmp from mp. 
3074 */ 3075 mp = mp->b_cont; 3076 ackmp->b_cont = NULL; 3077 3078 if (olen == 0) 3079 ooff = 0; 3080 3081 if (peer_tep->te_closing || 3082 !((peer_tep->te_state == TS_IDLE) || 3083 (peer_tep->te_state == TS_WRES_CIND))) { 3084 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3085 "tl_conn_req:peer in bad state (%d)", 3086 peer_tep->te_state)); 3087 TL_UNCONNECT(tep->te_oconp); 3088 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3089 freemsg(ackmp); 3090 tl_serializer_exit(tep); 3091 tl_refrele(tep); 3092 return; 3093 } 3094 3095 /* 3096 * preallocate now for T_DISCON_IND or T_CONN_IND 3097 */ 3098 /* 3099 * calculate length of T_CONN_IND message 3100 */ 3101 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3102 cr = msg_getcred(mp, &cpid); 3103 ASSERT(cr != NULL); 3104 if (peer_tep->te_flag & TL_SETCRED) { 3105 ooff = 0; 3106 olen = (t_scalar_t) sizeof (struct opthdr) + 3107 OPTLEN(sizeof (tl_credopt_t)); 3108 /* 1 option only */ 3109 } else { 3110 ooff = 0; 3111 olen = (t_scalar_t)sizeof (struct opthdr) + 3112 OPTLEN(ucredminsize(cr)); 3113 /* 1 option only */ 3114 } 3115 } 3116 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3117 ci_msz = T_ALIGN(ci_msz) + olen; 3118 size = max(ci_msz, sizeof (struct T_discon_ind)); 3119 3120 /* 3121 * Save options from mp - we'll need them for T_CONN_IND. 3122 */ 3123 if (ooff != 0) { 3124 opts = kmem_alloc(olen, KM_NOSLEEP); 3125 if (opts == NULL) { 3126 /* 3127 * roll back state changes 3128 */ 3129 tep->te_state = TS_IDLE; 3130 tl_memrecover(wq, mp, size); 3131 freemsg(ackmp); 3132 TL_UNCONNECT(tep->te_oconp); 3133 tl_serializer_exit(tep); 3134 tl_refrele(tep); 3135 return; 3136 } 3137 /* Copy options to a temp buffer */ 3138 bcopy(mp->b_rptr + ooff, opts, olen); 3139 } 3140 3141 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3142 /* 3143 * Generate a T_CONN_CON that has the identical address 3144 * (and options) as the T_CONN_REQ. 
3145 * NOTE: assumes that the T_conn_req and T_conn_con structures 3146 * are isomorphic. 3147 */ 3148 confmp = copyb(mp); 3149 if (! confmp) { 3150 /* 3151 * roll back state changes 3152 */ 3153 tep->te_state = TS_IDLE; 3154 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3155 freemsg(ackmp); 3156 if (opts != NULL) 3157 kmem_free(opts, olen); 3158 TL_UNCONNECT(tep->te_oconp); 3159 tl_serializer_exit(tep); 3160 tl_refrele(tep); 3161 return; 3162 } 3163 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3164 T_CONN_CON; 3165 } else { 3166 confmp = NULL; 3167 } 3168 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3169 /* 3170 * roll back state changes 3171 */ 3172 tep->te_state = TS_IDLE; 3173 tl_memrecover(wq, mp, size); 3174 freemsg(ackmp); 3175 if (opts != NULL) 3176 kmem_free(opts, olen); 3177 freemsg(confmp); 3178 TL_UNCONNECT(tep->te_oconp); 3179 tl_serializer_exit(tep); 3180 tl_refrele(tep); 3181 return; 3182 } 3183 3184 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3185 if (tip == NULL) { 3186 /* 3187 * roll back state changes 3188 */ 3189 tep->te_state = TS_IDLE; 3190 tl_memrecover(wq, indmp, sizeof (*tip)); 3191 freemsg(ackmp); 3192 if (opts != NULL) 3193 kmem_free(opts, olen); 3194 freemsg(confmp); 3195 TL_UNCONNECT(tep->te_oconp); 3196 tl_serializer_exit(tep); 3197 tl_refrele(tep); 3198 return; 3199 } 3200 tip->ti_mp = NULL; 3201 3202 /* 3203 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3204 * and tl_icon_t cell. 3205 */ 3206 3207 /* 3208 * ack validity of request and send the peer credential in the ACK. 
3209 */ 3210 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3211 3212 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3213 confmp != NULL) { 3214 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3215 } 3216 3217 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3218 3219 /* 3220 * prepare message to send T_CONN_IND 3221 */ 3222 /* 3223 * allocate the message - original data blocks retained 3224 * in the returned mblk 3225 */ 3226 cimp = tl_resizemp(indmp, size); 3227 if (! cimp) { 3228 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3229 "tl_conn_req:con_ind:allocb failure")); 3230 tl_merror(wq, indmp, ENOMEM); 3231 TL_UNCONNECT(tep->te_oconp); 3232 tl_serializer_exit(tep); 3233 tl_refrele(tep); 3234 if (opts != NULL) 3235 kmem_free(opts, olen); 3236 freemsg(confmp); 3237 ASSERT(tip->ti_mp == NULL); 3238 kmem_free(tip, sizeof (*tip)); 3239 return; 3240 } 3241 3242 DB_TYPE(cimp) = M_PROTO; 3243 ci = (struct T_conn_ind *)cimp->b_rptr; 3244 ci->PRIM_type = T_CONN_IND; 3245 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3246 ci->SRC_length = tep->te_alen; 3247 ci->SEQ_number = tep->te_seqno; 3248 3249 addr_startp = cimp->b_rptr + ci->SRC_offset; 3250 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3251 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3252 3253 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3254 ci->SRC_length); 3255 ci->OPT_length = olen; /* because only 1 option */ 3256 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3257 cr, cpid, 3258 peer_tep->te_flag, peer_tep->te_credp); 3259 } else if (ooff != 0) { 3260 /* Copy option from T_CONN_REQ */ 3261 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3262 ci->SRC_length); 3263 ci->OPT_length = olen; 3264 ASSERT(opts != NULL); 3265 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3266 } else { 3267 ci->OPT_offset = 0; 3268 ci->OPT_length = 0; 3269 } 3270 if (opts != NULL) 3271 kmem_free(opts, olen); 3272 3273 /* 3274 * register connection request with 
server peer 3275 * append to list of incoming connections 3276 * increment references for both peer_tep and tep: peer_tep is placed on 3277 * te_oconp and tep is placed on listeners queue. 3278 */ 3279 tip->ti_tep = tep; 3280 tip->ti_seqno = tep->te_seqno; 3281 list_insert_tail(&peer_tep->te_iconp, tip); 3282 peer_tep->te_nicon++; 3283 3284 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3285 /* 3286 * send the T_CONN_IND message 3287 */ 3288 putnext(peer_tep->te_rq, cimp); 3289 3290 /* 3291 * Send a T_CONN_CON message for sockets. 3292 * Disable the queues until we have reached the correct state! 3293 */ 3294 if (confmp != NULL) { 3295 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3296 noenable(wq); 3297 putnext(tep->te_rq, confmp); 3298 } 3299 /* 3300 * Now we need to increment tep reference because tep is referenced by 3301 * server list of pending connections. We also need to decrement 3302 * reference before exiting serializer. Two operations void each other 3303 * so we don't modify reference at all. 3304 */ 3305 ASSERT(tep->te_refcnt >= 2); 3306 ASSERT(peer_tep->te_refcnt >= 2); 3307 tl_serializer_exit(tep); 3308 } 3309 3310 3311 3312 /* 3313 * Handle T_conn_res on listener stream. Called on listener serializer. 3314 * tl_conn_req has already generated the T_CONN_CON. 3315 * tl_conn_res is called on listener serializer. 3316 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3317 * Switch eager serializer to acceptor's. 3318 * 3319 * If TL_SET[U]CRED generate the credentials options. 3320 * For sockets tl_conn_req has already generated the T_CONN_CON. 
3321 */ 3322 static void 3323 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3324 { 3325 queue_t *wq; 3326 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3327 ssize_t msz = MBLKL(mp); 3328 t_scalar_t olen, ooff, err = 0; 3329 t_scalar_t prim = cres->PRIM_type; 3330 uchar_t *addr_startp; 3331 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3332 tl_icon_t *tip; 3333 size_t size; 3334 mblk_t *ackmp, *respmp; 3335 mblk_t *dimp, *ccmp = NULL; 3336 struct T_discon_ind *di; 3337 struct T_conn_con *cc; 3338 boolean_t client_noclose_set = B_FALSE; 3339 boolean_t switch_client_serializer = B_TRUE; 3340 3341 ASSERT(IS_COTS(tep)); 3342 3343 if (tep->te_closing) { 3344 freemsg(mp); 3345 return; 3346 } 3347 3348 wq = tep->te_wq; 3349 3350 /* 3351 * preallocate memory for: 3352 * 1. max of T_ERROR_ACK and T_OK_ACK 3353 * ==> known max T_ERROR_ACK 3354 * 2. max of T_DISCON_IND and T_CONN_CON 3355 */ 3356 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3357 if (! ackmp) { 3358 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3359 return; 3360 } 3361 /* 3362 * memory committed for T_OK_ACK/T_ERROR_ACK now 3363 * will be committed for T_DISCON_IND/T_CONN_CON later 3364 */ 3365 3366 3367 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3368 3369 /* 3370 * validate state 3371 */ 3372 if (tep->te_state != TS_WRES_CIND) { 3373 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3374 SL_TRACE|SL_ERROR, 3375 "tl_wput:T_CONN_RES:out of state, state=%d", 3376 tep->te_state)); 3377 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3378 freemsg(mp); 3379 return; 3380 } 3381 3382 /* 3383 * validate the message 3384 * Note: dereference fields in struct inside message only 3385 * after validating the message length. 
3386 */ 3387 if (msz < sizeof (struct T_conn_res)) { 3388 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3389 "tl_conn_res:invalid message length")); 3390 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3391 freemsg(mp); 3392 return; 3393 } 3394 olen = cres->OPT_length; 3395 ooff = cres->OPT_offset; 3396 if (((olen > 0) && ((ooff + olen) > msz))) { 3397 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3398 "tl_conn_res:invalid message")); 3399 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3400 freemsg(mp); 3401 return; 3402 } 3403 if (olen) { 3404 /* 3405 * no opts in connect res 3406 * supported in this provider 3407 */ 3408 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3409 "tl_conn_res:options not supported in message")); 3410 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3411 freemsg(mp); 3412 return; 3413 } 3414 3415 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3416 ASSERT(tep->te_state == TS_WACK_CRES); 3417 3418 if (cres->SEQ_number < TL_MINOR_START && 3419 cres->SEQ_number >= BADSEQNUM) { 3420 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3421 "tl_conn_res:remote endpoint sequence number bad")); 3422 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3423 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3424 freemsg(mp); 3425 return; 3426 } 3427 3428 /* 3429 * find accepting endpoint. Will have extra reference if found. 3430 */ 3431 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, 3432 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, 3433 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { 3434 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3435 "tl_conn_res:bad accepting endpoint")); 3436 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3437 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3438 freemsg(mp); 3439 return; 3440 } 3441 3442 /* 3443 * Prevent acceptor from closing. 3444 */ 3445 if (! 
tl_noclose(acc_ep)) { 3446 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3447 "tl_conn_res:bad accepting endpoint")); 3448 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3449 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3450 tl_refrele(acc_ep); 3451 freemsg(mp); 3452 return; 3453 } 3454 3455 acc_ep->te_flag |= TL_ACCEPTOR; 3456 3457 /* 3458 * validate that accepting endpoint, if different from listening 3459 * has address bound => state is TS_IDLE 3460 * TROUBLE in XPG4 !!? 3461 */ 3462 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { 3463 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3464 "tl_conn_res:accepting endpoint has no address bound," 3465 "state=%d", acc_ep->te_state)); 3466 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3467 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3468 freemsg(mp); 3469 tl_closeok(acc_ep); 3470 tl_refrele(acc_ep); 3471 return; 3472 } 3473 3474 /* 3475 * validate if accepting endpt same as listening, then 3476 * no other incoming connection should be on the queue 3477 */ 3478 3479 if ((tep == acc_ep) && (tep->te_nicon > 1)) { 3480 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3481 "tl_conn_res: > 1 conn_ind on listener-acceptor")); 3482 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3483 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3484 freemsg(mp); 3485 tl_closeok(acc_ep); 3486 tl_refrele(acc_ep); 3487 return; 3488 } 3489 3490 /* 3491 * Mark for deletion, the entry corresponding to client 3492 * on list of pending connections made by the listener 3493 * search list to see if client is one of the 3494 * recorded as a listener. 
3495 */ 3496 tip = tl_icon_find(tep, cres->SEQ_number); 3497 if (tip == NULL) { 3498 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3499 "tl_conn_res:no client in listener list")); 3500 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3501 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3502 freemsg(mp); 3503 tl_closeok(acc_ep); 3504 tl_refrele(acc_ep); 3505 return; 3506 } 3507 3508 /* 3509 * If ti_tep is NULL the client has already closed. In this case 3510 * the code below will avoid any action on the client side 3511 * but complete the server and acceptor state transitions. 3512 */ 3513 ASSERT(tip->ti_tep == NULL || 3514 tip->ti_tep->te_seqno == cres->SEQ_number); 3515 cl_ep = tip->ti_tep; 3516 3517 /* 3518 * If the client is present it is switched from listener's to acceptor's 3519 * serializer. We should block client closes while serializers are 3520 * being switched. 3521 * 3522 * It is possible that the client is present but is currently being 3523 * closed. There are two possible cases: 3524 * 3525 * 1) The client has already entered tl_close_finish_ser() and sent 3526 * T_ORDREL_IND. In this case we can just ignore the client (but we 3527 * still need to send all messages from tip->ti_mp to the acceptor). 3528 * 3529 * 2) The client started the close but has not entered 3530 * tl_close_finish_ser() yet. In this case, the client is already 3531 * proceeding asynchronously on the listener's serializer, so we're 3532 * forced to change the acceptor to use the listener's serializer to 3533 * ensure that any operations on the acceptor are serialized with 3534 * respect to the close that's in-progress. 3535 */ 3536 if (cl_ep != NULL) { 3537 if (tl_noclose(cl_ep)) { 3538 client_noclose_set = B_TRUE; 3539 } else { 3540 /* 3541 * Client is closing. If it it has sent the 3542 * T_ORDREL_IND, we can simply ignore it - otherwise, 3543 * we have to let let the client continue until it is 3544 * sent. 
3545 * 3546 * If we do continue using the client, acceptor will 3547 * switch to client's serializer which is used by client 3548 * for its close. 3549 */ 3550 tl_client_closing_when_accepting++; 3551 switch_client_serializer = B_FALSE; 3552 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect || 3553 cl_ep->te_state == -1) 3554 cl_ep = NULL; 3555 } 3556 } 3557 3558 if (cl_ep != NULL) { 3559 /* 3560 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER 3561 * (latter for sockets only) 3562 */ 3563 if (cl_ep->te_state != TS_WCON_CREQ && 3564 (cl_ep->te_state != TS_DATA_XFER && 3565 IS_SOCKET(cl_ep))) { 3566 err = ECONNREFUSED; 3567 /* 3568 * T_DISCON_IND sent later after committing memory 3569 * and acking validity of request 3570 */ 3571 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 3572 "tl_conn_res:peer in bad state")); 3573 } 3574 3575 /* 3576 * preallocate now for T_DISCON_IND or T_CONN_CONN 3577 * ack validity of request (T_OK_ACK) after memory committed 3578 */ 3579 3580 if (err) 3581 size = sizeof (struct T_discon_ind); 3582 else { 3583 /* 3584 * calculate length of T_CONN_CON message 3585 */ 3586 olen = 0; 3587 if (cl_ep->te_flag & TL_SETCRED) { 3588 olen = (t_scalar_t)sizeof (struct opthdr) + 3589 OPTLEN(sizeof (tl_credopt_t)); 3590 } else if (cl_ep->te_flag & TL_SETUCRED) { 3591 olen = (t_scalar_t)sizeof (struct opthdr) + 3592 OPTLEN(ucredminsize(acc_ep->te_credp)); 3593 } 3594 size = T_ALIGN(sizeof (struct T_conn_con) + 3595 acc_ep->te_alen) + olen; 3596 } 3597 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3598 /* 3599 * roll back state changes 3600 */ 3601 tep->te_state = TS_WRES_CIND; 3602 tl_memrecover(wq, mp, size); 3603 freemsg(ackmp); 3604 if (client_noclose_set) 3605 tl_closeok(cl_ep); 3606 tl_closeok(acc_ep); 3607 tl_refrele(acc_ep); 3608 return; 3609 } 3610 mp = NULL; 3611 } 3612 3613 /* 3614 * Now ack validity of request 3615 */ 3616 if (tep->te_nicon == 1) { 3617 if (tep == acc_ep) 3618 tep->te_state = NEXTSTATE(TE_OK_ACK2, 
tep->te_state); 3619 else 3620 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state); 3621 } else 3622 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state); 3623 3624 /* 3625 * send T_DISCON_IND now if client state validation failed earlier 3626 */ 3627 if (err) { 3628 tl_ok_ack(wq, ackmp, prim); 3629 /* 3630 * flush the queues - why always ? 3631 */ 3632 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3633 3634 dimp = tl_resizemp(respmp, size); 3635 if (! dimp) { 3636 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3637 SL_TRACE|SL_ERROR, 3638 "tl_conn_res:con_ind:allocb failure")); 3639 tl_merror(wq, respmp, ENOMEM); 3640 tl_closeok(acc_ep); 3641 if (client_noclose_set) 3642 tl_closeok(cl_ep); 3643 tl_refrele(acc_ep); 3644 return; 3645 } 3646 if (dimp->b_cont) { 3647 /* no user data in provider generated discon ind */ 3648 freemsg(dimp->b_cont); 3649 dimp->b_cont = NULL; 3650 } 3651 3652 DB_TYPE(dimp) = M_PROTO; 3653 di = (struct T_discon_ind *)dimp->b_rptr; 3654 di->PRIM_type = T_DISCON_IND; 3655 di->DISCON_reason = err; 3656 di->SEQ_number = BADSEQNUM; 3657 3658 tep->te_state = TS_IDLE; 3659 /* 3660 * send T_DISCON_IND message 3661 */ 3662 putnext(acc_ep->te_rq, dimp); 3663 if (client_noclose_set) 3664 tl_closeok(cl_ep); 3665 tl_closeok(acc_ep); 3666 tl_refrele(acc_ep); 3667 return; 3668 } 3669 3670 /* 3671 * now start connecting the accepting endpoint 3672 */ 3673 if (tep != acc_ep) 3674 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3675 3676 if (cl_ep == NULL) { 3677 /* 3678 * The client has already closed. Send up any queued messages 3679 * and change the state accordingly. 
3680 */ 3681 tl_ok_ack(wq, ackmp, prim); 3682 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3683 3684 /* 3685 * remove endpoint from incoming connection 3686 * delete client from list of incoming connections 3687 */ 3688 tl_freetip(tep, tip); 3689 freemsg(mp); 3690 tl_closeok(acc_ep); 3691 tl_refrele(acc_ep); 3692 return; 3693 } else if (tip->ti_mp != NULL) { 3694 /* 3695 * The client could have queued a T_DISCON_IND which needs 3696 * to be sent up. 3697 * Note that t_discon_req can not operate the same as 3698 * t_data_req since it is not possible for it to putbq 3699 * the message and return -1 due to the use of qwriter. 3700 */ 3701 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3702 } 3703 3704 /* 3705 * prepare connect confirm T_CONN_CON message 3706 */ 3707 3708 /* 3709 * allocate the message - original data blocks 3710 * retained in the returned mblk 3711 */ 3712 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) { 3713 ccmp = tl_resizemp(respmp, size); 3714 if (ccmp == NULL) { 3715 tl_ok_ack(wq, ackmp, prim); 3716 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3717 SL_TRACE|SL_ERROR, 3718 "tl_conn_res:conn_con:allocb failure")); 3719 tl_merror(wq, respmp, ENOMEM); 3720 tl_closeok(acc_ep); 3721 if (client_noclose_set) 3722 tl_closeok(cl_ep); 3723 tl_refrele(acc_ep); 3724 return; 3725 } 3726 3727 DB_TYPE(ccmp) = M_PROTO; 3728 cc = (struct T_conn_con *)ccmp->b_rptr; 3729 cc->PRIM_type = T_CONN_CON; 3730 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con); 3731 cc->RES_length = acc_ep->te_alen; 3732 addr_startp = ccmp->b_rptr + cc->RES_offset; 3733 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen); 3734 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3735 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset + 3736 cc->RES_length); 3737 cc->OPT_length = olen; 3738 tl_fill_option(ccmp->b_rptr + cc->OPT_offset, 3739 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag, 3740 cl_ep->te_credp); 3741 } else { 3742 cc->OPT_offset = 0; 3743 cc->OPT_length = 0; 3744 } 3745 /* 3746 * 
Forward the credential in the packet so it can be picked up 3747 * at the higher layers for more complete credential processing 3748 */ 3749 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid); 3750 } else { 3751 freemsg(respmp); 3752 respmp = NULL; 3753 } 3754 3755 /* 3756 * make connection linking 3757 * accepting and client endpoints 3758 * No need to increment references: 3759 * on client: it should already have one from tip->ti_tep linkage. 3760 * on acceptor is should already have one from the table lookup. 3761 * 3762 * At this point both client and acceptor can't close. Set client 3763 * serializer to acceptor's. 3764 */ 3765 ASSERT(cl_ep->te_refcnt >= 2); 3766 ASSERT(acc_ep->te_refcnt >= 2); 3767 ASSERT(cl_ep->te_conp == NULL); 3768 ASSERT(acc_ep->te_conp == NULL); 3769 cl_ep->te_conp = acc_ep; 3770 acc_ep->te_conp = cl_ep; 3771 ASSERT(cl_ep->te_ser == tep->te_ser); 3772 if (switch_client_serializer) { 3773 mutex_enter(&cl_ep->te_ser_lock); 3774 if (cl_ep->te_ser_count > 0) { 3775 switch_client_serializer = B_FALSE; 3776 tl_serializer_noswitch++; 3777 } else { 3778 /* 3779 * Move client to the acceptor's serializer. 3780 */ 3781 tl_serializer_refhold(acc_ep->te_ser); 3782 tl_serializer_refrele(cl_ep->te_ser); 3783 cl_ep->te_ser = acc_ep->te_ser; 3784 } 3785 mutex_exit(&cl_ep->te_ser_lock); 3786 } 3787 if (!switch_client_serializer) { 3788 /* 3789 * It is not possible to switch client to use acceptor's. 3790 * Move acceptor to client's serializer (which is the same as 3791 * listener's). 
3792 */ 3793 tl_serializer_refhold(cl_ep->te_ser); 3794 tl_serializer_refrele(acc_ep->te_ser); 3795 acc_ep->te_ser = cl_ep->te_ser; 3796 } 3797 3798 TL_REMOVE_PEER(cl_ep->te_oconp); 3799 TL_REMOVE_PEER(acc_ep->te_oconp); 3800 3801 /* 3802 * remove endpoint from incoming connection 3803 * delete client from list of incoming connections 3804 */ 3805 tip->ti_tep = NULL; 3806 tl_freetip(tep, tip); 3807 tl_ok_ack(wq, ackmp, prim); 3808 3809 /* 3810 * data blocks already linked in reallocb() 3811 */ 3812 3813 /* 3814 * link queues so that I_SENDFD will work 3815 */ 3816 if (! IS_SOCKET(tep)) { 3817 acc_ep->te_wq->q_next = cl_ep->te_rq; 3818 cl_ep->te_wq->q_next = acc_ep->te_rq; 3819 } 3820 3821 /* 3822 * send T_CONN_CON up on client side unless it was already 3823 * done (for a socket). In cases any data or ordrel req has been 3824 * queued make sure that the service procedure runs. 3825 */ 3826 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) { 3827 enableok(cl_ep->te_wq); 3828 TL_QENABLE(cl_ep); 3829 if (ccmp != NULL) 3830 freemsg(ccmp); 3831 } else { 3832 /* 3833 * change client state on TE_CONN_CON event 3834 */ 3835 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state); 3836 putnext(cl_ep->te_rq, ccmp); 3837 } 3838 3839 /* Mark the both endpoints as accepted */ 3840 cl_ep->te_flag |= TL_ACCEPTED; 3841 acc_ep->te_flag |= TL_ACCEPTED; 3842 3843 /* 3844 * Allow client and acceptor to close. 
3845 */ 3846 tl_closeok(acc_ep); 3847 if (client_noclose_set) 3848 tl_closeok(cl_ep); 3849 } 3850 3851 3852 3853 3854 static void 3855 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3856 { 3857 queue_t *wq; 3858 struct T_discon_req *dr; 3859 ssize_t msz; 3860 tl_endpt_t *peer_tep = tep->te_conp; 3861 tl_endpt_t *srv_tep = tep->te_oconp; 3862 tl_icon_t *tip; 3863 size_t size; 3864 mblk_t *ackmp, *dimp, *respmp; 3865 struct T_discon_ind *di; 3866 t_scalar_t save_state, new_state; 3867 3868 if (tep->te_closing) { 3869 freemsg(mp); 3870 return; 3871 } 3872 3873 if ((peer_tep != NULL) && peer_tep->te_closing) { 3874 TL_UNCONNECT(tep->te_conp); 3875 peer_tep = NULL; 3876 } 3877 if ((srv_tep != NULL) && srv_tep->te_closing) { 3878 TL_UNCONNECT(tep->te_oconp); 3879 srv_tep = NULL; 3880 } 3881 3882 wq = tep->te_wq; 3883 3884 /* 3885 * preallocate memory for: 3886 * 1. max of T_ERROR_ACK and T_OK_ACK 3887 * ==> known max T_ERROR_ACK 3888 * 2. for T_DISCON_IND 3889 */ 3890 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3891 if (! ackmp) { 3892 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3893 return; 3894 } 3895 /* 3896 * memory committed for T_OK_ACK/T_ERROR_ACK now 3897 * will be committed for T_DISCON_IND later 3898 */ 3899 3900 dr = (struct T_discon_req *)mp->b_rptr; 3901 msz = MBLKL(mp); 3902 3903 /* 3904 * validate the state 3905 */ 3906 save_state = new_state = tep->te_state; 3907 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3908 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3909 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3910 SL_TRACE|SL_ERROR, 3911 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3912 tep->te_state)); 3913 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3914 freemsg(mp); 3915 return; 3916 } 3917 /* 3918 * Defer committing the state change until it is determined if 3919 * the message will be queued with the tl_icon or not. 
3920 */ 3921 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3922 3923 /* validate the message */ 3924 if (msz < sizeof (struct T_discon_req)) { 3925 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3926 "tl_discon_req:invalid message")); 3927 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3928 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3929 freemsg(mp); 3930 return; 3931 } 3932 3933 /* 3934 * if server, then validate that client exists 3935 * by connection sequence number etc. 3936 */ 3937 if (tep->te_nicon > 0) { /* server */ 3938 3939 /* 3940 * search server list for disconnect client 3941 */ 3942 tip = tl_icon_find(tep, dr->SEQ_number); 3943 if (tip == NULL) { 3944 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3945 SL_TRACE|SL_ERROR, 3946 "tl_discon_req:no disconnect endpoint")); 3947 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3948 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3949 freemsg(mp); 3950 return; 3951 } 3952 /* 3953 * If ti_tep is NULL the client has already closed. In this case 3954 * the code below will avoid any action on the client side. 3955 */ 3956 3957 IMPLY(tip->ti_tep != NULL, 3958 tip->ti_tep->te_seqno == dr->SEQ_number); 3959 peer_tep = tip->ti_tep; 3960 } 3961 3962 /* 3963 * preallocate now for T_DISCON_IND 3964 * ack validity of request (T_OK_ACK) after memory committed 3965 */ 3966 size = sizeof (struct T_discon_ind); 3967 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3968 tl_memrecover(wq, mp, size); 3969 freemsg(ackmp); 3970 return; 3971 } 3972 3973 /* 3974 * prepare message to ack validity of request 3975 */ 3976 if (tep->te_nicon == 0) 3977 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3978 else 3979 if (tep->te_nicon == 1) 3980 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3981 else 3982 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3983 3984 /* 3985 * Flushing queues according to TPI. Using the old state. 
3986 */ 3987 if ((tep->te_nicon <= 1) && 3988 ((save_state == TS_DATA_XFER) || 3989 (save_state == TS_WIND_ORDREL) || 3990 (save_state == TS_WREQ_ORDREL))) 3991 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3992 3993 /* send T_OK_ACK up */ 3994 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3995 3996 /* 3997 * now do disconnect business 3998 */ 3999 if (tep->te_nicon > 0) { /* listener */ 4000 if (peer_tep != NULL && !peer_tep->te_closing) { 4001 /* 4002 * disconnect incoming connect request pending to tep 4003 */ 4004 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4005 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4006 SL_TRACE|SL_ERROR, 4007 "tl_discon_req: reallocb failed")); 4008 tep->te_state = new_state; 4009 tl_merror(wq, respmp, ENOMEM); 4010 return; 4011 } 4012 di = (struct T_discon_ind *)dimp->b_rptr; 4013 di->SEQ_number = BADSEQNUM; 4014 save_state = peer_tep->te_state; 4015 peer_tep->te_state = TS_IDLE; 4016 4017 TL_REMOVE_PEER(peer_tep->te_oconp); 4018 enableok(peer_tep->te_wq); 4019 TL_QENABLE(peer_tep); 4020 } else { 4021 freemsg(respmp); 4022 dimp = NULL; 4023 } 4024 4025 /* 4026 * remove endpoint from incoming connection list 4027 * - remove disconnect client from list on server 4028 */ 4029 tl_freetip(tep, tip); 4030 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4031 /* 4032 * disconnect an outgoing request pending from tep 4033 */ 4034 4035 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4036 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4037 SL_TRACE|SL_ERROR, 4038 "tl_discon_req: reallocb failed")); 4039 tep->te_state = new_state; 4040 tl_merror(wq, respmp, ENOMEM); 4041 return; 4042 } 4043 di = (struct T_discon_ind *)dimp->b_rptr; 4044 DB_TYPE(dimp) = M_PROTO; 4045 di->PRIM_type = T_DISCON_IND; 4046 di->DISCON_reason = ECONNRESET; 4047 di->SEQ_number = tep->te_seqno; 4048 4049 /* 4050 * If this is a socket the T_DISCON_IND is queued with 4051 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4052 * from the list of pending connections. 
4053 * Note that when te_oconp is set the peer better have 4054 * a t_connind_t for the client. 4055 */ 4056 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4057 /* 4058 * No need to check that 4059 * ti_tep == NULL since the T_DISCON_IND 4060 * takes precedence over other queued 4061 * messages. 4062 */ 4063 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4064 peer_tep = NULL; 4065 dimp = NULL; 4066 /* 4067 * Can't clear te_oconp since tl_co_unconnect needs 4068 * it as a hint not to free the tep. 4069 * Keep the state unchanged since tl_conn_res inspects 4070 * it. 4071 */ 4072 new_state = tep->te_state; 4073 } else { 4074 /* Found - delete it */ 4075 tip = tl_icon_find(peer_tep, tep->te_seqno); 4076 if (tip != NULL) { 4077 ASSERT(tep == tip->ti_tep); 4078 save_state = peer_tep->te_state; 4079 if (peer_tep->te_nicon == 1) 4080 peer_tep->te_state = 4081 NEXTSTATE(TE_DISCON_IND2, 4082 peer_tep->te_state); 4083 else 4084 peer_tep->te_state = 4085 NEXTSTATE(TE_DISCON_IND3, 4086 peer_tep->te_state); 4087 tl_freetip(peer_tep, tip); 4088 } 4089 ASSERT(tep->te_oconp != NULL); 4090 TL_UNCONNECT(tep->te_oconp); 4091 } 4092 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4093 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4094 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4095 SL_TRACE|SL_ERROR, 4096 "tl_discon_req: reallocb failed")); 4097 tep->te_state = new_state; 4098 tl_merror(wq, respmp, ENOMEM); 4099 return; 4100 } 4101 di = (struct T_discon_ind *)dimp->b_rptr; 4102 di->SEQ_number = BADSEQNUM; 4103 4104 save_state = peer_tep->te_state; 4105 peer_tep->te_state = TS_IDLE; 4106 } else { 4107 /* Not connected */ 4108 tep->te_state = new_state; 4109 freemsg(respmp); 4110 return; 4111 } 4112 4113 /* Commit state changes */ 4114 tep->te_state = new_state; 4115 4116 if (peer_tep == NULL) { 4117 ASSERT(dimp == NULL); 4118 goto done; 4119 } 4120 /* 4121 * Flush queues on peer before sending up 4122 * T_DISCON_IND according to TPI 4123 */ 4124 4125 if ((save_state == TS_DATA_XFER) || 4126 (save_state == TS_WIND_ORDREL) || 4127 (save_state == TS_WREQ_ORDREL)) 4128 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4129 4130 DB_TYPE(dimp) = M_PROTO; 4131 di->PRIM_type = T_DISCON_IND; 4132 di->DISCON_reason = ECONNRESET; 4133 4134 /* 4135 * data blocks already linked into dimp by reallocb() 4136 */ 4137 /* 4138 * send indication message to peer user module 4139 */ 4140 ASSERT(dimp != NULL); 4141 putnext(peer_tep->te_rq, dimp); 4142 done: 4143 if (tep->te_conp) { /* disconnect pointers if connected */ 4144 ASSERT(! peer_tep->te_closing); 4145 4146 /* 4147 * Messages may be queued on peer's write queue 4148 * waiting to be processed by its write service 4149 * procedure. Before the pointer to the peer transport 4150 * structure is set to NULL, qenable the peer's write 4151 * queue so that the queued up messages are processed. 4152 */ 4153 if ((save_state == TS_DATA_XFER) || 4154 (save_state == TS_WIND_ORDREL) || 4155 (save_state == TS_WREQ_ORDREL)) 4156 TL_QENABLE(peer_tep); 4157 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4158 TL_UNCONNECT(peer_tep->te_conp); 4159 if (! 
IS_SOCKET(tep)) { 4160 /* 4161 * unlink the streams 4162 */ 4163 tep->te_wq->q_next = NULL; 4164 peer_tep->te_wq->q_next = NULL; 4165 } 4166 TL_UNCONNECT(tep->te_conp); 4167 } 4168 } 4169 4170 static void 4171 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep) 4172 { 4173 if (!tep->te_closing) 4174 tl_addr_req(mp, tep); 4175 else 4176 freemsg(mp); 4177 4178 tl_serializer_exit(tep); 4179 tl_refrele(tep); 4180 } 4181 4182 static void 4183 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4184 { 4185 queue_t *wq; 4186 size_t ack_sz; 4187 mblk_t *ackmp; 4188 struct T_addr_ack *taa; 4189 4190 if (tep->te_closing) { 4191 freemsg(mp); 4192 return; 4193 } 4194 4195 wq = tep->te_wq; 4196 4197 /* 4198 * Note: T_ADDR_REQ message has only PRIM_type field 4199 * so it is already validated earlier. 4200 */ 4201 4202 if (IS_CLTS(tep) || 4203 (tep->te_state > TS_WREQ_ORDREL) || 4204 (tep->te_state < TS_DATA_XFER)) { 4205 /* 4206 * Either connectionless or connection oriented but not 4207 * in connected data transfer state or half-closed states. 
4208 */ 4209 ack_sz = sizeof (struct T_addr_ack); 4210 if (tep->te_state >= TS_IDLE) 4211 /* is bound */ 4212 ack_sz += tep->te_alen; 4213 ackmp = reallocb(mp, ack_sz, 0); 4214 if (ackmp == NULL) { 4215 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4216 SL_TRACE|SL_ERROR, 4217 "tl_addr_req: reallocb failed")); 4218 tl_memrecover(wq, mp, ack_sz); 4219 return; 4220 } 4221 4222 taa = (struct T_addr_ack *)ackmp->b_rptr; 4223 4224 bzero(taa, sizeof (struct T_addr_ack)); 4225 4226 taa->PRIM_type = T_ADDR_ACK; 4227 ackmp->b_datap->db_type = M_PCPROTO; 4228 ackmp->b_wptr = (uchar_t *)&taa[1]; 4229 4230 if (tep->te_state >= TS_IDLE) { 4231 /* endpoint is bound */ 4232 taa->LOCADDR_length = tep->te_alen; 4233 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4234 4235 bcopy(tep->te_abuf, ackmp->b_wptr, 4236 tep->te_alen); 4237 ackmp->b_wptr += tep->te_alen; 4238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4239 } 4240 4241 (void) qreply(wq, ackmp); 4242 } else { 4243 ASSERT(tep->te_state == TS_DATA_XFER || 4244 tep->te_state == TS_WIND_ORDREL || 4245 tep->te_state == TS_WREQ_ORDREL); 4246 /* connection oriented in data transfer */ 4247 tl_connected_cots_addr_req(mp, tep); 4248 } 4249 } 4250 4251 4252 static void 4253 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4254 { 4255 tl_endpt_t *peer_tep; 4256 size_t ack_sz; 4257 mblk_t *ackmp; 4258 struct T_addr_ack *taa; 4259 uchar_t *addr_startp; 4260 4261 if (tep->te_closing) { 4262 freemsg(mp); 4263 return; 4264 } 4265 4266 ASSERT(tep->te_state >= TS_IDLE); 4267 4268 ack_sz = sizeof (struct T_addr_ack); 4269 ack_sz += T_ALIGN(tep->te_alen); 4270 peer_tep = tep->te_conp; 4271 ack_sz += peer_tep->te_alen; 4272 4273 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4274 if (ackmp == NULL) { 4275 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4276 "tl_connected_cots_addr_req: reallocb failed")); 4277 tl_memrecover(tep->te_wq, mp, ack_sz); 4278 return; 4279 } 4280 4281 taa = (struct T_addr_ack 
*)ackmp->b_rptr; 4282 4283 /* endpoint is bound */ 4284 taa->LOCADDR_length = tep->te_alen; 4285 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4286 4287 addr_startp = (uchar_t *)&taa[1]; 4288 4289 bcopy(tep->te_abuf, addr_startp, 4290 tep->te_alen); 4291 4292 taa->REMADDR_length = peer_tep->te_alen; 4293 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4294 taa->LOCADDR_length); 4295 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4296 bcopy(peer_tep->te_abuf, addr_startp, 4297 peer_tep->te_alen); 4298 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4299 taa->REMADDR_offset + peer_tep->te_alen; 4300 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4301 4302 putnext(tep->te_rq, ackmp); 4303 } 4304 4305 static void 4306 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4307 { 4308 if (IS_CLTS(tep)) { 4309 *ia = tl_clts_info_ack; 4310 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4311 } else { 4312 *ia = tl_cots_info_ack; 4313 if (IS_COTSORD(tep)) 4314 ia->SERV_type = T_COTS_ORD; 4315 } 4316 ia->TIDU_size = tl_tidusz; 4317 ia->CURRENT_state = tep->te_state; 4318 } 4319 4320 /* 4321 * This routine responds to T_CAPABILITY_REQ messages. It is called by 4322 * tl_wput. 
4323 */ 4324 static void 4325 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4326 { 4327 mblk_t *ackmp; 4328 t_uscalar_t cap_bits1; 4329 struct T_capability_ack *tcap; 4330 4331 if (tep->te_closing) { 4332 freemsg(mp); 4333 return; 4334 } 4335 4336 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4337 4338 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4339 M_PCPROTO, T_CAPABILITY_ACK); 4340 if (ackmp == NULL) { 4341 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4342 "tl_capability_req: reallocb failed")); 4343 tl_memrecover(tep->te_wq, mp, 4344 sizeof (struct T_capability_ack)); 4345 return; 4346 } 4347 4348 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4349 tcap->CAP_bits1 = 0; 4350 4351 if (cap_bits1 & TC1_INFO) { 4352 tl_copy_info(&tcap->INFO_ack, tep); 4353 tcap->CAP_bits1 |= TC1_INFO; 4354 } 4355 4356 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4357 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4358 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4359 } 4360 4361 putnext(tep->te_rq, ackmp); 4362 } 4363 4364 static void 4365 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4366 { 4367 if (! tep->te_closing) 4368 tl_info_req(mp, tep); 4369 else 4370 freemsg(mp); 4371 4372 tl_serializer_exit(tep); 4373 tl_refrele(tep); 4374 } 4375 4376 static void 4377 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4378 { 4379 mblk_t *ackmp; 4380 4381 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4382 M_PCPROTO, T_INFO_ACK); 4383 if (ackmp == NULL) { 4384 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4385 "tl_info_req: reallocb failed")); 4386 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4387 return; 4388 } 4389 4390 /* 4391 * fill in T_INFO_ACK contents 4392 */ 4393 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4394 4395 /* 4396 * send ack message 4397 */ 4398 putnext(tep->te_rq, ackmp); 4399 } 4400 4401 /* 4402 * Handle M_DATA, T_data_req and T_optdata_req. 
* If this is a socket pass through T_optdata_req options unmodified.
 *
 * The message is never left with the caller: on every path it is either
 * forwarded to the peer, put back on this endpoint's write queue, queued
 * with the pending connection indication, handed to tl_merror(), or freed.
 *
 *	mp	- the M_DATA or M_PROTO message being sent.
 *	tep	- sending endpoint.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	/* This routine does not handle connectionless endpoints. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * For TPI primitives check that the message is large enough for the
	 * primitive it claims to be.  T_OPTDATA_REQ is additionally accepted
	 * on sockets only.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither connected nor waiting on a pending connection. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* Too short to even hold the primitive type. */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_EXDATA_REQ: validate the message and both endpoints' states,
 * then turn the request into a T_EXDATA_IND and pass it to the connected
 * peer.  The overall structure mirrors tl_data().
 *
 *	mp	- the T_EXDATA_REQ message.
 *	tep	- sending endpoint.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither connected nor waiting on a pending connection. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_ORDREL_REQ: validate the message and the sender's state,
 * advance the sender's state, and deliver a T_ORDREL_IND to the connected
 * peer (advancing the peer's state as well).  If the connection has not
 * been accepted yet, the message is either put back on the queue or, when
 * closing, queued with the pending connection indication.
 *
 *	mp	- the T_ORDREL_REQ message.
 *	tep	- endpoint requesting the orderly release.
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	tl_endpt_t *peer_tep;
	queue_t *peer_rq;
	boolean_t closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer and no pending connection: fall out of the switch;
		 * the "peer gone" check below frees the message.
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	/*
	 * NOTE(review): the state is advanced here, before the flow-control
	 * check below.  If the message is then put back with TL_PUTBQ() and
	 * processed again later, the state switch above sees the already
	 * advanced state - confirm that the retried request cannot be
	 * rejected as out of state.
	 */
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
 *
 * The indication echoes the destination address and the options of the
 * failed T_unitdata_req found in mp.  The address and option offsets and
 * lengths are read from mp without further checks, so the caller must have
 * validated them.  On success mp is freed and the indication is sent back
 * with qreply(); if the indication cannot be allocated, mp is passed to
 * tl_memrecover().
 *
 *	wq	- write queue of the endpoint that sent the request.
 *	mp	- the offending T_unitdata_req.
 *	err	- value stored in ERROR_type.
 */
static void
tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
{
	size_t err_sz;
	tl_endpt_t *tep;
	struct T_unitdata_req *udreq;
	mblk_t *err_mp;
	t_scalar_t alen;
	t_scalar_t olen;
	struct T_uderror_ind *uderr;
	uchar_t *addr_startp;

	err_sz = sizeof (struct T_uderror_ind);
	tep = (tl_endpt_t *)wq->q_ptr;
	udreq = (struct T_unitdata_req *)mp->b_rptr;
	alen = udreq->DEST_length;
	olen = udreq->OPT_length;

	/*
	 * Size of the indication: header, then the address (padded with
	 * T_ALIGN so that the options start aligned), then the options.
	 */
	if (alen > 0)
		err_sz = T_ALIGN(err_sz + alen);
	if (olen > 0)
		err_sz += olen;

	err_mp = allocb(err_sz, BPRI_MED);
	if (! err_mp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_uderr:allocb failure"));
		/*
		 * Note: no rollback of state needed as it does
		 * not change in connectionless transport
		 */
		tl_memrecover(wq, mp, err_sz);
		return;
	}

	DB_TYPE(err_mp) = M_PROTO;
	err_mp->b_wptr = err_mp->b_rptr + err_sz;
	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
	uderr->PRIM_type = T_UDERROR_IND;
	uderr->ERROR_type = err;
	uderr->DEST_length = alen;
	uderr->OPT_length = olen;
	if (alen <= 0) {
		uderr->DEST_offset = 0;
	} else {
		/* Copy the destination address right behind the header. */
		uderr->DEST_offset =
		    (t_scalar_t)sizeof (struct T_uderror_ind);
		addr_startp = mp->b_rptr + udreq->DEST_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
		    (size_t)alen);
	}
	if (olen <= 0) {
		uderr->OPT_offset = 0;
	} else {
		/* Options go at the aligned offset following the address. */
		uderr->OPT_offset =
		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
		    uderr->DEST_length);
		addr_startp = mp->b_rptr + udreq->OPT_offset;
		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
		    (size_t)olen);
	}
	freemsg(mp);

	/*
	 * send indication message
	 */
	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);

	qreply(wq, err_mp);
}

/*
 * Serializer entry point for T_UNITDATA_REQ.
 *
 * If the endpoint is not closing and messages are already waiting on its
 * write queue, the new message is queued (TL_PUTQ) instead of being
 * processed directly.  Otherwise it is processed right away as long as the
 * endpoint still has a read queue, and freed if it does not.  Finally the
 * serializer is exited and the endpoint reference is dropped.
 */
static void
tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;

	if (!tep->te_closing && (wq->q_first != NULL)) {
		TL_PUTQ(tep, mp);
	} else if (tep->te_rq != NULL)
		tl_unitdata(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Handle T_unitdata_req.
 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
 * If this is a socket pass through options unmodified.
4987 */ 4988 static void 4989 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4990 { 4991 queue_t *wq = tep->te_wq; 4992 soux_addr_t ux_addr; 4993 tl_addr_t destaddr; 4994 uchar_t *addr_startp; 4995 tl_endpt_t *peer_tep; 4996 struct T_unitdata_ind *udind; 4997 struct T_unitdata_req *udreq; 4998 ssize_t msz, ui_sz; 4999 t_scalar_t alen, aoff, olen, ooff; 5000 t_scalar_t oldolen = 0; 5001 cred_t *cr = NULL; 5002 pid_t cpid; 5003 5004 udreq = (struct T_unitdata_req *)mp->b_rptr; 5005 msz = MBLKL(mp); 5006 5007 /* 5008 * validate the state 5009 */ 5010 if (tep->te_state != TS_IDLE) { 5011 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5012 SL_TRACE|SL_ERROR, 5013 "tl_wput:T_CONN_REQ:out of state")); 5014 tl_merror(wq, mp, EPROTO); 5015 return; 5016 } 5017 /* 5018 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5019 * (state does not change on this event) 5020 */ 5021 5022 /* 5023 * validate the message 5024 * Note: dereference fields in struct inside message only 5025 * after validating the message length. 
5026 */ 5027 if (msz < sizeof (struct T_unitdata_req)) { 5028 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5029 "tl_unitdata:invalid message length")); 5030 tl_merror(wq, mp, EINVAL); 5031 return; 5032 } 5033 alen = udreq->DEST_length; 5034 aoff = udreq->DEST_offset; 5035 oldolen = olen = udreq->OPT_length; 5036 ooff = udreq->OPT_offset; 5037 if (olen == 0) 5038 ooff = 0; 5039 5040 if (IS_SOCKET(tep)) { 5041 if ((alen != TL_SOUX_ADDRLEN) || 5042 (aoff < 0) || 5043 (aoff + alen > msz) || 5044 (olen < 0) || (ooff < 0) || 5045 ((olen > 0) && ((ooff + olen) > msz))) { 5046 (void) (STRLOG(TL_ID, tep->te_minor, 5047 1, SL_TRACE|SL_ERROR, 5048 "tl_unitdata_req: invalid socket addr " 5049 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5050 (int)msz, alen, aoff, olen, ooff)); 5051 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5052 return; 5053 } 5054 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5055 5056 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5057 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5058 (void) (STRLOG(TL_ID, tep->te_minor, 5059 1, SL_TRACE|SL_ERROR, 5060 "tl_conn_req: invalid socket magic")); 5061 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5062 return; 5063 } 5064 } else { 5065 if ((alen < 0) || 5066 (aoff < 0) || 5067 ((alen > 0) && ((aoff + alen) > msz)) || 5068 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5069 ((aoff + alen) < 0) || 5070 ((olen > 0) && ((ooff + olen) > msz)) || 5071 (olen < 0) || 5072 (ooff < 0) || 5073 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5074 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5075 SL_TRACE|SL_ERROR, 5076 "tl_unitdata:invalid unit data message")); 5077 tl_merror(wq, mp, EINVAL); 5078 return; 5079 } 5080 } 5081 5082 /* Options not supported unless it's a socket */ 5083 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5084 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5085 "tl_unitdata:option use(unsupported) or zero len addr")); 5086 
tl_uderr(wq, mp, EPROTO); 5087 return; 5088 } 5089 #ifdef DEBUG 5090 /* 5091 * Mild form of ASSERT()ion to detect broken TPI apps. 5092 * if (! assertion) 5093 * log warning; 5094 */ 5095 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5096 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5097 "tl_unitdata:addr overlaps TPI message")); 5098 } 5099 #endif 5100 /* 5101 * get destination endpoint 5102 */ 5103 destaddr.ta_alen = alen; 5104 destaddr.ta_abuf = mp->b_rptr + aoff; 5105 destaddr.ta_zoneid = tep->te_zoneid; 5106 5107 /* 5108 * Check whether the destination is the same that was used previously 5109 * and the destination endpoint is in the right state. If something is 5110 * wrong, find destination again and cache it. 5111 */ 5112 peer_tep = tep->te_lastep; 5113 5114 if ((peer_tep == NULL) || peer_tep->te_closing || 5115 (peer_tep->te_state != TS_IDLE) || 5116 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5117 /* 5118 * Not the same as cached destination , need to find the right 5119 * destination. 5120 */ 5121 peer_tep = (IS_SOCKET(tep) ? 5122 tl_sock_find_peer(tep, &ux_addr) : 5123 tl_find_peer(tep, &destaddr)); 5124 5125 if (peer_tep == NULL) { 5126 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5127 SL_TRACE|SL_ERROR, 5128 "tl_unitdata:no one at destination address")); 5129 tl_uderr(wq, mp, ECONNRESET); 5130 return; 5131 } 5132 5133 /* 5134 * Cache the new peer. 5135 */ 5136 if (tep->te_lastep != NULL) 5137 tl_refrele(tep->te_lastep); 5138 5139 tep->te_lastep = peer_tep; 5140 } 5141 5142 if (peer_tep->te_state != TS_IDLE) { 5143 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5144 "tl_unitdata:provider in invalid state")); 5145 tl_uderr(wq, mp, EPROTO); 5146 return; 5147 } 5148 5149 ASSERT(peer_tep->te_rq != NULL); 5150 5151 /* 5152 * Put it back if flow controlled except when we are closing. 5153 * Note: Messages already on queue when we are closing is bounded 5154 * so we can ignore flow control. 
5155 */ 5156 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5157 /* record what we are flow controlled on */ 5158 if (tep->te_flowq != NULL) { 5159 list_remove(&tep->te_flowq->te_flowlist, tep); 5160 } 5161 list_insert_head(&peer_tep->te_flowlist, tep); 5162 tep->te_flowq = peer_tep; 5163 TL_PUTBQ(tep, mp); 5164 return; 5165 } 5166 /* 5167 * prepare indication message 5168 */ 5169 5170 /* 5171 * calculate length of message 5172 */ 5173 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5174 cr = msg_getcred(mp, &cpid); 5175 ASSERT(cr != NULL); 5176 5177 if (peer_tep->te_flag & TL_SETCRED) { 5178 ASSERT(olen == 0); 5179 olen = (t_scalar_t)sizeof (struct opthdr) + 5180 OPTLEN(sizeof (tl_credopt_t)); 5181 /* 1 option only */ 5182 } else if (peer_tep->te_flag & TL_SETUCRED) { 5183 ASSERT(olen == 0); 5184 olen = (t_scalar_t)sizeof (struct opthdr) + 5185 OPTLEN(ucredminsize(cr)); 5186 /* 1 option only */ 5187 } else { 5188 /* Possibly more than one option */ 5189 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5190 OPTLEN(ucredminsize(cr)); 5191 } 5192 } 5193 5194 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5195 olen; 5196 /* 5197 * If the unitdata_ind fits and we are not adding options 5198 * reuse the udreq mblk. 5199 */ 5200 if (msz >= ui_sz && alen >= tep->te_alen && 5201 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5202 /* 5203 * Reuse the original mblk. Leave options in place. 5204 */ 5205 udind = (struct T_unitdata_ind *)mp->b_rptr; 5206 udind->PRIM_type = T_UNITDATA_IND; 5207 udind->SRC_length = tep->te_alen; 5208 addr_startp = mp->b_rptr + udind->SRC_offset; 5209 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5210 } else { 5211 /* Allocate a new T_unidata_ind message */ 5212 mblk_t *ui_mp; 5213 5214 ui_mp = allocb(ui_sz, BPRI_MED); 5215 if (! 
ui_mp) { 5216 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5217 "tl_unitdata:allocb failure:message queued")); 5218 tl_memrecover(wq, mp, ui_sz); 5219 return; 5220 } 5221 5222 /* 5223 * fill in T_UNITDATA_IND contents 5224 */ 5225 DB_TYPE(ui_mp) = M_PROTO; 5226 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5227 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5228 udind->PRIM_type = T_UNITDATA_IND; 5229 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5230 udind->SRC_length = tep->te_alen; 5231 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5232 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5233 udind->OPT_offset = 5234 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5235 udind->OPT_length = olen; 5236 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5237 5238 if (oldolen != 0) { 5239 bcopy((void *)((uintptr_t)udreq + ooff), 5240 (void *)((uintptr_t)udind + 5241 udind->OPT_offset), 5242 oldolen); 5243 } 5244 ASSERT(cr != NULL); 5245 5246 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5247 oldolen, cr, cpid, 5248 peer_tep->te_flag, peer_tep->te_credp); 5249 } else { 5250 bcopy((void *)((uintptr_t)udreq + ooff), 5251 (void *)((uintptr_t)udind + udind->OPT_offset), 5252 olen); 5253 } 5254 5255 /* 5256 * relink data blocks from mp to ui_mp 5257 */ 5258 ui_mp->b_cont = mp->b_cont; 5259 freeb(mp); 5260 mp = ui_mp; 5261 } 5262 /* 5263 * send indication message 5264 */ 5265 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5266 putnext(peer_tep->te_rq, mp); 5267 } 5268 5269 5270 5271 /* 5272 * Check if a given addr is in use. 5273 * Endpoint ptr returned or NULL if not found. 5274 * The name space is separate for each mode. This implies that 5275 * sockets get their own name space. 
5276 */ 5277 static tl_endpt_t * 5278 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5279 { 5280 tl_endpt_t *peer_tep = NULL; 5281 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5282 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5283 5284 ASSERT(! IS_SOCKET(tep)); 5285 5286 ASSERT(ap != NULL && ap->ta_alen > 0); 5287 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5288 ASSERT(ap->ta_abuf != NULL); 5289 EQUIV(rc == 0, peer_tep != NULL); 5290 IMPLY(rc == 0, 5291 (tep->te_zoneid == peer_tep->te_zoneid) && 5292 (tep->te_transport == peer_tep->te_transport)); 5293 5294 if ((rc == 0) && (peer_tep->te_closing)) { 5295 tl_refrele(peer_tep); 5296 peer_tep = NULL; 5297 } 5298 5299 return (peer_tep); 5300 } 5301 5302 /* 5303 * Find peer for a socket based on unix domain address. 5304 * For implicit addresses our peer can be found by minor number in ai hash. For 5305 * explicit binds we look vnode address at addr_hash. 5306 */ 5307 static tl_endpt_t * 5308 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5309 { 5310 tl_endpt_t *peer_tep = NULL; 5311 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5312 tep->te_aihash : tep->te_addrhash; 5313 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5314 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5315 5316 ASSERT(IS_SOCKET(tep)); 5317 EQUIV(rc == 0, peer_tep != NULL); 5318 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); 5319 5320 if (peer_tep != NULL) { 5321 /* Don't attempt to use closing peer. */ 5322 if (peer_tep->te_closing) 5323 goto errout; 5324 5325 /* 5326 * Cross-zone unix sockets are permitted, but for Trusted 5327 * Extensions only, the "server" for these must be in the 5328 * global zone. 
5329 */ 5330 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5331 is_system_labeled() && 5332 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5333 goto errout; 5334 } 5335 5336 return (peer_tep); 5337 5338 errout: 5339 tl_refrele(peer_tep); 5340 return (NULL); 5341 } 5342 5343 /* 5344 * Generate a free addr and return it in struct pointed by ap 5345 * but allocating space for address buffer. 5346 * The generated address will be at least 4 bytes long and, if req->ta_alen 5347 * exceeds 4 bytes, be req->ta_alen bytes long. 5348 * 5349 * If address is found it will be inserted in the hash. 5350 * 5351 * If req->ta_alen is larger than the default alen (4 bytes) the last 5352 * alen-4 bytes will always be the same as in req. 5353 * 5354 * Return 0 for failure. 5355 * Return non-zero for success. 5356 */ 5357 static boolean_t 5358 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5359 { 5360 t_scalar_t alen; 5361 uint32_t loopcnt; /* Limit loop to 2^32 */ 5362 5363 ASSERT(tep->te_hash_hndl != NULL); 5364 ASSERT(! IS_SOCKET(tep)); 5365 5366 if (tep->te_hash_hndl == NULL) 5367 return (B_FALSE); 5368 5369 /* 5370 * check if default addr is in use 5371 * if it is - bump it and try again 5372 */ 5373 if (req == NULL) { 5374 alen = sizeof (uint32_t); 5375 } else { 5376 alen = max(req->ta_alen, sizeof (uint32_t)); 5377 ASSERT(tep->te_zoneid == req->ta_zoneid); 5378 } 5379 5380 if (tep->te_alen < alen) { 5381 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5382 5383 /* 5384 * Not enough space in tep->ta_ap to hold the address, 5385 * allocate a bigger space. 
5386 */ 5387 if (abuf == NULL) 5388 return (B_FALSE); 5389 5390 if (tep->te_alen > 0) 5391 kmem_free(tep->te_abuf, tep->te_alen); 5392 5393 tep->te_alen = alen; 5394 tep->te_abuf = abuf; 5395 } 5396 5397 /* Copy in the address in req */ 5398 if (req != NULL) { 5399 ASSERT(alen >= req->ta_alen); 5400 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5401 } 5402 5403 /* 5404 * First try minor number then try default addresses. 5405 */ 5406 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5407 5408 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5409 if (mod_hash_insert_reserve(tep->te_addrhash, 5410 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5411 tep->te_hash_hndl) == 0) { 5412 /* 5413 * found free address 5414 */ 5415 tep->te_flag |= TL_ADDRHASHED; 5416 tep->te_hash_hndl = NULL; 5417 5418 return (B_TRUE); /* successful return */ 5419 } 5420 /* 5421 * Use default address. 5422 */ 5423 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5424 atomic_add_32(&tep->te_defaddr, 1); 5425 } 5426 5427 /* 5428 * Failed to find anything. 5429 */ 5430 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5431 "tl_get_any_addr:looped 2^32 times")); 5432 return (B_FALSE); 5433 } 5434 5435 /* 5436 * reallocb + set r/w ptrs to reflect size. 5437 */ 5438 static mblk_t * 5439 tl_resizemp(mblk_t *mp, ssize_t new_size) 5440 { 5441 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5442 return (NULL); 5443 5444 mp->b_rptr = DB_BASE(mp); 5445 mp->b_wptr = mp->b_rptr + new_size; 5446 return (mp); 5447 } 5448 5449 static void 5450 tl_cl_backenable(tl_endpt_t *tep) 5451 { 5452 list_t *l = &tep->te_flowlist; 5453 tl_endpt_t *elp; 5454 5455 ASSERT(IS_CLTS(tep)); 5456 5457 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5458 ASSERT(tep->te_ser == elp->te_ser); 5459 ASSERT(elp->te_flowq == tep); 5460 if (! elp->te_closing) 5461 TL_QENABLE(elp); 5462 elp->te_flowq = NULL; 5463 list_remove(l, elp); 5464 } 5465 } 5466 5467 /* 5468 * Unconnect endpoints. 
5469 */ 5470 static void 5471 tl_co_unconnect(tl_endpt_t *tep) 5472 { 5473 tl_endpt_t *peer_tep = tep->te_conp; 5474 tl_endpt_t *srv_tep = tep->te_oconp; 5475 list_t *l; 5476 tl_icon_t *tip; 5477 tl_endpt_t *cl_tep; 5478 mblk_t *d_mp; 5479 5480 ASSERT(IS_COTS(tep)); 5481 /* 5482 * If our peer is closing, don't use it. 5483 */ 5484 if ((peer_tep != NULL) && peer_tep->te_closing) { 5485 TL_UNCONNECT(tep->te_conp); 5486 peer_tep = NULL; 5487 } 5488 if ((srv_tep != NULL) && srv_tep->te_closing) { 5489 TL_UNCONNECT(tep->te_oconp); 5490 srv_tep = NULL; 5491 } 5492 5493 if (tep->te_nicon > 0) { 5494 l = &tep->te_iconp; 5495 /* 5496 * If incoming requests pending, change state 5497 * of clients on disconnect ind event and send 5498 * discon_ind pdu to modules above them 5499 * for server: all clients get disconnect 5500 */ 5501 5502 while (tep->te_nicon > 0) { 5503 tip = list_head(l); 5504 cl_tep = tip->ti_tep; 5505 5506 if (cl_tep == NULL) { 5507 tl_freetip(tep, tip); 5508 continue; 5509 } 5510 5511 if (cl_tep->te_oconp != NULL) { 5512 ASSERT(cl_tep != cl_tep->te_oconp); 5513 TL_UNCONNECT(cl_tep->te_oconp); 5514 } 5515 5516 if (cl_tep->te_closing) { 5517 tl_freetip(tep, tip); 5518 continue; 5519 } 5520 5521 enableok(cl_tep->te_wq); 5522 TL_QENABLE(cl_tep); 5523 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5524 if (d_mp != NULL) { 5525 cl_tep->te_state = TS_IDLE; 5526 putnext(cl_tep->te_rq, d_mp); 5527 } else { 5528 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5529 SL_TRACE|SL_ERROR, 5530 "tl_co_unconnect:icmng: " 5531 "allocb failure")); 5532 } 5533 tl_freetip(tep, tip); 5534 } 5535 } else if (srv_tep != NULL) { 5536 /* 5537 * If outgoing request pending, change state 5538 * of server on discon ind event 5539 */ 5540 5541 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5542 IS_COTSORD(srv_tep) && 5543 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5544 /* 5545 * Queue ordrel_ind for server to be picked up 5546 * when the connection is accepted. 
5547 */ 5548 d_mp = tl_ordrel_ind_alloc(); 5549 } else { 5550 /* 5551 * send discon_ind to server 5552 */ 5553 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5554 } 5555 if (d_mp == NULL) { 5556 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5557 SL_TRACE|SL_ERROR, 5558 "tl_co_unconnect:outgoing:allocb failure")); 5559 TL_UNCONNECT(tep->te_oconp); 5560 goto discon_peer; 5561 } 5562 5563 /* 5564 * If this is a socket the T_DISCON_IND is queued with 5565 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5566 * from the list of pending connections. 5567 * Note that when te_oconp is set the peer better have 5568 * a t_connind_t for the client. 5569 */ 5570 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5571 /* 5572 * Queue the disconnection message. 5573 */ 5574 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5575 } else { 5576 tip = tl_icon_find(srv_tep, tep->te_seqno); 5577 if (tip == NULL) { 5578 freemsg(d_mp); 5579 } else { 5580 ASSERT(tep == tip->ti_tep); 5581 ASSERT(tep->te_ser == srv_tep->te_ser); 5582 /* 5583 * Delete tip from the server list. 
5584 */ 5585 if (srv_tep->te_nicon == 1) { 5586 srv_tep->te_state = 5587 NEXTSTATE(TE_DISCON_IND2, 5588 srv_tep->te_state); 5589 } else { 5590 srv_tep->te_state = 5591 NEXTSTATE(TE_DISCON_IND3, 5592 srv_tep->te_state); 5593 } 5594 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5595 T_DISCON_IND); 5596 putnext(srv_tep->te_rq, d_mp); 5597 tl_freetip(srv_tep, tip); 5598 } 5599 TL_UNCONNECT(tep->te_oconp); 5600 srv_tep = NULL; 5601 } 5602 } else if (peer_tep != NULL) { 5603 /* 5604 * unconnect existing connection 5605 * If connected, change state of peer on 5606 * discon ind event and send discon ind pdu 5607 * to module above it 5608 */ 5609 5610 ASSERT(tep->te_ser == peer_tep->te_ser); 5611 if (IS_COTSORD(peer_tep) && 5612 (peer_tep->te_state == TS_WIND_ORDREL || 5613 peer_tep->te_state == TS_DATA_XFER)) { 5614 /* 5615 * send ordrel ind 5616 */ 5617 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5618 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5619 peer_tep->te_state, 5620 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5621 d_mp = tl_ordrel_ind_alloc(); 5622 if (! d_mp) { 5623 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5624 SL_TRACE|SL_ERROR, 5625 "tl_co_unconnect:connected:" 5626 "allocb failure")); 5627 /* 5628 * Continue with cleaning up peer as 5629 * this side may go away with the close 5630 */ 5631 TL_QENABLE(peer_tep); 5632 goto discon_peer; 5633 } 5634 peer_tep->te_state = 5635 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5636 5637 putnext(peer_tep->te_rq, d_mp); 5638 /* 5639 * Handle flow control case. This will generate 5640 * a t_discon_ind message with reason 0 if there 5641 * is data queued on the write side. 5642 */ 5643 TL_QENABLE(peer_tep); 5644 } else if (IS_COTSORD(peer_tep) && 5645 peer_tep->te_state == TS_WREQ_ORDREL) { 5646 /* 5647 * Sent an ordrel_ind. We send a discon with 5648 * with error 0 to inform that the peer is gone. 
5649 */ 5650 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5651 SL_TRACE|SL_ERROR, 5652 "tl_co_unconnect: discon in state %d", 5653 tep->te_state)); 5654 tl_discon_ind(peer_tep, 0); 5655 } else { 5656 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5657 SL_TRACE|SL_ERROR, 5658 "tl_co_unconnect: state %d", tep->te_state)); 5659 tl_discon_ind(peer_tep, ECONNRESET); 5660 } 5661 5662 discon_peer: 5663 /* 5664 * Disconnect cross-pointers only for close 5665 */ 5666 if (tep->te_closing) { 5667 peer_tep = tep->te_conp; 5668 TL_REMOVE_PEER(peer_tep->te_conp); 5669 TL_REMOVE_PEER(tep->te_conp); 5670 } 5671 } 5672 } 5673 5674 /* 5675 * Note: The following routine does not recover from allocb() 5676 * failures 5677 * The reason should be from the <sys/errno.h> space. 5678 */ 5679 static void 5680 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5681 { 5682 mblk_t *d_mp; 5683 5684 if (tep->te_closing) 5685 return; 5686 5687 /* 5688 * flush the queues. 5689 */ 5690 flushq(tep->te_rq, FLUSHDATA); 5691 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5692 5693 /* 5694 * send discon ind 5695 */ 5696 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5697 if (! d_mp) { 5698 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5699 "tl_discon_ind:allocb failure")); 5700 return; 5701 } 5702 tep->te_state = TS_IDLE; 5703 putnext(tep->te_rq, d_mp); 5704 } 5705 5706 /* 5707 * Note: The following routine does not recover from allocb() 5708 * failures 5709 * The reason should be from the <sys/errno.h> space. 
5710 */ 5711 static mblk_t * 5712 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5713 { 5714 mblk_t *mp; 5715 struct T_discon_ind *tdi; 5716 5717 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5718 DB_TYPE(mp) = M_PROTO; 5719 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5720 tdi = (struct T_discon_ind *)mp->b_rptr; 5721 tdi->PRIM_type = T_DISCON_IND; 5722 tdi->DISCON_reason = reason; 5723 tdi->SEQ_number = seqnum; 5724 } 5725 return (mp); 5726 } 5727 5728 5729 /* 5730 * Note: The following routine does not recover from allocb() 5731 * failures 5732 */ 5733 static mblk_t * 5734 tl_ordrel_ind_alloc(void) 5735 { 5736 mblk_t *mp; 5737 struct T_ordrel_ind *toi; 5738 5739 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5740 DB_TYPE(mp) = M_PROTO; 5741 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5742 toi = (struct T_ordrel_ind *)mp->b_rptr; 5743 toi->PRIM_type = T_ORDREL_IND; 5744 } 5745 return (mp); 5746 } 5747 5748 5749 /* 5750 * Lookup the seqno in the list of queued connections. 5751 */ 5752 static tl_icon_t * 5753 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5754 { 5755 list_t *l = &tep->te_iconp; 5756 tl_icon_t *tip = list_head(l); 5757 5758 ASSERT(seqno != 0); 5759 5760 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5761 ; 5762 5763 return (tip); 5764 } 5765 5766 /* 5767 * Queue data for a given T_CONN_IND while verifying that redundant 5768 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5769 * Used when the originator of the connection closes. 
5770 */ 5771 static void 5772 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5773 { 5774 tl_icon_t *tip; 5775 mblk_t **mpp, *mp; 5776 int prim, nprim; 5777 5778 if (nmp->b_datap->db_type == M_PROTO) 5779 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5780 else 5781 nprim = -1; /* M_DATA */ 5782 5783 tip = tl_icon_find(tep, seqno); 5784 if (tip == NULL) { 5785 freemsg(nmp); 5786 return; 5787 } 5788 5789 ASSERT(tip->ti_seqno != 0); 5790 mpp = &tip->ti_mp; 5791 while (*mpp != NULL) { 5792 mp = *mpp; 5793 5794 if (mp->b_datap->db_type == M_PROTO) 5795 prim = ((union T_primitives *)mp->b_rptr)->type; 5796 else 5797 prim = -1; /* M_DATA */ 5798 5799 /* 5800 * Allow nothing after a T_DISCON_IND 5801 */ 5802 if (prim == T_DISCON_IND) { 5803 freemsg(nmp); 5804 return; 5805 } 5806 /* 5807 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5808 */ 5809 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5810 freemsg(nmp); 5811 return; 5812 } 5813 mpp = &(mp->b_next); 5814 } 5815 *mpp = nmp; 5816 } 5817 5818 /* 5819 * Verify if a certain TPI primitive exists on the connind queue. 5820 * Use prim -1 for M_DATA. 5821 * Return non-zero if found. 5822 */ 5823 static boolean_t 5824 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5825 { 5826 tl_icon_t *tip = tl_icon_find(tep, seqno); 5827 boolean_t found = B_FALSE; 5828 5829 if (tip != NULL) { 5830 mblk_t *mp; 5831 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5832 found = (DB_TYPE(mp) == M_PROTO && 5833 ((union T_primitives *)mp->b_rptr)->type == prim); 5834 } 5835 } 5836 return (found); 5837 } 5838 5839 /* 5840 * Send the b_next mblk chain that has accumulated before the connection 5841 * was accepted. Perform the necessary state transitions. 
5842 */ 5843 static void 5844 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5845 { 5846 mblk_t *mp; 5847 union T_primitives *primp; 5848 5849 if (tep->te_closing) { 5850 tl_icon_freemsgs(mpp); 5851 return; 5852 } 5853 5854 ASSERT(tep->te_state == TS_DATA_XFER); 5855 ASSERT(tep->te_rq->q_first == NULL); 5856 5857 while ((mp = *mpp) != NULL) { 5858 *mpp = mp->b_next; 5859 mp->b_next = NULL; 5860 5861 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5862 switch (DB_TYPE(mp)) { 5863 default: 5864 freemsg(mp); 5865 break; 5866 case M_DATA: 5867 putnext(tep->te_rq, mp); 5868 break; 5869 case M_PROTO: 5870 primp = (union T_primitives *)mp->b_rptr; 5871 switch (primp->type) { 5872 case T_UNITDATA_IND: 5873 case T_DATA_IND: 5874 case T_OPTDATA_IND: 5875 case T_EXDATA_IND: 5876 putnext(tep->te_rq, mp); 5877 break; 5878 case T_ORDREL_IND: 5879 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5880 tep->te_state); 5881 putnext(tep->te_rq, mp); 5882 break; 5883 case T_DISCON_IND: 5884 tep->te_state = TS_IDLE; 5885 putnext(tep->te_rq, mp); 5886 break; 5887 default: 5888 #ifdef DEBUG 5889 cmn_err(CE_PANIC, 5890 "tl_icon_sendmsgs: unknown primitive"); 5891 #endif /* DEBUG */ 5892 freemsg(mp); 5893 break; 5894 } 5895 break; 5896 } 5897 } 5898 } 5899 5900 /* 5901 * Free the b_next mblk chain that has accumulated before the connection 5902 * was accepted. 5903 */ 5904 static void 5905 tl_icon_freemsgs(mblk_t **mpp) 5906 { 5907 mblk_t *mp; 5908 5909 while ((mp = *mpp) != NULL) { 5910 *mpp = mp->b_next; 5911 mp->b_next = NULL; 5912 freemsg(mp); 5913 } 5914 } 5915 5916 /* 5917 * Send M_ERROR 5918 * Note: assumes caller ensured enough space in mp or enough 5919 * memory available. 
Does not attempt recovery from allocb() 5920 * failures 5921 */ 5922 5923 static void 5924 tl_merror(queue_t *wq, mblk_t *mp, int error) 5925 { 5926 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5927 5928 if (tep->te_closing) { 5929 freemsg(mp); 5930 return; 5931 } 5932 5933 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5934 SL_TRACE|SL_ERROR, 5935 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5936 5937 /* 5938 * flush all messages on queue. we are shutting 5939 * the stream down on fatal error 5940 */ 5941 flushq(wq, FLUSHALL); 5942 if (IS_COTS(tep)) { 5943 /* connection oriented - unconnect endpoints */ 5944 tl_co_unconnect(tep); 5945 } 5946 if (mp->b_cont) { 5947 freemsg(mp->b_cont); 5948 mp->b_cont = NULL; 5949 } 5950 5951 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5952 freemsg(mp); 5953 mp = allocb(1, BPRI_HI); 5954 if (!mp) { 5955 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5956 SL_TRACE|SL_ERROR, 5957 "tl_merror:M_PROTO: out of memory")); 5958 return; 5959 } 5960 } 5961 if (mp) { 5962 DB_TYPE(mp) = M_ERROR; 5963 mp->b_rptr = DB_BASE(mp); 5964 *mp->b_rptr = (char)error; 5965 mp->b_wptr = mp->b_rptr + sizeof (char); 5966 qreply(wq, mp); 5967 } else { 5968 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5969 } 5970 } 5971 5972 static void 5973 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5974 { 5975 ASSERT(cr != NULL); 5976 5977 if (flag & TL_SETCRED) { 5978 struct opthdr *opt = (struct opthdr *)buf; 5979 tl_credopt_t *tlcred; 5980 5981 opt->level = TL_PROT_LEVEL; 5982 opt->name = TL_OPT_PEER_CRED; 5983 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5984 5985 tlcred = (tl_credopt_t *)(opt + 1); 5986 tlcred->tc_uid = crgetuid(cr); 5987 tlcred->tc_gid = crgetgid(cr); 5988 tlcred->tc_ruid = crgetruid(cr); 5989 tlcred->tc_rgid = crgetrgid(cr); 5990 tlcred->tc_suid = crgetsuid(cr); 5991 tlcred->tc_sgid = crgetsgid(cr); 5992 tlcred->tc_ngroups = crgetngroups(cr); 5993 } else if (flag & TL_SETUCRED) { 5994 struct opthdr *opt = (struct 
opthdr *)buf; 5995 5996 opt->level = TL_PROT_LEVEL; 5997 opt->name = TL_OPT_PEER_UCRED; 5998 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); 5999 6000 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 6001 } else { 6002 struct T_opthdr *topt = (struct T_opthdr *)buf; 6003 ASSERT(flag & TL_SOCKUCRED); 6004 6005 topt->level = SOL_SOCKET; 6006 topt->name = SCM_UCRED; 6007 topt->len = ucredminsize(cr) + sizeof (*topt); 6008 topt->status = 0; 6009 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 6010 } 6011 } 6012 6013 /* ARGSUSED */ 6014 static int 6015 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6016 { 6017 /* no default value processed in protocol specific code currently */ 6018 return (-1); 6019 } 6020 6021 /* ARGSUSED */ 6022 static int 6023 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6024 { 6025 int len; 6026 tl_endpt_t *tep; 6027 int *valp; 6028 6029 tep = (tl_endpt_t *)wq->q_ptr; 6030 6031 len = 0; 6032 6033 /* 6034 * Assumes: option level and name sanity check done elsewhere 6035 */ 6036 6037 switch (level) { 6038 case SOL_SOCKET: 6039 if (! IS_SOCKET(tep)) 6040 break; 6041 switch (name) { 6042 case SO_RECVUCRED: 6043 len = sizeof (int); 6044 valp = (int *)ptr; 6045 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6046 break; 6047 default: 6048 break; 6049 } 6050 break; 6051 case TL_PROT_LEVEL: 6052 switch (name) { 6053 case TL_OPT_PEER_CRED: 6054 case TL_OPT_PEER_UCRED: 6055 /* 6056 * option not supposed to retrieved directly 6057 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6058 * when some internal flags set by other options 6059 * Direct retrieval always designed to fail(ignored) 6060 * for this option. 
6061 */ 6062 break; 6063 } 6064 } 6065 return (len); 6066 } 6067 6068 /* ARGSUSED */ 6069 static int 6070 tl_set_opt( 6071 queue_t *wq, 6072 uint_t mgmt_flags, 6073 int level, 6074 int name, 6075 uint_t inlen, 6076 uchar_t *invalp, 6077 uint_t *outlenp, 6078 uchar_t *outvalp, 6079 void *thisdg_attrs, 6080 cred_t *cr) 6081 { 6082 int error; 6083 tl_endpt_t *tep; 6084 6085 tep = (tl_endpt_t *)wq->q_ptr; 6086 6087 error = 0; /* NOERROR */ 6088 6089 /* 6090 * Assumes: option level and name sanity checks done elsewhere 6091 */ 6092 6093 switch (level) { 6094 case SOL_SOCKET: 6095 if (! IS_SOCKET(tep)) { 6096 error = EINVAL; 6097 break; 6098 } 6099 /* 6100 * TBD: fill in other AF_UNIX socket options and then stop 6101 * returning error. 6102 */ 6103 switch (name) { 6104 case SO_RECVUCRED: 6105 /* 6106 * We only support this for datagram sockets; 6107 * getpeerucred handles the connection oriented 6108 * transports. 6109 */ 6110 if (! IS_CLTS(tep)) { 6111 error = EINVAL; 6112 break; 6113 } 6114 if (*(int *)invalp == 0) 6115 tep->te_flag &= ~TL_SOCKUCRED; 6116 else 6117 tep->te_flag |= TL_SOCKUCRED; 6118 break; 6119 default: 6120 error = EINVAL; 6121 break; 6122 } 6123 break; 6124 case TL_PROT_LEVEL: 6125 switch (name) { 6126 case TL_OPT_PEER_CRED: 6127 case TL_OPT_PEER_UCRED: 6128 /* 6129 * option not supposed to be set directly 6130 * Its value in initialized for each endpoint at 6131 * driver open time. 6132 * Direct setting always designed to fail for this 6133 * option. 
6134 */ 6135 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6136 SL_TRACE|SL_ERROR, 6137 "tl_set_opt: option is not supported")); 6138 error = EPROTO; 6139 break; 6140 } 6141 } 6142 return (error); 6143 } 6144 6145 6146 static void 6147 tl_timer(void *arg) 6148 { 6149 queue_t *wq = arg; 6150 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6151 6152 ASSERT(tep); 6153 6154 tep->te_timoutid = 0; 6155 6156 enableok(wq); 6157 /* 6158 * Note: can call wsrv directly here and save context switch 6159 * Consider change when qtimeout (not timeout) is active 6160 */ 6161 qenable(wq); 6162 } 6163 6164 static void 6165 tl_buffer(void *arg) 6166 { 6167 queue_t *wq = arg; 6168 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6169 6170 ASSERT(tep); 6171 6172 tep->te_bufcid = 0; 6173 tep->te_nowsrv = B_FALSE; 6174 6175 enableok(wq); 6176 /* 6177 * Note: can call wsrv directly here and save context switch 6178 * Consider change when qbufcall (not bufcall) is active 6179 */ 6180 qenable(wq); 6181 } 6182 6183 static void 6184 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6185 { 6186 tl_endpt_t *tep; 6187 6188 tep = (tl_endpt_t *)wq->q_ptr; 6189 6190 if (tep->te_closing) { 6191 freemsg(mp); 6192 return; 6193 } 6194 noenable(wq); 6195 6196 (void) insq(wq, wq->q_first, mp); 6197 6198 if (tep->te_bufcid || tep->te_timoutid) { 6199 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6200 "tl_memrecover:recover %p pending", (void *)wq)); 6201 return; 6202 } 6203 6204 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6205 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6206 drv_usectohz(TL_BUFWAIT)); 6207 } 6208 } 6209 6210 static void 6211 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6212 { 6213 ASSERT(tip->ti_seqno != 0); 6214 6215 if (tip->ti_mp != NULL) { 6216 tl_icon_freemsgs(&tip->ti_mp); 6217 tip->ti_mp = NULL; 6218 } 6219 if (tip->ti_tep != NULL) { 6220 tl_refrele(tip->ti_tep); 6221 tip->ti_tep = NULL; 6222 } 6223 list_remove(&tep->te_iconp, tip); 6224 kmem_free(tip, 
sizeof (tl_icon_t)); 6225 tep->te_nicon--; 6226 } 6227 6228 /* 6229 * Remove address from address hash. 6230 */ 6231 static void 6232 tl_addr_unbind(tl_endpt_t *tep) 6233 { 6234 tl_endpt_t *elp; 6235 6236 if (tep->te_flag & TL_ADDRHASHED) { 6237 if (IS_SOCKET(tep)) { 6238 (void) mod_hash_remove(tep->te_addrhash, 6239 (mod_hash_key_t)tep->te_vp, 6240 (mod_hash_val_t *)&elp); 6241 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6242 tep->te_magic = SOU_MAGIC_IMPLICIT; 6243 } else { 6244 (void) mod_hash_remove(tep->te_addrhash, 6245 (mod_hash_key_t)&tep->te_ap, 6246 (mod_hash_val_t *)&elp); 6247 (void) kmem_free(tep->te_abuf, tep->te_alen); 6248 tep->te_alen = -1; 6249 tep->te_abuf = NULL; 6250 } 6251 tep->te_flag &= ~TL_ADDRHASHED; 6252 } 6253 } 6254