1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multithreaded STREAMS Local Transport Provider. 28 * 29 * OVERVIEW 30 * ======== 31 * 32 * This driver provides TLI as well as socket semantics. It provides 33 * connectionless, connection oriented, and connection oriented with orderly 34 * release transports for TLI and sockets. Each transport type has separate name 35 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 36 * this removes any name space conflicts when binding to socket style transport 37 * addresses. 38 * 39 * NOTE: There is one exception: Socket ticots and ticotsord transports share 40 * the same namespace. In fact, sockets always use ticotsord type transport. 41 * 42 * The driver mode is specified during open() by the minor number used for 43 * open. 44 * 45 * The sockets in addition have the following semantic differences: 46 * No support for passing up credentials (TL_SET[U]CRED). 
47 * 48 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, 49 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to 50 * T_OPTDATA_IND. 51 * 52 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before 53 * a T_CONN_RES is received from the acceptor. This means that a socket 54 * connect will complete before the peer has called accept. 55 * 56 * 57 * MULTITHREADING 58 * ============== 59 * 60 * The driver does not use STREAMS protection mechanisms. Instead it uses a 61 * generic "serializer" abstraction. Most of the operations are executed behind 62 * the serializer and are essentially single-threaded. All functions executed 63 * behind the same serializer are strictly serialized. So if one thread calls 64 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls 65 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one 66 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or 67 * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the 68 * same time. 69 * 70 * Connectionless transports use a single serializer per transport type (one for 71 * TLI and one for sockets). Connection-oriented transports use finer-grained 72 * serializers. 73 * 74 * All COTS-type endpoints start their life with private serializers. During 75 * connection request processing the endpoint serializer is switched to the 76 * listener's serializer and the rest of T_CONN_REQ processing is done on the 77 * listener serializer. During T_CONN_RES processing the eager serializer is 78 * switched from listener to acceptor serializer and after that point all 79 * processing for eager and acceptor happens on this serializer. To avoid races 80 * with endpoint closes while its serializer may be changing closes are blocked 81 * while serializers are manipulated. 
82 * 83 * References accounting 84 * --------------------- 85 * 86 * Endpoints are reference counted and freed when the last reference is 87 * dropped. Functions within the serializer may access an endpoint state even 88 * after an endpoint closed. The te_closing being set on the endpoint indicates 89 * that the endpoint entered its close routine. 90 * 91 * One reference is held for each opened endpoint instance. The reference 92 * counter is incremented when the endpoint is linked to another endpoint and 93 * decremented when the link disappears. It is also incremented when the 94 * endpoint is found by the hash table lookup. This increment is atomic with the 95 * lookup itself and happens while the hash table read lock is held. 96 * 97 * Close synchronization 98 * --------------------- 99 * 100 * During close the endpoint is marked as closing using te_closing flag. It is 101 * usually enough to check for te_closing flag since all other state changes 102 * happen after this flag is set and the close entered serializer. Immediately 103 * after setting te_closing flag tl_close() enters serializer and waits until 104 * the callback finishes. This allows all functions called within serializer to 105 * simply check te_closing without any locks. 106 * 107 * Serializer management. 108 * --------------------- 109 * 110 * For COTS transports serializers are created when the endpoint is constructed 111 * and destroyed when the endpoint is destructed. CLTS transports use global 112 * serializers - one for sockets and one for TLI. 113 * 114 * COTS serializers have separate reference counts to deal with several 115 * endpoints sharing the same serializer. There is a subtle problem related to 116 * the serializer destruction. The serializer should never be destroyed by any 117 * function executed inside serializer. 
This means that close has to wait till 118 * all serializer activity for this endpoint is finished before it can drop the 119 * last reference on the endpoint (which may as well free the serializer). This 120 * is only relevant for COTS transports which manage serializers 121 * dynamically. For CLTS transports close may complete without waiting for all 122 * serializer activity to finish since serializer is only destroyed at driver 123 * detach time. 124 * 125 * COTS endpoints keep track of the number of outstanding requests on the 126 * serializer for the endpoint. The code handling accept() avoids changing 127 * client serializer if it has any pending messages on the serializer and 128 * instead moves acceptor to listener's serializer. 129 * 130 * 131 * Use of hash tables 132 * ------------------ 133 * 134 * The driver uses modhash hash table implementation. Each transport uses two 135 * hash tables - one for finding endpoints by acceptor ID and another one for 136 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same 137 * pair of hash tables since sockets only use TICOTSORD. 138 * 139 * All hash tables lookups increment a reference count for returned endpoints, 140 * so we may safely check the endpoint state even when the endpoint is removed 141 * from the hash by another thread immediately after it is found. 142 * 143 * 144 * CLOSE processing 145 * ================ 146 * 147 * The driver enters serializer twice on close(). The close sequence is the 148 * following: 149 * 150 * 1) Wait until closing is safe (te_closewait becomes zero) 151 * This step is needed to prevent close during serializer switches. In most 152 * cases (close happening after connection establishment) te_closewait is 153 * zero. 154 * 1) Set te_closing. 155 * 2) Call tl_close_ser() within serializer and wait for it to complete. 156 * 157 * tl_close_ser() simply marks endpoint and wakes up waiting tl_close(). 
158 * It also needs to clear write-side q_next pointers - this should be done 159 * before qprocsoff(). 160 * 161 * This synchronous serializer entry during close is needed to ensure that 162 * the queue is valid everywhere inside the serializer. 163 * 164 * Note that in many cases close will execute tl_close_ser() synchronously, 165 * so it will not wait at all. 166 * 167 * 3) Calls qprocsoff(). 168 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to 169 * complete (for COTS transports). For CLTS transport there is no wait. 170 * 171 * tl_close_finish_ser() finishes the close process and wakes up waiting 172 * close if there is any. 173 * 174 * Note that in most cases close will enter tl_close_finish_ser() 175 * synchronously and will not wait at all. 176 * 177 * 178 * Flow Control 179 * ============ 180 * 181 * The driver implements both read and write side service routines. No one calls 182 * putq() on the read queue. The read side service routine tl_rsrv() is called 183 * when the read side stream is back-enabled. It enters serializer synchronously 184 * (waits till serializer processing is complete). Within serializer it 185 * back-enables all endpoints blocked by the queue for connection-less 186 * transports and enables write side service processing for the peer for 187 * connection-oriented transports. 188 * 189 * Read and write side service routines use special mblk_sized space in the 190 * endpoint structure to enter perimeter. 191 * 192 * Write-side flow control 193 * ----------------------- 194 * 195 * Write side flow control is a bit tricky. The driver needs to deal with two 196 * message queues - the explicit STREAMS message queue maintained by 197 * putq()/getq()/putbq() and the implicit queue within the serializer. These two 198 * queues should be synchronized to preserve message ordering and should 199 * maintain a single order determined by the order in which messages enter 200 * tl_wput(). 
In order to maintain the ordering between these two queues the 201 * STREAMS queue is only manipulated within the serializer, so the ordering is 202 * provided by the serializer. 203 * 204 * Functions called from the tl_wsrv() sometimes may call putbq(). To 205 * immediately stop any further processing of the STREAMS message queues the 206 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 207 * side service processing stops when the flag is set. 208 * 209 * The tl_wsrv() function enters serializer synchronously and waits for it to 210 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 211 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 212 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 213 * always bounded by the amount of messages on the STREAMS queue at the time 214 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 215 * queue from another serialized entry which can't happen in parallel. This 216 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 217 * of it draining forever while writer places new messages on the STREAMS 218 * queue). 219 * 220 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 221 * 222 * 223 * Unix Domain Sockets 224 * =================== 225 * 226 * The driver knows the structure of Unix Domain sockets addresses and treats 227 * them differently from generic TLI addresses. For sockets implicit binds are 228 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 229 * instead of using address length of zero. Explicit binds specify 230 * SOU_MAGIC_EXPLICIT as magic. 231 * 232 * For implicit binds we always use minor number as soua_vp part of the address 233 * and avoid any hash table lookups. This saves two hash tables lookups per 234 * anonymous bind. 
235 * 236 * For explicit address we hash the vnode pointer instead of hashing the 237 * full-scale address+zone+length. Hashing by pointer is more efficient than 238 * hashing by the full address. 239 * 240 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the 241 * tep structure, so it should never be freed. 242 * 243 * Also for sockets the driver always uses minor number as acceptor id. 244 * 245 * TPI VIOLATIONS 246 * -------------- 247 * 248 * This driver violates TPI in several respects for Unix Domain Sockets: 249 * 250 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind 251 * is requested and the endpoint is already in use. There is no point in 252 * generating an unused address since this address will be rejected by 253 * sockfs anyway. For implicit binds it always generates a new address 254 * (sets soua_vp to its minor number). 255 * 256 * 2) It always uses minor number as acceptor ID and never uses queue 257 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ 258 * message and they do not use the queue pointer. 259 * 260 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog 261 * followed by listen(). The listen() should be issued with non-zero 262 * backlog, so sotpi_listen() issues unbind request followed by bind 263 * request to the same address but with a non-zero qlen value. Both 264 * tl_bind() and tl_unbind() require write lock on the hash table to 265 * insert/remove the address. The driver does not remove the address from 266 * the hash for endpoints that are bound to the explicit address and have 267 * backlog of zero. During T_BIND_REQ processing if the address requested 268 * is equal to the address the endpoint already has it updates the backlog 269 * without reinserting the address in the hash table. This optimization 270 * avoids two hash table updates for each listener created. 
It also 271 * avoids the problem of a "stolen" address when another listener may use 272 * the same address between the unbind and bind and suddenly listen() fails 273 * because address is in use even though the bind() succeeded. 274 * 275 * 276 * CONNECTIONLESS TRANSPORTS 277 * ========================= 278 * 279 * Connectionless transports all share the same serializer (one for TLI and one 280 * for Sockets). Functions executing behind serializer can check or modify state 281 * of any endpoint. 282 * 283 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the 284 * te_lastep field. The next time X talks to some address A it checks whether A 285 * is the same as Y's address and if it is there is no need to lookup Y. If the 286 * address is different or the state of Y is not appropriate (e.g. closed or not 287 * idle) X does a lookup using tl_find_peer() and caches the new address. 288 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold 289 * on the endpoint found. 290 * 291 * During close of endpoint Y it doesn't try to remove itself from other 292 * endpoints caches. They will detect that Y is gone and will search the peer 293 * endpoint again. 294 * 295 * Flow Control Handling. 296 * ---------------------- 297 * 298 * Each connectionless endpoint keeps a list of endpoints which are 299 * flow-controlled by its queue. It also keeps a pointer to the queue which 300 * flow-controls itself. Whenever flow control releases for endpoint X it 301 * enables all queues from the list. During close it also back-enables everyone 302 * in the list. If X is flow-controlled when it is closing it removes it from 303 * the peers list. 304 * 305 * DATA STRUCTURES 306 * =============== 307 * 308 * Each endpoint is represented by the tl_endpt_t structure which keeps all the 309 * endpoint state. For connection-oriented transports it keeps a list 310 * of pending connections (tl_icon_t). 
For connectionless transports it keeps a 311 * list of endpoints flow controlled by this one. 312 * 313 * Each transport type is represented by a per-transport data structure 314 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 315 * endpoint address hash tables for each transport. It also contains pointer to 316 * transport serializer for connectionless transports. 317 * 318 * Each endpoint keeps a link to its transport structure, so the code can find 319 * all per-transport information quickly. 320 */ 321 322 #include <sys/types.h> 323 #include <sys/inttypes.h> 324 #include <sys/stream.h> 325 #include <sys/stropts.h> 326 #define _SUN_TPI_VERSION 2 327 #include <sys/tihdr.h> 328 #include <sys/strlog.h> 329 #include <sys/debug.h> 330 #include <sys/cred.h> 331 #include <sys/errno.h> 332 #include <sys/kmem.h> 333 #include <sys/id_space.h> 334 #include <sys/modhash.h> 335 #include <sys/mkdev.h> 336 #include <sys/tl.h> 337 #include <sys/stat.h> 338 #include <sys/conf.h> 339 #include <sys/modctl.h> 340 #include <sys/strsun.h> 341 #include <sys/socket.h> 342 #include <sys/socketvar.h> 343 #include <sys/sysmacros.h> 344 #include <sys/xti_xtiopt.h> 345 #include <sys/ddi.h> 346 #include <sys/sunddi.h> 347 #include <sys/zone.h> 348 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 349 #include <inet/optcom.h> 350 #include <sys/strsubr.h> 351 #include <sys/ucred.h> 352 #include <sys/suntpi.h> 353 #include <sys/list.h> 354 #include <sys/serializer.h> 355 356 /* 357 * TBD List 358 * 14 Eliminate state changes through table 359 * 16. AF_UNIX socket options 360 * 17. connect() for ticlts 361 * 18. support for "netstat" to show AF_UNIX plus TLI local 362 * transport connections 363 * 21. 
sanity check to flushing on sending M_ERROR 364 */ 365 366 /* 367 * CONSTANT DECLARATIONS 368 * -------------------- 369 */ 370 371 /* 372 * Local declarations 373 */ 374 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 375 376 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 377 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 378 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 379 /* 380 * Hash tables size. 381 */ 382 #define TL_HASH_SIZE 311 383 384 /* 385 * Definitions for module_info 386 */ 387 #define TL_ID (104) /* module ID number */ 388 #define TL_NAME "tl" /* module name */ 389 #define TL_MINPSZ (0) /* min packet size */ 390 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 391 #define TL_HIWAT (16*1024) /* hi water mark */ 392 #define TL_LOWAT (256) /* lo water mark */ 393 /* 394 * Definition of minor numbers/modes for new transport provider modes. 395 * We view the socket use as a separate mode to get a separate name space. 
396 */ 397 #define TL_TICOTS 0 /* connection oriented transport */ 398 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 399 #define TL_TICLTS 2 /* connectionless transport */ 400 #define TL_UNUSED 3 401 #define TL_SOCKET 4 /* Socket */ 402 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 403 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 404 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 405 406 #define TL_MINOR_MASK 0x7 407 #define TL_MINOR_START (TL_TICLTS + 1) 408 409 /* 410 * LOCAL MACROS 411 */ 412 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 413 414 /* 415 * EXTERNAL VARIABLE DECLARATIONS 416 * ----------------------------- 417 */ 418 /* 419 * state table defined in the OS space.c 420 */ 421 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 422 423 /* 424 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 425 */ 426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 427 static int tl_close(queue_t *, int, cred_t *); 428 static void tl_wput(queue_t *, mblk_t *); 429 static void tl_wsrv(queue_t *); 430 static void tl_rsrv(queue_t *); 431 432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 435 436 437 /* 438 * GLOBAL DATA STRUCTURES AND VARIABLES 439 * ----------------------------------- 440 */ 441 442 /* 443 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 444 * For now, we only manage the SO_RECVUCRED option but we also have 445 * harmless dummy options to make things work with some common code we access. 446 */ 447 opdes_t tl_opt_arr[] = { 448 /* The SO_TYPE is needed for the hack below */ 449 { 450 SO_TYPE, 451 SOL_SOCKET, 452 OA_R, 453 OA_R, 454 OP_NP, 455 0, 456 sizeof (t_scalar_t), 457 0 458 }, 459 { 460 SO_RECVUCRED, 461 SOL_SOCKET, 462 OA_RW, 463 OA_RW, 464 OP_NP, 465 0, 466 sizeof (int), 467 0 468 } 469 }; 470 471 /* 472 * Table of all supported levels 473 * Note: Some levels (e.g. 
XTI_GENERIC) may be valid but may not have 474 * any supported options so we need this info separately. 475 * 476 * This is needed only for topmost tpi providers. 477 */ 478 optlevel_t tl_valid_levels_arr[] = { 479 XTI_GENERIC, 480 SOL_SOCKET, 481 TL_PROT_LEVEL 482 }; 483 484 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 485 /* 486 * Current upper bound on the amount of space needed to return all options. 487 * Additional options with data size of sizeof(long) are handled automatically. 488 * Others need hand job. 489 */ 490 #define TL_MAX_OPT_BUF_LEN \ 491 ((A_CNT(tl_opt_arr) << 2) + \ 492 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 493 + 64 + sizeof (struct T_optmgmt_ack)) 494 495 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 496 497 /* 498 * transport addr structure 499 */ 500 typedef struct tl_addr { 501 zoneid_t ta_zoneid; /* Zone scope of address */ 502 t_scalar_t ta_alen; /* length of abuf */ 503 void *ta_abuf; /* the addr itself */ 504 } tl_addr_t; 505 506 /* 507 * Refcounted version of serializer. 508 */ 509 typedef struct tl_serializer { 510 uint_t ts_refcnt; 511 serializer_t *ts_serializer; 512 } tl_serializer_t; 513 514 /* 515 * Each transport type has a separate state. 516 * Per-transport state. 
517 */ 518 typedef struct tl_transport_state { 519 char *tr_name; 520 minor_t tr_minor; 521 uint32_t tr_defaddr; 522 mod_hash_t *tr_ai_hash; 523 mod_hash_t *tr_addr_hash; 524 tl_serializer_t *tr_serializer; 525 } tl_transport_state_t; 526 527 #define TL_DFADDR 0x1000 528 529 static tl_transport_state_t tl_transports[] = { 530 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 531 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 532 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 533 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 534 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 536 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 537 }; 538 539 #define TL_MAXTRANSPORT A_CNT(tl_transports) 540 541 struct tl_endpt; 542 typedef struct tl_endpt tl_endpt_t; 543 544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 545 546 /* 547 * Data structure used to represent pending connects. 548 * Records enough information so that the connecting peer can close 549 * before the connection gets accepted. 550 */ 551 typedef struct tl_icon { 552 list_node_t ti_node; 553 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 554 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 555 t_scalar_t ti_seqno; /* Sequence number */ 556 } tl_icon_t; 557 558 typedef struct so_ux_addr soux_addr_t; 559 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 560 561 /* 562 * Maximum number of unaccepted connection indications allowed per listener. 
563 */ 564 #define TL_MAXQLEN 4096 565 int tl_maxqlen = TL_MAXQLEN; 566 567 /* 568 * transport endpoint structure 569 */ 570 struct tl_endpt { 571 queue_t *te_rq; /* stream read queue */ 572 queue_t *te_wq; /* stream write queue */ 573 uint32_t te_refcnt; 574 int32_t te_state; /* TPI state of endpoint */ 575 minor_t te_minor; /* minor number */ 576 #define te_seqno te_minor 577 uint_t te_flag; /* flag field */ 578 boolean_t te_nowsrv; 579 tl_serializer_t *te_ser; /* Serializer to use */ 580 #define te_serializer te_ser->ts_serializer 581 582 soux_addr_t te_uxaddr; /* Socket address */ 583 #define te_magic te_uxaddr.soua_magic 584 #define te_vp te_uxaddr.soua_vp 585 tl_addr_t te_ap; /* addr bound to this endpt */ 586 #define te_zoneid te_ap.ta_zoneid 587 #define te_alen te_ap.ta_alen 588 #define te_abuf te_ap.ta_abuf 589 590 tl_transport_state_t *te_transport; 591 #define te_addrhash te_transport->tr_addr_hash 592 #define te_aihash te_transport->tr_ai_hash 593 #define te_defaddr te_transport->tr_defaddr 594 cred_t *te_credp; /* endpoint user credentials */ 595 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 596 597 /* 598 * State specific for connection-oriented and connectionless transports. 599 */ 600 union { 601 /* Connection-oriented state. */ 602 struct { 603 t_uscalar_t _te_nicon; /* count of conn requests */ 604 t_uscalar_t _te_qlen; /* max conn requests */ 605 tl_endpt_t *_te_oconp; /* conn request pending */ 606 tl_endpt_t *_te_conp; /* connected endpt */ 607 #ifndef _ILP32 608 void *_te_pad; 609 #endif 610 list_t _te_iconp; /* list of conn ind. pending */ 611 } _te_cots_state; 612 /* Connection-less state. */ 613 struct { 614 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 615 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 616 list_node_t _te_flows; /* lists of connections */ 617 list_t _te_flowlist; /* Who flowcontrols on me */ 618 } _te_clts_state; 619 } _te_transport_state; 620 #define te_nicon _te_transport_state._te_cots_state._te_nicon 621 #define te_qlen _te_transport_state._te_cots_state._te_qlen 622 #define te_oconp _te_transport_state._te_cots_state._te_oconp 623 #define te_conp _te_transport_state._te_cots_state._te_conp 624 #define te_iconp _te_transport_state._te_cots_state._te_iconp 625 #define te_lastep _te_transport_state._te_clts_state._te_lastep 626 #define te_flowq _te_transport_state._te_clts_state._te_flowq 627 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 628 #define te_flows _te_transport_state._te_clts_state._te_flows 629 630 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 631 timeout_id_t te_timoutid; /* outstanding timeout id */ 632 pid_t te_cpid; /* cached pid of endpoint */ 633 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 634 /* 635 * Pieces of the endpoint state needed for closing. 636 */ 637 kmutex_t te_closelock; 638 kcondvar_t te_closecv; 639 uint8_t te_closing; /* The endpoint started closing */ 640 uint8_t te_closewait; /* Wait in close until zero */ 641 mblk_t te_closemp; /* for entering serializer on close */ 642 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 643 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 644 kmutex_t te_srv_lock; 645 kcondvar_t te_srv_cv; 646 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 647 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 648 /* 649 * Pieces of the endpoint state needed for serializer transitions. 650 */ 651 kmutex_t te_ser_lock; /* Protects the count below */ 652 uint_t te_ser_count; /* Number of messages on serializer */ 653 }; 654 655 /* 656 * Flag values. Lower 4 bits specify that transport used. 
657 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 658 * they allow to identify the endpoint more easily. 659 */ 660 #define TL_LISTENER 0x00010 /* the listener endpoint */ 661 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 662 #define TL_EAGER 0x00040 /* connecting endpoint */ 663 #define TL_ACCEPTED 0x00080 /* accepted connection */ 664 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 665 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 666 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 667 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 668 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 669 /* 670 * Boolean checks for the endpoint type. 671 */ 672 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 673 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 674 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 675 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 676 677 /* 678 * Certain operations are always used together. These macros reduce the chance 679 * of missing a part of a combination. 680 */ 681 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 682 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 683 684 #define TL_PUTBQ(x, mp) { \ 685 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 686 (x)->te_nowsrv = B_TRUE; \ 687 (void) putbq((x)->te_wq, mp); \ 688 } 689 690 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 691 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 692 693 /* 694 * STREAMS driver glue data structures. 
695 */ 696 static struct module_info tl_minfo = { 697 TL_ID, /* mi_idnum */ 698 TL_NAME, /* mi_idname */ 699 TL_MINPSZ, /* mi_minpsz */ 700 TL_MAXPSZ, /* mi_maxpsz */ 701 TL_HIWAT, /* mi_hiwat */ 702 TL_LOWAT /* mi_lowat */ 703 }; 704 705 static struct qinit tl_rinit = { 706 NULL, /* qi_putp */ 707 (int (*)())tl_rsrv, /* qi_srvp */ 708 tl_open, /* qi_qopen */ 709 tl_close, /* qi_qclose */ 710 NULL, /* qi_qadmin */ 711 &tl_minfo, /* qi_minfo */ 712 NULL /* qi_mstat */ 713 }; 714 715 static struct qinit tl_winit = { 716 (int (*)())tl_wput, /* qi_putp */ 717 (int (*)())tl_wsrv, /* qi_srvp */ 718 NULL, /* qi_qopen */ 719 NULL, /* qi_qclose */ 720 NULL, /* qi_qadmin */ 721 &tl_minfo, /* qi_minfo */ 722 NULL /* qi_mstat */ 723 }; 724 725 static struct streamtab tlinfo = { 726 &tl_rinit, /* st_rdinit */ 727 &tl_winit, /* st_wrinit */ 728 NULL, /* st_muxrinit */ 729 NULL /* st_muxwrinit */ 730 }; 731 732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 733 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); 734 735 static struct modldrv modldrv = { 736 &mod_driverops, /* Type of module -- pseudo driver here */ 737 "TPI Local Transport (tl)", 738 &tl_devops, /* driver ops */ 739 }; 740 741 /* 742 * Module linkage information for the kernel. 743 */ 744 static struct modlinkage modlinkage = { 745 MODREV_1, 746 &modldrv, 747 NULL 748 }; 749 750 /* 751 * Templates for response to info request 752 * Check sanity of unlimited connect data etc. 
753 */ 754 755 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 756 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 757 758 static struct T_info_ack tl_cots_info_ack = 759 { 760 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 761 T_INFINITE, /* TSDU size */ 762 T_INFINITE, /* ETSDU size */ 763 T_INFINITE, /* CDATA_size */ 764 T_INFINITE, /* DDATA_size */ 765 T_INFINITE, /* ADDR_size */ 766 T_INFINITE, /* OPT_size */ 767 0, /* TIDU_size - fill at run time */ 768 T_COTS, /* SERV_type */ 769 -1, /* CURRENT_state */ 770 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 771 }; 772 773 static struct T_info_ack tl_clts_info_ack = 774 { 775 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 776 0, /* TSDU_size - fill at run time */ 777 -2, /* ETSDU_size -2 => not supported */ 778 -2, /* CDATA_size -2 => not supported */ 779 -2, /* DDATA_size -2 => not supported */ 780 -1, /* ADDR_size -1 => unlimited */ 781 -1, /* OPT_size */ 782 0, /* TIDU_size - fill at run time */ 783 T_CLTS, /* SERV_type */ 784 -1, /* CURRENT_state */ 785 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 786 }; 787 788 /* 789 * private copy of devinfo pointer used in tl_info 790 */ 791 static dev_info_t *tl_dip; 792 793 /* 794 * Endpoints cache. 795 */ 796 static kmem_cache_t *tl_cache; 797 /* 798 * Minor number space. 799 */ 800 static id_space_t *tl_minors; 801 802 /* 803 * Default Data Unit size. 804 */ 805 static t_scalar_t tl_tidusz; 806 807 /* 808 * Size of hash tables. 809 */ 810 static size_t tl_hash_size = TL_HASH_SIZE; 811 812 /* 813 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 814 * for sockets. 
815 */ 816 static int tl_disable_early_connect = 0; 817 static int tl_client_closing_when_accepting; 818 819 static int tl_serializer_noswitch; 820 821 /* 822 * LOCAL FUNCTION PROTOTYPES 823 * ------------------------- 824 */ 825 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 826 static void tl_do_proto(mblk_t *, tl_endpt_t *); 827 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 828 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 829 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 830 t_scalar_t); 831 static void tl_bind(mblk_t *, tl_endpt_t *); 832 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 833 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 834 static void tl_unbind(mblk_t *, tl_endpt_t *); 835 static void tl_optmgmt(queue_t *, mblk_t *); 836 static void tl_conn_req(queue_t *, mblk_t *); 837 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 838 static void tl_conn_res(mblk_t *, tl_endpt_t *); 839 static void tl_discon_req(mblk_t *, tl_endpt_t *); 840 static void tl_capability_req(mblk_t *, tl_endpt_t *); 841 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 842 static void tl_info_req(mblk_t *, tl_endpt_t *); 843 static void tl_addr_req(mblk_t *, tl_endpt_t *); 844 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 845 static void tl_data(mblk_t *, tl_endpt_t *); 846 static void tl_exdata(mblk_t *, tl_endpt_t *); 847 static void tl_ordrel(mblk_t *, tl_endpt_t *); 848 static void tl_unitdata(mblk_t *, tl_endpt_t *); 849 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 850 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 851 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 852 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 853 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 854 static void tl_cl_backenable(tl_endpt_t *); 855 static void tl_co_unconnect(tl_endpt_t *); 856 static mblk_t *tl_resizemp(mblk_t *, ssize_t); 857 static void 
tl_discon_ind(tl_endpt_t *, uint32_t); 858 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 859 static mblk_t *tl_ordrel_ind_alloc(void); 860 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 861 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 862 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 863 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 864 static void tl_icon_freemsgs(mblk_t **); 865 static void tl_merror(queue_t *, mblk_t *, int); 866 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 867 static int tl_default_opt(queue_t *, int, int, uchar_t *); 868 static int tl_get_opt(queue_t *, int, int, uchar_t *); 869 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 870 uchar_t *, void *, cred_t *); 871 static void tl_memrecover(queue_t *, mblk_t *, size_t); 872 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 873 static void tl_free(tl_endpt_t *); 874 static int tl_constructor(void *, void *, int); 875 static void tl_destructor(void *, void *); 876 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 877 static tl_serializer_t *tl_serializer_alloc(int); 878 static void tl_serializer_refhold(tl_serializer_t *); 879 static void tl_serializer_refrele(tl_serializer_t *); 880 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 881 static void tl_serializer_exit(tl_endpt_t *); 882 static boolean_t tl_noclose(tl_endpt_t *); 883 static void tl_closeok(tl_endpt_t *); 884 static void tl_refhold(tl_endpt_t *); 885 static void tl_refrele(tl_endpt_t *); 886 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 887 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 888 static void tl_close_ser(mblk_t *, tl_endpt_t *); 889 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 890 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 891 static void tl_proto_ser(mblk_t *, tl_endpt_t *); 892 static void tl_putq_ser(mblk_t *, 
tl_endpt_t *); 893 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 895 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 896 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 897 static void tl_addr_unbind(tl_endpt_t *); 898 899 /* 900 * Intialize option database object for TL 901 */ 902 903 optdb_obj_t tl_opt_obj = { 904 tl_default_opt, /* TL default value function pointer */ 905 tl_get_opt, /* TL get function pointer */ 906 tl_set_opt, /* TL set function pointer */ 907 TL_OPT_ARR_CNT, /* TL option database count of entries */ 908 tl_opt_arr, /* TL option database */ 909 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 910 tl_valid_levels_arr /* TL valid level array */ 911 }; 912 913 /* 914 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 915 * --------------------------------------- 916 */ 917 918 /* 919 * Loadable module routines 920 */ 921 int 922 _init(void) 923 { 924 return (mod_install(&modlinkage)); 925 } 926 927 int 928 _fini(void) 929 { 930 return (mod_remove(&modlinkage)); 931 } 932 933 int 934 _info(struct modinfo *modinfop) 935 { 936 return (mod_info(&modlinkage, modinfop)); 937 } 938 939 /* 940 * Driver Entry Points and Other routines 941 */ 942 static int 943 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 944 { 945 int i; 946 char name[32]; 947 948 /* 949 * Resume from a checkpoint state. 950 */ 951 if (cmd == DDI_RESUME) 952 return (DDI_SUCCESS); 953 954 if (cmd != DDI_ATTACH) 955 return (DDI_FAILURE); 956 957 /* 958 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that 959 * streams message sizes can be unlimited. We use a defined constant 960 * instead. 961 */ 962 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 963 964 /* 965 * Create subdevices for each transport. 
966 */ 967 for (i = 0; i < TL_UNUSED; i++) { 968 if (ddi_create_minor_node(devi, 969 tl_transports[i].tr_name, 970 S_IFCHR, tl_transports[i].tr_minor, 971 DDI_PSEUDO, NULL) == DDI_FAILURE) { 972 ddi_remove_minor_node(devi, NULL); 973 return (DDI_FAILURE); 974 } 975 } 976 977 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 978 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 979 980 if (tl_cache == NULL) { 981 ddi_remove_minor_node(devi, NULL); 982 return (DDI_FAILURE); 983 } 984 985 tl_minors = id_space_create("tl_minor_space", 986 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 987 988 /* 989 * Create ID space for minor numbers 990 */ 991 for (i = 0; i < TL_MAXTRANSPORT; i++) { 992 tl_transport_state_t *t = &tl_transports[i]; 993 994 if (i == TL_UNUSED) 995 continue; 996 997 /* Socket COTSORD shares namespace with COTS */ 998 if (i == TL_SOCK_COTSORD) { 999 t->tr_ai_hash = 1000 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1001 ASSERT(t->tr_ai_hash != NULL); 1002 t->tr_addr_hash = 1003 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1004 ASSERT(t->tr_addr_hash != NULL); 1005 continue; 1006 } 1007 1008 /* 1009 * Create hash tables. 
1010 */ 1011 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1012 t->tr_name); 1013 #ifdef _ILP32 1014 if (i & TL_SOCKET) 1015 t->tr_ai_hash = 1016 mod_hash_create_idhash(name, tl_hash_size - 1, 1017 mod_hash_null_valdtor); 1018 else 1019 t->tr_ai_hash = 1020 mod_hash_create_ptrhash(name, tl_hash_size, 1021 mod_hash_null_valdtor, sizeof (queue_t)); 1022 #else 1023 t->tr_ai_hash = 1024 mod_hash_create_idhash(name, tl_hash_size - 1, 1025 mod_hash_null_valdtor); 1026 #endif /* _ILP32 */ 1027 1028 if (i & TL_SOCKET) { 1029 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1030 t->tr_name); 1031 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1032 tl_hash_size, mod_hash_null_valdtor, 1033 sizeof (uintptr_t)); 1034 } else { 1035 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1036 t->tr_name); 1037 t->tr_addr_hash = mod_hash_create_extended(name, 1038 tl_hash_size, mod_hash_null_keydtor, 1039 mod_hash_null_valdtor, 1040 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1041 } 1042 1043 /* Create serializer for connectionless transports. */ 1044 if (i & TL_TICLTS) 1045 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1046 } 1047 1048 tl_dip = devi; 1049 1050 return (DDI_SUCCESS); 1051 } 1052 1053 static int 1054 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1055 { 1056 int i; 1057 1058 if (cmd == DDI_SUSPEND) 1059 return (DDI_SUCCESS); 1060 1061 if (cmd != DDI_DETACH) 1062 return (DDI_FAILURE); 1063 1064 /* 1065 * Destroy arenas and hash tables. 
1066 */ 1067 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1068 tl_transport_state_t *t = &tl_transports[i]; 1069 1070 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1071 continue; 1072 1073 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL); 1074 if (t->tr_serializer != NULL) { 1075 tl_serializer_refrele(t->tr_serializer); 1076 t->tr_serializer = NULL; 1077 } 1078 1079 #ifdef _ILP32 1080 if (i & TL_SOCKET) 1081 mod_hash_destroy_idhash(t->tr_ai_hash); 1082 else 1083 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1084 #else 1085 mod_hash_destroy_idhash(t->tr_ai_hash); 1086 #endif /* _ILP32 */ 1087 t->tr_ai_hash = NULL; 1088 if (i & TL_SOCKET) 1089 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1090 else 1091 mod_hash_destroy_hash(t->tr_addr_hash); 1092 t->tr_addr_hash = NULL; 1093 } 1094 1095 kmem_cache_destroy(tl_cache); 1096 tl_cache = NULL; 1097 id_space_destroy(tl_minors); 1098 tl_minors = NULL; 1099 ddi_remove_minor_node(devi, NULL); 1100 return (DDI_SUCCESS); 1101 } 1102 1103 /* ARGSUSED */ 1104 static int 1105 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1106 { 1107 1108 int retcode = DDI_FAILURE; 1109 1110 switch (infocmd) { 1111 1112 case DDI_INFO_DEVT2DEVINFO: 1113 if (tl_dip != NULL) { 1114 *result = (void *)tl_dip; 1115 retcode = DDI_SUCCESS; 1116 } 1117 break; 1118 1119 case DDI_INFO_DEVT2INSTANCE: 1120 *result = (void *)0; 1121 retcode = DDI_SUCCESS; 1122 break; 1123 1124 default: 1125 break; 1126 } 1127 return (retcode); 1128 } 1129 1130 /* 1131 * Endpoint reference management. 
1132 */ 1133 static void 1134 tl_refhold(tl_endpt_t *tep) 1135 { 1136 atomic_add_32(&tep->te_refcnt, 1); 1137 } 1138 1139 static void 1140 tl_refrele(tl_endpt_t *tep) 1141 { 1142 ASSERT(tep->te_refcnt != 0); 1143 1144 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1145 tl_free(tep); 1146 } 1147 1148 /*ARGSUSED*/ 1149 static int 1150 tl_constructor(void *buf, void *cdrarg, int kmflags) 1151 { 1152 tl_endpt_t *tep = buf; 1153 1154 bzero(tep, sizeof (tl_endpt_t)); 1155 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1156 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1157 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1158 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1159 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1160 1161 return (0); 1162 } 1163 1164 /*ARGSUSED*/ 1165 static void 1166 tl_destructor(void *buf, void *cdrarg) 1167 { 1168 tl_endpt_t *tep = buf; 1169 1170 mutex_destroy(&tep->te_closelock); 1171 cv_destroy(&tep->te_closecv); 1172 mutex_destroy(&tep->te_srv_lock); 1173 cv_destroy(&tep->te_srv_cv); 1174 mutex_destroy(&tep->te_ser_lock); 1175 } 1176 1177 static void 1178 tl_free(tl_endpt_t *tep) 1179 { 1180 ASSERT(tep->te_refcnt == 0); 1181 ASSERT(tep->te_transport != NULL); 1182 ASSERT(tep->te_rq == NULL); 1183 ASSERT(tep->te_wq == NULL); 1184 ASSERT(tep->te_ser != NULL); 1185 ASSERT(tep->te_ser_count == 0); 1186 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1187 1188 if (IS_SOCKET(tep)) { 1189 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1190 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1191 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1192 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1193 } else if (tep->te_abuf != NULL) { 1194 kmem_free(tep->te_abuf, tep->te_alen); 1195 tep->te_alen = -1; /* uninitialized */ 1196 tep->te_abuf = NULL; 1197 } else { 1198 ASSERT(tep->te_alen == -1); 1199 } 1200 1201 id_free(tl_minors, tep->te_minor); 1202 ASSERT(tep->te_credp == NULL); 1203 1204 if (tep->te_hash_hndl != NULL) 1205 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1206 1207 if (IS_COTS(tep)) { 1208 TL_REMOVE_PEER(tep->te_conp); 1209 TL_REMOVE_PEER(tep->te_oconp); 1210 tl_serializer_refrele(tep->te_ser); 1211 tep->te_ser = NULL; 1212 ASSERT(tep->te_nicon == 0); 1213 ASSERT(list_head(&tep->te_iconp) == NULL); 1214 } else { 1215 ASSERT(tep->te_lastep == NULL); 1216 ASSERT(list_head(&tep->te_flowlist) == NULL); 1217 ASSERT(tep->te_flowq == NULL); 1218 } 1219 1220 ASSERT(tep->te_bufcid == 0); 1221 ASSERT(tep->te_timoutid == 0); 1222 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1223 tep->te_acceptor_id = 0; 1224 1225 ASSERT(tep->te_closewait == 0); 1226 ASSERT(!tep->te_rsrv_active); 1227 ASSERT(!tep->te_wsrv_active); 1228 tep->te_closing = 0; 1229 tep->te_nowsrv = B_FALSE; 1230 tep->te_flag = 0; 1231 1232 kmem_cache_free(tl_cache, tep); 1233 } 1234 1235 /* 1236 * Allocate/free reference-counted wrappers for serializers. 
1237 */ 1238 static tl_serializer_t * 1239 tl_serializer_alloc(int flags) 1240 { 1241 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1242 serializer_t *ser; 1243 1244 if (s == NULL) 1245 return (NULL); 1246 1247 ser = serializer_create(flags); 1248 1249 if (ser == NULL) { 1250 kmem_free(s, sizeof (tl_serializer_t)); 1251 return (NULL); 1252 } 1253 1254 s->ts_refcnt = 1; 1255 s->ts_serializer = ser; 1256 return (s); 1257 } 1258 1259 static void 1260 tl_serializer_refhold(tl_serializer_t *s) 1261 { 1262 atomic_add_32(&s->ts_refcnt, 1); 1263 } 1264 1265 static void 1266 tl_serializer_refrele(tl_serializer_t *s) 1267 { 1268 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1269 serializer_destroy(s->ts_serializer); 1270 kmem_free(s, sizeof (tl_serializer_t)); 1271 } 1272 } 1273 1274 /* 1275 * Post a request on the endpoint serializer. For COTS transports keep track of 1276 * the number of pending requests. 1277 */ 1278 static void 1279 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1280 { 1281 if (IS_COTS(tep)) { 1282 mutex_enter(&tep->te_ser_lock); 1283 tep->te_ser_count++; 1284 mutex_exit(&tep->te_ser_lock); 1285 } 1286 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1287 } 1288 1289 /* 1290 * Complete processing the request on the serializer. Decrement the counter for 1291 * pending requests for COTS transports. 1292 */ 1293 static void 1294 tl_serializer_exit(tl_endpt_t *tep) 1295 { 1296 if (IS_COTS(tep)) { 1297 mutex_enter(&tep->te_ser_lock); 1298 ASSERT(tep->te_ser_count != 0); 1299 tep->te_ser_count--; 1300 mutex_exit(&tep->te_ser_lock); 1301 } 1302 } 1303 1304 /* 1305 * Hash management functions. 1306 */ 1307 1308 /* 1309 * Return TRUE if two addresses are equal, false otherwise. 
1310 */ 1311 static boolean_t 1312 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1313 { 1314 return ((ap1->ta_alen > 0) && 1315 (ap1->ta_alen == ap2->ta_alen) && 1316 (ap1->ta_zoneid == ap2->ta_zoneid) && 1317 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1318 } 1319 1320 /* 1321 * This function is called whenever an endpoint is found in the hash table. 1322 */ 1323 /* ARGSUSED0 */ 1324 static void 1325 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1326 { 1327 tl_refhold((tl_endpt_t *)val); 1328 } 1329 1330 /* 1331 * Address hash function. 1332 */ 1333 /* ARGSUSED */ 1334 static uint_t 1335 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1336 { 1337 tl_addr_t *ap = (tl_addr_t *)key; 1338 size_t len = ap->ta_alen; 1339 uchar_t *p = ap->ta_abuf; 1340 uint_t i, g; 1341 1342 ASSERT((len > 0) && (p != NULL)); 1343 1344 for (i = ap->ta_zoneid; len -- != 0; p++) { 1345 i = (i << 4) + (*p); 1346 if ((g = (i & 0xf0000000U)) != 0) { 1347 i ^= (g >> 24); 1348 i ^= g; 1349 } 1350 } 1351 return (i); 1352 } 1353 1354 /* 1355 * This function is used by hash lookups. It compares two generic addresses. 1356 */ 1357 static int 1358 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1359 { 1360 #ifdef DEBUG 1361 tl_addr_t *ap1 = (tl_addr_t *)key1; 1362 tl_addr_t *ap2 = (tl_addr_t *)key2; 1363 1364 ASSERT(key1 != NULL); 1365 ASSERT(key2 != NULL); 1366 1367 ASSERT(ap1->ta_abuf != NULL); 1368 ASSERT(ap2->ta_abuf != NULL); 1369 ASSERT(ap1->ta_alen > 0); 1370 ASSERT(ap2->ta_alen > 0); 1371 #endif 1372 1373 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1374 } 1375 1376 /* 1377 * Prevent endpoint from closing if possible. 1378 * Return B_TRUE on success, B_FALSE on failure. 1379 */ 1380 static boolean_t 1381 tl_noclose(tl_endpt_t *tep) 1382 { 1383 boolean_t rc = B_FALSE; 1384 1385 mutex_enter(&tep->te_closelock); 1386 if (! 
tep->te_closing) { 1387 ASSERT(tep->te_closewait == 0); 1388 tep->te_closewait++; 1389 rc = B_TRUE; 1390 } 1391 mutex_exit(&tep->te_closelock); 1392 return (rc); 1393 } 1394 1395 /* 1396 * Allow endpoint to close if needed. 1397 */ 1398 static void 1399 tl_closeok(tl_endpt_t *tep) 1400 { 1401 ASSERT(tep->te_closewait > 0); 1402 mutex_enter(&tep->te_closelock); 1403 ASSERT(tep->te_closewait == 1); 1404 tep->te_closewait--; 1405 cv_signal(&tep->te_closecv); 1406 mutex_exit(&tep->te_closelock); 1407 } 1408 1409 /* 1410 * STREAMS open entry point. 1411 */ 1412 /* ARGSUSED */ 1413 static int 1414 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1415 { 1416 tl_endpt_t *tep; 1417 minor_t minor = getminor(*devp); 1418 1419 /* 1420 * Driver is called directly. Both CLONEOPEN and MODOPEN 1421 * are illegal 1422 */ 1423 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1424 return (ENXIO); 1425 1426 if (rq->q_ptr != NULL) 1427 return (0); 1428 1429 /* Minor number should specify the mode used for the driver. */ 1430 if ((minor >= TL_UNUSED)) 1431 return (ENXIO); 1432 1433 if (oflag & SO_SOCKSTR) { 1434 minor |= TL_SOCKET; 1435 } 1436 1437 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1438 tep->te_refcnt = 1; 1439 tep->te_cpid = curproc->p_pid; 1440 rq->q_ptr = WR(rq)->q_ptr = tep; 1441 tep->te_state = TS_UNBND; 1442 tep->te_credp = credp; 1443 crhold(credp); 1444 tep->te_zoneid = getzoneid(); 1445 1446 tep->te_flag = minor & TL_MINOR_MASK; 1447 tep->te_transport = &tl_transports[minor]; 1448 1449 /* Allocate a unique minor number for this instance. */ 1450 tep->te_minor = (minor_t)id_alloc(tl_minors); 1451 1452 /* Reserve hash handle for bind(). 
*/ 1453 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1454 1455 /* Transport-specific initialization */ 1456 if (IS_COTS(tep)) { 1457 /* Use private serializer */ 1458 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1459 1460 /* Create list for pending connections */ 1461 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1462 offsetof(tl_icon_t, ti_node)); 1463 tep->te_qlen = 0; 1464 tep->te_nicon = 0; 1465 tep->te_oconp = NULL; 1466 tep->te_conp = NULL; 1467 } else { 1468 /* Use shared serializer */ 1469 tep->te_ser = tep->te_transport->tr_serializer; 1470 bzero(&tep->te_flows, sizeof (list_node_t)); 1471 /* Create list for flow control */ 1472 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1473 offsetof(tl_endpt_t, te_flows)); 1474 tep->te_flowq = NULL; 1475 tep->te_lastep = NULL; 1476 1477 } 1478 1479 /* Initialize endpoint address */ 1480 if (IS_SOCKET(tep)) { 1481 /* Socket-specific address handling. */ 1482 tep->te_alen = TL_SOUX_ADDRLEN; 1483 tep->te_abuf = &tep->te_uxaddr; 1484 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1485 tep->te_magic = SOU_MAGIC_IMPLICIT; 1486 } else { 1487 tep->te_alen = -1; 1488 tep->te_abuf = NULL; 1489 } 1490 1491 /* clone the driver */ 1492 *devp = makedevice(getmajor(*devp), tep->te_minor); 1493 1494 tep->te_rq = rq; 1495 tep->te_wq = WR(rq); 1496 1497 #ifdef _ILP32 1498 if (IS_SOCKET(tep)) 1499 tep->te_acceptor_id = tep->te_minor; 1500 else 1501 tep->te_acceptor_id = (t_uscalar_t)rq; 1502 #else 1503 tep->te_acceptor_id = tep->te_minor; 1504 #endif /* _ILP32 */ 1505 1506 1507 qprocson(rq); 1508 1509 /* 1510 * Insert acceptor ID in the hash. The AI hash always sleeps on 1511 * insertion so insertion can't fail. 
1512 */ 1513 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1514 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1515 (mod_hash_val_t)tep); 1516 1517 return (0); 1518 } 1519 1520 /* ARGSUSED1 */ 1521 static int 1522 tl_close(queue_t *rq, int flag, cred_t *credp) 1523 { 1524 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1525 tl_endpt_t *elp = NULL; 1526 queue_t *wq = tep->te_wq; 1527 int rc; 1528 1529 ASSERT(wq == WR(rq)); 1530 1531 /* 1532 * Remove the endpoint from acceptor hash. 1533 */ 1534 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1535 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1536 (mod_hash_val_t *)&elp); 1537 ASSERT(rc == 0 && tep == elp); 1538 if ((rc != 0) || (tep != elp)) { 1539 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1540 SL_TRACE|SL_ERROR, 1541 "tl_close:inconsistency in AI hash")); 1542 } 1543 1544 /* 1545 * Wait till close is safe, then mark endpoint as closing. 1546 */ 1547 mutex_enter(&tep->te_closelock); 1548 while (tep->te_closewait) 1549 cv_wait(&tep->te_closecv, &tep->te_closelock); 1550 tep->te_closing = B_TRUE; 1551 /* 1552 * Will wait for the serializer part of the close to finish, so set 1553 * te_closewait now. 1554 */ 1555 tep->te_closewait = 1; 1556 tep->te_nowsrv = B_FALSE; 1557 mutex_exit(&tep->te_closelock); 1558 1559 /* 1560 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1561 * It is safe because close will wait for tl_close_ser to finish. 1562 */ 1563 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1564 1565 /* 1566 * Wait for the first phase of close to complete before qprocsoff(). 
1567 */ 1568 mutex_enter(&tep->te_closelock); 1569 while (tep->te_closewait) 1570 cv_wait(&tep->te_closecv, &tep->te_closelock); 1571 mutex_exit(&tep->te_closelock); 1572 1573 qprocsoff(rq); 1574 1575 if (tep->te_bufcid) { 1576 qunbufcall(rq, tep->te_bufcid); 1577 tep->te_bufcid = 0; 1578 } 1579 if (tep->te_timoutid) { 1580 (void) quntimeout(rq, tep->te_timoutid); 1581 tep->te_timoutid = 0; 1582 } 1583 1584 /* 1585 * Finish close behind serializer. 1586 * 1587 * For a CLTS endpoint increase a refcount and continue close processing 1588 * with serializer protection. This processing may happen asynchronously 1589 * with the completion of tl_close(). 1590 * 1591 * Fot a COTS endpoint wait before destroying tep since the serializer 1592 * may go away together with tep and we need to destroy serializer 1593 * outside of serializer context. 1594 */ 1595 ASSERT(tep->te_closewait == 0); 1596 if (IS_COTS(tep)) 1597 tep->te_closewait = 1; 1598 else 1599 tl_refhold(tep); 1600 1601 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1602 1603 /* 1604 * For connection-oriented transports wait for all serializer activity 1605 * to settle down. 1606 */ 1607 if (IS_COTS(tep)) { 1608 mutex_enter(&tep->te_closelock); 1609 while (tep->te_closewait) 1610 cv_wait(&tep->te_closecv, &tep->te_closelock); 1611 mutex_exit(&tep->te_closelock); 1612 } 1613 1614 crfree(tep->te_credp); 1615 tep->te_credp = NULL; 1616 tep->te_wq = NULL; 1617 tl_refrele(tep); 1618 /* 1619 * tep is likely to be destroyed now, so can't reference it any more. 1620 */ 1621 1622 rq->q_ptr = wq->q_ptr = NULL; 1623 return (0); 1624 } 1625 1626 /* 1627 * First phase of close processing done behind the serializer. 1628 * 1629 * Do not drop the reference in the end - tl_close() wants this reference to 1630 * stay. 
1631 */ 1632 /* ARGSUSED0 */ 1633 static void 1634 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1635 { 1636 ASSERT(tep->te_closing); 1637 ASSERT(tep->te_closewait == 1); 1638 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1639 1640 tep->te_flag |= TL_CLOSE_SER; 1641 1642 /* 1643 * Drain out all messages on queue except for TL_TICOTS where the 1644 * abortive release semantics permit discarding of data on close 1645 */ 1646 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1647 tl_wsrv_ser(NULL, tep); 1648 } 1649 1650 /* Remove address from hash table. */ 1651 tl_addr_unbind(tep); 1652 /* 1653 * qprocsoff() gets confused when q->q_next is not NULL on the write 1654 * queue of the driver, so clear these before qprocsoff() is called. 1655 * Also clear q_next for the peer since this queue is going away. 1656 */ 1657 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1658 tl_endpt_t *peer_tep = tep->te_conp; 1659 1660 tep->te_wq->q_next = NULL; 1661 if ((peer_tep != NULL) && !peer_tep->te_closing) 1662 peer_tep->te_wq->q_next = NULL; 1663 } 1664 1665 tep->te_rq = NULL; 1666 1667 /* wake up tl_close() */ 1668 tl_closeok(tep); 1669 tl_serializer_exit(tep); 1670 } 1671 1672 /* 1673 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1674 * the reference for CLTS. 1675 * 1676 * Called from serializer. Should drop reference count for CLTS only. 1677 */ 1678 /* ARGSUSED0 */ 1679 static void 1680 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1681 { 1682 ASSERT(tep->te_closing); 1683 IMPLY(IS_CLTS(tep), tep->te_closewait == 0); 1684 IMPLY(IS_COTS(tep), tep->te_closewait == 1); 1685 1686 tep->te_state = -1; /* Uninitialized */ 1687 if (IS_COTS(tep)) { 1688 tl_co_unconnect(tep); 1689 } else { 1690 /* Connectionless specific cleanup */ 1691 TL_REMOVE_PEER(tep->te_lastep); 1692 /* 1693 * Backenable anybody that is flow controlled waiting for 1694 * this endpoint. 
1695 */ 1696 tl_cl_backenable(tep); 1697 if (tep->te_flowq != NULL) { 1698 list_remove(&(tep->te_flowq->te_flowlist), tep); 1699 tep->te_flowq = NULL; 1700 } 1701 } 1702 1703 tl_serializer_exit(tep); 1704 if (IS_COTS(tep)) 1705 tl_closeok(tep); 1706 else 1707 tl_refrele(tep); 1708 } 1709 1710 /* 1711 * STREAMS write-side put procedure. 1712 * Enter serializer for most of the processing. 1713 * 1714 * The T_CONN_REQ is processed outside of serializer. 1715 */ 1716 static void 1717 tl_wput(queue_t *wq, mblk_t *mp) 1718 { 1719 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1720 ssize_t msz = MBLKL(mp); 1721 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1722 tlproc_t *tl_proc = NULL; 1723 1724 switch (DB_TYPE(mp)) { 1725 case M_DATA: 1726 /* Only valid for connection-oriented transports */ 1727 if (IS_CLTS(tep)) { 1728 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1729 SL_TRACE|SL_ERROR, 1730 "tl_wput:M_DATA invalid for ticlts driver")); 1731 tl_merror(wq, mp, EPROTO); 1732 return; 1733 } 1734 tl_proc = tl_wput_data_ser; 1735 break; 1736 1737 case M_IOCTL: 1738 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1739 case TL_IOC_CREDOPT: 1740 /* FALLTHROUGH */ 1741 case TL_IOC_UCREDOPT: 1742 /* 1743 * Serialize endpoint state change. 
1744 */ 1745 tl_proc = tl_do_ioctl_ser; 1746 break; 1747 1748 default: 1749 miocnak(wq, mp, 0, EINVAL); 1750 return; 1751 } 1752 break; 1753 1754 case M_FLUSH: 1755 /* 1756 * do canonical M_FLUSH processing 1757 */ 1758 if (*mp->b_rptr & FLUSHW) { 1759 flushq(wq, FLUSHALL); 1760 *mp->b_rptr &= ~FLUSHW; 1761 } 1762 if (*mp->b_rptr & FLUSHR) { 1763 flushq(RD(wq), FLUSHALL); 1764 qreply(wq, mp); 1765 } else { 1766 freemsg(mp); 1767 } 1768 return; 1769 1770 case M_PROTO: 1771 if (msz < sizeof (prim->type)) { 1772 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1773 SL_TRACE|SL_ERROR, 1774 "tl_wput:M_PROTO data too short")); 1775 tl_merror(wq, mp, EPROTO); 1776 return; 1777 } 1778 switch (prim->type) { 1779 case T_OPTMGMT_REQ: 1780 case T_SVR4_OPTMGMT_REQ: 1781 /* 1782 * Process TPI option management requests immediately 1783 * in put procedure regardless of in-order processing 1784 * of already queued messages. 1785 * (Note: This driver supports AF_UNIX socket 1786 * implementation. Unless we implement this processing, 1787 * setsockopt() on socket endpoint will block on flow 1788 * controlled endpoints which it should not. That is 1789 * required for successful execution of VSU socket tests 1790 * and is consistent with BSD socket behavior). 
1791 */ 1792 tl_optmgmt(wq, mp); 1793 return; 1794 case O_T_BIND_REQ: 1795 case T_BIND_REQ: 1796 tl_proc = tl_bind_ser; 1797 break; 1798 case T_CONN_REQ: 1799 if (IS_CLTS(tep)) { 1800 tl_merror(wq, mp, EPROTO); 1801 return; 1802 } 1803 tl_conn_req(wq, mp); 1804 return; 1805 case T_DATA_REQ: 1806 case T_OPTDATA_REQ: 1807 case T_EXDATA_REQ: 1808 case T_ORDREL_REQ: 1809 tl_proc = tl_putq_ser; 1810 break; 1811 case T_UNITDATA_REQ: 1812 if (IS_COTS(tep) || 1813 (msz < sizeof (struct T_unitdata_req))) { 1814 tl_merror(wq, mp, EPROTO); 1815 return; 1816 } 1817 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1818 tl_proc = tl_unitdata_ser; 1819 } else { 1820 tl_proc = tl_putq_ser; 1821 } 1822 break; 1823 default: 1824 /* 1825 * process in service procedure if message already 1826 * queued (maintain in-order processing) 1827 */ 1828 if (wq->q_first != NULL) { 1829 tl_proc = tl_putq_ser; 1830 } else { 1831 tl_proc = tl_wput_ser; 1832 } 1833 break; 1834 } 1835 break; 1836 1837 case M_PCPROTO: 1838 /* 1839 * Check that the message has enough data to figure out TPI 1840 * primitive. 1841 */ 1842 if (msz < sizeof (prim->type)) { 1843 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1844 SL_TRACE|SL_ERROR, 1845 "tl_wput:M_PCROTO data too short")); 1846 tl_merror(wq, mp, EPROTO); 1847 return; 1848 } 1849 switch (prim->type) { 1850 case T_CAPABILITY_REQ: 1851 tl_capability_req(mp, tep); 1852 return; 1853 case T_INFO_REQ: 1854 tl_proc = tl_info_req_ser; 1855 break; 1856 default: 1857 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1858 SL_TRACE|SL_ERROR, 1859 "tl_wput:unknown TPI msg primitive")); 1860 tl_merror(wq, mp, EPROTO); 1861 return; 1862 } 1863 break; 1864 default: 1865 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1866 "tl_wput:default:unexpected Streams message")); 1867 freemsg(mp); 1868 return; 1869 } 1870 1871 /* 1872 * Continue processing via serializer. 
1873 */ 1874 ASSERT(tl_proc != NULL); 1875 tl_refhold(tep); 1876 tl_serializer_enter(tep, tl_proc, mp); 1877 } 1878 1879 /* 1880 * Place message on the queue while preserving order. 1881 */ 1882 static void 1883 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1884 { 1885 if (tep->te_closing) { 1886 tl_wput_ser(mp, tep); 1887 } else { 1888 TL_PUTQ(tep, mp); 1889 tl_serializer_exit(tep); 1890 tl_refrele(tep); 1891 } 1892 1893 } 1894 1895 static void 1896 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1897 { 1898 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1899 1900 switch (DB_TYPE(mp)) { 1901 case M_DATA: 1902 tl_data(mp, tep); 1903 break; 1904 case M_PROTO: 1905 tl_do_proto(mp, tep); 1906 break; 1907 default: 1908 freemsg(mp); 1909 break; 1910 } 1911 } 1912 1913 /* 1914 * Write side put procedure called from serializer. 1915 */ 1916 static void 1917 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1918 { 1919 tl_wput_common_ser(mp, tep); 1920 tl_serializer_exit(tep); 1921 tl_refrele(tep); 1922 } 1923 1924 /* 1925 * M_DATA processing. Called from serializer. 1926 */ 1927 static void 1928 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1929 { 1930 tl_endpt_t *peer_tep = tep->te_conp; 1931 queue_t *peer_rq; 1932 1933 ASSERT(DB_TYPE(mp) == M_DATA); 1934 ASSERT(IS_COTS(tep)); 1935 1936 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer); 1937 1938 /* 1939 * fastpath for data. Ignore flow control if tep is closing. 
1940 */ 1941 if ((peer_tep != NULL) && 1942 !peer_tep->te_closing && 1943 ((tep->te_state == TS_DATA_XFER) || 1944 (tep->te_state == TS_WREQ_ORDREL)) && 1945 (tep->te_wq != NULL) && 1946 (tep->te_wq->q_first == NULL) && 1947 ((peer_tep->te_state == TS_DATA_XFER) || 1948 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1949 ((peer_rq = peer_tep->te_rq) != NULL) && 1950 (canputnext(peer_rq) || tep->te_closing)) { 1951 putnext(peer_rq, mp); 1952 } else if (tep->te_closing) { 1953 /* 1954 * It is possible that by the time we got here tep started to 1955 * close. If the write queue is not empty, and the state is 1956 * TS_DATA_XFER the data should be delivered in order, so we 1957 * call putq() instead of freeing the data. 1958 */ 1959 if ((tep->te_wq != NULL) && 1960 ((tep->te_state == TS_DATA_XFER) || 1961 (tep->te_state == TS_WREQ_ORDREL))) { 1962 TL_PUTQ(tep, mp); 1963 } else { 1964 freemsg(mp); 1965 } 1966 } else { 1967 TL_PUTQ(tep, mp); 1968 } 1969 1970 tl_serializer_exit(tep); 1971 tl_refrele(tep); 1972 } 1973 1974 /* 1975 * Write side service routine. 1976 * 1977 * All actual processing happens within serializer which is entered 1978 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1979 * messages that need processing may have arrived, so tl_wsrv repeats until 1980 * queue is empty or te_nowsrv is set. 1981 */ 1982 static void 1983 tl_wsrv(queue_t *wq) 1984 { 1985 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1986 1987 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 1988 mutex_enter(&tep->te_srv_lock); 1989 ASSERT(tep->te_wsrv_active == B_FALSE); 1990 tep->te_wsrv_active = B_TRUE; 1991 mutex_exit(&tep->te_srv_lock); 1992 1993 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 1994 1995 /* 1996 * Wait for serializer job to complete. 
1997 */ 1998 mutex_enter(&tep->te_srv_lock); 1999 while (tep->te_wsrv_active) { 2000 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2001 } 2002 cv_signal(&tep->te_srv_cv); 2003 mutex_exit(&tep->te_srv_lock); 2004 } 2005 } 2006 2007 /* 2008 * Serialized write side processing of the STREAMS queue. 2009 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2010 * is NULL. 2011 */ 2012 static void 2013 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2014 { 2015 mblk_t *mp; 2016 queue_t *wq = tep->te_wq; 2017 2018 ASSERT(wq != NULL); 2019 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2020 tl_wput_common_ser(mp, tep); 2021 } 2022 2023 /* 2024 * Wakeup service routine unless called from close. 2025 * If ser_mp is specified, the caller is tl_wsrv(). 2026 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2027 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2028 * be no matching tl_serializer_exit() in this case. 2029 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2030 * waiting on te_srv_cv. 2031 */ 2032 if (ser_mp != NULL) { 2033 /* 2034 * We are called from tl_wsrv. 2035 */ 2036 mutex_enter(&tep->te_srv_lock); 2037 ASSERT(tep->te_wsrv_active); 2038 tep->te_wsrv_active = B_FALSE; 2039 cv_signal(&tep->te_srv_cv); 2040 mutex_exit(&tep->te_srv_lock); 2041 tl_serializer_exit(tep); 2042 } 2043 } 2044 2045 /* 2046 * Called when the stream is backenabled. Enter serializer and qenable everyone 2047 * flow controlled by tep. 2048 * 2049 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2050 * is possible that two instances of tl_rsrv will be running reusing the same 2051 * rsrv mblk. 
2052 */ 2053 static void 2054 tl_rsrv(queue_t *rq) 2055 { 2056 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2057 2058 ASSERT(rq->q_first == NULL); 2059 ASSERT(tep->te_rsrv_active == 0); 2060 2061 tep->te_rsrv_active = B_TRUE; 2062 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2063 /* 2064 * Wait for serializer job to complete. 2065 */ 2066 mutex_enter(&tep->te_srv_lock); 2067 while (tep->te_rsrv_active) { 2068 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2069 } 2070 cv_signal(&tep->te_srv_cv); 2071 mutex_exit(&tep->te_srv_lock); 2072 } 2073 2074 /* ARGSUSED */ 2075 static void 2076 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2077 { 2078 tl_endpt_t *peer_tep; 2079 2080 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2081 tl_cl_backenable(tep); 2082 } else if ( 2083 IS_COTS(tep) && 2084 ((peer_tep = tep->te_conp) != NULL) && 2085 !peer_tep->te_closing && 2086 ((tep->te_state == TS_DATA_XFER) || 2087 (tep->te_state == TS_WIND_ORDREL)|| 2088 (tep->te_state == TS_WREQ_ORDREL))) { 2089 TL_QENABLE(peer_tep); 2090 } 2091 2092 /* 2093 * Wakeup read side service routine. 2094 */ 2095 mutex_enter(&tep->te_srv_lock); 2096 ASSERT(tep->te_rsrv_active); 2097 tep->te_rsrv_active = B_FALSE; 2098 cv_signal(&tep->te_srv_cv); 2099 mutex_exit(&tep->te_srv_lock); 2100 tl_serializer_exit(tep); 2101 } 2102 2103 /* 2104 * process M_PROTO messages. Always called from serializer. 2105 */ 2106 static void 2107 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2108 { 2109 ssize_t msz = MBLKL(mp); 2110 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2111 2112 /* Message size was validated by tl_wput(). 
*/ 2113 ASSERT(msz >= sizeof (prim->type)); 2114 2115 switch (prim->type) { 2116 case T_UNBIND_REQ: 2117 tl_unbind(mp, tep); 2118 break; 2119 2120 case T_ADDR_REQ: 2121 tl_addr_req(mp, tep); 2122 break; 2123 2124 case O_T_CONN_RES: 2125 case T_CONN_RES: 2126 if (IS_CLTS(tep)) { 2127 tl_merror(tep->te_wq, mp, EPROTO); 2128 break; 2129 } 2130 tl_conn_res(mp, tep); 2131 break; 2132 2133 case T_DISCON_REQ: 2134 if (IS_CLTS(tep)) { 2135 tl_merror(tep->te_wq, mp, EPROTO); 2136 break; 2137 } 2138 tl_discon_req(mp, tep); 2139 break; 2140 2141 case T_DATA_REQ: 2142 if (IS_CLTS(tep)) { 2143 tl_merror(tep->te_wq, mp, EPROTO); 2144 break; 2145 } 2146 tl_data(mp, tep); 2147 break; 2148 2149 case T_OPTDATA_REQ: 2150 if (IS_CLTS(tep)) { 2151 tl_merror(tep->te_wq, mp, EPROTO); 2152 break; 2153 } 2154 tl_data(mp, tep); 2155 break; 2156 2157 case T_EXDATA_REQ: 2158 if (IS_CLTS(tep)) { 2159 tl_merror(tep->te_wq, mp, EPROTO); 2160 break; 2161 } 2162 tl_exdata(mp, tep); 2163 break; 2164 2165 case T_ORDREL_REQ: 2166 if (! IS_COTSORD(tep)) { 2167 tl_merror(tep->te_wq, mp, EPROTO); 2168 break; 2169 } 2170 tl_ordrel(mp, tep); 2171 break; 2172 2173 case T_UNITDATA_REQ: 2174 if (IS_COTS(tep)) { 2175 tl_merror(tep->te_wq, mp, EPROTO); 2176 break; 2177 } 2178 tl_unitdata(mp, tep); 2179 break; 2180 2181 default: 2182 tl_merror(tep->te_wq, mp, EPROTO); 2183 break; 2184 } 2185 } 2186 2187 /* 2188 * Process ioctl from serializer. 2189 * This is a wrapper around tl_do_ioctl(). 2190 */ 2191 static void 2192 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2193 { 2194 if (! 
tep->te_closing) 2195 tl_do_ioctl(mp, tep); 2196 else 2197 freemsg(mp); 2198 2199 tl_serializer_exit(tep); 2200 tl_refrele(tep); 2201 } 2202 2203 static void 2204 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2205 { 2206 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2207 int cmd = iocbp->ioc_cmd; 2208 queue_t *wq = tep->te_wq; 2209 int error; 2210 int thisopt, otheropt; 2211 2212 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2213 2214 switch (cmd) { 2215 case TL_IOC_CREDOPT: 2216 if (cmd == TL_IOC_CREDOPT) { 2217 thisopt = TL_SETCRED; 2218 otheropt = TL_SETUCRED; 2219 } else { 2220 /* FALLTHROUGH */ 2221 case TL_IOC_UCREDOPT: 2222 thisopt = TL_SETUCRED; 2223 otheropt = TL_SETCRED; 2224 } 2225 /* 2226 * The credentials passing does not apply to sockets. 2227 * Only one of the cred options can be set at a given time. 2228 */ 2229 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2230 miocnak(wq, mp, 0, EINVAL); 2231 return; 2232 } 2233 2234 /* 2235 * Turn on generation of credential options for 2236 * T_conn_req, T_conn_con, T_unidata_ind. 
2237 */ 2238 error = miocpullup(mp, sizeof (uint32_t)); 2239 if (error != 0) { 2240 miocnak(wq, mp, 0, error); 2241 return; 2242 } 2243 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2244 miocnak(wq, mp, 0, EINVAL); 2245 return; 2246 } 2247 2248 if (*(uint32_t *)mp->b_cont->b_rptr) 2249 tep->te_flag |= thisopt; 2250 else 2251 tep->te_flag &= ~thisopt; 2252 2253 miocack(wq, mp, 0, 0); 2254 break; 2255 2256 default: 2257 /* Should not be here */ 2258 miocnak(wq, mp, 0, EINVAL); 2259 break; 2260 } 2261 } 2262 2263 2264 /* 2265 * send T_ERROR_ACK 2266 * Note: assumes enough memory or caller passed big enough mp 2267 * - no recovery from allocb failures 2268 */ 2269 2270 static void 2271 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2272 t_scalar_t unix_err, t_scalar_t type) 2273 { 2274 struct T_error_ack *err_ack; 2275 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2276 M_PCPROTO, T_ERROR_ACK); 2277 2278 if (ackmp == NULL) { 2279 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2280 "tl_error_ack:out of mblk memory")); 2281 tl_merror(wq, NULL, ENOSR); 2282 return; 2283 } 2284 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2285 err_ack->ERROR_prim = type; 2286 err_ack->TLI_error = tli_err; 2287 err_ack->UNIX_error = unix_err; 2288 2289 /* 2290 * send error ack message 2291 */ 2292 qreply(wq, ackmp); 2293 } 2294 2295 2296 2297 /* 2298 * send T_OK_ACK 2299 * Note: assumes enough memory or caller passed big enough mp 2300 * - no recovery from allocb failures 2301 */ 2302 static void 2303 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2304 { 2305 struct T_ok_ack *ok_ack; 2306 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2307 M_PCPROTO, T_OK_ACK); 2308 2309 if (ackmp == NULL) { 2310 tl_merror(wq, NULL, ENOMEM); 2311 return; 2312 } 2313 2314 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2315 ok_ack->CORRECT_prim = type; 2316 2317 (void) qreply(wq, ackmp); 2318 } 2319 2320 /* 2321 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2322 * This is a wrapper around tl_bind(). 2323 */ 2324 static void 2325 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2326 { 2327 if (! tep->te_closing) 2328 tl_bind(mp, tep); 2329 else 2330 freemsg(mp); 2331 2332 tl_serializer_exit(tep); 2333 tl_refrele(tep); 2334 } 2335 2336 /* 2337 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2338 * Assumes that the endpoint is in the unbound. 2339 */ 2340 static void 2341 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2342 { 2343 queue_t *wq = tep->te_wq; 2344 struct T_bind_ack *b_ack; 2345 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2346 mblk_t *ackmp, *bamp; 2347 soux_addr_t ux_addr; 2348 t_uscalar_t qlen = 0; 2349 t_scalar_t alen, aoff; 2350 tl_addr_t addr_req; 2351 void *addr_startp; 2352 ssize_t msz = MBLKL(mp), basize; 2353 t_scalar_t tli_err = 0, unix_err = 0; 2354 t_scalar_t save_prim_type = bind->PRIM_type; 2355 t_scalar_t save_state = tep->te_state; 2356 2357 if (tep->te_state != TS_UNBND) { 2358 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2359 SL_TRACE|SL_ERROR, 2360 "tl_wput:bind_request:out of state, state=%d", 2361 tep->te_state)); 2362 tli_err = TOUTSTATE; 2363 goto error; 2364 } 2365 2366 if (msz < sizeof (struct T_bind_req)) { 2367 tli_err = TSYSERR; unix_err = EINVAL; 2368 goto error; 2369 } 2370 2371 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2372 2373 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2374 (bind->PRIM_type == T_BIND_REQ)); 2375 2376 alen = bind->ADDR_length; 2377 aoff = bind->ADDR_offset; 2378 2379 /* negotiate max conn req pending */ 2380 if (IS_COTS(tep)) { 2381 qlen = bind->CONIND_number; 2382 if (qlen > tl_maxqlen) 2383 qlen = tl_maxqlen; 2384 } 2385 2386 /* 2387 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2388 * and bound again. 
2389 */ 2390 if ((tep->te_hash_hndl == NULL) && 2391 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2392 mod_hash_reserve_nosleep(tep->te_addrhash, 2393 &tep->te_hash_hndl) != 0) { 2394 tli_err = TSYSERR; unix_err = ENOSR; 2395 goto error; 2396 } 2397 2398 /* 2399 * Verify address correctness. 2400 */ 2401 if (IS_SOCKET(tep)) { 2402 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2403 2404 if ((alen != TL_SOUX_ADDRLEN) || 2405 (aoff < 0) || 2406 (aoff + alen > msz)) { 2407 (void) (STRLOG(TL_ID, tep->te_minor, 2408 1, SL_TRACE|SL_ERROR, 2409 "tl_bind: invalid socket addr")); 2410 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2411 tli_err = TSYSERR; unix_err = EINVAL; 2412 goto error; 2413 } 2414 /* Copy address from message to local buffer. */ 2415 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2416 /* 2417 * Check that we got correct address from sockets 2418 */ 2419 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2420 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2421 (void) (STRLOG(TL_ID, tep->te_minor, 2422 1, SL_TRACE|SL_ERROR, 2423 "tl_bind: invalid socket magic")); 2424 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2425 tli_err = TSYSERR; unix_err = EINVAL; 2426 goto error; 2427 } 2428 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2429 (ux_addr.soua_vp != NULL)) { 2430 (void) (STRLOG(TL_ID, tep->te_minor, 2431 1, SL_TRACE|SL_ERROR, 2432 "tl_bind: implicit addr non-empty")); 2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2434 tli_err = TSYSERR; unix_err = EINVAL; 2435 goto error; 2436 } 2437 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2438 (ux_addr.soua_vp == NULL)) { 2439 (void) (STRLOG(TL_ID, tep->te_minor, 2440 1, SL_TRACE|SL_ERROR, 2441 "tl_bind: explicit addr empty")); 2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2443 tli_err = TSYSERR; unix_err = EINVAL; 2444 goto error; 2445 } 2446 } else { 2447 if ((alen > 0) && ((aoff < 0) || 2448 ((ssize_t)(aoff + alen) > msz) || 2449 ((aoff + alen) < 0))) { 
2450 (void) (STRLOG(TL_ID, tep->te_minor, 2451 1, SL_TRACE|SL_ERROR, 2452 "tl_bind: invalid message")); 2453 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2454 tli_err = TSYSERR; unix_err = EINVAL; 2455 goto error; 2456 } 2457 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2458 (void) (STRLOG(TL_ID, tep->te_minor, 2459 1, SL_TRACE|SL_ERROR, 2460 "tl_bind: bad addr in message")); 2461 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2462 tli_err = TBADADDR; 2463 goto error; 2464 } 2465 #ifdef DEBUG 2466 /* 2467 * Mild form of ASSERT()ion to detect broken TPI apps. 2468 * if (! assertion) 2469 * log warning; 2470 */ 2471 if (! ((alen == 0 && aoff == 0) || 2472 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2473 (void) (STRLOG(TL_ID, tep->te_minor, 2474 3, SL_TRACE|SL_ERROR, 2475 "tl_bind: addr overlaps TPI message")); 2476 } 2477 #endif 2478 } 2479 2480 /* 2481 * Bind the address provided or allocate one if requested. 2482 * Allow rebinds with a new qlen value. 2483 */ 2484 if (IS_SOCKET(tep)) { 2485 /* 2486 * For anonymous requests the te_ap is already set up properly 2487 * so use minor number as an address. 2488 * For explicit requests need to check whether the address is 2489 * already in use. 2490 */ 2491 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2492 int rc; 2493 2494 if (tep->te_flag & TL_ADDRHASHED) { 2495 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2496 if (tep->te_vp == ux_addr.soua_vp) 2497 goto skip_addr_bind; 2498 else /* Rebind to a new address. */ 2499 tl_addr_unbind(tep); 2500 } 2501 /* 2502 * Insert address in the hash if it is not already 2503 * there. Since we use preallocated handle, the insert 2504 * can fail only if the key is already present. 
2505 */ 2506 rc = mod_hash_insert_reserve(tep->te_addrhash, 2507 (mod_hash_key_t)ux_addr.soua_vp, 2508 (mod_hash_val_t)tep, tep->te_hash_hndl); 2509 2510 if (rc != 0) { 2511 ASSERT(rc == MH_ERR_DUPLICATE); 2512 /* 2513 * Violate O_T_BIND_REQ semantics and fail with 2514 * TADDRBUSY - sockets will not use any address 2515 * other than supplied one for explicit binds. 2516 */ 2517 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2518 SL_TRACE|SL_ERROR, 2519 "tl_bind:requested addr %p is busy", 2520 ux_addr.soua_vp)); 2521 tli_err = TADDRBUSY; unix_err = 0; 2522 goto error; 2523 } 2524 tep->te_uxaddr = ux_addr; 2525 tep->te_flag |= TL_ADDRHASHED; 2526 tep->te_hash_hndl = NULL; 2527 } 2528 } else if (alen == 0) { 2529 /* 2530 * assign any free address 2531 */ 2532 if (! tl_get_any_addr(tep, NULL)) { 2533 (void) (STRLOG(TL_ID, tep->te_minor, 2534 1, SL_TRACE|SL_ERROR, 2535 "tl_bind:failed to get buffer for any " 2536 "address")); 2537 tli_err = TSYSERR; unix_err = ENOSR; 2538 goto error; 2539 } 2540 } else { 2541 addr_req.ta_alen = alen; 2542 addr_req.ta_abuf = (mp->b_rptr + aoff); 2543 addr_req.ta_zoneid = tep->te_zoneid; 2544 2545 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2546 if (tep->te_abuf == NULL) { 2547 tli_err = TSYSERR; unix_err = ENOSR; 2548 goto error; 2549 } 2550 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2551 tep->te_alen = alen; 2552 2553 if (mod_hash_insert_reserve(tep->te_addrhash, 2554 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2555 tep->te_hash_hndl) != 0) { 2556 if (save_prim_type == T_BIND_REQ) { 2557 /* 2558 * The bind semantics for this primitive 2559 * require a failure if the exact address 2560 * requested is busy 2561 */ 2562 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2563 SL_TRACE|SL_ERROR, 2564 "tl_bind:requested addr is busy")); 2565 tli_err = TADDRBUSY; unix_err = 0; 2566 goto error; 2567 } 2568 2569 /* 2570 * O_T_BIND_REQ semantics say if address if requested 2571 * address is busy, bind to any available free address 
2572 */ 2573 if (! tl_get_any_addr(tep, &addr_req)) { 2574 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2575 SL_TRACE|SL_ERROR, 2576 "tl_bind:unable to get any addr buf")); 2577 tli_err = TSYSERR; unix_err = ENOMEM; 2578 goto error; 2579 } 2580 } else { 2581 tep->te_flag |= TL_ADDRHASHED; 2582 tep->te_hash_hndl = NULL; 2583 } 2584 } 2585 2586 ASSERT(tep->te_alen >= 0); 2587 2588 skip_addr_bind: 2589 /* 2590 * prepare T_BIND_ACK TPI message 2591 */ 2592 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2593 bamp = reallocb(mp, basize, 0); 2594 if (bamp == NULL) { 2595 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2596 "tl_wput:tl_bind: allocb failed")); 2597 /* 2598 * roll back state changes 2599 */ 2600 tl_addr_unbind(tep); 2601 tep->te_state = TS_UNBND; 2602 tl_memrecover(wq, mp, basize); 2603 return; 2604 } 2605 2606 DB_TYPE(bamp) = M_PCPROTO; 2607 bamp->b_wptr = bamp->b_rptr + basize; 2608 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2609 b_ack->PRIM_type = T_BIND_ACK; 2610 b_ack->CONIND_number = qlen; 2611 b_ack->ADDR_length = tep->te_alen; 2612 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2613 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2614 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2615 2616 if (IS_COTS(tep)) { 2617 tep->te_qlen = qlen; 2618 if (qlen > 0) 2619 tep->te_flag |= TL_LISTENER; 2620 } 2621 2622 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2623 /* 2624 * send T_BIND_ACK message 2625 */ 2626 (void) qreply(wq, bamp); 2627 return; 2628 2629 error: 2630 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2631 if (ackmp == NULL) { 2632 /* 2633 * roll back state changes 2634 */ 2635 tep->te_state = save_state; 2636 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2637 return; 2638 } 2639 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2640 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2641 } 2642 2643 /* 2644 * Process T_UNBIND_REQ. 2645 * Called from serializer. 
2646 */ 2647 static void 2648 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2649 { 2650 queue_t *wq; 2651 mblk_t *ackmp; 2652 2653 if (tep->te_closing) { 2654 freemsg(mp); 2655 return; 2656 } 2657 2658 wq = tep->te_wq; 2659 2660 /* 2661 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2662 * ==> allocate for T_ERROR_ACK (known max) 2663 */ 2664 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2665 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2666 return; 2667 } 2668 /* 2669 * memory resources committed 2670 * Note: no message validation. T_UNBIND_REQ message is 2671 * same size as PRIM_type field so already verified earlier. 2672 */ 2673 2674 /* 2675 * validate state 2676 */ 2677 if (tep->te_state != TS_IDLE) { 2678 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2679 SL_TRACE|SL_ERROR, 2680 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2681 tep->te_state)); 2682 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2683 return; 2684 } 2685 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2686 2687 /* 2688 * TPI says on T_UNBIND_REQ: 2689 * send up a M_FLUSH to flush both 2690 * read and write queues 2691 */ 2692 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2693 2694 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2695 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2696 2697 /* 2698 * Sockets use bind with qlen==0 followed by bind() to 2699 * the same address with qlen > 0 for listeners. 2700 * We allow rebind with a new qlen value. 2701 */ 2702 tl_addr_unbind(tep); 2703 } 2704 2705 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2706 /* 2707 * send T_OK_ACK 2708 */ 2709 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2710 } 2711 2712 2713 /* 2714 * Option management code from drv/ip is used here 2715 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2716 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2717 * However, that is what we want as that option is 'unorthodox' 2718 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2719 * and not in T_SVR4_OPTMGMT_REQ/ACK 2720 * Note2: use of optcom_req means this routine is an exception to 2721 * recovery from allocb() failures. 2722 */ 2723 2724 static void 2725 tl_optmgmt(queue_t *wq, mblk_t *mp) 2726 { 2727 tl_endpt_t *tep; 2728 mblk_t *ackmp; 2729 union T_primitives *prim; 2730 cred_t *cr; 2731 2732 tep = (tl_endpt_t *)wq->q_ptr; 2733 prim = (union T_primitives *)mp->b_rptr; 2734 2735 /* 2736 * All Solaris components should pass a db_credp 2737 * for this TPI message, hence we ASSERT. 2738 * But in case there is some other M_PROTO that looks 2739 * like a TPI message sent by some other kernel 2740 * component, we check and return an error. 2741 */ 2742 cr = msg_getcred(mp, NULL); 2743 ASSERT(cr != NULL); 2744 if (cr == NULL) { 2745 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2746 return; 2747 } 2748 2749 /* all states OK for AF_UNIX options ? */ 2750 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2751 prim->type == T_SVR4_OPTMGMT_REQ) { 2752 /* 2753 * Broken TLI semantics that options can only be managed 2754 * in TS_IDLE state. Needed for Sparc ABI test suite that 2755 * tests this TLI (mis)feature using this device driver. 2756 */ 2757 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2758 SL_TRACE|SL_ERROR, 2759 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2760 tep->te_state)); 2761 /* 2762 * preallocate memory for T_ERROR_ACK 2763 */ 2764 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2765 if (! 
ackmp) { 2766 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2767 return; 2768 } 2769 2770 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2771 freemsg(mp); 2772 return; 2773 } 2774 2775 /* 2776 * call common option management routine from drv/ip 2777 */ 2778 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2779 svr4_optcom_req(wq, mp, cr, &tl_opt_obj); 2780 } else { 2781 ASSERT(prim->type == T_OPTMGMT_REQ); 2782 tpi_optcom_req(wq, mp, cr, &tl_opt_obj); 2783 } 2784 } 2785 2786 /* 2787 * Handle T_conn_req - the driver part of accept(). 2788 * If TL_SET[U]CRED generate the credentials options. 2789 * If this is a socket pass through options unmodified. 2790 * For sockets generate the T_CONN_CON here instead of 2791 * waiting for the T_CONN_RES. 2792 */ 2793 static void 2794 tl_conn_req(queue_t *wq, mblk_t *mp) 2795 { 2796 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2797 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2798 ssize_t msz = MBLKL(mp); 2799 t_scalar_t alen, aoff, olen, ooff, err = 0; 2800 tl_endpt_t *peer_tep = NULL; 2801 mblk_t *ackmp; 2802 mblk_t *dimp; 2803 struct T_discon_ind *di; 2804 soux_addr_t ux_addr; 2805 tl_addr_t dst; 2806 2807 ASSERT(IS_COTS(tep)); 2808 2809 if (tep->te_closing) { 2810 freemsg(mp); 2811 return; 2812 } 2813 2814 /* 2815 * preallocate memory for: 2816 * 1. max of T_ERROR_ACK and T_OK_ACK 2817 * ==> known max T_ERROR_ACK 2818 * 2. max of T_DISCON_IND and T_CONN_IND 2819 */ 2820 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2821 if (! 
ackmp) { 2822 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2823 return; 2824 } 2825 /* 2826 * memory committed for T_OK_ACK/T_ERROR_ACK now 2827 * will be committed for T_DISCON_IND/T_CONN_IND later 2828 */ 2829 2830 if (tep->te_state != TS_IDLE) { 2831 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2832 SL_TRACE|SL_ERROR, 2833 "tl_wput:T_CONN_REQ:out of state, state=%d", 2834 tep->te_state)); 2835 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2836 freemsg(mp); 2837 return; 2838 } 2839 2840 /* 2841 * validate the message 2842 * Note: dereference fields in struct inside message only 2843 * after validating the message length. 2844 */ 2845 if (msz < sizeof (struct T_conn_req)) { 2846 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2847 "tl_conn_req:invalid message length")); 2848 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2849 freemsg(mp); 2850 return; 2851 } 2852 alen = creq->DEST_length; 2853 aoff = creq->DEST_offset; 2854 olen = creq->OPT_length; 2855 ooff = creq->OPT_offset; 2856 if (olen == 0) 2857 ooff = 0; 2858 2859 if (IS_SOCKET(tep)) { 2860 if ((alen != TL_SOUX_ADDRLEN) || 2861 (aoff < 0) || 2862 (aoff + alen > msz) || 2863 (alen > msz - sizeof (struct T_conn_req))) { 2864 (void) (STRLOG(TL_ID, tep->te_minor, 2865 1, SL_TRACE|SL_ERROR, 2866 "tl_conn_req: invalid socket addr")); 2867 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2868 freemsg(mp); 2869 return; 2870 } 2871 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2872 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2873 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2874 (void) (STRLOG(TL_ID, tep->te_minor, 2875 1, SL_TRACE|SL_ERROR, 2876 "tl_conn_req: invalid socket magic")); 2877 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2878 freemsg(mp); 2879 return; 2880 } 2881 } else { 2882 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2883 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2884 ooff + olen < 0)) || 2885 olen < 0 || ooff < 0) { 2886 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2887 SL_TRACE|SL_ERROR, 2888 "tl_conn_req:invalid message")); 2889 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2890 freemsg(mp); 2891 return; 2892 } 2893 2894 if (alen <= 0 || aoff < 0 || 2895 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2896 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2897 SL_TRACE|SL_ERROR, 2898 "tl_conn_req:bad addr in message, " 2899 "alen=%d, msz=%ld", 2900 alen, msz)); 2901 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2902 freemsg(mp); 2903 return; 2904 } 2905 #ifdef DEBUG 2906 /* 2907 * Mild form of ASSERT()ion to detect broken TPI apps. 2908 * if (! assertion) 2909 * log warning; 2910 */ 2911 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2912 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2913 SL_TRACE|SL_ERROR, 2914 "tl_conn_req: addr overlaps TPI message")); 2915 } 2916 #endif 2917 if (olen) { 2918 /* 2919 * no opts in connect req 2920 * supported in this provider except for sockets. 2921 */ 2922 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2923 SL_TRACE|SL_ERROR, 2924 "tl_conn_req:options not supported " 2925 "in message")); 2926 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2927 freemsg(mp); 2928 return; 2929 } 2930 } 2931 2932 /* 2933 * Prevent tep from closing on us. 2934 */ 2935 if (! tl_noclose(tep)) { 2936 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2937 "tl_conn_req:endpoint is closing")); 2938 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2939 freemsg(mp); 2940 return; 2941 } 2942 2943 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2944 /* 2945 * get endpoint to connect to 2946 * check that peer with DEST addr is bound to addr 2947 * and has CONIND_number > 0 2948 */ 2949 dst.ta_alen = alen; 2950 dst.ta_abuf = mp->b_rptr + aoff; 2951 dst.ta_zoneid = tep->te_zoneid; 2952 2953 /* 2954 * Verify if remote addr is in use 2955 */ 2956 peer_tep = (IS_SOCKET(tep) ? 
2957 tl_sock_find_peer(tep, &ux_addr) : 2958 tl_find_peer(tep, &dst)); 2959 2960 if (peer_tep == NULL) { 2961 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2962 "tl_conn_req:no one at connect address")); 2963 err = ECONNREFUSED; 2964 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2965 /* 2966 * validate that number of incoming connection is 2967 * not to capacity on destination endpoint 2968 */ 2969 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2970 "tl_conn_req: qlen overflow connection refused")); 2971 err = ECONNREFUSED; 2972 } 2973 2974 /* 2975 * Send T_DISCON_IND in case of error 2976 */ 2977 if (err != 0) { 2978 if (peer_tep != NULL) 2979 tl_refrele(peer_tep); 2980 /* We are still expected to send T_OK_ACK */ 2981 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2982 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2983 tl_closeok(tep); 2984 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2985 M_PROTO, T_DISCON_IND); 2986 if (dimp == NULL) { 2987 tl_merror(wq, NULL, ENOSR); 2988 return; 2989 } 2990 di = (struct T_discon_ind *)dimp->b_rptr; 2991 di->DISCON_reason = err; 2992 di->SEQ_number = BADSEQNUM; 2993 2994 tep->te_state = TS_IDLE; 2995 /* 2996 * send T_DISCON_IND message 2997 */ 2998 putnext(tep->te_rq, dimp); 2999 return; 3000 } 3001 3002 ASSERT(IS_COTS(peer_tep)); 3003 3004 /* 3005 * Found the listener. At this point processing will continue on 3006 * listener serializer. Close of the endpoint should be blocked while we 3007 * switch serializers. 3008 */ 3009 tl_serializer_refhold(peer_tep->te_ser); 3010 tl_serializer_refrele(tep->te_ser); 3011 tep->te_ser = peer_tep->te_ser; 3012 ASSERT(tep->te_oconp == NULL); 3013 tep->te_oconp = peer_tep; 3014 3015 /* 3016 * It is safe to close now. Close may continue on listener serializer. 3017 */ 3018 tl_closeok(tep); 3019 3020 /* 3021 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3022 * data, so we link mp to ackmp. 
3023 */ 3024 ackmp->b_cont = mp; 3025 mp = ackmp; 3026 3027 tl_refhold(tep); 3028 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3029 } 3030 3031 /* 3032 * Finish T_CONN_REQ processing on listener serializer. 3033 */ 3034 static void 3035 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3036 { 3037 queue_t *wq; 3038 tl_endpt_t *peer_tep = tep->te_oconp; 3039 mblk_t *confmp, *cimp, *indmp; 3040 void *opts = NULL; 3041 mblk_t *ackmp = mp; 3042 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3043 struct T_conn_ind *ci; 3044 tl_icon_t *tip; 3045 void *addr_startp; 3046 t_scalar_t olen = creq->OPT_length; 3047 t_scalar_t ooff = creq->OPT_offset; 3048 size_t ci_msz; 3049 size_t size; 3050 cred_t *cr = NULL; 3051 pid_t cpid; 3052 3053 if (tep->te_closing) { 3054 TL_UNCONNECT(tep->te_oconp); 3055 tl_serializer_exit(tep); 3056 tl_refrele(tep); 3057 freemsg(mp); 3058 return; 3059 } 3060 3061 wq = tep->te_wq; 3062 tep->te_flag |= TL_EAGER; 3063 3064 /* 3065 * Extract preallocated ackmp from mp. 
3066 */ 3067 mp = mp->b_cont; 3068 ackmp->b_cont = NULL; 3069 3070 if (olen == 0) 3071 ooff = 0; 3072 3073 if (peer_tep->te_closing || 3074 !((peer_tep->te_state == TS_IDLE) || 3075 (peer_tep->te_state == TS_WRES_CIND))) { 3076 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3077 "tl_conn_req:peer in bad state (%d)", 3078 peer_tep->te_state)); 3079 TL_UNCONNECT(tep->te_oconp); 3080 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3081 freemsg(ackmp); 3082 tl_serializer_exit(tep); 3083 tl_refrele(tep); 3084 return; 3085 } 3086 3087 /* 3088 * preallocate now for T_DISCON_IND or T_CONN_IND 3089 */ 3090 /* 3091 * calculate length of T_CONN_IND message 3092 */ 3093 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3094 cr = msg_getcred(mp, &cpid); 3095 ASSERT(cr != NULL); 3096 if (peer_tep->te_flag & TL_SETCRED) { 3097 ooff = 0; 3098 olen = (t_scalar_t) sizeof (struct opthdr) + 3099 OPTLEN(sizeof (tl_credopt_t)); 3100 /* 1 option only */ 3101 } else { 3102 ooff = 0; 3103 olen = (t_scalar_t)sizeof (struct opthdr) + 3104 OPTLEN(ucredminsize(cr)); 3105 /* 1 option only */ 3106 } 3107 } 3108 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3109 ci_msz = T_ALIGN(ci_msz) + olen; 3110 size = max(ci_msz, sizeof (struct T_discon_ind)); 3111 3112 /* 3113 * Save options from mp - we'll need them for T_CONN_IND. 3114 */ 3115 if (ooff != 0) { 3116 opts = kmem_alloc(olen, KM_NOSLEEP); 3117 if (opts == NULL) { 3118 /* 3119 * roll back state changes 3120 */ 3121 tep->te_state = TS_IDLE; 3122 tl_memrecover(wq, mp, size); 3123 freemsg(ackmp); 3124 TL_UNCONNECT(tep->te_oconp); 3125 tl_serializer_exit(tep); 3126 tl_refrele(tep); 3127 return; 3128 } 3129 /* Copy options to a temp buffer */ 3130 bcopy(mp->b_rptr + ooff, opts, olen); 3131 } 3132 3133 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3134 /* 3135 * Generate a T_CONN_CON that has the identical address 3136 * (and options) as the T_CONN_REQ. 
3137 * NOTE: assumes that the T_conn_req and T_conn_con structures 3138 * are isomorphic. 3139 */ 3140 confmp = copyb(mp); 3141 if (! confmp) { 3142 /* 3143 * roll back state changes 3144 */ 3145 tep->te_state = TS_IDLE; 3146 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3147 freemsg(ackmp); 3148 if (opts != NULL) 3149 kmem_free(opts, olen); 3150 TL_UNCONNECT(tep->te_oconp); 3151 tl_serializer_exit(tep); 3152 tl_refrele(tep); 3153 return; 3154 } 3155 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3156 T_CONN_CON; 3157 } else { 3158 confmp = NULL; 3159 } 3160 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3161 /* 3162 * roll back state changes 3163 */ 3164 tep->te_state = TS_IDLE; 3165 tl_memrecover(wq, mp, size); 3166 freemsg(ackmp); 3167 if (opts != NULL) 3168 kmem_free(opts, olen); 3169 freemsg(confmp); 3170 TL_UNCONNECT(tep->te_oconp); 3171 tl_serializer_exit(tep); 3172 tl_refrele(tep); 3173 return; 3174 } 3175 3176 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3177 if (tip == NULL) { 3178 /* 3179 * roll back state changes 3180 */ 3181 tep->te_state = TS_IDLE; 3182 tl_memrecover(wq, indmp, sizeof (*tip)); 3183 freemsg(ackmp); 3184 if (opts != NULL) 3185 kmem_free(opts, olen); 3186 freemsg(confmp); 3187 TL_UNCONNECT(tep->te_oconp); 3188 tl_serializer_exit(tep); 3189 tl_refrele(tep); 3190 return; 3191 } 3192 tip->ti_mp = NULL; 3193 3194 /* 3195 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3196 * and tl_icon_t cell. 3197 */ 3198 3199 /* 3200 * ack validity of request and send the peer credential in the ACK. 
3201 */ 3202 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3203 3204 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3205 confmp != NULL) { 3206 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3207 } 3208 3209 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3210 3211 /* 3212 * prepare message to send T_CONN_IND 3213 */ 3214 /* 3215 * allocate the message - original data blocks retained 3216 * in the returned mblk 3217 */ 3218 cimp = tl_resizemp(indmp, size); 3219 if (! cimp) { 3220 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3221 "tl_conn_req:con_ind:allocb failure")); 3222 tl_merror(wq, indmp, ENOMEM); 3223 TL_UNCONNECT(tep->te_oconp); 3224 tl_serializer_exit(tep); 3225 tl_refrele(tep); 3226 if (opts != NULL) 3227 kmem_free(opts, olen); 3228 freemsg(confmp); 3229 ASSERT(tip->ti_mp == NULL); 3230 kmem_free(tip, sizeof (*tip)); 3231 return; 3232 } 3233 3234 DB_TYPE(cimp) = M_PROTO; 3235 ci = (struct T_conn_ind *)cimp->b_rptr; 3236 ci->PRIM_type = T_CONN_IND; 3237 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3238 ci->SRC_length = tep->te_alen; 3239 ci->SEQ_number = tep->te_seqno; 3240 3241 addr_startp = cimp->b_rptr + ci->SRC_offset; 3242 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3243 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3244 3245 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3246 ci->SRC_length); 3247 ci->OPT_length = olen; /* because only 1 option */ 3248 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3249 cr, cpid, 3250 peer_tep->te_flag, peer_tep->te_credp); 3251 } else if (ooff != 0) { 3252 /* Copy option from T_CONN_REQ */ 3253 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3254 ci->SRC_length); 3255 ci->OPT_length = olen; 3256 ASSERT(opts != NULL); 3257 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3258 } else { 3259 ci->OPT_offset = 0; 3260 ci->OPT_length = 0; 3261 } 3262 if (opts != NULL) 3263 kmem_free(opts, olen); 3264 3265 /* 3266 * register connection request with 
server peer 3267 * append to list of incoming connections 3268 * increment references for both peer_tep and tep: peer_tep is placed on 3269 * te_oconp and tep is placed on listeners queue. 3270 */ 3271 tip->ti_tep = tep; 3272 tip->ti_seqno = tep->te_seqno; 3273 list_insert_tail(&peer_tep->te_iconp, tip); 3274 peer_tep->te_nicon++; 3275 3276 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3277 /* 3278 * send the T_CONN_IND message 3279 */ 3280 putnext(peer_tep->te_rq, cimp); 3281 3282 /* 3283 * Send a T_CONN_CON message for sockets. 3284 * Disable the queues until we have reached the correct state! 3285 */ 3286 if (confmp != NULL) { 3287 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3288 noenable(wq); 3289 putnext(tep->te_rq, confmp); 3290 } 3291 /* 3292 * Now we need to increment tep reference because tep is referenced by 3293 * server list of pending connections. We also need to decrement 3294 * reference before exiting serializer. Two operations void each other 3295 * so we don't modify reference at all. 3296 */ 3297 ASSERT(tep->te_refcnt >= 2); 3298 ASSERT(peer_tep->te_refcnt >= 2); 3299 tl_serializer_exit(tep); 3300 } 3301 3302 3303 3304 /* 3305 * Handle T_conn_res on listener stream. Called on listener serializer. 3306 * tl_conn_req has already generated the T_CONN_CON. 3307 * tl_conn_res is called on listener serializer. 3308 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3309 * Switch eager serializer to acceptor's. 3310 * 3311 * If TL_SET[U]CRED generate the credentials options. 3312 * For sockets tl_conn_req has already generated the T_CONN_CON. 
3313 */ 3314 static void 3315 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3316 { 3317 queue_t *wq; 3318 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3319 ssize_t msz = MBLKL(mp); 3320 t_scalar_t olen, ooff, err = 0; 3321 t_scalar_t prim = cres->PRIM_type; 3322 uchar_t *addr_startp; 3323 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3324 tl_icon_t *tip; 3325 size_t size; 3326 mblk_t *ackmp, *respmp; 3327 mblk_t *dimp, *ccmp = NULL; 3328 struct T_discon_ind *di; 3329 struct T_conn_con *cc; 3330 boolean_t client_noclose_set = B_FALSE; 3331 boolean_t switch_client_serializer = B_TRUE; 3332 3333 ASSERT(IS_COTS(tep)); 3334 3335 if (tep->te_closing) { 3336 freemsg(mp); 3337 return; 3338 } 3339 3340 wq = tep->te_wq; 3341 3342 /* 3343 * preallocate memory for: 3344 * 1. max of T_ERROR_ACK and T_OK_ACK 3345 * ==> known max T_ERROR_ACK 3346 * 2. max of T_DISCON_IND and T_CONN_CON 3347 */ 3348 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3349 if (! ackmp) { 3350 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3351 return; 3352 } 3353 /* 3354 * memory committed for T_OK_ACK/T_ERROR_ACK now 3355 * will be committed for T_DISCON_IND/T_CONN_CON later 3356 */ 3357 3358 3359 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3360 3361 /* 3362 * validate state 3363 */ 3364 if (tep->te_state != TS_WRES_CIND) { 3365 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3366 SL_TRACE|SL_ERROR, 3367 "tl_wput:T_CONN_RES:out of state, state=%d", 3368 tep->te_state)); 3369 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3370 freemsg(mp); 3371 return; 3372 } 3373 3374 /* 3375 * validate the message 3376 * Note: dereference fields in struct inside message only 3377 * after validating the message length. 
3378 */ 3379 if (msz < sizeof (struct T_conn_res)) { 3380 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3381 "tl_conn_res:invalid message length")); 3382 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3383 freemsg(mp); 3384 return; 3385 } 3386 olen = cres->OPT_length; 3387 ooff = cres->OPT_offset; 3388 if (((olen > 0) && ((ooff + olen) > msz))) { 3389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3390 "tl_conn_res:invalid message")); 3391 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3392 freemsg(mp); 3393 return; 3394 } 3395 if (olen) { 3396 /* 3397 * no opts in connect res 3398 * supported in this provider 3399 */ 3400 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3401 "tl_conn_res:options not supported in message")); 3402 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3403 freemsg(mp); 3404 return; 3405 } 3406 3407 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3408 ASSERT(tep->te_state == TS_WACK_CRES); 3409 3410 if (cres->SEQ_number < TL_MINOR_START && 3411 cres->SEQ_number >= BADSEQNUM) { 3412 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3413 "tl_conn_res:remote endpoint sequence number bad")); 3414 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3415 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3416 freemsg(mp); 3417 return; 3418 } 3419 3420 /* 3421 * find accepting endpoint. Will have extra reference if found. 3422 */ 3423 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, 3424 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, 3425 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { 3426 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3427 "tl_conn_res:bad accepting endpoint")); 3428 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3429 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3430 freemsg(mp); 3431 return; 3432 } 3433 3434 /* 3435 * Prevent acceptor from closing. 3436 */ 3437 if (! 
tl_noclose(acc_ep)) { 3438 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3439 "tl_conn_res:bad accepting endpoint")); 3440 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3441 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3442 tl_refrele(acc_ep); 3443 freemsg(mp); 3444 return; 3445 } 3446 3447 acc_ep->te_flag |= TL_ACCEPTOR; 3448 3449 /* 3450 * validate that accepting endpoint, if different from listening 3451 * has address bound => state is TS_IDLE 3452 * TROUBLE in XPG4 !!? 3453 */ 3454 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { 3455 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3456 "tl_conn_res:accepting endpoint has no address bound," 3457 "state=%d", acc_ep->te_state)); 3458 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3459 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3460 freemsg(mp); 3461 tl_closeok(acc_ep); 3462 tl_refrele(acc_ep); 3463 return; 3464 } 3465 3466 /* 3467 * validate if accepting endpt same as listening, then 3468 * no other incoming connection should be on the queue 3469 */ 3470 3471 if ((tep == acc_ep) && (tep->te_nicon > 1)) { 3472 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3473 "tl_conn_res: > 1 conn_ind on listener-acceptor")); 3474 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3475 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3476 freemsg(mp); 3477 tl_closeok(acc_ep); 3478 tl_refrele(acc_ep); 3479 return; 3480 } 3481 3482 /* 3483 * Mark for deletion, the entry corresponding to client 3484 * on list of pending connections made by the listener 3485 * search list to see if client is one of the 3486 * recorded as a listener. 
3487 */ 3488 tip = tl_icon_find(tep, cres->SEQ_number); 3489 if (tip == NULL) { 3490 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3491 "tl_conn_res:no client in listener list")); 3492 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3493 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3494 freemsg(mp); 3495 tl_closeok(acc_ep); 3496 tl_refrele(acc_ep); 3497 return; 3498 } 3499 3500 /* 3501 * If ti_tep is NULL the client has already closed. In this case 3502 * the code below will avoid any action on the client side 3503 * but complete the server and acceptor state transitions. 3504 */ 3505 ASSERT(tip->ti_tep == NULL || 3506 tip->ti_tep->te_seqno == cres->SEQ_number); 3507 cl_ep = tip->ti_tep; 3508 3509 /* 3510 * If the client is present it is switched from listener's to acceptor's 3511 * serializer. We should block client closes while serializers are 3512 * being switched. 3513 * 3514 * It is possible that the client is present but is currently being 3515 * closed. There are two possible cases: 3516 * 3517 * 1) The client has already entered tl_close_finish_ser() and sent 3518 * T_ORDREL_IND. In this case we can just ignore the client (but we 3519 * still need to send all messages from tip->ti_mp to the acceptor). 3520 * 3521 * 2) The client started the close but has not entered 3522 * tl_close_finish_ser() yet. In this case, the client is already 3523 * proceeding asynchronously on the listener's serializer, so we're 3524 * forced to change the acceptor to use the listener's serializer to 3525 * ensure that any operations on the acceptor are serialized with 3526 * respect to the close that's in-progress. 3527 */ 3528 if (cl_ep != NULL) { 3529 if (tl_noclose(cl_ep)) { 3530 client_noclose_set = B_TRUE; 3531 } else { 3532 /* 3533 * Client is closing. If it it has sent the 3534 * T_ORDREL_IND, we can simply ignore it - otherwise, 3535 * we have to let let the client continue until it is 3536 * sent. 
3537 * 3538 * If we do continue using the client, acceptor will 3539 * switch to client's serializer which is used by client 3540 * for its close. 3541 */ 3542 tl_client_closing_when_accepting++; 3543 switch_client_serializer = B_FALSE; 3544 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect || 3545 cl_ep->te_state == -1) 3546 cl_ep = NULL; 3547 } 3548 } 3549 3550 if (cl_ep != NULL) { 3551 /* 3552 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER 3553 * (latter for sockets only) 3554 */ 3555 if (cl_ep->te_state != TS_WCON_CREQ && 3556 (cl_ep->te_state != TS_DATA_XFER && 3557 IS_SOCKET(cl_ep))) { 3558 err = ECONNREFUSED; 3559 /* 3560 * T_DISCON_IND sent later after committing memory 3561 * and acking validity of request 3562 */ 3563 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 3564 "tl_conn_res:peer in bad state")); 3565 } 3566 3567 /* 3568 * preallocate now for T_DISCON_IND or T_CONN_CONN 3569 * ack validity of request (T_OK_ACK) after memory committed 3570 */ 3571 3572 if (err) 3573 size = sizeof (struct T_discon_ind); 3574 else { 3575 /* 3576 * calculate length of T_CONN_CON message 3577 */ 3578 olen = 0; 3579 if (cl_ep->te_flag & TL_SETCRED) { 3580 olen = (t_scalar_t)sizeof (struct opthdr) + 3581 OPTLEN(sizeof (tl_credopt_t)); 3582 } else if (cl_ep->te_flag & TL_SETUCRED) { 3583 olen = (t_scalar_t)sizeof (struct opthdr) + 3584 OPTLEN(ucredminsize(acc_ep->te_credp)); 3585 } 3586 size = T_ALIGN(sizeof (struct T_conn_con) + 3587 acc_ep->te_alen) + olen; 3588 } 3589 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3590 /* 3591 * roll back state changes 3592 */ 3593 tep->te_state = TS_WRES_CIND; 3594 tl_memrecover(wq, mp, size); 3595 freemsg(ackmp); 3596 if (client_noclose_set) 3597 tl_closeok(cl_ep); 3598 tl_closeok(acc_ep); 3599 tl_refrele(acc_ep); 3600 return; 3601 } 3602 mp = NULL; 3603 } 3604 3605 /* 3606 * Now ack validity of request 3607 */ 3608 if (tep->te_nicon == 1) { 3609 if (tep == acc_ep) 3610 tep->te_state = NEXTSTATE(TE_OK_ACK2, 
tep->te_state); 3611 else 3612 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state); 3613 } else 3614 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state); 3615 3616 /* 3617 * send T_DISCON_IND now if client state validation failed earlier 3618 */ 3619 if (err) { 3620 tl_ok_ack(wq, ackmp, prim); 3621 /* 3622 * flush the queues - why always ? 3623 */ 3624 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3625 3626 dimp = tl_resizemp(respmp, size); 3627 if (! dimp) { 3628 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3629 SL_TRACE|SL_ERROR, 3630 "tl_conn_res:con_ind:allocb failure")); 3631 tl_merror(wq, respmp, ENOMEM); 3632 tl_closeok(acc_ep); 3633 if (client_noclose_set) 3634 tl_closeok(cl_ep); 3635 tl_refrele(acc_ep); 3636 return; 3637 } 3638 if (dimp->b_cont) { 3639 /* no user data in provider generated discon ind */ 3640 freemsg(dimp->b_cont); 3641 dimp->b_cont = NULL; 3642 } 3643 3644 DB_TYPE(dimp) = M_PROTO; 3645 di = (struct T_discon_ind *)dimp->b_rptr; 3646 di->PRIM_type = T_DISCON_IND; 3647 di->DISCON_reason = err; 3648 di->SEQ_number = BADSEQNUM; 3649 3650 tep->te_state = TS_IDLE; 3651 /* 3652 * send T_DISCON_IND message 3653 */ 3654 putnext(acc_ep->te_rq, dimp); 3655 if (client_noclose_set) 3656 tl_closeok(cl_ep); 3657 tl_closeok(acc_ep); 3658 tl_refrele(acc_ep); 3659 return; 3660 } 3661 3662 /* 3663 * now start connecting the accepting endpoint 3664 */ 3665 if (tep != acc_ep) 3666 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3667 3668 if (cl_ep == NULL) { 3669 /* 3670 * The client has already closed. Send up any queued messages 3671 * and change the state accordingly. 
3672 */ 3673 tl_ok_ack(wq, ackmp, prim); 3674 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3675 3676 /* 3677 * remove endpoint from incoming connection 3678 * delete client from list of incoming connections 3679 */ 3680 tl_freetip(tep, tip); 3681 freemsg(mp); 3682 tl_closeok(acc_ep); 3683 tl_refrele(acc_ep); 3684 return; 3685 } else if (tip->ti_mp != NULL) { 3686 /* 3687 * The client could have queued a T_DISCON_IND which needs 3688 * to be sent up. 3689 * Note that t_discon_req can not operate the same as 3690 * t_data_req since it is not possible for it to putbq 3691 * the message and return -1 due to the use of qwriter. 3692 */ 3693 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3694 } 3695 3696 /* 3697 * prepare connect confirm T_CONN_CON message 3698 */ 3699 3700 /* 3701 * allocate the message - original data blocks 3702 * retained in the returned mblk 3703 */ 3704 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) { 3705 ccmp = tl_resizemp(respmp, size); 3706 if (ccmp == NULL) { 3707 tl_ok_ack(wq, ackmp, prim); 3708 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3709 SL_TRACE|SL_ERROR, 3710 "tl_conn_res:conn_con:allocb failure")); 3711 tl_merror(wq, respmp, ENOMEM); 3712 tl_closeok(acc_ep); 3713 if (client_noclose_set) 3714 tl_closeok(cl_ep); 3715 tl_refrele(acc_ep); 3716 return; 3717 } 3718 3719 DB_TYPE(ccmp) = M_PROTO; 3720 cc = (struct T_conn_con *)ccmp->b_rptr; 3721 cc->PRIM_type = T_CONN_CON; 3722 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con); 3723 cc->RES_length = acc_ep->te_alen; 3724 addr_startp = ccmp->b_rptr + cc->RES_offset; 3725 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen); 3726 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3727 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset + 3728 cc->RES_length); 3729 cc->OPT_length = olen; 3730 tl_fill_option(ccmp->b_rptr + cc->OPT_offset, 3731 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag, 3732 cl_ep->te_credp); 3733 } else { 3734 cc->OPT_offset = 0; 3735 cc->OPT_length = 0; 3736 } 3737 /* 3738 * 
Forward the credential in the packet so it can be picked up 3739 * at the higher layers for more complete credential processing 3740 */ 3741 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid); 3742 } else { 3743 freemsg(respmp); 3744 respmp = NULL; 3745 } 3746 3747 /* 3748 * make connection linking 3749 * accepting and client endpoints 3750 * No need to increment references: 3751 * on client: it should already have one from tip->ti_tep linkage. 3752 * on acceptor is should already have one from the table lookup. 3753 * 3754 * At this point both client and acceptor can't close. Set client 3755 * serializer to acceptor's. 3756 */ 3757 ASSERT(cl_ep->te_refcnt >= 2); 3758 ASSERT(acc_ep->te_refcnt >= 2); 3759 ASSERT(cl_ep->te_conp == NULL); 3760 ASSERT(acc_ep->te_conp == NULL); 3761 cl_ep->te_conp = acc_ep; 3762 acc_ep->te_conp = cl_ep; 3763 ASSERT(cl_ep->te_ser == tep->te_ser); 3764 if (switch_client_serializer) { 3765 mutex_enter(&cl_ep->te_ser_lock); 3766 if (cl_ep->te_ser_count > 0) { 3767 switch_client_serializer = B_FALSE; 3768 tl_serializer_noswitch++; 3769 } else { 3770 /* 3771 * Move client to the acceptor's serializer. 3772 */ 3773 tl_serializer_refhold(acc_ep->te_ser); 3774 tl_serializer_refrele(cl_ep->te_ser); 3775 cl_ep->te_ser = acc_ep->te_ser; 3776 } 3777 mutex_exit(&cl_ep->te_ser_lock); 3778 } 3779 if (!switch_client_serializer) { 3780 /* 3781 * It is not possible to switch client to use acceptor's. 3782 * Move acceptor to client's serializer (which is the same as 3783 * listener's). 
3784 */ 3785 tl_serializer_refhold(cl_ep->te_ser); 3786 tl_serializer_refrele(acc_ep->te_ser); 3787 acc_ep->te_ser = cl_ep->te_ser; 3788 } 3789 3790 TL_REMOVE_PEER(cl_ep->te_oconp); 3791 TL_REMOVE_PEER(acc_ep->te_oconp); 3792 3793 /* 3794 * remove endpoint from incoming connection 3795 * delete client from list of incoming connections 3796 */ 3797 tip->ti_tep = NULL; 3798 tl_freetip(tep, tip); 3799 tl_ok_ack(wq, ackmp, prim); 3800 3801 /* 3802 * data blocks already linked in reallocb() 3803 */ 3804 3805 /* 3806 * link queues so that I_SENDFD will work 3807 */ 3808 if (! IS_SOCKET(tep)) { 3809 acc_ep->te_wq->q_next = cl_ep->te_rq; 3810 cl_ep->te_wq->q_next = acc_ep->te_rq; 3811 } 3812 3813 /* 3814 * send T_CONN_CON up on client side unless it was already 3815 * done (for a socket). In cases any data or ordrel req has been 3816 * queued make sure that the service procedure runs. 3817 */ 3818 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) { 3819 enableok(cl_ep->te_wq); 3820 TL_QENABLE(cl_ep); 3821 if (ccmp != NULL) 3822 freemsg(ccmp); 3823 } else { 3824 /* 3825 * change client state on TE_CONN_CON event 3826 */ 3827 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state); 3828 putnext(cl_ep->te_rq, ccmp); 3829 } 3830 3831 /* Mark the both endpoints as accepted */ 3832 cl_ep->te_flag |= TL_ACCEPTED; 3833 acc_ep->te_flag |= TL_ACCEPTED; 3834 3835 /* 3836 * Allow client and acceptor to close. 
3837 */ 3838 tl_closeok(acc_ep); 3839 if (client_noclose_set) 3840 tl_closeok(cl_ep); 3841 } 3842 3843 3844 3845 3846 static void 3847 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3848 { 3849 queue_t *wq; 3850 struct T_discon_req *dr; 3851 ssize_t msz; 3852 tl_endpt_t *peer_tep = tep->te_conp; 3853 tl_endpt_t *srv_tep = tep->te_oconp; 3854 tl_icon_t *tip; 3855 size_t size; 3856 mblk_t *ackmp, *dimp, *respmp; 3857 struct T_discon_ind *di; 3858 t_scalar_t save_state, new_state; 3859 3860 if (tep->te_closing) { 3861 freemsg(mp); 3862 return; 3863 } 3864 3865 if ((peer_tep != NULL) && peer_tep->te_closing) { 3866 TL_UNCONNECT(tep->te_conp); 3867 peer_tep = NULL; 3868 } 3869 if ((srv_tep != NULL) && srv_tep->te_closing) { 3870 TL_UNCONNECT(tep->te_oconp); 3871 srv_tep = NULL; 3872 } 3873 3874 wq = tep->te_wq; 3875 3876 /* 3877 * preallocate memory for: 3878 * 1. max of T_ERROR_ACK and T_OK_ACK 3879 * ==> known max T_ERROR_ACK 3880 * 2. for T_DISCON_IND 3881 */ 3882 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3883 if (! ackmp) { 3884 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3885 return; 3886 } 3887 /* 3888 * memory committed for T_OK_ACK/T_ERROR_ACK now 3889 * will be committed for T_DISCON_IND later 3890 */ 3891 3892 dr = (struct T_discon_req *)mp->b_rptr; 3893 msz = MBLKL(mp); 3894 3895 /* 3896 * validate the state 3897 */ 3898 save_state = new_state = tep->te_state; 3899 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3900 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3901 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3902 SL_TRACE|SL_ERROR, 3903 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3904 tep->te_state)); 3905 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3906 freemsg(mp); 3907 return; 3908 } 3909 /* 3910 * Defer committing the state change until it is determined if 3911 * the message will be queued with the tl_icon or not. 
3912 */ 3913 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3914 3915 /* validate the message */ 3916 if (msz < sizeof (struct T_discon_req)) { 3917 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3918 "tl_discon_req:invalid message")); 3919 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3920 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3921 freemsg(mp); 3922 return; 3923 } 3924 3925 /* 3926 * if server, then validate that client exists 3927 * by connection sequence number etc. 3928 */ 3929 if (tep->te_nicon > 0) { /* server */ 3930 3931 /* 3932 * search server list for disconnect client 3933 */ 3934 tip = tl_icon_find(tep, dr->SEQ_number); 3935 if (tip == NULL) { 3936 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3937 SL_TRACE|SL_ERROR, 3938 "tl_discon_req:no disconnect endpoint")); 3939 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3940 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3941 freemsg(mp); 3942 return; 3943 } 3944 /* 3945 * If ti_tep is NULL the client has already closed. In this case 3946 * the code below will avoid any action on the client side. 3947 */ 3948 3949 IMPLY(tip->ti_tep != NULL, 3950 tip->ti_tep->te_seqno == dr->SEQ_number); 3951 peer_tep = tip->ti_tep; 3952 } 3953 3954 /* 3955 * preallocate now for T_DISCON_IND 3956 * ack validity of request (T_OK_ACK) after memory committed 3957 */ 3958 size = sizeof (struct T_discon_ind); 3959 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3960 tl_memrecover(wq, mp, size); 3961 freemsg(ackmp); 3962 return; 3963 } 3964 3965 /* 3966 * prepare message to ack validity of request 3967 */ 3968 if (tep->te_nicon == 0) 3969 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3970 else 3971 if (tep->te_nicon == 1) 3972 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3973 else 3974 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3975 3976 /* 3977 * Flushing queues according to TPI. Using the old state. 
3978 */ 3979 if ((tep->te_nicon <= 1) && 3980 ((save_state == TS_DATA_XFER) || 3981 (save_state == TS_WIND_ORDREL) || 3982 (save_state == TS_WREQ_ORDREL))) 3983 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3984 3985 /* send T_OK_ACK up */ 3986 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3987 3988 /* 3989 * now do disconnect business 3990 */ 3991 if (tep->te_nicon > 0) { /* listener */ 3992 if (peer_tep != NULL && !peer_tep->te_closing) { 3993 /* 3994 * disconnect incoming connect request pending to tep 3995 */ 3996 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 3997 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3998 SL_TRACE|SL_ERROR, 3999 "tl_discon_req: reallocb failed")); 4000 tep->te_state = new_state; 4001 tl_merror(wq, respmp, ENOMEM); 4002 return; 4003 } 4004 di = (struct T_discon_ind *)dimp->b_rptr; 4005 di->SEQ_number = BADSEQNUM; 4006 save_state = peer_tep->te_state; 4007 peer_tep->te_state = TS_IDLE; 4008 4009 TL_REMOVE_PEER(peer_tep->te_oconp); 4010 enableok(peer_tep->te_wq); 4011 TL_QENABLE(peer_tep); 4012 } else { 4013 freemsg(respmp); 4014 dimp = NULL; 4015 } 4016 4017 /* 4018 * remove endpoint from incoming connection list 4019 * - remove disconnect client from list on server 4020 */ 4021 tl_freetip(tep, tip); 4022 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4023 /* 4024 * disconnect an outgoing request pending from tep 4025 */ 4026 4027 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4028 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4029 SL_TRACE|SL_ERROR, 4030 "tl_discon_req: reallocb failed")); 4031 tep->te_state = new_state; 4032 tl_merror(wq, respmp, ENOMEM); 4033 return; 4034 } 4035 di = (struct T_discon_ind *)dimp->b_rptr; 4036 DB_TYPE(dimp) = M_PROTO; 4037 di->PRIM_type = T_DISCON_IND; 4038 di->DISCON_reason = ECONNRESET; 4039 di->SEQ_number = tep->te_seqno; 4040 4041 /* 4042 * If this is a socket the T_DISCON_IND is queued with 4043 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4044 * from the list of pending connections. 
4045 * Note that when te_oconp is set the peer better have 4046 * a t_connind_t for the client. 4047 */ 4048 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4049 /* 4050 * No need to check that 4051 * ti_tep == NULL since the T_DISCON_IND 4052 * takes precedence over other queued 4053 * messages. 4054 */ 4055 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4056 peer_tep = NULL; 4057 dimp = NULL; 4058 /* 4059 * Can't clear te_oconp since tl_co_unconnect needs 4060 * it as a hint not to free the tep. 4061 * Keep the state unchanged since tl_conn_res inspects 4062 * it. 4063 */ 4064 new_state = tep->te_state; 4065 } else { 4066 /* Found - delete it */ 4067 tip = tl_icon_find(peer_tep, tep->te_seqno); 4068 if (tip != NULL) { 4069 ASSERT(tep == tip->ti_tep); 4070 save_state = peer_tep->te_state; 4071 if (peer_tep->te_nicon == 1) 4072 peer_tep->te_state = 4073 NEXTSTATE(TE_DISCON_IND2, 4074 peer_tep->te_state); 4075 else 4076 peer_tep->te_state = 4077 NEXTSTATE(TE_DISCON_IND3, 4078 peer_tep->te_state); 4079 tl_freetip(peer_tep, tip); 4080 } 4081 ASSERT(tep->te_oconp != NULL); 4082 TL_UNCONNECT(tep->te_oconp); 4083 } 4084 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4085 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4086 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4087 SL_TRACE|SL_ERROR, 4088 "tl_discon_req: reallocb failed")); 4089 tep->te_state = new_state; 4090 tl_merror(wq, respmp, ENOMEM); 4091 return; 4092 } 4093 di = (struct T_discon_ind *)dimp->b_rptr; 4094 di->SEQ_number = BADSEQNUM; 4095 4096 save_state = peer_tep->te_state; 4097 peer_tep->te_state = TS_IDLE; 4098 } else { 4099 /* Not connected */ 4100 tep->te_state = new_state; 4101 freemsg(respmp); 4102 return; 4103 } 4104 4105 /* Commit state changes */ 4106 tep->te_state = new_state; 4107 4108 if (peer_tep == NULL) { 4109 ASSERT(dimp == NULL); 4110 goto done; 4111 } 4112 /* 4113 * Flush queues on peer before sending up 4114 * T_DISCON_IND according to TPI 4115 */ 4116 4117 if ((save_state == TS_DATA_XFER) || 4118 (save_state == TS_WIND_ORDREL) || 4119 (save_state == TS_WREQ_ORDREL)) 4120 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4121 4122 DB_TYPE(dimp) = M_PROTO; 4123 di->PRIM_type = T_DISCON_IND; 4124 di->DISCON_reason = ECONNRESET; 4125 4126 /* 4127 * data blocks already linked into dimp by reallocb() 4128 */ 4129 /* 4130 * send indication message to peer user module 4131 */ 4132 ASSERT(dimp != NULL); 4133 putnext(peer_tep->te_rq, dimp); 4134 done: 4135 if (tep->te_conp) { /* disconnect pointers if connected */ 4136 ASSERT(! peer_tep->te_closing); 4137 4138 /* 4139 * Messages may be queued on peer's write queue 4140 * waiting to be processed by its write service 4141 * procedure. Before the pointer to the peer transport 4142 * structure is set to NULL, qenable the peer's write 4143 * queue so that the queued up messages are processed. 4144 */ 4145 if ((save_state == TS_DATA_XFER) || 4146 (save_state == TS_WIND_ORDREL) || 4147 (save_state == TS_WREQ_ORDREL)) 4148 TL_QENABLE(peer_tep); 4149 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4150 TL_UNCONNECT(peer_tep->te_conp); 4151 if (! 
IS_SOCKET(tep)) { 4152 /* 4153 * unlink the streams 4154 */ 4155 tep->te_wq->q_next = NULL; 4156 peer_tep->te_wq->q_next = NULL; 4157 } 4158 TL_UNCONNECT(tep->te_conp); 4159 } 4160 } 4161 4162 4163 static void 4164 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4165 { 4166 queue_t *wq; 4167 size_t ack_sz; 4168 mblk_t *ackmp; 4169 struct T_addr_ack *taa; 4170 4171 if (tep->te_closing) { 4172 freemsg(mp); 4173 return; 4174 } 4175 4176 wq = tep->te_wq; 4177 4178 /* 4179 * Note: T_ADDR_REQ message has only PRIM_type field 4180 * so it is already validated earlier. 4181 */ 4182 4183 if (IS_CLTS(tep) || 4184 (tep->te_state > TS_WREQ_ORDREL) || 4185 (tep->te_state < TS_DATA_XFER)) { 4186 /* 4187 * Either connectionless or connection oriented but not 4188 * in connected data transfer state or half-closed states. 4189 */ 4190 ack_sz = sizeof (struct T_addr_ack); 4191 if (tep->te_state >= TS_IDLE) 4192 /* is bound */ 4193 ack_sz += tep->te_alen; 4194 ackmp = reallocb(mp, ack_sz, 0); 4195 if (ackmp == NULL) { 4196 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4197 SL_TRACE|SL_ERROR, 4198 "tl_addr_req: reallocb failed")); 4199 tl_memrecover(wq, mp, ack_sz); 4200 return; 4201 } 4202 4203 taa = (struct T_addr_ack *)ackmp->b_rptr; 4204 4205 bzero(taa, sizeof (struct T_addr_ack)); 4206 4207 taa->PRIM_type = T_ADDR_ACK; 4208 ackmp->b_datap->db_type = M_PCPROTO; 4209 ackmp->b_wptr = (uchar_t *)&taa[1]; 4210 4211 if (tep->te_state >= TS_IDLE) { 4212 /* endpoint is bound */ 4213 taa->LOCADDR_length = tep->te_alen; 4214 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4215 4216 bcopy(tep->te_abuf, ackmp->b_wptr, 4217 tep->te_alen); 4218 ackmp->b_wptr += tep->te_alen; 4219 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4220 } 4221 4222 (void) qreply(wq, ackmp); 4223 } else { 4224 ASSERT(tep->te_state == TS_DATA_XFER || 4225 tep->te_state == TS_WIND_ORDREL || 4226 tep->te_state == TS_WREQ_ORDREL); 4227 /* connection oriented in data transfer */ 4228 tl_connected_cots_addr_req(mp, tep); 4229 } 4230 
} 4231 4232 4233 static void 4234 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4235 { 4236 tl_endpt_t *peer_tep; 4237 size_t ack_sz; 4238 mblk_t *ackmp; 4239 struct T_addr_ack *taa; 4240 uchar_t *addr_startp; 4241 4242 if (tep->te_closing) { 4243 freemsg(mp); 4244 return; 4245 } 4246 4247 ASSERT(tep->te_state >= TS_IDLE); 4248 4249 ack_sz = sizeof (struct T_addr_ack); 4250 ack_sz += T_ALIGN(tep->te_alen); 4251 peer_tep = tep->te_conp; 4252 ack_sz += peer_tep->te_alen; 4253 4254 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4255 if (ackmp == NULL) { 4256 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4257 "tl_connected_cots_addr_req: reallocb failed")); 4258 tl_memrecover(tep->te_wq, mp, ack_sz); 4259 return; 4260 } 4261 4262 taa = (struct T_addr_ack *)ackmp->b_rptr; 4263 4264 /* endpoint is bound */ 4265 taa->LOCADDR_length = tep->te_alen; 4266 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4267 4268 addr_startp = (uchar_t *)&taa[1]; 4269 4270 bcopy(tep->te_abuf, addr_startp, 4271 tep->te_alen); 4272 4273 taa->REMADDR_length = peer_tep->te_alen; 4274 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4275 taa->LOCADDR_length); 4276 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4277 bcopy(peer_tep->te_abuf, addr_startp, 4278 peer_tep->te_alen); 4279 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4280 taa->REMADDR_offset + peer_tep->te_alen; 4281 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4282 4283 putnext(tep->te_rq, ackmp); 4284 } 4285 4286 static void 4287 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4288 { 4289 if (IS_CLTS(tep)) { 4290 *ia = tl_clts_info_ack; 4291 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4292 } else { 4293 *ia = tl_cots_info_ack; 4294 if (IS_COTSORD(tep)) 4295 ia->SERV_type = T_COTS_ORD; 4296 } 4297 ia->TIDU_size = tl_tidusz; 4298 ia->CURRENT_state = tep->te_state; 4299 } 4300 4301 /* 4302 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4303 * tl_wput. 4304 */ 4305 static void 4306 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4307 { 4308 mblk_t *ackmp; 4309 t_uscalar_t cap_bits1; 4310 struct T_capability_ack *tcap; 4311 4312 if (tep->te_closing) { 4313 freemsg(mp); 4314 return; 4315 } 4316 4317 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4318 4319 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4320 M_PCPROTO, T_CAPABILITY_ACK); 4321 if (ackmp == NULL) { 4322 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4323 "tl_capability_req: reallocb failed")); 4324 tl_memrecover(tep->te_wq, mp, 4325 sizeof (struct T_capability_ack)); 4326 return; 4327 } 4328 4329 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4330 tcap->CAP_bits1 = 0; 4331 4332 if (cap_bits1 & TC1_INFO) { 4333 tl_copy_info(&tcap->INFO_ack, tep); 4334 tcap->CAP_bits1 |= TC1_INFO; 4335 } 4336 4337 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4338 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4339 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4340 } 4341 4342 putnext(tep->te_rq, ackmp); 4343 } 4344 4345 static void 4346 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4347 { 4348 if (! tep->te_closing) 4349 tl_info_req(mp, tep); 4350 else 4351 freemsg(mp); 4352 4353 tl_serializer_exit(tep); 4354 tl_refrele(tep); 4355 } 4356 4357 static void 4358 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4359 { 4360 mblk_t *ackmp; 4361 4362 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4363 M_PCPROTO, T_INFO_ACK); 4364 if (ackmp == NULL) { 4365 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4366 "tl_info_req: reallocb failed")); 4367 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4368 return; 4369 } 4370 4371 /* 4372 * fill in T_INFO_ACK contents 4373 */ 4374 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4375 4376 /* 4377 * send ack message 4378 */ 4379 putnext(tep->te_rq, ackmp); 4380 } 4381 4382 /* 4383 * Handle M_DATA, T_data_req and T_optdata_req. 
4384 * If this is a socket pass through T_optdata_req options unmodified. 4385 */ 4386 static void 4387 tl_data(mblk_t *mp, tl_endpt_t *tep) 4388 { 4389 queue_t *wq = tep->te_wq; 4390 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4391 ssize_t msz = MBLKL(mp); 4392 tl_endpt_t *peer_tep; 4393 queue_t *peer_rq; 4394 boolean_t closing = tep->te_closing; 4395 4396 if (IS_CLTS(tep)) { 4397 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4398 SL_TRACE|SL_ERROR, 4399 "tl_wput:clts:unattached M_DATA")); 4400 if (!closing) { 4401 tl_merror(wq, mp, EPROTO); 4402 } else { 4403 freemsg(mp); 4404 } 4405 return; 4406 } 4407 4408 /* 4409 * If the endpoint is closing it should still forward any data to the 4410 * peer (if it has one). If it is not allowed to forward it can just 4411 * free the message. 4412 */ 4413 if (closing && 4414 (tep->te_state != TS_DATA_XFER) && 4415 (tep->te_state != TS_WREQ_ORDREL)) { 4416 freemsg(mp); 4417 return; 4418 } 4419 4420 if (DB_TYPE(mp) == M_PROTO) { 4421 if (prim->type == T_DATA_REQ && 4422 msz < sizeof (struct T_data_req)) { 4423 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4424 SL_TRACE|SL_ERROR, 4425 "tl_data:T_DATA_REQ:invalid message")); 4426 if (!closing) { 4427 tl_merror(wq, mp, EPROTO); 4428 } else { 4429 freemsg(mp); 4430 } 4431 return; 4432 } else if (prim->type == T_OPTDATA_REQ && 4433 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) { 4434 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4435 SL_TRACE|SL_ERROR, 4436 "tl_data:T_OPTDATA_REQ:invalid message")); 4437 if (!closing) { 4438 tl_merror(wq, mp, EPROTO); 4439 } else { 4440 freemsg(mp); 4441 } 4442 return; 4443 } 4444 } 4445 4446 /* 4447 * connection oriented provider 4448 */ 4449 switch (tep->te_state) { 4450 case TS_IDLE: 4451 /* 4452 * Other end not here - do nothing. 
4453 */ 4454 freemsg(mp); 4455 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4456 "tl_data:cots with endpoint idle")); 4457 return; 4458 4459 case TS_DATA_XFER: 4460 /* valid states */ 4461 if (tep->te_conp != NULL) 4462 break; 4463 4464 if (tep->te_oconp == NULL) { 4465 if (!closing) { 4466 tl_merror(wq, mp, EPROTO); 4467 } else { 4468 freemsg(mp); 4469 } 4470 return; 4471 } 4472 /* 4473 * For a socket the T_CONN_CON is sent early thus 4474 * the peer might not yet have accepted the connection. 4475 * If we are closing queue the packet with the T_CONN_IND. 4476 * Otherwise defer processing the packet until the peer 4477 * accepts the connection. 4478 * Note that the queue is noenabled when we go into this 4479 * state. 4480 */ 4481 if (!closing) { 4482 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4483 SL_TRACE|SL_ERROR, 4484 "tl_data: ocon")); 4485 TL_PUTBQ(tep, mp); 4486 return; 4487 } 4488 if (DB_TYPE(mp) == M_PROTO) { 4489 if (msz < sizeof (t_scalar_t)) { 4490 freemsg(mp); 4491 return; 4492 } 4493 /* reuse message block - just change REQ to IND */ 4494 if (prim->type == T_DATA_REQ) 4495 prim->type = T_DATA_IND; 4496 else 4497 prim->type = T_OPTDATA_IND; 4498 } 4499 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4500 return; 4501 4502 case TS_WREQ_ORDREL: 4503 if (tep->te_conp == NULL) { 4504 /* 4505 * Other end closed - generate discon_ind 4506 * with reason 0 to cause an EPIPE but no 4507 * read side error on AF_UNIX sockets. 
4508 */ 4509 freemsg(mp); 4510 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4511 SL_TRACE|SL_ERROR, 4512 "tl_data: WREQ_ORDREL and no peer")); 4513 tl_discon_ind(tep, 0); 4514 return; 4515 } 4516 break; 4517 4518 default: 4519 /* invalid state for event TE_DATA_REQ */ 4520 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4521 "tl_data:cots:out of state")); 4522 tl_merror(wq, mp, EPROTO); 4523 return; 4524 } 4525 /* 4526 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4527 * (State stays same on this event) 4528 */ 4529 4530 /* 4531 * get connected endpoint 4532 */ 4533 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4534 freemsg(mp); 4535 /* Peer closed */ 4536 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4537 "tl_data: peer gone")); 4538 return; 4539 } 4540 4541 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4542 peer_rq = peer_tep->te_rq; 4543 4544 /* 4545 * Put it back if flow controlled 4546 * Note: Messages already on queue when we are closing is bounded 4547 * so we can ignore flow control. 
4548 */ 4549 if (!canputnext(peer_rq) && !closing) { 4550 TL_PUTBQ(tep, mp); 4551 return; 4552 } 4553 4554 /* 4555 * validate peer state 4556 */ 4557 switch (peer_tep->te_state) { 4558 case TS_DATA_XFER: 4559 case TS_WIND_ORDREL: 4560 /* valid states */ 4561 break; 4562 default: 4563 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4564 "tl_data:rx side:invalid state")); 4565 tl_merror(peer_tep->te_wq, mp, EPROTO); 4566 return; 4567 } 4568 if (DB_TYPE(mp) == M_PROTO) { 4569 /* reuse message block - just change REQ to IND */ 4570 if (prim->type == T_DATA_REQ) 4571 prim->type = T_DATA_IND; 4572 else 4573 prim->type = T_OPTDATA_IND; 4574 } 4575 /* 4576 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4577 * (peer state stays same on this event) 4578 */ 4579 /* 4580 * send data to connected peer 4581 */ 4582 putnext(peer_rq, mp); 4583 } 4584 4585 4586 4587 static void 4588 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4589 { 4590 queue_t *wq = tep->te_wq; 4591 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4592 ssize_t msz = MBLKL(mp); 4593 tl_endpt_t *peer_tep; 4594 queue_t *peer_rq; 4595 boolean_t closing = tep->te_closing; 4596 4597 if (msz < sizeof (struct T_exdata_req)) { 4598 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4599 "tl_exdata:invalid message")); 4600 if (!closing) { 4601 tl_merror(wq, mp, EPROTO); 4602 } else { 4603 freemsg(mp); 4604 } 4605 return; 4606 } 4607 4608 /* 4609 * If the endpoint is closing it should still forward any data to the 4610 * peer (if it has one). If it is not allowed to forward it can just 4611 * free the message. 4612 */ 4613 if (closing && 4614 (tep->te_state != TS_DATA_XFER) && 4615 (tep->te_state != TS_WREQ_ORDREL)) { 4616 freemsg(mp); 4617 return; 4618 } 4619 4620 /* 4621 * validate state 4622 */ 4623 switch (tep->te_state) { 4624 case TS_IDLE: 4625 /* 4626 * Other end not here - do nothing. 
4627 */ 4628 freemsg(mp); 4629 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4630 "tl_exdata:cots with endpoint idle")); 4631 return; 4632 4633 case TS_DATA_XFER: 4634 /* valid states */ 4635 if (tep->te_conp != NULL) 4636 break; 4637 4638 if (tep->te_oconp == NULL) { 4639 if (!closing) { 4640 tl_merror(wq, mp, EPROTO); 4641 } else { 4642 freemsg(mp); 4643 } 4644 return; 4645 } 4646 /* 4647 * For a socket the T_CONN_CON is sent early thus 4648 * the peer might not yet have accepted the connection. 4649 * If we are closing queue the packet with the T_CONN_IND. 4650 * Otherwise defer processing the packet until the peer 4651 * accepts the connection. 4652 * Note that the queue is noenabled when we go into this 4653 * state. 4654 */ 4655 if (!closing) { 4656 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4657 SL_TRACE|SL_ERROR, 4658 "tl_exdata: ocon")); 4659 TL_PUTBQ(tep, mp); 4660 return; 4661 } 4662 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4663 "tl_exdata: closing socket ocon")); 4664 prim->type = T_EXDATA_IND; 4665 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4666 return; 4667 4668 case TS_WREQ_ORDREL: 4669 if (tep->te_conp == NULL) { 4670 /* 4671 * Other end closed - generate discon_ind 4672 * with reason 0 to cause an EPIPE but no 4673 * read side error on AF_UNIX sockets. 
4674 */ 4675 freemsg(mp); 4676 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4677 SL_TRACE|SL_ERROR, 4678 "tl_exdata: WREQ_ORDREL and no peer")); 4679 tl_discon_ind(tep, 0); 4680 return; 4681 } 4682 break; 4683 4684 default: 4685 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4686 SL_TRACE|SL_ERROR, 4687 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4688 tep->te_state)); 4689 tl_merror(wq, mp, EPROTO); 4690 return; 4691 } 4692 /* 4693 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4694 * (state stays same on this event) 4695 */ 4696 4697 /* 4698 * get connected endpoint 4699 */ 4700 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4701 freemsg(mp); 4702 /* Peer closed */ 4703 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4704 "tl_exdata: peer gone")); 4705 return; 4706 } 4707 4708 peer_rq = peer_tep->te_rq; 4709 4710 /* 4711 * Put it back if flow controlled 4712 * Note: Messages already on queue when we are closing is bounded 4713 * so we can ignore flow control. 4714 */ 4715 if (!canputnext(peer_rq) && !closing) { 4716 TL_PUTBQ(tep, mp); 4717 return; 4718 } 4719 4720 /* 4721 * validate state on peer 4722 */ 4723 switch (peer_tep->te_state) { 4724 case TS_DATA_XFER: 4725 case TS_WIND_ORDREL: 4726 /* valid states */ 4727 break; 4728 default: 4729 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4730 "tl_exdata:rx side:invalid state")); 4731 tl_merror(peer_tep->te_wq, mp, EPROTO); 4732 return; 4733 } 4734 /* 4735 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4736 * (peer state stays same on this event) 4737 */ 4738 /* 4739 * reuse message block 4740 */ 4741 prim->type = T_EXDATA_IND; 4742 4743 /* 4744 * send data to connected peer 4745 */ 4746 putnext(peer_rq, mp); 4747 } 4748 4749 4750 4751 static void 4752 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4753 { 4754 queue_t *wq = tep->te_wq; 4755 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4756 ssize_t msz = MBLKL(mp); 4757 tl_endpt_t *peer_tep; 4758 
queue_t *peer_rq; 4759 boolean_t closing = tep->te_closing; 4760 4761 if (msz < sizeof (struct T_ordrel_req)) { 4762 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4763 "tl_ordrel:invalid message")); 4764 if (!closing) { 4765 tl_merror(wq, mp, EPROTO); 4766 } else { 4767 freemsg(mp); 4768 } 4769 return; 4770 } 4771 4772 /* 4773 * validate state 4774 */ 4775 switch (tep->te_state) { 4776 case TS_DATA_XFER: 4777 case TS_WREQ_ORDREL: 4778 /* valid states */ 4779 if (tep->te_conp != NULL) 4780 break; 4781 4782 if (tep->te_oconp == NULL) 4783 break; 4784 4785 /* 4786 * For a socket the T_CONN_CON is sent early thus 4787 * the peer might not yet have accepted the connection. 4788 * If we are closing queue the packet with the T_CONN_IND. 4789 * Otherwise defer processing the packet until the peer 4790 * accepts the connection. 4791 * Note that the queue is noenabled when we go into this 4792 * state. 4793 */ 4794 if (!closing) { 4795 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4796 SL_TRACE|SL_ERROR, 4797 "tl_ordlrel: ocon")); 4798 TL_PUTBQ(tep, mp); 4799 return; 4800 } 4801 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4802 "tl_ordlrel: closing socket ocon")); 4803 prim->type = T_ORDREL_IND; 4804 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4805 return; 4806 4807 default: 4808 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4809 SL_TRACE|SL_ERROR, 4810 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4811 tep->te_state)); 4812 if (!closing) { 4813 tl_merror(wq, mp, EPROTO); 4814 } else { 4815 freemsg(mp); 4816 } 4817 return; 4818 } 4819 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4820 4821 /* 4822 * get connected endpoint 4823 */ 4824 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4825 /* Peer closed */ 4826 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4827 "tl_ordrel: peer gone")); 4828 freemsg(mp); 4829 return; 4830 } 4831 4832 peer_rq = peer_tep->te_rq; 4833 4834 /* 4835 * Put it back if flow controlled 
except when we are closing. 4836 * Note: Messages already on queue when we are closing is bounded 4837 * so we can ignore flow control. 4838 */ 4839 if (! canputnext(peer_rq) && !closing) { 4840 TL_PUTBQ(tep, mp); 4841 return; 4842 } 4843 4844 /* 4845 * validate state on peer 4846 */ 4847 switch (peer_tep->te_state) { 4848 case TS_DATA_XFER: 4849 case TS_WIND_ORDREL: 4850 /* valid states */ 4851 break; 4852 default: 4853 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4854 "tl_ordrel:rx side:invalid state")); 4855 tl_merror(peer_tep->te_wq, mp, EPROTO); 4856 return; 4857 } 4858 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4859 4860 /* 4861 * reuse message block 4862 */ 4863 prim->type = T_ORDREL_IND; 4864 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4865 "tl_ordrel: send ordrel_ind")); 4866 4867 /* 4868 * send data to connected peer 4869 */ 4870 putnext(peer_rq, mp); 4871 } 4872 4873 4874 /* 4875 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 4876 */ 4877 static void 4878 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4879 { 4880 size_t err_sz; 4881 tl_endpt_t *tep; 4882 struct T_unitdata_req *udreq; 4883 mblk_t *err_mp; 4884 t_scalar_t alen; 4885 t_scalar_t olen; 4886 struct T_uderror_ind *uderr; 4887 uchar_t *addr_startp; 4888 4889 err_sz = sizeof (struct T_uderror_ind); 4890 tep = (tl_endpt_t *)wq->q_ptr; 4891 udreq = (struct T_unitdata_req *)mp->b_rptr; 4892 alen = udreq->DEST_length; 4893 olen = udreq->OPT_length; 4894 4895 if (alen > 0) 4896 err_sz = T_ALIGN(err_sz + alen); 4897 if (olen > 0) 4898 err_sz += olen; 4899 4900 err_mp = allocb(err_sz, BPRI_MED); 4901 if (! 
err_mp) { 4902 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4903 "tl_uderr:allocb failure")); 4904 /* 4905 * Note: no rollback of state needed as it does 4906 * not change in connectionless transport 4907 */ 4908 tl_memrecover(wq, mp, err_sz); 4909 return; 4910 } 4911 4912 DB_TYPE(err_mp) = M_PROTO; 4913 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4914 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4915 uderr->PRIM_type = T_UDERROR_IND; 4916 uderr->ERROR_type = err; 4917 uderr->DEST_length = alen; 4918 uderr->OPT_length = olen; 4919 if (alen <= 0) { 4920 uderr->DEST_offset = 0; 4921 } else { 4922 uderr->DEST_offset = 4923 (t_scalar_t)sizeof (struct T_uderror_ind); 4924 addr_startp = mp->b_rptr + udreq->DEST_offset; 4925 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4926 (size_t)alen); 4927 } 4928 if (olen <= 0) { 4929 uderr->OPT_offset = 0; 4930 } else { 4931 uderr->OPT_offset = 4932 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4933 uderr->DEST_length); 4934 addr_startp = mp->b_rptr + udreq->OPT_offset; 4935 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4936 (size_t)olen); 4937 } 4938 freemsg(mp); 4939 4940 /* 4941 * send indication message 4942 */ 4943 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4944 4945 qreply(wq, err_mp); 4946 } 4947 4948 static void 4949 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4950 { 4951 queue_t *wq = tep->te_wq; 4952 4953 if (!tep->te_closing && (wq->q_first != NULL)) { 4954 TL_PUTQ(tep, mp); 4955 } else if (tep->te_rq != NULL) 4956 tl_unitdata(mp, tep); 4957 else 4958 freemsg(mp); 4959 4960 tl_serializer_exit(tep); 4961 tl_refrele(tep); 4962 } 4963 4964 /* 4965 * Handle T_unitdata_req. 4966 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 4967 * If this is a socket pass through options unmodified. 
4968 */ 4969 static void 4970 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4971 { 4972 queue_t *wq = tep->te_wq; 4973 soux_addr_t ux_addr; 4974 tl_addr_t destaddr; 4975 uchar_t *addr_startp; 4976 tl_endpt_t *peer_tep; 4977 struct T_unitdata_ind *udind; 4978 struct T_unitdata_req *udreq; 4979 ssize_t msz, ui_sz; 4980 t_scalar_t alen, aoff, olen, ooff; 4981 t_scalar_t oldolen = 0; 4982 cred_t *cr = NULL; 4983 pid_t cpid; 4984 4985 udreq = (struct T_unitdata_req *)mp->b_rptr; 4986 msz = MBLKL(mp); 4987 4988 /* 4989 * validate the state 4990 */ 4991 if (tep->te_state != TS_IDLE) { 4992 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4993 SL_TRACE|SL_ERROR, 4994 "tl_wput:T_CONN_REQ:out of state")); 4995 tl_merror(wq, mp, EPROTO); 4996 return; 4997 } 4998 /* 4999 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5000 * (state does not change on this event) 5001 */ 5002 5003 /* 5004 * validate the message 5005 * Note: dereference fields in struct inside message only 5006 * after validating the message length. 
5007 */ 5008 if (msz < sizeof (struct T_unitdata_req)) { 5009 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5010 "tl_unitdata:invalid message length")); 5011 tl_merror(wq, mp, EINVAL); 5012 return; 5013 } 5014 alen = udreq->DEST_length; 5015 aoff = udreq->DEST_offset; 5016 oldolen = olen = udreq->OPT_length; 5017 ooff = udreq->OPT_offset; 5018 if (olen == 0) 5019 ooff = 0; 5020 5021 if (IS_SOCKET(tep)) { 5022 if ((alen != TL_SOUX_ADDRLEN) || 5023 (aoff < 0) || 5024 (aoff + alen > msz) || 5025 (olen < 0) || (ooff < 0) || 5026 ((olen > 0) && ((ooff + olen) > msz))) { 5027 (void) (STRLOG(TL_ID, tep->te_minor, 5028 1, SL_TRACE|SL_ERROR, 5029 "tl_unitdata_req: invalid socket addr " 5030 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5031 (int)msz, alen, aoff, olen, ooff)); 5032 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5033 return; 5034 } 5035 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5036 5037 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5038 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5039 (void) (STRLOG(TL_ID, tep->te_minor, 5040 1, SL_TRACE|SL_ERROR, 5041 "tl_conn_req: invalid socket magic")); 5042 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5043 return; 5044 } 5045 } else { 5046 if ((alen < 0) || 5047 (aoff < 0) || 5048 ((alen > 0) && ((aoff + alen) > msz)) || 5049 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5050 ((aoff + alen) < 0) || 5051 ((olen > 0) && ((ooff + olen) > msz)) || 5052 (olen < 0) || 5053 (ooff < 0) || 5054 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5055 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5056 SL_TRACE|SL_ERROR, 5057 "tl_unitdata:invalid unit data message")); 5058 tl_merror(wq, mp, EINVAL); 5059 return; 5060 } 5061 } 5062 5063 /* Options not supported unless it's a socket */ 5064 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5065 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5066 "tl_unitdata:option use(unsupported) or zero len addr")); 5067 
tl_uderr(wq, mp, EPROTO); 5068 return; 5069 } 5070 #ifdef DEBUG 5071 /* 5072 * Mild form of ASSERT()ion to detect broken TPI apps. 5073 * if (! assertion) 5074 * log warning; 5075 */ 5076 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5077 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5078 "tl_unitdata:addr overlaps TPI message")); 5079 } 5080 #endif 5081 /* 5082 * get destination endpoint 5083 */ 5084 destaddr.ta_alen = alen; 5085 destaddr.ta_abuf = mp->b_rptr + aoff; 5086 destaddr.ta_zoneid = tep->te_zoneid; 5087 5088 /* 5089 * Check whether the destination is the same that was used previously 5090 * and the destination endpoint is in the right state. If something is 5091 * wrong, find destination again and cache it. 5092 */ 5093 peer_tep = tep->te_lastep; 5094 5095 if ((peer_tep == NULL) || peer_tep->te_closing || 5096 (peer_tep->te_state != TS_IDLE) || 5097 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5098 /* 5099 * Not the same as cached destination , need to find the right 5100 * destination. 5101 */ 5102 peer_tep = (IS_SOCKET(tep) ? 5103 tl_sock_find_peer(tep, &ux_addr) : 5104 tl_find_peer(tep, &destaddr)); 5105 5106 if (peer_tep == NULL) { 5107 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5108 SL_TRACE|SL_ERROR, 5109 "tl_unitdata:no one at destination address")); 5110 tl_uderr(wq, mp, ECONNRESET); 5111 return; 5112 } 5113 5114 /* 5115 * Cache the new peer. 5116 */ 5117 if (tep->te_lastep != NULL) 5118 tl_refrele(tep->te_lastep); 5119 5120 tep->te_lastep = peer_tep; 5121 } 5122 5123 if (peer_tep->te_state != TS_IDLE) { 5124 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5125 "tl_unitdata:provider in invalid state")); 5126 tl_uderr(wq, mp, EPROTO); 5127 return; 5128 } 5129 5130 ASSERT(peer_tep->te_rq != NULL); 5131 5132 /* 5133 * Put it back if flow controlled except when we are closing. 5134 * Note: Messages already on queue when we are closing is bounded 5135 * so we can ignore flow control. 
5136 */ 5137 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5138 /* record what we are flow controlled on */ 5139 if (tep->te_flowq != NULL) { 5140 list_remove(&tep->te_flowq->te_flowlist, tep); 5141 } 5142 list_insert_head(&peer_tep->te_flowlist, tep); 5143 tep->te_flowq = peer_tep; 5144 TL_PUTBQ(tep, mp); 5145 return; 5146 } 5147 /* 5148 * prepare indication message 5149 */ 5150 5151 /* 5152 * calculate length of message 5153 */ 5154 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5155 cr = msg_getcred(mp, &cpid); 5156 ASSERT(cr != NULL); 5157 5158 if (peer_tep->te_flag & TL_SETCRED) { 5159 ASSERT(olen == 0); 5160 olen = (t_scalar_t)sizeof (struct opthdr) + 5161 OPTLEN(sizeof (tl_credopt_t)); 5162 /* 1 option only */ 5163 } else if (peer_tep->te_flag & TL_SETUCRED) { 5164 ASSERT(olen == 0); 5165 olen = (t_scalar_t)sizeof (struct opthdr) + 5166 OPTLEN(ucredminsize(cr)); 5167 /* 1 option only */ 5168 } else { 5169 /* Possibly more than one option */ 5170 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5171 OPTLEN(ucredminsize(cr)); 5172 } 5173 } 5174 5175 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5176 olen; 5177 /* 5178 * If the unitdata_ind fits and we are not adding options 5179 * reuse the udreq mblk. 5180 */ 5181 if (msz >= ui_sz && alen >= tep->te_alen && 5182 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5183 /* 5184 * Reuse the original mblk. Leave options in place. 5185 */ 5186 udind = (struct T_unitdata_ind *)mp->b_rptr; 5187 udind->PRIM_type = T_UNITDATA_IND; 5188 udind->SRC_length = tep->te_alen; 5189 addr_startp = mp->b_rptr + udind->SRC_offset; 5190 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5191 } else { 5192 /* Allocate a new T_unidata_ind message */ 5193 mblk_t *ui_mp; 5194 5195 ui_mp = allocb(ui_sz, BPRI_MED); 5196 if (! 
ui_mp) { 5197 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5198 "tl_unitdata:allocb failure:message queued")); 5199 tl_memrecover(wq, mp, ui_sz); 5200 return; 5201 } 5202 5203 /* 5204 * fill in T_UNITDATA_IND contents 5205 */ 5206 DB_TYPE(ui_mp) = M_PROTO; 5207 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5208 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5209 udind->PRIM_type = T_UNITDATA_IND; 5210 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5211 udind->SRC_length = tep->te_alen; 5212 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5213 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5214 udind->OPT_offset = 5215 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5216 udind->OPT_length = olen; 5217 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5218 5219 if (oldolen != 0) { 5220 bcopy((void *)((uintptr_t)udreq + ooff), 5221 (void *)((uintptr_t)udind + 5222 udind->OPT_offset), 5223 oldolen); 5224 } 5225 ASSERT(cr != NULL); 5226 5227 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5228 oldolen, cr, cpid, 5229 peer_tep->te_flag, peer_tep->te_credp); 5230 } else { 5231 bcopy((void *)((uintptr_t)udreq + ooff), 5232 (void *)((uintptr_t)udind + udind->OPT_offset), 5233 olen); 5234 } 5235 5236 /* 5237 * relink data blocks from mp to ui_mp 5238 */ 5239 ui_mp->b_cont = mp->b_cont; 5240 freeb(mp); 5241 mp = ui_mp; 5242 } 5243 /* 5244 * send indication message 5245 */ 5246 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5247 putnext(peer_tep->te_rq, mp); 5248 } 5249 5250 5251 5252 /* 5253 * Check if a given addr is in use. 5254 * Endpoint ptr returned or NULL if not found. 5255 * The name space is separate for each mode. This implies that 5256 * sockets get their own name space. 
5257 */ 5258 static tl_endpt_t * 5259 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5260 { 5261 tl_endpt_t *peer_tep = NULL; 5262 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5263 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5264 5265 ASSERT(! IS_SOCKET(tep)); 5266 5267 ASSERT(ap != NULL && ap->ta_alen > 0); 5268 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5269 ASSERT(ap->ta_abuf != NULL); 5270 EQUIV(rc == 0, peer_tep != NULL); 5271 IMPLY(rc == 0, 5272 (tep->te_zoneid == peer_tep->te_zoneid) && 5273 (tep->te_transport == peer_tep->te_transport)); 5274 5275 if ((rc == 0) && (peer_tep->te_closing)) { 5276 tl_refrele(peer_tep); 5277 peer_tep = NULL; 5278 } 5279 5280 return (peer_tep); 5281 } 5282 5283 /* 5284 * Find peer for a socket based on unix domain address. 5285 * For implicit addresses our peer can be found by minor number in ai hash. For 5286 * explicit binds we look vnode address at addr_hash. 5287 */ 5288 static tl_endpt_t * 5289 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5290 { 5291 tl_endpt_t *peer_tep = NULL; 5292 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5293 tep->te_aihash : tep->te_addrhash; 5294 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5295 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5296 5297 ASSERT(IS_SOCKET(tep)); 5298 EQUIV(rc == 0, peer_tep != NULL); 5299 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); 5300 5301 if (peer_tep != NULL) { 5302 /* Don't attempt to use closing peer. */ 5303 if (peer_tep->te_closing) 5304 goto errout; 5305 5306 /* 5307 * Cross-zone unix sockets are permitted, but for Trusted 5308 * Extensions only, the "server" for these must be in the 5309 * global zone. 
5310 */ 5311 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5312 is_system_labeled() && 5313 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5314 goto errout; 5315 } 5316 5317 return (peer_tep); 5318 5319 errout: 5320 tl_refrele(peer_tep); 5321 return (NULL); 5322 } 5323 5324 /* 5325 * Generate a free addr and return it in struct pointed by ap 5326 * but allocating space for address buffer. 5327 * The generated address will be at least 4 bytes long and, if req->ta_alen 5328 * exceeds 4 bytes, be req->ta_alen bytes long. 5329 * 5330 * If address is found it will be inserted in the hash. 5331 * 5332 * If req->ta_alen is larger than the default alen (4 bytes) the last 5333 * alen-4 bytes will always be the same as in req. 5334 * 5335 * Return 0 for failure. 5336 * Return non-zero for success. 5337 */ 5338 static boolean_t 5339 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5340 { 5341 t_scalar_t alen; 5342 uint32_t loopcnt; /* Limit loop to 2^32 */ 5343 5344 ASSERT(tep->te_hash_hndl != NULL); 5345 ASSERT(! IS_SOCKET(tep)); 5346 5347 if (tep->te_hash_hndl == NULL) 5348 return (B_FALSE); 5349 5350 /* 5351 * check if default addr is in use 5352 * if it is - bump it and try again 5353 */ 5354 if (req == NULL) { 5355 alen = sizeof (uint32_t); 5356 } else { 5357 alen = max(req->ta_alen, sizeof (uint32_t)); 5358 ASSERT(tep->te_zoneid == req->ta_zoneid); 5359 } 5360 5361 if (tep->te_alen < alen) { 5362 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5363 5364 /* 5365 * Not enough space in tep->ta_ap to hold the address, 5366 * allocate a bigger space. 
5367 */ 5368 if (abuf == NULL) 5369 return (B_FALSE); 5370 5371 if (tep->te_alen > 0) 5372 kmem_free(tep->te_abuf, tep->te_alen); 5373 5374 tep->te_alen = alen; 5375 tep->te_abuf = abuf; 5376 } 5377 5378 /* Copy in the address in req */ 5379 if (req != NULL) { 5380 ASSERT(alen >= req->ta_alen); 5381 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5382 } 5383 5384 /* 5385 * First try minor number then try default addresses. 5386 */ 5387 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5388 5389 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5390 if (mod_hash_insert_reserve(tep->te_addrhash, 5391 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5392 tep->te_hash_hndl) == 0) { 5393 /* 5394 * found free address 5395 */ 5396 tep->te_flag |= TL_ADDRHASHED; 5397 tep->te_hash_hndl = NULL; 5398 5399 return (B_TRUE); /* successful return */ 5400 } 5401 /* 5402 * Use default address. 5403 */ 5404 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5405 atomic_add_32(&tep->te_defaddr, 1); 5406 } 5407 5408 /* 5409 * Failed to find anything. 5410 */ 5411 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5412 "tl_get_any_addr:looped 2^32 times")); 5413 return (B_FALSE); 5414 } 5415 5416 /* 5417 * reallocb + set r/w ptrs to reflect size. 5418 */ 5419 static mblk_t * 5420 tl_resizemp(mblk_t *mp, ssize_t new_size) 5421 { 5422 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5423 return (NULL); 5424 5425 mp->b_rptr = DB_BASE(mp); 5426 mp->b_wptr = mp->b_rptr + new_size; 5427 return (mp); 5428 } 5429 5430 static void 5431 tl_cl_backenable(tl_endpt_t *tep) 5432 { 5433 list_t *l = &tep->te_flowlist; 5434 tl_endpt_t *elp; 5435 5436 ASSERT(IS_CLTS(tep)); 5437 5438 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5439 ASSERT(tep->te_ser == elp->te_ser); 5440 ASSERT(elp->te_flowq == tep); 5441 if (! elp->te_closing) 5442 TL_QENABLE(elp); 5443 elp->te_flowq = NULL; 5444 list_remove(l, elp); 5445 } 5446 } 5447 5448 /* 5449 * Unconnect endpoints. 
5450 */ 5451 static void 5452 tl_co_unconnect(tl_endpt_t *tep) 5453 { 5454 tl_endpt_t *peer_tep = tep->te_conp; 5455 tl_endpt_t *srv_tep = tep->te_oconp; 5456 list_t *l; 5457 tl_icon_t *tip; 5458 tl_endpt_t *cl_tep; 5459 mblk_t *d_mp; 5460 5461 ASSERT(IS_COTS(tep)); 5462 /* 5463 * If our peer is closing, don't use it. 5464 */ 5465 if ((peer_tep != NULL) && peer_tep->te_closing) { 5466 TL_UNCONNECT(tep->te_conp); 5467 peer_tep = NULL; 5468 } 5469 if ((srv_tep != NULL) && srv_tep->te_closing) { 5470 TL_UNCONNECT(tep->te_oconp); 5471 srv_tep = NULL; 5472 } 5473 5474 if (tep->te_nicon > 0) { 5475 l = &tep->te_iconp; 5476 /* 5477 * If incoming requests pending, change state 5478 * of clients on disconnect ind event and send 5479 * discon_ind pdu to modules above them 5480 * for server: all clients get disconnect 5481 */ 5482 5483 while (tep->te_nicon > 0) { 5484 tip = list_head(l); 5485 cl_tep = tip->ti_tep; 5486 5487 if (cl_tep == NULL) { 5488 tl_freetip(tep, tip); 5489 continue; 5490 } 5491 5492 if (cl_tep->te_oconp != NULL) { 5493 ASSERT(cl_tep != cl_tep->te_oconp); 5494 TL_UNCONNECT(cl_tep->te_oconp); 5495 } 5496 5497 if (cl_tep->te_closing) { 5498 tl_freetip(tep, tip); 5499 continue; 5500 } 5501 5502 enableok(cl_tep->te_wq); 5503 TL_QENABLE(cl_tep); 5504 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5505 if (d_mp != NULL) { 5506 cl_tep->te_state = TS_IDLE; 5507 putnext(cl_tep->te_rq, d_mp); 5508 } else { 5509 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5510 SL_TRACE|SL_ERROR, 5511 "tl_co_unconnect:icmng: " 5512 "allocb failure")); 5513 } 5514 tl_freetip(tep, tip); 5515 } 5516 } else if (srv_tep != NULL) { 5517 /* 5518 * If outgoing request pending, change state 5519 * of server on discon ind event 5520 */ 5521 5522 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5523 IS_COTSORD(srv_tep) && 5524 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5525 /* 5526 * Queue ordrel_ind for server to be picked up 5527 * when the connection is accepted. 
5528 */ 5529 d_mp = tl_ordrel_ind_alloc(); 5530 } else { 5531 /* 5532 * send discon_ind to server 5533 */ 5534 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5535 } 5536 if (d_mp == NULL) { 5537 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5538 SL_TRACE|SL_ERROR, 5539 "tl_co_unconnect:outgoing:allocb failure")); 5540 TL_UNCONNECT(tep->te_oconp); 5541 goto discon_peer; 5542 } 5543 5544 /* 5545 * If this is a socket the T_DISCON_IND is queued with 5546 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5547 * from the list of pending connections. 5548 * Note that when te_oconp is set the peer better have 5549 * a t_connind_t for the client. 5550 */ 5551 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5552 /* 5553 * Queue the disconnection message. 5554 */ 5555 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5556 } else { 5557 tip = tl_icon_find(srv_tep, tep->te_seqno); 5558 if (tip == NULL) { 5559 freemsg(d_mp); 5560 } else { 5561 ASSERT(tep == tip->ti_tep); 5562 ASSERT(tep->te_ser == srv_tep->te_ser); 5563 /* 5564 * Delete tip from the server list. 
5565 */ 5566 if (srv_tep->te_nicon == 1) { 5567 srv_tep->te_state = 5568 NEXTSTATE(TE_DISCON_IND2, 5569 srv_tep->te_state); 5570 } else { 5571 srv_tep->te_state = 5572 NEXTSTATE(TE_DISCON_IND3, 5573 srv_tep->te_state); 5574 } 5575 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5576 T_DISCON_IND); 5577 putnext(srv_tep->te_rq, d_mp); 5578 tl_freetip(srv_tep, tip); 5579 } 5580 TL_UNCONNECT(tep->te_oconp); 5581 srv_tep = NULL; 5582 } 5583 } else if (peer_tep != NULL) { 5584 /* 5585 * unconnect existing connection 5586 * If connected, change state of peer on 5587 * discon ind event and send discon ind pdu 5588 * to module above it 5589 */ 5590 5591 ASSERT(tep->te_ser == peer_tep->te_ser); 5592 if (IS_COTSORD(peer_tep) && 5593 (peer_tep->te_state == TS_WIND_ORDREL || 5594 peer_tep->te_state == TS_DATA_XFER)) { 5595 /* 5596 * send ordrel ind 5597 */ 5598 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5599 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5600 peer_tep->te_state, 5601 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5602 d_mp = tl_ordrel_ind_alloc(); 5603 if (! d_mp) { 5604 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5605 SL_TRACE|SL_ERROR, 5606 "tl_co_unconnect:connected:" 5607 "allocb failure")); 5608 /* 5609 * Continue with cleaning up peer as 5610 * this side may go away with the close 5611 */ 5612 TL_QENABLE(peer_tep); 5613 goto discon_peer; 5614 } 5615 peer_tep->te_state = 5616 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5617 5618 putnext(peer_tep->te_rq, d_mp); 5619 /* 5620 * Handle flow control case. This will generate 5621 * a t_discon_ind message with reason 0 if there 5622 * is data queued on the write side. 5623 */ 5624 TL_QENABLE(peer_tep); 5625 } else if (IS_COTSORD(peer_tep) && 5626 peer_tep->te_state == TS_WREQ_ORDREL) { 5627 /* 5628 * Sent an ordrel_ind. We send a discon with 5629 * with error 0 to inform that the peer is gone. 
5630 */ 5631 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5632 SL_TRACE|SL_ERROR, 5633 "tl_co_unconnect: discon in state %d", 5634 tep->te_state)); 5635 tl_discon_ind(peer_tep, 0); 5636 } else { 5637 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5638 SL_TRACE|SL_ERROR, 5639 "tl_co_unconnect: state %d", tep->te_state)); 5640 tl_discon_ind(peer_tep, ECONNRESET); 5641 } 5642 5643 discon_peer: 5644 /* 5645 * Disconnect cross-pointers only for close 5646 */ 5647 if (tep->te_closing) { 5648 peer_tep = tep->te_conp; 5649 TL_REMOVE_PEER(peer_tep->te_conp); 5650 TL_REMOVE_PEER(tep->te_conp); 5651 } 5652 } 5653 } 5654 5655 /* 5656 * Note: The following routine does not recover from allocb() 5657 * failures 5658 * The reason should be from the <sys/errno.h> space. 5659 */ 5660 static void 5661 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5662 { 5663 mblk_t *d_mp; 5664 5665 if (tep->te_closing) 5666 return; 5667 5668 /* 5669 * flush the queues. 5670 */ 5671 flushq(tep->te_rq, FLUSHDATA); 5672 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5673 5674 /* 5675 * send discon ind 5676 */ 5677 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5678 if (! d_mp) { 5679 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5680 "tl_discon_ind:allocb failure")); 5681 return; 5682 } 5683 tep->te_state = TS_IDLE; 5684 putnext(tep->te_rq, d_mp); 5685 } 5686 5687 /* 5688 * Note: The following routine does not recover from allocb() 5689 * failures 5690 * The reason should be from the <sys/errno.h> space. 
5691 */ 5692 static mblk_t * 5693 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5694 { 5695 mblk_t *mp; 5696 struct T_discon_ind *tdi; 5697 5698 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5699 DB_TYPE(mp) = M_PROTO; 5700 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5701 tdi = (struct T_discon_ind *)mp->b_rptr; 5702 tdi->PRIM_type = T_DISCON_IND; 5703 tdi->DISCON_reason = reason; 5704 tdi->SEQ_number = seqnum; 5705 } 5706 return (mp); 5707 } 5708 5709 5710 /* 5711 * Note: The following routine does not recover from allocb() 5712 * failures 5713 */ 5714 static mblk_t * 5715 tl_ordrel_ind_alloc(void) 5716 { 5717 mblk_t *mp; 5718 struct T_ordrel_ind *toi; 5719 5720 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5721 DB_TYPE(mp) = M_PROTO; 5722 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5723 toi = (struct T_ordrel_ind *)mp->b_rptr; 5724 toi->PRIM_type = T_ORDREL_IND; 5725 } 5726 return (mp); 5727 } 5728 5729 5730 /* 5731 * Lookup the seqno in the list of queued connections. 5732 */ 5733 static tl_icon_t * 5734 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5735 { 5736 list_t *l = &tep->te_iconp; 5737 tl_icon_t *tip = list_head(l); 5738 5739 ASSERT(seqno != 0); 5740 5741 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5742 ; 5743 5744 return (tip); 5745 } 5746 5747 /* 5748 * Queue data for a given T_CONN_IND while verifying that redundant 5749 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5750 * Used when the originator of the connection closes. 
5751 */ 5752 static void 5753 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5754 { 5755 tl_icon_t *tip; 5756 mblk_t **mpp, *mp; 5757 int prim, nprim; 5758 5759 if (nmp->b_datap->db_type == M_PROTO) 5760 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5761 else 5762 nprim = -1; /* M_DATA */ 5763 5764 tip = tl_icon_find(tep, seqno); 5765 if (tip == NULL) { 5766 freemsg(nmp); 5767 return; 5768 } 5769 5770 ASSERT(tip->ti_seqno != 0); 5771 mpp = &tip->ti_mp; 5772 while (*mpp != NULL) { 5773 mp = *mpp; 5774 5775 if (mp->b_datap->db_type == M_PROTO) 5776 prim = ((union T_primitives *)mp->b_rptr)->type; 5777 else 5778 prim = -1; /* M_DATA */ 5779 5780 /* 5781 * Allow nothing after a T_DISCON_IND 5782 */ 5783 if (prim == T_DISCON_IND) { 5784 freemsg(nmp); 5785 return; 5786 } 5787 /* 5788 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5789 */ 5790 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5791 freemsg(nmp); 5792 return; 5793 } 5794 mpp = &(mp->b_next); 5795 } 5796 *mpp = nmp; 5797 } 5798 5799 /* 5800 * Verify if a certain TPI primitive exists on the connind queue. 5801 * Use prim -1 for M_DATA. 5802 * Return non-zero if found. 5803 */ 5804 static boolean_t 5805 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5806 { 5807 tl_icon_t *tip = tl_icon_find(tep, seqno); 5808 boolean_t found = B_FALSE; 5809 5810 if (tip != NULL) { 5811 mblk_t *mp; 5812 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5813 found = (DB_TYPE(mp) == M_PROTO && 5814 ((union T_primitives *)mp->b_rptr)->type == prim); 5815 } 5816 } 5817 return (found); 5818 } 5819 5820 /* 5821 * Send the b_next mblk chain that has accumulated before the connection 5822 * was accepted. Perform the necessary state transitions. 
5823 */ 5824 static void 5825 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5826 { 5827 mblk_t *mp; 5828 union T_primitives *primp; 5829 5830 if (tep->te_closing) { 5831 tl_icon_freemsgs(mpp); 5832 return; 5833 } 5834 5835 ASSERT(tep->te_state == TS_DATA_XFER); 5836 ASSERT(tep->te_rq->q_first == NULL); 5837 5838 while ((mp = *mpp) != NULL) { 5839 *mpp = mp->b_next; 5840 mp->b_next = NULL; 5841 5842 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5843 switch (DB_TYPE(mp)) { 5844 default: 5845 freemsg(mp); 5846 break; 5847 case M_DATA: 5848 putnext(tep->te_rq, mp); 5849 break; 5850 case M_PROTO: 5851 primp = (union T_primitives *)mp->b_rptr; 5852 switch (primp->type) { 5853 case T_UNITDATA_IND: 5854 case T_DATA_IND: 5855 case T_OPTDATA_IND: 5856 case T_EXDATA_IND: 5857 putnext(tep->te_rq, mp); 5858 break; 5859 case T_ORDREL_IND: 5860 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5861 tep->te_state); 5862 putnext(tep->te_rq, mp); 5863 break; 5864 case T_DISCON_IND: 5865 tep->te_state = TS_IDLE; 5866 putnext(tep->te_rq, mp); 5867 break; 5868 default: 5869 #ifdef DEBUG 5870 cmn_err(CE_PANIC, 5871 "tl_icon_sendmsgs: unknown primitive"); 5872 #endif /* DEBUG */ 5873 freemsg(mp); 5874 break; 5875 } 5876 break; 5877 } 5878 } 5879 } 5880 5881 /* 5882 * Free the b_next mblk chain that has accumulated before the connection 5883 * was accepted. 5884 */ 5885 static void 5886 tl_icon_freemsgs(mblk_t **mpp) 5887 { 5888 mblk_t *mp; 5889 5890 while ((mp = *mpp) != NULL) { 5891 *mpp = mp->b_next; 5892 mp->b_next = NULL; 5893 freemsg(mp); 5894 } 5895 } 5896 5897 /* 5898 * Send M_ERROR 5899 * Note: assumes caller ensured enough space in mp or enough 5900 * memory available. 
Does not attempt recovery from allocb() 5901 * failures 5902 */ 5903 5904 static void 5905 tl_merror(queue_t *wq, mblk_t *mp, int error) 5906 { 5907 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5908 5909 if (tep->te_closing) { 5910 freemsg(mp); 5911 return; 5912 } 5913 5914 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5915 SL_TRACE|SL_ERROR, 5916 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5917 5918 /* 5919 * flush all messages on queue. we are shutting 5920 * the stream down on fatal error 5921 */ 5922 flushq(wq, FLUSHALL); 5923 if (IS_COTS(tep)) { 5924 /* connection oriented - unconnect endpoints */ 5925 tl_co_unconnect(tep); 5926 } 5927 if (mp->b_cont) { 5928 freemsg(mp->b_cont); 5929 mp->b_cont = NULL; 5930 } 5931 5932 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5933 freemsg(mp); 5934 mp = allocb(1, BPRI_HI); 5935 if (!mp) { 5936 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5937 SL_TRACE|SL_ERROR, 5938 "tl_merror:M_PROTO: out of memory")); 5939 return; 5940 } 5941 } 5942 if (mp) { 5943 DB_TYPE(mp) = M_ERROR; 5944 mp->b_rptr = DB_BASE(mp); 5945 *mp->b_rptr = (char)error; 5946 mp->b_wptr = mp->b_rptr + sizeof (char); 5947 qreply(wq, mp); 5948 } else { 5949 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5950 } 5951 } 5952 5953 static void 5954 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5955 { 5956 ASSERT(cr != NULL); 5957 5958 if (flag & TL_SETCRED) { 5959 struct opthdr *opt = (struct opthdr *)buf; 5960 tl_credopt_t *tlcred; 5961 5962 opt->level = TL_PROT_LEVEL; 5963 opt->name = TL_OPT_PEER_CRED; 5964 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5965 5966 tlcred = (tl_credopt_t *)(opt + 1); 5967 tlcred->tc_uid = crgetuid(cr); 5968 tlcred->tc_gid = crgetgid(cr); 5969 tlcred->tc_ruid = crgetruid(cr); 5970 tlcred->tc_rgid = crgetrgid(cr); 5971 tlcred->tc_suid = crgetsuid(cr); 5972 tlcred->tc_sgid = crgetsgid(cr); 5973 tlcred->tc_ngroups = crgetngroups(cr); 5974 } else if (flag & TL_SETUCRED) { 5975 struct opthdr *opt = (struct 
opthdr *)buf; 5976 5977 opt->level = TL_PROT_LEVEL; 5978 opt->name = TL_OPT_PEER_UCRED; 5979 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); 5980 5981 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5982 } else { 5983 struct T_opthdr *topt = (struct T_opthdr *)buf; 5984 ASSERT(flag & TL_SOCKUCRED); 5985 5986 topt->level = SOL_SOCKET; 5987 topt->name = SCM_UCRED; 5988 topt->len = ucredminsize(cr) + sizeof (*topt); 5989 topt->status = 0; 5990 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 5991 } 5992 } 5993 5994 /* ARGSUSED */ 5995 static int 5996 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5997 { 5998 /* no default value processed in protocol specific code currently */ 5999 return (-1); 6000 } 6001 6002 /* ARGSUSED */ 6003 static int 6004 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6005 { 6006 int len; 6007 tl_endpt_t *tep; 6008 int *valp; 6009 6010 tep = (tl_endpt_t *)wq->q_ptr; 6011 6012 len = 0; 6013 6014 /* 6015 * Assumes: option level and name sanity check done elsewhere 6016 */ 6017 6018 switch (level) { 6019 case SOL_SOCKET: 6020 if (! IS_SOCKET(tep)) 6021 break; 6022 switch (name) { 6023 case SO_RECVUCRED: 6024 len = sizeof (int); 6025 valp = (int *)ptr; 6026 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6027 break; 6028 default: 6029 break; 6030 } 6031 break; 6032 case TL_PROT_LEVEL: 6033 switch (name) { 6034 case TL_OPT_PEER_CRED: 6035 case TL_OPT_PEER_UCRED: 6036 /* 6037 * option not supposed to retrieved directly 6038 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6039 * when some internal flags set by other options 6040 * Direct retrieval always designed to fail(ignored) 6041 * for this option. 
6042 */ 6043 break; 6044 } 6045 } 6046 return (len); 6047 } 6048 6049 /* ARGSUSED */ 6050 static int 6051 tl_set_opt( 6052 queue_t *wq, 6053 uint_t mgmt_flags, 6054 int level, 6055 int name, 6056 uint_t inlen, 6057 uchar_t *invalp, 6058 uint_t *outlenp, 6059 uchar_t *outvalp, 6060 void *thisdg_attrs, 6061 cred_t *cr) 6062 { 6063 int error; 6064 tl_endpt_t *tep; 6065 6066 tep = (tl_endpt_t *)wq->q_ptr; 6067 6068 error = 0; /* NOERROR */ 6069 6070 /* 6071 * Assumes: option level and name sanity checks done elsewhere 6072 */ 6073 6074 switch (level) { 6075 case SOL_SOCKET: 6076 if (! IS_SOCKET(tep)) { 6077 error = EINVAL; 6078 break; 6079 } 6080 /* 6081 * TBD: fill in other AF_UNIX socket options and then stop 6082 * returning error. 6083 */ 6084 switch (name) { 6085 case SO_RECVUCRED: 6086 /* 6087 * We only support this for datagram sockets; 6088 * getpeerucred handles the connection oriented 6089 * transports. 6090 */ 6091 if (! IS_CLTS(tep)) { 6092 error = EINVAL; 6093 break; 6094 } 6095 if (*(int *)invalp == 0) 6096 tep->te_flag &= ~TL_SOCKUCRED; 6097 else 6098 tep->te_flag |= TL_SOCKUCRED; 6099 break; 6100 default: 6101 error = EINVAL; 6102 break; 6103 } 6104 break; 6105 case TL_PROT_LEVEL: 6106 switch (name) { 6107 case TL_OPT_PEER_CRED: 6108 case TL_OPT_PEER_UCRED: 6109 /* 6110 * option not supposed to be set directly 6111 * Its value in initialized for each endpoint at 6112 * driver open time. 6113 * Direct setting always designed to fail for this 6114 * option. 
6115 */ 6116 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6117 SL_TRACE|SL_ERROR, 6118 "tl_set_opt: option is not supported")); 6119 error = EPROTO; 6120 break; 6121 } 6122 } 6123 return (error); 6124 } 6125 6126 6127 static void 6128 tl_timer(void *arg) 6129 { 6130 queue_t *wq = arg; 6131 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6132 6133 ASSERT(tep); 6134 6135 tep->te_timoutid = 0; 6136 6137 enableok(wq); 6138 /* 6139 * Note: can call wsrv directly here and save context switch 6140 * Consider change when qtimeout (not timeout) is active 6141 */ 6142 qenable(wq); 6143 } 6144 6145 static void 6146 tl_buffer(void *arg) 6147 { 6148 queue_t *wq = arg; 6149 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6150 6151 ASSERT(tep); 6152 6153 tep->te_bufcid = 0; 6154 tep->te_nowsrv = B_FALSE; 6155 6156 enableok(wq); 6157 /* 6158 * Note: can call wsrv directly here and save context switch 6159 * Consider change when qbufcall (not bufcall) is active 6160 */ 6161 qenable(wq); 6162 } 6163 6164 static void 6165 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6166 { 6167 tl_endpt_t *tep; 6168 6169 tep = (tl_endpt_t *)wq->q_ptr; 6170 6171 if (tep->te_closing) { 6172 freemsg(mp); 6173 return; 6174 } 6175 noenable(wq); 6176 6177 (void) insq(wq, wq->q_first, mp); 6178 6179 if (tep->te_bufcid || tep->te_timoutid) { 6180 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6181 "tl_memrecover:recover %p pending", (void *)wq)); 6182 return; 6183 } 6184 6185 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6186 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6187 drv_usectohz(TL_BUFWAIT)); 6188 } 6189 } 6190 6191 static void 6192 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6193 { 6194 ASSERT(tip->ti_seqno != 0); 6195 6196 if (tip->ti_mp != NULL) { 6197 tl_icon_freemsgs(&tip->ti_mp); 6198 tip->ti_mp = NULL; 6199 } 6200 if (tip->ti_tep != NULL) { 6201 tl_refrele(tip->ti_tep); 6202 tip->ti_tep = NULL; 6203 } 6204 list_remove(&tep->te_iconp, tip); 6205 kmem_free(tip, 
sizeof (tl_icon_t)); 6206 tep->te_nicon--; 6207 } 6208 6209 /* 6210 * Remove address from address hash. 6211 */ 6212 static void 6213 tl_addr_unbind(tl_endpt_t *tep) 6214 { 6215 tl_endpt_t *elp; 6216 6217 if (tep->te_flag & TL_ADDRHASHED) { 6218 if (IS_SOCKET(tep)) { 6219 (void) mod_hash_remove(tep->te_addrhash, 6220 (mod_hash_key_t)tep->te_vp, 6221 (mod_hash_val_t *)&elp); 6222 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6223 tep->te_magic = SOU_MAGIC_IMPLICIT; 6224 } else { 6225 (void) mod_hash_remove(tep->te_addrhash, 6226 (mod_hash_key_t)&tep->te_ap, 6227 (mod_hash_val_t *)&elp); 6228 (void) kmem_free(tep->te_abuf, tep->te_alen); 6229 tep->te_alen = -1; 6230 tep->te_abuf = NULL; 6231 } 6232 tep->te_flag &= ~TL_ADDRHASHED; 6233 } 6234 } 6235