1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Multithreaded STREAMS Local Transport Provider. 30 * 31 * OVERVIEW 32 * ======== 33 * 34 * This driver provides TLI as well as socket semantics. It provides 35 * connectionless, connection oriented, and connection oriented with orderly 36 * release transports for TLI and sockets. Each transport type has separate name 37 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 38 * this removes any name space conflicts when binding to socket style transport 39 * addresses. 40 * 41 * NOTE: There is one exception: Socket ticots and ticotsord transports share 42 * the same namespace. In fact, sockets always use ticotsord type transport. 43 * 44 * The driver mode is specified during open() by the minor number used for 45 * open. 46 * 47 * The sockets in addition have the following semantic differences: 48 * No support for passing up credentials (TL_SET[U]CRED). 
 *
 *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *	T_OPTDATA_IND.
 *
 *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
 *	a T_CONN_RES is received from the acceptor. This means that a socket
 *	connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed behind
 * the serializer and are, essentially, single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
 * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener serializer. During T_CONN_RES processing the eager serializer is
 * switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer. To avoid races
 * with endpoint closes while its serializer may be changing, closes are blocked
 * while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after an endpoint closed. The te_closing being set on the endpoint indicates
 * that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with the
 * lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using te_closing flag. It is
 * usually enough to check for te_closing flag since all other state changes
 * happen after this flag is set and the close entered serializer. Immediately
 * after setting te_closing flag tl_close() enters serializer and waits until
 * the callback finishes. This allows all functions called within serializer to
 * simply check te_closing without any locks.
 *
 * Serializer management.
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is constructed
 * and destroyed when the endpoint is destructed. CLTS transports use global
 * serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * the serializer destruction. The serializer should never be destroyed by any
 * function executed inside serializer.
 * This means that close has to wait till
 * all serializer activity for this endpoint is finished before it can drop the
 * last reference on the endpoint (which may as well free the serializer). This
 * is only relevant for COTS transports which manage serializers
 * dynamically. For CLTS transports close may complete without waiting for all
 * serializer activity to finish since serializer is only destroyed at driver
 * detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing
 * client serializer if it has any pending messages on the serializer and
 * instead moves acceptor to listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses modhash hash table implementation. Each transport uses two
 * hash tables - one for finding endpoints by acceptor ID and another one for
 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 * pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash tables lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters serializer twice on close(). The close sequence is the
 * following:
 *
 * 0) Wait until closing is safe (te_closewait becomes zero)
 *	This step is needed to prevent close during serializer switches. In most
 *	cases (close happening after connection establishment) te_closewait is
 *	zero.
 * 1) Set te_closing.
 * 2) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *	It also needs to clear write-side q_next pointers - this should be done
 *	before qprocsoff().
 *
 *	This synchronous serializer entry during close is needed to ensure that
 *	the queue is valid everywhere inside the serializer.
 *
 *	Note that in many cases close will execute tl_close_ser() synchronously,
 *	so it will not wait at all.
 *
 * 3) Calls qprocsoff().
 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to
 *	complete (for COTS transports). For CLTS transport there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up waiting
 *	close if there is any.
 *
 *	Note that in most cases close will enter tl_close_finish_ser()
 *	synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one calls
 * putq() on the read queue. The read side service routine tl_rsrv() is called
 * when the read side stream is back-enabled. It enters serializer synchronously
 * (waits till serializer processing is complete). Within serializer it
 * back-enables all endpoints blocked by the queue for connection-less
 * transports and enables write side service processing for the peer for
 * connection-oriented transports.
 *
 * Read and write side service routines use special mblk_sized space in the
 * endpoint structure to enter perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 * queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput().
 * In order to maintain the ordering between these two queues the
 * STREAMS queue is only manipulated within the serializer, so the ordering is
 * provided by the serializer.
 *
 * Functions called from the tl_wsrv() sometimes may call putbq(). To
 * immediately stop any further processing of the STREAMS message queues the
 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 * side service processing stops when the flag is set.
 *
 * The tl_wsrv() function enters serializer synchronously and waits for it to
 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
 * always bounded by the amount of messages on the STREAMS queue at the time
 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 * queue from another serialized entry which can't happen in parallel. This
 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
 * of it draining forever while writer places new messages on the STREAMS
 * queue).
 *
 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 *
 *
 * Unix Domain Sockets
 * ===================
 *
 * The driver knows the structure of Unix Domain sockets addresses and treats
 * them differently from generic TLI addresses. For sockets implicit binds are
 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 * instead of using address length of zero. Explicit binds specify
 * SOU_MAGIC_EXPLICIT as magic.
 *
 * For implicit binds we always use minor number as soua_vp part of the address
 * and avoid any hash table lookups. This saves two hash tables lookups per
 * anonymous bind.
 *
 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 * tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses minor number as acceptor id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 *	is requested and the endpoint is already in use. There is no point in
 *	generating an unused address since this address will be rejected by
 *	sockfs anyway. For implicit binds it always generates a new address
 *	(sets soua_vp to its minor number).
 *
 * 2) It always uses minor number as acceptor ID and never uses queue
 *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 *	message and they do not use the queue pointer.
 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *	backlog followed by listen(). The listen() should be issued with
 *	non-zero backlog, so sotpi_listen() issues unbind request followed by
 *	bind request to the same address but with a non-zero qlen value. Both
 *	tl_bind() and tl_unbind() require write lock on the hash table to
 *	insert/remove the address. The driver does not remove the address from
 *	the hash for endpoints that are bound to the explicit address and have
 *	backlog of zero. During T_BIND_REQ processing if the address requested
 *	is equal to the address the endpoint already has it updates the backlog
 *	without reinserting the address in the hash table. This optimization
 *	avoids two hash table updates for each listener created.
 *	It also avoids the problem of a "stolen" address when another listener
 *	may use the same address between the unbind and bind and suddenly
 *	listen() fails because address is in use even though the bind()
 *	succeeded.
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and one
 * for Sockets). Functions executing behind serializer can check or modify state
 * of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 * te_lastep field. The next time X talks to some address A it checks whether A
 * is the same as Y's address and if it is there is no need to look up Y. If the
 * address is different or the state of Y is not appropriate (e.g. closed or not
 * idle) X does a lookup using tl_find_peer() and caches the new address.
 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 * on the endpoint found.
 *
 * During close of endpoint Y it doesn't try to remove itself from other
 * endpoints caches. They will detect that Y is gone and will search the peer
 * endpoint again.
 *
 * Flow Control Handling.
 * ----------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t).
For connectionless transports it keeps a 313 * list of endpoints flow controlled by this one. 314 * 315 * Each transport type is represented by a per-transport data structure 316 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 317 * endpoint address hash tables for each transport. It also contains pointer to 318 * transport serializer for connectionless transports. 319 * 320 * Each endpoint keeps a link to its transport structure, so the code can find 321 * all per-transport information quickly. 322 */ 323 324 #include <sys/types.h> 325 #include <sys/inttypes.h> 326 #include <sys/stream.h> 327 #include <sys/stropts.h> 328 #define _SUN_TPI_VERSION 2 329 #include <sys/tihdr.h> 330 #include <sys/strlog.h> 331 #include <sys/debug.h> 332 #include <sys/cred.h> 333 #include <sys/errno.h> 334 #include <sys/kmem.h> 335 #include <sys/id_space.h> 336 #include <sys/modhash.h> 337 #include <sys/mkdev.h> 338 #include <sys/tl.h> 339 #include <sys/stat.h> 340 #include <sys/conf.h> 341 #include <sys/modctl.h> 342 #include <sys/strsun.h> 343 #include <sys/socket.h> 344 #include <sys/socketvar.h> 345 #include <sys/sysmacros.h> 346 #include <sys/xti_xtiopt.h> 347 #include <sys/ddi.h> 348 #include <sys/sunddi.h> 349 #include <sys/zone.h> 350 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 351 #include <inet/optcom.h> 352 #include <sys/strsubr.h> 353 #include <sys/ucred.h> 354 #include <sys/suntpi.h> 355 #include <sys/list.h> 356 #include <sys/serializer.h> 357 358 /* 359 * TBD List 360 * 14 Eliminate state changes through table 361 * 16. AF_UNIX socket options 362 * 17. connect() for ticlts 363 * 18. support for "netstat" to show AF_UNIX plus TLI local 364 * transport connections 365 * 21. 
sanity check to flushing on sending M_ERROR 366 */ 367 368 /* 369 * CONSTANT DECLARATIONS 370 * -------------------- 371 */ 372 373 /* 374 * Local declarations 375 */ 376 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 377 378 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 379 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 380 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 381 /* 382 * Hash tables size. 383 */ 384 #define TL_HASH_SIZE 311 385 386 /* 387 * Definitions for module_info 388 */ 389 #define TL_ID (104) /* module ID number */ 390 #define TL_NAME "tl" /* module name */ 391 #define TL_MINPSZ (0) /* min packet size */ 392 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 393 #define TL_HIWAT (16*1024) /* hi water mark */ 394 #define TL_LOWAT (256) /* lo water mark */ 395 /* 396 * Definition of minor numbers/modes for new transport provider modes. 397 * We view the socket use as a separate mode to get a separate name space. 
398 */ 399 #define TL_TICOTS 0 /* connection oriented transport */ 400 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 401 #define TL_TICLTS 2 /* connectionless transport */ 402 #define TL_UNUSED 3 403 #define TL_SOCKET 4 /* Socket */ 404 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 405 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 406 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 407 408 #define TL_MINOR_MASK 0x7 409 #define TL_MINOR_START (TL_TICLTS + 1) 410 411 /* 412 * LOCAL MACROS 413 */ 414 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 415 416 /* 417 * EXTERNAL VARIABLE DECLARATIONS 418 * ----------------------------- 419 */ 420 /* 421 * state table defined in the OS space.c 422 */ 423 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 424 425 /* 426 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 427 */ 428 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 429 static int tl_close(queue_t *, int, cred_t *); 430 static void tl_wput(queue_t *, mblk_t *); 431 static void tl_wsrv(queue_t *); 432 static void tl_rsrv(queue_t *); 433 434 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 435 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 436 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 437 438 439 /* 440 * GLOBAL DATA STRUCTURES AND VARIABLES 441 * ----------------------------------- 442 */ 443 444 /* 445 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 446 * For now, we only manage the SO_RECVUCRED option but we also have 447 * harmless dummy options to make things work with some common code we access. 
448 */ 449 opdes_t tl_opt_arr[] = { 450 /* The SO_TYPE is needed for the hack below */ 451 { 452 SO_TYPE, 453 SOL_SOCKET, 454 OA_R, 455 OA_R, 456 OP_NP, 457 OP_PASSNEXT, 458 sizeof (t_scalar_t), 459 0 460 }, 461 { 462 SO_RECVUCRED, 463 SOL_SOCKET, 464 OA_RW, 465 OA_RW, 466 OP_NP, 467 OP_PASSNEXT, 468 sizeof (int), 469 0 470 } 471 }; 472 473 /* 474 * Table of all supported levels 475 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 476 * any supported options so we need this info separately. 477 * 478 * This is needed only for topmost tpi providers. 479 */ 480 optlevel_t tl_valid_levels_arr[] = { 481 XTI_GENERIC, 482 SOL_SOCKET, 483 TL_PROT_LEVEL 484 }; 485 486 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 487 /* 488 * Current upper bound on the amount of space needed to return all options. 489 * Additional options with data size of sizeof(long) are handled automatically. 490 * Others need hand job. 491 */ 492 #define TL_MAX_OPT_BUF_LEN \ 493 ((A_CNT(tl_opt_arr) << 2) + \ 494 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 495 + 64 + sizeof (struct T_optmgmt_ack)) 496 497 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 498 499 /* 500 * transport addr structure 501 */ 502 typedef struct tl_addr { 503 zoneid_t ta_zoneid; /* Zone scope of address */ 504 t_scalar_t ta_alen; /* length of abuf */ 505 void *ta_abuf; /* the addr itself */ 506 } tl_addr_t; 507 508 /* 509 * Refcounted version of serializer. 510 */ 511 typedef struct tl_serializer { 512 uint_t ts_refcnt; 513 serializer_t *ts_serializer; 514 } tl_serializer_t; 515 516 /* 517 * Each transport type has a separate state. 518 * Per-transport state. 
519 */ 520 typedef struct tl_transport_state { 521 char *tr_name; 522 minor_t tr_minor; 523 uint32_t tr_defaddr; 524 mod_hash_t *tr_ai_hash; 525 mod_hash_t *tr_addr_hash; 526 tl_serializer_t *tr_serializer; 527 } tl_transport_state_t; 528 529 #define TL_DFADDR 0x1000 530 531 static tl_transport_state_t tl_transports[] = { 532 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 533 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 534 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 536 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 537 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 538 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 539 }; 540 541 #define TL_MAXTRANSPORT A_CNT(tl_transports) 542 543 struct tl_endpt; 544 typedef struct tl_endpt tl_endpt_t; 545 546 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 547 548 /* 549 * Data structure used to represent pending connects. 550 * Records enough information so that the connecting peer can close 551 * before the connection gets accepted. 552 */ 553 typedef struct tl_icon { 554 list_node_t ti_node; 555 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 556 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 557 t_scalar_t ti_seqno; /* Sequence number */ 558 } tl_icon_t; 559 560 typedef struct so_ux_addr soux_addr_t; 561 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 562 563 /* 564 * Maximum number of unaccepted connection indications allowed per listener. 
565 */ 566 #define TL_MAXQLEN 4096 567 int tl_maxqlen = TL_MAXQLEN; 568 569 /* 570 * transport endpoint structure 571 */ 572 struct tl_endpt { 573 queue_t *te_rq; /* stream read queue */ 574 queue_t *te_wq; /* stream write queue */ 575 uint32_t te_refcnt; 576 int32_t te_state; /* TPI state of endpoint */ 577 minor_t te_minor; /* minor number */ 578 #define te_seqno te_minor 579 uint_t te_flag; /* flag field */ 580 boolean_t te_nowsrv; 581 tl_serializer_t *te_ser; /* Serializer to use */ 582 #define te_serializer te_ser->ts_serializer 583 584 soux_addr_t te_uxaddr; /* Socket address */ 585 #define te_magic te_uxaddr.soua_magic 586 #define te_vp te_uxaddr.soua_vp 587 tl_addr_t te_ap; /* addr bound to this endpt */ 588 #define te_zoneid te_ap.ta_zoneid 589 #define te_alen te_ap.ta_alen 590 #define te_abuf te_ap.ta_abuf 591 592 tl_transport_state_t *te_transport; 593 #define te_addrhash te_transport->tr_addr_hash 594 #define te_aihash te_transport->tr_ai_hash 595 #define te_defaddr te_transport->tr_defaddr 596 cred_t *te_credp; /* endpoint user credentials */ 597 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 598 599 /* 600 * State specific for connection-oriented and connectionless transports. 601 */ 602 union { 603 /* Connection-oriented state. */ 604 struct { 605 t_uscalar_t _te_nicon; /* count of conn requests */ 606 t_uscalar_t _te_qlen; /* max conn requests */ 607 tl_endpt_t *_te_oconp; /* conn request pending */ 608 tl_endpt_t *_te_conp; /* connected endpt */ 609 #ifndef _ILP32 610 void *_te_pad; 611 #endif 612 list_t _te_iconp; /* list of conn ind. pending */ 613 } _te_cots_state; 614 /* Connection-less state. */ 615 struct { 616 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 617 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 618 list_node_t _te_flows; /* lists of connections */ 619 list_t _te_flowlist; /* Who flowcontrols on me */ 620 } _te_clts_state; 621 } _te_transport_state; 622 #define te_nicon _te_transport_state._te_cots_state._te_nicon 623 #define te_qlen _te_transport_state._te_cots_state._te_qlen 624 #define te_oconp _te_transport_state._te_cots_state._te_oconp 625 #define te_conp _te_transport_state._te_cots_state._te_conp 626 #define te_iconp _te_transport_state._te_cots_state._te_iconp 627 #define te_lastep _te_transport_state._te_clts_state._te_lastep 628 #define te_flowq _te_transport_state._te_clts_state._te_flowq 629 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 630 #define te_flows _te_transport_state._te_clts_state._te_flows 631 632 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 633 timeout_id_t te_timoutid; /* outstanding timeout id */ 634 pid_t te_cpid; /* cached pid of endpoint */ 635 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 636 /* 637 * Pieces of the endpoint state needed for closing. 638 */ 639 kmutex_t te_closelock; 640 kcondvar_t te_closecv; 641 uint8_t te_closing; /* The endpoint started closing */ 642 uint8_t te_closewait; /* Wait in close until zero */ 643 mblk_t te_closemp; /* for entering serializer on close */ 644 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 645 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 646 kmutex_t te_srv_lock; 647 kcondvar_t te_srv_cv; 648 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 649 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 650 /* 651 * Pieces of the endpoint state needed for serializer transitions. 652 */ 653 kmutex_t te_ser_lock; /* Protects the count below */ 654 uint_t te_ser_count; /* Number of messages on serializer */ 655 }; 656 657 /* 658 * Flag values. Lower 4 bits specify that transport used. 
659 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 660 * they allow to identify the endpoint more easily. 661 */ 662 #define TL_LISTENER 0x00010 /* the listener endpoint */ 663 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 664 #define TL_EAGER 0x00040 /* connecting endpoint */ 665 #define TL_ACCEPTED 0x00080 /* accepted connection */ 666 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 667 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 668 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 669 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 670 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 671 /* 672 * Boolean checks for the endpoint type. 673 */ 674 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 675 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 676 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 677 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 678 679 #define TLPID(mp, tep) (DB_CPID(mp) == -1 ? (tep)->te_cpid : DB_CPID(mp)) 680 681 /* 682 * Certain operations are always used together. These macros reduce the chance 683 * of missing a part of a combination. 684 */ 685 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 686 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 687 688 #define TL_PUTBQ(x, mp) { \ 689 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 690 (x)->te_nowsrv = B_TRUE; \ 691 (void) putbq((x)->te_wq, mp); \ 692 } 693 694 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 695 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 696 697 /* 698 * STREAMS driver glue data structures. 
699 */ 700 static struct module_info tl_minfo = { 701 TL_ID, /* mi_idnum */ 702 TL_NAME, /* mi_idname */ 703 TL_MINPSZ, /* mi_minpsz */ 704 TL_MAXPSZ, /* mi_maxpsz */ 705 TL_HIWAT, /* mi_hiwat */ 706 TL_LOWAT /* mi_lowat */ 707 }; 708 709 static struct qinit tl_rinit = { 710 NULL, /* qi_putp */ 711 (int (*)())tl_rsrv, /* qi_srvp */ 712 tl_open, /* qi_qopen */ 713 tl_close, /* qi_qclose */ 714 NULL, /* qi_qadmin */ 715 &tl_minfo, /* qi_minfo */ 716 NULL /* qi_mstat */ 717 }; 718 719 static struct qinit tl_winit = { 720 (int (*)())tl_wput, /* qi_putp */ 721 (int (*)())tl_wsrv, /* qi_srvp */ 722 NULL, /* qi_qopen */ 723 NULL, /* qi_qclose */ 724 NULL, /* qi_qadmin */ 725 &tl_minfo, /* qi_minfo */ 726 NULL /* qi_mstat */ 727 }; 728 729 static struct streamtab tlinfo = { 730 &tl_rinit, /* st_rdinit */ 731 &tl_winit, /* st_wrinit */ 732 NULL, /* st_muxrinit */ 733 NULL /* st_muxwrinit */ 734 }; 735 736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 737 nulldev, tl_info, D_MP, &tlinfo); 738 739 static struct modldrv modldrv = { 740 &mod_driverops, /* Type of module -- pseudo driver here */ 741 "TPI Local Transport (tl) %I%", 742 &tl_devops, /* driver ops */ 743 }; 744 745 /* 746 * Module linkage information for the kernel. 747 */ 748 static struct modlinkage modlinkage = { 749 MODREV_1, 750 &modldrv, 751 NULL 752 }; 753 754 /* 755 * Templates for response to info request 756 * Check sanity of unlimited connect data etc. 
757 */ 758 759 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 760 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 761 762 static struct T_info_ack tl_cots_info_ack = 763 { 764 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 765 T_INFINITE, /* TSDU size */ 766 T_INFINITE, /* ETSDU size */ 767 T_INFINITE, /* CDATA_size */ 768 T_INFINITE, /* DDATA_size */ 769 T_INFINITE, /* ADDR_size */ 770 T_INFINITE, /* OPT_size */ 771 0, /* TIDU_size - fill at run time */ 772 T_COTS, /* SERV_type */ 773 -1, /* CURRENT_state */ 774 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 775 }; 776 777 static struct T_info_ack tl_clts_info_ack = 778 { 779 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 780 0, /* TSDU_size - fill at run time */ 781 -2, /* ETSDU_size -2 => not supported */ 782 -2, /* CDATA_size -2 => not supported */ 783 -2, /* DDATA_size -2 => not supported */ 784 -1, /* ADDR_size -1 => unlimited */ 785 -1, /* OPT_size */ 786 0, /* TIDU_size - fill at run time */ 787 T_CLTS, /* SERV_type */ 788 -1, /* CURRENT_state */ 789 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 790 }; 791 792 /* 793 * private copy of devinfo pointer used in tl_info 794 */ 795 static dev_info_t *tl_dip; 796 797 /* 798 * Endpoints cache. 799 */ 800 static kmem_cache_t *tl_cache; 801 /* 802 * Minor number space. 803 */ 804 static id_space_t *tl_minors; 805 806 /* 807 * Default Data Unit size. 808 */ 809 static t_scalar_t tl_tidusz; 810 811 /* 812 * Size of hash tables. 813 */ 814 static size_t tl_hash_size = TL_HASH_SIZE; 815 816 /* 817 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 818 * for sockets. 
819 */ 820 static int tl_disable_early_connect = 0; 821 static int tl_client_closing_when_accepting; 822 823 static int tl_serializer_noswitch; 824 825 /* 826 * LOCAL FUNCTION PROTOTYPES 827 * ------------------------- 828 */ 829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 830 static void tl_do_proto(mblk_t *, tl_endpt_t *); 831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 834 t_scalar_t); 835 static void tl_bind(mblk_t *, tl_endpt_t *); 836 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 837 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 838 static void tl_unbind(mblk_t *, tl_endpt_t *); 839 static void tl_optmgmt(queue_t *, mblk_t *); 840 static void tl_conn_req(queue_t *, mblk_t *); 841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 842 static void tl_conn_res(mblk_t *, tl_endpt_t *); 843 static void tl_discon_req(mblk_t *, tl_endpt_t *); 844 static void tl_capability_req(mblk_t *, tl_endpt_t *); 845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 846 static void tl_info_req(mblk_t *, tl_endpt_t *); 847 static void tl_addr_req(mblk_t *, tl_endpt_t *); 848 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 849 static void tl_data(mblk_t *, tl_endpt_t *); 850 static void tl_exdata(mblk_t *, tl_endpt_t *); 851 static void tl_ordrel(mblk_t *, tl_endpt_t *); 852 static void tl_unitdata(mblk_t *, tl_endpt_t *); 853 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 854 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 855 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 856 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 857 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 858 static void tl_cl_backenable(tl_endpt_t *); 859 static void tl_co_unconnect(tl_endpt_t *); 860 static mblk_t *tl_resizemp(mblk_t *, ssize_t); 861 static void 
tl_discon_ind(tl_endpt_t *, uint32_t); 862 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 863 static mblk_t *tl_ordrel_ind_alloc(void); 864 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 865 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 866 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 867 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 868 static void tl_icon_freemsgs(mblk_t **); 869 static void tl_merror(queue_t *, mblk_t *, int); 870 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 871 static int tl_default_opt(queue_t *, int, int, uchar_t *); 872 static int tl_get_opt(queue_t *, int, int, uchar_t *); 873 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 874 uchar_t *, void *, cred_t *, mblk_t *); 875 static void tl_memrecover(queue_t *, mblk_t *, size_t); 876 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 877 static void tl_free(tl_endpt_t *); 878 static int tl_constructor(void *, void *, int); 879 static void tl_destructor(void *, void *); 880 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 881 static tl_serializer_t *tl_serializer_alloc(int); 882 static void tl_serializer_refhold(tl_serializer_t *); 883 static void tl_serializer_refrele(tl_serializer_t *); 884 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 885 static void tl_serializer_exit(tl_endpt_t *); 886 static boolean_t tl_noclose(tl_endpt_t *); 887 static void tl_closeok(tl_endpt_t *); 888 static void tl_refhold(tl_endpt_t *); 889 static void tl_refrele(tl_endpt_t *); 890 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 891 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 892 static void tl_close_ser(mblk_t *, tl_endpt_t *); 893 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 894 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 895 static void tl_proto_ser(mblk_t *, tl_endpt_t *); 896 static void 
tl_putq_ser(mblk_t *, tl_endpt_t *); 897 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 898 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 899 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 900 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 901 static void tl_addr_unbind(tl_endpt_t *); 902 903 /* 904 * Intialize option database object for TL 905 */ 906 907 optdb_obj_t tl_opt_obj = { 908 tl_default_opt, /* TL default value function pointer */ 909 tl_get_opt, /* TL get function pointer */ 910 tl_set_opt, /* TL set function pointer */ 911 B_TRUE, /* TL is tpi provider */ 912 TL_OPT_ARR_CNT, /* TL option database count of entries */ 913 tl_opt_arr, /* TL option database */ 914 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 915 tl_valid_levels_arr /* TL valid level array */ 916 }; 917 918 /* 919 * Logical operations. 920 * 921 * IMPLY(X, Y) means that X implies Y i.e. when X is true, Y 922 * should also be true. 923 * 924 * EQUIV(X, Y) is logical equivalence. Both X and Y should be true or falce at 925 * the same time. 926 */ 927 #define IMPLY(X, Y) (!(X) || (Y)) 928 #define EQUIV(X, Y) (IMPLY(X, Y) && IMPLY(Y, X)) 929 930 /* 931 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 932 * --------------------------------------- 933 */ 934 935 /* 936 * Loadable module routines 937 */ 938 int 939 _init(void) 940 { 941 return (mod_install(&modlinkage)); 942 } 943 944 int 945 _fini(void) 946 { 947 return (mod_remove(&modlinkage)); 948 } 949 950 int 951 _info(struct modinfo *modinfop) 952 { 953 return (mod_info(&modlinkage, modinfop)); 954 } 955 956 /* 957 * Driver Entry Points and Other routines 958 */ 959 static int 960 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 961 { 962 int i; 963 char name[32]; 964 965 /* 966 * Resume from a checkpoint state. 967 */ 968 if (cmd == DDI_RESUME) 969 return (DDI_SUCCESS); 970 971 if (cmd != DDI_ATTACH) 972 return (DDI_FAILURE); 973 974 /* 975 * Deduce TIDU size to use. 
Note: "strmsgsz" being 0 has semantics that 976 * streams message sizes can be unlimited. We use a defined constant 977 * instead. 978 */ 979 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 980 981 /* 982 * Create subdevices for each transport. 983 */ 984 for (i = 0; i < TL_UNUSED; i++) { 985 if (ddi_create_minor_node(devi, 986 tl_transports[i].tr_name, 987 S_IFCHR, tl_transports[i].tr_minor, 988 DDI_PSEUDO, NULL) == DDI_FAILURE) { 989 ddi_remove_minor_node(devi, NULL); 990 return (DDI_FAILURE); 991 } 992 } 993 994 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 995 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 996 997 if (tl_cache == NULL) { 998 ddi_remove_minor_node(devi, NULL); 999 return (DDI_FAILURE); 1000 } 1001 1002 tl_minors = id_space_create("tl_minor_space", 1003 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 1004 1005 /* 1006 * Create ID space for minor numbers 1007 */ 1008 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1009 tl_transport_state_t *t = &tl_transports[i]; 1010 1011 if (i == TL_UNUSED) 1012 continue; 1013 1014 /* Socket COTSORD shares namespace with COTS */ 1015 if (i == TL_SOCK_COTSORD) { 1016 t->tr_ai_hash = 1017 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1018 ASSERT(t->tr_ai_hash != NULL); 1019 t->tr_addr_hash = 1020 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1021 ASSERT(t->tr_addr_hash != NULL); 1022 continue; 1023 } 1024 1025 /* 1026 * Create hash tables. 
1027 */ 1028 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1029 t->tr_name); 1030 #ifdef _ILP32 1031 if (i & TL_SOCKET) 1032 t->tr_ai_hash = 1033 mod_hash_create_idhash(name, tl_hash_size - 1, 1034 mod_hash_null_valdtor); 1035 else 1036 t->tr_ai_hash = 1037 mod_hash_create_ptrhash(name, tl_hash_size, 1038 mod_hash_null_valdtor, sizeof (queue_t)); 1039 #else 1040 t->tr_ai_hash = 1041 mod_hash_create_idhash(name, tl_hash_size - 1, 1042 mod_hash_null_valdtor); 1043 #endif /* _ILP32 */ 1044 1045 if (i & TL_SOCKET) { 1046 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1047 t->tr_name); 1048 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1049 tl_hash_size, mod_hash_null_valdtor, 1050 sizeof (uintptr_t)); 1051 } else { 1052 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1053 t->tr_name); 1054 t->tr_addr_hash = mod_hash_create_extended(name, 1055 tl_hash_size, mod_hash_null_keydtor, 1056 mod_hash_null_valdtor, 1057 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1058 } 1059 1060 /* Create serializer for connectionless transports. */ 1061 if (i & TL_TICLTS) 1062 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1063 } 1064 1065 tl_dip = devi; 1066 1067 return (DDI_SUCCESS); 1068 } 1069 1070 static int 1071 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1072 { 1073 int i; 1074 1075 if (cmd == DDI_SUSPEND) 1076 return (DDI_SUCCESS); 1077 1078 if (cmd != DDI_DETACH) 1079 return (DDI_FAILURE); 1080 1081 /* 1082 * Destroy arenas and hash tables. 
1083 */ 1084 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1085 tl_transport_state_t *t = &tl_transports[i]; 1086 1087 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1088 continue; 1089 1090 ASSERT(EQUIV(i & TL_TICLTS, t->tr_serializer != NULL)); 1091 if (t->tr_serializer != NULL) { 1092 tl_serializer_refrele(t->tr_serializer); 1093 t->tr_serializer = NULL; 1094 } 1095 1096 #ifdef _ILP32 1097 if (i & TL_SOCKET) 1098 mod_hash_destroy_idhash(t->tr_ai_hash); 1099 else 1100 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1101 #else 1102 mod_hash_destroy_idhash(t->tr_ai_hash); 1103 #endif /* _ILP32 */ 1104 t->tr_ai_hash = NULL; 1105 if (i & TL_SOCKET) 1106 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1107 else 1108 mod_hash_destroy_hash(t->tr_addr_hash); 1109 t->tr_addr_hash = NULL; 1110 } 1111 1112 kmem_cache_destroy(tl_cache); 1113 tl_cache = NULL; 1114 id_space_destroy(tl_minors); 1115 tl_minors = NULL; 1116 ddi_remove_minor_node(devi, NULL); 1117 return (DDI_SUCCESS); 1118 } 1119 1120 /* ARGSUSED */ 1121 static int 1122 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1123 { 1124 1125 int retcode = DDI_FAILURE; 1126 1127 switch (infocmd) { 1128 1129 case DDI_INFO_DEVT2DEVINFO: 1130 if (tl_dip != NULL) { 1131 *result = (void *)tl_dip; 1132 retcode = DDI_SUCCESS; 1133 } 1134 break; 1135 1136 case DDI_INFO_DEVT2INSTANCE: 1137 *result = (void *)0; 1138 retcode = DDI_SUCCESS; 1139 break; 1140 1141 default: 1142 break; 1143 } 1144 return (retcode); 1145 } 1146 1147 /* 1148 * Endpoint reference management. 
1149 */ 1150 static void 1151 tl_refhold(tl_endpt_t *tep) 1152 { 1153 atomic_add_32(&tep->te_refcnt, 1); 1154 } 1155 1156 static void 1157 tl_refrele(tl_endpt_t *tep) 1158 { 1159 ASSERT(tep->te_refcnt != 0); 1160 1161 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1162 tl_free(tep); 1163 } 1164 1165 /*ARGSUSED*/ 1166 static int 1167 tl_constructor(void *buf, void *cdrarg, int kmflags) 1168 { 1169 tl_endpt_t *tep = buf; 1170 1171 bzero(tep, sizeof (tl_endpt_t)); 1172 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1173 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1174 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1175 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1176 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1177 1178 return (0); 1179 } 1180 1181 /*ARGSUSED*/ 1182 static void 1183 tl_destructor(void *buf, void *cdrarg) 1184 { 1185 tl_endpt_t *tep = buf; 1186 1187 mutex_destroy(&tep->te_closelock); 1188 cv_destroy(&tep->te_closecv); 1189 mutex_destroy(&tep->te_srv_lock); 1190 cv_destroy(&tep->te_srv_cv); 1191 mutex_destroy(&tep->te_ser_lock); 1192 } 1193 1194 static void 1195 tl_free(tl_endpt_t *tep) 1196 { 1197 ASSERT(tep->te_refcnt == 0); 1198 ASSERT(tep->te_transport != NULL); 1199 ASSERT(tep->te_rq == NULL); 1200 ASSERT(tep->te_wq == NULL); 1201 ASSERT(tep->te_ser != NULL); 1202 ASSERT(tep->te_ser_count == 0); 1203 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1204 1205 if (IS_SOCKET(tep)) { 1206 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1207 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1208 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1209 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1210 } else if (tep->te_abuf != NULL) { 1211 kmem_free(tep->te_abuf, tep->te_alen); 1212 tep->te_alen = -1; /* uninitialized */ 1213 tep->te_abuf = NULL; 1214 } else { 1215 ASSERT(tep->te_alen == -1); 1216 } 1217 1218 id_free(tl_minors, tep->te_minor); 1219 ASSERT(tep->te_credp == NULL); 1220 1221 if (tep->te_hash_hndl != NULL) 1222 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1223 1224 if (IS_COTS(tep)) { 1225 TL_REMOVE_PEER(tep->te_conp); 1226 TL_REMOVE_PEER(tep->te_oconp); 1227 tl_serializer_refrele(tep->te_ser); 1228 tep->te_ser = NULL; 1229 ASSERT(tep->te_nicon == 0); 1230 ASSERT(list_head(&tep->te_iconp) == NULL); 1231 } else { 1232 ASSERT(tep->te_lastep == NULL); 1233 ASSERT(list_head(&tep->te_flowlist) == NULL); 1234 ASSERT(tep->te_flowq == NULL); 1235 } 1236 1237 ASSERT(tep->te_bufcid == 0); 1238 ASSERT(tep->te_timoutid == 0); 1239 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1240 tep->te_acceptor_id = 0; 1241 1242 ASSERT(tep->te_closewait == 0); 1243 ASSERT(!tep->te_rsrv_active); 1244 ASSERT(!tep->te_wsrv_active); 1245 tep->te_closing = 0; 1246 tep->te_nowsrv = B_FALSE; 1247 tep->te_flag = 0; 1248 1249 kmem_cache_free(tl_cache, tep); 1250 } 1251 1252 /* 1253 * Allocate/free reference-counted wrappers for serializers. 
1254 */ 1255 static tl_serializer_t * 1256 tl_serializer_alloc(int flags) 1257 { 1258 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1259 serializer_t *ser; 1260 1261 if (s == NULL) 1262 return (NULL); 1263 1264 ser = serializer_create(flags); 1265 1266 if (ser == NULL) { 1267 kmem_free(s, sizeof (tl_serializer_t)); 1268 return (NULL); 1269 } 1270 1271 s->ts_refcnt = 1; 1272 s->ts_serializer = ser; 1273 return (s); 1274 } 1275 1276 static void 1277 tl_serializer_refhold(tl_serializer_t *s) 1278 { 1279 atomic_add_32(&s->ts_refcnt, 1); 1280 } 1281 1282 static void 1283 tl_serializer_refrele(tl_serializer_t *s) 1284 { 1285 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1286 serializer_destroy(s->ts_serializer); 1287 kmem_free(s, sizeof (tl_serializer_t)); 1288 } 1289 } 1290 1291 /* 1292 * Post a request on the endpoint serializer. For COTS transports keep track of 1293 * the number of pending requests. 1294 */ 1295 static void 1296 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1297 { 1298 if (IS_COTS(tep)) { 1299 mutex_enter(&tep->te_ser_lock); 1300 tep->te_ser_count++; 1301 mutex_exit(&tep->te_ser_lock); 1302 } 1303 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1304 } 1305 1306 /* 1307 * Complete processing the request on the serializer. Decrement the counter for 1308 * pending requests for COTS transports. 1309 */ 1310 static void 1311 tl_serializer_exit(tl_endpt_t *tep) 1312 { 1313 if (IS_COTS(tep)) { 1314 mutex_enter(&tep->te_ser_lock); 1315 ASSERT(tep->te_ser_count != 0); 1316 tep->te_ser_count--; 1317 mutex_exit(&tep->te_ser_lock); 1318 } 1319 } 1320 1321 /* 1322 * Hash management functions. 1323 */ 1324 1325 /* 1326 * Return TRUE if two addresses are equal, false otherwise. 
1327 */ 1328 static boolean_t 1329 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1330 { 1331 return ((ap1->ta_alen > 0) && 1332 (ap1->ta_alen == ap2->ta_alen) && 1333 (ap1->ta_zoneid == ap2->ta_zoneid) && 1334 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1335 } 1336 1337 /* 1338 * This function is called whenever an endpoint is found in the hash table. 1339 */ 1340 /* ARGSUSED0 */ 1341 static void 1342 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1343 { 1344 tl_refhold((tl_endpt_t *)val); 1345 } 1346 1347 /* 1348 * Address hash function. 1349 */ 1350 /* ARGSUSED */ 1351 static uint_t 1352 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1353 { 1354 tl_addr_t *ap = (tl_addr_t *)key; 1355 size_t len = ap->ta_alen; 1356 uchar_t *p = ap->ta_abuf; 1357 uint_t i, g; 1358 1359 ASSERT((len > 0) && (p != NULL)); 1360 1361 for (i = ap->ta_zoneid; len -- != 0; p++) { 1362 i = (i << 4) + (*p); 1363 if ((g = (i & 0xf0000000U)) != 0) { 1364 i ^= (g >> 24); 1365 i ^= g; 1366 } 1367 } 1368 return (i); 1369 } 1370 1371 /* 1372 * This function is used by hash lookups. It compares two generic addresses. 1373 */ 1374 static int 1375 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1376 { 1377 #ifdef DEBUG 1378 tl_addr_t *ap1 = (tl_addr_t *)key1; 1379 tl_addr_t *ap2 = (tl_addr_t *)key2; 1380 1381 ASSERT(key1 != NULL); 1382 ASSERT(key2 != NULL); 1383 1384 ASSERT(ap1->ta_abuf != NULL); 1385 ASSERT(ap2->ta_abuf != NULL); 1386 ASSERT(ap1->ta_alen > 0); 1387 ASSERT(ap2->ta_alen > 0); 1388 #endif 1389 1390 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1391 } 1392 1393 /* 1394 * Prevent endpoint from closing if possible. 1395 * Return B_TRUE on success, B_FALSE on failure. 1396 */ 1397 static boolean_t 1398 tl_noclose(tl_endpt_t *tep) 1399 { 1400 boolean_t rc = B_FALSE; 1401 1402 mutex_enter(&tep->te_closelock); 1403 if (! 
tep->te_closing) { 1404 ASSERT(tep->te_closewait == 0); 1405 tep->te_closewait++; 1406 rc = B_TRUE; 1407 } 1408 mutex_exit(&tep->te_closelock); 1409 return (rc); 1410 } 1411 1412 /* 1413 * Allow endpoint to close if needed. 1414 */ 1415 static void 1416 tl_closeok(tl_endpt_t *tep) 1417 { 1418 ASSERT(tep->te_closewait > 0); 1419 mutex_enter(&tep->te_closelock); 1420 ASSERT(tep->te_closewait == 1); 1421 tep->te_closewait--; 1422 cv_signal(&tep->te_closecv); 1423 mutex_exit(&tep->te_closelock); 1424 } 1425 1426 /* 1427 * STREAMS open entry point. 1428 */ 1429 /* ARGSUSED */ 1430 static int 1431 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1432 { 1433 tl_endpt_t *tep; 1434 minor_t minor = getminor(*devp); 1435 1436 /* 1437 * Driver is called directly. Both CLONEOPEN and MODOPEN 1438 * are illegal 1439 */ 1440 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1441 return (ENXIO); 1442 1443 if (rq->q_ptr != NULL) 1444 return (0); 1445 1446 /* Minor number should specify the mode used for the driver. */ 1447 if ((minor >= TL_UNUSED)) 1448 return (ENXIO); 1449 1450 if (oflag & SO_SOCKSTR) { 1451 minor |= TL_SOCKET; 1452 } 1453 1454 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1455 tep->te_refcnt = 1; 1456 tep->te_cpid = curproc->p_pid; 1457 rq->q_ptr = WR(rq)->q_ptr = tep; 1458 tep->te_state = TS_UNBND; 1459 tep->te_credp = credp; 1460 crhold(credp); 1461 tep->te_zoneid = getzoneid(); 1462 1463 tep->te_flag = minor & TL_MINOR_MASK; 1464 tep->te_transport = &tl_transports[minor]; 1465 1466 /* Allocate a unique minor number for this instance. */ 1467 tep->te_minor = (minor_t)id_alloc(tl_minors); 1468 1469 /* Reserve hash handle for bind(). 
*/ 1470 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1471 1472 /* Transport-specific initialization */ 1473 if (IS_COTS(tep)) { 1474 /* Use private serializer */ 1475 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1476 1477 /* Create list for pending connections */ 1478 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1479 offsetof(tl_icon_t, ti_node)); 1480 tep->te_qlen = 0; 1481 tep->te_nicon = 0; 1482 tep->te_oconp = NULL; 1483 tep->te_conp = NULL; 1484 } else { 1485 /* Use shared serializer */ 1486 tep->te_ser = tep->te_transport->tr_serializer; 1487 bzero(&tep->te_flows, sizeof (list_node_t)); 1488 /* Create list for flow control */ 1489 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1490 offsetof(tl_endpt_t, te_flows)); 1491 tep->te_flowq = NULL; 1492 tep->te_lastep = NULL; 1493 1494 } 1495 1496 /* Initialize endpoint address */ 1497 if (IS_SOCKET(tep)) { 1498 /* Socket-specific address handling. */ 1499 tep->te_alen = TL_SOUX_ADDRLEN; 1500 tep->te_abuf = &tep->te_uxaddr; 1501 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1502 tep->te_magic = SOU_MAGIC_IMPLICIT; 1503 } else { 1504 tep->te_alen = -1; 1505 tep->te_abuf = NULL; 1506 } 1507 1508 /* clone the driver */ 1509 *devp = makedevice(getmajor(*devp), tep->te_minor); 1510 1511 tep->te_rq = rq; 1512 tep->te_wq = WR(rq); 1513 1514 #ifdef _ILP32 1515 if (IS_SOCKET(tep)) 1516 tep->te_acceptor_id = tep->te_minor; 1517 else 1518 tep->te_acceptor_id = (t_uscalar_t)rq; 1519 #else 1520 tep->te_acceptor_id = tep->te_minor; 1521 #endif /* _ILP32 */ 1522 1523 1524 qprocson(rq); 1525 1526 /* 1527 * Insert acceptor ID in the hash. The AI hash always sleeps on 1528 * insertion so insertion can't fail. 
1529 */ 1530 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1531 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1532 (mod_hash_val_t)tep); 1533 1534 return (0); 1535 } 1536 1537 /* ARGSUSED1 */ 1538 static int 1539 tl_close(queue_t *rq, int flag, cred_t *credp) 1540 { 1541 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1542 tl_endpt_t *elp = NULL; 1543 queue_t *wq = tep->te_wq; 1544 int rc; 1545 1546 ASSERT(wq == WR(rq)); 1547 1548 /* 1549 * Remove the endpoint from acceptor hash. 1550 */ 1551 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1552 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1553 (mod_hash_val_t *)&elp); 1554 ASSERT(rc == 0 && tep == elp); 1555 if ((rc != 0) || (tep != elp)) { 1556 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1557 SL_TRACE|SL_ERROR, 1558 "tl_close:inconsistency in AI hash")); 1559 } 1560 1561 /* 1562 * Wait till close is safe, then mark endpoint as closing. 1563 */ 1564 mutex_enter(&tep->te_closelock); 1565 while (tep->te_closewait) 1566 cv_wait(&tep->te_closecv, &tep->te_closelock); 1567 tep->te_closing = B_TRUE; 1568 /* 1569 * Will wait for the serializer part of the close to finish, so set 1570 * te_closewait now. 1571 */ 1572 tep->te_closewait = 1; 1573 tep->te_nowsrv = B_FALSE; 1574 mutex_exit(&tep->te_closelock); 1575 1576 /* 1577 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1578 * It is safe because close will wait for tl_close_ser to finish. 1579 */ 1580 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1581 1582 /* 1583 * Wait for the first phase of close to complete before qprocsoff(). 
1584 */ 1585 mutex_enter(&tep->te_closelock); 1586 while (tep->te_closewait) 1587 cv_wait(&tep->te_closecv, &tep->te_closelock); 1588 mutex_exit(&tep->te_closelock); 1589 1590 qprocsoff(rq); 1591 1592 if (tep->te_bufcid) { 1593 qunbufcall(rq, tep->te_bufcid); 1594 tep->te_bufcid = 0; 1595 } 1596 if (tep->te_timoutid) { 1597 (void) quntimeout(rq, tep->te_timoutid); 1598 tep->te_timoutid = 0; 1599 } 1600 1601 /* 1602 * Finish close behind serializer. 1603 * 1604 * For a CLTS endpoint increase a refcount and continue close processing 1605 * with serializer protection. This processing may happen asynchronously 1606 * with the completion of tl_close(). 1607 * 1608 * Fot a COTS endpoint wait before destroying tep since the serializer 1609 * may go away together with tep and we need to destroy serializer 1610 * outside of serializer context. 1611 */ 1612 ASSERT(tep->te_closewait == 0); 1613 if (IS_COTS(tep)) 1614 tep->te_closewait = 1; 1615 else 1616 tl_refhold(tep); 1617 1618 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1619 1620 /* 1621 * For connection-oriented transports wait for all serializer activity 1622 * to settle down. 1623 */ 1624 if (IS_COTS(tep)) { 1625 mutex_enter(&tep->te_closelock); 1626 while (tep->te_closewait) 1627 cv_wait(&tep->te_closecv, &tep->te_closelock); 1628 mutex_exit(&tep->te_closelock); 1629 } 1630 1631 crfree(tep->te_credp); 1632 tep->te_credp = NULL; 1633 tep->te_wq = NULL; 1634 tl_refrele(tep); 1635 /* 1636 * tep is likely to be destroyed now, so can't reference it any more. 1637 */ 1638 1639 rq->q_ptr = wq->q_ptr = NULL; 1640 return (0); 1641 } 1642 1643 /* 1644 * First phase of close processing done behind the serializer. 1645 * 1646 * Do not drop the reference in the end - tl_close() wants this reference to 1647 * stay. 
1648 */ 1649 /* ARGSUSED0 */ 1650 static void 1651 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1652 { 1653 ASSERT(tep->te_closing); 1654 ASSERT(tep->te_closewait == 1); 1655 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1656 1657 tep->te_flag |= TL_CLOSE_SER; 1658 1659 /* 1660 * Drain out all messages on queue except for TL_TICOTS where the 1661 * abortive release semantics permit discarding of data on close 1662 */ 1663 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1664 tl_wsrv_ser(NULL, tep); 1665 } 1666 1667 /* Remove address from hash table. */ 1668 tl_addr_unbind(tep); 1669 /* 1670 * qprocsoff() gets confused when q->q_next is not NULL on the write 1671 * queue of the driver, so clear these before qprocsoff() is called. 1672 * Also clear q_next for the peer since this queue is going away. 1673 */ 1674 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1675 tl_endpt_t *peer_tep = tep->te_conp; 1676 1677 tep->te_wq->q_next = NULL; 1678 if ((peer_tep != NULL) && !peer_tep->te_closing) 1679 peer_tep->te_wq->q_next = NULL; 1680 } 1681 1682 tep->te_rq = NULL; 1683 1684 /* wake up tl_close() */ 1685 tl_closeok(tep); 1686 tl_serializer_exit(tep); 1687 } 1688 1689 /* 1690 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1691 * the reference for CLTS. 1692 * 1693 * Called from serializer. Should drop reference count for CLTS only. 1694 */ 1695 /* ARGSUSED0 */ 1696 static void 1697 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1698 { 1699 ASSERT(tep->te_closing); 1700 ASSERT(IMPLY(IS_CLTS(tep), tep->te_closewait == 0)); 1701 ASSERT(IMPLY(IS_COTS(tep), tep->te_closewait == 1)); 1702 1703 tep->te_state = -1; /* Uninitialized */ 1704 if (IS_COTS(tep)) { 1705 tl_co_unconnect(tep); 1706 } else { 1707 /* Connectionless specific cleanup */ 1708 TL_REMOVE_PEER(tep->te_lastep); 1709 /* 1710 * Backenable anybody that is flow controlled waiting for 1711 * this endpoint. 
1712 */ 1713 tl_cl_backenable(tep); 1714 if (tep->te_flowq != NULL) { 1715 list_remove(&(tep->te_flowq->te_flowlist), tep); 1716 tep->te_flowq = NULL; 1717 } 1718 } 1719 1720 tl_serializer_exit(tep); 1721 if (IS_COTS(tep)) 1722 tl_closeok(tep); 1723 else 1724 tl_refrele(tep); 1725 } 1726 1727 /* 1728 * STREAMS write-side put procedure. 1729 * Enter serializer for most of the processing. 1730 * 1731 * The T_CONN_REQ is processed outside of serializer. 1732 */ 1733 static void 1734 tl_wput(queue_t *wq, mblk_t *mp) 1735 { 1736 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1737 ssize_t msz = MBLKL(mp); 1738 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1739 tlproc_t *tl_proc = NULL; 1740 1741 switch (DB_TYPE(mp)) { 1742 case M_DATA: 1743 /* Only valid for connection-oriented transports */ 1744 if (IS_CLTS(tep)) { 1745 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1746 SL_TRACE|SL_ERROR, 1747 "tl_wput:M_DATA invalid for ticlts driver")); 1748 tl_merror(wq, mp, EPROTO); 1749 return; 1750 } 1751 tl_proc = tl_wput_data_ser; 1752 break; 1753 1754 case M_IOCTL: 1755 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1756 case TL_IOC_CREDOPT: 1757 /* FALLTHROUGH */ 1758 case TL_IOC_UCREDOPT: 1759 /* 1760 * Serialize endpoint state change. 
1761 */ 1762 tl_proc = tl_do_ioctl_ser; 1763 break; 1764 1765 default: 1766 miocnak(wq, mp, 0, EINVAL); 1767 return; 1768 } 1769 break; 1770 1771 case M_FLUSH: 1772 /* 1773 * do canonical M_FLUSH processing 1774 */ 1775 if (*mp->b_rptr & FLUSHW) { 1776 flushq(wq, FLUSHALL); 1777 *mp->b_rptr &= ~FLUSHW; 1778 } 1779 if (*mp->b_rptr & FLUSHR) { 1780 flushq(RD(wq), FLUSHALL); 1781 qreply(wq, mp); 1782 } else { 1783 freemsg(mp); 1784 } 1785 return; 1786 1787 case M_PROTO: 1788 if (msz < sizeof (prim->type)) { 1789 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1790 SL_TRACE|SL_ERROR, 1791 "tl_wput:M_PROTO data too short")); 1792 tl_merror(wq, mp, EPROTO); 1793 return; 1794 } 1795 switch (prim->type) { 1796 case T_OPTMGMT_REQ: 1797 case T_SVR4_OPTMGMT_REQ: 1798 /* 1799 * Process TPI option management requests immediately 1800 * in put procedure regardless of in-order processing 1801 * of already queued messages. 1802 * (Note: This driver supports AF_UNIX socket 1803 * implementation. Unless we implement this processing, 1804 * setsockopt() on socket endpoint will block on flow 1805 * controlled endpoints which it should not. That is 1806 * required for successful execution of VSU socket tests 1807 * and is consistent with BSD socket behavior). 
1808 */ 1809 tl_optmgmt(wq, mp); 1810 return; 1811 case O_T_BIND_REQ: 1812 case T_BIND_REQ: 1813 tl_proc = tl_bind_ser; 1814 break; 1815 case T_CONN_REQ: 1816 if (IS_CLTS(tep)) { 1817 tl_merror(wq, mp, EPROTO); 1818 return; 1819 } 1820 tl_conn_req(wq, mp); 1821 return; 1822 case T_DATA_REQ: 1823 case T_OPTDATA_REQ: 1824 case T_EXDATA_REQ: 1825 case T_ORDREL_REQ: 1826 tl_proc = tl_putq_ser; 1827 break; 1828 case T_UNITDATA_REQ: 1829 if (IS_COTS(tep) || 1830 (msz < sizeof (struct T_unitdata_req))) { 1831 tl_merror(wq, mp, EPROTO); 1832 return; 1833 } 1834 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1835 tl_proc = tl_unitdata_ser; 1836 } else { 1837 tl_proc = tl_putq_ser; 1838 } 1839 break; 1840 default: 1841 /* 1842 * process in service procedure if message already 1843 * queued (maintain in-order processing) 1844 */ 1845 if (wq->q_first != NULL) { 1846 tl_proc = tl_putq_ser; 1847 } else { 1848 tl_proc = tl_wput_ser; 1849 } 1850 break; 1851 } 1852 break; 1853 1854 case M_PCPROTO: 1855 /* 1856 * Check that the message has enough data to figure out TPI 1857 * primitive. 1858 */ 1859 if (msz < sizeof (prim->type)) { 1860 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1861 SL_TRACE|SL_ERROR, 1862 "tl_wput:M_PCROTO data too short")); 1863 tl_merror(wq, mp, EPROTO); 1864 return; 1865 } 1866 switch (prim->type) { 1867 case T_CAPABILITY_REQ: 1868 tl_capability_req(mp, tep); 1869 return; 1870 case T_INFO_REQ: 1871 tl_proc = tl_info_req_ser; 1872 break; 1873 default: 1874 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1875 SL_TRACE|SL_ERROR, 1876 "tl_wput:unknown TPI msg primitive")); 1877 tl_merror(wq, mp, EPROTO); 1878 return; 1879 } 1880 break; 1881 default: 1882 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1883 "tl_wput:default:unexpected Streams message")); 1884 freemsg(mp); 1885 return; 1886 } 1887 1888 /* 1889 * Continue processing via serializer. 
1890 */ 1891 ASSERT(tl_proc != NULL); 1892 tl_refhold(tep); 1893 tl_serializer_enter(tep, tl_proc, mp); 1894 } 1895 1896 /* 1897 * Place message on the queue while preserving order. 1898 */ 1899 static void 1900 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1901 { 1902 if (tep->te_closing) { 1903 tl_wput_ser(mp, tep); 1904 } else { 1905 TL_PUTQ(tep, mp); 1906 tl_serializer_exit(tep); 1907 tl_refrele(tep); 1908 } 1909 1910 } 1911 1912 static void 1913 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1914 { 1915 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1916 1917 switch (DB_TYPE(mp)) { 1918 case M_DATA: 1919 tl_data(mp, tep); 1920 break; 1921 case M_PROTO: 1922 tl_do_proto(mp, tep); 1923 break; 1924 default: 1925 freemsg(mp); 1926 break; 1927 } 1928 } 1929 1930 /* 1931 * Write side put procedure called from serializer. 1932 */ 1933 static void 1934 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1935 { 1936 tl_wput_common_ser(mp, tep); 1937 tl_serializer_exit(tep); 1938 tl_refrele(tep); 1939 } 1940 1941 /* 1942 * M_DATA processing. Called from serializer. 1943 */ 1944 static void 1945 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1946 { 1947 tl_endpt_t *peer_tep = tep->te_conp; 1948 queue_t *peer_rq; 1949 1950 ASSERT(DB_TYPE(mp) == M_DATA); 1951 ASSERT(IS_COTS(tep)); 1952 1953 ASSERT(IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer)); 1954 1955 /* 1956 * fastpath for data. Ignore flow control if tep is closing. 
1957 */ 1958 if ((peer_tep != NULL) && 1959 !peer_tep->te_closing && 1960 ((tep->te_state == TS_DATA_XFER) || 1961 (tep->te_state == TS_WREQ_ORDREL)) && 1962 (tep->te_wq != NULL) && 1963 (tep->te_wq->q_first == NULL) && 1964 ((peer_tep->te_state == TS_DATA_XFER) || 1965 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1966 ((peer_rq = peer_tep->te_rq) != NULL) && 1967 (canputnext(peer_rq) || tep->te_closing)) { 1968 putnext(peer_rq, mp); 1969 } else if (tep->te_closing) { 1970 /* 1971 * It is possible that by the time we got here tep started to 1972 * close. If the write queue is not empty, and the state is 1973 * TS_DATA_XFER the data should be delivered in order, so we 1974 * call putq() instead of freeing the data. 1975 */ 1976 if ((tep->te_wq != NULL) && 1977 ((tep->te_state == TS_DATA_XFER) || 1978 (tep->te_state == TS_WREQ_ORDREL))) { 1979 TL_PUTQ(tep, mp); 1980 } else { 1981 freemsg(mp); 1982 } 1983 } else { 1984 TL_PUTQ(tep, mp); 1985 } 1986 1987 tl_serializer_exit(tep); 1988 tl_refrele(tep); 1989 } 1990 1991 /* 1992 * Write side service routine. 1993 * 1994 * All actual processing happens within serializer which is entered 1995 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1996 * messages that need processing may have arrived, so tl_wsrv repeats until 1997 * queue is empty or te_nowsrv is set. 1998 */ 1999 static void 2000 tl_wsrv(queue_t *wq) 2001 { 2002 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2003 2004 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 2005 mutex_enter(&tep->te_srv_lock); 2006 ASSERT(tep->te_wsrv_active == B_FALSE); 2007 tep->te_wsrv_active = B_TRUE; 2008 mutex_exit(&tep->te_srv_lock); 2009 2010 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2011 2012 /* 2013 * Wait for serializer job to complete. 
2014 */ 2015 mutex_enter(&tep->te_srv_lock); 2016 while (tep->te_wsrv_active) { 2017 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2018 } 2019 cv_signal(&tep->te_srv_cv); 2020 mutex_exit(&tep->te_srv_lock); 2021 } 2022 } 2023 2024 /* 2025 * Serialized write side processing of the STREAMS queue. 2026 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2027 * is NULL. 2028 */ 2029 static void 2030 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2031 { 2032 mblk_t *mp; 2033 queue_t *wq = tep->te_wq; 2034 2035 ASSERT(wq != NULL); 2036 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2037 tl_wput_common_ser(mp, tep); 2038 } 2039 2040 /* 2041 * Wakeup service routine unless called from close. 2042 * If ser_mp is specified, the caller is tl_wsrv(). 2043 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2044 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2045 * be no matching tl_serializer_exit() in this case. 2046 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2047 * waiting on te_srv_cv. 2048 */ 2049 if (ser_mp != NULL) { 2050 /* 2051 * We are called from tl_wsrv. 2052 */ 2053 mutex_enter(&tep->te_srv_lock); 2054 ASSERT(tep->te_wsrv_active); 2055 tep->te_wsrv_active = B_FALSE; 2056 cv_signal(&tep->te_srv_cv); 2057 mutex_exit(&tep->te_srv_lock); 2058 tl_serializer_exit(tep); 2059 } 2060 } 2061 2062 /* 2063 * Called when the stream is backenabled. Enter serializer and qenable everyone 2064 * flow controlled by tep. 2065 * 2066 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2067 * is possible that two instances of tl_rsrv will be running reusing the same 2068 * rsrv mblk. 
2069 */ 2070 static void 2071 tl_rsrv(queue_t *rq) 2072 { 2073 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2074 2075 ASSERT(rq->q_first == NULL); 2076 ASSERT(tep->te_rsrv_active == 0); 2077 2078 tep->te_rsrv_active = B_TRUE; 2079 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2080 /* 2081 * Wait for serializer job to complete. 2082 */ 2083 mutex_enter(&tep->te_srv_lock); 2084 while (tep->te_rsrv_active) { 2085 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2086 } 2087 cv_signal(&tep->te_srv_cv); 2088 mutex_exit(&tep->te_srv_lock); 2089 } 2090 2091 /* ARGSUSED */ 2092 static void 2093 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2094 { 2095 tl_endpt_t *peer_tep; 2096 2097 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2098 tl_cl_backenable(tep); 2099 } else if ( 2100 IS_COTS(tep) && 2101 ((peer_tep = tep->te_conp) != NULL) && 2102 !peer_tep->te_closing && 2103 ((tep->te_state == TS_DATA_XFER) || 2104 (tep->te_state == TS_WIND_ORDREL)|| 2105 (tep->te_state == TS_WREQ_ORDREL))) { 2106 TL_QENABLE(peer_tep); 2107 } 2108 2109 /* 2110 * Wakeup read side service routine. 2111 */ 2112 mutex_enter(&tep->te_srv_lock); 2113 ASSERT(tep->te_rsrv_active); 2114 tep->te_rsrv_active = B_FALSE; 2115 cv_signal(&tep->te_srv_cv); 2116 mutex_exit(&tep->te_srv_lock); 2117 tl_serializer_exit(tep); 2118 } 2119 2120 /* 2121 * process M_PROTO messages. Always called from serializer. 2122 */ 2123 static void 2124 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2125 { 2126 ssize_t msz = MBLKL(mp); 2127 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2128 2129 /* Message size was validated by tl_wput(). 
*/ 2130 ASSERT(msz >= sizeof (prim->type)); 2131 2132 switch (prim->type) { 2133 case T_UNBIND_REQ: 2134 tl_unbind(mp, tep); 2135 break; 2136 2137 case T_ADDR_REQ: 2138 tl_addr_req(mp, tep); 2139 break; 2140 2141 case O_T_CONN_RES: 2142 case T_CONN_RES: 2143 if (IS_CLTS(tep)) { 2144 tl_merror(tep->te_wq, mp, EPROTO); 2145 break; 2146 } 2147 tl_conn_res(mp, tep); 2148 break; 2149 2150 case T_DISCON_REQ: 2151 if (IS_CLTS(tep)) { 2152 tl_merror(tep->te_wq, mp, EPROTO); 2153 break; 2154 } 2155 tl_discon_req(mp, tep); 2156 break; 2157 2158 case T_DATA_REQ: 2159 if (IS_CLTS(tep)) { 2160 tl_merror(tep->te_wq, mp, EPROTO); 2161 break; 2162 } 2163 tl_data(mp, tep); 2164 break; 2165 2166 case T_OPTDATA_REQ: 2167 if (IS_CLTS(tep)) { 2168 tl_merror(tep->te_wq, mp, EPROTO); 2169 break; 2170 } 2171 tl_data(mp, tep); 2172 break; 2173 2174 case T_EXDATA_REQ: 2175 if (IS_CLTS(tep)) { 2176 tl_merror(tep->te_wq, mp, EPROTO); 2177 break; 2178 } 2179 tl_exdata(mp, tep); 2180 break; 2181 2182 case T_ORDREL_REQ: 2183 if (! IS_COTSORD(tep)) { 2184 tl_merror(tep->te_wq, mp, EPROTO); 2185 break; 2186 } 2187 tl_ordrel(mp, tep); 2188 break; 2189 2190 case T_UNITDATA_REQ: 2191 if (IS_COTS(tep)) { 2192 tl_merror(tep->te_wq, mp, EPROTO); 2193 break; 2194 } 2195 tl_unitdata(mp, tep); 2196 break; 2197 2198 default: 2199 tl_merror(tep->te_wq, mp, EPROTO); 2200 break; 2201 } 2202 } 2203 2204 /* 2205 * Process ioctl from serializer. 2206 * This is a wrapper around tl_do_ioctl(). 2207 */ 2208 static void 2209 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2210 { 2211 if (! 
tep->te_closing) 2212 tl_do_ioctl(mp, tep); 2213 else 2214 freemsg(mp); 2215 2216 tl_serializer_exit(tep); 2217 tl_refrele(tep); 2218 } 2219 2220 static void 2221 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2222 { 2223 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2224 int cmd = iocbp->ioc_cmd; 2225 queue_t *wq = tep->te_wq; 2226 int error; 2227 int thisopt, otheropt; 2228 2229 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2230 2231 switch (cmd) { 2232 case TL_IOC_CREDOPT: 2233 if (cmd == TL_IOC_CREDOPT) { 2234 thisopt = TL_SETCRED; 2235 otheropt = TL_SETUCRED; 2236 } else { 2237 /* FALLTHROUGH */ 2238 case TL_IOC_UCREDOPT: 2239 thisopt = TL_SETUCRED; 2240 otheropt = TL_SETCRED; 2241 } 2242 /* 2243 * The credentials passing does not apply to sockets. 2244 * Only one of the cred options can be set at a given time. 2245 */ 2246 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2247 miocnak(wq, mp, 0, EINVAL); 2248 return; 2249 } 2250 2251 /* 2252 * Turn on generation of credential options for 2253 * T_conn_req, T_conn_con, T_unidata_ind. 
2254 */ 2255 error = miocpullup(mp, sizeof (uint32_t)); 2256 if (error != 0) { 2257 miocnak(wq, mp, 0, error); 2258 return; 2259 } 2260 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2261 miocnak(wq, mp, 0, EINVAL); 2262 return; 2263 } 2264 2265 if (*(uint32_t *)mp->b_cont->b_rptr) 2266 tep->te_flag |= thisopt; 2267 else 2268 tep->te_flag &= ~thisopt; 2269 2270 miocack(wq, mp, 0, 0); 2271 break; 2272 2273 default: 2274 /* Should not be here */ 2275 miocnak(wq, mp, 0, EINVAL); 2276 break; 2277 } 2278 } 2279 2280 2281 /* 2282 * send T_ERROR_ACK 2283 * Note: assumes enough memory or caller passed big enough mp 2284 * - no recovery from allocb failures 2285 */ 2286 2287 static void 2288 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2289 t_scalar_t unix_err, t_scalar_t type) 2290 { 2291 struct T_error_ack *err_ack; 2292 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2293 M_PCPROTO, T_ERROR_ACK); 2294 2295 if (ackmp == NULL) { 2296 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2297 "tl_error_ack:out of mblk memory")); 2298 tl_merror(wq, NULL, ENOSR); 2299 return; 2300 } 2301 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2302 err_ack->ERROR_prim = type; 2303 err_ack->TLI_error = tli_err; 2304 err_ack->UNIX_error = unix_err; 2305 2306 /* 2307 * send error ack message 2308 */ 2309 qreply(wq, ackmp); 2310 } 2311 2312 2313 2314 /* 2315 * send T_OK_ACK 2316 * Note: assumes enough memory or caller passed big enough mp 2317 * - no recovery from allocb failures 2318 */ 2319 static void 2320 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2321 { 2322 struct T_ok_ack *ok_ack; 2323 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2324 M_PCPROTO, T_OK_ACK); 2325 2326 if (ackmp == NULL) { 2327 tl_merror(wq, NULL, ENOMEM); 2328 return; 2329 } 2330 2331 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2332 ok_ack->CORRECT_prim = type; 2333 2334 (void) qreply(wq, ackmp); 2335 } 2336 2337 /* 2338 * Process T_BIND_REQ and O_T_BIND_REQ 
 * from serializer.
 * This is a wrapper around tl_bind().
 *
 * The request is dropped if the endpoint is already closing. In both cases
 * the serializer is exited and the endpoint reference is released.
 */
static void
tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
{
	if (! tep->te_closing)
		tl_bind(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
 * Assumes that the endpoint is in the unbound state.
 *
 * On success the endpoint's address is entered in te_addrhash and a
 * T_BIND_ACK carrying the bound address and the negotiated CONIND_number is
 * sent upstream. On failure a T_ERROR_ACK is sent. If the reply cannot be
 * allocated, te_state is rolled back and tl_memrecover() is called with the
 * original request.
 *
 *	mp	the bind request; reused (via reallocb()) for the reply.
 *	tep	endpoint being bound.
 */
static void
tl_bind(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	struct T_bind_ack	*b_ack;
	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
	mblk_t			*ackmp, *bamp;
	soux_addr_t		ux_addr;
	t_uscalar_t		qlen = 0;
	t_scalar_t		alen, aoff;
	tl_addr_t		addr_req;
	void			*addr_startp;
	ssize_t			msz = MBLKL(mp), basize;
	t_scalar_t		tli_err = 0, unix_err = 0;
	t_scalar_t		save_prim_type = bind->PRIM_type;
	t_scalar_t		save_state = tep->te_state;

	/*
	 * NOTE(review): the two checks below jump to "error" before the
	 * TE_BIND_REQ transition has been applied, yet the error path still
	 * applies NEXTSTATE(TE_ERROR_ACK, ...). Confirm the state table
	 * tolerates this.
	 */
	if (tep->te_state != TS_UNBND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:bind_request:out of state, state=%d",
		    tep->te_state));
		tli_err = TOUTSTATE;
		goto error;
	}

	if (msz < sizeof (struct T_bind_req)) {
		tli_err = TSYSERR; unix_err = EINVAL;
		goto error;
	}

	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);

	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
	    (bind->PRIM_type == T_BIND_REQ));

	alen = bind->ADDR_length;
	aoff = bind->ADDR_offset;

	/* negotiate max conn req pending */
	if (IS_COTS(tep)) {
		qlen = bind->CONIND_number;
		if (qlen > tl_maxqlen)
			qlen = tl_maxqlen;
	}

	/*
	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
	 * and bound again.
	 */
	if ((tep->te_hash_hndl == NULL) &&
	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
	    mod_hash_reserve_nosleep(tep->te_addrhash,
	    &tep->te_hash_hndl) != 0) {
		tli_err = TSYSERR; unix_err = ENOSR;
		goto error;
	}

	/*
	 * Verify address correctness.
	 *
	 * NOTE(review): the failing branches below apply
	 * NEXTSTATE(TE_ERROR_ACK, ...) themselves and the "error" label
	 * applies it a second time - confirm this is intended.
	 */
	if (IS_SOCKET(tep)) {
		ASSERT(bind->PRIM_type == O_T_BIND_REQ);

		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: invalid socket addr"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TSYSERR; unix_err = EINVAL;
			goto error;
		}
		/* Copy address from message to local buffer. */
		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
		/*
		 * Check that we got correct address from sockets
		 */
		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: invalid socket magic"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TSYSERR; unix_err = EINVAL;
			goto error;
		}
		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_vp != NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: implicit addr non-empty"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TSYSERR; unix_err = EINVAL;
			goto error;
		}
		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
		    (ux_addr.soua_vp == NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: explicit addr empty"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TSYSERR; unix_err = EINVAL;
			goto error;
		}
	} else {
		if ((alen > 0) && ((aoff < 0) ||
		    ((ssize_t)(aoff + alen) > msz) ||
		    ((aoff + alen) < 0))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: invalid message"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TSYSERR; unix_err = EINVAL;
			goto error;
		}
		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind: bad addr in message"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
			tli_err = TBADADDR;
			goto error;
		}
#ifdef DEBUG
		/*
		 * Mild form of ASSERT()ion to detect broken TPI apps.
		 * if (! assertion)
		 *	log warning;
		 */
		if (! ((alen == 0 && aoff == 0) ||
			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
				    3, SL_TRACE|SL_ERROR,
				    "tl_bind: addr overlaps TPI message"));
		}
#endif
	}

	/*
	 * Bind the address provided or allocate one if requested.
	 * Allow rebinds with a new qlen value.
	 */
	if (IS_SOCKET(tep)) {
		/*
		 * For anonymous requests the te_ap is already set up properly
		 * so use minor number as an address.
		 * For explicit requests need to check whether the address is
		 * already in use.
		 */
		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
			int rc;

			if (tep->te_flag & TL_ADDRHASHED) {
				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
				if (tep->te_vp == ux_addr.soua_vp)
					goto skip_addr_bind;
				else /* Rebind to a new address. */
					tl_addr_unbind(tep);
			}
			/*
			 * Insert address in the hash if it is not already
			 * there.  Since we use preallocated handle, the insert
			 * can fail only if the key is already present.
			 */
			rc = mod_hash_insert_reserve(tep->te_addrhash,
			    (mod_hash_key_t)ux_addr.soua_vp,
			    (mod_hash_val_t)tep, tep->te_hash_hndl);

			if (rc != 0) {
				ASSERT(rc == MH_ERR_DUPLICATE);
				/*
				 * Violate O_T_BIND_REQ semantics and fail with
				 * TADDRBUSY - sockets will not use any address
				 * other than supplied one for explicit binds.
				 */
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE|SL_ERROR,
				    "tl_bind:requested addr %p is busy",
				    ux_addr.soua_vp));
				tli_err = TADDRBUSY; unix_err = 0;
				goto error;
			}
			tep->te_uxaddr = ux_addr;
			tep->te_flag |= TL_ADDRHASHED;
			/* The reserved handle was consumed by the insert. */
			tep->te_hash_hndl = NULL;
		}
	} else if (alen == 0) {
		/*
		 * assign any free address
		 */
		if (! tl_get_any_addr(tep, NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_bind:failed to get buffer for any "
			    "address"));
			tli_err = TSYSERR; unix_err = ENOSR;
			goto error;
		}
	} else {
		addr_req.ta_alen = alen;
		addr_req.ta_abuf = (mp->b_rptr + aoff);
		addr_req.ta_zoneid = tep->te_zoneid;

		/* Keep a private copy of the requested address. */
		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
		if (tep->te_abuf == NULL) {
			tli_err = TSYSERR; unix_err = ENOSR;
			goto error;
		}
		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
		tep->te_alen = alen;

		if (mod_hash_insert_reserve(tep->te_addrhash,
		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
		    tep->te_hash_hndl) != 0) {
			if (save_prim_type == T_BIND_REQ) {
				/*
				 * The bind semantics for this primitive
				 * require a failure if the exact address
				 * requested is busy
				 *
				 * NOTE(review): te_abuf/te_alen stay set on
				 * this path; presumably released later by
				 * unbind/close - confirm.
				 */
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE|SL_ERROR,
				    "tl_bind:requested addr is busy"));
				tli_err = TADDRBUSY; unix_err = 0;
				goto error;
			}

			/*
			 * O_T_BIND_REQ semantics say if the requested
			 * address is busy, bind to any available free address
			 */
			if (! tl_get_any_addr(tep, &addr_req)) {
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE|SL_ERROR,
				    "tl_bind:unable to get any addr buf"));
				tli_err = TSYSERR; unix_err = ENOMEM;
				goto error;
			}
		} else {
			tep->te_flag |= TL_ADDRHASHED;
			/* The reserved handle was consumed by the insert. */
			tep->te_hash_hndl = NULL;
		}
	}

	ASSERT(tep->te_alen >= 0);

skip_addr_bind:
	/*
	 * prepare T_BIND_ACK TPI message
	 */
	basize = sizeof (struct T_bind_ack) + tep->te_alen;
	bamp = reallocb(mp, basize, 0);
	if (bamp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_wput:tl_bind: allocb failed"));
		/*
		 * roll back state changes
		 */
		tl_addr_unbind(tep);
		tep->te_state = TS_UNBND;
		tl_memrecover(wq, mp, basize);
		return;
	}

	DB_TYPE(bamp) = M_PCPROTO;
	bamp->b_wptr = bamp->b_rptr + basize;
	b_ack = (struct T_bind_ack *)bamp->b_rptr;
	b_ack->PRIM_type = T_BIND_ACK;
	b_ack->CONIND_number = qlen;
	b_ack->ADDR_length = tep->te_alen;
	/* The bound address immediately follows the fixed header. */
	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
	bcopy(tep->te_abuf, addr_startp, tep->te_alen);

	if (IS_COTS(tep)) {
		tep->te_qlen = qlen;
		if (qlen > 0)
			tep->te_flag |= TL_LISTENER;
	}

	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
	/*
	 * send T_BIND_ACK message
	 */
	(void) qreply(wq, bamp);
	return;

error:
	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
	if (ackmp == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = save_state;
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
}

/*
 * Process T_UNBIND_REQ.
 * Called from serializer.
2663 */ 2664 static void 2665 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2666 { 2667 queue_t *wq; 2668 mblk_t *ackmp; 2669 2670 if (tep->te_closing) { 2671 freemsg(mp); 2672 return; 2673 } 2674 2675 wq = tep->te_wq; 2676 2677 /* 2678 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2679 * ==> allocate for T_ERROR_ACK (known max) 2680 */ 2681 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2682 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2683 return; 2684 } 2685 /* 2686 * memory resources committed 2687 * Note: no message validation. T_UNBIND_REQ message is 2688 * same size as PRIM_type field so already verified earlier. 2689 */ 2690 2691 /* 2692 * validate state 2693 */ 2694 if (tep->te_state != TS_IDLE) { 2695 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2696 SL_TRACE|SL_ERROR, 2697 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2698 tep->te_state)); 2699 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2700 return; 2701 } 2702 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2703 2704 /* 2705 * TPI says on T_UNBIND_REQ: 2706 * send up a M_FLUSH to flush both 2707 * read and write queues 2708 */ 2709 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2710 2711 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2712 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2713 2714 /* 2715 * Sockets use bind with qlen==0 followed by bind() to 2716 * the same address with qlen > 0 for listeners. 2717 * We allow rebind with a new qlen value. 2718 */ 2719 tl_addr_unbind(tep); 2720 } 2721 2722 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2723 /* 2724 * send T_OK_ACK 2725 */ 2726 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2727 } 2728 2729 2730 /* 2731 * Option management code from drv/ip is used here 2732 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2733 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2734 * However, that is what we want as that option is 'unorthodox' 2735 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2736 * and not in T_SVR4_OPTMGMT_REQ/ACK 2737 * Note2: use of optcom_req means this routine is an exception to 2738 * recovery from allocb() failures. 2739 */ 2740 2741 static void 2742 tl_optmgmt(queue_t *wq, mblk_t *mp) 2743 { 2744 tl_endpt_t *tep; 2745 mblk_t *ackmp; 2746 union T_primitives *prim; 2747 2748 tep = (tl_endpt_t *)wq->q_ptr; 2749 prim = (union T_primitives *)mp->b_rptr; 2750 2751 /* all states OK for AF_UNIX options ? */ 2752 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2753 prim->type == T_SVR4_OPTMGMT_REQ) { 2754 /* 2755 * Broken TLI semantics that options can only be managed 2756 * in TS_IDLE state. Needed for Sparc ABI test suite that 2757 * tests this TLI (mis)feature using this device driver. 2758 */ 2759 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2760 SL_TRACE|SL_ERROR, 2761 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2762 tep->te_state)); 2763 /* 2764 * preallocate memory for T_ERROR_ACK 2765 */ 2766 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2767 if (! ackmp) { 2768 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2769 return; 2770 } 2771 2772 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2773 freemsg(mp); 2774 return; 2775 } 2776 2777 /* 2778 * call common option management routine from drv/ip 2779 */ 2780 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2781 (void) svr4_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, 2782 B_FALSE); 2783 } else { 2784 ASSERT(prim->type == T_OPTMGMT_REQ); 2785 (void) tpi_optcom_req(wq, mp, tep->te_credp, &tl_opt_obj, 2786 B_FALSE); 2787 } 2788 } 2789 2790 /* 2791 * Handle T_conn_req - the driver part of accept(). 2792 * If TL_SET[U]CRED generate the credentials options. 2793 * If this is a socket pass through options unmodified. 2794 * For sockets generate the T_CONN_CON here instead of 2795 * waiting for the T_CONN_RES. 
 */
static void
tl_conn_req(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		alen, aoff, olen, ooff,	err = 0;
	tl_endpt_t		*peer_tep = NULL;
	mblk_t			*ackmp;
	mblk_t			*dimp;
	struct T_discon_ind	*di;
	soux_addr_t		ux_addr;
	tl_addr_t		dst;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_IND later
	 */

	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
		freemsg(mp);
		return;
	}
	alen = creq->DEST_length;
	aoff = creq->DEST_offset;
	olen = creq->OPT_length;
	ooff = creq->OPT_offset;
	if (olen == 0)
		ooff = 0;

	if (IS_SOCKET(tep)) {
		/*
		 * NOTE(review): only the address is validated for sockets;
		 * olen/ooff are not range-checked on this path although
		 * tl_conn_req_ser() later copies olen bytes from offset
		 * ooff. Presumably the socket layer is trusted - confirm.
		 */
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (alen > msz - sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
				    1, SL_TRACE|SL_ERROR,
				    "tl_conn_req: invalid socket addr"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_conn_req: invalid socket magic"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	} else {
		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
		    ooff + olen < 0)) ||
		    olen < 0 || ooff < 0) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req:invalid message"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}

		if (alen <= 0 || aoff < 0 ||
		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE|SL_ERROR,
				    "tl_conn_req:bad addr in message, "
				    "alen=%d, msz=%ld",
				    alen, msz));
			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
#ifdef DEBUG
		/*
		 * Mild form of ASSERT()ion to detect broken TPI apps.
		 * if (! assertion)
		 *	log warning;
		 */
		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req: addr overlaps TPI message"));
		}
#endif
		if (olen) {
			/*
			 * no opts in connect req
			 * supported in this provider except for sockets.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req:options not supported "
			    "in message"));
			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Prevent tep from closing on us.
	 */
	if (! tl_noclose(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:endpoint is closing"));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
	/*
	 * get endpoint to connect to
	 * check that peer with DEST addr is bound to addr
	 * and has CONIND_number > 0
	 */
	dst.ta_alen = alen;
	dst.ta_abuf = mp->b_rptr + aoff;
	dst.ta_zoneid = tep->te_zoneid;

	/*
	 * Verify if remote addr is in use
	 */
	peer_tep = (IS_SOCKET(tep) ?
	    tl_sock_find_peer(tep, &ux_addr) :
	    tl_find_peer(tep, &dst));

	if (peer_tep == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:no one at connect address"));
		err = ECONNREFUSED;
	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
		/*
		 * validate that number of incoming connection is
		 * not to capacity on destination endpoint
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
		    "tl_conn_req: qlen overflow connection refused"));
		err = ECONNREFUSED;
	}

	/*
	 * Send T_DISCON_IND in case of error
	 */
	if (err != 0) {
		if (peer_tep != NULL)
			tl_refrele(peer_tep);
		/* We are still expected to send T_OK_ACK */
		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
		tl_closeok(tep);
		/* The original request is reused for the T_DISCON_IND. */
		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
		    M_PROTO, T_DISCON_IND);
		if (dimp == NULL) {
			tl_merror(wq, NULL, ENOSR);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(tep->te_rq, dimp);
		return;
	}

	ASSERT(IS_COTS(peer_tep));

	/*
	 * Found the listener. At this point processing will continue on
	 * listener serializer. Close of the endpoint should be blocked while we
	 * switch serializers.
	 */
	tl_serializer_refhold(peer_tep->te_ser);
	tl_serializer_refrele(tep->te_ser);
	tep->te_ser = peer_tep->te_ser;
	ASSERT(tep->te_oconp == NULL);
	tep->te_oconp = peer_tep;

	/*
	 * It is safe to close now. Close may continue on listener serializer.
	 */
	tl_closeok(tep);

	/*
	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
	 * data, so we link mp to ackmp.
	 */
	ackmp->b_cont = mp;
	mp = ackmp;

	tl_refhold(tep);
	tl_serializer_enter(tep, tl_conn_req_ser, mp);
}

/*
 * Finish T_CONN_REQ processing on listener serializer.
 *
 *	mp	the preallocated ack message with the original T_CONN_REQ
 *		linked on b_cont (as set up by tl_conn_req()).
 *	tep	connecting endpoint; te_oconp is the listener found by
 *		tl_conn_req().
 *
 * Sends T_OK_ACK to the connecting endpoint and T_CONN_IND to the listener,
 * and queues a tl_icon_t for tep on the listener's te_iconp list. For sockets
 * (unless tl_disable_early_connect is set) a T_CONN_CON is also sent to the
 * connecting endpoint right away. Every failure path disconnects from the
 * listener with TL_UNCONNECT(), exits the serializer and drops the endpoint
 * reference taken by tl_conn_req().
 */
static void
tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	tl_endpt_t		*peer_tep = tep->te_oconp;
	mblk_t			*confmp, *cimp, *indmp;
	void			*opts = NULL;
	mblk_t			*ackmp = mp;
	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_cont->b_rptr;
	struct T_conn_ind	*ci;
	tl_icon_t		*tip;
	void			*addr_startp;
	t_scalar_t		olen = creq->OPT_length;
	t_scalar_t		ooff = creq->OPT_offset;
	size_t 			ci_msz;
	size_t			size;

	if (tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		/* Frees both the ack message and the linked request. */
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;
	tep->te_flag |= TL_EAGER;

	/*
	 * Extract preallocated ackmp from mp.
	 */
	mp = mp->b_cont;
	ackmp->b_cont = NULL;

	if (olen == 0)
		ooff = 0;

	if (peer_tep->te_closing ||
	    !((peer_tep->te_state == TS_IDLE) ||
	    (peer_tep->te_state == TS_WRES_CIND))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_req:peer in bad state (%d)",
		    peer_tep->te_state));
		TL_UNCONNECT(tep->te_oconp);
		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
		freemsg(ackmp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	/*
	 * preallocate now for T_DISCON_IND or T_CONN_IND
	 */
	/*
	 * calculate length of T_CONN_IND message
	 */
	if (peer_tep->te_flag & TL_SETCRED) {
		/* ooff == 0 means no options are copied from the request. */
		ooff = 0;
		olen = (t_scalar_t) sizeof (struct opthdr) +
		    OPTLEN(sizeof (tl_credopt_t));
		/* 1 option only */
	} else if (peer_tep->te_flag & TL_SETUCRED) {
		ooff = 0;
		olen = (t_scalar_t)sizeof (struct opthdr) +
		    OPTLEN(ucredsize);
		/* 1 option only */
	}
	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
	ci_msz = T_ALIGN(ci_msz) + olen;
	size = max(ci_msz, sizeof (struct T_discon_ind));

	/*
	 * Save options from mp - we'll need them for T_CONN_IND.
	 */
	if (ooff != 0) {
		opts = kmem_alloc(olen, KM_NOSLEEP);
		if (opts == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		/* Copy options to a temp buffer */
		bcopy(mp->b_rptr + ooff, opts, olen);
	}

	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
		/*
		 * Generate a T_CONN_CON that has the identical address
		 * (and options) as the T_CONN_REQ.
		 * NOTE: assumes that the T_conn_req and T_conn_con structures
		 * are isomorphic.
		 */
		confmp = copyb(mp);
		if (! confmp) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
			freemsg(ackmp);
			if (opts != NULL)
				kmem_free(opts, olen);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
		    T_CONN_CON;
	} else {
		confmp = NULL;
	}
	if ((indmp = reallocb(mp, size, 0)) == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
	if (tip == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, indmp, sizeof (*tip));
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}
	tip->ti_mp = NULL;

	/*
	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
	 * and tl_icon_t cell.
	 */

	/*
	 * ack validity of request and send the peer credential in the ACK.
	 */
	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);

	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
	    confmp != NULL) {
		mblk_setcred(confmp, peer_tep->te_credp);
		DB_CPID(confmp) = peer_tep->te_cpid;
	}

	tl_ok_ack(wq, ackmp, T_CONN_REQ);

	/*
	 * prepare message to send T_CONN_IND
	 */
	/*
	 * allocate the message - original data blocks retained
	 * in the returned mblk
	 */
	cimp = tl_resizemp(indmp, size);
	if (! cimp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_req:con_ind:allocb failure"));
		tl_merror(wq, indmp, ENOMEM);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		ASSERT(tip->ti_mp == NULL);
		kmem_free(tip, sizeof (*tip));
		return;
	}

	DB_TYPE(cimp) = M_PROTO;
	ci = (struct T_conn_ind *)cimp->b_rptr;
	ci->PRIM_type  = T_CONN_IND;
	/* The source address immediately follows the fixed header. */
	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
	ci->SRC_length = tep->te_alen;
	ci->SEQ_number = tep->te_seqno;

	addr_startp = cimp->b_rptr + ci->SRC_offset;
	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen; /* because only 1 option */
		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
		    DB_CREDDEF(cimp, tep->te_credp),
		    TLPID(cimp, tep),
		    peer_tep->te_flag, peer_tep->te_credp);
	} else if (ooff != 0) {
		/* Copy option from T_CONN_REQ */
		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen;
		ASSERT(opts != NULL);
		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
	} else {
		ci->OPT_offset = 0;
		ci->OPT_length = 0;
	}
	if (opts != NULL)
		kmem_free(opts, olen);

	/*
	 * register connection request with server peer
	 * append to list of incoming connections
	 * increment references for both peer_tep and tep: peer_tep is placed on
	 * te_oconp and tep is placed on listeners queue.
	 */
	tip->ti_tep = tep;
	tip->ti_seqno = tep->te_seqno;
	list_insert_tail(&peer_tep->te_iconp, tip);
	peer_tep->te_nicon++;

	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
	/*
	 * send the T_CONN_IND message
	 */
	putnext(peer_tep->te_rq, cimp);

	/*
	 * Send a T_CONN_CON message for sockets.
	 * Disable the queues until we have reached the correct state!
	 */
	if (confmp != NULL) {
		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
		noenable(wq);
		putnext(tep->te_rq, confmp);
	}
	/*
	 * Now we need to increment tep reference because tep is referenced by
	 * server list of pending connections. We also need to decrement
	 * reference before exiting serializer. Two operations void each other
	 * so we don't modify reference at all.
	 */
	ASSERT(tep->te_refcnt >= 2);
	ASSERT(peer_tep->te_refcnt >= 2);
	tl_serializer_exit(tep);
}



/*
 * Handle T_conn_res on listener stream. Called on listener serializer.
 * tl_conn_req has already generated the T_CONN_CON.
 * tl_conn_res is called on listener serializer.
 * No one accesses acceptor at this point, so it is safe to modify acceptor.
 * Switch eager serializer to acceptor's.
3309 * 3310 * If TL_SET[U]CRED generate the credentials options. 3311 * For sockets tl_conn_req has already generated the T_CONN_CON. 3312 */ 3313 static void 3314 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3315 { 3316 queue_t *wq; 3317 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3318 ssize_t msz = MBLKL(mp); 3319 t_scalar_t olen, ooff, err = 0; 3320 t_scalar_t prim = cres->PRIM_type; 3321 uchar_t *addr_startp; 3322 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3323 tl_icon_t *tip; 3324 size_t size; 3325 mblk_t *ackmp, *respmp; 3326 mblk_t *dimp, *ccmp = NULL; 3327 struct T_discon_ind *di; 3328 struct T_conn_con *cc; 3329 boolean_t client_noclose_set = B_FALSE; 3330 boolean_t switch_client_serializer = B_TRUE; 3331 3332 ASSERT(IS_COTS(tep)); 3333 3334 if (tep->te_closing) { 3335 freemsg(mp); 3336 return; 3337 } 3338 3339 wq = tep->te_wq; 3340 3341 /* 3342 * preallocate memory for: 3343 * 1. max of T_ERROR_ACK and T_OK_ACK 3344 * ==> known max T_ERROR_ACK 3345 * 2. max of T_DISCON_IND and T_CONN_CON 3346 */ 3347 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3348 if (! ackmp) { 3349 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3350 return; 3351 } 3352 /* 3353 * memory committed for T_OK_ACK/T_ERROR_ACK now 3354 * will be committed for T_DISCON_IND/T_CONN_CON later 3355 */ 3356 3357 3358 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3359 3360 /* 3361 * validate state 3362 */ 3363 if (tep->te_state != TS_WRES_CIND) { 3364 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3365 SL_TRACE|SL_ERROR, 3366 "tl_wput:T_CONN_RES:out of state, state=%d", 3367 tep->te_state)); 3368 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3369 freemsg(mp); 3370 return; 3371 } 3372 3373 /* 3374 * validate the message 3375 * Note: dereference fields in struct inside message only 3376 * after validating the message length. 
3377 */ 3378 if (msz < sizeof (struct T_conn_res)) { 3379 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3380 "tl_conn_res:invalid message length")); 3381 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3382 freemsg(mp); 3383 return; 3384 } 3385 olen = cres->OPT_length; 3386 ooff = cres->OPT_offset; 3387 if (((olen > 0) && ((ooff + olen) > msz))) { 3388 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3389 "tl_conn_res:invalid message")); 3390 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3391 freemsg(mp); 3392 return; 3393 } 3394 if (olen) { 3395 /* 3396 * no opts in connect res 3397 * supported in this provider 3398 */ 3399 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3400 "tl_conn_res:options not supported in message")); 3401 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3402 freemsg(mp); 3403 return; 3404 } 3405 3406 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3407 ASSERT(tep->te_state == TS_WACK_CRES); 3408 3409 if (cres->SEQ_number < TL_MINOR_START && 3410 cres->SEQ_number >= BADSEQNUM) { 3411 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3412 "tl_conn_res:remote endpoint sequence number bad")); 3413 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3414 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3415 freemsg(mp); 3416 return; 3417 } 3418 3419 /* 3420 * find accepting endpoint. Will have extra reference if found. 3421 */ 3422 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, 3423 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, 3424 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { 3425 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3426 "tl_conn_res:bad accepting endpoint")); 3427 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3428 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3429 freemsg(mp); 3430 return; 3431 } 3432 3433 /* 3434 * Prevent acceptor from closing. 3435 */ 3436 if (! 
tl_noclose(acc_ep)) { 3437 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3438 "tl_conn_res:bad accepting endpoint")); 3439 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3440 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3441 tl_refrele(acc_ep); 3442 freemsg(mp); 3443 return; 3444 } 3445 3446 acc_ep->te_flag |= TL_ACCEPTOR; 3447 3448 /* 3449 * validate that accepting endpoint, if different from listening 3450 * has address bound => state is TS_IDLE 3451 * TROUBLE in XPG4 !!? 3452 */ 3453 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { 3454 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3455 "tl_conn_res:accepting endpoint has no address bound," 3456 "state=%d", acc_ep->te_state)); 3457 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3458 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3459 freemsg(mp); 3460 tl_closeok(acc_ep); 3461 tl_refrele(acc_ep); 3462 return; 3463 } 3464 3465 /* 3466 * validate if accepting endpt same as listening, then 3467 * no other incoming connection should be on the queue 3468 */ 3469 3470 if ((tep == acc_ep) && (tep->te_nicon > 1)) { 3471 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3472 "tl_conn_res: > 1 conn_ind on listener-acceptor")); 3473 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3474 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3475 freemsg(mp); 3476 tl_closeok(acc_ep); 3477 tl_refrele(acc_ep); 3478 return; 3479 } 3480 3481 /* 3482 * Mark for deletion, the entry corresponding to client 3483 * on list of pending connections made by the listener 3484 * search list to see if client is one of the 3485 * recorded as a listener. 
3486 */ 3487 tip = tl_icon_find(tep, cres->SEQ_number); 3488 if (tip == NULL) { 3489 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3490 "tl_conn_res:no client in listener list")); 3491 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3492 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3493 freemsg(mp); 3494 tl_closeok(acc_ep); 3495 tl_refrele(acc_ep); 3496 return; 3497 } 3498 3499 /* 3500 * If ti_tep is NULL the client has already closed. In this case 3501 * the code below will avoid any action on the client side 3502 * but complete the server and acceptor state transitions. 3503 */ 3504 ASSERT(tip->ti_tep == NULL || 3505 tip->ti_tep->te_seqno == cres->SEQ_number); 3506 cl_ep = tip->ti_tep; 3507 3508 /* 3509 * If the client is present it is switched from listener's to acceptor's 3510 * serializer. We should block client closes while serializers are 3511 * being switched. 3512 * 3513 * It is possible that the client is present but is currently being 3514 * closed. There are two possible cases: 3515 * 3516 * 1) The client has already entered tl_close_finish_ser() and sent 3517 * T_ORDREL_IND. In this case we can just ignore the client (but we 3518 * still need to send all messages from tip->ti_mp to the acceptor). 3519 * 3520 * 2) The client started the close but has not entered 3521 * tl_close_finish_ser() yet. In this case, the client is already 3522 * proceeding asynchronously on the listener's serializer, so we're 3523 * forced to change the acceptor to use the listener's serializer to 3524 * ensure that any operations on the acceptor are serialized with 3525 * respect to the close that's in-progress. 3526 */ 3527 if (cl_ep != NULL) { 3528 if (tl_noclose(cl_ep)) { 3529 client_noclose_set = B_TRUE; 3530 } else { 3531 /* 3532 * Client is closing. If it it has sent the 3533 * T_ORDREL_IND, we can simply ignore it - otherwise, 3534 * we have to let let the client continue until it is 3535 * sent. 
3536 * 3537 * If we do continue using the client, acceptor will 3538 * switch to client's serializer which is used by client 3539 * for its close. 3540 */ 3541 tl_client_closing_when_accepting++; 3542 switch_client_serializer = B_FALSE; 3543 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect || 3544 cl_ep->te_state == -1) 3545 cl_ep = NULL; 3546 } 3547 } 3548 3549 if (cl_ep != NULL) { 3550 /* 3551 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER 3552 * (latter for sockets only) 3553 */ 3554 if (cl_ep->te_state != TS_WCON_CREQ && 3555 (cl_ep->te_state != TS_DATA_XFER && 3556 IS_SOCKET(cl_ep))) { 3557 err = ECONNREFUSED; 3558 /* 3559 * T_DISCON_IND sent later after committing memory 3560 * and acking validity of request 3561 */ 3562 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 3563 "tl_conn_res:peer in bad state")); 3564 } 3565 3566 /* 3567 * preallocate now for T_DISCON_IND or T_CONN_CONN 3568 * ack validity of request (T_OK_ACK) after memory committed 3569 */ 3570 3571 if (err) 3572 size = sizeof (struct T_discon_ind); 3573 else { 3574 /* 3575 * calculate length of T_CONN_CON message 3576 */ 3577 olen = 0; 3578 if (cl_ep->te_flag & TL_SETCRED) { 3579 olen = (t_scalar_t)sizeof (struct opthdr) + 3580 OPTLEN(sizeof (tl_credopt_t)); 3581 } else if (cl_ep->te_flag & TL_SETUCRED) { 3582 olen = (t_scalar_t)sizeof (struct opthdr) + 3583 OPTLEN(ucredsize); 3584 } 3585 size = T_ALIGN(sizeof (struct T_conn_con) + 3586 acc_ep->te_alen) + olen; 3587 } 3588 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3589 /* 3590 * roll back state changes 3591 */ 3592 tep->te_state = TS_WRES_CIND; 3593 tl_memrecover(wq, mp, size); 3594 freemsg(ackmp); 3595 if (client_noclose_set) 3596 tl_closeok(cl_ep); 3597 tl_closeok(acc_ep); 3598 tl_refrele(acc_ep); 3599 return; 3600 } 3601 mp = NULL; 3602 } 3603 3604 /* 3605 * Now ack validity of request 3606 */ 3607 if (tep->te_nicon == 1) { 3608 if (tep == acc_ep) 3609 tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state); 3610 else 3611 
tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state); 3612 } else 3613 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state); 3614 3615 /* 3616 * send T_DISCON_IND now if client state validation failed earlier 3617 */ 3618 if (err) { 3619 tl_ok_ack(wq, ackmp, prim); 3620 /* 3621 * flush the queues - why always ? 3622 */ 3623 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3624 3625 dimp = tl_resizemp(respmp, size); 3626 if (! dimp) { 3627 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3628 SL_TRACE|SL_ERROR, 3629 "tl_conn_res:con_ind:allocb failure")); 3630 tl_merror(wq, respmp, ENOMEM); 3631 tl_closeok(acc_ep); 3632 if (client_noclose_set) 3633 tl_closeok(cl_ep); 3634 tl_refrele(acc_ep); 3635 return; 3636 } 3637 if (dimp->b_cont) { 3638 /* no user data in provider generated discon ind */ 3639 freemsg(dimp->b_cont); 3640 dimp->b_cont = NULL; 3641 } 3642 3643 DB_TYPE(dimp) = M_PROTO; 3644 di = (struct T_discon_ind *)dimp->b_rptr; 3645 di->PRIM_type = T_DISCON_IND; 3646 di->DISCON_reason = err; 3647 di->SEQ_number = BADSEQNUM; 3648 3649 tep->te_state = TS_IDLE; 3650 /* 3651 * send T_DISCON_IND message 3652 */ 3653 putnext(acc_ep->te_rq, dimp); 3654 if (client_noclose_set) 3655 tl_closeok(cl_ep); 3656 tl_closeok(acc_ep); 3657 tl_refrele(acc_ep); 3658 return; 3659 } 3660 3661 /* 3662 * now start connecting the accepting endpoint 3663 */ 3664 if (tep != acc_ep) 3665 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3666 3667 if (cl_ep == NULL) { 3668 /* 3669 * The client has already closed. Send up any queued messages 3670 * and change the state accordingly. 
3671 */ 3672 tl_ok_ack(wq, ackmp, prim); 3673 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3674 3675 /* 3676 * remove endpoint from incoming connection 3677 * delete client from list of incoming connections 3678 */ 3679 tl_freetip(tep, tip); 3680 freemsg(mp); 3681 tl_closeok(acc_ep); 3682 tl_refrele(acc_ep); 3683 return; 3684 } else if (tip->ti_mp != NULL) { 3685 /* 3686 * The client could have queued a T_DISCON_IND which needs 3687 * to be sent up. 3688 * Note that t_discon_req can not operate the same as 3689 * t_data_req since it is not possible for it to putbq 3690 * the message and return -1 due to the use of qwriter. 3691 */ 3692 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3693 } 3694 3695 /* 3696 * prepare connect confirm T_CONN_CON message 3697 */ 3698 3699 /* 3700 * allocate the message - original data blocks 3701 * retained in the returned mblk 3702 */ 3703 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) { 3704 ccmp = tl_resizemp(respmp, size); 3705 if (ccmp == NULL) { 3706 tl_ok_ack(wq, ackmp, prim); 3707 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3708 SL_TRACE|SL_ERROR, 3709 "tl_conn_res:conn_con:allocb failure")); 3710 tl_merror(wq, respmp, ENOMEM); 3711 tl_closeok(acc_ep); 3712 if (client_noclose_set) 3713 tl_closeok(cl_ep); 3714 tl_refrele(acc_ep); 3715 return; 3716 } 3717 3718 DB_TYPE(ccmp) = M_PROTO; 3719 cc = (struct T_conn_con *)ccmp->b_rptr; 3720 cc->PRIM_type = T_CONN_CON; 3721 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con); 3722 cc->RES_length = acc_ep->te_alen; 3723 addr_startp = ccmp->b_rptr + cc->RES_offset; 3724 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen); 3725 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3726 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset + 3727 cc->RES_length); 3728 cc->OPT_length = olen; 3729 tl_fill_option(ccmp->b_rptr + cc->OPT_offset, 3730 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag, 3731 cl_ep->te_credp); 3732 } else { 3733 cc->OPT_offset = 0; 3734 cc->OPT_length = 0; 3735 } 3736 /* 3737 * 
Forward the credential in the packet so it can be picked up 3738 * at the higher layers for more complete credential processing 3739 */ 3740 mblk_setcred(ccmp, acc_ep->te_credp); 3741 DB_CPID(ccmp) = acc_ep->te_cpid; 3742 } else { 3743 freemsg(respmp); 3744 respmp = NULL; 3745 } 3746 3747 /* 3748 * make connection linking 3749 * accepting and client endpoints 3750 * No need to increment references: 3751 * on client: it should already have one from tip->ti_tep linkage. 3752 * on acceptor is should already have one from the table lookup. 3753 * 3754 * At this point both client and acceptor can't close. Set client 3755 * serializer to acceptor's. 3756 */ 3757 ASSERT(cl_ep->te_refcnt >= 2); 3758 ASSERT(acc_ep->te_refcnt >= 2); 3759 ASSERT(cl_ep->te_conp == NULL); 3760 ASSERT(acc_ep->te_conp == NULL); 3761 cl_ep->te_conp = acc_ep; 3762 acc_ep->te_conp = cl_ep; 3763 ASSERT(cl_ep->te_ser == tep->te_ser); 3764 if (switch_client_serializer) { 3765 mutex_enter(&cl_ep->te_ser_lock); 3766 if (cl_ep->te_ser_count > 0) { 3767 switch_client_serializer = B_FALSE; 3768 tl_serializer_noswitch++; 3769 } else { 3770 /* 3771 * Move client to the acceptor's serializer. 3772 */ 3773 tl_serializer_refhold(acc_ep->te_ser); 3774 tl_serializer_refrele(cl_ep->te_ser); 3775 cl_ep->te_ser = acc_ep->te_ser; 3776 } 3777 mutex_exit(&cl_ep->te_ser_lock); 3778 } 3779 if (!switch_client_serializer) { 3780 /* 3781 * It is not possible to switch client to use acceptor's. 3782 * Move acceptor to client's serializer (which is the same as 3783 * listener's). 
3784 */ 3785 tl_serializer_refhold(cl_ep->te_ser); 3786 tl_serializer_refrele(acc_ep->te_ser); 3787 acc_ep->te_ser = cl_ep->te_ser; 3788 } 3789 3790 TL_REMOVE_PEER(cl_ep->te_oconp); 3791 TL_REMOVE_PEER(acc_ep->te_oconp); 3792 3793 /* 3794 * remove endpoint from incoming connection 3795 * delete client from list of incoming connections 3796 */ 3797 tip->ti_tep = NULL; 3798 tl_freetip(tep, tip); 3799 tl_ok_ack(wq, ackmp, prim); 3800 3801 /* 3802 * data blocks already linked in reallocb() 3803 */ 3804 3805 /* 3806 * link queues so that I_SENDFD will work 3807 */ 3808 if (! IS_SOCKET(tep)) { 3809 acc_ep->te_wq->q_next = cl_ep->te_rq; 3810 cl_ep->te_wq->q_next = acc_ep->te_rq; 3811 } 3812 3813 /* 3814 * send T_CONN_CON up on client side unless it was already 3815 * done (for a socket). In cases any data or ordrel req has been 3816 * queued make sure that the service procedure runs. 3817 */ 3818 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) { 3819 enableok(cl_ep->te_wq); 3820 TL_QENABLE(cl_ep); 3821 if (ccmp != NULL) 3822 freemsg(ccmp); 3823 } else { 3824 /* 3825 * change client state on TE_CONN_CON event 3826 */ 3827 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state); 3828 putnext(cl_ep->te_rq, ccmp); 3829 } 3830 3831 /* Mark the both endpoints as accepted */ 3832 cl_ep->te_flag |= TL_ACCEPTED; 3833 acc_ep->te_flag |= TL_ACCEPTED; 3834 3835 /* 3836 * Allow client and acceptor to close. 
3837 */ 3838 tl_closeok(acc_ep); 3839 if (client_noclose_set) 3840 tl_closeok(cl_ep); 3841 } 3842 3843 3844 3845 3846 static void 3847 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3848 { 3849 queue_t *wq; 3850 struct T_discon_req *dr; 3851 ssize_t msz; 3852 tl_endpt_t *peer_tep = tep->te_conp; 3853 tl_endpt_t *srv_tep = tep->te_oconp; 3854 tl_icon_t *tip; 3855 size_t size; 3856 mblk_t *ackmp, *dimp, *respmp; 3857 struct T_discon_ind *di; 3858 t_scalar_t save_state, new_state; 3859 3860 if (tep->te_closing) { 3861 freemsg(mp); 3862 return; 3863 } 3864 3865 if ((peer_tep != NULL) && peer_tep->te_closing) { 3866 TL_UNCONNECT(tep->te_conp); 3867 peer_tep = NULL; 3868 } 3869 if ((srv_tep != NULL) && srv_tep->te_closing) { 3870 TL_UNCONNECT(tep->te_oconp); 3871 srv_tep = NULL; 3872 } 3873 3874 wq = tep->te_wq; 3875 3876 /* 3877 * preallocate memory for: 3878 * 1. max of T_ERROR_ACK and T_OK_ACK 3879 * ==> known max T_ERROR_ACK 3880 * 2. for T_DISCON_IND 3881 */ 3882 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3883 if (! ackmp) { 3884 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3885 return; 3886 } 3887 /* 3888 * memory committed for T_OK_ACK/T_ERROR_ACK now 3889 * will be committed for T_DISCON_IND later 3890 */ 3891 3892 dr = (struct T_discon_req *)mp->b_rptr; 3893 msz = MBLKL(mp); 3894 3895 /* 3896 * validate the state 3897 */ 3898 save_state = new_state = tep->te_state; 3899 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3900 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3901 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3902 SL_TRACE|SL_ERROR, 3903 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3904 tep->te_state)); 3905 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3906 freemsg(mp); 3907 return; 3908 } 3909 /* 3910 * Defer committing the state change until it is determined if 3911 * the message will be queued with the tl_icon or not. 
3912 */ 3913 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3914 3915 /* validate the message */ 3916 if (msz < sizeof (struct T_discon_req)) { 3917 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3918 "tl_discon_req:invalid message")); 3919 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3920 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3921 freemsg(mp); 3922 return; 3923 } 3924 3925 /* 3926 * if server, then validate that client exists 3927 * by connection sequence number etc. 3928 */ 3929 if (tep->te_nicon > 0) { /* server */ 3930 3931 /* 3932 * search server list for disconnect client 3933 */ 3934 tip = tl_icon_find(tep, dr->SEQ_number); 3935 if (tip == NULL) { 3936 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3937 SL_TRACE|SL_ERROR, 3938 "tl_discon_req:no disconnect endpoint")); 3939 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3940 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3941 freemsg(mp); 3942 return; 3943 } 3944 /* 3945 * If ti_tep is NULL the client has already closed. In this case 3946 * the code below will avoid any action on the client side. 3947 */ 3948 3949 ASSERT(IMPLY(tip->ti_tep != NULL, 3950 tip->ti_tep->te_seqno == dr->SEQ_number)); 3951 peer_tep = tip->ti_tep; 3952 } 3953 3954 /* 3955 * preallocate now for T_DISCON_IND 3956 * ack validity of request (T_OK_ACK) after memory committed 3957 */ 3958 size = sizeof (struct T_discon_ind); 3959 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3960 tl_memrecover(wq, mp, size); 3961 freemsg(ackmp); 3962 return; 3963 } 3964 3965 /* 3966 * prepare message to ack validity of request 3967 */ 3968 if (tep->te_nicon == 0) 3969 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3970 else 3971 if (tep->te_nicon == 1) 3972 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3973 else 3974 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3975 3976 /* 3977 * Flushing queues according to TPI. Using the old state. 
3978 */ 3979 if ((tep->te_nicon <= 1) && 3980 ((save_state == TS_DATA_XFER) || 3981 (save_state == TS_WIND_ORDREL) || 3982 (save_state == TS_WREQ_ORDREL))) 3983 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3984 3985 /* send T_OK_ACK up */ 3986 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3987 3988 /* 3989 * now do disconnect business 3990 */ 3991 if (tep->te_nicon > 0) { /* listener */ 3992 if (peer_tep != NULL && !peer_tep->te_closing) { 3993 /* 3994 * disconnect incoming connect request pending to tep 3995 */ 3996 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 3997 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3998 SL_TRACE|SL_ERROR, 3999 "tl_discon_req: reallocb failed")); 4000 tep->te_state = new_state; 4001 tl_merror(wq, respmp, ENOMEM); 4002 return; 4003 } 4004 di = (struct T_discon_ind *)dimp->b_rptr; 4005 di->SEQ_number = BADSEQNUM; 4006 save_state = peer_tep->te_state; 4007 peer_tep->te_state = TS_IDLE; 4008 4009 TL_REMOVE_PEER(peer_tep->te_oconp); 4010 enableok(peer_tep->te_wq); 4011 TL_QENABLE(peer_tep); 4012 } else { 4013 freemsg(respmp); 4014 dimp = NULL; 4015 } 4016 4017 /* 4018 * remove endpoint from incoming connection list 4019 * - remove disconnect client from list on server 4020 */ 4021 tl_freetip(tep, tip); 4022 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4023 /* 4024 * disconnect an outgoing request pending from tep 4025 */ 4026 4027 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4028 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4029 SL_TRACE|SL_ERROR, 4030 "tl_discon_req: reallocb failed")); 4031 tep->te_state = new_state; 4032 tl_merror(wq, respmp, ENOMEM); 4033 return; 4034 } 4035 di = (struct T_discon_ind *)dimp->b_rptr; 4036 DB_TYPE(dimp) = M_PROTO; 4037 di->PRIM_type = T_DISCON_IND; 4038 di->DISCON_reason = ECONNRESET; 4039 di->SEQ_number = tep->te_seqno; 4040 4041 /* 4042 * If this is a socket the T_DISCON_IND is queued with 4043 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4044 * from the list of pending connections. 
4045 * Note that when te_oconp is set the peer better have 4046 * a t_connind_t for the client. 4047 */ 4048 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4049 /* 4050 * No need to check that 4051 * ti_tep == NULL since the T_DISCON_IND 4052 * takes precedence over other queued 4053 * messages. 4054 */ 4055 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4056 peer_tep = NULL; 4057 dimp = NULL; 4058 /* 4059 * Can't clear te_oconp since tl_co_unconnect needs 4060 * it as a hint not to free the tep. 4061 * Keep the state unchanged since tl_conn_res inspects 4062 * it. 4063 */ 4064 new_state = tep->te_state; 4065 } else { 4066 /* Found - delete it */ 4067 tip = tl_icon_find(peer_tep, tep->te_seqno); 4068 if (tip != NULL) { 4069 ASSERT(tep == tip->ti_tep); 4070 save_state = peer_tep->te_state; 4071 if (peer_tep->te_nicon == 1) 4072 peer_tep->te_state = 4073 NEXTSTATE(TE_DISCON_IND2, 4074 peer_tep->te_state); 4075 else 4076 peer_tep->te_state = 4077 NEXTSTATE(TE_DISCON_IND3, 4078 peer_tep->te_state); 4079 tl_freetip(peer_tep, tip); 4080 } 4081 ASSERT(tep->te_oconp != NULL); 4082 TL_UNCONNECT(tep->te_oconp); 4083 } 4084 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4085 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4086 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4087 SL_TRACE|SL_ERROR, 4088 "tl_discon_req: reallocb failed")); 4089 tep->te_state = new_state; 4090 tl_merror(wq, respmp, ENOMEM); 4091 return; 4092 } 4093 di = (struct T_discon_ind *)dimp->b_rptr; 4094 di->SEQ_number = BADSEQNUM; 4095 4096 save_state = peer_tep->te_state; 4097 peer_tep->te_state = TS_IDLE; 4098 } else { 4099 /* Not connected */ 4100 tep->te_state = new_state; 4101 freemsg(respmp); 4102 return; 4103 } 4104 4105 /* Commit state changes */ 4106 tep->te_state = new_state; 4107 4108 if (peer_tep == NULL) { 4109 ASSERT(dimp == NULL); 4110 goto done; 4111 } 4112 /* 4113 * Flush queues on peer before sending up 4114 * T_DISCON_IND according to TPI 4115 */ 4116 4117 if ((save_state == TS_DATA_XFER) || 4118 (save_state == TS_WIND_ORDREL) || 4119 (save_state == TS_WREQ_ORDREL)) 4120 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4121 4122 DB_TYPE(dimp) = M_PROTO; 4123 di->PRIM_type = T_DISCON_IND; 4124 di->DISCON_reason = ECONNRESET; 4125 4126 /* 4127 * data blocks already linked into dimp by reallocb() 4128 */ 4129 /* 4130 * send indication message to peer user module 4131 */ 4132 ASSERT(dimp != NULL); 4133 putnext(peer_tep->te_rq, dimp); 4134 done: 4135 if (tep->te_conp) { /* disconnect pointers if connected */ 4136 ASSERT(! peer_tep->te_closing); 4137 4138 /* 4139 * Messages may be queued on peer's write queue 4140 * waiting to be processed by its write service 4141 * procedure. Before the pointer to the peer transport 4142 * structure is set to NULL, qenable the peer's write 4143 * queue so that the queued up messages are processed. 4144 */ 4145 if ((save_state == TS_DATA_XFER) || 4146 (save_state == TS_WIND_ORDREL) || 4147 (save_state == TS_WREQ_ORDREL)) 4148 TL_QENABLE(peer_tep); 4149 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4150 TL_UNCONNECT(peer_tep->te_conp); 4151 if (! 
IS_SOCKET(tep)) { 4152 /* 4153 * unlink the streams 4154 */ 4155 tep->te_wq->q_next = NULL; 4156 peer_tep->te_wq->q_next = NULL; 4157 } 4158 TL_UNCONNECT(tep->te_conp); 4159 } 4160 } 4161 4162 4163 static void 4164 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4165 { 4166 queue_t *wq; 4167 size_t ack_sz; 4168 mblk_t *ackmp; 4169 struct T_addr_ack *taa; 4170 4171 if (tep->te_closing) { 4172 freemsg(mp); 4173 return; 4174 } 4175 4176 wq = tep->te_wq; 4177 4178 /* 4179 * Note: T_ADDR_REQ message has only PRIM_type field 4180 * so it is already validated earlier. 4181 */ 4182 4183 if (IS_CLTS(tep) || 4184 (tep->te_state > TS_WREQ_ORDREL) || 4185 (tep->te_state < TS_DATA_XFER)) { 4186 /* 4187 * Either connectionless or connection oriented but not 4188 * in connected data transfer state or half-closed states. 4189 */ 4190 ack_sz = sizeof (struct T_addr_ack); 4191 if (tep->te_state >= TS_IDLE) 4192 /* is bound */ 4193 ack_sz += tep->te_alen; 4194 ackmp = reallocb(mp, ack_sz, 0); 4195 if (ackmp == NULL) { 4196 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4197 SL_TRACE|SL_ERROR, 4198 "tl_addr_req: reallocb failed")); 4199 tl_memrecover(wq, mp, ack_sz); 4200 return; 4201 } 4202 4203 taa = (struct T_addr_ack *)ackmp->b_rptr; 4204 4205 bzero(taa, sizeof (struct T_addr_ack)); 4206 4207 taa->PRIM_type = T_ADDR_ACK; 4208 ackmp->b_datap->db_type = M_PCPROTO; 4209 ackmp->b_wptr = (uchar_t *)&taa[1]; 4210 4211 if (tep->te_state >= TS_IDLE) { 4212 /* endpoint is bound */ 4213 taa->LOCADDR_length = tep->te_alen; 4214 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4215 4216 bcopy(tep->te_abuf, ackmp->b_wptr, 4217 tep->te_alen); 4218 ackmp->b_wptr += tep->te_alen; 4219 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4220 } 4221 4222 (void) qreply(wq, ackmp); 4223 } else { 4224 ASSERT(tep->te_state == TS_DATA_XFER || 4225 tep->te_state == TS_WIND_ORDREL || 4226 tep->te_state == TS_WREQ_ORDREL); 4227 /* connection oriented in data transfer */ 4228 tl_connected_cots_addr_req(mp, tep); 4229 } 4230 
} 4231 4232 4233 static void 4234 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4235 { 4236 tl_endpt_t *peer_tep; 4237 size_t ack_sz; 4238 mblk_t *ackmp; 4239 struct T_addr_ack *taa; 4240 uchar_t *addr_startp; 4241 4242 if (tep->te_closing) { 4243 freemsg(mp); 4244 return; 4245 } 4246 4247 ASSERT(tep->te_state >= TS_IDLE); 4248 4249 ack_sz = sizeof (struct T_addr_ack); 4250 ack_sz += T_ALIGN(tep->te_alen); 4251 peer_tep = tep->te_conp; 4252 ack_sz += peer_tep->te_alen; 4253 4254 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4255 if (ackmp == NULL) { 4256 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4257 "tl_connected_cots_addr_req: reallocb failed")); 4258 tl_memrecover(tep->te_wq, mp, ack_sz); 4259 return; 4260 } 4261 4262 taa = (struct T_addr_ack *)ackmp->b_rptr; 4263 4264 /* endpoint is bound */ 4265 taa->LOCADDR_length = tep->te_alen; 4266 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4267 4268 addr_startp = (uchar_t *)&taa[1]; 4269 4270 bcopy(tep->te_abuf, addr_startp, 4271 tep->te_alen); 4272 4273 taa->REMADDR_length = peer_tep->te_alen; 4274 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4275 taa->LOCADDR_length); 4276 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4277 bcopy(peer_tep->te_abuf, addr_startp, 4278 peer_tep->te_alen); 4279 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4280 taa->REMADDR_offset + peer_tep->te_alen; 4281 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4282 4283 putnext(tep->te_rq, ackmp); 4284 } 4285 4286 static void 4287 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4288 { 4289 if (IS_CLTS(tep)) { 4290 *ia = tl_clts_info_ack; 4291 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4292 } else { 4293 *ia = tl_cots_info_ack; 4294 if (IS_COTSORD(tep)) 4295 ia->SERV_type = T_COTS_ORD; 4296 } 4297 ia->TIDU_size = tl_tidusz; 4298 ia->CURRENT_state = tep->te_state; 4299 } 4300 4301 /* 4302 * This routine responds to T_CAPABILITY_REQ messages. 
It is called by 4303 * tl_wput. 4304 */ 4305 static void 4306 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4307 { 4308 mblk_t *ackmp; 4309 t_uscalar_t cap_bits1; 4310 struct T_capability_ack *tcap; 4311 4312 if (tep->te_closing) { 4313 freemsg(mp); 4314 return; 4315 } 4316 4317 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4318 4319 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4320 M_PCPROTO, T_CAPABILITY_ACK); 4321 if (ackmp == NULL) { 4322 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4323 "tl_capability_req: reallocb failed")); 4324 tl_memrecover(tep->te_wq, mp, 4325 sizeof (struct T_capability_ack)); 4326 return; 4327 } 4328 4329 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4330 tcap->CAP_bits1 = 0; 4331 4332 if (cap_bits1 & TC1_INFO) { 4333 tl_copy_info(&tcap->INFO_ack, tep); 4334 tcap->CAP_bits1 |= TC1_INFO; 4335 } 4336 4337 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4338 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4339 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4340 } 4341 4342 putnext(tep->te_rq, ackmp); 4343 } 4344 4345 static void 4346 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4347 { 4348 if (! tep->te_closing) 4349 tl_info_req(mp, tep); 4350 else 4351 freemsg(mp); 4352 4353 tl_serializer_exit(tep); 4354 tl_refrele(tep); 4355 } 4356 4357 static void 4358 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4359 { 4360 mblk_t *ackmp; 4361 4362 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4363 M_PCPROTO, T_INFO_ACK); 4364 if (ackmp == NULL) { 4365 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4366 "tl_info_req: reallocb failed")); 4367 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4368 return; 4369 } 4370 4371 /* 4372 * fill in T_INFO_ACK contents 4373 */ 4374 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4375 4376 /* 4377 * send ack message 4378 */ 4379 putnext(tep->te_rq, ackmp); 4380 } 4381 4382 /* 4383 * Handle M_DATA, T_data_req and T_optdata_req. 
 * If this is a socket pass through T_optdata_req options unmodified.
 *
 * The message is validated, the sender's and the peer's states are checked,
 * and the message is then forwarded to the peer's read side with the
 * primitive type rewritten from *_REQ to *_IND.  Messages that cannot be
 * delivered yet are put back on the queue (TL_PUTBQ); messages that can
 * never be delivered are freed or turned into an M_ERROR via tl_merror().
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* Data primitives are not valid on a connectionless endpoint. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/* M_PROTO messages must be large enough for their primitive. */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) ||
		    !IS_SOCKET(tep))) {
			/* T_OPTDATA_REQ is only accepted from sockets. */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a connected peer nor a pending outgoing connect. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* Too short to even hold the primitive type. */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_exdata_req.
 *
 * Follows the same outline as tl_data(): validate the message length and the
 * sender's state, defer or queue the message while an early-connected socket
 * has not been accepted yet, then check the peer's state and forward the
 * message block to the peer's read side as a T_EXDATA_IND.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a connected peer nor a pending outgoing connect. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}



/*
 * Handle T_ordrel_req.
 *
 * Unlike tl_data()/tl_exdata() this event changes state: the sender moves to
 * NEXTSTATE(TE_ORDREL_REQ, ...) and, once the message is actually delivered,
 * the peer moves to NEXTSTATE(TE_ORDREL_IND, ...).  The message block itself
 * is reused and sent to the peer as a T_ORDREL_IND.
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer and no pending outgoing connect: fall out of the
		 * switch; the "peer gone" check below frees the message
		 * after the local state has been advanced.
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		/* Note: the error is raised on the peer's write queue. */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}


/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
 *
 * wq	write queue of the sending endpoint; the indication is sent back
 *	with qreply().
 * mp	the offending T_unitdata_req; its destination address and options
 *	are copied into the indication and the message is then freed.
 * err	value stored in ERROR_type.
 *
 * The lengths and offsets are taken from mp as-is, so callers are expected
 * to have validated them against the message size beforehand.
 */
static void
tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
{
	size_t			err_sz;
	tl_endpt_t		*tep;
	struct T_unitdata_req	*udreq;
	mblk_t			*err_mp;
	t_scalar_t		alen;
	t_scalar_t		olen;
	struct T_uderror_ind	*uderr;
	uchar_t			*addr_startp;

	err_sz = sizeof (struct T_uderror_ind);
	tep = (tl_endpt_t *)wq->q_ptr;
	udreq = (struct T_unitdata_req *)mp->b_rptr;
	alen = udreq->DEST_length;
	olen = udreq->OPT_length;

	/* header [+ address, padded to alignment] [+ options] */
	if (alen > 0)
		err_sz = T_ALIGN(err_sz + alen);
	if (olen > 0)
		err_sz += olen;

	err_mp = allocb(err_sz, BPRI_MED);
	if (! err_mp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_uderr:allocb failure"));
		/*
		 * Note: no rollback of state needed as it does
		 * not change in connectionless transport
		 */
		tl_memrecover(wq, mp, err_sz);
		return;
	}

	DB_TYPE(err_mp) = M_PROTO;
	err_mp->b_wptr = err_mp->b_rptr + err_sz;
	uderr = (struct T_uderror_ind *)err_mp->b_rptr;
	uderr->PRIM_type = T_UDERROR_IND;
	uderr->ERROR_type = err;
	uderr->DEST_length = alen;
	uderr->OPT_length = olen;
	if (alen <= 0) {
		uderr->DEST_offset = 0;
	} else {
		/* destination address directly follows the header */
		uderr->DEST_offset =
		    (t_scalar_t)sizeof (struct T_uderror_ind);
		addr_startp = mp->b_rptr + udreq->DEST_offset;
		bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
		    (size_t)alen);
	}
	if (olen <= 0) {
		uderr->OPT_offset = 0;
	} else {
		/* options follow the address at the next aligned offset */
		uderr->OPT_offset =
		    (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
		    uderr->DEST_length);
		addr_startp = mp->b_rptr + udreq->OPT_offset;
		bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
		    (size_t)olen);
	}
	freemsg(mp);

	/*
	 * send indication message
	 */
	tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);

	qreply(wq, err_mp);
}

/*
 * Serializer-side wrapper for tl_unitdata().
 *
 * If the endpoint is not closing and its write queue already holds messages,
 * the new message is queued behind them (TL_PUTQ) rather than processed out
 * of order.  Otherwise it is processed directly as long as the endpoint still
 * has a read queue, and freed if it does not.  The serializer is exited and
 * the endpoint reference released in every case.
 */
static void
tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq = tep->te_wq;

	if (!tep->te_closing && (wq->q_first != NULL)) {
		TL_PUTQ(tep, mp);
	} else if (tep->te_rq != NULL)
		tl_unitdata(mp, tep);
	else
		freemsg(mp);

	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Handle T_unitdata_req.
 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
 * If this is a socket pass through options unmodified.
 */
static void
tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	soux_addr_t		ux_addr;
	tl_addr_t		destaddr;
	uchar_t			*addr_startp;
	tl_endpt_t		*peer_tep;
	struct T_unitdata_ind	*udind;
	struct T_unitdata_req	*udreq;
	ssize_t			msz, ui_sz;
	t_scalar_t		alen, aoff, olen, ooff;
	t_scalar_t		oldolen = 0;

	udreq = (struct T_unitdata_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	if (tep->te_state != TS_IDLE) {
		/*
		 * NOTE(review): the log text names T_CONN_REQ although this
		 * is the T_UNITDATA_REQ path - looks like a copy/paste
		 * leftover; confirm before changing the string.
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_REQ:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
	 * (state does not change on this event)
	 */

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_unitdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:invalid message length"));
		tl_merror(wq, mp, EINVAL);
		return;
	}
	alen = udreq->DEST_length;
	aoff = udreq->DEST_offset;
	/* oldolen remembers the sender's option length; olen may grow below */
	oldolen = olen = udreq->OPT_length;
	ooff = udreq->OPT_offset;
	if (olen == 0)
		ooff = 0;

	if (IS_SOCKET(tep)) {
		/* Socket addresses have a fixed size and must fit in mp. */
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (olen < 0) || (ooff < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_unitdata_req: invalid socket addr "
			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
			    (int)msz, alen, aoff, olen, ooff));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
		/* ux_addr is only initialized (and only used) for sockets */
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);

		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			/*
			 * NOTE(review): the log text says tl_conn_req although
			 * this is tl_unitdata() - confirm before changing.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_conn_req: invalid socket magic"));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
	} else {
		/*
		 * TLI: lengths and offsets must be non-negative, must not
		 * overflow, and address/options must lie within the message.
		 */
		if ((alen < 0) ||
		    (aoff < 0) ||
		    ((alen > 0) && ((aoff + alen) > msz)) ||
		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
		    ((aoff + alen) < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz)) ||
		    (olen < 0) ||
		    (ooff < 0) ||
		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:invalid unit data message"));
			tl_merror(wq, mp, EINVAL);
			return;
		}
	}

	/* Options not supported unless it's a socket */
	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:option use(unsupported) or zero len addr"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}
#ifdef DEBUG
	/*
	 * Mild form of ASSERT()ion to detect broken TPI apps.
	 * if (! assertion)
	 *	log warning;
	 */
	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:addr overlaps TPI message"));
	}
#endif
	/*
	 * get destination endpoint
	 */
	destaddr.ta_alen = alen;
	destaddr.ta_abuf = mp->b_rptr + aoff;
	destaddr.ta_zoneid = tep->te_zoneid;

	/*
	 * Check whether the destination is the same that was used previously
	 * and the destination endpoint is in the right state. If something is
	 * wrong, find destination again and cache it.
	 */
	peer_tep = tep->te_lastep;

	if ((peer_tep == NULL) || peer_tep->te_closing ||
	    (peer_tep->te_state != TS_IDLE) ||
	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
		/*
		 * Not the same as cached destination , need to find the right
		 * destination.
		 */
		peer_tep = (IS_SOCKET(tep) ?
		    tl_sock_find_peer(tep, &ux_addr) :
		    tl_find_peer(tep, &destaddr));

		if (peer_tep == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:no one at destination address"));
			tl_uderr(wq, mp, ECONNRESET);
			return;
		}

		/*
		 * Cache the new peer.
		 * The previously cached endpoint (if any) is released first.
		 */
		if (tep->te_lastep != NULL)
			tl_refrele(tep->te_lastep);

		tep->te_lastep = peer_tep;
	}

	if (peer_tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:provider in invalid state"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}

	ASSERT(peer_tep->te_rq != NULL);

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
		/* record what we are flow controlled on */
		if (tep->te_flowq != NULL) {
			list_remove(&tep->te_flowq->te_flowlist, tep);
		}
		list_insert_head(&peer_tep->te_flowlist, tep);
		tep->te_flowq = peer_tep;
		TL_PUTBQ(tep, mp);
		return;
	}
	/*
	 * prepare indication message
	 */

	/*
	 * calculate length of message
	 */
	if (peer_tep->te_flag & TL_SETCRED) {
		ASSERT(olen == 0);
		olen = (t_scalar_t)sizeof (struct opthdr) +
		    OPTLEN(sizeof (tl_credopt_t));
		/* 1 option only */
	} else if (peer_tep->te_flag & TL_SETUCRED) {
		ASSERT(olen == 0);
		olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredsize);
		/* 1 option only */
	} else if (peer_tep->te_flag & TL_SOCKUCRED) {
		/* Possibly more than one option */
		olen += (t_scalar_t)sizeof (struct T_opthdr) +
		    OPTLEN(ucredsize);
	}

	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
	    olen;
	/*
	 * If the unitdata_ind fits and we are not adding options
	 * reuse the udreq mblk.
	 */
	if (msz >= ui_sz && alen >= tep->te_alen &&
	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
		/*
		 * Reuse the original mblk. Leave options in place.
		 * The source address overwrites the destination address in
		 * place; SRC_offset is read from the message unchanged, so
		 * this relies on T_unitdata_ind overlaying T_unitdata_req.
		 */
		udind = (struct T_unitdata_ind *)mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_length = tep->te_alen;
		addr_startp = mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	} else {
		/* Allocate a new T_unidata_ind message */
		mblk_t *ui_mp;

		ui_mp = allocb(ui_sz, BPRI_MED);
		if (! ui_mp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
			    "tl_unitdata:allocb failure:message queued"));
			tl_memrecover(wq, mp, ui_sz);
			return;
		}

		/*
		 * fill in T_UNITDATA_IND contents
		 */
		DB_TYPE(ui_mp) = M_PROTO;
		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
		udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
		udind->SRC_length = tep->te_alen;
		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
		udind->OPT_offset =
		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
		udind->OPT_length = olen;
		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
			/*
			 * Sender's options (if any) are copied first, the
			 * generated credential option is appended after them.
			 */
			if (oldolen != 0) {
				bcopy((void *)((uintptr_t)udreq + ooff),
				    (void *)((uintptr_t)udind +
				    udind->OPT_offset),
				    oldolen);
			}
			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
			    oldolen,
			    DB_CREDDEF(mp, tep->te_credp), TLPID(mp, tep),
			    peer_tep->te_flag, peer_tep->te_credp);
		} else {
			bcopy((void *)((uintptr_t)udreq + ooff),
			    (void *)((uintptr_t)udind + udind->OPT_offset),
			    olen);
		}

		/*
		 * relink data blocks from mp to ui_mp
		 * (only the old header block is freed; the data chain lives
		 * on behind ui_mp)
		 */
		ui_mp->b_cont = mp->b_cont;
		freeb(mp);
		mp = ui_mp;
	}
	/*
	 * send indication message
	 */
	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
	putnext(peer_tep->te_rq, mp);
}



/*
 * Check if a given addr is in use.
 * Endpoint ptr returned or NULL if not found.
 * The name space is separate for each mode. This implies that
 * sockets get their own name space.
5248 */ 5249 static tl_endpt_t * 5250 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5251 { 5252 tl_endpt_t *peer_tep = NULL; 5253 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5254 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5255 5256 ASSERT(! IS_SOCKET(tep)); 5257 5258 ASSERT(ap != NULL && ap->ta_alen > 0); 5259 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5260 ASSERT(ap->ta_abuf != NULL); 5261 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5262 ASSERT(IMPLY(rc == 0, 5263 (tep->te_zoneid == peer_tep->te_zoneid) && 5264 (tep->te_transport == peer_tep->te_transport))); 5265 5266 if ((rc == 0) && (peer_tep->te_closing)) { 5267 tl_refrele(peer_tep); 5268 peer_tep = NULL; 5269 } 5270 5271 return (peer_tep); 5272 } 5273 5274 /* 5275 * Find peer for a socket based on unix domain address. 5276 * For implicit addresses our peer can be found by minor number in ai hash. For 5277 * explici binds we look vnode address at addr_hash. 5278 */ 5279 static tl_endpt_t * 5280 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5281 { 5282 tl_endpt_t *peer_tep = NULL; 5283 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5284 tep->te_aihash : tep->te_addrhash; 5285 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5286 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5287 5288 ASSERT(IS_SOCKET(tep)); 5289 ASSERT(EQUIV(rc == 0, peer_tep != NULL)); 5290 ASSERT(IMPLY(rc == 0, 5291 (tep->te_zoneid == peer_tep->te_zoneid) && 5292 (tep->te_transport == peer_tep->te_transport))); 5293 /* 5294 * Don't attempt to use closing peer. 5295 */ 5296 if ((peer_tep != NULL) && 5297 (peer_tep->te_closing || 5298 (peer_tep->te_zoneid != tep->te_zoneid))) { 5299 tl_refrele(peer_tep); 5300 peer_tep = NULL; 5301 } 5302 5303 return (peer_tep); 5304 } 5305 5306 /* 5307 * Generate a free addr and return it in struct pointed by ap 5308 * but allocating space for address buffer. 
5309 * The generated address will be at least 4 bytes long and, if req->ta_alen 5310 * exceeds 4 bytes, be req->ta_alen bytes long. 5311 * 5312 * If address is found it will be inserted in the hash. 5313 * 5314 * If req->ta_alen is larger than the default alen (4 bytes) the last 5315 * alen-4 bytes will always be the same as in req. 5316 * 5317 * Return 0 for failure. 5318 * Return non-zero for success. 5319 */ 5320 static boolean_t 5321 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5322 { 5323 t_scalar_t alen; 5324 uint32_t loopcnt; /* Limit loop to 2^32 */ 5325 5326 ASSERT(tep->te_hash_hndl != NULL); 5327 ASSERT(! IS_SOCKET(tep)); 5328 5329 if (tep->te_hash_hndl == NULL) 5330 return (B_FALSE); 5331 5332 /* 5333 * check if default addr is in use 5334 * if it is - bump it and try again 5335 */ 5336 if (req == NULL) { 5337 alen = sizeof (uint32_t); 5338 } else { 5339 alen = max(req->ta_alen, sizeof (uint32_t)); 5340 ASSERT(tep->te_zoneid == req->ta_zoneid); 5341 } 5342 5343 if (tep->te_alen < alen) { 5344 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5345 5346 /* 5347 * Not enough space in tep->ta_ap to hold the address, 5348 * allocate a bigger space. 5349 */ 5350 if (abuf == NULL) 5351 return (B_FALSE); 5352 5353 if (tep->te_alen > 0) 5354 kmem_free(tep->te_abuf, tep->te_alen); 5355 5356 tep->te_alen = alen; 5357 tep->te_abuf = abuf; 5358 } 5359 5360 /* Copy in the address in req */ 5361 if (req != NULL) { 5362 ASSERT(alen >= req->ta_alen); 5363 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5364 } 5365 5366 /* 5367 * First try minor number then try default addresses. 
5368 */ 5369 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5370 5371 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5372 if (mod_hash_insert_reserve(tep->te_addrhash, 5373 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5374 tep->te_hash_hndl) == 0) { 5375 /* 5376 * found free address 5377 */ 5378 tep->te_flag |= TL_ADDRHASHED; 5379 tep->te_hash_hndl = NULL; 5380 5381 return (B_TRUE); /* successful return */ 5382 } 5383 /* 5384 * Use default address. 5385 */ 5386 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5387 atomic_add_32(&tep->te_defaddr, 1); 5388 } 5389 5390 /* 5391 * Failed to find anything. 5392 */ 5393 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5394 "tl_get_any_addr:looped 2^32 times")); 5395 return (B_FALSE); 5396 } 5397 5398 /* 5399 * reallocb + set r/w ptrs to reflect size. 5400 */ 5401 static mblk_t * 5402 tl_resizemp(mblk_t *mp, ssize_t new_size) 5403 { 5404 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5405 return (NULL); 5406 5407 mp->b_rptr = DB_BASE(mp); 5408 mp->b_wptr = mp->b_rptr + new_size; 5409 return (mp); 5410 } 5411 5412 static void 5413 tl_cl_backenable(tl_endpt_t *tep) 5414 { 5415 list_t *l = &tep->te_flowlist; 5416 tl_endpt_t *elp; 5417 5418 ASSERT(IS_CLTS(tep)); 5419 5420 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5421 ASSERT(tep->te_ser == elp->te_ser); 5422 ASSERT(elp->te_flowq == tep); 5423 if (! elp->te_closing) 5424 TL_QENABLE(elp); 5425 elp->te_flowq = NULL; 5426 list_remove(l, elp); 5427 } 5428 } 5429 5430 /* 5431 * Unconnect endpoints. 5432 */ 5433 static void 5434 tl_co_unconnect(tl_endpt_t *tep) 5435 { 5436 tl_endpt_t *peer_tep = tep->te_conp; 5437 tl_endpt_t *srv_tep = tep->te_oconp; 5438 list_t *l; 5439 tl_icon_t *tip; 5440 tl_endpt_t *cl_tep; 5441 mblk_t *d_mp; 5442 5443 ASSERT(IS_COTS(tep)); 5444 /* 5445 * If our peer is closing, don't use it. 
5446 */ 5447 if ((peer_tep != NULL) && peer_tep->te_closing) { 5448 TL_UNCONNECT(tep->te_conp); 5449 peer_tep = NULL; 5450 } 5451 if ((srv_tep != NULL) && srv_tep->te_closing) { 5452 TL_UNCONNECT(tep->te_oconp); 5453 srv_tep = NULL; 5454 } 5455 5456 if (tep->te_nicon > 0) { 5457 l = &tep->te_iconp; 5458 /* 5459 * If incoming requests pending, change state 5460 * of clients on disconnect ind event and send 5461 * discon_ind pdu to modules above them 5462 * for server: all clients get disconnect 5463 */ 5464 5465 while (tep->te_nicon > 0) { 5466 tip = list_head(l); 5467 cl_tep = tip->ti_tep; 5468 5469 if (cl_tep == NULL) { 5470 tl_freetip(tep, tip); 5471 continue; 5472 } 5473 5474 if (cl_tep->te_oconp != NULL) { 5475 ASSERT(cl_tep != cl_tep->te_oconp); 5476 TL_UNCONNECT(cl_tep->te_oconp); 5477 } 5478 5479 if (cl_tep->te_closing) { 5480 tl_freetip(tep, tip); 5481 continue; 5482 } 5483 5484 enableok(cl_tep->te_wq); 5485 TL_QENABLE(cl_tep); 5486 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5487 if (d_mp != NULL) { 5488 cl_tep->te_state = TS_IDLE; 5489 putnext(cl_tep->te_rq, d_mp); 5490 } else { 5491 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5492 SL_TRACE|SL_ERROR, 5493 "tl_co_unconnect:icmng: " 5494 "allocb failure")); 5495 } 5496 tl_freetip(tep, tip); 5497 } 5498 } else if (srv_tep != NULL) { 5499 /* 5500 * If outgoing request pending, change state 5501 * of server on discon ind event 5502 */ 5503 5504 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5505 IS_COTSORD(srv_tep) && 5506 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5507 /* 5508 * Queue ordrel_ind for server to be picked up 5509 * when the connection is accepted. 
5510 */ 5511 d_mp = tl_ordrel_ind_alloc(); 5512 } else { 5513 /* 5514 * send discon_ind to server 5515 */ 5516 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5517 } 5518 if (d_mp == NULL) { 5519 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5520 SL_TRACE|SL_ERROR, 5521 "tl_co_unconnect:outgoing:allocb failure")); 5522 TL_UNCONNECT(tep->te_oconp); 5523 goto discon_peer; 5524 } 5525 5526 /* 5527 * If this is a socket the T_DISCON_IND is queued with 5528 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5529 * from the list of pending connections. 5530 * Note that when te_oconp is set the peer better have 5531 * a t_connind_t for the client. 5532 */ 5533 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5534 /* 5535 * Queue the disconnection message. 5536 */ 5537 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5538 } else { 5539 tip = tl_icon_find(srv_tep, tep->te_seqno); 5540 if (tip == NULL) { 5541 freemsg(d_mp); 5542 } else { 5543 ASSERT(tep == tip->ti_tep); 5544 ASSERT(tep->te_ser == srv_tep->te_ser); 5545 /* 5546 * Delete tip from the server list. 
5547 */ 5548 if (srv_tep->te_nicon == 1) { 5549 srv_tep->te_state = 5550 NEXTSTATE(TE_DISCON_IND2, 5551 srv_tep->te_state); 5552 } else { 5553 srv_tep->te_state = 5554 NEXTSTATE(TE_DISCON_IND3, 5555 srv_tep->te_state); 5556 } 5557 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5558 T_DISCON_IND); 5559 putnext(srv_tep->te_rq, d_mp); 5560 tl_freetip(srv_tep, tip); 5561 } 5562 TL_UNCONNECT(tep->te_oconp); 5563 srv_tep = NULL; 5564 } 5565 } else if (peer_tep != NULL) { 5566 /* 5567 * unconnect existing connection 5568 * If connected, change state of peer on 5569 * discon ind event and send discon ind pdu 5570 * to module above it 5571 */ 5572 5573 ASSERT(tep->te_ser == peer_tep->te_ser); 5574 if (IS_COTSORD(peer_tep) && 5575 (peer_tep->te_state == TS_WIND_ORDREL || 5576 peer_tep->te_state == TS_DATA_XFER)) { 5577 /* 5578 * send ordrel ind 5579 */ 5580 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5581 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5582 peer_tep->te_state, 5583 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5584 d_mp = tl_ordrel_ind_alloc(); 5585 if (! d_mp) { 5586 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5587 SL_TRACE|SL_ERROR, 5588 "tl_co_unconnect:connected:" 5589 "allocb failure")); 5590 /* 5591 * Continue with cleaning up peer as 5592 * this side may go away with the close 5593 */ 5594 TL_QENABLE(peer_tep); 5595 goto discon_peer; 5596 } 5597 peer_tep->te_state = 5598 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5599 5600 putnext(peer_tep->te_rq, d_mp); 5601 /* 5602 * Handle flow control case. This will generate 5603 * a t_discon_ind message with reason 0 if there 5604 * is data queued on the write side. 5605 */ 5606 TL_QENABLE(peer_tep); 5607 } else if (IS_COTSORD(peer_tep) && 5608 peer_tep->te_state == TS_WREQ_ORDREL) { 5609 /* 5610 * Sent an ordrel_ind. We send a discon with 5611 * with error 0 to inform that the peer is gone. 
5612 */ 5613 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5614 SL_TRACE|SL_ERROR, 5615 "tl_co_unconnect: discon in state %d", 5616 tep->te_state)); 5617 tl_discon_ind(peer_tep, 0); 5618 } else { 5619 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5620 SL_TRACE|SL_ERROR, 5621 "tl_co_unconnect: state %d", tep->te_state)); 5622 tl_discon_ind(peer_tep, ECONNRESET); 5623 } 5624 5625 discon_peer: 5626 /* 5627 * Disconnect cross-pointers only for close 5628 */ 5629 if (tep->te_closing) { 5630 peer_tep = tep->te_conp; 5631 TL_REMOVE_PEER(peer_tep->te_conp); 5632 TL_REMOVE_PEER(tep->te_conp); 5633 } 5634 } 5635 } 5636 5637 /* 5638 * Note: The following routine does not recover from allocb() 5639 * failures 5640 * The reason should be from the <sys/errno.h> space. 5641 */ 5642 static void 5643 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5644 { 5645 mblk_t *d_mp; 5646 5647 if (tep->te_closing) 5648 return; 5649 5650 /* 5651 * flush the queues. 5652 */ 5653 flushq(tep->te_rq, FLUSHDATA); 5654 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5655 5656 /* 5657 * send discon ind 5658 */ 5659 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5660 if (! d_mp) { 5661 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5662 "tl_discon_ind:allocb failure")); 5663 return; 5664 } 5665 tep->te_state = TS_IDLE; 5666 putnext(tep->te_rq, d_mp); 5667 } 5668 5669 /* 5670 * Note: The following routine does not recover from allocb() 5671 * failures 5672 * The reason should be from the <sys/errno.h> space. 
5673 */ 5674 static mblk_t * 5675 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5676 { 5677 mblk_t *mp; 5678 struct T_discon_ind *tdi; 5679 5680 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5681 DB_TYPE(mp) = M_PROTO; 5682 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5683 tdi = (struct T_discon_ind *)mp->b_rptr; 5684 tdi->PRIM_type = T_DISCON_IND; 5685 tdi->DISCON_reason = reason; 5686 tdi->SEQ_number = seqnum; 5687 } 5688 return (mp); 5689 } 5690 5691 5692 /* 5693 * Note: The following routine does not recover from allocb() 5694 * failures 5695 */ 5696 static mblk_t * 5697 tl_ordrel_ind_alloc(void) 5698 { 5699 mblk_t *mp; 5700 struct T_ordrel_ind *toi; 5701 5702 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5703 DB_TYPE(mp) = M_PROTO; 5704 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5705 toi = (struct T_ordrel_ind *)mp->b_rptr; 5706 toi->PRIM_type = T_ORDREL_IND; 5707 } 5708 return (mp); 5709 } 5710 5711 5712 /* 5713 * Lookup the seqno in the list of queued connections. 5714 */ 5715 static tl_icon_t * 5716 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5717 { 5718 list_t *l = &tep->te_iconp; 5719 tl_icon_t *tip = list_head(l); 5720 5721 ASSERT(seqno != 0); 5722 5723 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5724 ; 5725 5726 return (tip); 5727 } 5728 5729 /* 5730 * Queue data for a given T_CONN_IND while verifying that redundant 5731 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5732 * Used when the originator of the connection closes. 
5733 */ 5734 static void 5735 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5736 { 5737 tl_icon_t *tip; 5738 mblk_t **mpp, *mp; 5739 int prim, nprim; 5740 5741 if (nmp->b_datap->db_type == M_PROTO) 5742 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5743 else 5744 nprim = -1; /* M_DATA */ 5745 5746 tip = tl_icon_find(tep, seqno); 5747 if (tip == NULL) { 5748 freemsg(nmp); 5749 return; 5750 } 5751 5752 ASSERT(tip->ti_seqno != 0); 5753 mpp = &tip->ti_mp; 5754 while (*mpp != NULL) { 5755 mp = *mpp; 5756 5757 if (mp->b_datap->db_type == M_PROTO) 5758 prim = ((union T_primitives *)mp->b_rptr)->type; 5759 else 5760 prim = -1; /* M_DATA */ 5761 5762 /* 5763 * Allow nothing after a T_DISCON_IND 5764 */ 5765 if (prim == T_DISCON_IND) { 5766 freemsg(nmp); 5767 return; 5768 } 5769 /* 5770 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5771 */ 5772 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5773 freemsg(nmp); 5774 return; 5775 } 5776 mpp = &(mp->b_next); 5777 } 5778 *mpp = nmp; 5779 } 5780 5781 /* 5782 * Verify if a certain TPI primitive exists on the connind queue. 5783 * Use prim -1 for M_DATA. 5784 * Return non-zero if found. 5785 */ 5786 static boolean_t 5787 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5788 { 5789 tl_icon_t *tip = tl_icon_find(tep, seqno); 5790 boolean_t found = B_FALSE; 5791 5792 if (tip != NULL) { 5793 mblk_t *mp; 5794 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5795 found = (DB_TYPE(mp) == M_PROTO && 5796 ((union T_primitives *)mp->b_rptr)->type == prim); 5797 } 5798 } 5799 return (found); 5800 } 5801 5802 /* 5803 * Send the b_next mblk chain that has accumulated before the connection 5804 * was accepted. Perform the necessary state transitions. 
5805 */ 5806 static void 5807 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5808 { 5809 mblk_t *mp; 5810 union T_primitives *primp; 5811 5812 if (tep->te_closing) { 5813 tl_icon_freemsgs(mpp); 5814 return; 5815 } 5816 5817 ASSERT(tep->te_state == TS_DATA_XFER); 5818 ASSERT(tep->te_rq->q_first == NULL); 5819 5820 while ((mp = *mpp) != NULL) { 5821 *mpp = mp->b_next; 5822 mp->b_next = NULL; 5823 5824 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5825 switch (DB_TYPE(mp)) { 5826 default: 5827 freemsg(mp); 5828 break; 5829 case M_DATA: 5830 putnext(tep->te_rq, mp); 5831 break; 5832 case M_PROTO: 5833 primp = (union T_primitives *)mp->b_rptr; 5834 switch (primp->type) { 5835 case T_UNITDATA_IND: 5836 case T_DATA_IND: 5837 case T_OPTDATA_IND: 5838 case T_EXDATA_IND: 5839 putnext(tep->te_rq, mp); 5840 break; 5841 case T_ORDREL_IND: 5842 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5843 tep->te_state); 5844 putnext(tep->te_rq, mp); 5845 break; 5846 case T_DISCON_IND: 5847 tep->te_state = TS_IDLE; 5848 putnext(tep->te_rq, mp); 5849 break; 5850 default: 5851 #ifdef DEBUG 5852 cmn_err(CE_PANIC, 5853 "tl_icon_sendmsgs: unknown primitive"); 5854 #endif /* DEBUG */ 5855 freemsg(mp); 5856 break; 5857 } 5858 break; 5859 } 5860 } 5861 } 5862 5863 /* 5864 * Free the b_next mblk chain that has accumulated before the connection 5865 * was accepted. 5866 */ 5867 static void 5868 tl_icon_freemsgs(mblk_t **mpp) 5869 { 5870 mblk_t *mp; 5871 5872 while ((mp = *mpp) != NULL) { 5873 *mpp = mp->b_next; 5874 mp->b_next = NULL; 5875 freemsg(mp); 5876 } 5877 } 5878 5879 /* 5880 * Send M_ERROR 5881 * Note: assumes caller ensured enough space in mp or enough 5882 * memory available. 
Does not attempt recovery from allocb() 5883 * failures 5884 */ 5885 5886 static void 5887 tl_merror(queue_t *wq, mblk_t *mp, int error) 5888 { 5889 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5890 5891 if (tep->te_closing) { 5892 freemsg(mp); 5893 return; 5894 } 5895 5896 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5897 SL_TRACE|SL_ERROR, 5898 "tl_merror: tep=%p, err=%d", tep, error)); 5899 5900 /* 5901 * flush all messages on queue. we are shutting 5902 * the stream down on fatal error 5903 */ 5904 flushq(wq, FLUSHALL); 5905 if (IS_COTS(tep)) { 5906 /* connection oriented - unconnect endpoints */ 5907 tl_co_unconnect(tep); 5908 } 5909 if (mp->b_cont) { 5910 freemsg(mp->b_cont); 5911 mp->b_cont = NULL; 5912 } 5913 5914 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5915 freemsg(mp); 5916 mp = allocb(1, BPRI_HI); 5917 if (!mp) { 5918 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5919 SL_TRACE|SL_ERROR, 5920 "tl_merror:M_PROTO: out of memory")); 5921 return; 5922 } 5923 } 5924 if (mp) { 5925 DB_TYPE(mp) = M_ERROR; 5926 mp->b_rptr = DB_BASE(mp); 5927 *mp->b_rptr = (char)error; 5928 mp->b_wptr = mp->b_rptr + sizeof (char); 5929 qreply(wq, mp); 5930 } else { 5931 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5932 } 5933 } 5934 5935 static void 5936 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5937 { 5938 if (flag & TL_SETCRED) { 5939 struct opthdr *opt = (struct opthdr *)buf; 5940 tl_credopt_t *tlcred; 5941 5942 opt->level = TL_PROT_LEVEL; 5943 opt->name = TL_OPT_PEER_CRED; 5944 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5945 5946 tlcred = (tl_credopt_t *)(opt + 1); 5947 tlcred->tc_uid = crgetuid(cr); 5948 tlcred->tc_gid = crgetgid(cr); 5949 tlcred->tc_ruid = crgetruid(cr); 5950 tlcred->tc_rgid = crgetrgid(cr); 5951 tlcred->tc_suid = crgetsuid(cr); 5952 tlcred->tc_sgid = crgetsgid(cr); 5953 tlcred->tc_ngroups = crgetngroups(cr); 5954 } else if (flag & TL_SETUCRED) { 5955 struct opthdr *opt = (struct opthdr *)buf; 5956 5957 opt->level = 
TL_PROT_LEVEL; 5958 opt->name = TL_OPT_PEER_UCRED; 5959 opt->len = (t_uscalar_t)OPTLEN(ucredsize); 5960 5961 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 5962 } else { 5963 struct T_opthdr *topt = (struct T_opthdr *)buf; 5964 ASSERT(flag & TL_SOCKUCRED); 5965 5966 topt->level = SOL_SOCKET; 5967 topt->name = SCM_UCRED; 5968 topt->len = ucredsize + sizeof (*topt); 5969 topt->status = 0; 5970 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 5971 } 5972 } 5973 5974 /* ARGSUSED */ 5975 static int 5976 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5977 { 5978 /* no default value processed in protocol specific code currently */ 5979 return (-1); 5980 } 5981 5982 /* ARGSUSED */ 5983 static int 5984 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 5985 { 5986 int len; 5987 tl_endpt_t *tep; 5988 int *valp; 5989 5990 tep = (tl_endpt_t *)wq->q_ptr; 5991 5992 len = 0; 5993 5994 /* 5995 * Assumes: option level and name sanity check done elsewhere 5996 */ 5997 5998 switch (level) { 5999 case SOL_SOCKET: 6000 if (! IS_SOCKET(tep)) 6001 break; 6002 switch (name) { 6003 case SO_RECVUCRED: 6004 len = sizeof (int); 6005 valp = (int *)ptr; 6006 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6007 break; 6008 default: 6009 break; 6010 } 6011 break; 6012 case TL_PROT_LEVEL: 6013 switch (name) { 6014 case TL_OPT_PEER_CRED: 6015 case TL_OPT_PEER_UCRED: 6016 /* 6017 * option not supposed to retrieved directly 6018 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6019 * when some internal flags set by other options 6020 * Direct retrieval always designed to fail(ignored) 6021 * for this option. 
6022 */ 6023 break; 6024 } 6025 } 6026 return (len); 6027 } 6028 6029 /* ARGSUSED */ 6030 static int 6031 tl_set_opt( 6032 queue_t *wq, 6033 uint_t mgmt_flags, 6034 int level, 6035 int name, 6036 uint_t inlen, 6037 uchar_t *invalp, 6038 uint_t *outlenp, 6039 uchar_t *outvalp, 6040 void *thisdg_attrs, 6041 cred_t *cr, 6042 mblk_t *mblk) 6043 { 6044 int error; 6045 tl_endpt_t *tep; 6046 6047 tep = (tl_endpt_t *)wq->q_ptr; 6048 6049 error = 0; /* NOERROR */ 6050 6051 /* 6052 * Assumes: option level and name sanity checks done elsewhere 6053 */ 6054 6055 switch (level) { 6056 case SOL_SOCKET: 6057 if (! IS_SOCKET(tep)) { 6058 error = EINVAL; 6059 break; 6060 } 6061 /* 6062 * TBD: fill in other AF_UNIX socket options and then stop 6063 * returning error. 6064 */ 6065 switch (name) { 6066 case SO_RECVUCRED: 6067 /* 6068 * We only support this for datagram sockets; 6069 * getpeerucred handles the connection oriented 6070 * transports. 6071 */ 6072 if (! IS_CLTS(tep)) { 6073 error = EINVAL; 6074 break; 6075 } 6076 if (*(int *)invalp == 0) 6077 tep->te_flag &= ~TL_SOCKUCRED; 6078 else 6079 tep->te_flag |= TL_SOCKUCRED; 6080 break; 6081 default: 6082 error = EINVAL; 6083 break; 6084 } 6085 break; 6086 case TL_PROT_LEVEL: 6087 switch (name) { 6088 case TL_OPT_PEER_CRED: 6089 case TL_OPT_PEER_UCRED: 6090 /* 6091 * option not supposed to be set directly 6092 * Its value in initialized for each endpoint at 6093 * driver open time. 6094 * Direct setting always designed to fail for this 6095 * option. 
6096 */ 6097 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6098 SL_TRACE|SL_ERROR, 6099 "tl_set_opt: option is not supported")); 6100 error = EPROTO; 6101 break; 6102 } 6103 } 6104 return (error); 6105 } 6106 6107 6108 static void 6109 tl_timer(void *arg) 6110 { 6111 queue_t *wq = arg; 6112 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6113 6114 ASSERT(tep); 6115 6116 tep->te_timoutid = 0; 6117 6118 enableok(wq); 6119 /* 6120 * Note: can call wsrv directly here and save context switch 6121 * Consider change when qtimeout (not timeout) is active 6122 */ 6123 qenable(wq); 6124 } 6125 6126 static void 6127 tl_buffer(void *arg) 6128 { 6129 queue_t *wq = arg; 6130 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6131 6132 ASSERT(tep); 6133 6134 tep->te_bufcid = 0; 6135 tep->te_nowsrv = B_FALSE; 6136 6137 enableok(wq); 6138 /* 6139 * Note: can call wsrv directly here and save context switch 6140 * Consider change when qbufcall (not bufcall) is active 6141 */ 6142 qenable(wq); 6143 } 6144 6145 static void 6146 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6147 { 6148 tl_endpt_t *tep; 6149 6150 tep = (tl_endpt_t *)wq->q_ptr; 6151 6152 if (tep->te_closing) { 6153 freemsg(mp); 6154 return; 6155 } 6156 noenable(wq); 6157 6158 (void) insq(wq, wq->q_first, mp); 6159 6160 if (tep->te_bufcid || tep->te_timoutid) { 6161 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6162 "tl_memrecover:recover %p pending", (void *)wq)); 6163 return; 6164 } 6165 6166 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6167 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6168 drv_usectohz(TL_BUFWAIT)); 6169 } 6170 } 6171 6172 static void 6173 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6174 { 6175 ASSERT(tip->ti_seqno != 0); 6176 6177 if (tip->ti_mp != NULL) { 6178 tl_icon_freemsgs(&tip->ti_mp); 6179 tip->ti_mp = NULL; 6180 } 6181 if (tip->ti_tep != NULL) { 6182 tl_refrele(tip->ti_tep); 6183 tip->ti_tep = NULL; 6184 } 6185 list_remove(&tep->te_iconp, tip); 6186 kmem_free(tip, 
sizeof (tl_icon_t)); 6187 tep->te_nicon--; 6188 } 6189 6190 /* 6191 * Remove address from address hash. 6192 */ 6193 static void 6194 tl_addr_unbind(tl_endpt_t *tep) 6195 { 6196 tl_endpt_t *elp; 6197 6198 if (tep->te_flag & TL_ADDRHASHED) { 6199 if (IS_SOCKET(tep)) { 6200 (void) mod_hash_remove(tep->te_addrhash, 6201 (mod_hash_key_t)tep->te_vp, 6202 (mod_hash_val_t *)&elp); 6203 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6204 tep->te_magic = SOU_MAGIC_IMPLICIT; 6205 } else { 6206 (void) mod_hash_remove(tep->te_addrhash, 6207 (mod_hash_key_t)&tep->te_ap, 6208 (mod_hash_val_t *)&elp); 6209 (void) kmem_free(tep->te_abuf, tep->te_alen); 6210 tep->te_alen = -1; 6211 tep->te_abuf = NULL; 6212 } 6213 tep->te_flag &= ~TL_ADDRHASHED; 6214 } 6215 } 6216