/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Multithreaded STREAMS Local Transport Provider.
 *
 * OVERVIEW
 * ========
 *
 * This driver provides TLI as well as socket semantics. It provides
 * connectionless, connection oriented, and connection oriented with orderly
 * release transports for TLI and sockets. Each transport type has separate
 * name spaces (i.e. it is not possible to connect from a socket to a TLI
 * endpoint) - this removes any name space conflicts when binding to socket
 * style transport addresses.
 *
 * NOTE: There is one exception: socket ticots and ticotsord transports share
 * the same namespace. In fact, sockets always use the ticotsord type
 * transport.
 *
 * The driver mode is specified during open() by the minor number used for
 * open.
 *
 * The sockets in addition have the following semantic differences:
 * No support for passing up credentials (TL_SET[U]CRED).
 *
 * Options are passed through transparently from T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 * T_OPTDATA_IND.
 *
 * The T_CONN_CON is generated when processing the T_CONN_REQ, i.e. before
 * a T_CONN_RES is received from the acceptor. This means that a socket
 * connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed
 * behind the serializer and are, essentially, single-threaded. All functions
 * executed behind the same serializer are strictly serialized. So if one
 * thread calls serializer_enter(serializer, foo, mp1, arg1); and another
 * thread calls serializer_enter(serializer, bar, mp2, arg2); then (depending
 * on which one was called first) the actual sequence will be foo(mp1, arg1);
 * bar(mp2, arg2); or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar() will
 * never run at the same time.
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers.
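 *
 * A minimal sketch of the calling pattern used throughout this driver
 * (illustrative only; tl_proc stands for any of the tl_*_ser callbacks
 * defined below):
 *
 *	tl_refhold(tep);
 *	tl_serializer_enter(tep, tl_proc, mp);
 *
 * and, at the end of tl_proc():
 *
 *	tl_serializer_exit(tep);
 *	tl_refrele(tep);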
 * During connection request processing the endpoint serializer is switched to
 * the listener's serializer and the rest of T_CONN_REQ processing is done on
 * the listener serializer. During T_CONN_RES processing the eager serializer
 * is switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer. To avoid
 * races with endpoint closes while its serializer may be changing, closes are
 * blocked while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after an endpoint closed. The te_closing being set on the endpoint
 * indicates that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with
 * the lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using the te_closing flag.
 * It is usually enough to check the te_closing flag since all other state
 * changes happen after this flag is set and the close entered the serializer.
 * Immediately after setting the te_closing flag tl_close() enters the
 * serializer and waits until the callback finishes. This allows all functions
 * called within the serializer to simply check te_closing without any locks.
 *
 * Serializer management
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is
 * constructed and destroyed when the endpoint is destructed. CLTS transports
 * use global serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * serializer destruction. The serializer should never be destroyed by any
 * function executed inside the serializer. This means that close has to wait
 * till all serializer activity for this endpoint is finished before it can
 * drop the last reference on the endpoint (which may as well free the
 * serializer). This is only relevant for COTS transports, which manage
 * serializers dynamically. For CLTS transports close may complete without
 * waiting for all serializer activity to finish since the serializer is only
 * destroyed at driver detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing the
 * client serializer if it has any pending messages on the serializer and
 * instead moves the acceptor to the listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses the modhash hash table implementation. Each transport uses
 * two hash tables - one for finding endpoints by acceptor ID and another one
 * for finding endpoints by address. For sockets TICOTS and TICOTSORD share
 * the same pair of hash tables since sockets only use TICOTSORD.
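 *
 * A sketch of the lookup pattern (illustrative; the real callers are
 * tl_find_peer() and tl_sock_find_peer() below):
 *
 *	if (mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)&addr,
 *	    (mod_hash_val_t *)&peer_tep, tl_find_callback) == 0) {
 *		... peer_tep is returned with an extra reference held ...
 *		tl_refrele(peer_tep);
 *	}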
 *
 * All hash table lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters the serializer twice on close(). The close sequence is
 * the following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero).
 *    This step is needed to prevent close during serializer switches. In most
 *    cases (close happening after connection establishment) te_closewait is
 *    zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within the serializer and wait for it to complete.
 *
 *    tl_close_ser() simply marks the endpoint and wakes up the waiting
 *    tl_close(). It also needs to clear write-side q_next pointers - this
 *    should be done before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *    complete (for COTS transports). For CLTS transports there is no wait.
 *
 *    tl_close_finish_ser() finishes the close process and wakes up the
 *    waiting close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one
 * calls putq() on the read queue. The read side service routine tl_rsrv() is
 * called when the read side stream is back-enabled. It enters the serializer
 * synchronously (waits till serializer processing is complete). Within the
 * serializer it back-enables all endpoints blocked by the queue for
 * connectionless transports and enables write side service processing for
 * the peer for connection-oriented transports.
 *
 * Read and write side service routines use special mblk_sized space in the
 * endpoint structure to enter the perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These
 * two queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput(). In order to maintain the ordering between these two queues the
 * STREAMS queue is only manipulated within the serializer, so the ordering is
 * provided by the serializer.
 *
 * Functions called from the tl_wsrv() sometimes may call putbq(). To
 * immediately stop any further processing of the STREAMS message queues the
 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The
 * write side service processing stops when the flag is set, as sketched
 * below.
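 *
 * A sketch of that convention (illustrative; the driver uses the TL_PUTBQ()
 * macro defined below for exactly this):
 *
 *	tep->te_nowsrv = B_TRUE;
 *	(void) putbq(tep->te_wq, mp);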
 *
 * The tl_wsrv() function enters the serializer synchronously and waits for it
 * to complete. The serializer call-back tl_wsrv_ser() either drains all
 * messages on the STREAMS queue or terminates when it notices the te_nowsrv
 * flag set. Note that the maximum number of messages processed by
 * tl_wsrv_ser() is always bounded by the number of messages on the STREAMS
 * queue at the time tl_wsrv_ser() is entered. Any new messages may only
 * appear on the STREAMS queue from another serialized entry which can't
 * happen in parallel. This guarantees that tl_wsrv_ser() completes in bounded
 * time (there is no risk of it draining forever while a writer places new
 * messages on the STREAMS queue).
 *
 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 *
 *
 * Unix Domain Sockets
 * ===================
 *
 * The driver knows the structure of Unix Domain socket addresses and treats
 * them differently from generic TLI addresses. For sockets implicit binds are
 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the
 * address instead of using an address length of zero. Explicit binds specify
 * SOU_MAGIC_EXPLICIT as magic.
 *
 * For implicit binds we always use the minor number as the soua_vp part of
 * the address and avoid any hash table lookups. This saves two hash table
 * lookups per anonymous bind.
 *
 * For explicit addresses we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For unix domain sockets the te_ap always points to the te_uxaddr part of
 * the tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses the minor number as the acceptor
 * id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit
 *    bind is requested and the endpoint is already in use. There is no point
 *    in generating an unused address since this address will be rejected by
 *    sockfs anyway. For implicit binds it always generates a new address
 *    (sets soua_vp to its minor number).
 *
 * 2) It always uses the minor number as the acceptor ID and never uses the
 *    queue pointer. This is ok since sockets get the acceptor ID from the
 *    T_CAPABILITY_REQ message and they do not use the queue pointer.
 *
 * 3) For listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with
 *    non-zero backlog, so sotpi_listen() issues an unbind request followed by
 *    a bind request to the same address but with a non-zero qlen value. Both
 *    tl_bind() and tl_unbind() require a write lock on the hash table to
 *    insert/remove the address. The driver does not remove the address from
 *    the hash for endpoints that are bound to an explicit address and have a
 *    backlog of zero. During T_BIND_REQ processing, if the address requested
 *    is equal to the address the endpoint already has, it updates the backlog
 *    without reinserting the address in the hash table. This optimization
 *    avoids two hash table updates for each listener created. It also avoids
 *    the problem of a "stolen" address when another listener may use the same
 *    address between the unbind and bind and suddenly listen() fails because
 *    the address is in use even though the bind() succeeded. A sketch of the
 *    resulting sequence is given below.
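 *
 * Illustrative sequence for violation 3 (a sketch, not verbatim driver code):
 *
 *	bind(fd, addr, qlen = 0)  T_BIND_REQ: address inserted in the hash
 *	listen(fd)                T_UNBIND_REQ: address kept in the hash
 *	                          T_BIND_REQ (same addr, qlen > 0): only
 *	                          te_qlen is updated; no hash remove/insert
 *	                          is performed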
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and
 * one for sockets). Functions executing behind the serializer can check or
 * modify the state of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in
 * the te_lastep field. The next time X talks to some address A it checks
 * whether A is the same as Y's address and if it is there is no need to look
 * Y up. If the address is different or the state of Y is not appropriate
 * (e.g. closed or not idle) X does a lookup using tl_find_peer() and caches
 * the new address.
 * NOTE: tl_find_peer() never returns a closing endpoint and it places a
 * refhold on the endpoint found.
 *
 * During close endpoint Y doesn't try to remove itself from other endpoints'
 * caches. They will detect that Y is gone and will search for the peer
 * endpoint again.
 *
 * Flow Control Handling
 * ---------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables
 * everyone in the list. If X is flow-controlled when it is closing it removes
 * itself from the peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all
 * the endpoint state. For connection-oriented transports it keeps a list of
 * pending connections (tl_icon_t). For connectionless transports it keeps a
 * list of endpoints flow controlled by this one.
 *
 * Each transport type is represented by a per-transport data structure
 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 * endpoint address hash tables for each transport. It also contains a
 * pointer to the transport serializer for connectionless transports.
 *
 * Each endpoint keeps a link to its transport structure, so the code can find
 * all per-transport information quickly.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/strlog.h>
#include <sys/debug.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/id_space.h>
#include <sys/modhash.h>
#include <sys/mkdev.h>
#include <sys/tl.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/strsun.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysmacros.h>
#include <sys/xti_xtiopt.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zone.h>
#include <inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
#include <inet/optcom.h>
#include <sys/strsubr.h>
#include <sys/ucred.h>
#include <sys/suntpi.h>
#include <sys/list.h>
#include <sys/serializer.h>

/*
 * TBD List
 * 14. Eliminate state changes through table
 * 16. AF_UNIX socket options
 * 17. connect() for ticlts
 * 18. support for "netstat" to show AF_UNIX plus TLI local
 *	transport connections
 * 21. sanity check to flushing on sending M_ERROR
 */

/*
 * CONSTANT DECLARATIONS
 * ---------------------
 */

/*
 * Local declarations
 */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */

/*
 * Hash table size.
 */
#define	TL_HASH_SIZE	311

/*
 * Definitions for module_info
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */

/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET | TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET | TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET | TL_TICLTS)

#define	TL_MINOR_MASK	0x7
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))

/*
 * EXTERNAL VARIABLE DECLARATIONS
 * ------------------------------
 */
/*
 * State table defined in the OS space.c
 */
extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINT PROTOTYPES
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static int tl_wput(queue_t *, mblk_t *);
static int tl_wsrv(queue_t *);
static int tl_rsrv(queue_t *);

static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);


/*
 * GLOBAL DATA STRUCTURES AND VARIABLES
 * ------------------------------------
 */

/*
 * Table representing the database of all options managed by
 * T_SVR4_OPTMGMT_REQ. For now, we only manage the SO_RECVUCRED option, but we
 * also have harmless dummy options to make things work with some common code
 * we access.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};

/*
 * Table of all supported levels.
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options, so we need this info separately.
 *
 * This is needed only for topmost tpi providers.
 */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	TL_PROT_LEVEL
};

#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)

/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof (long) are handled
 * automatically. Others need special handling.
 */
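/*
 * Worked example of the bound below (sizes are illustrative, assuming struct
 * opthdr is three t_uscalar_t's (12 bytes) and struct T_optmgmt_ack is four
 * t_scalar_t's (16 bytes)): with A_CNT(tl_opt_arr) == 2 the bound works out
 * to (2 << 2) + 2 * 12 + 64 + 16 == 112 bytes, comfortably above what the
 * two managed options can produce.
 */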
#define	TL_MAX_OPT_BUF_LEN						\
		((A_CNT(tl_opt_arr) << 2) +				\
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
		64 + sizeof (struct T_optmgmt_ack))

#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)

/*
 * Transport address structure.
 */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;	/* Zone scope of address */
	t_scalar_t	ta_alen;	/* length of abuf */
	void		*ta_abuf;	/* the addr itself */
} tl_addr_t;

/*
 * Refcounted version of serializer.
 */
typedef struct tl_serializer {
	uint_t		ts_refcnt;
	serializer_t	*ts_serializer;
} tl_serializer_t;

/*
 * Per-transport state; each transport type has a separate instance.
 */
typedef struct tl_transport_state {
	char		*tr_name;
	minor_t		tr_minor;
	uint32_t	tr_defaddr;
	mod_hash_t	*tr_ai_hash;
	mod_hash_t	*tr_addr_hash;
	tl_serializer_t	*tr_serializer;
} tl_transport_state_t;

#define	TL_DFADDR 0x1000

static tl_transport_state_t tl_transports[] = {
	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
};

#define	TL_MAXTRANSPORT A_CNT(tl_transports)

struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);

/*
 * Data structure used to represent pending connects.
 * Records enough information so that the connecting peer can close
 * before the connection gets accepted.
 */
typedef struct tl_icon {
	list_node_t	ti_node;
	struct tl_endpt	*ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;

typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;

/*
 * Transport endpoint structure.
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid	te_ap.ta_zoneid
#define	te_alen		te_ap.ta_alen
#define	te_abuf		te_ap.ta_abuf

	tl_transport_state_t *te_transport;
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */
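
	/*
	 * The union below overlays connection-oriented and connectionless
	 * state; the transport bits in te_flag (via IS_COTS()/IS_CLTS())
	 * determine which leg is valid for a given endpoint.
	 */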
	/*
	 * State specific for connection-oriented and connectionless
	 * transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv() */
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv() */
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};

/*
 * Flag values. The lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
 * they allow the endpoint to be identified more easily.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the
 * chance of missing a part of a combination.
 */
#define	TL_UNCONNECT(x)		{ tl_refrele(x); x = NULL; }
#define	TL_REMOVE_PEER(x)	{ if ((x) != NULL) TL_UNCONNECT(x) }

#define	TL_PUTBQ(x, mp) {					\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));			\
	(x)->te_nowsrv = B_TRUE;				\
	(void) putbq((x)->te_wq, mp);				\
}

#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void) putq((x)->te_wq, mp); }

/*
 * STREAMS driver glue data structures.
 */
static	struct	module_info	tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};

static	struct	qinit	tl_rinit = {
	NULL,			/* qi_putp */
	tl_rsrv,		/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static	struct	qinit	tl_winit = {
	tl_wput,		/* qi_putp */
	tl_wsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static	struct streamtab	tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};

DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};

/*
 * Module linkage information for the kernel.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 */

#define	TL_CLTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
#define	TL_COTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)

static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/*
 * Private copy of devinfo pointer used in tl_info.
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;

/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
    t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);

/*
 * Initialize option database object for TL.
 */
optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};

/*
 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 * ---------------------------------------
 */

/*
 * Loadable module routines
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver Entry Points and Other routines
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics
	 * that streams message sizes can be unlimited. We use a defined
	 * constant instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
972 */ 973 for (i = 0; i < TL_UNUSED; i++) { 974 if (ddi_create_minor_node(devi, 975 tl_transports[i].tr_name, 976 S_IFCHR, tl_transports[i].tr_minor, 977 DDI_PSEUDO, 0) == DDI_FAILURE) { 978 ddi_remove_minor_node(devi, NULL); 979 return (DDI_FAILURE); 980 } 981 } 982 983 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 984 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 985 986 if (tl_cache == NULL) { 987 ddi_remove_minor_node(devi, NULL); 988 return (DDI_FAILURE); 989 } 990 991 tl_minors = id_space_create("tl_minor_space", 992 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 993 994 /* 995 * Create ID space for minor numbers 996 */ 997 for (i = 0; i < TL_MAXTRANSPORT; i++) { 998 tl_transport_state_t *t = &tl_transports[i]; 999 1000 if (i == TL_UNUSED) 1001 continue; 1002 1003 /* Socket COTSORD shares namespace with COTS */ 1004 if (i == TL_SOCK_COTSORD) { 1005 t->tr_ai_hash = 1006 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1007 ASSERT(t->tr_ai_hash != NULL); 1008 t->tr_addr_hash = 1009 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1010 ASSERT(t->tr_addr_hash != NULL); 1011 continue; 1012 } 1013 1014 /* 1015 * Create hash tables. 1016 */ 1017 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1018 t->tr_name); 1019 #ifdef _ILP32 1020 if (i & TL_SOCKET) 1021 t->tr_ai_hash = 1022 mod_hash_create_idhash(name, tl_hash_size - 1, 1023 mod_hash_null_valdtor); 1024 else 1025 t->tr_ai_hash = 1026 mod_hash_create_ptrhash(name, tl_hash_size, 1027 mod_hash_null_valdtor, sizeof (queue_t)); 1028 #else 1029 t->tr_ai_hash = 1030 mod_hash_create_idhash(name, tl_hash_size - 1, 1031 mod_hash_null_valdtor); 1032 #endif /* _ILP32 */ 1033 1034 if (i & TL_SOCKET) { 1035 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1036 t->tr_name); 1037 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1038 tl_hash_size, mod_hash_null_valdtor, 1039 sizeof (uintptr_t)); 1040 } else { 1041 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1042 t->tr_name); 1043 t->tr_addr_hash = mod_hash_create_extended(name, 1044 tl_hash_size, mod_hash_null_keydtor, 1045 mod_hash_null_valdtor, 1046 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1047 } 1048 1049 /* Create serializer for connectionless transports. */ 1050 if (i & TL_TICLTS) 1051 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1052 } 1053 1054 tl_dip = devi; 1055 1056 return (DDI_SUCCESS); 1057 } 1058 1059 static int 1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1061 { 1062 int i; 1063 1064 if (cmd == DDI_SUSPEND) 1065 return (DDI_SUCCESS); 1066 1067 if (cmd != DDI_DETACH) 1068 return (DDI_FAILURE); 1069 1070 /* 1071 * Destroy arenas and hash tables. 
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
			continue;

		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
		if (t->tr_serializer != NULL) {
			tl_serializer_refrele(t->tr_serializer);
			t->tr_serializer = NULL;
		}

#ifdef _ILP32
		if (i & TL_SOCKET)
			mod_hash_destroy_idhash(t->tr_ai_hash);
		else
			mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
		mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
		t->tr_ai_hash = NULL;
		if (i & TL_SOCKET)
			mod_hash_destroy_ptrhash(t->tr_addr_hash);
		else
			mod_hash_destroy_hash(t->tr_addr_hash);
		t->tr_addr_hash = NULL;
	}

	kmem_cache_destroy(tl_cache);
	tl_cache = NULL;
	id_space_destroy(tl_minors);
	tl_minors = NULL;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int retcode = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (tl_dip != NULL) {
			*result = (void *)tl_dip;
			retcode = DDI_SUCCESS;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = NULL;
		retcode = DDI_SUCCESS;
		break;

	default:
		break;
	}
	return (retcode);
}

/*
 * Endpoint reference management.
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}

static void
tl_refrele(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt != 0);

	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
		tl_free(tep);
}

/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
	tl_endpt_t *tep = buf;

	bzero(tep, sizeof (tl_endpt_t));
	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
	tl_endpt_t *tep = buf;

	mutex_destroy(&tep->te_closelock);
	cv_destroy(&tep->te_closecv);
	mutex_destroy(&tep->te_srv_lock);
	cv_destroy(&tep->te_srv_cv);
	mutex_destroy(&tep->te_ser_lock);
}

static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(!(tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}

/*
 * Allocate/free reference-counted wrappers for serializers.
 */
static tl_serializer_t *
tl_serializer_alloc(int flags)
{
	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
	serializer_t *ser;

	if (s == NULL)
		return (NULL);

	ser = serializer_create(flags);

	if (ser == NULL) {
		kmem_free(s, sizeof (tl_serializer_t));
		return (NULL);
	}

	s->ts_refcnt = 1;
	s->ts_serializer = ser;
	return (s);
}

static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}

static void
tl_serializer_refrele(tl_serializer_t *s)
{
	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
		serializer_destroy(s->ts_serializer);
		kmem_free(s, sizeof (tl_serializer_t));
	}
}

/*
 * Post a request on the endpoint serializer. For COTS transports keep track
 * of the number of pending requests.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}
	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}

/*
 * Complete processing the request on the serializer. Decrement the counter
 * for pending requests for COTS transports.
 */
static void
tl_serializer_exit(tl_endpt_t *tep)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		ASSERT(tep->te_ser_count != 0);
		tep->te_ser_count--;
		mutex_exit(&tep->te_ser_lock);
	}
}

/*
 * Hash management functions.
 */

/*
 * Return TRUE if two addresses are equal, false otherwise.
 */
static boolean_t
tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
{
	return ((ap1->ta_alen > 0) &&
	    (ap1->ta_alen == ap2->ta_alen) &&
	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
}

/*
 * This function is called whenever an endpoint is found in the hash table.
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}
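
/*
 * A note on the address hash below: it is the classic 32-bit PJW/ELF string
 * hash, seeded with ta_zoneid so that identical addresses bound in different
 * zones land in different buckets.
 */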

/*
 * Address hash function.
 */
/* ARGSUSED */
static uint_t
tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
{
	tl_addr_t *ap = (tl_addr_t *)key;
	size_t	len = ap->ta_alen;
	uchar_t *p = ap->ta_abuf;
	uint_t i, g;

	ASSERT((len > 0) && (p != NULL));

	for (i = ap->ta_zoneid; len-- != 0; p++) {
		i = (i << 4) + (*p);
		if ((g = (i & 0xf0000000U)) != 0) {
			i ^= (g >> 24);
			i ^= g;
		}
	}
	return (i);
}

/*
 * This function is used by hash lookups. It compares two generic addresses.
 */
static int
tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
{
#ifdef	DEBUG
	tl_addr_t *ap1 = (tl_addr_t *)key1;
	tl_addr_t *ap2 = (tl_addr_t *)key2;

	ASSERT(key1 != NULL);
	ASSERT(key2 != NULL);

	ASSERT(ap1->ta_abuf != NULL);
	ASSERT(ap2->ta_abuf != NULL);
	ASSERT(ap1->ta_alen > 0);
	ASSERT(ap2->ta_alen > 0);
#endif

	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
}

/*
 * Prevent endpoint from closing if possible.
 * Return B_TRUE on success, B_FALSE on failure.
 */
static boolean_t
tl_noclose(tl_endpt_t *tep)
{
	boolean_t rc = B_FALSE;

	mutex_enter(&tep->te_closelock);
	if (!tep->te_closing) {
		ASSERT(tep->te_closewait == 0);
		tep->te_closewait++;
		rc = B_TRUE;
	}
	mutex_exit(&tep->te_closelock);
	return (rc);
}

/*
 * Allow endpoint to close if needed.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}

/*
 * STREAMS open entry point.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal.
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);
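
	/*
	 * Note: mod_hash_reserve() pre-allocates the entry that a later
	 * mod_hash_insert_reserve() during bind processing can consume, so
	 * that the address insert cannot fail for lack of memory; an unused
	 * reservation is released with mod_hash_cancel() in tl_free().
	 */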
	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;
	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */

	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion, so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}

/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t	   *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop the reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close
	 * processing with serializer protection. This processing may happen
	 * asynchronously with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy the serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer
	 * activity to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}

/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close.
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}

/*
 * Second phase of tl_close(). Should wake up tl_close() for COTS mode and
 * drop the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
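
/*
 * A summary of how tl_wput() below dispatches messages (descriptive only;
 * the authoritative logic is the switch statement itself):
 *
 *	M_DATA				-> tl_wput_data_ser
 *	M_IOCTL (TL_IOC_[U]CREDOPT)	-> tl_do_ioctl_ser
 *	M_FLUSH				-> handled in place
 *	T_[SVR4_]OPTMGMT_REQ		-> tl_optmgmt(), outside serializer
 *	T_CONN_REQ			-> tl_conn_req(), outside serializer
 *	other M_PROTO/M_PCPROTO		-> queued or serialized as needed
 */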
1797 		 */
1798 			tl_optmgmt(wq, mp);
1799 			return (0);
1800 		case O_T_BIND_REQ:
1801 		case T_BIND_REQ:
1802 			tl_proc = tl_bind_ser;
1803 			break;
1804 		case T_CONN_REQ:
1805 			if (IS_CLTS(tep)) {
1806 				tl_merror(wq, mp, EPROTO);
1807 				return (0);
1808 			}
1809 			tl_conn_req(wq, mp);
1810 			return (0);
1811 		case T_DATA_REQ:
1812 		case T_OPTDATA_REQ:
1813 		case T_EXDATA_REQ:
1814 		case T_ORDREL_REQ:
1815 			tl_proc = tl_putq_ser;
1816 			break;
1817 		case T_UNITDATA_REQ:
1818 			if (IS_COTS(tep) ||
1819 			    (msz < sizeof (struct T_unitdata_req))) {
1820 				tl_merror(wq, mp, EPROTO);
1821 				return (0);
1822 			}
1823 			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 				tl_proc = tl_unitdata_ser;
1825 			} else {
1826 				tl_proc = tl_putq_ser;
1827 			}
1828 			break;
1829 		default:
1830 			/*
1831 			 * process in service procedure if message already
1832 			 * queued (maintain in-order processing)
1833 			 */
1834 			if (wq->q_first != NULL) {
1835 				tl_proc = tl_putq_ser;
1836 			} else {
1837 				tl_proc = tl_wput_ser;
1838 			}
1839 			break;
1840 		}
1841 		break;
1842 
1843 	case M_PCPROTO:
1844 		/*
1845 		 * Check that the message has enough data to figure out TPI
1846 		 * primitive.
1847 		 */
1848 		if (msz < sizeof (prim->type)) {
1849 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 			    SL_TRACE | SL_ERROR,
1851 			    "tl_wput:M_PCPROTO data too short"));
1852 			tl_merror(wq, mp, EPROTO);
1853 			return (0);
1854 		}
1855 		switch (prim->type) {
1856 		case T_CAPABILITY_REQ:
1857 			tl_capability_req(mp, tep);
1858 			return (0);
1859 		case T_INFO_REQ:
1860 			tl_proc = tl_info_req_ser;
1861 			break;
1862 		case T_ADDR_REQ:
1863 			tl_proc = tl_addr_req_ser;
1864 			break;
1865 
1866 		default:
1867 			(void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 			    SL_TRACE | SL_ERROR,
1869 			    "tl_wput:unknown TPI msg primitive"));
1870 			tl_merror(wq, mp, EPROTO);
1871 			return (0);
1872 		}
1873 		break;
1874 	default:
1875 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1876 		    "tl_wput:default:unexpected Streams message"));
1877 		freemsg(mp);
1878 		return (0);
1879 	}
1880 
1881 	/*
1882 	 * Continue processing via serializer.
1883 	 */
1884 	ASSERT(tl_proc != NULL);
1885 	tl_refhold(tep);
1886 	tl_serializer_enter(tep, tl_proc, mp);
1887 	return (0);
1888 }
1889 
1890 /*
1891  * Place message on the queue while preserving order.
1892  */
1893 static void
1894 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1895 {
1896 	if (tep->te_closing) {
1897 		tl_wput_ser(mp, tep);
1898 	} else {
1899 		TL_PUTQ(tep, mp);
1900 		tl_serializer_exit(tep);
1901 		tl_refrele(tep);
1902 	}
1903 
1904 }
1905 
1906 static void
1907 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1908 {
1909 	ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1910 
1911 	switch (DB_TYPE(mp)) {
1912 	case M_DATA:
1913 		tl_data(mp, tep);
1914 		break;
1915 	case M_PROTO:
1916 		tl_do_proto(mp, tep);
1917 		break;
1918 	default:
1919 		freemsg(mp);
1920 		break;
1921 	}
1922 }
1923 
1924 /*
1925  * Write side put procedure called from serializer.
1926  */
1927 static void
1928 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1929 {
1930 	tl_wput_common_ser(mp, tep);
1931 	tl_serializer_exit(tep);
1932 	tl_refrele(tep);
1933 }
1934 
1935 /*
1936  * M_DATA processing. Called from serializer.
1937  */
1938 static void
1939 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1940 {
1941 	tl_endpt_t *peer_tep = tep->te_conp;
1942 	queue_t *peer_rq;
1943 
1944 	ASSERT(DB_TYPE(mp) == M_DATA);
1945 	ASSERT(IS_COTS(tep));
1946 
1947 	IMPLY(peer_tep, tep->te_ser == peer_tep->te_ser);
1948 
1949 	/*
1950 	 * Fastpath for data. Ignore flow control if tep is closing.
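 *
 * The fastpath below hands the mblk straight to the peer's read queue
 * when all of the following hold (summarizing the test; the code is
 * authoritative):
 *	- the peer exists and is not closing;
 *	- both endpoints are in TS_DATA_XFER or TS_WREQ_ORDREL;
 *	- nothing is already queued on tep's write queue (preserves order);
 *	- the peer's read queue can take data, or tep is closing.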
1951 */ 1952 if ((peer_tep != NULL) && 1953 !peer_tep->te_closing && 1954 ((tep->te_state == TS_DATA_XFER) || 1955 (tep->te_state == TS_WREQ_ORDREL)) && 1956 (tep->te_wq != NULL) && 1957 (tep->te_wq->q_first == NULL) && 1958 ((peer_tep->te_state == TS_DATA_XFER) || 1959 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1960 ((peer_rq = peer_tep->te_rq) != NULL) && 1961 (canputnext(peer_rq) || tep->te_closing)) { 1962 putnext(peer_rq, mp); 1963 } else if (tep->te_closing) { 1964 /* 1965 * It is possible that by the time we got here tep started to 1966 * close. If the write queue is not empty, and the state is 1967 * TS_DATA_XFER the data should be delivered in order, so we 1968 * call putq() instead of freeing the data. 1969 */ 1970 if ((tep->te_wq != NULL) && 1971 ((tep->te_state == TS_DATA_XFER) || 1972 (tep->te_state == TS_WREQ_ORDREL))) { 1973 TL_PUTQ(tep, mp); 1974 } else { 1975 freemsg(mp); 1976 } 1977 } else { 1978 TL_PUTQ(tep, mp); 1979 } 1980 1981 tl_serializer_exit(tep); 1982 tl_refrele(tep); 1983 } 1984 1985 /* 1986 * Write side service routine. 1987 * 1988 * All actual processing happens within serializer which is entered 1989 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1990 * messages that need processing may have arrived, so tl_wsrv repeats until 1991 * queue is empty or te_nowsrv is set. 1992 */ 1993 static int 1994 tl_wsrv(queue_t *wq) 1995 { 1996 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1997 1998 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 1999 mutex_enter(&tep->te_srv_lock); 2000 ASSERT(tep->te_wsrv_active == B_FALSE); 2001 tep->te_wsrv_active = B_TRUE; 2002 mutex_exit(&tep->te_srv_lock); 2003 2004 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2005 2006 /* 2007 * Wait for serializer job to complete. 2008 */ 2009 mutex_enter(&tep->te_srv_lock); 2010 while (tep->te_wsrv_active) { 2011 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2012 } 2013 cv_signal(&tep->te_srv_cv); 2014 mutex_exit(&tep->te_srv_lock); 2015 } 2016 return (0); 2017 } 2018 2019 /* 2020 * Serialized write side processing of the STREAMS queue. 2021 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2022 * is NULL. 2023 */ 2024 static void 2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2026 { 2027 mblk_t *mp; 2028 queue_t *wq = tep->te_wq; 2029 2030 ASSERT(wq != NULL); 2031 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2032 tl_wput_common_ser(mp, tep); 2033 } 2034 2035 /* 2036 * Wakeup service routine unless called from close. 2037 * If ser_mp is specified, the caller is tl_wsrv(). 2038 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2039 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2040 * be no matching tl_serializer_exit() in this case. 2041 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2042 * waiting on te_srv_cv. 2043 */ 2044 if (ser_mp != NULL) { 2045 /* 2046 * We are called from tl_wsrv. 2047 */ 2048 mutex_enter(&tep->te_srv_lock); 2049 ASSERT(tep->te_wsrv_active); 2050 tep->te_wsrv_active = B_FALSE; 2051 cv_signal(&tep->te_srv_cv); 2052 mutex_exit(&tep->te_srv_lock); 2053 tl_serializer_exit(tep); 2054 } 2055 } 2056 2057 /* 2058 * Called when the stream is backenabled. Enter serializer and qenable everyone 2059 * flow controlled by tep. 2060 * 2061 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2062 * is possible that two instances of tl_rsrv will be running reusing the same 2063 * rsrv mblk. 
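 *
 * Sketch of the race being avoided (illustrative): if tl_rsrv() could
 * return while tl_rsrv_ser() was still pending, a later backenable could
 * run tl_rsrv() again and both invocations would hand the single
 * preallocated te_rsrvmp to tl_serializer_enter(). The cv_wait() loop on
 * te_srv_cv below keeps tl_rsrv() from returning until te_rsrv_active is
 * cleared by tl_rsrv_ser().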
2064 */ 2065 static int 2066 tl_rsrv(queue_t *rq) 2067 { 2068 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2069 2070 ASSERT(rq->q_first == NULL); 2071 ASSERT(tep->te_rsrv_active == 0); 2072 2073 tep->te_rsrv_active = B_TRUE; 2074 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2075 /* 2076 * Wait for serializer job to complete. 2077 */ 2078 mutex_enter(&tep->te_srv_lock); 2079 while (tep->te_rsrv_active) { 2080 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2081 } 2082 cv_signal(&tep->te_srv_cv); 2083 mutex_exit(&tep->te_srv_lock); 2084 return (0); 2085 } 2086 2087 /* ARGSUSED */ 2088 static void 2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2090 { 2091 tl_endpt_t *peer_tep; 2092 2093 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2094 tl_cl_backenable(tep); 2095 } else if ( 2096 IS_COTS(tep) && 2097 ((peer_tep = tep->te_conp) != NULL) && 2098 !peer_tep->te_closing && 2099 ((tep->te_state == TS_DATA_XFER) || 2100 (tep->te_state == TS_WIND_ORDREL)|| 2101 (tep->te_state == TS_WREQ_ORDREL))) { 2102 TL_QENABLE(peer_tep); 2103 } 2104 2105 /* 2106 * Wakeup read side service routine. 2107 */ 2108 mutex_enter(&tep->te_srv_lock); 2109 ASSERT(tep->te_rsrv_active); 2110 tep->te_rsrv_active = B_FALSE; 2111 cv_signal(&tep->te_srv_cv); 2112 mutex_exit(&tep->te_srv_lock); 2113 tl_serializer_exit(tep); 2114 } 2115 2116 /* 2117 * process M_PROTO messages. Always called from serializer. 2118 */ 2119 static void 2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2121 { 2122 ssize_t msz = MBLKL(mp); 2123 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2124 2125 /* Message size was validated by tl_wput(). */ 2126 ASSERT(msz >= sizeof (prim->type)); 2127 2128 switch (prim->type) { 2129 case T_UNBIND_REQ: 2130 tl_unbind(mp, tep); 2131 break; 2132 2133 case T_ADDR_REQ: 2134 tl_addr_req(mp, tep); 2135 break; 2136 2137 case O_T_CONN_RES: 2138 case T_CONN_RES: 2139 if (IS_CLTS(tep)) { 2140 tl_merror(tep->te_wq, mp, EPROTO); 2141 break; 2142 } 2143 tl_conn_res(mp, tep); 2144 break; 2145 2146 case T_DISCON_REQ: 2147 if (IS_CLTS(tep)) { 2148 tl_merror(tep->te_wq, mp, EPROTO); 2149 break; 2150 } 2151 tl_discon_req(mp, tep); 2152 break; 2153 2154 case T_DATA_REQ: 2155 if (IS_CLTS(tep)) { 2156 tl_merror(tep->te_wq, mp, EPROTO); 2157 break; 2158 } 2159 tl_data(mp, tep); 2160 break; 2161 2162 case T_OPTDATA_REQ: 2163 if (IS_CLTS(tep)) { 2164 tl_merror(tep->te_wq, mp, EPROTO); 2165 break; 2166 } 2167 tl_data(mp, tep); 2168 break; 2169 2170 case T_EXDATA_REQ: 2171 if (IS_CLTS(tep)) { 2172 tl_merror(tep->te_wq, mp, EPROTO); 2173 break; 2174 } 2175 tl_exdata(mp, tep); 2176 break; 2177 2178 case T_ORDREL_REQ: 2179 if (!IS_COTSORD(tep)) { 2180 tl_merror(tep->te_wq, mp, EPROTO); 2181 break; 2182 } 2183 tl_ordrel(mp, tep); 2184 break; 2185 2186 case T_UNITDATA_REQ: 2187 if (IS_COTS(tep)) { 2188 tl_merror(tep->te_wq, mp, EPROTO); 2189 break; 2190 } 2191 tl_unitdata(mp, tep); 2192 break; 2193 2194 default: 2195 tl_merror(tep->te_wq, mp, EPROTO); 2196 break; 2197 } 2198 } 2199 2200 /* 2201 * Process ioctl from serializer. 2202 * This is a wrapper around tl_do_ioctl(). 
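 *
 * Like the other *_ser() wrappers in this file, the shape is (sketch):
 *
 *	if (!tep->te_closing)
 *		<do the real work>
 *	else
 *		freemsg(mp);
 *	tl_serializer_exit(tep);
 *	tl_refrele(tep);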
2203 */ 2204 static void 2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2206 { 2207 if (!tep->te_closing) 2208 tl_do_ioctl(mp, tep); 2209 else 2210 freemsg(mp); 2211 2212 tl_serializer_exit(tep); 2213 tl_refrele(tep); 2214 } 2215 2216 static void 2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2218 { 2219 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2220 int cmd = iocbp->ioc_cmd; 2221 queue_t *wq = tep->te_wq; 2222 int error; 2223 int thisopt, otheropt; 2224 2225 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2226 2227 switch (cmd) { 2228 case TL_IOC_CREDOPT: 2229 if (cmd == TL_IOC_CREDOPT) { 2230 thisopt = TL_SETCRED; 2231 otheropt = TL_SETUCRED; 2232 } else { 2233 /* FALLTHROUGH */ 2234 case TL_IOC_UCREDOPT: 2235 thisopt = TL_SETUCRED; 2236 otheropt = TL_SETCRED; 2237 } 2238 /* 2239 * The credentials passing does not apply to sockets. 2240 * Only one of the cred options can be set at a given time. 2241 */ 2242 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2243 miocnak(wq, mp, 0, EINVAL); 2244 return; 2245 } 2246 2247 /* 2248 * Turn on generation of credential options for 2249 * T_conn_req, T_conn_con, T_unidata_ind. 2250 */ 2251 error = miocpullup(mp, sizeof (uint32_t)); 2252 if (error != 0) { 2253 miocnak(wq, mp, 0, error); 2254 return; 2255 } 2256 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2257 miocnak(wq, mp, 0, EINVAL); 2258 return; 2259 } 2260 2261 if (*(uint32_t *)mp->b_cont->b_rptr) 2262 tep->te_flag |= thisopt; 2263 else 2264 tep->te_flag &= ~thisopt; 2265 2266 miocack(wq, mp, 0, 0); 2267 break; 2268 2269 default: 2270 /* Should not be here */ 2271 miocnak(wq, mp, 0, EINVAL); 2272 break; 2273 } 2274 } 2275 2276 2277 /* 2278 * send T_ERROR_ACK 2279 * Note: assumes enough memory or caller passed big enough mp 2280 * - no recovery from allocb failures 2281 */ 2282 2283 static void 2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2285 t_scalar_t unix_err, t_scalar_t type) 2286 { 2287 struct T_error_ack *err_ack; 2288 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2289 M_PCPROTO, T_ERROR_ACK); 2290 2291 if (ackmp == NULL) { 2292 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR, 2293 "tl_error_ack:out of mblk memory")); 2294 tl_merror(wq, NULL, ENOSR); 2295 return; 2296 } 2297 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2298 err_ack->ERROR_prim = type; 2299 err_ack->TLI_error = tli_err; 2300 err_ack->UNIX_error = unix_err; 2301 2302 /* 2303 * send error ack message 2304 */ 2305 qreply(wq, ackmp); 2306 } 2307 2308 2309 2310 /* 2311 * send T_OK_ACK 2312 * Note: assumes enough memory or caller passed big enough mp 2313 * - no recovery from allocb failures 2314 */ 2315 static void 2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2317 { 2318 struct T_ok_ack *ok_ack; 2319 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2320 M_PCPROTO, T_OK_ACK); 2321 2322 if (ackmp == NULL) { 2323 tl_merror(wq, NULL, ENOMEM); 2324 return; 2325 } 2326 2327 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2328 ok_ack->CORRECT_prim = type; 2329 2330 (void) qreply(wq, ackmp); 2331 } 2332 2333 /* 2334 * Process T_BIND_REQ and O_T_BIND_REQ from serializer. 2335 * This is a wrapper around tl_bind(). 2336 */ 2337 static void 2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2339 { 2340 if (!tep->te_closing) 2341 tl_bind(mp, tep); 2342 else 2343 freemsg(mp); 2344 2345 tl_serializer_exit(tep); 2346 tl_refrele(tep); 2347 } 2348 2349 /* 2350 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 
2351  * Assumes that the endpoint is in the unbound state.
2352  */
2353 static void
2354 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2355 {
2356 	queue_t *wq = tep->te_wq;
2357 	struct T_bind_ack *b_ack;
2358 	struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2359 	mblk_t *ackmp, *bamp;
2360 	soux_addr_t ux_addr;
2361 	t_uscalar_t qlen = 0;
2362 	t_scalar_t alen, aoff;
2363 	tl_addr_t addr_req;
2364 	void *addr_startp;
2365 	ssize_t msz = MBLKL(mp), basize;
2366 	t_scalar_t tli_err = 0, unix_err = 0;
2367 	t_scalar_t save_prim_type = bind->PRIM_type;
2368 	t_scalar_t save_state = tep->te_state;
2369 
2370 	if (tep->te_state != TS_UNBND) {
2371 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2372 		    SL_TRACE | SL_ERROR,
2373 		    "tl_wput:bind_request:out of state, state=%d",
2374 		    tep->te_state));
2375 		tli_err = TOUTSTATE;
2376 		goto error;
2377 	}
2378 
2379 	if (msz < sizeof (struct T_bind_req)) {
2380 		tli_err = TSYSERR;
2381 		unix_err = EINVAL;
2382 		goto error;
2383 	}
2384 
2385 	tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2386 
2387 	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2388 	    (bind->PRIM_type == T_BIND_REQ));
2389 
2390 	alen = bind->ADDR_length;
2391 	aoff = bind->ADDR_offset;
2392 
2393 	/* negotiate max conn req pending */
2394 	if (IS_COTS(tep)) {
2395 		qlen = bind->CONIND_number;
2396 		if (qlen > tl_maxqlen)
2397 			qlen = tl_maxqlen;
2398 	}
2399 
2400 	/*
2401 	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2402 	 * and bound again.
2403 	 */
2404 	if ((tep->te_hash_hndl == NULL) &&
2405 	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2406 	    mod_hash_reserve_nosleep(tep->te_addrhash,
2407 	    &tep->te_hash_hndl) != 0) {
2408 		tli_err = TSYSERR;
2409 		unix_err = ENOSR;
2410 		goto error;
2411 	}
2412 
2413 	/*
2414 	 * Verify address correctness.
2415 	 */
2416 	if (IS_SOCKET(tep)) {
2417 		ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2418 
2419 		if ((alen != TL_SOUX_ADDRLEN) ||
2420 		    (aoff < 0) ||
2421 		    (aoff + alen > msz)) {
2422 			(void) (STRLOG(TL_ID, tep->te_minor,
2423 			    1, SL_TRACE | SL_ERROR,
2424 			    "tl_bind: invalid socket addr"));
2425 			tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2426 			tli_err = TSYSERR;
2427 			unix_err = EINVAL;
2428 			goto error;
2429 		}
2430 		/* Copy address from message to local buffer.
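		 * The local copy is used (rather than dereferencing b_rptr
		 * in place) presumably because the address need not be
		 * suitably aligned within the mblk.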
*/ 2431 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2432 /* 2433 * Check that we got correct address from sockets 2434 */ 2435 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2436 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2437 (void) (STRLOG(TL_ID, tep->te_minor, 2438 1, SL_TRACE | SL_ERROR, 2439 "tl_bind: invalid socket magic")); 2440 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2441 tli_err = TSYSERR; 2442 unix_err = EINVAL; 2443 goto error; 2444 } 2445 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2446 (ux_addr.soua_vp != NULL)) { 2447 (void) (STRLOG(TL_ID, tep->te_minor, 2448 1, SL_TRACE | SL_ERROR, 2449 "tl_bind: implicit addr non-empty")); 2450 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2451 tli_err = TSYSERR; 2452 unix_err = EINVAL; 2453 goto error; 2454 } 2455 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2456 (ux_addr.soua_vp == NULL)) { 2457 (void) (STRLOG(TL_ID, tep->te_minor, 2458 1, SL_TRACE | SL_ERROR, 2459 "tl_bind: explicit addr empty")); 2460 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2461 tli_err = TSYSERR; 2462 unix_err = EINVAL; 2463 goto error; 2464 } 2465 } else { 2466 if ((alen > 0) && ((aoff < 0) || 2467 ((ssize_t)(aoff + alen) > msz) || 2468 ((aoff + alen) < 0))) { 2469 (void) (STRLOG(TL_ID, tep->te_minor, 2470 1, SL_TRACE | SL_ERROR, 2471 "tl_bind: invalid message")); 2472 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2473 tli_err = TSYSERR; 2474 unix_err = EINVAL; 2475 goto error; 2476 } 2477 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2478 (void) (STRLOG(TL_ID, tep->te_minor, 2479 1, SL_TRACE | SL_ERROR, 2480 "tl_bind: bad addr in message")); 2481 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2482 tli_err = TBADADDR; 2483 goto error; 2484 } 2485 #ifdef DEBUG 2486 /* 2487 * Mild form of ASSERT()ion to detect broken TPI apps. 2488 * if (!assertion) 2489 * log warning; 2490 */ 2491 if (!((alen == 0 && aoff == 0) || 2492 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2493 (void) (STRLOG(TL_ID, tep->te_minor, 2494 3, SL_TRACE | SL_ERROR, 2495 "tl_bind: addr overlaps TPI message")); 2496 } 2497 #endif 2498 } 2499 2500 /* 2501 * Bind the address provided or allocate one if requested. 2502 * Allow rebinds with a new qlen value. 2503 */ 2504 if (IS_SOCKET(tep)) { 2505 /* 2506 * For anonymous requests the te_ap is already set up properly 2507 * so use minor number as an address. 2508 * For explicit requests need to check whether the address is 2509 * already in use. 2510 */ 2511 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2512 int rc; 2513 2514 if (tep->te_flag & TL_ADDRHASHED) { 2515 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2516 if (tep->te_vp == ux_addr.soua_vp) 2517 goto skip_addr_bind; 2518 else /* Rebind to a new address. */ 2519 tl_addr_unbind(tep); 2520 } 2521 /* 2522 * Insert address in the hash if it is not already 2523 * there. Since we use preallocated handle, the insert 2524 * can fail only if the key is already present. 2525 */ 2526 rc = mod_hash_insert_reserve(tep->te_addrhash, 2527 (mod_hash_key_t)ux_addr.soua_vp, 2528 (mod_hash_val_t)tep, tep->te_hash_hndl); 2529 2530 if (rc != 0) { 2531 ASSERT(rc == MH_ERR_DUPLICATE); 2532 /* 2533 * Violate O_T_BIND_REQ semantics and fail with 2534 * TADDRBUSY - sockets will not use any address 2535 * other than supplied one for explicit binds. 
2536 				 */
2537 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2538 				    SL_TRACE | SL_ERROR,
2539 				    "tl_bind:requested addr %p is busy",
2540 				    ux_addr.soua_vp));
2541 				tli_err = TADDRBUSY;
2542 				unix_err = 0;
2543 				goto error;
2544 			}
2545 			tep->te_uxaddr = ux_addr;
2546 			tep->te_flag |= TL_ADDRHASHED;
2547 			tep->te_hash_hndl = NULL;
2548 		}
2549 	} else if (alen == 0) {
2550 		/*
2551 		 * assign any free address
2552 		 */
2553 		if (!tl_get_any_addr(tep, NULL)) {
2554 			(void) (STRLOG(TL_ID, tep->te_minor,
2555 			    1, SL_TRACE | SL_ERROR,
2556 			    "tl_bind:failed to get buffer for any "
2557 			    "address"));
2558 			tli_err = TSYSERR;
2559 			unix_err = ENOSR;
2560 			goto error;
2561 		}
2562 	} else {
2563 		addr_req.ta_alen = alen;
2564 		addr_req.ta_abuf = (mp->b_rptr + aoff);
2565 		addr_req.ta_zoneid = tep->te_zoneid;
2566 
2567 		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2568 		if (tep->te_abuf == NULL) {
2569 			tli_err = TSYSERR;
2570 			unix_err = ENOSR;
2571 			goto error;
2572 		}
2573 		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2574 		tep->te_alen = alen;
2575 
2576 		if (mod_hash_insert_reserve(tep->te_addrhash,
2577 		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2578 		    tep->te_hash_hndl) != 0) {
2579 			if (save_prim_type == T_BIND_REQ) {
2580 				/*
2581 				 * The bind semantics for this primitive
2582 				 * require a failure if the exact address
2583 				 * requested is busy.
2584 				 */
2585 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2586 				    SL_TRACE | SL_ERROR,
2587 				    "tl_bind:requested addr is busy"));
2588 				tli_err = TADDRBUSY;
2589 				unix_err = 0;
2590 				goto error;
2591 			}
2592 
2593 			/*
2594 			 * O_T_BIND_REQ semantics say that if the requested
2595 			 * address is busy, bind to any available free address.
2596 			 */
2597 			if (!tl_get_any_addr(tep, &addr_req)) {
2598 				(void) (STRLOG(TL_ID, tep->te_minor, 1,
2599 				    SL_TRACE | SL_ERROR,
2600 				    "tl_bind:unable to get any addr buf"));
2601 				tli_err = TSYSERR;
2602 				unix_err = ENOMEM;
2603 				goto error;
2604 			}
2605 		} else {
2606 			tep->te_flag |= TL_ADDRHASHED;
2607 			tep->te_hash_hndl = NULL;
2608 		}
2609 	}
2610 
2611 	ASSERT(tep->te_alen >= 0);
2612 
2613 skip_addr_bind:
2614 	/*
2615 	 * prepare T_BIND_ACK TPI message
2616 	 */
2617 	basize = sizeof (struct T_bind_ack) + tep->te_alen;
2618 	bamp = reallocb(mp, basize, 0);
2619 	if (bamp == NULL) {
2620 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2621 		    "tl_wput:tl_bind: allocb failed"));
2622 		/*
2623 		 * roll back state changes
2624 		 */
2625 		tl_addr_unbind(tep);
2626 		tep->te_state = TS_UNBND;
2627 		tl_memrecover(wq, mp, basize);
2628 		return;
2629 	}
2630 
2631 	DB_TYPE(bamp) = M_PCPROTO;
2632 	bamp->b_wptr = bamp->b_rptr + basize;
2633 	b_ack = (struct T_bind_ack *)bamp->b_rptr;
2634 	b_ack->PRIM_type = T_BIND_ACK;
2635 	b_ack->CONIND_number = qlen;
2636 	b_ack->ADDR_length = tep->te_alen;
2637 	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2638 	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2639 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2640 
2641 	if (IS_COTS(tep)) {
2642 		tep->te_qlen = qlen;
2643 		if (qlen > 0)
2644 			tep->te_flag |= TL_LISTENER;
2645 	}
2646 
2647 	tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2648 	/*
2649 	 * send T_BIND_ACK message
2650 	 */
2651 	(void) qreply(wq, bamp);
2652 	return;
2653 
2654 error:
2655 	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2656 	if (ackmp == NULL) {
2657 		/*
2658 		 * roll back state changes
2659 		 */
2660 		tep->te_state = save_state;
2661 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2662 		return;
2663 	}
2664 	tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2665 	tl_error_ack(wq, ackmp, tli_err, unix_err,
save_prim_type); 2666 } 2667 2668 /* 2669 * Process T_UNBIND_REQ. 2670 * Called from serializer. 2671 */ 2672 static void 2673 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2674 { 2675 queue_t *wq; 2676 mblk_t *ackmp; 2677 2678 if (tep->te_closing) { 2679 freemsg(mp); 2680 return; 2681 } 2682 2683 wq = tep->te_wq; 2684 2685 /* 2686 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2687 * ==> allocate for T_ERROR_ACK (known max) 2688 */ 2689 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2690 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2691 return; 2692 } 2693 /* 2694 * memory resources committed 2695 * Note: no message validation. T_UNBIND_REQ message is 2696 * same size as PRIM_type field so already verified earlier. 2697 */ 2698 2699 /* 2700 * validate state 2701 */ 2702 if (tep->te_state != TS_IDLE) { 2703 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2704 SL_TRACE | SL_ERROR, 2705 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2706 tep->te_state)); 2707 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2708 return; 2709 } 2710 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2711 2712 /* 2713 * TPI says on T_UNBIND_REQ: 2714 * send up a M_FLUSH to flush both 2715 * read and write queues 2716 */ 2717 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2718 2719 if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2720 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2721 2722 /* 2723 * Sockets use bind with qlen==0 followed by bind() to 2724 * the same address with qlen > 0 for listeners. 2725 * We allow rebind with a new qlen value. 2726 */ 2727 tl_addr_unbind(tep); 2728 } 2729 2730 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2731 /* 2732 * send T_OK_ACK 2733 */ 2734 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2735 } 2736 2737 2738 /* 2739 * Option management code from drv/ip is used here 2740 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2741 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 2742 * However, that is what we want as that option is 'unorthodox' 2743 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2744 * and not in T_SVR4_OPTMGMT_REQ/ACK 2745 * Note2: use of optcom_req means this routine is an exception to 2746 * recovery from allocb() failures. 2747 */ 2748 2749 static void 2750 tl_optmgmt(queue_t *wq, mblk_t *mp) 2751 { 2752 tl_endpt_t *tep; 2753 mblk_t *ackmp; 2754 union T_primitives *prim; 2755 cred_t *cr; 2756 2757 tep = (tl_endpt_t *)wq->q_ptr; 2758 prim = (union T_primitives *)mp->b_rptr; 2759 2760 /* 2761 * All Solaris components should pass a db_credp 2762 * for this TPI message, hence we ASSERT. 2763 * But in case there is some other M_PROTO that looks 2764 * like a TPI message sent by some other kernel 2765 * component, we check and return an error. 2766 */ 2767 cr = msg_getcred(mp, NULL); 2768 ASSERT(cr != NULL); 2769 if (cr == NULL) { 2770 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2771 return; 2772 } 2773 2774 /* all states OK for AF_UNIX options ? */ 2775 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2776 prim->type == T_SVR4_OPTMGMT_REQ) { 2777 /* 2778 * Broken TLI semantics that options can only be managed 2779 * in TS_IDLE state. Needed for Sparc ABI test suite that 2780 * tests this TLI (mis)feature using this device driver. 
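 *
 * Example of the (mis)feature: a TLI endpoint that issues a
 * T_SVR4_OPTMGMT_REQ after connecting is in TS_DATA_XFER rather than
 * TS_IDLE, so the request is failed here with TOUTSTATE even though it
 * is otherwise well-formed.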
2781 		 */
2782 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2783 		    SL_TRACE | SL_ERROR,
2784 		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2785 		    tep->te_state));
2786 		/*
2787 		 * preallocate memory for T_ERROR_ACK
2788 		 */
2789 		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2790 		if (ackmp == NULL) {
2791 			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2792 			return;
2793 		}
2794 
2795 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2796 		freemsg(mp);
2797 		return;
2798 	}
2799 
2800 	/*
2801 	 * call common option management routine from drv/ip
2802 	 */
2803 	if (prim->type == T_SVR4_OPTMGMT_REQ) {
2804 		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2805 	} else {
2806 		ASSERT(prim->type == T_OPTMGMT_REQ);
2807 		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2808 	}
2809 }
2810 
2811 /*
2812  * Handle T_conn_req - the driver part of connect().
2813  * If TL_SET[U]CRED generate the credentials options.
2814  * If this is a socket pass through options unmodified.
2815  * For sockets generate the T_CONN_CON here instead of
2816  * waiting for the T_CONN_RES.
2817  */
2818 static void
2819 tl_conn_req(queue_t *wq, mblk_t *mp)
2820 {
2821 	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2822 	struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2823 	ssize_t msz = MBLKL(mp);
2824 	t_scalar_t alen, aoff, olen, ooff, err = 0;
2825 	tl_endpt_t *peer_tep = NULL;
2826 	mblk_t *ackmp;
2827 	mblk_t *dimp;
2828 	struct T_discon_ind *di;
2829 	soux_addr_t ux_addr;
2830 	tl_addr_t dst;
2831 
2832 	ASSERT(IS_COTS(tep));
2833 
2834 	if (tep->te_closing) {
2835 		freemsg(mp);
2836 		return;
2837 	}
2838 
2839 	/*
2840 	 * preallocate memory for:
2841 	 * 1. max of T_ERROR_ACK and T_OK_ACK
2842 	 *	==> known max T_ERROR_ACK
2843 	 * 2. max of T_DISCON_IND and T_CONN_IND
2844 	 */
2845 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2846 	if (ackmp == NULL) {
2847 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2848 		return;
2849 	}
2850 	/*
2851 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
2852 	 * will be committed for T_DISCON_IND/T_CONN_IND later
2853 	 */
2854 
2855 	if (tep->te_state != TS_IDLE) {
2856 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
2857 		    SL_TRACE | SL_ERROR,
2858 		    "tl_wput:T_CONN_REQ:out of state, state=%d",
2859 		    tep->te_state));
2860 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2861 		freemsg(mp);
2862 		return;
2863 	}
2864 
2865 	/*
2866 	 * validate the message
2867 	 * Note: dereference fields in struct inside message only
2868 	 * after validating the message length.
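	 *
	 * i.e. the usual pattern in this driver (sketch):
	 *
	 *	if (msz < sizeof (struct T_conn_req))
	 *		<error ack>;
	 *	alen = creq->DEST_length;	<- safe only after the check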
2869 */ 2870 if (msz < sizeof (struct T_conn_req)) { 2871 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 2872 "tl_conn_req:invalid message length")); 2873 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2874 freemsg(mp); 2875 return; 2876 } 2877 alen = creq->DEST_length; 2878 aoff = creq->DEST_offset; 2879 olen = creq->OPT_length; 2880 ooff = creq->OPT_offset; 2881 if (olen == 0) 2882 ooff = 0; 2883 2884 if (IS_SOCKET(tep)) { 2885 if ((alen != TL_SOUX_ADDRLEN) || 2886 (aoff < 0) || 2887 (aoff + alen > msz) || 2888 (alen > msz - sizeof (struct T_conn_req))) { 2889 (void) (STRLOG(TL_ID, tep->te_minor, 2890 1, SL_TRACE | SL_ERROR, 2891 "tl_conn_req: invalid socket addr")); 2892 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2893 freemsg(mp); 2894 return; 2895 } 2896 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2897 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2898 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2899 (void) (STRLOG(TL_ID, tep->te_minor, 2900 1, SL_TRACE | SL_ERROR, 2901 "tl_conn_req: invalid socket magic")); 2902 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2903 freemsg(mp); 2904 return; 2905 } 2906 } else { 2907 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2908 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2909 ooff + olen < 0)) || 2910 olen < 0 || ooff < 0) { 2911 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2912 SL_TRACE | SL_ERROR, 2913 "tl_conn_req:invalid message")); 2914 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2915 freemsg(mp); 2916 return; 2917 } 2918 2919 if (alen <= 0 || aoff < 0 || 2920 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2921 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2922 SL_TRACE | SL_ERROR, 2923 "tl_conn_req:bad addr in message, " 2924 "alen=%d, msz=%ld", 2925 alen, msz)); 2926 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2927 freemsg(mp); 2928 return; 2929 } 2930 #ifdef DEBUG 2931 /* 2932 * Mild form of ASSERT()ion to detect broken TPI apps. 2933 * if (!assertion) 2934 * log warning; 2935 */ 2936 if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2937 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2938 SL_TRACE | SL_ERROR, 2939 "tl_conn_req: addr overlaps TPI message")); 2940 } 2941 #endif 2942 if (olen) { 2943 /* 2944 * no opts in connect req 2945 * supported in this provider except for sockets. 2946 */ 2947 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2948 SL_TRACE | SL_ERROR, 2949 "tl_conn_req:options not supported " 2950 "in message")); 2951 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2952 freemsg(mp); 2953 return; 2954 } 2955 } 2956 2957 /* 2958 * Prevent tep from closing on us. 2959 */ 2960 if (!tl_noclose(tep)) { 2961 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 2962 "tl_conn_req:endpoint is closing")); 2963 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2964 freemsg(mp); 2965 return; 2966 } 2967 2968 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2969 /* 2970 * get endpoint to connect to 2971 * check that peer with DEST addr is bound to addr 2972 * and has CONIND_number > 0 2973 */ 2974 dst.ta_alen = alen; 2975 dst.ta_abuf = mp->b_rptr + aoff; 2976 dst.ta_zoneid = tep->te_zoneid; 2977 2978 /* 2979 * Verify if remote addr is in use 2980 */ 2981 peer_tep = (IS_SOCKET(tep) ? 
2982 	    tl_sock_find_peer(tep, &ux_addr) :
2983 	    tl_find_peer(tep, &dst));
2984 
2985 	if (peer_tep == NULL) {
2986 		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2987 		    "tl_conn_req:no one at connect address"));
2988 		err = ECONNREFUSED;
2989 	} else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2990 		/*
2991 		 * validate that the number of incoming connections is
2992 		 * not at capacity on the destination endpoint
2993 		 */
2994 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2995 		    "tl_conn_req: qlen overflow connection refused"));
2996 		err = ECONNREFUSED;
2997 	}
2998 
2999 	/*
3000 	 * Send T_DISCON_IND in case of error
3001 	 */
3002 	if (err != 0) {
3003 		if (peer_tep != NULL)
3004 			tl_refrele(peer_tep);
3005 		/* We are still expected to send T_OK_ACK */
3006 		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3007 		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3008 		tl_closeok(tep);
3009 		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3010 		    M_PROTO, T_DISCON_IND);
3011 		if (dimp == NULL) {
3012 			tl_merror(wq, NULL, ENOSR);
3013 			return;
3014 		}
3015 		di = (struct T_discon_ind *)dimp->b_rptr;
3016 		di->DISCON_reason = err;
3017 		di->SEQ_number = BADSEQNUM;
3018 
3019 		tep->te_state = TS_IDLE;
3020 		/*
3021 		 * send T_DISCON_IND message
3022 		 */
3023 		putnext(tep->te_rq, dimp);
3024 		return;
3025 	}
3026 
3027 	ASSERT(IS_COTS(peer_tep));
3028 
3029 	/*
3030 	 * Found the listener. At this point processing will continue on
3031 	 * listener serializer. Close of the endpoint should be blocked while we
3032 	 * switch serializers.
3033 	 */
3034 	tl_serializer_refhold(peer_tep->te_ser);
3035 	tl_serializer_refrele(tep->te_ser);
3036 	tep->te_ser = peer_tep->te_ser;
3037 	ASSERT(tep->te_oconp == NULL);
3038 	tep->te_oconp = peer_tep;
3039 
3040 	/*
3041 	 * It is safe to close now. Close may continue on listener serializer.
3042 	 */
3043 	tl_closeok(tep);
3044 
3045 	/*
3046 	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3047 	 * data, so we link mp to ackmp.
3048 	 */
3049 	ackmp->b_cont = mp;
3050 	mp = ackmp;
3051 
3052 	tl_refhold(tep);
3053 	tl_serializer_enter(tep, tl_conn_req_ser, mp);
3054 }
3055 
3056 /*
3057  * Finish T_CONN_REQ processing on listener serializer.
3058  */
3059 static void
3060 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3061 {
3062 	queue_t *wq;
3063 	tl_endpt_t *peer_tep = tep->te_oconp;
3064 	mblk_t *confmp, *cimp, *indmp;
3065 	void *opts = NULL;
3066 	mblk_t *ackmp = mp;
3067 	struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3068 	struct T_conn_ind *ci;
3069 	tl_icon_t *tip;
3070 	void *addr_startp;
3071 	t_scalar_t olen = creq->OPT_length;
3072 	t_scalar_t ooff = creq->OPT_offset;
3073 	size_t ci_msz;
3074 	size_t size;
3075 	cred_t *cr = NULL;
3076 	pid_t cpid;
3077 
3078 	if (tep->te_closing) {
3079 		TL_UNCONNECT(tep->te_oconp);
3080 		tl_serializer_exit(tep);
3081 		tl_refrele(tep);
3082 		freemsg(mp);
3083 		return;
3084 	}
3085 
3086 	wq = tep->te_wq;
3087 	tep->te_flag |= TL_EAGER;
3088 
3089 	/*
3090 	 * Extract preallocated ackmp from mp.
3091 */ 3092 mp = mp->b_cont; 3093 ackmp->b_cont = NULL; 3094 3095 if (olen == 0) 3096 ooff = 0; 3097 3098 if (peer_tep->te_closing || 3099 !((peer_tep->te_state == TS_IDLE) || 3100 (peer_tep->te_state == TS_WRES_CIND))) { 3101 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3102 "tl_conn_req:peer in bad state (%d)", 3103 peer_tep->te_state)); 3104 TL_UNCONNECT(tep->te_oconp); 3105 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3106 freemsg(ackmp); 3107 tl_serializer_exit(tep); 3108 tl_refrele(tep); 3109 return; 3110 } 3111 3112 /* 3113 * preallocate now for T_DISCON_IND or T_CONN_IND 3114 */ 3115 /* 3116 * calculate length of T_CONN_IND message 3117 */ 3118 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) { 3119 cr = msg_getcred(mp, &cpid); 3120 ASSERT(cr != NULL); 3121 if (peer_tep->te_flag & TL_SETCRED) { 3122 ooff = 0; 3123 olen = (t_scalar_t) sizeof (struct opthdr) + 3124 OPTLEN(sizeof (tl_credopt_t)); 3125 /* 1 option only */ 3126 } else { 3127 ooff = 0; 3128 olen = (t_scalar_t)sizeof (struct opthdr) + 3129 OPTLEN(ucredminsize(cr)); 3130 /* 1 option only */ 3131 } 3132 } 3133 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3134 ci_msz = T_ALIGN(ci_msz) + olen; 3135 size = max(ci_msz, sizeof (struct T_discon_ind)); 3136 3137 /* 3138 * Save options from mp - we'll need them for T_CONN_IND. 3139 */ 3140 if (ooff != 0) { 3141 opts = kmem_alloc(olen, KM_NOSLEEP); 3142 if (opts == NULL) { 3143 /* 3144 * roll back state changes 3145 */ 3146 tep->te_state = TS_IDLE; 3147 tl_memrecover(wq, mp, size); 3148 freemsg(ackmp); 3149 TL_UNCONNECT(tep->te_oconp); 3150 tl_serializer_exit(tep); 3151 tl_refrele(tep); 3152 return; 3153 } 3154 /* Copy options to a temp buffer */ 3155 bcopy(mp->b_rptr + ooff, opts, olen); 3156 } 3157 3158 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3159 /* 3160 * Generate a T_CONN_CON that has the identical address 3161 * (and options) as the T_CONN_REQ. 3162 * NOTE: assumes that the T_conn_req and T_conn_con structures 3163 * are isomorphic. 3164 */ 3165 confmp = copyb(mp); 3166 if (confmp == NULL) { 3167 /* 3168 * roll back state changes 3169 */ 3170 tep->te_state = TS_IDLE; 3171 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3172 freemsg(ackmp); 3173 if (opts != NULL) 3174 kmem_free(opts, olen); 3175 TL_UNCONNECT(tep->te_oconp); 3176 tl_serializer_exit(tep); 3177 tl_refrele(tep); 3178 return; 3179 } 3180 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3181 T_CONN_CON; 3182 } else { 3183 confmp = NULL; 3184 } 3185 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3186 /* 3187 * roll back state changes 3188 */ 3189 tep->te_state = TS_IDLE; 3190 tl_memrecover(wq, mp, size); 3191 freemsg(ackmp); 3192 if (opts != NULL) 3193 kmem_free(opts, olen); 3194 freemsg(confmp); 3195 TL_UNCONNECT(tep->te_oconp); 3196 tl_serializer_exit(tep); 3197 tl_refrele(tep); 3198 return; 3199 } 3200 3201 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3202 if (tip == NULL) { 3203 /* 3204 * roll back state changes 3205 */ 3206 tep->te_state = TS_IDLE; 3207 tl_memrecover(wq, indmp, sizeof (*tip)); 3208 freemsg(ackmp); 3209 if (opts != NULL) 3210 kmem_free(opts, olen); 3211 freemsg(confmp); 3212 TL_UNCONNECT(tep->te_oconp); 3213 tl_serializer_exit(tep); 3214 tl_refrele(tep); 3215 return; 3216 } 3217 tip->ti_mp = NULL; 3218 3219 /* 3220 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3221 * and tl_icon_t cell. 3222 */ 3223 3224 /* 3225 * ack validity of request and send the peer credential in the ACK. 
3226 	 */
3227 	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3228 
3229 	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3230 	    confmp != NULL) {
3231 		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3232 	}
3233 
3234 	tl_ok_ack(wq, ackmp, T_CONN_REQ);
3235 
3236 	/*
3237 	 * prepare message to send T_CONN_IND
3238 	 */
3239 	/*
3240 	 * allocate the message - original data blocks retained
3241 	 * in the returned mblk
3242 	 */
3243 	cimp = tl_resizemp(indmp, size);
3244 	if (cimp == NULL) {
3245 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3246 		    "tl_conn_req:con_ind:allocb failure"));
3247 		tl_merror(wq, indmp, ENOMEM);
3248 		TL_UNCONNECT(tep->te_oconp);
3249 		tl_serializer_exit(tep);
3250 		tl_refrele(tep);
3251 		if (opts != NULL)
3252 			kmem_free(opts, olen);
3253 		freemsg(confmp);
3254 		ASSERT(tip->ti_mp == NULL);
3255 		kmem_free(tip, sizeof (*tip));
3256 		return;
3257 	}
3258 
3259 	DB_TYPE(cimp) = M_PROTO;
3260 	ci = (struct T_conn_ind *)cimp->b_rptr;
3261 	ci->PRIM_type = T_CONN_IND;
3262 	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3263 	ci->SRC_length = tep->te_alen;
3264 	ci->SEQ_number = tep->te_seqno;
3265 
3266 	addr_startp = cimp->b_rptr + ci->SRC_offset;
3267 	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3268 	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3269 
3270 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3271 		    ci->SRC_length);
3272 		ci->OPT_length = olen; /* because only 1 option */
3273 		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3274 		    cr, cpid,
3275 		    peer_tep->te_flag, peer_tep->te_credp);
3276 	} else if (ooff != 0) {
3277 		/* Copy option from T_CONN_REQ */
3278 		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3279 		    ci->SRC_length);
3280 		ci->OPT_length = olen;
3281 		ASSERT(opts != NULL);
3282 		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3283 	} else {
3284 		ci->OPT_offset = 0;
3285 		ci->OPT_length = 0;
3286 	}
3287 	if (opts != NULL)
3288 		kmem_free(opts, olen);
3289 
3290 	/*
3291 	 * register connection request with server peer
3292 	 * append to list of incoming connections
3293 	 * increment references for both peer_tep and tep: peer_tep is placed on
3294 	 * te_oconp and tep is placed on the listener's queue.
3295 	 */
3296 	tip->ti_tep = tep;
3297 	tip->ti_seqno = tep->te_seqno;
3298 	list_insert_tail(&peer_tep->te_iconp, tip);
3299 	peer_tep->te_nicon++;
3300 
3301 	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3302 	/*
3303 	 * send the T_CONN_IND message
3304 	 */
3305 	putnext(peer_tep->te_rq, cimp);
3306 
3307 	/*
3308 	 * Send a T_CONN_CON message for sockets.
3309 	 * Disable the queues until we have reached the correct state!
3310 	 */
3311 	if (confmp != NULL) {
3312 		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3313 		noenable(wq);
3314 		putnext(tep->te_rq, confmp);
3315 	}
3316 	/*
3317 	 * Now we would need to increment the tep reference because tep is
3318 	 * referenced by the server's list of pending connections. We would
3319 	 * also need to decrement it before exiting the serializer. The two
3320 	 * operations cancel each other out, so the reference is left alone.
3321 	 */
3322 	ASSERT(tep->te_refcnt >= 2);
3323 	ASSERT(peer_tep->te_refcnt >= 2);
3324 	tl_serializer_exit(tep);
3325 }
3326 
3327 
3328 
3329 /*
3330  * Handle T_conn_res on listener stream. Called on listener serializer.
3331  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3332  * Switch eager serializer to acceptor's.
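 *
 * The switch uses the same refhold/refrele pattern as elsewhere in this
 * driver (sketch, mirroring the code further below):
 *
 *	tl_serializer_refhold(acc_ep->te_ser);
 *	tl_serializer_refrele(cl_ep->te_ser);
 *	cl_ep->te_ser = acc_ep->te_ser;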
3335 * 3336 * If TL_SET[U]CRED generate the credentials options. 3337 * For sockets tl_conn_req has already generated the T_CONN_CON. 3338 */ 3339 static void 3340 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3341 { 3342 queue_t *wq; 3343 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3344 ssize_t msz = MBLKL(mp); 3345 t_scalar_t olen, ooff, err = 0; 3346 t_scalar_t prim = cres->PRIM_type; 3347 uchar_t *addr_startp; 3348 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3349 tl_icon_t *tip; 3350 size_t size; 3351 mblk_t *ackmp, *respmp; 3352 mblk_t *dimp, *ccmp = NULL; 3353 struct T_discon_ind *di; 3354 struct T_conn_con *cc; 3355 boolean_t client_noclose_set = B_FALSE; 3356 boolean_t switch_client_serializer = B_TRUE; 3357 3358 ASSERT(IS_COTS(tep)); 3359 3360 if (tep->te_closing) { 3361 freemsg(mp); 3362 return; 3363 } 3364 3365 wq = tep->te_wq; 3366 3367 /* 3368 * preallocate memory for: 3369 * 1. max of T_ERROR_ACK and T_OK_ACK 3370 * ==> known max T_ERROR_ACK 3371 * 2. max of T_DISCON_IND and T_CONN_CON 3372 */ 3373 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3374 if (ackmp == NULL) { 3375 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3376 return; 3377 } 3378 /* 3379 * memory committed for T_OK_ACK/T_ERROR_ACK now 3380 * will be committed for T_DISCON_IND/T_CONN_CON later 3381 */ 3382 3383 3384 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3385 3386 /* 3387 * validate state 3388 */ 3389 if (tep->te_state != TS_WRES_CIND) { 3390 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3391 SL_TRACE | SL_ERROR, 3392 "tl_wput:T_CONN_RES:out of state, state=%d", 3393 tep->te_state)); 3394 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3395 freemsg(mp); 3396 return; 3397 } 3398 3399 /* 3400 * validate the message 3401 * Note: dereference fields in struct inside message only 3402 * after validating the message length. 3403 */ 3404 if (msz < sizeof (struct T_conn_res)) { 3405 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 3406 "tl_conn_res:invalid message length")); 3407 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3408 freemsg(mp); 3409 return; 3410 } 3411 olen = cres->OPT_length; 3412 ooff = cres->OPT_offset; 3413 if (((olen > 0) && ((ooff + olen) > msz))) { 3414 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 3415 "tl_conn_res:invalid message")); 3416 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3417 freemsg(mp); 3418 return; 3419 } 3420 if (olen) { 3421 /* 3422 * no opts in connect res 3423 * supported in this provider 3424 */ 3425 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 3426 "tl_conn_res:options not supported in message")); 3427 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3428 freemsg(mp); 3429 return; 3430 } 3431 3432 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3433 ASSERT(tep->te_state == TS_WACK_CRES); 3434 3435 if (cres->SEQ_number < TL_MINOR_START && 3436 cres->SEQ_number >= BADSEQNUM) { 3437 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3438 "tl_conn_res:remote endpoint sequence number bad")); 3439 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3440 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3441 freemsg(mp); 3442 return; 3443 } 3444 3445 /* 3446 * find accepting endpoint. Will have extra reference if found. 
3447 	 */
3448 	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3449 	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3450 	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3451 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3452 		    "tl_conn_res:bad accepting endpoint"));
3453 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3454 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3455 		freemsg(mp);
3456 		return;
3457 	}
3458 
3459 	/*
3460 	 * Prevent acceptor from closing.
3461 	 */
3462 	if (!tl_noclose(acc_ep)) {
3463 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3464 		    "tl_conn_res:bad accepting endpoint"));
3465 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3466 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3467 		tl_refrele(acc_ep);
3468 		freemsg(mp);
3469 		return;
3470 	}
3471 
3472 	acc_ep->te_flag |= TL_ACCEPTOR;
3473 
3474 	/*
3475 	 * validate that the accepting endpoint, if different from the
3476 	 * listener, has an address bound => state is TS_IDLE
3477 	 * TROUBLE in XPG4 !!?
3478 	 */
3479 	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3480 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3481 		    "tl_conn_res:accepting endpoint has no address bound, "
3482 		    "state=%d", acc_ep->te_state));
3483 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3484 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3485 		freemsg(mp);
3486 		tl_closeok(acc_ep);
3487 		tl_refrele(acc_ep);
3488 		return;
3489 	}
3490 
3491 	/*
3492 	 * validate that if the accepting endpoint is the same as the
3493 	 * listener, no other incoming connection is on the queue
3494 	 */
3495 
3496 	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3497 		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3498 		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3499 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3500 		tl_error_ack(wq, ackmp, TBADF, 0, prim);
3501 		freemsg(mp);
3502 		tl_closeok(acc_ep);
3503 		tl_refrele(acc_ep);
3504 		return;
3505 	}
3506 
3507 	/*
3508 	 * Mark for deletion the entry corresponding to the client
3509 	 * on the list of pending connections made by the listener:
3510 	 * search the list to see if the client is one of those
3511 	 * recorded by the listener.
3512 	 */
3513 	tip = tl_icon_find(tep, cres->SEQ_number);
3514 	if (tip == NULL) {
3515 		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3516 		    "tl_conn_res:no client in listener list"));
3517 		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3518 		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3519 		freemsg(mp);
3520 		tl_closeok(acc_ep);
3521 		tl_refrele(acc_ep);
3522 		return;
3523 	}
3524 
3525 	/*
3526 	 * If ti_tep is NULL the client has already closed. In this case
3527 	 * the code below will avoid any action on the client side
3528 	 * but complete the server and acceptor state transitions.
3529 	 */
3530 	ASSERT(tip->ti_tep == NULL ||
3531 	    tip->ti_tep->te_seqno == cres->SEQ_number);
3532 	cl_ep = tip->ti_tep;
3533 
3534 	/*
3535 	 * If the client is present it is switched from listener's to acceptor's
3536 	 * serializer. We should block client closes while serializers are
3537 	 * being switched.
3538 	 *
3539 	 * It is possible that the client is present but is currently being
3540 	 * closed. There are two possible cases:
3541 	 *
3542 	 * 1) The client has already entered tl_close_finish_ser() and sent
3543 	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
3544 	 *    still need to send all messages from tip->ti_mp to the acceptor).
3545 	 *
3546 	 * 2) The client started the close but has not entered
3547 	 *    tl_close_finish_ser() yet.
In this case, the client is already
3548 	 *    proceeding asynchronously on the listener's serializer, so we're
3549 	 *    forced to change the acceptor to use the listener's serializer to
3550 	 *    ensure that any operations on the acceptor are serialized with
3551 	 *    respect to the close that's in-progress.
3552 	 */
3553 	if (cl_ep != NULL) {
3554 		if (tl_noclose(cl_ep)) {
3555 			client_noclose_set = B_TRUE;
3556 		} else {
3557 			/*
3558 			 * Client is closing. If it has sent the
3559 			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3560 			 * we have to let the client continue until it is
3561 			 * sent.
3562 			 *
3563 			 * If we do continue using the client, acceptor will
3564 			 * switch to client's serializer which is used by client
3565 			 * for its close.
3566 			 */
3567 			tl_client_closing_when_accepting++;
3568 			switch_client_serializer = B_FALSE;
3569 			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3570 			    cl_ep->te_state == -1)
3571 				cl_ep = NULL;
3572 		}
3573 	}
3574 
3575 	if (cl_ep != NULL) {
3576 		/*
3577 		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3578 		 * (latter for sockets only)
3579 		 */
3580 		if (cl_ep->te_state != TS_WCON_CREQ &&
3581 		    (cl_ep->te_state != TS_DATA_XFER &&
3582 		    IS_SOCKET(cl_ep))) {
3583 			err = ECONNREFUSED;
3584 			/*
3585 			 * T_DISCON_IND sent later after committing memory
3586 			 * and acking validity of request
3587 			 */
3588 			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3589 			    "tl_conn_res:peer in bad state"));
3590 		}
3591 
3592 		/*
3593 		 * preallocate now for T_DISCON_IND or T_CONN_CON
3594 		 * ack validity of request (T_OK_ACK) after memory committed
3595 		 */
3596 
3597 		if (err) {
3598 			size = sizeof (struct T_discon_ind);
3599 		} else {
3600 			/*
3601 			 * calculate length of T_CONN_CON message
3602 			 */
3603 			olen = 0;
3604 			if (cl_ep->te_flag & TL_SETCRED) {
3605 				olen = (t_scalar_t)sizeof (struct opthdr) +
3606 				    OPTLEN(sizeof (tl_credopt_t));
3607 			} else if (cl_ep->te_flag & TL_SETUCRED) {
3608 				olen = (t_scalar_t)sizeof (struct opthdr) +
3609 				    OPTLEN(ucredminsize(acc_ep->te_credp));
3610 			}
3611 			size = T_ALIGN(sizeof (struct T_conn_con) +
3612 			    acc_ep->te_alen) + olen;
3613 		}
3614 		if ((respmp = reallocb(mp, size, 0)) == NULL) {
3615 			/*
3616 			 * roll back state changes
3617 			 */
3618 			tep->te_state = TS_WRES_CIND;
3619 			tl_memrecover(wq, mp, size);
3620 			freemsg(ackmp);
3621 			if (client_noclose_set)
3622 				tl_closeok(cl_ep);
3623 			tl_closeok(acc_ep);
3624 			tl_refrele(acc_ep);
3625 			return;
3626 		}
3627 		mp = NULL;
3628 	}
3629 
3630 	/*
3631 	 * Now ack validity of request
3632 	 */
3633 	if (tep->te_nicon == 1) {
3634 		if (tep == acc_ep)
3635 			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3636 		else
3637 			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3638 	} else {
3639 		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3640 	}
3641 
3642 	/*
3643 	 * send T_DISCON_IND now if client state validation failed earlier
3644 	 */
3645 	if (err) {
3646 		tl_ok_ack(wq, ackmp, prim);
3647 		/*
3648 		 * flush the queues - why always ?
3649 */ 3650 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3651 3652 dimp = tl_resizemp(respmp, size); 3653 if (dimp == NULL) { 3654 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3655 SL_TRACE | SL_ERROR, 3656 "tl_conn_res:con_ind:allocb failure")); 3657 tl_merror(wq, respmp, ENOMEM); 3658 tl_closeok(acc_ep); 3659 if (client_noclose_set) 3660 tl_closeok(cl_ep); 3661 tl_refrele(acc_ep); 3662 return; 3663 } 3664 if (dimp->b_cont) { 3665 /* no user data in provider generated discon ind */ 3666 freemsg(dimp->b_cont); 3667 dimp->b_cont = NULL; 3668 } 3669 3670 DB_TYPE(dimp) = M_PROTO; 3671 di = (struct T_discon_ind *)dimp->b_rptr; 3672 di->PRIM_type = T_DISCON_IND; 3673 di->DISCON_reason = err; 3674 di->SEQ_number = BADSEQNUM; 3675 3676 tep->te_state = TS_IDLE; 3677 /* 3678 * send T_DISCON_IND message 3679 */ 3680 putnext(acc_ep->te_rq, dimp); 3681 if (client_noclose_set) 3682 tl_closeok(cl_ep); 3683 tl_closeok(acc_ep); 3684 tl_refrele(acc_ep); 3685 return; 3686 } 3687 3688 /* 3689 * now start connecting the accepting endpoint 3690 */ 3691 if (tep != acc_ep) 3692 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3693 3694 if (cl_ep == NULL) { 3695 /* 3696 * The client has already closed. Send up any queued messages 3697 * and change the state accordingly. 3698 */ 3699 tl_ok_ack(wq, ackmp, prim); 3700 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3701 3702 /* 3703 * remove endpoint from incoming connection 3704 * delete client from list of incoming connections 3705 */ 3706 tl_freetip(tep, tip); 3707 freemsg(mp); 3708 tl_closeok(acc_ep); 3709 tl_refrele(acc_ep); 3710 return; 3711 } else if (tip->ti_mp != NULL) { 3712 /* 3713 * The client could have queued a T_DISCON_IND which needs 3714 * to be sent up. 3715 * Note that t_discon_req can not operate the same as 3716 * t_data_req since it is not possible for it to putbq 3717 * the message and return -1 due to the use of qwriter. 
3718 		 */
3719 		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3720 	}
3721 
3722 	/*
3723 	 * prepare connect confirm T_CONN_CON message
3724 	 */
3725 
3726 	/*
3727 	 * allocate the message - original data blocks
3728 	 * retained in the returned mblk
3729 	 */
3730 	if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3731 		ccmp = tl_resizemp(respmp, size);
3732 		if (ccmp == NULL) {
3733 			tl_ok_ack(wq, ackmp, prim);
3734 			(void) (STRLOG(TL_ID, tep->te_minor, 3,
3735 			    SL_TRACE | SL_ERROR,
3736 			    "tl_conn_res:conn_con:allocb failure"));
3737 			tl_merror(wq, respmp, ENOMEM);
3738 			tl_closeok(acc_ep);
3739 			if (client_noclose_set)
3740 				tl_closeok(cl_ep);
3741 			tl_refrele(acc_ep);
3742 			return;
3743 		}
3744 
3745 		DB_TYPE(ccmp) = M_PROTO;
3746 		cc = (struct T_conn_con *)ccmp->b_rptr;
3747 		cc->PRIM_type = T_CONN_CON;
3748 		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3749 		cc->RES_length = acc_ep->te_alen;
3750 		addr_startp = ccmp->b_rptr + cc->RES_offset;
3751 		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3752 		if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3753 			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3754 			    cc->RES_length);
3755 			cc->OPT_length = olen;
3756 			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3757 			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3758 			    cl_ep->te_credp);
3759 		} else {
3760 			cc->OPT_offset = 0;
3761 			cc->OPT_length = 0;
3762 		}
3763 		/*
3764 		 * Forward the credential in the packet so it can be picked up
3765 		 * at the higher layers for more complete credential processing
3766 		 */
3767 		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3768 	} else {
3769 		freemsg(respmp);
3770 		respmp = NULL;
3771 	}
3772 
3773 	/*
3774 	 * make connection linking
3775 	 * accepting and client endpoints
3776 	 * No need to increment references:
3777 	 * on client: it should already have one from tip->ti_tep linkage.
3778 	 * on acceptor: it should already have one from the table lookup.
3779 	 *
3780 	 * At this point both client and acceptor can't close. Set client
3781 	 * serializer to acceptor's.
3782 	 */
3783 	ASSERT(cl_ep->te_refcnt >= 2);
3784 	ASSERT(acc_ep->te_refcnt >= 2);
3785 	ASSERT(cl_ep->te_conp == NULL);
3786 	ASSERT(acc_ep->te_conp == NULL);
3787 	cl_ep->te_conp = acc_ep;
3788 	acc_ep->te_conp = cl_ep;
3789 	ASSERT(cl_ep->te_ser == tep->te_ser);
3790 	if (switch_client_serializer) {
3791 		mutex_enter(&cl_ep->te_ser_lock);
3792 		if (cl_ep->te_ser_count > 0) {
3793 			switch_client_serializer = B_FALSE;
3794 			tl_serializer_noswitch++;
3795 		} else {
3796 			/*
3797 			 * Move client to the acceptor's serializer.
3798 			 */
3799 			tl_serializer_refhold(acc_ep->te_ser);
3800 			tl_serializer_refrele(cl_ep->te_ser);
3801 			cl_ep->te_ser = acc_ep->te_ser;
3802 		}
3803 		mutex_exit(&cl_ep->te_ser_lock);
3804 	}
3805 	if (!switch_client_serializer) {
3806 		/*
3807 		 * It is not possible to switch client to use acceptor's.
3808 		 * Move acceptor to client's serializer (which is the same as
3809 		 * listener's).
3810 		 */
3811 		tl_serializer_refhold(cl_ep->te_ser);
3812 		tl_serializer_refrele(acc_ep->te_ser);
3813 		acc_ep->te_ser = cl_ep->te_ser;
3814 	}
3815 
3816 	TL_REMOVE_PEER(cl_ep->te_oconp);
3817 	TL_REMOVE_PEER(acc_ep->te_oconp);
3818 
3819 	/*
3820 	 * remove endpoint from incoming connection
3821 	 * delete client from list of incoming connections
3822 	 */
3823 	tip->ti_tep = NULL;
3824 	tl_freetip(tep, tip);
3825 	tl_ok_ack(wq, ackmp, prim);
3826 
3827 	/*
3828 	 * data blocks already linked in reallocb()
3829 	 */
3830 
3831 	/*
3832 	 * link queues so that I_SENDFD will work
3833 	 */
3834 	if (!IS_SOCKET(tep)) {
3835 		acc_ep->te_wq->q_next = cl_ep->te_rq;
3836 		cl_ep->te_wq->q_next = acc_ep->te_rq;
3837 	}
3838 
3839 	/*
3840 	 * send T_CONN_CON up on client side unless it was already
3841 	 * done (for a socket). In case any data or ordrel req has been
3842 	 * queued, make sure that the service procedure runs.
3843 	 */
3844 	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3845 		enableok(cl_ep->te_wq);
3846 		TL_QENABLE(cl_ep);
3847 		if (ccmp != NULL)
3848 			freemsg(ccmp);
3849 	} else {
3850 		/*
3851 		 * change client state on TE_CONN_CON event
3852 		 */
3853 		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3854 		putnext(cl_ep->te_rq, ccmp);
3855 	}
3856 
3857 	/* Mark both endpoints as accepted */
3858 	cl_ep->te_flag |= TL_ACCEPTED;
3859 	acc_ep->te_flag |= TL_ACCEPTED;
3860 
3861 	/*
3862 	 * Allow client and acceptor to close.
3863 	 */
3864 	tl_closeok(acc_ep);
3865 	if (client_noclose_set)
3866 		tl_closeok(cl_ep);
3867 }
3868 
3869 
3870 
3871 
3872 static void
3873 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3874 {
3875 	queue_t *wq;
3876 	struct T_discon_req *dr;
3877 	ssize_t msz;
3878 	tl_endpt_t *peer_tep = tep->te_conp;
3879 	tl_endpt_t *srv_tep = tep->te_oconp;
3880 	tl_icon_t *tip;
3881 	size_t size;
3882 	mblk_t *ackmp, *dimp, *respmp;
3883 	struct T_discon_ind *di;
3884 	t_scalar_t save_state, new_state;
3885 
3886 	if (tep->te_closing) {
3887 		freemsg(mp);
3888 		return;
3889 	}
3890 
3891 	if ((peer_tep != NULL) && peer_tep->te_closing) {
3892 		TL_UNCONNECT(tep->te_conp);
3893 		peer_tep = NULL;
3894 	}
3895 	if ((srv_tep != NULL) && srv_tep->te_closing) {
3896 		TL_UNCONNECT(tep->te_oconp);
3897 		srv_tep = NULL;
3898 	}
3899 
3900 	wq = tep->te_wq;
3901 
3902 	/*
3903 	 * preallocate memory for:
3904 	 * 1. max of T_ERROR_ACK and T_OK_ACK
3905 	 *	==> known max T_ERROR_ACK
3906 	 * 2. for T_DISCON_IND
3907 	 */
3908 	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3909 	if (ackmp == NULL) {
3910 		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3911 		return;
3912 	}
3913 	/*
3914 	 * memory committed for T_OK_ACK/T_ERROR_ACK now
3915 	 * will be committed for T_DISCON_IND later
3916 	 */
3917 
3918 	dr = (struct T_discon_req *)mp->b_rptr;
3919 	msz = MBLKL(mp);
3920 
3921 	/*
3922 	 * validate the state
3923 	 */
3924 	save_state = new_state = tep->te_state;
3925 	if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3926 	    !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3927 		(void) (STRLOG(TL_ID, tep->te_minor, 1,
3928 		    SL_TRACE | SL_ERROR,
3929 		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
3930 		    tep->te_state));
3931 		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3932 		freemsg(mp);
3933 		return;
3934 	}
3935 	/*
3936 	 * Defer committing the state change until it is determined if
3937 	 * the message will be queued with the tl_icon or not.
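	 *
	 * (In the socket case below the T_DISCON_IND is queued on the
	 * tl_icon and te_state is deliberately left unchanged because
	 * tl_conn_res() still inspects it; otherwise new_state is
	 * committed once the indication has been sent.)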
3938 */ 3939 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3940 3941 /* validate the message */ 3942 if (msz < sizeof (struct T_discon_req)) { 3943 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 3944 "tl_discon_req:invalid message")); 3945 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3946 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3947 freemsg(mp); 3948 return; 3949 } 3950 3951 /* 3952 * if server, then validate that client exists 3953 * by connection sequence number etc. 3954 */ 3955 if (tep->te_nicon > 0) { /* server */ 3956 3957 /* 3958 * search server list for disconnect client 3959 */ 3960 tip = tl_icon_find(tep, dr->SEQ_number); 3961 if (tip == NULL) { 3962 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3963 SL_TRACE | SL_ERROR, 3964 "tl_discon_req:no disconnect endpoint")); 3965 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3966 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3967 freemsg(mp); 3968 return; 3969 } 3970 /* 3971 * If ti_tep is NULL the client has already closed. In this case 3972 * the code below will avoid any action on the client side. 3973 */ 3974 3975 IMPLY(tip->ti_tep != NULL, 3976 tip->ti_tep->te_seqno == dr->SEQ_number); 3977 peer_tep = tip->ti_tep; 3978 } 3979 3980 /* 3981 * preallocate now for T_DISCON_IND 3982 * ack validity of request (T_OK_ACK) after memory committed 3983 */ 3984 size = sizeof (struct T_discon_ind); 3985 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3986 tl_memrecover(wq, mp, size); 3987 freemsg(ackmp); 3988 return; 3989 } 3990 3991 /* 3992 * prepare message to ack validity of request 3993 */ 3994 if (tep->te_nicon == 0) { 3995 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3996 } else { 3997 if (tep->te_nicon == 1) 3998 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3999 else 4000 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 4001 } 4002 4003 /* 4004 * Flushing queues according to TPI. Using the old state. 
4005 */ 4006 if ((tep->te_nicon <= 1) && 4007 ((save_state == TS_DATA_XFER) || 4008 (save_state == TS_WIND_ORDREL) || 4009 (save_state == TS_WREQ_ORDREL))) 4010 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 4011 4012 /* send T_OK_ACK up */ 4013 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 4014 4015 /* 4016 * now do disconnect business 4017 */ 4018 if (tep->te_nicon > 0) { /* listener */ 4019 if (peer_tep != NULL && !peer_tep->te_closing) { 4020 /* 4021 * disconnect incoming connect request pending to tep 4022 */ 4023 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4024 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4025 SL_TRACE | SL_ERROR, 4026 "tl_discon_req: reallocb failed")); 4027 tep->te_state = new_state; 4028 tl_merror(wq, respmp, ENOMEM); 4029 return; 4030 } 4031 di = (struct T_discon_ind *)dimp->b_rptr; 4032 di->SEQ_number = BADSEQNUM; 4033 save_state = peer_tep->te_state; 4034 peer_tep->te_state = TS_IDLE; 4035 4036 TL_REMOVE_PEER(peer_tep->te_oconp); 4037 enableok(peer_tep->te_wq); 4038 TL_QENABLE(peer_tep); 4039 } else { 4040 freemsg(respmp); 4041 dimp = NULL; 4042 } 4043 4044 /* 4045 * remove endpoint from incoming connection list 4046 * - remove disconnect client from list on server 4047 */ 4048 tl_freetip(tep, tip); 4049 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4050 /* 4051 * disconnect an outgoing request pending from tep 4052 */ 4053 4054 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4055 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4056 SL_TRACE | SL_ERROR, 4057 "tl_discon_req: reallocb failed")); 4058 tep->te_state = new_state; 4059 tl_merror(wq, respmp, ENOMEM); 4060 return; 4061 } 4062 di = (struct T_discon_ind *)dimp->b_rptr; 4063 DB_TYPE(dimp) = M_PROTO; 4064 di->PRIM_type = T_DISCON_IND; 4065 di->DISCON_reason = ECONNRESET; 4066 di->SEQ_number = tep->te_seqno; 4067 4068 /* 4069 * If this is a socket the T_DISCON_IND is queued with 4070 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4071 * from the list of pending connections. 4072 * Note that when te_oconp is set the peer better have 4073 * a t_connind_t for the client. 4074 */ 4075 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4076 /* 4077 * No need to check that 4078 * ti_tep == NULL since the T_DISCON_IND 4079 * takes precedence over other queued 4080 * messages. 4081 */ 4082 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4083 peer_tep = NULL; 4084 dimp = NULL; 4085 /* 4086 * Can't clear te_oconp since tl_co_unconnect needs 4087 * it as a hint not to free the tep. 4088 * Keep the state unchanged since tl_conn_res inspects 4089 * it. 4090 */ 4091 new_state = tep->te_state; 4092 } else { 4093 /* Found - delete it */ 4094 tip = tl_icon_find(peer_tep, tep->te_seqno); 4095 if (tip != NULL) { 4096 ASSERT(tep == tip->ti_tep); 4097 save_state = peer_tep->te_state; 4098 if (peer_tep->te_nicon == 1) 4099 peer_tep->te_state = 4100 NEXTSTATE(TE_DISCON_IND2, 4101 peer_tep->te_state); 4102 else 4103 peer_tep->te_state = 4104 NEXTSTATE(TE_DISCON_IND3, 4105 peer_tep->te_state); 4106 tl_freetip(peer_tep, tip); 4107 } 4108 ASSERT(tep->te_oconp != NULL); 4109 TL_UNCONNECT(tep->te_oconp); 4110 } 4111 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4112 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4113 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4114 SL_TRACE | SL_ERROR, 4115 "tl_discon_req: reallocb failed")); 4116 tep->te_state = new_state; 4117 tl_merror(wq, respmp, ENOMEM); 4118 return; 4119 } 4120 di = (struct T_discon_ind *)dimp->b_rptr; 4121 di->SEQ_number = BADSEQNUM; 4122 4123 save_state = peer_tep->te_state; 4124 peer_tep->te_state = TS_IDLE; 4125 } else { 4126 /* Not connected */ 4127 tep->te_state = new_state; 4128 freemsg(respmp); 4129 return; 4130 } 4131 4132 /* Commit state changes */ 4133 tep->te_state = new_state; 4134 4135 if (peer_tep == NULL) { 4136 ASSERT(dimp == NULL); 4137 goto done; 4138 } 4139 /* 4140 * Flush queues on peer before sending up 4141 * T_DISCON_IND according to TPI 4142 */ 4143 4144 if ((save_state == TS_DATA_XFER) || 4145 (save_state == TS_WIND_ORDREL) || 4146 (save_state == TS_WREQ_ORDREL)) 4147 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4148 4149 DB_TYPE(dimp) = M_PROTO; 4150 di->PRIM_type = T_DISCON_IND; 4151 di->DISCON_reason = ECONNRESET; 4152 4153 /* 4154 * data blocks already linked into dimp by reallocb() 4155 */ 4156 /* 4157 * send indication message to peer user module 4158 */ 4159 ASSERT(dimp != NULL); 4160 putnext(peer_tep->te_rq, dimp); 4161 done: 4162 if (tep->te_conp) { /* disconnect pointers if connected */ 4163 ASSERT(!peer_tep->te_closing); 4164 4165 /* 4166 * Messages may be queued on peer's write queue 4167 * waiting to be processed by its write service 4168 * procedure. Before the pointer to the peer transport 4169 * structure is set to NULL, qenable the peer's write 4170 * queue so that the queued up messages are processed. 4171 */ 4172 if ((save_state == TS_DATA_XFER) || 4173 (save_state == TS_WIND_ORDREL) || 4174 (save_state == TS_WREQ_ORDREL)) 4175 TL_QENABLE(peer_tep); 4176 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4177 TL_UNCONNECT(peer_tep->te_conp); 4178 if (!IS_SOCKET(tep)) { 4179 /* 4180 * unlink the streams 4181 */ 4182 tep->te_wq->q_next = NULL; 4183 peer_tep->te_wq->q_next = NULL; 4184 } 4185 TL_UNCONNECT(tep->te_conp); 4186 } 4187 } 4188 4189 static void 4190 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep) 4191 { 4192 if (!tep->te_closing) 4193 tl_addr_req(mp, tep); 4194 else 4195 freemsg(mp); 4196 4197 tl_serializer_exit(tep); 4198 tl_refrele(tep); 4199 } 4200 4201 static void 4202 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4203 { 4204 queue_t *wq; 4205 size_t ack_sz; 4206 mblk_t *ackmp; 4207 struct T_addr_ack *taa; 4208 4209 if (tep->te_closing) { 4210 freemsg(mp); 4211 return; 4212 } 4213 4214 wq = tep->te_wq; 4215 4216 /* 4217 * Note: T_ADDR_REQ message has only PRIM_type field 4218 * so it is already validated earlier. 4219 */ 4220 4221 if (IS_CLTS(tep) || 4222 (tep->te_state > TS_WREQ_ORDREL) || 4223 (tep->te_state < TS_DATA_XFER)) { 4224 /* 4225 * Either connectionless or connection oriented but not 4226 * in connected data transfer state or half-closed states. 
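	 * Only the local address (if the endpoint is bound) is reported
	 * here; the connected case below reports the peer's address too.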
4227 */ 4228 ack_sz = sizeof (struct T_addr_ack); 4229 if (tep->te_state >= TS_IDLE) 4230 /* is bound */ 4231 ack_sz += tep->te_alen; 4232 ackmp = reallocb(mp, ack_sz, 0); 4233 if (ackmp == NULL) { 4234 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4235 SL_TRACE | SL_ERROR, 4236 "tl_addr_req: reallocb failed")); 4237 tl_memrecover(wq, mp, ack_sz); 4238 return; 4239 } 4240 4241 taa = (struct T_addr_ack *)ackmp->b_rptr; 4242 4243 bzero(taa, sizeof (struct T_addr_ack)); 4244 4245 taa->PRIM_type = T_ADDR_ACK; 4246 ackmp->b_datap->db_type = M_PCPROTO; 4247 ackmp->b_wptr = (uchar_t *)&taa[1]; 4248 4249 if (tep->te_state >= TS_IDLE) { 4250 /* endpoint is bound */ 4251 taa->LOCADDR_length = tep->te_alen; 4252 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4253 4254 bcopy(tep->te_abuf, ackmp->b_wptr, 4255 tep->te_alen); 4256 ackmp->b_wptr += tep->te_alen; 4257 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4258 } 4259 4260 (void) qreply(wq, ackmp); 4261 } else { 4262 ASSERT(tep->te_state == TS_DATA_XFER || 4263 tep->te_state == TS_WIND_ORDREL || 4264 tep->te_state == TS_WREQ_ORDREL); 4265 /* connection oriented in data transfer */ 4266 tl_connected_cots_addr_req(mp, tep); 4267 } 4268 } 4269 4270 4271 static void 4272 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4273 { 4274 tl_endpt_t *peer_tep = tep->te_conp; 4275 size_t ack_sz; 4276 mblk_t *ackmp; 4277 struct T_addr_ack *taa; 4278 uchar_t *addr_startp; 4279 4280 if (tep->te_closing) { 4281 freemsg(mp); 4282 return; 4283 } 4284 4285 if (peer_tep == NULL || peer_tep->te_closing) { 4286 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ); 4287 return; 4288 } 4289 4290 ASSERT(tep->te_state >= TS_IDLE); 4291 4292 ack_sz = sizeof (struct T_addr_ack); 4293 ack_sz += T_ALIGN(tep->te_alen); 4294 ack_sz += peer_tep->te_alen; 4295 4296 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4297 if (ackmp == NULL) { 4298 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4299 "tl_connected_cots_addr_req: reallocb failed")); 4300 tl_memrecover(tep->te_wq, mp, ack_sz); 4301 return; 4302 } 4303 4304 taa = (struct T_addr_ack *)ackmp->b_rptr; 4305 4306 /* endpoint is bound */ 4307 taa->LOCADDR_length = tep->te_alen; 4308 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4309 4310 addr_startp = (uchar_t *)&taa[1]; 4311 4312 bcopy(tep->te_abuf, addr_startp, 4313 tep->te_alen); 4314 4315 taa->REMADDR_length = peer_tep->te_alen; 4316 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4317 taa->LOCADDR_length); 4318 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4319 bcopy(peer_tep->te_abuf, addr_startp, 4320 peer_tep->te_alen); 4321 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4322 taa->REMADDR_offset + peer_tep->te_alen; 4323 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4324 4325 putnext(tep->te_rq, ackmp); 4326 } 4327 4328 static void 4329 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4330 { 4331 if (IS_CLTS(tep)) { 4332 *ia = tl_clts_info_ack; 4333 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4334 } else { 4335 *ia = tl_cots_info_ack; 4336 if (IS_COTSORD(tep)) 4337 ia->SERV_type = T_COTS_ORD; 4338 } 4339 ia->TIDU_size = tl_tidusz; 4340 ia->CURRENT_state = tep->te_state; 4341 } 4342 4343 /* 4344 * This routine responds to T_CAPABILITY_REQ messages. It is called by 4345 * tl_wput. 
4346 */ 4347 static void 4348 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4349 { 4350 mblk_t *ackmp; 4351 t_uscalar_t cap_bits1; 4352 struct T_capability_ack *tcap; 4353 4354 if (tep->te_closing) { 4355 freemsg(mp); 4356 return; 4357 } 4358 4359 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4360 4361 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4362 M_PCPROTO, T_CAPABILITY_ACK); 4363 if (ackmp == NULL) { 4364 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4365 "tl_capability_req: reallocb failed")); 4366 tl_memrecover(tep->te_wq, mp, 4367 sizeof (struct T_capability_ack)); 4368 return; 4369 } 4370 4371 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4372 tcap->CAP_bits1 = 0; 4373 4374 if (cap_bits1 & TC1_INFO) { 4375 tl_copy_info(&tcap->INFO_ack, tep); 4376 tcap->CAP_bits1 |= TC1_INFO; 4377 } 4378 4379 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4380 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4381 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4382 } 4383 4384 putnext(tep->te_rq, ackmp); 4385 } 4386 4387 static void 4388 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4389 { 4390 if (!tep->te_closing) 4391 tl_info_req(mp, tep); 4392 else 4393 freemsg(mp); 4394 4395 tl_serializer_exit(tep); 4396 tl_refrele(tep); 4397 } 4398 4399 static void 4400 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4401 { 4402 mblk_t *ackmp; 4403 4404 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4405 M_PCPROTO, T_INFO_ACK); 4406 if (ackmp == NULL) { 4407 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4408 "tl_info_req: reallocb failed")); 4409 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4410 return; 4411 } 4412 4413 /* 4414 * fill in T_INFO_ACK contents 4415 */ 4416 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4417 4418 /* 4419 * send ack message 4420 */ 4421 putnext(tep->te_rq, ackmp); 4422 } 4423 4424 /* 4425 * Handle M_DATA, T_data_req and T_optdata_req. 4426 * If this is a socket pass through T_optdata_req options unmodified. 4427 */ 4428 static void 4429 tl_data(mblk_t *mp, tl_endpt_t *tep) 4430 { 4431 queue_t *wq = tep->te_wq; 4432 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4433 ssize_t msz = MBLKL(mp); 4434 tl_endpt_t *peer_tep; 4435 queue_t *peer_rq; 4436 boolean_t closing = tep->te_closing; 4437 4438 if (IS_CLTS(tep)) { 4439 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4440 SL_TRACE | SL_ERROR, 4441 "tl_wput:clts:unattached M_DATA")); 4442 if (!closing) { 4443 tl_merror(wq, mp, EPROTO); 4444 } else { 4445 freemsg(mp); 4446 } 4447 return; 4448 } 4449 4450 /* 4451 * If the endpoint is closing it should still forward any data to the 4452 * peer (if it has one). If it is not allowed to forward it can just 4453 * free the message. 
4454 */ 4455 if (closing && 4456 (tep->te_state != TS_DATA_XFER) && 4457 (tep->te_state != TS_WREQ_ORDREL)) { 4458 freemsg(mp); 4459 return; 4460 } 4461 4462 if (DB_TYPE(mp) == M_PROTO) { 4463 if (prim->type == T_DATA_REQ && 4464 msz < sizeof (struct T_data_req)) { 4465 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4466 SL_TRACE | SL_ERROR, 4467 "tl_data:T_DATA_REQ:invalid message")); 4468 if (!closing) { 4469 tl_merror(wq, mp, EPROTO); 4470 } else { 4471 freemsg(mp); 4472 } 4473 return; 4474 } else if (prim->type == T_OPTDATA_REQ && 4475 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) { 4476 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4477 SL_TRACE | SL_ERROR, 4478 "tl_data:T_OPTDATA_REQ:invalid message")); 4479 if (!closing) { 4480 tl_merror(wq, mp, EPROTO); 4481 } else { 4482 freemsg(mp); 4483 } 4484 return; 4485 } 4486 } 4487 4488 /* 4489 * connection oriented provider 4490 */ 4491 switch (tep->te_state) { 4492 case TS_IDLE: 4493 /* 4494 * Other end not here - do nothing. 4495 */ 4496 freemsg(mp); 4497 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 4498 "tl_data:cots with endpoint idle")); 4499 return; 4500 4501 case TS_DATA_XFER: 4502 /* valid states */ 4503 if (tep->te_conp != NULL) 4504 break; 4505 4506 if (tep->te_oconp == NULL) { 4507 if (!closing) { 4508 tl_merror(wq, mp, EPROTO); 4509 } else { 4510 freemsg(mp); 4511 } 4512 return; 4513 } 4514 /* 4515 * For a socket the T_CONN_CON is sent early thus 4516 * the peer might not yet have accepted the connection. 4517 * If we are closing queue the packet with the T_CONN_IND. 4518 * Otherwise defer processing the packet until the peer 4519 * accepts the connection. 4520 * Note that the queue is noenabled when we go into this 4521 * state. 4522 */ 4523 if (!closing) { 4524 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4525 SL_TRACE | SL_ERROR, 4526 "tl_data: ocon")); 4527 TL_PUTBQ(tep, mp); 4528 return; 4529 } 4530 if (DB_TYPE(mp) == M_PROTO) { 4531 if (msz < sizeof (t_scalar_t)) { 4532 freemsg(mp); 4533 return; 4534 } 4535 /* reuse message block - just change REQ to IND */ 4536 if (prim->type == T_DATA_REQ) 4537 prim->type = T_DATA_IND; 4538 else 4539 prim->type = T_OPTDATA_IND; 4540 } 4541 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4542 return; 4543 4544 case TS_WREQ_ORDREL: 4545 if (tep->te_conp == NULL) { 4546 /* 4547 * Other end closed - generate discon_ind 4548 * with reason 0 to cause an EPIPE but no 4549 * read side error on AF_UNIX sockets. 4550 */ 4551 freemsg(mp); 4552 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4553 SL_TRACE | SL_ERROR, 4554 "tl_data: WREQ_ORDREL and no peer")); 4555 tl_discon_ind(tep, 0); 4556 return; 4557 } 4558 break; 4559 4560 default: 4561 /* invalid state for event TE_DATA_REQ */ 4562 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4563 "tl_data:cots:out of state")); 4564 tl_merror(wq, mp, EPROTO); 4565 return; 4566 } 4567 /* 4568 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4569 * (State stays same on this event) 4570 */ 4571 4572 /* 4573 * get connected endpoint 4574 */ 4575 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4576 freemsg(mp); 4577 /* Peer closed */ 4578 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4579 "tl_data: peer gone")); 4580 return; 4581 } 4582 4583 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4584 peer_rq = peer_tep->te_rq; 4585 4586 /* 4587 * Put it back if flow controlled 4588 * Note: Messages already on queue when we are closing is bounded 4589 * so we can ignore flow control. 
4590 */ 4591 if (!canputnext(peer_rq) && !closing) { 4592 TL_PUTBQ(tep, mp); 4593 return; 4594 } 4595 4596 /* 4597 * validate peer state 4598 */ 4599 switch (peer_tep->te_state) { 4600 case TS_DATA_XFER: 4601 case TS_WIND_ORDREL: 4602 /* valid states */ 4603 break; 4604 default: 4605 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4606 "tl_data:rx side:invalid state")); 4607 tl_merror(peer_tep->te_wq, mp, EPROTO); 4608 return; 4609 } 4610 if (DB_TYPE(mp) == M_PROTO) { 4611 /* reuse message block - just change REQ to IND */ 4612 if (prim->type == T_DATA_REQ) 4613 prim->type = T_DATA_IND; 4614 else 4615 prim->type = T_OPTDATA_IND; 4616 } 4617 /* 4618 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4619 * (peer state stays same on this event) 4620 */ 4621 /* 4622 * send data to connected peer 4623 */ 4624 putnext(peer_rq, mp); 4625 } 4626 4627 4628 4629 static void 4630 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4631 { 4632 queue_t *wq = tep->te_wq; 4633 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4634 ssize_t msz = MBLKL(mp); 4635 tl_endpt_t *peer_tep; 4636 queue_t *peer_rq; 4637 boolean_t closing = tep->te_closing; 4638 4639 if (msz < sizeof (struct T_exdata_req)) { 4640 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4641 "tl_exdata:invalid message")); 4642 if (!closing) { 4643 tl_merror(wq, mp, EPROTO); 4644 } else { 4645 freemsg(mp); 4646 } 4647 return; 4648 } 4649 4650 /* 4651 * If the endpoint is closing it should still forward any data to the 4652 * peer (if it has one). If it is not allowed to forward it can just 4653 * free the message. 4654 */ 4655 if (closing && 4656 (tep->te_state != TS_DATA_XFER) && 4657 (tep->te_state != TS_WREQ_ORDREL)) { 4658 freemsg(mp); 4659 return; 4660 } 4661 4662 /* 4663 * validate state 4664 */ 4665 switch (tep->te_state) { 4666 case TS_IDLE: 4667 /* 4668 * Other end not here - do nothing. 4669 */ 4670 freemsg(mp); 4671 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 4672 "tl_exdata:cots with endpoint idle")); 4673 return; 4674 4675 case TS_DATA_XFER: 4676 /* valid states */ 4677 if (tep->te_conp != NULL) 4678 break; 4679 4680 if (tep->te_oconp == NULL) { 4681 if (!closing) { 4682 tl_merror(wq, mp, EPROTO); 4683 } else { 4684 freemsg(mp); 4685 } 4686 return; 4687 } 4688 /* 4689 * For a socket the T_CONN_CON is sent early thus 4690 * the peer might not yet have accepted the connection. 4691 * If we are closing queue the packet with the T_CONN_IND. 4692 * Otherwise defer processing the packet until the peer 4693 * accepts the connection. 4694 * Note that the queue is noenabled when we go into this 4695 * state. 4696 */ 4697 if (!closing) { 4698 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4699 SL_TRACE | SL_ERROR, 4700 "tl_exdata: ocon")); 4701 TL_PUTBQ(tep, mp); 4702 return; 4703 } 4704 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4705 "tl_exdata: closing socket ocon")); 4706 prim->type = T_EXDATA_IND; 4707 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4708 return; 4709 4710 case TS_WREQ_ORDREL: 4711 if (tep->te_conp == NULL) { 4712 /* 4713 * Other end closed - generate discon_ind 4714 * with reason 0 to cause an EPIPE but no 4715 * read side error on AF_UNIX sockets. 
4716 */ 4717 freemsg(mp); 4718 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4719 SL_TRACE | SL_ERROR, 4720 "tl_exdata: WREQ_ORDREL and no peer")); 4721 tl_discon_ind(tep, 0); 4722 return; 4723 } 4724 break; 4725 4726 default: 4727 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4728 SL_TRACE | SL_ERROR, 4729 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4730 tep->te_state)); 4731 tl_merror(wq, mp, EPROTO); 4732 return; 4733 } 4734 /* 4735 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4736 * (state stays same on this event) 4737 */ 4738 4739 /* 4740 * get connected endpoint 4741 */ 4742 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4743 freemsg(mp); 4744 /* Peer closed */ 4745 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4746 "tl_exdata: peer gone")); 4747 return; 4748 } 4749 4750 peer_rq = peer_tep->te_rq; 4751 4752 /* 4753 * Put it back if flow controlled 4754 * Note: Messages already on queue when we are closing is bounded 4755 * so we can ignore flow control. 4756 */ 4757 if (!canputnext(peer_rq) && !closing) { 4758 TL_PUTBQ(tep, mp); 4759 return; 4760 } 4761 4762 /* 4763 * validate state on peer 4764 */ 4765 switch (peer_tep->te_state) { 4766 case TS_DATA_XFER: 4767 case TS_WIND_ORDREL: 4768 /* valid states */ 4769 break; 4770 default: 4771 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4772 "tl_exdata:rx side:invalid state")); 4773 tl_merror(peer_tep->te_wq, mp, EPROTO); 4774 return; 4775 } 4776 /* 4777 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4778 * (peer state stays same on this event) 4779 */ 4780 /* 4781 * reuse message block 4782 */ 4783 prim->type = T_EXDATA_IND; 4784 4785 /* 4786 * send data to connected peer 4787 */ 4788 putnext(peer_rq, mp); 4789 } 4790 4791 4792 4793 static void 4794 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4795 { 4796 queue_t *wq = tep->te_wq; 4797 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4798 ssize_t msz = MBLKL(mp); 4799 tl_endpt_t *peer_tep; 4800 queue_t *peer_rq; 4801 boolean_t closing = tep->te_closing; 4802 4803 if (msz < sizeof (struct T_ordrel_req)) { 4804 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4805 "tl_ordrel:invalid message")); 4806 if (!closing) { 4807 tl_merror(wq, mp, EPROTO); 4808 } else { 4809 freemsg(mp); 4810 } 4811 return; 4812 } 4813 4814 /* 4815 * validate state 4816 */ 4817 switch (tep->te_state) { 4818 case TS_DATA_XFER: 4819 case TS_WREQ_ORDREL: 4820 /* valid states */ 4821 if (tep->te_conp != NULL) 4822 break; 4823 4824 if (tep->te_oconp == NULL) 4825 break; 4826 4827 /* 4828 * For a socket the T_CONN_CON is sent early thus 4829 * the peer might not yet have accepted the connection. 4830 * If we are closing queue the packet with the T_CONN_IND. 4831 * Otherwise defer processing the packet until the peer 4832 * accepts the connection. 4833 * Note that the queue is noenabled when we go into this 4834 * state. 
4835 */ 4836 if (!closing) { 4837 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4838 SL_TRACE | SL_ERROR, 4839 "tl_ordlrel: ocon")); 4840 TL_PUTBQ(tep, mp); 4841 return; 4842 } 4843 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4844 "tl_ordlrel: closing socket ocon")); 4845 prim->type = T_ORDREL_IND; 4846 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4847 return; 4848 4849 default: 4850 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4851 SL_TRACE | SL_ERROR, 4852 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4853 tep->te_state)); 4854 if (!closing) { 4855 tl_merror(wq, mp, EPROTO); 4856 } else { 4857 freemsg(mp); 4858 } 4859 return; 4860 } 4861 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4862 4863 /* 4864 * get connected endpoint 4865 */ 4866 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4867 /* Peer closed */ 4868 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4869 "tl_ordrel: peer gone")); 4870 freemsg(mp); 4871 return; 4872 } 4873 4874 peer_rq = peer_tep->te_rq; 4875 4876 /* 4877 * Put it back if flow controlled except when we are closing. 4878 * Note: Messages already on queue when we are closing is bounded 4879 * so we can ignore flow control. 4880 */ 4881 if (!canputnext(peer_rq) && !closing) { 4882 TL_PUTBQ(tep, mp); 4883 return; 4884 } 4885 4886 /* 4887 * validate state on peer 4888 */ 4889 switch (peer_tep->te_state) { 4890 case TS_DATA_XFER: 4891 case TS_WIND_ORDREL: 4892 /* valid states */ 4893 break; 4894 default: 4895 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 4896 "tl_ordrel:rx side:invalid state")); 4897 tl_merror(peer_tep->te_wq, mp, EPROTO); 4898 return; 4899 } 4900 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4901 4902 /* 4903 * reuse message block 4904 */ 4905 prim->type = T_ORDREL_IND; 4906 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4907 "tl_ordrel: send ordrel_ind")); 4908 4909 /* 4910 * send data to connected peer 4911 */ 4912 putnext(peer_rq, mp); 4913 } 4914 4915 4916 /* 4917 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 
4918 */ 4919 static void 4920 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4921 { 4922 size_t err_sz; 4923 tl_endpt_t *tep; 4924 struct T_unitdata_req *udreq; 4925 mblk_t *err_mp; 4926 t_scalar_t alen; 4927 t_scalar_t olen; 4928 struct T_uderror_ind *uderr; 4929 uchar_t *addr_startp; 4930 4931 err_sz = sizeof (struct T_uderror_ind); 4932 tep = (tl_endpt_t *)wq->q_ptr; 4933 udreq = (struct T_unitdata_req *)mp->b_rptr; 4934 alen = udreq->DEST_length; 4935 olen = udreq->OPT_length; 4936 4937 if (alen > 0) 4938 err_sz = T_ALIGN(err_sz + alen); 4939 if (olen > 0) 4940 err_sz += olen; 4941 4942 err_mp = allocb(err_sz, BPRI_MED); 4943 if (err_mp == NULL) { 4944 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 4945 "tl_uderr:allocb failure")); 4946 /* 4947 * Note: no rollback of state needed as it does 4948 * not change in connectionless transport 4949 */ 4950 tl_memrecover(wq, mp, err_sz); 4951 return; 4952 } 4953 4954 DB_TYPE(err_mp) = M_PROTO; 4955 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4956 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4957 uderr->PRIM_type = T_UDERROR_IND; 4958 uderr->ERROR_type = err; 4959 uderr->DEST_length = alen; 4960 uderr->OPT_length = olen; 4961 if (alen <= 0) { 4962 uderr->DEST_offset = 0; 4963 } else { 4964 uderr->DEST_offset = 4965 (t_scalar_t)sizeof (struct T_uderror_ind); 4966 addr_startp = mp->b_rptr + udreq->DEST_offset; 4967 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4968 (size_t)alen); 4969 } 4970 if (olen <= 0) { 4971 uderr->OPT_offset = 0; 4972 } else { 4973 uderr->OPT_offset = 4974 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4975 uderr->DEST_length); 4976 addr_startp = mp->b_rptr + udreq->OPT_offset; 4977 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4978 (size_t)olen); 4979 } 4980 freemsg(mp); 4981 4982 /* 4983 * send indication message 4984 */ 4985 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4986 4987 qreply(wq, err_mp); 4988 } 4989 4990 static void 4991 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4992 { 4993 queue_t *wq = tep->te_wq; 4994 4995 if (!tep->te_closing && (wq->q_first != NULL)) { 4996 TL_PUTQ(tep, mp); 4997 } else { 4998 if (tep->te_rq != NULL) 4999 tl_unitdata(mp, tep); 5000 else 5001 freemsg(mp); 5002 } 5003 5004 tl_serializer_exit(tep); 5005 tl_refrele(tep); 5006 } 5007 5008 /* 5009 * Handle T_unitdata_req. 5010 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 5011 * If this is a socket pass through options unmodified. 5012 */ 5013 static void 5014 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 5015 { 5016 queue_t *wq = tep->te_wq; 5017 soux_addr_t ux_addr; 5018 tl_addr_t destaddr; 5019 uchar_t *addr_startp; 5020 tl_endpt_t *peer_tep; 5021 struct T_unitdata_ind *udind; 5022 struct T_unitdata_req *udreq; 5023 ssize_t msz, ui_sz, reuse_mb_sz; 5024 t_scalar_t alen, aoff, olen, ooff; 5025 t_scalar_t oldolen = 0; 5026 cred_t *cr = NULL; 5027 pid_t cpid; 5028 5029 udreq = (struct T_unitdata_req *)mp->b_rptr; 5030 msz = MBLKL(mp); 5031 5032 /* 5033 * validate the state 5034 */ 5035 if (tep->te_state != TS_IDLE) { 5036 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5037 SL_TRACE | SL_ERROR, 5038 "tl_wput:T_CONN_REQ:out of state")); 5039 tl_merror(wq, mp, EPROTO); 5040 return; 5041 } 5042 /* 5043 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5044 * (state does not change on this event) 5045 */ 5046 5047 /* 5048 * validate the message 5049 * Note: dereference fields in struct inside message only 5050 * after validating the message length. 
5051 */ 5052 if (msz < sizeof (struct T_unitdata_req)) { 5053 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 5054 "tl_unitdata:invalid message length")); 5055 tl_merror(wq, mp, EINVAL); 5056 return; 5057 } 5058 alen = udreq->DEST_length; 5059 aoff = udreq->DEST_offset; 5060 oldolen = olen = udreq->OPT_length; 5061 ooff = udreq->OPT_offset; 5062 if (olen == 0) 5063 ooff = 0; 5064 5065 if (IS_SOCKET(tep)) { 5066 if ((alen != TL_SOUX_ADDRLEN) || 5067 (aoff < 0) || 5068 (aoff + alen > msz) || 5069 (olen < 0) || (ooff < 0) || 5070 ((olen > 0) && ((ooff + olen) > msz))) { 5071 (void) (STRLOG(TL_ID, tep->te_minor, 5072 1, SL_TRACE | SL_ERROR, 5073 "tl_unitdata_req: invalid socket addr " 5074 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5075 (int)msz, alen, aoff, olen, ooff)); 5076 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5077 return; 5078 } 5079 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5080 5081 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5082 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5083 (void) (STRLOG(TL_ID, tep->te_minor, 5084 1, SL_TRACE | SL_ERROR, 5085 "tl_conn_req: invalid socket magic")); 5086 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5087 return; 5088 } 5089 } else { 5090 if ((alen < 0) || 5091 (aoff < 0) || 5092 ((alen > 0) && ((aoff + alen) > msz)) || 5093 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5094 ((aoff + alen) < 0) || 5095 ((olen > 0) && ((ooff + olen) > msz)) || 5096 (olen < 0) || 5097 (ooff < 0) || 5098 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5099 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5100 SL_TRACE | SL_ERROR, 5101 "tl_unitdata:invalid unit data message")); 5102 tl_merror(wq, mp, EINVAL); 5103 return; 5104 } 5105 } 5106 5107 /* Options not supported unless it's a socket */ 5108 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5109 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 5110 "tl_unitdata:option use(unsupported) or zero len addr")); 5111 tl_uderr(wq, mp, EPROTO); 5112 return; 5113 } 5114 #ifdef DEBUG 5115 /* 5116 * Mild form of ASSERT()ion to detect broken TPI apps. 5117 * if (!assertion) 5118 * log warning; 5119 */ 5120 if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5121 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 5122 "tl_unitdata:addr overlaps TPI message")); 5123 } 5124 #endif 5125 /* 5126 * get destination endpoint 5127 */ 5128 destaddr.ta_alen = alen; 5129 destaddr.ta_abuf = mp->b_rptr + aoff; 5130 destaddr.ta_zoneid = tep->te_zoneid; 5131 5132 /* 5133 * Check whether the destination is the same that was used previously 5134 * and the destination endpoint is in the right state. If something is 5135 * wrong, find destination again and cache it. 5136 */ 5137 peer_tep = tep->te_lastep; 5138 5139 if ((peer_tep == NULL) || peer_tep->te_closing || 5140 (peer_tep->te_state != TS_IDLE) || 5141 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5142 /* 5143 * Not the same as cached destination , need to find the right 5144 * destination. 5145 */ 5146 peer_tep = (IS_SOCKET(tep) ? 5147 tl_sock_find_peer(tep, &ux_addr) : 5148 tl_find_peer(tep, &destaddr)); 5149 5150 if (peer_tep == NULL) { 5151 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5152 SL_TRACE | SL_ERROR, 5153 "tl_unitdata:no one at destination address")); 5154 tl_uderr(wq, mp, ECONNRESET); 5155 return; 5156 } 5157 5158 /* 5159 * Cache the new peer. 
5160 */ 5161 if (tep->te_lastep != NULL) 5162 tl_refrele(tep->te_lastep); 5163 5164 tep->te_lastep = peer_tep; 5165 } 5166 5167 if (peer_tep->te_state != TS_IDLE) { 5168 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR, 5169 "tl_unitdata:provider in invalid state")); 5170 tl_uderr(wq, mp, EPROTO); 5171 return; 5172 } 5173 5174 ASSERT(peer_tep->te_rq != NULL); 5175 5176 /* 5177 * Put it back if flow controlled except when we are closing. 5178 * Note: Messages already on queue when we are closing is bounded 5179 * so we can ignore flow control. 5180 */ 5181 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5182 /* record what we are flow controlled on */ 5183 if (tep->te_flowq != NULL) { 5184 list_remove(&tep->te_flowq->te_flowlist, tep); 5185 } 5186 list_insert_head(&peer_tep->te_flowlist, tep); 5187 tep->te_flowq = peer_tep; 5188 TL_PUTBQ(tep, mp); 5189 return; 5190 } 5191 /* 5192 * prepare indication message 5193 */ 5194 5195 /* 5196 * calculate length of message 5197 */ 5198 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) { 5199 cr = msg_getcred(mp, &cpid); 5200 ASSERT(cr != NULL); 5201 5202 if (peer_tep->te_flag & TL_SETCRED) { 5203 ASSERT(olen == 0); 5204 olen = (t_scalar_t)sizeof (struct opthdr) + 5205 OPTLEN(sizeof (tl_credopt_t)); 5206 /* 1 option only */ 5207 } else if (peer_tep->te_flag & TL_SETUCRED) { 5208 ASSERT(olen == 0); 5209 olen = (t_scalar_t)sizeof (struct opthdr) + 5210 OPTLEN(ucredminsize(cr)); 5211 /* 1 option only */ 5212 } else { 5213 /* Possibly more than one option */ 5214 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5215 OPTLEN(ucredminsize(cr)); 5216 } 5217 } 5218 5219 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen; 5220 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen; 5221 5222 /* 5223 * If the unitdata_ind fits and we are not adding options 5224 * reuse the udreq mblk. 5225 * 5226 * Otherwise, it is possible we need to append an option if one of the 5227 * te_flag bits is set. This requires extra space in the data block for 5228 * the additional option but the traditional technique used below to 5229 * allocate a new block and copy into it will not work when there is a 5230 * message block with a free pointer (since we don't know anything 5231 * about the layout of the data, pointers referencing or within the 5232 * data, etc.). To handle this possibility the upper layers may have 5233 * preallocated some space to use for appending an option. We check the 5234 * overall mblock size against the size we need ('reuse_mb_sz' with the 5235 * original address length [alen] to ensure we won't overrun the 5236 * current mblk data size) to see if there is free space and thus 5237 * avoid allocating a new message block. 5238 */ 5239 if (msz >= ui_sz && alen >= tep->te_alen && 5240 !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) { 5241 /* 5242 * Reuse the original mblk. Leave options in place. 5243 */ 5244 udind = (struct T_unitdata_ind *)mp->b_rptr; 5245 udind->PRIM_type = T_UNITDATA_IND; 5246 udind->SRC_length = tep->te_alen; 5247 addr_startp = mp->b_rptr + udind->SRC_offset; 5248 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5249 5250 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen && 5251 mp->b_datap->db_frtnp != NULL) { 5252 /* 5253 * We have a message block with a free pointer, but extra space 5254 * has been pre-allocated for us in case we need to append an 5255 * option. Reuse the original mblk, leaving existing options in 5256 * place. 
5257 */ 5258 udind = (struct T_unitdata_ind *)mp->b_rptr; 5259 udind->PRIM_type = T_UNITDATA_IND; 5260 udind->SRC_length = tep->te_alen; 5261 addr_startp = mp->b_rptr + udind->SRC_offset; 5262 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5263 5264 if (peer_tep->te_flag & 5265 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) { 5266 ASSERT(cr != NULL); 5267 /* 5268 * We're appending one new option here after the 5269 * original ones. 5270 */ 5271 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen, 5272 cr, cpid, peer_tep->te_flag, peer_tep->te_credp); 5273 } 5274 5275 } else if (mp->b_datap->db_frtnp != NULL) { 5276 /* 5277 * The next block creates a new mp and tries to copy the data 5278 * block into it, but that cannot handle a message with a free 5279 * pointer (for more details see the comment in kstrputmsg() 5280 * where dupmsg() is called). Since we can never properly 5281 * duplicate the mp while also extending the data, just error 5282 * out now. 5283 */ 5284 tl_uderr(wq, mp, EPROTO); 5285 return; 5286 } else { 5287 /* Allocate a new T_unitdata_ind message */ 5288 mblk_t *ui_mp; 5289 5290 ui_mp = allocb(ui_sz, BPRI_MED); 5291 if (ui_mp == NULL) { 5292 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5293 "tl_unitdata:allocb failure:message queued")); 5294 tl_memrecover(wq, mp, ui_sz); 5295 return; 5296 } 5297 5298 /* 5299 * fill in T_UNITDATA_IND contents 5300 */ 5301 DB_TYPE(ui_mp) = M_PROTO; 5302 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5303 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5304 udind->PRIM_type = T_UNITDATA_IND; 5305 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5306 udind->SRC_length = tep->te_alen; 5307 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5308 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5309 udind->OPT_offset = 5310 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5311 udind->OPT_length = olen; 5312 if (peer_tep->te_flag & 5313 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) { 5314 5315 if (oldolen != 0) { 5316 bcopy((void *)((uintptr_t)udreq + ooff), 5317 (void *)((uintptr_t)udind + 5318 udind->OPT_offset), 5319 oldolen); 5320 } 5321 ASSERT(cr != NULL); 5322 5323 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5324 oldolen, cr, cpid, 5325 peer_tep->te_flag, peer_tep->te_credp); 5326 } else { 5327 bcopy((void *)((uintptr_t)udreq + ooff), 5328 (void *)((uintptr_t)udind + udind->OPT_offset), 5329 olen); 5330 } 5331 5332 /* 5333 * relink data blocks from mp to ui_mp 5334 */ 5335 ui_mp->b_cont = mp->b_cont; 5336 freeb(mp); 5337 mp = ui_mp; 5338 } 5339 /* 5340 * send indication message 5341 */ 5342 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5343 putnext(peer_tep->te_rq, mp); 5344 } 5345 5346 5347 5348 /* 5349 * Check if a given addr is in use. 5350 * Endpoint ptr returned or NULL if not found. 5351 * The name space is separate for each mode. This implies that 5352 * sockets get their own name space. 
5353 */ 5354 static tl_endpt_t * 5355 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5356 { 5357 tl_endpt_t *peer_tep = NULL; 5358 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5359 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5360 5361 ASSERT(!IS_SOCKET(tep)); 5362 5363 ASSERT(ap != NULL && ap->ta_alen > 0); 5364 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5365 ASSERT(ap->ta_abuf != NULL); 5366 EQUIV(rc == 0, peer_tep != NULL); 5367 IMPLY(rc == 0, 5368 (tep->te_zoneid == peer_tep->te_zoneid) && 5369 (tep->te_transport == peer_tep->te_transport)); 5370 5371 if ((rc == 0) && (peer_tep->te_closing)) { 5372 tl_refrele(peer_tep); 5373 peer_tep = NULL; 5374 } 5375 5376 return (peer_tep); 5377 } 5378 5379 /* 5380 * Find peer for a socket based on unix domain address. 5381 * For implicit addresses our peer can be found by minor number in ai hash. For 5382 * explicit binds we look vnode address at addr_hash. 5383 */ 5384 static tl_endpt_t * 5385 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5386 { 5387 tl_endpt_t *peer_tep = NULL; 5388 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5389 tep->te_aihash : tep->te_addrhash; 5390 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5391 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5392 5393 ASSERT(IS_SOCKET(tep)); 5394 EQUIV(rc == 0, peer_tep != NULL); 5395 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); 5396 5397 if (peer_tep != NULL) { 5398 /* Don't attempt to use closing peer. */ 5399 if (peer_tep->te_closing) 5400 goto errout; 5401 5402 /* 5403 * Cross-zone unix sockets are permitted, but for Trusted 5404 * Extensions only, the "server" for these must be in the 5405 * global zone. 5406 */ 5407 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5408 is_system_labeled() && 5409 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5410 goto errout; 5411 } 5412 5413 return (peer_tep); 5414 5415 errout: 5416 tl_refrele(peer_tep); 5417 return (NULL); 5418 } 5419 5420 /* 5421 * Generate a free addr and return it in struct pointed by ap 5422 * but allocating space for address buffer. 5423 * The generated address will be at least 4 bytes long and, if req->ta_alen 5424 * exceeds 4 bytes, be req->ta_alen bytes long. 5425 * 5426 * If address is found it will be inserted in the hash. 5427 * 5428 * If req->ta_alen is larger than the default alen (4 bytes) the last 5429 * alen-4 bytes will always be the same as in req. 5430 * 5431 * Return 0 for failure. 5432 * Return non-zero for success. 5433 */ 5434 static boolean_t 5435 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5436 { 5437 t_scalar_t alen; 5438 uint32_t loopcnt; /* Limit loop to 2^32 */ 5439 5440 ASSERT(tep->te_hash_hndl != NULL); 5441 ASSERT(!IS_SOCKET(tep)); 5442 5443 if (tep->te_hash_hndl == NULL) 5444 return (B_FALSE); 5445 5446 /* 5447 * check if default addr is in use 5448 * if it is - bump it and try again 5449 */ 5450 if (req == NULL) { 5451 alen = sizeof (uint32_t); 5452 } else { 5453 alen = max(req->ta_alen, sizeof (uint32_t)); 5454 ASSERT(tep->te_zoneid == req->ta_zoneid); 5455 } 5456 5457 if (tep->te_alen < alen) { 5458 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5459 5460 /* 5461 * Not enough space in tep->ta_ap to hold the address, 5462 * allocate a bigger space. 
5463 */ 5464 if (abuf == NULL) 5465 return (B_FALSE); 5466 5467 if (tep->te_alen > 0) 5468 kmem_free(tep->te_abuf, tep->te_alen); 5469 5470 tep->te_alen = alen; 5471 tep->te_abuf = abuf; 5472 } 5473 5474 /* Copy in the address in req */ 5475 if (req != NULL) { 5476 ASSERT(alen >= req->ta_alen); 5477 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5478 } 5479 5480 /* 5481 * First try minor number then try default addresses. 5482 */ 5483 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5484 5485 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5486 if (mod_hash_insert_reserve(tep->te_addrhash, 5487 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5488 tep->te_hash_hndl) == 0) { 5489 /* 5490 * found free address 5491 */ 5492 tep->te_flag |= TL_ADDRHASHED; 5493 tep->te_hash_hndl = NULL; 5494 5495 return (B_TRUE); /* successful return */ 5496 } 5497 /* 5498 * Use default address. 5499 */ 5500 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5501 atomic_inc_32(&tep->te_defaddr); 5502 } 5503 5504 /* 5505 * Failed to find anything. 5506 */ 5507 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5508 "tl_get_any_addr:looped 2^32 times")); 5509 return (B_FALSE); 5510 } 5511 5512 /* 5513 * reallocb + set r/w ptrs to reflect size. 5514 */ 5515 static mblk_t * 5516 tl_resizemp(mblk_t *mp, ssize_t new_size) 5517 { 5518 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5519 return (NULL); 5520 5521 mp->b_rptr = DB_BASE(mp); 5522 mp->b_wptr = mp->b_rptr + new_size; 5523 return (mp); 5524 } 5525 5526 static void 5527 tl_cl_backenable(tl_endpt_t *tep) 5528 { 5529 list_t *l = &tep->te_flowlist; 5530 tl_endpt_t *elp; 5531 5532 ASSERT(IS_CLTS(tep)); 5533 5534 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5535 ASSERT(tep->te_ser == elp->te_ser); 5536 ASSERT(elp->te_flowq == tep); 5537 if (!elp->te_closing) 5538 TL_QENABLE(elp); 5539 elp->te_flowq = NULL; 5540 list_remove(l, elp); 5541 } 5542 } 5543 5544 /* 5545 * Unconnect endpoints. 5546 */ 5547 static void 5548 tl_co_unconnect(tl_endpt_t *tep) 5549 { 5550 tl_endpt_t *peer_tep = tep->te_conp; 5551 tl_endpt_t *srv_tep = tep->te_oconp; 5552 list_t *l; 5553 tl_icon_t *tip; 5554 tl_endpt_t *cl_tep; 5555 mblk_t *d_mp; 5556 5557 ASSERT(IS_COTS(tep)); 5558 /* 5559 * If our peer is closing, don't use it. 
5560 */ 5561 if ((peer_tep != NULL) && peer_tep->te_closing) { 5562 TL_UNCONNECT(tep->te_conp); 5563 peer_tep = NULL; 5564 } 5565 if ((srv_tep != NULL) && srv_tep->te_closing) { 5566 TL_UNCONNECT(tep->te_oconp); 5567 srv_tep = NULL; 5568 } 5569 5570 if (tep->te_nicon > 0) { 5571 l = &tep->te_iconp; 5572 /* 5573 * If incoming requests pending, change state 5574 * of clients on disconnect ind event and send 5575 * discon_ind pdu to modules above them 5576 * for server: all clients get disconnect 5577 */ 5578 5579 while (tep->te_nicon > 0) { 5580 tip = list_head(l); 5581 cl_tep = tip->ti_tep; 5582 5583 if (cl_tep == NULL) { 5584 tl_freetip(tep, tip); 5585 continue; 5586 } 5587 5588 if (cl_tep->te_oconp != NULL) { 5589 ASSERT(cl_tep != cl_tep->te_oconp); 5590 TL_UNCONNECT(cl_tep->te_oconp); 5591 } 5592 5593 if (cl_tep->te_closing) { 5594 tl_freetip(tep, tip); 5595 continue; 5596 } 5597 5598 enableok(cl_tep->te_wq); 5599 TL_QENABLE(cl_tep); 5600 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5601 if (d_mp != NULL) { 5602 cl_tep->te_state = TS_IDLE; 5603 putnext(cl_tep->te_rq, d_mp); 5604 } else { 5605 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5606 SL_TRACE | SL_ERROR, 5607 "tl_co_unconnect:icmng: " 5608 "allocb failure")); 5609 } 5610 tl_freetip(tep, tip); 5611 } 5612 } else if (srv_tep != NULL) { 5613 /* 5614 * If outgoing request pending, change state 5615 * of server on discon ind event 5616 */ 5617 5618 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5619 IS_COTSORD(srv_tep) && 5620 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5621 /* 5622 * Queue ordrel_ind for server to be picked up 5623 * when the connection is accepted. 5624 */ 5625 d_mp = tl_ordrel_ind_alloc(); 5626 } else { 5627 /* 5628 * send discon_ind to server 5629 */ 5630 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5631 } 5632 if (d_mp == NULL) { 5633 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5634 SL_TRACE | SL_ERROR, 5635 "tl_co_unconnect:outgoing:allocb failure")); 5636 TL_UNCONNECT(tep->te_oconp); 5637 goto discon_peer; 5638 } 5639 5640 /* 5641 * If this is a socket the T_DISCON_IND is queued with 5642 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5643 * from the list of pending connections. 5644 * Note that when te_oconp is set the peer better have 5645 * a t_connind_t for the client. 5646 */ 5647 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5648 /* 5649 * Queue the disconnection message. 5650 */ 5651 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5652 } else { 5653 tip = tl_icon_find(srv_tep, tep->te_seqno); 5654 if (tip == NULL) { 5655 freemsg(d_mp); 5656 } else { 5657 ASSERT(tep == tip->ti_tep); 5658 ASSERT(tep->te_ser == srv_tep->te_ser); 5659 /* 5660 * Delete tip from the server list. 
5661 */ 5662 if (srv_tep->te_nicon == 1) { 5663 srv_tep->te_state = 5664 NEXTSTATE(TE_DISCON_IND2, 5665 srv_tep->te_state); 5666 } else { 5667 srv_tep->te_state = 5668 NEXTSTATE(TE_DISCON_IND3, 5669 srv_tep->te_state); 5670 } 5671 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5672 T_DISCON_IND); 5673 putnext(srv_tep->te_rq, d_mp); 5674 tl_freetip(srv_tep, tip); 5675 } 5676 TL_UNCONNECT(tep->te_oconp); 5677 srv_tep = NULL; 5678 } 5679 } else if (peer_tep != NULL) { 5680 /* 5681 * unconnect existing connection 5682 * If connected, change state of peer on 5683 * discon ind event and send discon ind pdu 5684 * to module above it 5685 */ 5686 5687 ASSERT(tep->te_ser == peer_tep->te_ser); 5688 if (IS_COTSORD(peer_tep) && 5689 (peer_tep->te_state == TS_WIND_ORDREL || 5690 peer_tep->te_state == TS_DATA_XFER)) { 5691 /* 5692 * send ordrel ind 5693 */ 5694 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5695 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5696 peer_tep->te_state, 5697 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5698 d_mp = tl_ordrel_ind_alloc(); 5699 if (d_mp == NULL) { 5700 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5701 SL_TRACE | SL_ERROR, 5702 "tl_co_unconnect:connected:" 5703 "allocb failure")); 5704 /* 5705 * Continue with cleaning up peer as 5706 * this side may go away with the close 5707 */ 5708 TL_QENABLE(peer_tep); 5709 goto discon_peer; 5710 } 5711 peer_tep->te_state = 5712 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5713 5714 putnext(peer_tep->te_rq, d_mp); 5715 /* 5716 * Handle flow control case. This will generate 5717 * a t_discon_ind message with reason 0 if there 5718 * is data queued on the write side. 5719 */ 5720 TL_QENABLE(peer_tep); 5721 } else if (IS_COTSORD(peer_tep) && 5722 peer_tep->te_state == TS_WREQ_ORDREL) { 5723 /* 5724 * Sent an ordrel_ind. We send a discon with 5725 * with error 0 to inform that the peer is gone. 5726 */ 5727 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5728 SL_TRACE | SL_ERROR, 5729 "tl_co_unconnect: discon in state %d", 5730 tep->te_state)); 5731 tl_discon_ind(peer_tep, 0); 5732 } else { 5733 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5734 SL_TRACE | SL_ERROR, 5735 "tl_co_unconnect: state %d", tep->te_state)); 5736 tl_discon_ind(peer_tep, ECONNRESET); 5737 } 5738 5739 discon_peer: 5740 /* 5741 * Disconnect cross-pointers only for close 5742 */ 5743 if (tep->te_closing) { 5744 peer_tep = tep->te_conp; 5745 TL_REMOVE_PEER(peer_tep->te_conp); 5746 TL_REMOVE_PEER(tep->te_conp); 5747 } 5748 } 5749 } 5750 5751 /* 5752 * Note: The following routine does not recover from allocb() 5753 * failures 5754 * The reason should be from the <sys/errno.h> space. 5755 */ 5756 static void 5757 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5758 { 5759 mblk_t *d_mp; 5760 5761 if (tep->te_closing) 5762 return; 5763 5764 /* 5765 * flush the queues. 5766 */ 5767 flushq(tep->te_rq, FLUSHDATA); 5768 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5769 5770 /* 5771 * send discon ind 5772 */ 5773 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5774 if (d_mp == NULL) { 5775 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR, 5776 "tl_discon_ind:allocb failure")); 5777 return; 5778 } 5779 tep->te_state = TS_IDLE; 5780 putnext(tep->te_rq, d_mp); 5781 } 5782 5783 /* 5784 * Note: The following routine does not recover from allocb() 5785 * failures 5786 * The reason should be from the <sys/errno.h> space. 
5787 */ 5788 static mblk_t * 5789 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5790 { 5791 mblk_t *mp; 5792 struct T_discon_ind *tdi; 5793 5794 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5795 DB_TYPE(mp) = M_PROTO; 5796 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5797 tdi = (struct T_discon_ind *)mp->b_rptr; 5798 tdi->PRIM_type = T_DISCON_IND; 5799 tdi->DISCON_reason = reason; 5800 tdi->SEQ_number = seqnum; 5801 } 5802 return (mp); 5803 } 5804 5805 5806 /* 5807 * Note: The following routine does not recover from allocb() 5808 * failures 5809 */ 5810 static mblk_t * 5811 tl_ordrel_ind_alloc(void) 5812 { 5813 mblk_t *mp; 5814 struct T_ordrel_ind *toi; 5815 5816 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5817 DB_TYPE(mp) = M_PROTO; 5818 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5819 toi = (struct T_ordrel_ind *)mp->b_rptr; 5820 toi->PRIM_type = T_ORDREL_IND; 5821 } 5822 return (mp); 5823 } 5824 5825 5826 /* 5827 * Lookup the seqno in the list of queued connections. 5828 */ 5829 static tl_icon_t * 5830 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5831 { 5832 list_t *l = &tep->te_iconp; 5833 tl_icon_t *tip = list_head(l); 5834 5835 ASSERT(seqno != 0); 5836 5837 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5838 ; 5839 5840 return (tip); 5841 } 5842 5843 /* 5844 * Queue data for a given T_CONN_IND while verifying that redundant 5845 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5846 * Used when the originator of the connection closes. 5847 */ 5848 static void 5849 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5850 { 5851 tl_icon_t *tip; 5852 mblk_t **mpp, *mp; 5853 int prim, nprim; 5854 5855 if (nmp->b_datap->db_type == M_PROTO) 5856 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5857 else 5858 nprim = -1; /* M_DATA */ 5859 5860 tip = tl_icon_find(tep, seqno); 5861 if (tip == NULL) { 5862 freemsg(nmp); 5863 return; 5864 } 5865 5866 ASSERT(tip->ti_seqno != 0); 5867 mpp = &tip->ti_mp; 5868 while (*mpp != NULL) { 5869 mp = *mpp; 5870 5871 if (mp->b_datap->db_type == M_PROTO) 5872 prim = ((union T_primitives *)mp->b_rptr)->type; 5873 else 5874 prim = -1; /* M_DATA */ 5875 5876 /* 5877 * Allow nothing after a T_DISCON_IND 5878 */ 5879 if (prim == T_DISCON_IND) { 5880 freemsg(nmp); 5881 return; 5882 } 5883 /* 5884 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5885 */ 5886 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5887 freemsg(nmp); 5888 return; 5889 } 5890 mpp = &(mp->b_next); 5891 } 5892 *mpp = nmp; 5893 } 5894 5895 /* 5896 * Verify if a certain TPI primitive exists on the connind queue. 5897 * Use prim -1 for M_DATA. 5898 * Return non-zero if found. 5899 */ 5900 static boolean_t 5901 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5902 { 5903 tl_icon_t *tip = tl_icon_find(tep, seqno); 5904 boolean_t found = B_FALSE; 5905 5906 if (tip != NULL) { 5907 mblk_t *mp; 5908 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5909 found = (DB_TYPE(mp) == M_PROTO && 5910 ((union T_primitives *)mp->b_rptr)->type == prim); 5911 } 5912 } 5913 return (found); 5914 } 5915 5916 /* 5917 * Send the b_next mblk chain that has accumulated before the connection 5918 * was accepted. Perform the necessary state transitions. 
5919 */ 5920 static void 5921 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5922 { 5923 mblk_t *mp; 5924 union T_primitives *primp; 5925 5926 if (tep->te_closing) { 5927 tl_icon_freemsgs(mpp); 5928 return; 5929 } 5930 5931 ASSERT(tep->te_state == TS_DATA_XFER); 5932 ASSERT(tep->te_rq->q_first == NULL); 5933 5934 while ((mp = *mpp) != NULL) { 5935 *mpp = mp->b_next; 5936 mp->b_next = NULL; 5937 5938 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5939 switch (DB_TYPE(mp)) { 5940 default: 5941 freemsg(mp); 5942 break; 5943 case M_DATA: 5944 putnext(tep->te_rq, mp); 5945 break; 5946 case M_PROTO: 5947 primp = (union T_primitives *)mp->b_rptr; 5948 switch (primp->type) { 5949 case T_UNITDATA_IND: 5950 case T_DATA_IND: 5951 case T_OPTDATA_IND: 5952 case T_EXDATA_IND: 5953 putnext(tep->te_rq, mp); 5954 break; 5955 case T_ORDREL_IND: 5956 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5957 tep->te_state); 5958 putnext(tep->te_rq, mp); 5959 break; 5960 case T_DISCON_IND: 5961 tep->te_state = TS_IDLE; 5962 putnext(tep->te_rq, mp); 5963 break; 5964 default: 5965 #ifdef DEBUG 5966 cmn_err(CE_PANIC, 5967 "tl_icon_sendmsgs: unknown primitive"); 5968 #endif /* DEBUG */ 5969 freemsg(mp); 5970 break; 5971 } 5972 break; 5973 } 5974 } 5975 } 5976 5977 /* 5978 * Free the b_next mblk chain that has accumulated before the connection 5979 * was accepted. 5980 */ 5981 static void 5982 tl_icon_freemsgs(mblk_t **mpp) 5983 { 5984 mblk_t *mp; 5985 5986 while ((mp = *mpp) != NULL) { 5987 *mpp = mp->b_next; 5988 mp->b_next = NULL; 5989 freemsg(mp); 5990 } 5991 } 5992 5993 /* 5994 * Send M_ERROR 5995 * Note: assumes caller ensured enough space in mp or enough 5996 * memory available. Does not attempt recovery from allocb() 5997 * failures 5998 */ 5999 6000 static void 6001 tl_merror(queue_t *wq, mblk_t *mp, int error) 6002 { 6003 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6004 6005 if (tep->te_closing) { 6006 freemsg(mp); 6007 return; 6008 } 6009 6010 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6011 SL_TRACE | SL_ERROR, 6012 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 6013 6014 /* 6015 * flush all messages on queue. 
we are shutting
 * the stream down on fatal error
 */
	flushq(wq, FLUSHALL);
	if (IS_COTS(tep)) {
		/* connection oriented - unconnect endpoints */
		tl_co_unconnect(tep);
	}
	if (mp->b_cont) {
		freemsg(mp->b_cont);
		mp->b_cont = NULL;
	}

	if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
		freemsg(mp);
		mp = allocb(1, BPRI_HI);
		if (mp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_merror:M_PROTO: out of memory"));
			return;
		}
	}
	if (mp) {
		DB_TYPE(mp) = M_ERROR;
		mp->b_rptr = DB_BASE(mp);
		*mp->b_rptr = (char)error;
		mp->b_wptr = mp->b_rptr + sizeof (char);
		qreply(wq, mp);
	} else {
		(void) putnextctl1(tep->te_rq, M_ERROR, error);
	}
}

static void
tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
{
	ASSERT(cr != NULL);

	if (flag & TL_SETCRED) {
		struct opthdr *opt = (struct opthdr *)buf;
		tl_credopt_t *tlcred;

		opt->level = TL_PROT_LEVEL;
		opt->name = TL_OPT_PEER_CRED;
		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));

		tlcred = (tl_credopt_t *)(opt + 1);
		tlcred->tc_uid = crgetuid(cr);
		tlcred->tc_gid = crgetgid(cr);
		tlcred->tc_ruid = crgetruid(cr);
		tlcred->tc_rgid = crgetrgid(cr);
		tlcred->tc_suid = crgetsuid(cr);
		tlcred->tc_sgid = crgetsgid(cr);
		tlcred->tc_ngroups = crgetngroups(cr);
	} else if (flag & TL_SETUCRED) {
		struct opthdr *opt = (struct opthdr *)buf;

		opt->level = TL_PROT_LEVEL;
		opt->name = TL_OPT_PEER_UCRED;
		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));

		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
	} else {
		struct T_opthdr *topt = (struct T_opthdr *)buf;
		ASSERT(flag & TL_SOCKUCRED);

		topt->level = SOL_SOCKET;
		topt->name = SCM_UCRED;
		topt->len = ucredminsize(cr) + sizeof (*topt);
		topt->status = 0;
		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
	}
}

/* ARGSUSED */
static int
tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
	/* no default value processed in protocol specific code currently */
	return (-1);
}

/* ARGSUSED */
static int
tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
	int len;
	tl_endpt_t *tep;
	int *valp;

	tep = (tl_endpt_t *)wq->q_ptr;

	len = 0;

	/*
	 * Assumes: option level and name sanity check done elsewhere
	 */

	switch (level) {
	case SOL_SOCKET:
		if (!IS_SOCKET(tep))
			break;
		switch (name) {
		case SO_RECVUCRED:
			len = sizeof (int);
			valp = (int *)ptr;
			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
			break;
		default:
			break;
		}
		break;
	case TL_PROT_LEVEL:
		switch (name) {
		case TL_OPT_PEER_CRED:
		case TL_OPT_PEER_UCRED:
			/*
			 * This option is not supposed to be retrieved
			 * directly. It is only sent in T_CONN_{IND,CON} and
			 * T_UNITDATA_IND when certain internal flags are set
			 * by other options. Direct retrieval is always
			 * designed to fail (be ignored) for this option.
/* ARGSUSED */
static int
tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
	/* no default value is currently processed in protocol-specific code */
	return (-1);
}

/* ARGSUSED */
static int
tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
	int		len;
	tl_endpt_t	*tep;
	int		*valp;

	tep = (tl_endpt_t *)wq->q_ptr;

	len = 0;

	/*
	 * Assumes: option level and name sanity checks are done elsewhere.
	 */

	switch (level) {
	case SOL_SOCKET:
		if (!IS_SOCKET(tep))
			break;
		switch (name) {
		case SO_RECVUCRED:
			len = sizeof (int);
			valp = (int *)ptr;
			*valp = (tep->te_flag & TL_SOCKUCRED) != 0;
			break;
		default:
			break;
		}
		break;
	case TL_PROT_LEVEL:
		switch (name) {
		case TL_OPT_PEER_CRED:
		case TL_OPT_PEER_UCRED:
			/*
			 * These options are not supposed to be retrieved
			 * directly.  They are only sent in T_CONN_{IND,CON}
			 * and T_UNITDATA_IND when internal flags are set by
			 * other options.  Direct retrieval is always designed
			 * to fail (be ignored) for these options.
			 */
			break;
		}
	}
	return (len);
}

/* ARGSUSED */
static int
tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
    cred_t *cr)
{
	int		error;
	tl_endpt_t	*tep;

	tep = (tl_endpt_t *)wq->q_ptr;

	error = 0;		/* NOERROR */

	/*
	 * Assumes: option level and name sanity checks are done elsewhere.
	 */

	switch (level) {
	case SOL_SOCKET:
		if (!IS_SOCKET(tep)) {
			error = EINVAL;
			break;
		}
		/*
		 * TBD: fill in other AF_UNIX socket options and then stop
		 * returning error.
		 */
		switch (name) {
		case SO_RECVUCRED:
			/*
			 * We only support this for datagram sockets;
			 * getpeerucred handles the connection oriented
			 * transports.
			 */
			if (!IS_CLTS(tep)) {
				error = EINVAL;
				break;
			}
			if (*(int *)invalp == 0)
				tep->te_flag &= ~TL_SOCKUCRED;
			else
				tep->te_flag |= TL_SOCKUCRED;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	case TL_PROT_LEVEL:
		switch (name) {
		case TL_OPT_PEER_CRED:
		case TL_OPT_PEER_UCRED:
			/*
			 * These options are not supposed to be set directly.
			 * Their value is initialized for each endpoint at
			 * driver open time.  Direct setting is always designed
			 * to fail for these options.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_set_opt: option is not supported"));
			error = EPROTO;
			break;
		}
	}
	return (error);
}
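
/*
 * Illustrative sketch (not part of the driver): from userland, the
 * SO_RECVUCRED handling above corresponds to a datagram AF_UNIX socket
 * enabling credential reception, roughly:
 *
 *	int on = 1;
 *
 *	(void) setsockopt(fd, SOL_SOCKET, SO_RECVUCRED, &on, sizeof (on));
 *
 * Subsequent recvmsg() calls may then carry a cmsghdr with
 * cmsg_level == SOL_SOCKET and cmsg_type == SCM_UCRED whose data can be
 * examined with ucred_getuid(3C) and friends.
 */
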
static void
tl_timer(void *arg)
{
	queue_t *wq = arg;
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

	ASSERT(tep);

	tep->te_timoutid = 0;

	enableok(wq);
	/*
	 * Note: we could call wsrv directly here and save a context switch.
	 * Consider this change when qtimeout (not timeout) is active.
	 */
	qenable(wq);
}

static void
tl_buffer(void *arg)
{
	queue_t *wq = arg;
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

	ASSERT(tep);

	tep->te_bufcid = 0;
	tep->te_nowsrv = B_FALSE;

	enableok(wq);
	/*
	 * Note: we could call wsrv directly here and save a context switch.
	 * Consider this change when qbufcall (not bufcall) is active.
	 */
	qenable(wq);
}

static void
tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
{
	tl_endpt_t *tep;

	tep = (tl_endpt_t *)wq->q_ptr;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}
	noenable(wq);

	(void) insq(wq, wq->q_first, mp);

	if (tep->te_bufcid || tep->te_timoutid) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_memrecover:recover %p pending", (void *)wq));
		return;
	}

	tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
	if (tep->te_bufcid == NULL) {
		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
		    drv_usectohz(TL_BUFWAIT));
	}
}

static void
tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
{
	ASSERT(tip->ti_seqno != 0);

	if (tip->ti_mp != NULL) {
		tl_icon_freemsgs(&tip->ti_mp);
		tip->ti_mp = NULL;
	}
	if (tip->ti_tep != NULL) {
		tl_refrele(tip->ti_tep);
		tip->ti_tep = NULL;
	}
	list_remove(&tep->te_iconp, tip);
	kmem_free(tip, sizeof (tl_icon_t));
	tep->te_nicon--;
}

/*
 * Remove the address from the address hash.
 */
static void
tl_addr_unbind(tl_endpt_t *tep)
{
	tl_endpt_t *elp;

	if (tep->te_flag & TL_ADDRHASHED) {
		if (IS_SOCKET(tep)) {
			(void) mod_hash_remove(tep->te_addrhash,
			    (mod_hash_key_t)tep->te_vp,
			    (mod_hash_val_t *)&elp);
			tep->te_vp = (void *)(uintptr_t)tep->te_minor;
			tep->te_magic = SOU_MAGIC_IMPLICIT;
		} else {
			(void) mod_hash_remove(tep->te_addrhash,
			    (mod_hash_key_t)&tep->te_ap,
			    (mod_hash_val_t *)&elp);
			kmem_free(tep->te_abuf, tep->te_alen);
			tep->te_alen = -1;
			tep->te_abuf = NULL;
		}
		tep->te_flag &= ~TL_ADDRHASHED;
	}
}
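
/*
 * Illustrative sketch (not part of the driver): tl_memrecover() above is
 * meant to be invoked from a write-side routine when message allocation
 * fails, parking the original message and scheduling a retry, e.g. with
 * hypothetical locals "ackmp" and "ack_sz":
 *
 *	if ((ackmp = allocb(ack_sz, BPRI_MED)) == NULL) {
 *		tl_memrecover(wq, mp, ack_sz);
 *		return;
 *	}
 *
 * The queue stays disabled (noenable()) until either the qbufcall()
 * callback fires when memory becomes available or, failing that, the
 * qtimeout() timer re-enables the queue after TL_BUFWAIT.
 */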