1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 27 * Copyright (c) 2012 by Delphix. All rights reserved. 28 * Copyright 2015 Joyent, Inc. 29 */ 30 31 /* 32 * Multithreaded STREAMS Local Transport Provider. 33 * 34 * OVERVIEW 35 * ======== 36 * 37 * This driver provides TLI as well as socket semantics. It provides 38 * connectionless, connection oriented, and connection oriented with orderly 39 * release transports for TLI and sockets. Each transport type has separate name 40 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 41 * this removes any name space conflicts when binding to socket style transport 42 * addresses. 43 * 44 * NOTE: There is one exception: Socket ticots and ticotsord transports share 45 * the same namespace. In fact, sockets always use ticotsord type transport. 46 * 47 * The driver mode is specified during open() by the minor number used for 48 * open. 
 *
 * The sockets in addition have the following semantic differences:
 *	No support for passing up credentials (TL_SET[U]CRED).
 *
 *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *	T_OPTDATA_IND.
 *
 *	The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
 *	a T_CONN_RES is received from the acceptor. This means that a socket
 *	connect will complete before the peer has called accept.
 *
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed behind
 * the serializer and are essentially single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
 * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener serializer. During T_CONN_RES processing the eager serializer is
 * switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer.
 * To avoid races with endpoint closes while its serializer may be changing,
 * closes are blocked while serializers are manipulated.
 *
 * Reference accounting
 * --------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after the endpoint is closed. The te_closing being set on the endpoint
 * indicates that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with the
 * lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using the te_closing flag. It
 * is usually enough to check for the te_closing flag since all other state
 * changes happen after this flag is set and the close entered serializer.
 * Immediately after setting the te_closing flag tl_close() enters serializer
 * and waits until the callback finishes. This allows all functions called
 * within serializer to simply check te_closing without any locks.
 *
 * Serializer management.
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is constructed
 * and destroyed when the endpoint is destructed. CLTS transports use global
 * serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * the serializer destruction. The serializer should never be destroyed by any
 * function executed inside serializer.
 * This means that close has to wait till
 * all serializer activity for this endpoint is finished before it can drop the
 * last reference on the endpoint (which may as well free the serializer). This
 * is only relevant for COTS transports which manage serializers
 * dynamically. For CLTS transports close may complete without waiting for all
 * serializer activity to finish since serializer is only destroyed at driver
 * detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing
 * client serializer if it has any pending messages on the serializer and
 * instead moves acceptor to listener's serializer.
 *
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses the modhash hash table implementation. Each transport uses
 * two hash tables - one for finding endpoints by acceptor ID and another one
 * for finding endpoints by address. For sockets TICOTS and TICOTSORD share the
 * same pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash table lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 *
 * CLOSE processing
 * ================
 *
 * The driver enters serializer twice on close(). The close sequence is the
 * following:
 *
 * 0) Wait until closing is safe (te_closewait becomes zero)
 *	This step is needed to prevent close during serializer switches. In most
 *	cases (close happening after connection establishment) te_closewait is
 *	zero.
 * 1) Set te_closing.
 * 2) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *	It also needs to clear write-side q_next pointers - this should be done
 *	before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 3) Calls qprocsoff().
 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to
 *	complete (for COTS transports). For CLTS transport there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up waiting
 *	close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
 *
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one calls
 * putq() on the read queue. The read side service routine tl_rsrv() is called
 * when the read side stream is back-enabled. It enters serializer synchronously
 * (waits till serializer processing is complete). Within serializer it
 * back-enables all endpoints blocked by the queue for connection-less
 * transports and enables write side service processing for the peer for
 * connection-oriented transports.
 *
 * Read and write side service routines use special mblk-sized space in the
 * endpoint structure to enter perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 * queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput().
In order to maintain the ordering between these two queues the 206 * STREAMS queue is only manipulated within the serializer, so the ordering is 207 * provided by the serializer. 208 * 209 * Functions called from the tl_wsrv() sometimes may call putbq(). To 210 * immediately stop any further processing of the STREAMS message queues the 211 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 212 * side service processing stops when the flag is set. 213 * 214 * The tl_wsrv() function enters serializer synchronously and waits for it to 215 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 216 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 217 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 218 * always bounded by the amount of messages on the STREAMS queue at the time 219 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 220 * queue from another serialized entry which can't happen in parallel. This 221 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 222 * of it draining forever while writer places new messages on the STREAMS 223 * queue). 224 * 225 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 226 * 227 * 228 * Unix Domain Sockets 229 * =================== 230 * 231 * The driver knows the structure of Unix Domain sockets addresses and treats 232 * them differently from generic TLI addresses. For sockets implicit binds are 233 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 234 * instead of using address length of zero. Explicit binds specify 235 * SOU_MAGIC_EXPLICIT as magic. 236 * 237 * For implicit binds we always use minor number as soua_vp part of the address 238 * and avoid any hash table lookups. This saves two hash tables lookups per 239 * anonymous bind. 
 *
 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 *
 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 * tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses minor number as acceptor id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 *	is requested and the endpoint is already in use. There is no point in
 *	generating an unused address since this address will be rejected by
 *	sockfs anyway. For implicit binds it always generates a new address
 *	(sets soua_vp to its minor number).
 *
 * 2) It always uses minor number as acceptor ID and never uses queue
 *	pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 *	message and they do not use the queue pointer.
 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *	backlog followed by listen(). The listen() should be issued with
 *	non-zero backlog, so sotpi_listen() issues unbind request followed by
 *	bind request to the same address but with a non-zero qlen value. Both
 *	tl_bind() and tl_unbind() require write lock on the hash table to
 *	insert/remove the address. The driver does not remove the address from
 *	the hash for endpoints that are bound to the explicit address and have
 *	backlog of zero. During T_BIND_REQ processing if the address requested
 *	is equal to the address the endpoint already has it updates the backlog
 *	without reinserting the address in the hash table. This optimization
 *	avoids two hash table updates for each listener created.
 *	It also
 *	avoids the problem of a "stolen" address when another listener may use
 *	the same address between the unbind and bind and suddenly listen() fails
 *	because address is in use even though the bind() succeeded.
 *
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and one
 * for Sockets). Functions executing behind serializer can check or modify state
 * of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 * te_lastep field. The next time X talks to some address A it checks whether A
 * is the same as Y's address and if it is there is no need to lookup Y. If the
 * address is different or the state of Y is not appropriate (e.g. closed or not
 * idle) X does a lookup using tl_find_peer() and caches the new endpoint.
 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 * on the endpoint found.
 *
 * During close of endpoint Y it doesn't try to remove itself from other
 * endpoints' caches. They will detect that Y is gone and will search the peer
 * endpoint again.
 *
 * Flow Control Handling.
 * ----------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t).
For connectionless transports it keeps a 316 * list of endpoints flow controlled by this one. 317 * 318 * Each transport type is represented by a per-transport data structure 319 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 320 * endpoint address hash tables for each transport. It also contains pointer to 321 * transport serializer for connectionless transports. 322 * 323 * Each endpoint keeps a link to its transport structure, so the code can find 324 * all per-transport information quickly. 325 */ 326 327 #include <sys/types.h> 328 #include <sys/inttypes.h> 329 #include <sys/stream.h> 330 #include <sys/stropts.h> 331 #define _SUN_TPI_VERSION 2 332 #include <sys/tihdr.h> 333 #include <sys/strlog.h> 334 #include <sys/debug.h> 335 #include <sys/cred.h> 336 #include <sys/errno.h> 337 #include <sys/kmem.h> 338 #include <sys/id_space.h> 339 #include <sys/modhash.h> 340 #include <sys/mkdev.h> 341 #include <sys/tl.h> 342 #include <sys/stat.h> 343 #include <sys/conf.h> 344 #include <sys/modctl.h> 345 #include <sys/strsun.h> 346 #include <sys/socket.h> 347 #include <sys/socketvar.h> 348 #include <sys/sysmacros.h> 349 #include <sys/xti_xtiopt.h> 350 #include <sys/ddi.h> 351 #include <sys/sunddi.h> 352 #include <sys/zone.h> 353 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 354 #include <inet/optcom.h> 355 #include <sys/strsubr.h> 356 #include <sys/ucred.h> 357 #include <sys/suntpi.h> 358 #include <sys/list.h> 359 #include <sys/serializer.h> 360 361 /* 362 * TBD List 363 * 14 Eliminate state changes through table 364 * 16. AF_UNIX socket options 365 * 17. connect() for ticlts 366 * 18. support for "netstat" to show AF_UNIX plus TLI local 367 * transport connections 368 * 21. 
 *	sanity check to flushing on sending M_ERROR
 */

/*
 *	CONSTANT DECLARATIONS
 *	--------------------
 */

/*
 * Local declarations
 */

/*
 * Look up an entry of the TPI state table ti_statetbl, indexed first by the
 * event EV and then by the current state ST.
 */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */
/*
 * Hash tables size.
 * This is the initial value of the tl_hash_size variable.
 */
#define	TL_HASH_SIZE 311

/*
 * Definitions for module_info
 * (these are the values used to initialize tl_minfo).
 */
#define		TL_ID		(104)		/* module ID number */
#define		TL_NAME		"tl"		/* module name */
#define		TL_MINPSZ	(0)		/* min packet size */
#define		TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define		TL_HIWAT	(16*1024)	/* hi water mark */
#define		TL_LOWAT	(256)		/* lo water mark */
/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
/* Socket modes are the TLI mode values with the TL_SOCKET bit or-ed in. */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

/* Mask covering every mode value defined above (0 through 7). */
#define	TL_MINOR_MASK	0x7
/*
 * NOTE(review): presumably the first minor number handed out to opened
 * endpoints, i.e. the one following the three TLI mode minors - confirm
 * against tl_open()/tl_attach().
 */
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
/* Round p up to a multiple of sizeof (t_scalar_t). */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))

/*
 *	EXTERNAL VARIABLE DECLARATIONS
 *	-----------------------------
 */
/*
 * state table defined in the OS space.c
 */
extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static void tl_wput(queue_t *, mblk_t *);
static void tl_wsrv(queue_t *);
static void tl_rsrv(queue_t *);

/* Driver attach/detach/getinfo entry points (passed to DDI_DEFINE_STREAM_OPS). */
static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);


/*
 *	GLOBAL DATA STRUCTURES AND VARIABLES
 *	-----------------------------------
 */

/*
 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 * For now, we only manage the SO_RECVUCRED option but we also have
 * harmless dummy options to make things work with some common code we access.
 *
 * NOTE(review): the meaning of each positional initializer is given by the
 * opdes_t definition (presumably in <inet/optcom.h>) - confirm there before
 * adding entries.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};

/*
 * Table of all supported levels
 *	Note: Some levels (e.g.
XTI_GENERIC) may be valid but may not have 479 * any supported options so we need this info separately. 480 * 481 * This is needed only for topmost tpi providers. 482 */ 483 optlevel_t tl_valid_levels_arr[] = { 484 XTI_GENERIC, 485 SOL_SOCKET, 486 TL_PROT_LEVEL 487 }; 488 489 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 490 /* 491 * Current upper bound on the amount of space needed to return all options. 492 * Additional options with data size of sizeof(long) are handled automatically. 493 * Others need hand job. 494 */ 495 #define TL_MAX_OPT_BUF_LEN \ 496 ((A_CNT(tl_opt_arr) << 2) + \ 497 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 498 + 64 + sizeof (struct T_optmgmt_ack)) 499 500 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 501 502 /* 503 * transport addr structure 504 */ 505 typedef struct tl_addr { 506 zoneid_t ta_zoneid; /* Zone scope of address */ 507 t_scalar_t ta_alen; /* length of abuf */ 508 void *ta_abuf; /* the addr itself */ 509 } tl_addr_t; 510 511 /* 512 * Refcounted version of serializer. 513 */ 514 typedef struct tl_serializer { 515 uint_t ts_refcnt; 516 serializer_t *ts_serializer; 517 } tl_serializer_t; 518 519 /* 520 * Each transport type has a separate state. 521 * Per-transport state. 
522 */ 523 typedef struct tl_transport_state { 524 char *tr_name; 525 minor_t tr_minor; 526 uint32_t tr_defaddr; 527 mod_hash_t *tr_ai_hash; 528 mod_hash_t *tr_addr_hash; 529 tl_serializer_t *tr_serializer; 530 } tl_transport_state_t; 531 532 #define TL_DFADDR 0x1000 533 534 static tl_transport_state_t tl_transports[] = { 535 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 536 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 537 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 538 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 539 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 540 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 541 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 542 }; 543 544 #define TL_MAXTRANSPORT A_CNT(tl_transports) 545 546 struct tl_endpt; 547 typedef struct tl_endpt tl_endpt_t; 548 549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 550 551 /* 552 * Data structure used to represent pending connects. 553 * Records enough information so that the connecting peer can close 554 * before the connection gets accepted. 555 */ 556 typedef struct tl_icon { 557 list_node_t ti_node; 558 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 559 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 560 t_scalar_t ti_seqno; /* Sequence number */ 561 } tl_icon_t; 562 563 typedef struct so_ux_addr soux_addr_t; 564 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 565 566 /* 567 * Maximum number of unaccepted connection indications allowed per listener. 
 */
#define	TL_MAXQLEN	4096
/* Non-static variable holding the limit; initialized to TL_MAXQLEN. */
int tl_maxqlen = TL_MAXQLEN;

/*
 *	transport endpoint structure
 *
 * Several fields are reached through the te_* alias macros defined inline
 * below; the aliases expand to members of embedded structures or of the
 * connection-oriented/connectionless union, so an alias is only meaningful
 * for the transport type whose union arm it names.
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;	/* reference count (see overview) */
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor	/* te_seqno is an alias for te_minor */
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;	/* set by TL_PUTBQ, cleared by */
					/* TL_QENABLE and TL_PUTQ */
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid te_ap.ta_zoneid
#define	te_alen	te_ap.ta_alen
#define	te_abuf	te_ap.ta_abuf

	tl_transport_state_t *te_transport;	/* per-transport state */
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			/*
			 * NOTE(review): padding present only when _ILP32 is
			 * not defined; purpose is not stated here - confirm
			 * before removing.
			 */
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t  _te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
/* Aliases for the connection-oriented arm of the union. */
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
/* Aliases for the connectionless arm of the union. */
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	/*
	 * NOTE(review): te_srv_lock/te_srv_cv presumably guard and signal the
	 * two *_active flags below - confirm against tl_rsrv()/tl_wsrv().
	 */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};

/*
 * Flag values. Lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 * they make it easier to identify the endpoint.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 * Each one tests a transport mode bit (TL_TICLTS, TL_TICOTSORD, TL_SOCKET)
 * in te_flag. IS_COTS() is the exact negation of IS_CLTS().
 */
#define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 *
 * NOTE(review): all of the macros below expand to a bare { ... } block rather
 * than do { ... } while (0), and they evaluate their argument more than once.
 * A use followed by ';' therefore cannot sit between an unbraced `if' and its
 * `else', and arguments must be free of side effects.
 */

/* Drop the reference held through pointer x and clear the pointer. */
#define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
/* Same as TL_UNCONNECT(), but does nothing when x is already NULL. */
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

/*
 * Put mp back on the write queue of x and set te_nowsrv. Asserts that the
 * endpoint has not set TL_CLOSE_SER.
 */
#define	TL_PUTBQ(x, mp) {		\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
	(x)->te_nowsrv = B_TRUE;		\
	(void) putbq((x)->te_wq, mp);		\
}

/* Clear te_nowsrv and enable the write queue of x. */
#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
/* Clear te_nowsrv and queue mp on the write queue of x. */
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }

/*
 * STREAMS driver glue data structures.
 */
static	struct	module_info	tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};

/*
 * Read side queue initialization: no put procedure, tl_rsrv() as the service
 * procedure, and the open/close entry points.
 *
 * NOTE(review): tl_rsrv(), tl_wput() and tl_wsrv() are declared as returning
 * void and are cast to int (*)() here and in tl_winit.
 */
static	struct	qinit	tl_rinit = {
	NULL,			/* qi_putp */
	(int (*)())tl_rsrv,	/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

/*
 * Write side queue initialization: tl_wput() as the put procedure and
 * tl_wsrv() as the service procedure.
 */
static	struct	qinit	tl_winit = {
	(int (*)())tl_wput,	/* qi_putp */
	(int (*)())tl_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Ties the read and write qinit structures together; no multiplexor side. */
static	struct streamtab	tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};

/*
 * Defines tl_devops from the attach/detach/info entry points and the
 * streamtab above.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};

/*
 * Module linkage information for the kernel.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 */

/* Provider flags placed in the PROVIDER_flag field of the templates below. */
#define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
#define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)

/*
 * T_INFO_ACK template for connection-oriented endpoints. TIDU_size is left
 * as 0 here and is filled in at run time; CURRENT_state is a -1 placeholder.
 */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/*
 * T_INFO_ACK template for connectionless endpoints. TSDU_size and TIDU_size
 * are left as 0 here and are filled in at run time.
 */
static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size  -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
	};

/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/*
 * NOTE(review): the two variables below carry no description in this part of
 * the file; presumably a debug counter and a debug/tuning switch for
 * serializer switching - confirm against their uses.
 */
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;

/*
 *	LOCAL FUNCTION PROTOTYPES
 *	-------------------------
 *
 * Functions taking (mblk_t *, tl_endpt_t *) match the tlproc_t signature.
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static
mblk_t *tl_resizemp(mblk_t *, ssize_t); 863 static void tl_discon_ind(tl_endpt_t *, uint32_t); 864 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 865 static mblk_t *tl_ordrel_ind_alloc(void); 866 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 867 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 868 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 869 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 870 static void tl_icon_freemsgs(mblk_t **); 871 static void tl_merror(queue_t *, mblk_t *, int); 872 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 873 static int tl_default_opt(queue_t *, int, int, uchar_t *); 874 static int tl_get_opt(queue_t *, int, int, uchar_t *); 875 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 876 uchar_t *, void *, cred_t *); 877 static void tl_memrecover(queue_t *, mblk_t *, size_t); 878 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 879 static void tl_free(tl_endpt_t *); 880 static int tl_constructor(void *, void *, int); 881 static void tl_destructor(void *, void *); 882 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 883 static tl_serializer_t *tl_serializer_alloc(int); 884 static void tl_serializer_refhold(tl_serializer_t *); 885 static void tl_serializer_refrele(tl_serializer_t *); 886 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 887 static void tl_serializer_exit(tl_endpt_t *); 888 static boolean_t tl_noclose(tl_endpt_t *); 889 static void tl_closeok(tl_endpt_t *); 890 static void tl_refhold(tl_endpt_t *); 891 static void tl_refrele(tl_endpt_t *); 892 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 893 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 894 static void tl_close_ser(mblk_t *, tl_endpt_t *); 895 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 896 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 897 static void tl_proto_ser(mblk_t 
*, tl_endpt_t *); 898 static void tl_putq_ser(mblk_t *, tl_endpt_t *); 899 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 900 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 901 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 902 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 903 static void tl_addr_unbind(tl_endpt_t *); 904 905 /* 906 * Intialize option database object for TL 907 */ 908 909 optdb_obj_t tl_opt_obj = { 910 tl_default_opt, /* TL default value function pointer */ 911 tl_get_opt, /* TL get function pointer */ 912 tl_set_opt, /* TL set function pointer */ 913 TL_OPT_ARR_CNT, /* TL option database count of entries */ 914 tl_opt_arr, /* TL option database */ 915 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 916 tl_valid_levels_arr /* TL valid level array */ 917 }; 918 919 /* 920 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 921 * --------------------------------------- 922 */ 923 924 /* 925 * Loadable module routines 926 */ 927 int 928 _init(void) 929 { 930 return (mod_install(&modlinkage)); 931 } 932 933 int 934 _fini(void) 935 { 936 return (mod_remove(&modlinkage)); 937 } 938 939 int 940 _info(struct modinfo *modinfop) 941 { 942 return (mod_info(&modlinkage, modinfop)); 943 } 944 945 /* 946 * Driver Entry Points and Other routines 947 */ 948 static int 949 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 950 { 951 int i; 952 char name[32]; 953 954 /* 955 * Resume from a checkpoint state. 956 */ 957 if (cmd == DDI_RESUME) 958 return (DDI_SUCCESS); 959 960 if (cmd != DDI_ATTACH) 961 return (DDI_FAILURE); 962 963 /* 964 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that 965 * streams message sizes can be unlimited. We use a defined constant 966 * instead. 967 */ 968 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 969 970 /* 971 * Create subdevices for each transport. 
972 */ 973 for (i = 0; i < TL_UNUSED; i++) { 974 if (ddi_create_minor_node(devi, 975 tl_transports[i].tr_name, 976 S_IFCHR, tl_transports[i].tr_minor, 977 DDI_PSEUDO, NULL) == DDI_FAILURE) { 978 ddi_remove_minor_node(devi, NULL); 979 return (DDI_FAILURE); 980 } 981 } 982 983 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 984 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 985 986 if (tl_cache == NULL) { 987 ddi_remove_minor_node(devi, NULL); 988 return (DDI_FAILURE); 989 } 990 991 tl_minors = id_space_create("tl_minor_space", 992 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 993 994 /* 995 * Create ID space for minor numbers 996 */ 997 for (i = 0; i < TL_MAXTRANSPORT; i++) { 998 tl_transport_state_t *t = &tl_transports[i]; 999 1000 if (i == TL_UNUSED) 1001 continue; 1002 1003 /* Socket COTSORD shares namespace with COTS */ 1004 if (i == TL_SOCK_COTSORD) { 1005 t->tr_ai_hash = 1006 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1007 ASSERT(t->tr_ai_hash != NULL); 1008 t->tr_addr_hash = 1009 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1010 ASSERT(t->tr_addr_hash != NULL); 1011 continue; 1012 } 1013 1014 /* 1015 * Create hash tables. 
1016 */ 1017 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1018 t->tr_name); 1019 #ifdef _ILP32 1020 if (i & TL_SOCKET) 1021 t->tr_ai_hash = 1022 mod_hash_create_idhash(name, tl_hash_size - 1, 1023 mod_hash_null_valdtor); 1024 else 1025 t->tr_ai_hash = 1026 mod_hash_create_ptrhash(name, tl_hash_size, 1027 mod_hash_null_valdtor, sizeof (queue_t)); 1028 #else 1029 t->tr_ai_hash = 1030 mod_hash_create_idhash(name, tl_hash_size - 1, 1031 mod_hash_null_valdtor); 1032 #endif /* _ILP32 */ 1033 1034 if (i & TL_SOCKET) { 1035 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1036 t->tr_name); 1037 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1038 tl_hash_size, mod_hash_null_valdtor, 1039 sizeof (uintptr_t)); 1040 } else { 1041 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1042 t->tr_name); 1043 t->tr_addr_hash = mod_hash_create_extended(name, 1044 tl_hash_size, mod_hash_null_keydtor, 1045 mod_hash_null_valdtor, 1046 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1047 } 1048 1049 /* Create serializer for connectionless transports. */ 1050 if (i & TL_TICLTS) 1051 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1052 } 1053 1054 tl_dip = devi; 1055 1056 return (DDI_SUCCESS); 1057 } 1058 1059 static int 1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1061 { 1062 int i; 1063 1064 if (cmd == DDI_SUSPEND) 1065 return (DDI_SUCCESS); 1066 1067 if (cmd != DDI_DETACH) 1068 return (DDI_FAILURE); 1069 1070 /* 1071 * Destroy arenas and hash tables. 
1072 */ 1073 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1074 tl_transport_state_t *t = &tl_transports[i]; 1075 1076 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1077 continue; 1078 1079 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL); 1080 if (t->tr_serializer != NULL) { 1081 tl_serializer_refrele(t->tr_serializer); 1082 t->tr_serializer = NULL; 1083 } 1084 1085 #ifdef _ILP32 1086 if (i & TL_SOCKET) 1087 mod_hash_destroy_idhash(t->tr_ai_hash); 1088 else 1089 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1090 #else 1091 mod_hash_destroy_idhash(t->tr_ai_hash); 1092 #endif /* _ILP32 */ 1093 t->tr_ai_hash = NULL; 1094 if (i & TL_SOCKET) 1095 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1096 else 1097 mod_hash_destroy_hash(t->tr_addr_hash); 1098 t->tr_addr_hash = NULL; 1099 } 1100 1101 kmem_cache_destroy(tl_cache); 1102 tl_cache = NULL; 1103 id_space_destroy(tl_minors); 1104 tl_minors = NULL; 1105 ddi_remove_minor_node(devi, NULL); 1106 return (DDI_SUCCESS); 1107 } 1108 1109 /* ARGSUSED */ 1110 static int 1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1112 { 1113 1114 int retcode = DDI_FAILURE; 1115 1116 switch (infocmd) { 1117 1118 case DDI_INFO_DEVT2DEVINFO: 1119 if (tl_dip != NULL) { 1120 *result = (void *)tl_dip; 1121 retcode = DDI_SUCCESS; 1122 } 1123 break; 1124 1125 case DDI_INFO_DEVT2INSTANCE: 1126 *result = (void *)0; 1127 retcode = DDI_SUCCESS; 1128 break; 1129 1130 default: 1131 break; 1132 } 1133 return (retcode); 1134 } 1135 1136 /* 1137 * Endpoint reference management. 
1138 */ 1139 static void 1140 tl_refhold(tl_endpt_t *tep) 1141 { 1142 atomic_inc_32(&tep->te_refcnt); 1143 } 1144 1145 static void 1146 tl_refrele(tl_endpt_t *tep) 1147 { 1148 ASSERT(tep->te_refcnt != 0); 1149 1150 if (atomic_dec_32_nv(&tep->te_refcnt) == 0) 1151 tl_free(tep); 1152 } 1153 1154 /*ARGSUSED*/ 1155 static int 1156 tl_constructor(void *buf, void *cdrarg, int kmflags) 1157 { 1158 tl_endpt_t *tep = buf; 1159 1160 bzero(tep, sizeof (tl_endpt_t)); 1161 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1162 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1163 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1164 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1165 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1166 1167 return (0); 1168 } 1169 1170 /*ARGSUSED*/ 1171 static void 1172 tl_destructor(void *buf, void *cdrarg) 1173 { 1174 tl_endpt_t *tep = buf; 1175 1176 mutex_destroy(&tep->te_closelock); 1177 cv_destroy(&tep->te_closecv); 1178 mutex_destroy(&tep->te_srv_lock); 1179 cv_destroy(&tep->te_srv_cv); 1180 mutex_destroy(&tep->te_ser_lock); 1181 } 1182 1183 static void 1184 tl_free(tl_endpt_t *tep) 1185 { 1186 ASSERT(tep->te_refcnt == 0); 1187 ASSERT(tep->te_transport != NULL); 1188 ASSERT(tep->te_rq == NULL); 1189 ASSERT(tep->te_wq == NULL); 1190 ASSERT(tep->te_ser != NULL); 1191 ASSERT(tep->te_ser_count == 0); 1192 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1193 1194 if (IS_SOCKET(tep)) { 1195 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1196 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1197 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1198 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1199 } else if (tep->te_abuf != NULL) { 1200 kmem_free(tep->te_abuf, tep->te_alen); 1201 tep->te_alen = -1; /* uninitialized */ 1202 tep->te_abuf = NULL; 1203 } else { 1204 ASSERT(tep->te_alen == -1); 1205 } 1206 1207 id_free(tl_minors, tep->te_minor); 1208 ASSERT(tep->te_credp == NULL); 1209 1210 if (tep->te_hash_hndl != NULL) 1211 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1212 1213 if (IS_COTS(tep)) { 1214 TL_REMOVE_PEER(tep->te_conp); 1215 TL_REMOVE_PEER(tep->te_oconp); 1216 tl_serializer_refrele(tep->te_ser); 1217 tep->te_ser = NULL; 1218 ASSERT(tep->te_nicon == 0); 1219 ASSERT(list_head(&tep->te_iconp) == NULL); 1220 } else { 1221 ASSERT(tep->te_lastep == NULL); 1222 ASSERT(list_head(&tep->te_flowlist) == NULL); 1223 ASSERT(tep->te_flowq == NULL); 1224 } 1225 1226 ASSERT(tep->te_bufcid == 0); 1227 ASSERT(tep->te_timoutid == 0); 1228 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1229 tep->te_acceptor_id = 0; 1230 1231 ASSERT(tep->te_closewait == 0); 1232 ASSERT(!tep->te_rsrv_active); 1233 ASSERT(!tep->te_wsrv_active); 1234 tep->te_closing = 0; 1235 tep->te_nowsrv = B_FALSE; 1236 tep->te_flag = 0; 1237 1238 kmem_cache_free(tl_cache, tep); 1239 } 1240 1241 /* 1242 * Allocate/free reference-counted wrappers for serializers. 
1243 */ 1244 static tl_serializer_t * 1245 tl_serializer_alloc(int flags) 1246 { 1247 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1248 serializer_t *ser; 1249 1250 if (s == NULL) 1251 return (NULL); 1252 1253 ser = serializer_create(flags); 1254 1255 if (ser == NULL) { 1256 kmem_free(s, sizeof (tl_serializer_t)); 1257 return (NULL); 1258 } 1259 1260 s->ts_refcnt = 1; 1261 s->ts_serializer = ser; 1262 return (s); 1263 } 1264 1265 static void 1266 tl_serializer_refhold(tl_serializer_t *s) 1267 { 1268 atomic_inc_32(&s->ts_refcnt); 1269 } 1270 1271 static void 1272 tl_serializer_refrele(tl_serializer_t *s) 1273 { 1274 if (atomic_dec_32_nv(&s->ts_refcnt) == 0) { 1275 serializer_destroy(s->ts_serializer); 1276 kmem_free(s, sizeof (tl_serializer_t)); 1277 } 1278 } 1279 1280 /* 1281 * Post a request on the endpoint serializer. For COTS transports keep track of 1282 * the number of pending requests. 1283 */ 1284 static void 1285 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1286 { 1287 if (IS_COTS(tep)) { 1288 mutex_enter(&tep->te_ser_lock); 1289 tep->te_ser_count++; 1290 mutex_exit(&tep->te_ser_lock); 1291 } 1292 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1293 } 1294 1295 /* 1296 * Complete processing the request on the serializer. Decrement the counter for 1297 * pending requests for COTS transports. 1298 */ 1299 static void 1300 tl_serializer_exit(tl_endpt_t *tep) 1301 { 1302 if (IS_COTS(tep)) { 1303 mutex_enter(&tep->te_ser_lock); 1304 ASSERT(tep->te_ser_count != 0); 1305 tep->te_ser_count--; 1306 mutex_exit(&tep->te_ser_lock); 1307 } 1308 } 1309 1310 /* 1311 * Hash management functions. 1312 */ 1313 1314 /* 1315 * Return TRUE if two addresses are equal, false otherwise. 
1316 */ 1317 static boolean_t 1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1319 { 1320 return ((ap1->ta_alen > 0) && 1321 (ap1->ta_alen == ap2->ta_alen) && 1322 (ap1->ta_zoneid == ap2->ta_zoneid) && 1323 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1324 } 1325 1326 /* 1327 * This function is called whenever an endpoint is found in the hash table. 1328 */ 1329 /* ARGSUSED0 */ 1330 static void 1331 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1332 { 1333 tl_refhold((tl_endpt_t *)val); 1334 } 1335 1336 /* 1337 * Address hash function. 1338 */ 1339 /* ARGSUSED */ 1340 static uint_t 1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1342 { 1343 tl_addr_t *ap = (tl_addr_t *)key; 1344 size_t len = ap->ta_alen; 1345 uchar_t *p = ap->ta_abuf; 1346 uint_t i, g; 1347 1348 ASSERT((len > 0) && (p != NULL)); 1349 1350 for (i = ap->ta_zoneid; len -- != 0; p++) { 1351 i = (i << 4) + (*p); 1352 if ((g = (i & 0xf0000000U)) != 0) { 1353 i ^= (g >> 24); 1354 i ^= g; 1355 } 1356 } 1357 return (i); 1358 } 1359 1360 /* 1361 * This function is used by hash lookups. It compares two generic addresses. 1362 */ 1363 static int 1364 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1365 { 1366 #ifdef DEBUG 1367 tl_addr_t *ap1 = (tl_addr_t *)key1; 1368 tl_addr_t *ap2 = (tl_addr_t *)key2; 1369 1370 ASSERT(key1 != NULL); 1371 ASSERT(key2 != NULL); 1372 1373 ASSERT(ap1->ta_abuf != NULL); 1374 ASSERT(ap2->ta_abuf != NULL); 1375 ASSERT(ap1->ta_alen > 0); 1376 ASSERT(ap2->ta_alen > 0); 1377 #endif 1378 1379 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1380 } 1381 1382 /* 1383 * Prevent endpoint from closing if possible. 1384 * Return B_TRUE on success, B_FALSE on failure. 1385 */ 1386 static boolean_t 1387 tl_noclose(tl_endpt_t *tep) 1388 { 1389 boolean_t rc = B_FALSE; 1390 1391 mutex_enter(&tep->te_closelock); 1392 if (! 
tep->te_closing) { 1393 ASSERT(tep->te_closewait == 0); 1394 tep->te_closewait++; 1395 rc = B_TRUE; 1396 } 1397 mutex_exit(&tep->te_closelock); 1398 return (rc); 1399 } 1400 1401 /* 1402 * Allow endpoint to close if needed. 1403 */ 1404 static void 1405 tl_closeok(tl_endpt_t *tep) 1406 { 1407 ASSERT(tep->te_closewait > 0); 1408 mutex_enter(&tep->te_closelock); 1409 ASSERT(tep->te_closewait == 1); 1410 tep->te_closewait--; 1411 cv_signal(&tep->te_closecv); 1412 mutex_exit(&tep->te_closelock); 1413 } 1414 1415 /* 1416 * STREAMS open entry point. 1417 */ 1418 /* ARGSUSED */ 1419 static int 1420 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1421 { 1422 tl_endpt_t *tep; 1423 minor_t minor = getminor(*devp); 1424 1425 /* 1426 * Driver is called directly. Both CLONEOPEN and MODOPEN 1427 * are illegal 1428 */ 1429 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1430 return (ENXIO); 1431 1432 if (rq->q_ptr != NULL) 1433 return (0); 1434 1435 /* Minor number should specify the mode used for the driver. */ 1436 if ((minor >= TL_UNUSED)) 1437 return (ENXIO); 1438 1439 if (oflag & SO_SOCKSTR) { 1440 minor |= TL_SOCKET; 1441 } 1442 1443 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1444 tep->te_refcnt = 1; 1445 tep->te_cpid = curproc->p_pid; 1446 rq->q_ptr = WR(rq)->q_ptr = tep; 1447 tep->te_state = TS_UNBND; 1448 tep->te_credp = credp; 1449 crhold(credp); 1450 tep->te_zoneid = getzoneid(); 1451 1452 tep->te_flag = minor & TL_MINOR_MASK; 1453 tep->te_transport = &tl_transports[minor]; 1454 1455 /* Allocate a unique minor number for this instance. */ 1456 tep->te_minor = (minor_t)id_alloc(tl_minors); 1457 1458 /* Reserve hash handle for bind(). 
*/ 1459 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1460 1461 /* Transport-specific initialization */ 1462 if (IS_COTS(tep)) { 1463 /* Use private serializer */ 1464 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1465 1466 /* Create list for pending connections */ 1467 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1468 offsetof(tl_icon_t, ti_node)); 1469 tep->te_qlen = 0; 1470 tep->te_nicon = 0; 1471 tep->te_oconp = NULL; 1472 tep->te_conp = NULL; 1473 } else { 1474 /* Use shared serializer */ 1475 tep->te_ser = tep->te_transport->tr_serializer; 1476 bzero(&tep->te_flows, sizeof (list_node_t)); 1477 /* Create list for flow control */ 1478 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1479 offsetof(tl_endpt_t, te_flows)); 1480 tep->te_flowq = NULL; 1481 tep->te_lastep = NULL; 1482 1483 } 1484 1485 /* Initialize endpoint address */ 1486 if (IS_SOCKET(tep)) { 1487 /* Socket-specific address handling. */ 1488 tep->te_alen = TL_SOUX_ADDRLEN; 1489 tep->te_abuf = &tep->te_uxaddr; 1490 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1491 tep->te_magic = SOU_MAGIC_IMPLICIT; 1492 } else { 1493 tep->te_alen = -1; 1494 tep->te_abuf = NULL; 1495 } 1496 1497 /* clone the driver */ 1498 *devp = makedevice(getmajor(*devp), tep->te_minor); 1499 1500 tep->te_rq = rq; 1501 tep->te_wq = WR(rq); 1502 1503 #ifdef _ILP32 1504 if (IS_SOCKET(tep)) 1505 tep->te_acceptor_id = tep->te_minor; 1506 else 1507 tep->te_acceptor_id = (t_uscalar_t)rq; 1508 #else 1509 tep->te_acceptor_id = tep->te_minor; 1510 #endif /* _ILP32 */ 1511 1512 1513 qprocson(rq); 1514 1515 /* 1516 * Insert acceptor ID in the hash. The AI hash always sleeps on 1517 * insertion so insertion can't fail. 
1518 */ 1519 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1520 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1521 (mod_hash_val_t)tep); 1522 1523 return (0); 1524 } 1525 1526 /* ARGSUSED1 */ 1527 static int 1528 tl_close(queue_t *rq, int flag, cred_t *credp) 1529 { 1530 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1531 tl_endpt_t *elp = NULL; 1532 queue_t *wq = tep->te_wq; 1533 int rc; 1534 1535 ASSERT(wq == WR(rq)); 1536 1537 /* 1538 * Remove the endpoint from acceptor hash. 1539 */ 1540 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1541 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1542 (mod_hash_val_t *)&elp); 1543 ASSERT(rc == 0 && tep == elp); 1544 if ((rc != 0) || (tep != elp)) { 1545 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1546 SL_TRACE|SL_ERROR, 1547 "tl_close:inconsistency in AI hash")); 1548 } 1549 1550 /* 1551 * Wait till close is safe, then mark endpoint as closing. 1552 */ 1553 mutex_enter(&tep->te_closelock); 1554 while (tep->te_closewait) 1555 cv_wait(&tep->te_closecv, &tep->te_closelock); 1556 tep->te_closing = B_TRUE; 1557 /* 1558 * Will wait for the serializer part of the close to finish, so set 1559 * te_closewait now. 1560 */ 1561 tep->te_closewait = 1; 1562 tep->te_nowsrv = B_FALSE; 1563 mutex_exit(&tep->te_closelock); 1564 1565 /* 1566 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1567 * It is safe because close will wait for tl_close_ser to finish. 1568 */ 1569 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1570 1571 /* 1572 * Wait for the first phase of close to complete before qprocsoff(). 
1573 */ 1574 mutex_enter(&tep->te_closelock); 1575 while (tep->te_closewait) 1576 cv_wait(&tep->te_closecv, &tep->te_closelock); 1577 mutex_exit(&tep->te_closelock); 1578 1579 qprocsoff(rq); 1580 1581 if (tep->te_bufcid) { 1582 qunbufcall(rq, tep->te_bufcid); 1583 tep->te_bufcid = 0; 1584 } 1585 if (tep->te_timoutid) { 1586 (void) quntimeout(rq, tep->te_timoutid); 1587 tep->te_timoutid = 0; 1588 } 1589 1590 /* 1591 * Finish close behind serializer. 1592 * 1593 * For a CLTS endpoint increase a refcount and continue close processing 1594 * with serializer protection. This processing may happen asynchronously 1595 * with the completion of tl_close(). 1596 * 1597 * Fot a COTS endpoint wait before destroying tep since the serializer 1598 * may go away together with tep and we need to destroy serializer 1599 * outside of serializer context. 1600 */ 1601 ASSERT(tep->te_closewait == 0); 1602 if (IS_COTS(tep)) 1603 tep->te_closewait = 1; 1604 else 1605 tl_refhold(tep); 1606 1607 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1608 1609 /* 1610 * For connection-oriented transports wait for all serializer activity 1611 * to settle down. 1612 */ 1613 if (IS_COTS(tep)) { 1614 mutex_enter(&tep->te_closelock); 1615 while (tep->te_closewait) 1616 cv_wait(&tep->te_closecv, &tep->te_closelock); 1617 mutex_exit(&tep->te_closelock); 1618 } 1619 1620 crfree(tep->te_credp); 1621 tep->te_credp = NULL; 1622 tep->te_wq = NULL; 1623 tl_refrele(tep); 1624 /* 1625 * tep is likely to be destroyed now, so can't reference it any more. 1626 */ 1627 1628 rq->q_ptr = wq->q_ptr = NULL; 1629 return (0); 1630 } 1631 1632 /* 1633 * First phase of close processing done behind the serializer. 1634 * 1635 * Do not drop the reference in the end - tl_close() wants this reference to 1636 * stay. 
1637 */ 1638 /* ARGSUSED0 */ 1639 static void 1640 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1641 { 1642 ASSERT(tep->te_closing); 1643 ASSERT(tep->te_closewait == 1); 1644 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1645 1646 tep->te_flag |= TL_CLOSE_SER; 1647 1648 /* 1649 * Drain out all messages on queue except for TL_TICOTS where the 1650 * abortive release semantics permit discarding of data on close 1651 */ 1652 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1653 tl_wsrv_ser(NULL, tep); 1654 } 1655 1656 /* Remove address from hash table. */ 1657 tl_addr_unbind(tep); 1658 /* 1659 * qprocsoff() gets confused when q->q_next is not NULL on the write 1660 * queue of the driver, so clear these before qprocsoff() is called. 1661 * Also clear q_next for the peer since this queue is going away. 1662 */ 1663 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1664 tl_endpt_t *peer_tep = tep->te_conp; 1665 1666 tep->te_wq->q_next = NULL; 1667 if ((peer_tep != NULL) && !peer_tep->te_closing) 1668 peer_tep->te_wq->q_next = NULL; 1669 } 1670 1671 tep->te_rq = NULL; 1672 1673 /* wake up tl_close() */ 1674 tl_closeok(tep); 1675 tl_serializer_exit(tep); 1676 } 1677 1678 /* 1679 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1680 * the reference for CLTS. 1681 * 1682 * Called from serializer. Should drop reference count for CLTS only. 1683 */ 1684 /* ARGSUSED0 */ 1685 static void 1686 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1687 { 1688 ASSERT(tep->te_closing); 1689 IMPLY(IS_CLTS(tep), tep->te_closewait == 0); 1690 IMPLY(IS_COTS(tep), tep->te_closewait == 1); 1691 1692 tep->te_state = -1; /* Uninitialized */ 1693 if (IS_COTS(tep)) { 1694 tl_co_unconnect(tep); 1695 } else { 1696 /* Connectionless specific cleanup */ 1697 TL_REMOVE_PEER(tep->te_lastep); 1698 /* 1699 * Backenable anybody that is flow controlled waiting for 1700 * this endpoint. 
1701 */ 1702 tl_cl_backenable(tep); 1703 if (tep->te_flowq != NULL) { 1704 list_remove(&(tep->te_flowq->te_flowlist), tep); 1705 tep->te_flowq = NULL; 1706 } 1707 } 1708 1709 tl_serializer_exit(tep); 1710 if (IS_COTS(tep)) 1711 tl_closeok(tep); 1712 else 1713 tl_refrele(tep); 1714 } 1715 1716 /* 1717 * STREAMS write-side put procedure. 1718 * Enter serializer for most of the processing. 1719 * 1720 * The T_CONN_REQ is processed outside of serializer. 1721 */ 1722 static void 1723 tl_wput(queue_t *wq, mblk_t *mp) 1724 { 1725 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1726 ssize_t msz = MBLKL(mp); 1727 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1728 tlproc_t *tl_proc = NULL; 1729 1730 switch (DB_TYPE(mp)) { 1731 case M_DATA: 1732 /* Only valid for connection-oriented transports */ 1733 if (IS_CLTS(tep)) { 1734 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1735 SL_TRACE|SL_ERROR, 1736 "tl_wput:M_DATA invalid for ticlts driver")); 1737 tl_merror(wq, mp, EPROTO); 1738 return; 1739 } 1740 tl_proc = tl_wput_data_ser; 1741 break; 1742 1743 case M_IOCTL: 1744 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1745 case TL_IOC_CREDOPT: 1746 /* FALLTHROUGH */ 1747 case TL_IOC_UCREDOPT: 1748 /* 1749 * Serialize endpoint state change. 
1750 */ 1751 tl_proc = tl_do_ioctl_ser; 1752 break; 1753 1754 default: 1755 miocnak(wq, mp, 0, EINVAL); 1756 return; 1757 } 1758 break; 1759 1760 case M_FLUSH: 1761 /* 1762 * do canonical M_FLUSH processing 1763 */ 1764 if (*mp->b_rptr & FLUSHW) { 1765 flushq(wq, FLUSHALL); 1766 *mp->b_rptr &= ~FLUSHW; 1767 } 1768 if (*mp->b_rptr & FLUSHR) { 1769 flushq(RD(wq), FLUSHALL); 1770 qreply(wq, mp); 1771 } else { 1772 freemsg(mp); 1773 } 1774 return; 1775 1776 case M_PROTO: 1777 if (msz < sizeof (prim->type)) { 1778 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1779 SL_TRACE|SL_ERROR, 1780 "tl_wput:M_PROTO data too short")); 1781 tl_merror(wq, mp, EPROTO); 1782 return; 1783 } 1784 switch (prim->type) { 1785 case T_OPTMGMT_REQ: 1786 case T_SVR4_OPTMGMT_REQ: 1787 /* 1788 * Process TPI option management requests immediately 1789 * in put procedure regardless of in-order processing 1790 * of already queued messages. 1791 * (Note: This driver supports AF_UNIX socket 1792 * implementation. Unless we implement this processing, 1793 * setsockopt() on socket endpoint will block on flow 1794 * controlled endpoints which it should not. That is 1795 * required for successful execution of VSU socket tests 1796 * and is consistent with BSD socket behavior). 
1797 */ 1798 tl_optmgmt(wq, mp); 1799 return; 1800 case O_T_BIND_REQ: 1801 case T_BIND_REQ: 1802 tl_proc = tl_bind_ser; 1803 break; 1804 case T_CONN_REQ: 1805 if (IS_CLTS(tep)) { 1806 tl_merror(wq, mp, EPROTO); 1807 return; 1808 } 1809 tl_conn_req(wq, mp); 1810 return; 1811 case T_DATA_REQ: 1812 case T_OPTDATA_REQ: 1813 case T_EXDATA_REQ: 1814 case T_ORDREL_REQ: 1815 tl_proc = tl_putq_ser; 1816 break; 1817 case T_UNITDATA_REQ: 1818 if (IS_COTS(tep) || 1819 (msz < sizeof (struct T_unitdata_req))) { 1820 tl_merror(wq, mp, EPROTO); 1821 return; 1822 } 1823 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1824 tl_proc = tl_unitdata_ser; 1825 } else { 1826 tl_proc = tl_putq_ser; 1827 } 1828 break; 1829 default: 1830 /* 1831 * process in service procedure if message already 1832 * queued (maintain in-order processing) 1833 */ 1834 if (wq->q_first != NULL) { 1835 tl_proc = tl_putq_ser; 1836 } else { 1837 tl_proc = tl_wput_ser; 1838 } 1839 break; 1840 } 1841 break; 1842 1843 case M_PCPROTO: 1844 /* 1845 * Check that the message has enough data to figure out TPI 1846 * primitive. 1847 */ 1848 if (msz < sizeof (prim->type)) { 1849 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1850 SL_TRACE|SL_ERROR, 1851 "tl_wput:M_PCROTO data too short")); 1852 tl_merror(wq, mp, EPROTO); 1853 return; 1854 } 1855 switch (prim->type) { 1856 case T_CAPABILITY_REQ: 1857 tl_capability_req(mp, tep); 1858 return; 1859 case T_INFO_REQ: 1860 tl_proc = tl_info_req_ser; 1861 break; 1862 case T_ADDR_REQ: 1863 tl_proc = tl_addr_req_ser; 1864 break; 1865 1866 default: 1867 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1868 SL_TRACE|SL_ERROR, 1869 "tl_wput:unknown TPI msg primitive")); 1870 tl_merror(wq, mp, EPROTO); 1871 return; 1872 } 1873 break; 1874 default: 1875 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1876 "tl_wput:default:unexpected Streams message")); 1877 freemsg(mp); 1878 return; 1879 } 1880 1881 /* 1882 * Continue processing via serializer. 
1883 */ 1884 ASSERT(tl_proc != NULL); 1885 tl_refhold(tep); 1886 tl_serializer_enter(tep, tl_proc, mp); 1887 } 1888 1889 /* 1890 * Place message on the queue while preserving order. 1891 */ 1892 static void 1893 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1894 { 1895 if (tep->te_closing) { 1896 tl_wput_ser(mp, tep); 1897 } else { 1898 TL_PUTQ(tep, mp); 1899 tl_serializer_exit(tep); 1900 tl_refrele(tep); 1901 } 1902 1903 } 1904 1905 static void 1906 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1907 { 1908 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1909 1910 switch (DB_TYPE(mp)) { 1911 case M_DATA: 1912 tl_data(mp, tep); 1913 break; 1914 case M_PROTO: 1915 tl_do_proto(mp, tep); 1916 break; 1917 default: 1918 freemsg(mp); 1919 break; 1920 } 1921 } 1922 1923 /* 1924 * Write side put procedure called from serializer. 1925 */ 1926 static void 1927 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1928 { 1929 tl_wput_common_ser(mp, tep); 1930 tl_serializer_exit(tep); 1931 tl_refrele(tep); 1932 } 1933 1934 /* 1935 * M_DATA processing. Called from serializer. 1936 */ 1937 static void 1938 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1939 { 1940 tl_endpt_t *peer_tep = tep->te_conp; 1941 queue_t *peer_rq; 1942 1943 ASSERT(DB_TYPE(mp) == M_DATA); 1944 ASSERT(IS_COTS(tep)); 1945 1946 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer); 1947 1948 /* 1949 * fastpath for data. Ignore flow control if tep is closing. 
1950 */ 1951 if ((peer_tep != NULL) && 1952 !peer_tep->te_closing && 1953 ((tep->te_state == TS_DATA_XFER) || 1954 (tep->te_state == TS_WREQ_ORDREL)) && 1955 (tep->te_wq != NULL) && 1956 (tep->te_wq->q_first == NULL) && 1957 ((peer_tep->te_state == TS_DATA_XFER) || 1958 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1959 ((peer_rq = peer_tep->te_rq) != NULL) && 1960 (canputnext(peer_rq) || tep->te_closing)) { 1961 putnext(peer_rq, mp); 1962 } else if (tep->te_closing) { 1963 /* 1964 * It is possible that by the time we got here tep started to 1965 * close. If the write queue is not empty, and the state is 1966 * TS_DATA_XFER the data should be delivered in order, so we 1967 * call putq() instead of freeing the data. 1968 */ 1969 if ((tep->te_wq != NULL) && 1970 ((tep->te_state == TS_DATA_XFER) || 1971 (tep->te_state == TS_WREQ_ORDREL))) { 1972 TL_PUTQ(tep, mp); 1973 } else { 1974 freemsg(mp); 1975 } 1976 } else { 1977 TL_PUTQ(tep, mp); 1978 } 1979 1980 tl_serializer_exit(tep); 1981 tl_refrele(tep); 1982 } 1983 1984 /* 1985 * Write side service routine. 1986 * 1987 * All actual processing happens within serializer which is entered 1988 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1989 * messages that need processing may have arrived, so tl_wsrv repeats until 1990 * queue is empty or te_nowsrv is set. 1991 */ 1992 static void 1993 tl_wsrv(queue_t *wq) 1994 { 1995 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1996 1997 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 1998 mutex_enter(&tep->te_srv_lock); 1999 ASSERT(tep->te_wsrv_active == B_FALSE); 2000 tep->te_wsrv_active = B_TRUE; 2001 mutex_exit(&tep->te_srv_lock); 2002 2003 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2004 2005 /* 2006 * Wait for serializer job to complete. 
2007 */ 2008 mutex_enter(&tep->te_srv_lock); 2009 while (tep->te_wsrv_active) { 2010 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2011 } 2012 cv_signal(&tep->te_srv_cv); 2013 mutex_exit(&tep->te_srv_lock); 2014 } 2015 } 2016 2017 /* 2018 * Serialized write side processing of the STREAMS queue. 2019 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2020 * is NULL. 2021 */ 2022 static void 2023 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2024 { 2025 mblk_t *mp; 2026 queue_t *wq = tep->te_wq; 2027 2028 ASSERT(wq != NULL); 2029 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2030 tl_wput_common_ser(mp, tep); 2031 } 2032 2033 /* 2034 * Wakeup service routine unless called from close. 2035 * If ser_mp is specified, the caller is tl_wsrv(). 2036 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2037 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2038 * be no matching tl_serializer_exit() in this case. 2039 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2040 * waiting on te_srv_cv. 2041 */ 2042 if (ser_mp != NULL) { 2043 /* 2044 * We are called from tl_wsrv. 2045 */ 2046 mutex_enter(&tep->te_srv_lock); 2047 ASSERT(tep->te_wsrv_active); 2048 tep->te_wsrv_active = B_FALSE; 2049 cv_signal(&tep->te_srv_cv); 2050 mutex_exit(&tep->te_srv_lock); 2051 tl_serializer_exit(tep); 2052 } 2053 } 2054 2055 /* 2056 * Called when the stream is backenabled. Enter serializer and qenable everyone 2057 * flow controlled by tep. 2058 * 2059 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2060 * is possible that two instances of tl_rsrv will be running reusing the same 2061 * rsrv mblk. 
2062 */ 2063 static void 2064 tl_rsrv(queue_t *rq) 2065 { 2066 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2067 2068 ASSERT(rq->q_first == NULL); 2069 ASSERT(tep->te_rsrv_active == 0); 2070 2071 tep->te_rsrv_active = B_TRUE; 2072 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2073 /* 2074 * Wait for serializer job to complete. 2075 */ 2076 mutex_enter(&tep->te_srv_lock); 2077 while (tep->te_rsrv_active) { 2078 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2079 } 2080 cv_signal(&tep->te_srv_cv); 2081 mutex_exit(&tep->te_srv_lock); 2082 } 2083 2084 /* ARGSUSED */ 2085 static void 2086 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2087 { 2088 tl_endpt_t *peer_tep; 2089 2090 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2091 tl_cl_backenable(tep); 2092 } else if ( 2093 IS_COTS(tep) && 2094 ((peer_tep = tep->te_conp) != NULL) && 2095 !peer_tep->te_closing && 2096 ((tep->te_state == TS_DATA_XFER) || 2097 (tep->te_state == TS_WIND_ORDREL)|| 2098 (tep->te_state == TS_WREQ_ORDREL))) { 2099 TL_QENABLE(peer_tep); 2100 } 2101 2102 /* 2103 * Wakeup read side service routine. 2104 */ 2105 mutex_enter(&tep->te_srv_lock); 2106 ASSERT(tep->te_rsrv_active); 2107 tep->te_rsrv_active = B_FALSE; 2108 cv_signal(&tep->te_srv_cv); 2109 mutex_exit(&tep->te_srv_lock); 2110 tl_serializer_exit(tep); 2111 } 2112 2113 /* 2114 * process M_PROTO messages. Always called from serializer. 2115 */ 2116 static void 2117 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2118 { 2119 ssize_t msz = MBLKL(mp); 2120 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2121 2122 /* Message size was validated by tl_wput(). 
*/ 2123 ASSERT(msz >= sizeof (prim->type)); 2124 2125 switch (prim->type) { 2126 case T_UNBIND_REQ: 2127 tl_unbind(mp, tep); 2128 break; 2129 2130 case T_ADDR_REQ: 2131 tl_addr_req(mp, tep); 2132 break; 2133 2134 case O_T_CONN_RES: 2135 case T_CONN_RES: 2136 if (IS_CLTS(tep)) { 2137 tl_merror(tep->te_wq, mp, EPROTO); 2138 break; 2139 } 2140 tl_conn_res(mp, tep); 2141 break; 2142 2143 case T_DISCON_REQ: 2144 if (IS_CLTS(tep)) { 2145 tl_merror(tep->te_wq, mp, EPROTO); 2146 break; 2147 } 2148 tl_discon_req(mp, tep); 2149 break; 2150 2151 case T_DATA_REQ: 2152 if (IS_CLTS(tep)) { 2153 tl_merror(tep->te_wq, mp, EPROTO); 2154 break; 2155 } 2156 tl_data(mp, tep); 2157 break; 2158 2159 case T_OPTDATA_REQ: 2160 if (IS_CLTS(tep)) { 2161 tl_merror(tep->te_wq, mp, EPROTO); 2162 break; 2163 } 2164 tl_data(mp, tep); 2165 break; 2166 2167 case T_EXDATA_REQ: 2168 if (IS_CLTS(tep)) { 2169 tl_merror(tep->te_wq, mp, EPROTO); 2170 break; 2171 } 2172 tl_exdata(mp, tep); 2173 break; 2174 2175 case T_ORDREL_REQ: 2176 if (! IS_COTSORD(tep)) { 2177 tl_merror(tep->te_wq, mp, EPROTO); 2178 break; 2179 } 2180 tl_ordrel(mp, tep); 2181 break; 2182 2183 case T_UNITDATA_REQ: 2184 if (IS_COTS(tep)) { 2185 tl_merror(tep->te_wq, mp, EPROTO); 2186 break; 2187 } 2188 tl_unitdata(mp, tep); 2189 break; 2190 2191 default: 2192 tl_merror(tep->te_wq, mp, EPROTO); 2193 break; 2194 } 2195 } 2196 2197 /* 2198 * Process ioctl from serializer. 2199 * This is a wrapper around tl_do_ioctl(). 2200 */ 2201 static void 2202 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2203 { 2204 if (! 
tep->te_closing) 2205 tl_do_ioctl(mp, tep); 2206 else 2207 freemsg(mp); 2208 2209 tl_serializer_exit(tep); 2210 tl_refrele(tep); 2211 } 2212 2213 static void 2214 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2215 { 2216 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2217 int cmd = iocbp->ioc_cmd; 2218 queue_t *wq = tep->te_wq; 2219 int error; 2220 int thisopt, otheropt; 2221 2222 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2223 2224 switch (cmd) { 2225 case TL_IOC_CREDOPT: 2226 if (cmd == TL_IOC_CREDOPT) { 2227 thisopt = TL_SETCRED; 2228 otheropt = TL_SETUCRED; 2229 } else { 2230 /* FALLTHROUGH */ 2231 case TL_IOC_UCREDOPT: 2232 thisopt = TL_SETUCRED; 2233 otheropt = TL_SETCRED; 2234 } 2235 /* 2236 * The credentials passing does not apply to sockets. 2237 * Only one of the cred options can be set at a given time. 2238 */ 2239 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2240 miocnak(wq, mp, 0, EINVAL); 2241 return; 2242 } 2243 2244 /* 2245 * Turn on generation of credential options for 2246 * T_conn_req, T_conn_con, T_unidata_ind. 
2247 */ 2248 error = miocpullup(mp, sizeof (uint32_t)); 2249 if (error != 0) { 2250 miocnak(wq, mp, 0, error); 2251 return; 2252 } 2253 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2254 miocnak(wq, mp, 0, EINVAL); 2255 return; 2256 } 2257 2258 if (*(uint32_t *)mp->b_cont->b_rptr) 2259 tep->te_flag |= thisopt; 2260 else 2261 tep->te_flag &= ~thisopt; 2262 2263 miocack(wq, mp, 0, 0); 2264 break; 2265 2266 default: 2267 /* Should not be here */ 2268 miocnak(wq, mp, 0, EINVAL); 2269 break; 2270 } 2271 } 2272 2273 2274 /* 2275 * send T_ERROR_ACK 2276 * Note: assumes enough memory or caller passed big enough mp 2277 * - no recovery from allocb failures 2278 */ 2279 2280 static void 2281 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2282 t_scalar_t unix_err, t_scalar_t type) 2283 { 2284 struct T_error_ack *err_ack; 2285 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2286 M_PCPROTO, T_ERROR_ACK); 2287 2288 if (ackmp == NULL) { 2289 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2290 "tl_error_ack:out of mblk memory")); 2291 tl_merror(wq, NULL, ENOSR); 2292 return; 2293 } 2294 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2295 err_ack->ERROR_prim = type; 2296 err_ack->TLI_error = tli_err; 2297 err_ack->UNIX_error = unix_err; 2298 2299 /* 2300 * send error ack message 2301 */ 2302 qreply(wq, ackmp); 2303 } 2304 2305 2306 2307 /* 2308 * send T_OK_ACK 2309 * Note: assumes enough memory or caller passed big enough mp 2310 * - no recovery from allocb failures 2311 */ 2312 static void 2313 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2314 { 2315 struct T_ok_ack *ok_ack; 2316 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2317 M_PCPROTO, T_OK_ACK); 2318 2319 if (ackmp == NULL) { 2320 tl_merror(wq, NULL, ENOMEM); 2321 return; 2322 } 2323 2324 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2325 ok_ack->CORRECT_prim = type; 2326 2327 (void) qreply(wq, ackmp); 2328 } 2329 2330 /* 2331 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2332 * This is a wrapper around tl_bind(). 2333 */ 2334 static void 2335 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2336 { 2337 if (! tep->te_closing) 2338 tl_bind(mp, tep); 2339 else 2340 freemsg(mp); 2341 2342 tl_serializer_exit(tep); 2343 tl_refrele(tep); 2344 } 2345 2346 /* 2347 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2348 * Assumes that the endpoint is in the unbound. 2349 */ 2350 static void 2351 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2352 { 2353 queue_t *wq = tep->te_wq; 2354 struct T_bind_ack *b_ack; 2355 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2356 mblk_t *ackmp, *bamp; 2357 soux_addr_t ux_addr; 2358 t_uscalar_t qlen = 0; 2359 t_scalar_t alen, aoff; 2360 tl_addr_t addr_req; 2361 void *addr_startp; 2362 ssize_t msz = MBLKL(mp), basize; 2363 t_scalar_t tli_err = 0, unix_err = 0; 2364 t_scalar_t save_prim_type = bind->PRIM_type; 2365 t_scalar_t save_state = tep->te_state; 2366 2367 if (tep->te_state != TS_UNBND) { 2368 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2369 SL_TRACE|SL_ERROR, 2370 "tl_wput:bind_request:out of state, state=%d", 2371 tep->te_state)); 2372 tli_err = TOUTSTATE; 2373 goto error; 2374 } 2375 2376 if (msz < sizeof (struct T_bind_req)) { 2377 tli_err = TSYSERR; unix_err = EINVAL; 2378 goto error; 2379 } 2380 2381 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2382 2383 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2384 (bind->PRIM_type == T_BIND_REQ)); 2385 2386 alen = bind->ADDR_length; 2387 aoff = bind->ADDR_offset; 2388 2389 /* negotiate max conn req pending */ 2390 if (IS_COTS(tep)) { 2391 qlen = bind->CONIND_number; 2392 if (qlen > tl_maxqlen) 2393 qlen = tl_maxqlen; 2394 } 2395 2396 /* 2397 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2398 * and bound again. 
2399 */ 2400 if ((tep->te_hash_hndl == NULL) && 2401 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2402 mod_hash_reserve_nosleep(tep->te_addrhash, 2403 &tep->te_hash_hndl) != 0) { 2404 tli_err = TSYSERR; unix_err = ENOSR; 2405 goto error; 2406 } 2407 2408 /* 2409 * Verify address correctness. 2410 */ 2411 if (IS_SOCKET(tep)) { 2412 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2413 2414 if ((alen != TL_SOUX_ADDRLEN) || 2415 (aoff < 0) || 2416 (aoff + alen > msz)) { 2417 (void) (STRLOG(TL_ID, tep->te_minor, 2418 1, SL_TRACE|SL_ERROR, 2419 "tl_bind: invalid socket addr")); 2420 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2421 tli_err = TSYSERR; unix_err = EINVAL; 2422 goto error; 2423 } 2424 /* Copy address from message to local buffer. */ 2425 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2426 /* 2427 * Check that we got correct address from sockets 2428 */ 2429 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2430 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2431 (void) (STRLOG(TL_ID, tep->te_minor, 2432 1, SL_TRACE|SL_ERROR, 2433 "tl_bind: invalid socket magic")); 2434 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2435 tli_err = TSYSERR; unix_err = EINVAL; 2436 goto error; 2437 } 2438 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2439 (ux_addr.soua_vp != NULL)) { 2440 (void) (STRLOG(TL_ID, tep->te_minor, 2441 1, SL_TRACE|SL_ERROR, 2442 "tl_bind: implicit addr non-empty")); 2443 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2444 tli_err = TSYSERR; unix_err = EINVAL; 2445 goto error; 2446 } 2447 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2448 (ux_addr.soua_vp == NULL)) { 2449 (void) (STRLOG(TL_ID, tep->te_minor, 2450 1, SL_TRACE|SL_ERROR, 2451 "tl_bind: explicit addr empty")); 2452 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2453 tli_err = TSYSERR; unix_err = EINVAL; 2454 goto error; 2455 } 2456 } else { 2457 if ((alen > 0) && ((aoff < 0) || 2458 ((ssize_t)(aoff + alen) > msz) || 2459 ((aoff + alen) < 0))) { 
2460 (void) (STRLOG(TL_ID, tep->te_minor, 2461 1, SL_TRACE|SL_ERROR, 2462 "tl_bind: invalid message")); 2463 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2464 tli_err = TSYSERR; unix_err = EINVAL; 2465 goto error; 2466 } 2467 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2468 (void) (STRLOG(TL_ID, tep->te_minor, 2469 1, SL_TRACE|SL_ERROR, 2470 "tl_bind: bad addr in message")); 2471 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2472 tli_err = TBADADDR; 2473 goto error; 2474 } 2475 #ifdef DEBUG 2476 /* 2477 * Mild form of ASSERT()ion to detect broken TPI apps. 2478 * if (! assertion) 2479 * log warning; 2480 */ 2481 if (! ((alen == 0 && aoff == 0) || 2482 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2483 (void) (STRLOG(TL_ID, tep->te_minor, 2484 3, SL_TRACE|SL_ERROR, 2485 "tl_bind: addr overlaps TPI message")); 2486 } 2487 #endif 2488 } 2489 2490 /* 2491 * Bind the address provided or allocate one if requested. 2492 * Allow rebinds with a new qlen value. 2493 */ 2494 if (IS_SOCKET(tep)) { 2495 /* 2496 * For anonymous requests the te_ap is already set up properly 2497 * so use minor number as an address. 2498 * For explicit requests need to check whether the address is 2499 * already in use. 2500 */ 2501 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2502 int rc; 2503 2504 if (tep->te_flag & TL_ADDRHASHED) { 2505 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2506 if (tep->te_vp == ux_addr.soua_vp) 2507 goto skip_addr_bind; 2508 else /* Rebind to a new address. */ 2509 tl_addr_unbind(tep); 2510 } 2511 /* 2512 * Insert address in the hash if it is not already 2513 * there. Since we use preallocated handle, the insert 2514 * can fail only if the key is already present. 
2515 */ 2516 rc = mod_hash_insert_reserve(tep->te_addrhash, 2517 (mod_hash_key_t)ux_addr.soua_vp, 2518 (mod_hash_val_t)tep, tep->te_hash_hndl); 2519 2520 if (rc != 0) { 2521 ASSERT(rc == MH_ERR_DUPLICATE); 2522 /* 2523 * Violate O_T_BIND_REQ semantics and fail with 2524 * TADDRBUSY - sockets will not use any address 2525 * other than supplied one for explicit binds. 2526 */ 2527 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2528 SL_TRACE|SL_ERROR, 2529 "tl_bind:requested addr %p is busy", 2530 ux_addr.soua_vp)); 2531 tli_err = TADDRBUSY; unix_err = 0; 2532 goto error; 2533 } 2534 tep->te_uxaddr = ux_addr; 2535 tep->te_flag |= TL_ADDRHASHED; 2536 tep->te_hash_hndl = NULL; 2537 } 2538 } else if (alen == 0) { 2539 /* 2540 * assign any free address 2541 */ 2542 if (! tl_get_any_addr(tep, NULL)) { 2543 (void) (STRLOG(TL_ID, tep->te_minor, 2544 1, SL_TRACE|SL_ERROR, 2545 "tl_bind:failed to get buffer for any " 2546 "address")); 2547 tli_err = TSYSERR; unix_err = ENOSR; 2548 goto error; 2549 } 2550 } else { 2551 addr_req.ta_alen = alen; 2552 addr_req.ta_abuf = (mp->b_rptr + aoff); 2553 addr_req.ta_zoneid = tep->te_zoneid; 2554 2555 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2556 if (tep->te_abuf == NULL) { 2557 tli_err = TSYSERR; unix_err = ENOSR; 2558 goto error; 2559 } 2560 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2561 tep->te_alen = alen; 2562 2563 if (mod_hash_insert_reserve(tep->te_addrhash, 2564 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2565 tep->te_hash_hndl) != 0) { 2566 if (save_prim_type == T_BIND_REQ) { 2567 /* 2568 * The bind semantics for this primitive 2569 * require a failure if the exact address 2570 * requested is busy 2571 */ 2572 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2573 SL_TRACE|SL_ERROR, 2574 "tl_bind:requested addr is busy")); 2575 tli_err = TADDRBUSY; unix_err = 0; 2576 goto error; 2577 } 2578 2579 /* 2580 * O_T_BIND_REQ semantics say if address if requested 2581 * address is busy, bind to any available free address 
2582 */ 2583 if (! tl_get_any_addr(tep, &addr_req)) { 2584 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2585 SL_TRACE|SL_ERROR, 2586 "tl_bind:unable to get any addr buf")); 2587 tli_err = TSYSERR; unix_err = ENOMEM; 2588 goto error; 2589 } 2590 } else { 2591 tep->te_flag |= TL_ADDRHASHED; 2592 tep->te_hash_hndl = NULL; 2593 } 2594 } 2595 2596 ASSERT(tep->te_alen >= 0); 2597 2598 skip_addr_bind: 2599 /* 2600 * prepare T_BIND_ACK TPI message 2601 */ 2602 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2603 bamp = reallocb(mp, basize, 0); 2604 if (bamp == NULL) { 2605 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2606 "tl_wput:tl_bind: allocb failed")); 2607 /* 2608 * roll back state changes 2609 */ 2610 tl_addr_unbind(tep); 2611 tep->te_state = TS_UNBND; 2612 tl_memrecover(wq, mp, basize); 2613 return; 2614 } 2615 2616 DB_TYPE(bamp) = M_PCPROTO; 2617 bamp->b_wptr = bamp->b_rptr + basize; 2618 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2619 b_ack->PRIM_type = T_BIND_ACK; 2620 b_ack->CONIND_number = qlen; 2621 b_ack->ADDR_length = tep->te_alen; 2622 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2623 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2624 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2625 2626 if (IS_COTS(tep)) { 2627 tep->te_qlen = qlen; 2628 if (qlen > 0) 2629 tep->te_flag |= TL_LISTENER; 2630 } 2631 2632 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2633 /* 2634 * send T_BIND_ACK message 2635 */ 2636 (void) qreply(wq, bamp); 2637 return; 2638 2639 error: 2640 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2641 if (ackmp == NULL) { 2642 /* 2643 * roll back state changes 2644 */ 2645 tep->te_state = save_state; 2646 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2647 return; 2648 } 2649 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2650 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2651 } 2652 2653 /* 2654 * Process T_UNBIND_REQ. 2655 * Called from serializer. 
2656 */ 2657 static void 2658 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2659 { 2660 queue_t *wq; 2661 mblk_t *ackmp; 2662 2663 if (tep->te_closing) { 2664 freemsg(mp); 2665 return; 2666 } 2667 2668 wq = tep->te_wq; 2669 2670 /* 2671 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2672 * ==> allocate for T_ERROR_ACK (known max) 2673 */ 2674 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2675 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2676 return; 2677 } 2678 /* 2679 * memory resources committed 2680 * Note: no message validation. T_UNBIND_REQ message is 2681 * same size as PRIM_type field so already verified earlier. 2682 */ 2683 2684 /* 2685 * validate state 2686 */ 2687 if (tep->te_state != TS_IDLE) { 2688 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2689 SL_TRACE|SL_ERROR, 2690 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2691 tep->te_state)); 2692 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2693 return; 2694 } 2695 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2696 2697 /* 2698 * TPI says on T_UNBIND_REQ: 2699 * send up a M_FLUSH to flush both 2700 * read and write queues 2701 */ 2702 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2703 2704 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2705 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2706 2707 /* 2708 * Sockets use bind with qlen==0 followed by bind() to 2709 * the same address with qlen > 0 for listeners. 2710 * We allow rebind with a new qlen value. 2711 */ 2712 tl_addr_unbind(tep); 2713 } 2714 2715 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2716 /* 2717 * send T_OK_ACK 2718 */ 2719 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2720 } 2721 2722 2723 /* 2724 * Option management code from drv/ip is used here 2725 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2726 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2727 * However, that is what we want as that option is 'unorthodox' 2728 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2729 * and not in T_SVR4_OPTMGMT_REQ/ACK 2730 * Note2: use of optcom_req means this routine is an exception to 2731 * recovery from allocb() failures. 2732 */ 2733 2734 static void 2735 tl_optmgmt(queue_t *wq, mblk_t *mp) 2736 { 2737 tl_endpt_t *tep; 2738 mblk_t *ackmp; 2739 union T_primitives *prim; 2740 cred_t *cr; 2741 2742 tep = (tl_endpt_t *)wq->q_ptr; 2743 prim = (union T_primitives *)mp->b_rptr; 2744 2745 /* 2746 * All Solaris components should pass a db_credp 2747 * for this TPI message, hence we ASSERT. 2748 * But in case there is some other M_PROTO that looks 2749 * like a TPI message sent by some other kernel 2750 * component, we check and return an error. 2751 */ 2752 cr = msg_getcred(mp, NULL); 2753 ASSERT(cr != NULL); 2754 if (cr == NULL) { 2755 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2756 return; 2757 } 2758 2759 /* all states OK for AF_UNIX options ? */ 2760 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2761 prim->type == T_SVR4_OPTMGMT_REQ) { 2762 /* 2763 * Broken TLI semantics that options can only be managed 2764 * in TS_IDLE state. Needed for Sparc ABI test suite that 2765 * tests this TLI (mis)feature using this device driver. 2766 */ 2767 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2768 SL_TRACE|SL_ERROR, 2769 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2770 tep->te_state)); 2771 /* 2772 * preallocate memory for T_ERROR_ACK 2773 */ 2774 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2775 if (! 
ackmp) { 2776 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2777 return; 2778 } 2779 2780 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2781 freemsg(mp); 2782 return; 2783 } 2784 2785 /* 2786 * call common option management routine from drv/ip 2787 */ 2788 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2789 svr4_optcom_req(wq, mp, cr, &tl_opt_obj); 2790 } else { 2791 ASSERT(prim->type == T_OPTMGMT_REQ); 2792 tpi_optcom_req(wq, mp, cr, &tl_opt_obj); 2793 } 2794 } 2795 2796 /* 2797 * Handle T_conn_req - the driver part of accept(). 2798 * If TL_SET[U]CRED generate the credentials options. 2799 * If this is a socket pass through options unmodified. 2800 * For sockets generate the T_CONN_CON here instead of 2801 * waiting for the T_CONN_RES. 2802 */ 2803 static void 2804 tl_conn_req(queue_t *wq, mblk_t *mp) 2805 { 2806 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2807 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2808 ssize_t msz = MBLKL(mp); 2809 t_scalar_t alen, aoff, olen, ooff, err = 0; 2810 tl_endpt_t *peer_tep = NULL; 2811 mblk_t *ackmp; 2812 mblk_t *dimp; 2813 struct T_discon_ind *di; 2814 soux_addr_t ux_addr; 2815 tl_addr_t dst; 2816 2817 ASSERT(IS_COTS(tep)); 2818 2819 if (tep->te_closing) { 2820 freemsg(mp); 2821 return; 2822 } 2823 2824 /* 2825 * preallocate memory for: 2826 * 1. max of T_ERROR_ACK and T_OK_ACK 2827 * ==> known max T_ERROR_ACK 2828 * 2. max of T_DISCON_IND and T_CONN_IND 2829 */ 2830 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2831 if (! 
ackmp) { 2832 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2833 return; 2834 } 2835 /* 2836 * memory committed for T_OK_ACK/T_ERROR_ACK now 2837 * will be committed for T_DISCON_IND/T_CONN_IND later 2838 */ 2839 2840 if (tep->te_state != TS_IDLE) { 2841 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2842 SL_TRACE|SL_ERROR, 2843 "tl_wput:T_CONN_REQ:out of state, state=%d", 2844 tep->te_state)); 2845 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2846 freemsg(mp); 2847 return; 2848 } 2849 2850 /* 2851 * validate the message 2852 * Note: dereference fields in struct inside message only 2853 * after validating the message length. 2854 */ 2855 if (msz < sizeof (struct T_conn_req)) { 2856 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2857 "tl_conn_req:invalid message length")); 2858 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2859 freemsg(mp); 2860 return; 2861 } 2862 alen = creq->DEST_length; 2863 aoff = creq->DEST_offset; 2864 olen = creq->OPT_length; 2865 ooff = creq->OPT_offset; 2866 if (olen == 0) 2867 ooff = 0; 2868 2869 if (IS_SOCKET(tep)) { 2870 if ((alen != TL_SOUX_ADDRLEN) || 2871 (aoff < 0) || 2872 (aoff + alen > msz) || 2873 (alen > msz - sizeof (struct T_conn_req))) { 2874 (void) (STRLOG(TL_ID, tep->te_minor, 2875 1, SL_TRACE|SL_ERROR, 2876 "tl_conn_req: invalid socket addr")); 2877 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2878 freemsg(mp); 2879 return; 2880 } 2881 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2882 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2883 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2884 (void) (STRLOG(TL_ID, tep->te_minor, 2885 1, SL_TRACE|SL_ERROR, 2886 "tl_conn_req: invalid socket magic")); 2887 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2888 freemsg(mp); 2889 return; 2890 } 2891 } else { 2892 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2893 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2894 ooff + olen < 0)) || 2895 olen < 0 || ooff < 0) { 2896 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2897 SL_TRACE|SL_ERROR, 2898 "tl_conn_req:invalid message")); 2899 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2900 freemsg(mp); 2901 return; 2902 } 2903 2904 if (alen <= 0 || aoff < 0 || 2905 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2906 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2907 SL_TRACE|SL_ERROR, 2908 "tl_conn_req:bad addr in message, " 2909 "alen=%d, msz=%ld", 2910 alen, msz)); 2911 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2912 freemsg(mp); 2913 return; 2914 } 2915 #ifdef DEBUG 2916 /* 2917 * Mild form of ASSERT()ion to detect broken TPI apps. 2918 * if (! assertion) 2919 * log warning; 2920 */ 2921 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2922 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2923 SL_TRACE|SL_ERROR, 2924 "tl_conn_req: addr overlaps TPI message")); 2925 } 2926 #endif 2927 if (olen) { 2928 /* 2929 * no opts in connect req 2930 * supported in this provider except for sockets. 2931 */ 2932 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2933 SL_TRACE|SL_ERROR, 2934 "tl_conn_req:options not supported " 2935 "in message")); 2936 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2937 freemsg(mp); 2938 return; 2939 } 2940 } 2941 2942 /* 2943 * Prevent tep from closing on us. 2944 */ 2945 if (! tl_noclose(tep)) { 2946 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2947 "tl_conn_req:endpoint is closing")); 2948 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2949 freemsg(mp); 2950 return; 2951 } 2952 2953 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2954 /* 2955 * get endpoint to connect to 2956 * check that peer with DEST addr is bound to addr 2957 * and has CONIND_number > 0 2958 */ 2959 dst.ta_alen = alen; 2960 dst.ta_abuf = mp->b_rptr + aoff; 2961 dst.ta_zoneid = tep->te_zoneid; 2962 2963 /* 2964 * Verify if remote addr is in use 2965 */ 2966 peer_tep = (IS_SOCKET(tep) ? 
2967 tl_sock_find_peer(tep, &ux_addr) : 2968 tl_find_peer(tep, &dst)); 2969 2970 if (peer_tep == NULL) { 2971 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2972 "tl_conn_req:no one at connect address")); 2973 err = ECONNREFUSED; 2974 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2975 /* 2976 * validate that number of incoming connection is 2977 * not to capacity on destination endpoint 2978 */ 2979 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2980 "tl_conn_req: qlen overflow connection refused")); 2981 err = ECONNREFUSED; 2982 } 2983 2984 /* 2985 * Send T_DISCON_IND in case of error 2986 */ 2987 if (err != 0) { 2988 if (peer_tep != NULL) 2989 tl_refrele(peer_tep); 2990 /* We are still expected to send T_OK_ACK */ 2991 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2992 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2993 tl_closeok(tep); 2994 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2995 M_PROTO, T_DISCON_IND); 2996 if (dimp == NULL) { 2997 tl_merror(wq, NULL, ENOSR); 2998 return; 2999 } 3000 di = (struct T_discon_ind *)dimp->b_rptr; 3001 di->DISCON_reason = err; 3002 di->SEQ_number = BADSEQNUM; 3003 3004 tep->te_state = TS_IDLE; 3005 /* 3006 * send T_DISCON_IND message 3007 */ 3008 putnext(tep->te_rq, dimp); 3009 return; 3010 } 3011 3012 ASSERT(IS_COTS(peer_tep)); 3013 3014 /* 3015 * Found the listener. At this point processing will continue on 3016 * listener serializer. Close of the endpoint should be blocked while we 3017 * switch serializers. 3018 */ 3019 tl_serializer_refhold(peer_tep->te_ser); 3020 tl_serializer_refrele(tep->te_ser); 3021 tep->te_ser = peer_tep->te_ser; 3022 ASSERT(tep->te_oconp == NULL); 3023 tep->te_oconp = peer_tep; 3024 3025 /* 3026 * It is safe to close now. Close may continue on listener serializer. 3027 */ 3028 tl_closeok(tep); 3029 3030 /* 3031 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3032 * data, so we link mp to ackmp. 
3033 */ 3034 ackmp->b_cont = mp; 3035 mp = ackmp; 3036 3037 tl_refhold(tep); 3038 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3039 } 3040 3041 /* 3042 * Finish T_CONN_REQ processing on listener serializer. 3043 */ 3044 static void 3045 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3046 { 3047 queue_t *wq; 3048 tl_endpt_t *peer_tep = tep->te_oconp; 3049 mblk_t *confmp, *cimp, *indmp; 3050 void *opts = NULL; 3051 mblk_t *ackmp = mp; 3052 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3053 struct T_conn_ind *ci; 3054 tl_icon_t *tip; 3055 void *addr_startp; 3056 t_scalar_t olen = creq->OPT_length; 3057 t_scalar_t ooff = creq->OPT_offset; 3058 size_t ci_msz; 3059 size_t size; 3060 cred_t *cr = NULL; 3061 pid_t cpid; 3062 3063 if (tep->te_closing) { 3064 TL_UNCONNECT(tep->te_oconp); 3065 tl_serializer_exit(tep); 3066 tl_refrele(tep); 3067 freemsg(mp); 3068 return; 3069 } 3070 3071 wq = tep->te_wq; 3072 tep->te_flag |= TL_EAGER; 3073 3074 /* 3075 * Extract preallocated ackmp from mp. 
3076 */ 3077 mp = mp->b_cont; 3078 ackmp->b_cont = NULL; 3079 3080 if (olen == 0) 3081 ooff = 0; 3082 3083 if (peer_tep->te_closing || 3084 !((peer_tep->te_state == TS_IDLE) || 3085 (peer_tep->te_state == TS_WRES_CIND))) { 3086 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3087 "tl_conn_req:peer in bad state (%d)", 3088 peer_tep->te_state)); 3089 TL_UNCONNECT(tep->te_oconp); 3090 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3091 freemsg(ackmp); 3092 tl_serializer_exit(tep); 3093 tl_refrele(tep); 3094 return; 3095 } 3096 3097 /* 3098 * preallocate now for T_DISCON_IND or T_CONN_IND 3099 */ 3100 /* 3101 * calculate length of T_CONN_IND message 3102 */ 3103 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3104 cr = msg_getcred(mp, &cpid); 3105 ASSERT(cr != NULL); 3106 if (peer_tep->te_flag & TL_SETCRED) { 3107 ooff = 0; 3108 olen = (t_scalar_t) sizeof (struct opthdr) + 3109 OPTLEN(sizeof (tl_credopt_t)); 3110 /* 1 option only */ 3111 } else { 3112 ooff = 0; 3113 olen = (t_scalar_t)sizeof (struct opthdr) + 3114 OPTLEN(ucredminsize(cr)); 3115 /* 1 option only */ 3116 } 3117 } 3118 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3119 ci_msz = T_ALIGN(ci_msz) + olen; 3120 size = max(ci_msz, sizeof (struct T_discon_ind)); 3121 3122 /* 3123 * Save options from mp - we'll need them for T_CONN_IND. 3124 */ 3125 if (ooff != 0) { 3126 opts = kmem_alloc(olen, KM_NOSLEEP); 3127 if (opts == NULL) { 3128 /* 3129 * roll back state changes 3130 */ 3131 tep->te_state = TS_IDLE; 3132 tl_memrecover(wq, mp, size); 3133 freemsg(ackmp); 3134 TL_UNCONNECT(tep->te_oconp); 3135 tl_serializer_exit(tep); 3136 tl_refrele(tep); 3137 return; 3138 } 3139 /* Copy options to a temp buffer */ 3140 bcopy(mp->b_rptr + ooff, opts, olen); 3141 } 3142 3143 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3144 /* 3145 * Generate a T_CONN_CON that has the identical address 3146 * (and options) as the T_CONN_REQ. 
3147 * NOTE: assumes that the T_conn_req and T_conn_con structures 3148 * are isomorphic. 3149 */ 3150 confmp = copyb(mp); 3151 if (! confmp) { 3152 /* 3153 * roll back state changes 3154 */ 3155 tep->te_state = TS_IDLE; 3156 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3157 freemsg(ackmp); 3158 if (opts != NULL) 3159 kmem_free(opts, olen); 3160 TL_UNCONNECT(tep->te_oconp); 3161 tl_serializer_exit(tep); 3162 tl_refrele(tep); 3163 return; 3164 } 3165 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3166 T_CONN_CON; 3167 } else { 3168 confmp = NULL; 3169 } 3170 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3171 /* 3172 * roll back state changes 3173 */ 3174 tep->te_state = TS_IDLE; 3175 tl_memrecover(wq, mp, size); 3176 freemsg(ackmp); 3177 if (opts != NULL) 3178 kmem_free(opts, olen); 3179 freemsg(confmp); 3180 TL_UNCONNECT(tep->te_oconp); 3181 tl_serializer_exit(tep); 3182 tl_refrele(tep); 3183 return; 3184 } 3185 3186 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3187 if (tip == NULL) { 3188 /* 3189 * roll back state changes 3190 */ 3191 tep->te_state = TS_IDLE; 3192 tl_memrecover(wq, indmp, sizeof (*tip)); 3193 freemsg(ackmp); 3194 if (opts != NULL) 3195 kmem_free(opts, olen); 3196 freemsg(confmp); 3197 TL_UNCONNECT(tep->te_oconp); 3198 tl_serializer_exit(tep); 3199 tl_refrele(tep); 3200 return; 3201 } 3202 tip->ti_mp = NULL; 3203 3204 /* 3205 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3206 * and tl_icon_t cell. 3207 */ 3208 3209 /* 3210 * ack validity of request and send the peer credential in the ACK. 
3211 */ 3212 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3213 3214 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3215 confmp != NULL) { 3216 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3217 } 3218 3219 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3220 3221 /* 3222 * prepare message to send T_CONN_IND 3223 */ 3224 /* 3225 * allocate the message - original data blocks retained 3226 * in the returned mblk 3227 */ 3228 cimp = tl_resizemp(indmp, size); 3229 if (! cimp) { 3230 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3231 "tl_conn_req:con_ind:allocb failure")); 3232 tl_merror(wq, indmp, ENOMEM); 3233 TL_UNCONNECT(tep->te_oconp); 3234 tl_serializer_exit(tep); 3235 tl_refrele(tep); 3236 if (opts != NULL) 3237 kmem_free(opts, olen); 3238 freemsg(confmp); 3239 ASSERT(tip->ti_mp == NULL); 3240 kmem_free(tip, sizeof (*tip)); 3241 return; 3242 } 3243 3244 DB_TYPE(cimp) = M_PROTO; 3245 ci = (struct T_conn_ind *)cimp->b_rptr; 3246 ci->PRIM_type = T_CONN_IND; 3247 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3248 ci->SRC_length = tep->te_alen; 3249 ci->SEQ_number = tep->te_seqno; 3250 3251 addr_startp = cimp->b_rptr + ci->SRC_offset; 3252 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3253 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3254 3255 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3256 ci->SRC_length); 3257 ci->OPT_length = olen; /* because only 1 option */ 3258 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3259 cr, cpid, 3260 peer_tep->te_flag, peer_tep->te_credp); 3261 } else if (ooff != 0) { 3262 /* Copy option from T_CONN_REQ */ 3263 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3264 ci->SRC_length); 3265 ci->OPT_length = olen; 3266 ASSERT(opts != NULL); 3267 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3268 } else { 3269 ci->OPT_offset = 0; 3270 ci->OPT_length = 0; 3271 } 3272 if (opts != NULL) 3273 kmem_free(opts, olen); 3274 3275 /* 3276 * register connection request with 
server peer 3277 * append to list of incoming connections 3278 * increment references for both peer_tep and tep: peer_tep is placed on 3279 * te_oconp and tep is placed on listeners queue. 3280 */ 3281 tip->ti_tep = tep; 3282 tip->ti_seqno = tep->te_seqno; 3283 list_insert_tail(&peer_tep->te_iconp, tip); 3284 peer_tep->te_nicon++; 3285 3286 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3287 /* 3288 * send the T_CONN_IND message 3289 */ 3290 putnext(peer_tep->te_rq, cimp); 3291 3292 /* 3293 * Send a T_CONN_CON message for sockets. 3294 * Disable the queues until we have reached the correct state! 3295 */ 3296 if (confmp != NULL) { 3297 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3298 noenable(wq); 3299 putnext(tep->te_rq, confmp); 3300 } 3301 /* 3302 * Now we need to increment tep reference because tep is referenced by 3303 * server list of pending connections. We also need to decrement 3304 * reference before exiting serializer. Two operations void each other 3305 * so we don't modify reference at all. 3306 */ 3307 ASSERT(tep->te_refcnt >= 2); 3308 ASSERT(peer_tep->te_refcnt >= 2); 3309 tl_serializer_exit(tep); 3310 } 3311 3312 3313 3314 /* 3315 * Handle T_conn_res on listener stream. Called on listener serializer. 3316 * tl_conn_req has already generated the T_CONN_CON. 3317 * tl_conn_res is called on listener serializer. 3318 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3319 * Switch eager serializer to acceptor's. 3320 * 3321 * If TL_SET[U]CRED generate the credentials options. 3322 * For sockets tl_conn_req has already generated the T_CONN_CON. 
/*
 * Handle T_CONN_RES / O_T_CONN_RES on the listener stream.
 *
 * Called on the listener's serializer.  For sockets tl_conn_req() has
 * already generated the T_CONN_CON, so here only the write queue of the
 * client is re-enabled; for other endpoints (or when
 * tl_disable_early_connect is set) the T_CONN_CON is built and sent here,
 * including the credential option if the client has TL_SET[U]CRED set.
 *
 * No one accesses the acceptor at this point, so it is safe to modify it.
 * The eager (client) is normally switched to the acceptor's serializer.
 *
 * Arguments:
 *	mp	the T_conn_res message.  It is consumed on every path: freed,
 *		handed to tl_memrecover(), or reused (via reallocb()) as the
 *		T_DISCON_IND / T_CONN_CON that is sent upstream.
 *	tep	the listening endpoint (must be connection oriented).
 *
 * Outline:
 *	1. Preallocate the T_OK_ACK/T_ERROR_ACK.
 *	2. Validate listener state, message length, options, sequence number.
 *	3. Look up the acceptor by ACCEPTOR_id and block it from closing.
 *	4. Find the pending connection (tl_icon_t) by SEQ_number.
 *	5. Deal with a client that is absent or in the middle of closing.
 *	6. Preallocate the T_DISCON_IND / T_CONN_CON, then ack the request.
 *	7. Link client and acceptor, sort out serializers, drop the pending
 *	   entry and deliver T_CONN_CON (or enable the socket client's queue).
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	boolean_t		client_noclose_set = B_FALSE;
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	/* A closing listener no longer answers connection responses. */
	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state: the listener must be waiting for a response to a
	 * connect indication.
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * From here on the listener state has advanced, so every error
	 * return below applies TE_ERROR_ACK to the state before acking.
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * NOTE(review): this rejects sequence numbers in the range
	 * [BADSEQNUM, TL_MINOR_START).  Values below BADSEQNUM are not
	 * rejected here; presumably they fail the tl_icon_find() lookup
	 * further down -- confirm.
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.  On failure only the reference
	 * from the lookup above has to be dropped.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	/*
	 * NOTE(review): the flag is set before the remaining validation and
	 * is not cleared by the error returns below -- confirm that this is
	 * harmless.
	 */
	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Look up the entry for this client on the listener's list of
	 * pending connections; it is removed from the list further down
	 * once the response has been processed.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the condition can only be true
		 * for a socket client; a non-socket client in an unexpected
		 * state is not rejected here.  Confirm whether
		 * "!(state == TS_DATA_XFER && IS_SOCKET)" was intended.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message:
			 * header + acceptor address (aligned) + optional
			 * credential option.
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request.  The event depends on whether this is
	 * the only pending connection and whether the listener accepts on
	 * itself.
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * NOTE(review): this path does not remove tip from the listener's
	 * pending list and forces the listener to TS_IDLE -- confirm that
	 * this is intended.
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/* mp was not turned into respmp on this path; free it. */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		/* Responding address is the acceptor's bound address. */
		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		/* Socket client already got its T_CONN_CON; nothing to send. */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * The client's te_ser_count is non-zero, so its
			 * serializer is not switched; fall back to moving
			 * the acceptor instead (below).
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		/*
		 * NOTE(review): ccmp is only assigned under the opposite
		 * condition above, so it looks like it is always NULL here.
		 */
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3847 */ 3848 tl_closeok(acc_ep); 3849 if (client_noclose_set) 3850 tl_closeok(cl_ep); 3851 } 3852 3853 3854 3855 3856 static void 3857 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3858 { 3859 queue_t *wq; 3860 struct T_discon_req *dr; 3861 ssize_t msz; 3862 tl_endpt_t *peer_tep = tep->te_conp; 3863 tl_endpt_t *srv_tep = tep->te_oconp; 3864 tl_icon_t *tip; 3865 size_t size; 3866 mblk_t *ackmp, *dimp, *respmp; 3867 struct T_discon_ind *di; 3868 t_scalar_t save_state, new_state; 3869 3870 if (tep->te_closing) { 3871 freemsg(mp); 3872 return; 3873 } 3874 3875 if ((peer_tep != NULL) && peer_tep->te_closing) { 3876 TL_UNCONNECT(tep->te_conp); 3877 peer_tep = NULL; 3878 } 3879 if ((srv_tep != NULL) && srv_tep->te_closing) { 3880 TL_UNCONNECT(tep->te_oconp); 3881 srv_tep = NULL; 3882 } 3883 3884 wq = tep->te_wq; 3885 3886 /* 3887 * preallocate memory for: 3888 * 1. max of T_ERROR_ACK and T_OK_ACK 3889 * ==> known max T_ERROR_ACK 3890 * 2. for T_DISCON_IND 3891 */ 3892 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3893 if (! ackmp) { 3894 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3895 return; 3896 } 3897 /* 3898 * memory committed for T_OK_ACK/T_ERROR_ACK now 3899 * will be committed for T_DISCON_IND later 3900 */ 3901 3902 dr = (struct T_discon_req *)mp->b_rptr; 3903 msz = MBLKL(mp); 3904 3905 /* 3906 * validate the state 3907 */ 3908 save_state = new_state = tep->te_state; 3909 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3910 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3911 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3912 SL_TRACE|SL_ERROR, 3913 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3914 tep->te_state)); 3915 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3916 freemsg(mp); 3917 return; 3918 } 3919 /* 3920 * Defer committing the state change until it is determined if 3921 * the message will be queued with the tl_icon or not. 
3922 */ 3923 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3924 3925 /* validate the message */ 3926 if (msz < sizeof (struct T_discon_req)) { 3927 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3928 "tl_discon_req:invalid message")); 3929 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3930 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3931 freemsg(mp); 3932 return; 3933 } 3934 3935 /* 3936 * if server, then validate that client exists 3937 * by connection sequence number etc. 3938 */ 3939 if (tep->te_nicon > 0) { /* server */ 3940 3941 /* 3942 * search server list for disconnect client 3943 */ 3944 tip = tl_icon_find(tep, dr->SEQ_number); 3945 if (tip == NULL) { 3946 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3947 SL_TRACE|SL_ERROR, 3948 "tl_discon_req:no disconnect endpoint")); 3949 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3950 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3951 freemsg(mp); 3952 return; 3953 } 3954 /* 3955 * If ti_tep is NULL the client has already closed. In this case 3956 * the code below will avoid any action on the client side. 3957 */ 3958 3959 IMPLY(tip->ti_tep != NULL, 3960 tip->ti_tep->te_seqno == dr->SEQ_number); 3961 peer_tep = tip->ti_tep; 3962 } 3963 3964 /* 3965 * preallocate now for T_DISCON_IND 3966 * ack validity of request (T_OK_ACK) after memory committed 3967 */ 3968 size = sizeof (struct T_discon_ind); 3969 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3970 tl_memrecover(wq, mp, size); 3971 freemsg(ackmp); 3972 return; 3973 } 3974 3975 /* 3976 * prepare message to ack validity of request 3977 */ 3978 if (tep->te_nicon == 0) 3979 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3980 else 3981 if (tep->te_nicon == 1) 3982 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3983 else 3984 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3985 3986 /* 3987 * Flushing queues according to TPI. Using the old state. 
3988 */ 3989 if ((tep->te_nicon <= 1) && 3990 ((save_state == TS_DATA_XFER) || 3991 (save_state == TS_WIND_ORDREL) || 3992 (save_state == TS_WREQ_ORDREL))) 3993 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3994 3995 /* send T_OK_ACK up */ 3996 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3997 3998 /* 3999 * now do disconnect business 4000 */ 4001 if (tep->te_nicon > 0) { /* listener */ 4002 if (peer_tep != NULL && !peer_tep->te_closing) { 4003 /* 4004 * disconnect incoming connect request pending to tep 4005 */ 4006 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4007 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4008 SL_TRACE|SL_ERROR, 4009 "tl_discon_req: reallocb failed")); 4010 tep->te_state = new_state; 4011 tl_merror(wq, respmp, ENOMEM); 4012 return; 4013 } 4014 di = (struct T_discon_ind *)dimp->b_rptr; 4015 di->SEQ_number = BADSEQNUM; 4016 save_state = peer_tep->te_state; 4017 peer_tep->te_state = TS_IDLE; 4018 4019 TL_REMOVE_PEER(peer_tep->te_oconp); 4020 enableok(peer_tep->te_wq); 4021 TL_QENABLE(peer_tep); 4022 } else { 4023 freemsg(respmp); 4024 dimp = NULL; 4025 } 4026 4027 /* 4028 * remove endpoint from incoming connection list 4029 * - remove disconnect client from list on server 4030 */ 4031 tl_freetip(tep, tip); 4032 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4033 /* 4034 * disconnect an outgoing request pending from tep 4035 */ 4036 4037 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4038 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4039 SL_TRACE|SL_ERROR, 4040 "tl_discon_req: reallocb failed")); 4041 tep->te_state = new_state; 4042 tl_merror(wq, respmp, ENOMEM); 4043 return; 4044 } 4045 di = (struct T_discon_ind *)dimp->b_rptr; 4046 DB_TYPE(dimp) = M_PROTO; 4047 di->PRIM_type = T_DISCON_IND; 4048 di->DISCON_reason = ECONNRESET; 4049 di->SEQ_number = tep->te_seqno; 4050 4051 /* 4052 * If this is a socket the T_DISCON_IND is queued with 4053 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4054 * from the list of pending connections. 
4055 * Note that when te_oconp is set the peer better have 4056 * a t_connind_t for the client. 4057 */ 4058 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4059 /* 4060 * No need to check that 4061 * ti_tep == NULL since the T_DISCON_IND 4062 * takes precedence over other queued 4063 * messages. 4064 */ 4065 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4066 peer_tep = NULL; 4067 dimp = NULL; 4068 /* 4069 * Can't clear te_oconp since tl_co_unconnect needs 4070 * it as a hint not to free the tep. 4071 * Keep the state unchanged since tl_conn_res inspects 4072 * it. 4073 */ 4074 new_state = tep->te_state; 4075 } else { 4076 /* Found - delete it */ 4077 tip = tl_icon_find(peer_tep, tep->te_seqno); 4078 if (tip != NULL) { 4079 ASSERT(tep == tip->ti_tep); 4080 save_state = peer_tep->te_state; 4081 if (peer_tep->te_nicon == 1) 4082 peer_tep->te_state = 4083 NEXTSTATE(TE_DISCON_IND2, 4084 peer_tep->te_state); 4085 else 4086 peer_tep->te_state = 4087 NEXTSTATE(TE_DISCON_IND3, 4088 peer_tep->te_state); 4089 tl_freetip(peer_tep, tip); 4090 } 4091 ASSERT(tep->te_oconp != NULL); 4092 TL_UNCONNECT(tep->te_oconp); 4093 } 4094 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4095 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4096 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4097 SL_TRACE|SL_ERROR, 4098 "tl_discon_req: reallocb failed")); 4099 tep->te_state = new_state; 4100 tl_merror(wq, respmp, ENOMEM); 4101 return; 4102 } 4103 di = (struct T_discon_ind *)dimp->b_rptr; 4104 di->SEQ_number = BADSEQNUM; 4105 4106 save_state = peer_tep->te_state; 4107 peer_tep->te_state = TS_IDLE; 4108 } else { 4109 /* Not connected */ 4110 tep->te_state = new_state; 4111 freemsg(respmp); 4112 return; 4113 } 4114 4115 /* Commit state changes */ 4116 tep->te_state = new_state; 4117 4118 if (peer_tep == NULL) { 4119 ASSERT(dimp == NULL); 4120 goto done; 4121 } 4122 /* 4123 * Flush queues on peer before sending up 4124 * T_DISCON_IND according to TPI 4125 */ 4126 4127 if ((save_state == TS_DATA_XFER) || 4128 (save_state == TS_WIND_ORDREL) || 4129 (save_state == TS_WREQ_ORDREL)) 4130 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4131 4132 DB_TYPE(dimp) = M_PROTO; 4133 di->PRIM_type = T_DISCON_IND; 4134 di->DISCON_reason = ECONNRESET; 4135 4136 /* 4137 * data blocks already linked into dimp by reallocb() 4138 */ 4139 /* 4140 * send indication message to peer user module 4141 */ 4142 ASSERT(dimp != NULL); 4143 putnext(peer_tep->te_rq, dimp); 4144 done: 4145 if (tep->te_conp) { /* disconnect pointers if connected */ 4146 ASSERT(! peer_tep->te_closing); 4147 4148 /* 4149 * Messages may be queued on peer's write queue 4150 * waiting to be processed by its write service 4151 * procedure. Before the pointer to the peer transport 4152 * structure is set to NULL, qenable the peer's write 4153 * queue so that the queued up messages are processed. 4154 */ 4155 if ((save_state == TS_DATA_XFER) || 4156 (save_state == TS_WIND_ORDREL) || 4157 (save_state == TS_WREQ_ORDREL)) 4158 TL_QENABLE(peer_tep); 4159 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4160 TL_UNCONNECT(peer_tep->te_conp); 4161 if (! 
IS_SOCKET(tep)) { 4162 /* 4163 * unlink the streams 4164 */ 4165 tep->te_wq->q_next = NULL; 4166 peer_tep->te_wq->q_next = NULL; 4167 } 4168 TL_UNCONNECT(tep->te_conp); 4169 } 4170 } 4171 4172 static void 4173 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep) 4174 { 4175 if (!tep->te_closing) 4176 tl_addr_req(mp, tep); 4177 else 4178 freemsg(mp); 4179 4180 tl_serializer_exit(tep); 4181 tl_refrele(tep); 4182 } 4183 4184 static void 4185 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4186 { 4187 queue_t *wq; 4188 size_t ack_sz; 4189 mblk_t *ackmp; 4190 struct T_addr_ack *taa; 4191 4192 if (tep->te_closing) { 4193 freemsg(mp); 4194 return; 4195 } 4196 4197 wq = tep->te_wq; 4198 4199 /* 4200 * Note: T_ADDR_REQ message has only PRIM_type field 4201 * so it is already validated earlier. 4202 */ 4203 4204 if (IS_CLTS(tep) || 4205 (tep->te_state > TS_WREQ_ORDREL) || 4206 (tep->te_state < TS_DATA_XFER)) { 4207 /* 4208 * Either connectionless or connection oriented but not 4209 * in connected data transfer state or half-closed states. 
4210 */ 4211 ack_sz = sizeof (struct T_addr_ack); 4212 if (tep->te_state >= TS_IDLE) 4213 /* is bound */ 4214 ack_sz += tep->te_alen; 4215 ackmp = reallocb(mp, ack_sz, 0); 4216 if (ackmp == NULL) { 4217 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4218 SL_TRACE|SL_ERROR, 4219 "tl_addr_req: reallocb failed")); 4220 tl_memrecover(wq, mp, ack_sz); 4221 return; 4222 } 4223 4224 taa = (struct T_addr_ack *)ackmp->b_rptr; 4225 4226 bzero(taa, sizeof (struct T_addr_ack)); 4227 4228 taa->PRIM_type = T_ADDR_ACK; 4229 ackmp->b_datap->db_type = M_PCPROTO; 4230 ackmp->b_wptr = (uchar_t *)&taa[1]; 4231 4232 if (tep->te_state >= TS_IDLE) { 4233 /* endpoint is bound */ 4234 taa->LOCADDR_length = tep->te_alen; 4235 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4236 4237 bcopy(tep->te_abuf, ackmp->b_wptr, 4238 tep->te_alen); 4239 ackmp->b_wptr += tep->te_alen; 4240 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4241 } 4242 4243 (void) qreply(wq, ackmp); 4244 } else { 4245 ASSERT(tep->te_state == TS_DATA_XFER || 4246 tep->te_state == TS_WIND_ORDREL || 4247 tep->te_state == TS_WREQ_ORDREL); 4248 /* connection oriented in data transfer */ 4249 tl_connected_cots_addr_req(mp, tep); 4250 } 4251 } 4252 4253 4254 static void 4255 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4256 { 4257 tl_endpt_t *peer_tep = tep->te_conp; 4258 size_t ack_sz; 4259 mblk_t *ackmp; 4260 struct T_addr_ack *taa; 4261 uchar_t *addr_startp; 4262 4263 if (tep->te_closing) { 4264 freemsg(mp); 4265 return; 4266 } 4267 4268 if (peer_tep == NULL || peer_tep->te_closing) { 4269 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ); 4270 return; 4271 } 4272 4273 ASSERT(tep->te_state >= TS_IDLE); 4274 4275 ack_sz = sizeof (struct T_addr_ack); 4276 ack_sz += T_ALIGN(tep->te_alen); 4277 ack_sz += peer_tep->te_alen; 4278 4279 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4280 if (ackmp == NULL) { 4281 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4282 "tl_connected_cots_addr_req: 
reallocb failed")); 4283 tl_memrecover(tep->te_wq, mp, ack_sz); 4284 return; 4285 } 4286 4287 taa = (struct T_addr_ack *)ackmp->b_rptr; 4288 4289 /* endpoint is bound */ 4290 taa->LOCADDR_length = tep->te_alen; 4291 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4292 4293 addr_startp = (uchar_t *)&taa[1]; 4294 4295 bcopy(tep->te_abuf, addr_startp, 4296 tep->te_alen); 4297 4298 taa->REMADDR_length = peer_tep->te_alen; 4299 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4300 taa->LOCADDR_length); 4301 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4302 bcopy(peer_tep->te_abuf, addr_startp, 4303 peer_tep->te_alen); 4304 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4305 taa->REMADDR_offset + peer_tep->te_alen; 4306 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4307 4308 putnext(tep->te_rq, ackmp); 4309 } 4310 4311 static void 4312 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4313 { 4314 if (IS_CLTS(tep)) { 4315 *ia = tl_clts_info_ack; 4316 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4317 } else { 4318 *ia = tl_cots_info_ack; 4319 if (IS_COTSORD(tep)) 4320 ia->SERV_type = T_COTS_ORD; 4321 } 4322 ia->TIDU_size = tl_tidusz; 4323 ia->CURRENT_state = tep->te_state; 4324 } 4325 4326 /* 4327 * This routine responds to T_CAPABILITY_REQ messages. It is called by 4328 * tl_wput. 
4329 */ 4330 static void 4331 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4332 { 4333 mblk_t *ackmp; 4334 t_uscalar_t cap_bits1; 4335 struct T_capability_ack *tcap; 4336 4337 if (tep->te_closing) { 4338 freemsg(mp); 4339 return; 4340 } 4341 4342 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4343 4344 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4345 M_PCPROTO, T_CAPABILITY_ACK); 4346 if (ackmp == NULL) { 4347 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4348 "tl_capability_req: reallocb failed")); 4349 tl_memrecover(tep->te_wq, mp, 4350 sizeof (struct T_capability_ack)); 4351 return; 4352 } 4353 4354 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4355 tcap->CAP_bits1 = 0; 4356 4357 if (cap_bits1 & TC1_INFO) { 4358 tl_copy_info(&tcap->INFO_ack, tep); 4359 tcap->CAP_bits1 |= TC1_INFO; 4360 } 4361 4362 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4363 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4364 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4365 } 4366 4367 putnext(tep->te_rq, ackmp); 4368 } 4369 4370 static void 4371 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4372 { 4373 if (! tep->te_closing) 4374 tl_info_req(mp, tep); 4375 else 4376 freemsg(mp); 4377 4378 tl_serializer_exit(tep); 4379 tl_refrele(tep); 4380 } 4381 4382 static void 4383 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4384 { 4385 mblk_t *ackmp; 4386 4387 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4388 M_PCPROTO, T_INFO_ACK); 4389 if (ackmp == NULL) { 4390 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4391 "tl_info_req: reallocb failed")); 4392 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4393 return; 4394 } 4395 4396 /* 4397 * fill in T_INFO_ACK contents 4398 */ 4399 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4400 4401 /* 4402 * send ack message 4403 */ 4404 putnext(tep->te_rq, ackmp); 4405 } 4406 4407 /* 4408 * Handle M_DATA, T_data_req and T_optdata_req. 
4409 * If this is a socket pass through T_optdata_req options unmodified. 4410 */ 4411 static void 4412 tl_data(mblk_t *mp, tl_endpt_t *tep) 4413 { 4414 queue_t *wq = tep->te_wq; 4415 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4416 ssize_t msz = MBLKL(mp); 4417 tl_endpt_t *peer_tep; 4418 queue_t *peer_rq; 4419 boolean_t closing = tep->te_closing; 4420 4421 if (IS_CLTS(tep)) { 4422 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4423 SL_TRACE|SL_ERROR, 4424 "tl_wput:clts:unattached M_DATA")); 4425 if (!closing) { 4426 tl_merror(wq, mp, EPROTO); 4427 } else { 4428 freemsg(mp); 4429 } 4430 return; 4431 } 4432 4433 /* 4434 * If the endpoint is closing it should still forward any data to the 4435 * peer (if it has one). If it is not allowed to forward it can just 4436 * free the message. 4437 */ 4438 if (closing && 4439 (tep->te_state != TS_DATA_XFER) && 4440 (tep->te_state != TS_WREQ_ORDREL)) { 4441 freemsg(mp); 4442 return; 4443 } 4444 4445 if (DB_TYPE(mp) == M_PROTO) { 4446 if (prim->type == T_DATA_REQ && 4447 msz < sizeof (struct T_data_req)) { 4448 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4449 SL_TRACE|SL_ERROR, 4450 "tl_data:T_DATA_REQ:invalid message")); 4451 if (!closing) { 4452 tl_merror(wq, mp, EPROTO); 4453 } else { 4454 freemsg(mp); 4455 } 4456 return; 4457 } else if (prim->type == T_OPTDATA_REQ && 4458 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) { 4459 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4460 SL_TRACE|SL_ERROR, 4461 "tl_data:T_OPTDATA_REQ:invalid message")); 4462 if (!closing) { 4463 tl_merror(wq, mp, EPROTO); 4464 } else { 4465 freemsg(mp); 4466 } 4467 return; 4468 } 4469 } 4470 4471 /* 4472 * connection oriented provider 4473 */ 4474 switch (tep->te_state) { 4475 case TS_IDLE: 4476 /* 4477 * Other end not here - do nothing. 
4478 */ 4479 freemsg(mp); 4480 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4481 "tl_data:cots with endpoint idle")); 4482 return; 4483 4484 case TS_DATA_XFER: 4485 /* valid states */ 4486 if (tep->te_conp != NULL) 4487 break; 4488 4489 if (tep->te_oconp == NULL) { 4490 if (!closing) { 4491 tl_merror(wq, mp, EPROTO); 4492 } else { 4493 freemsg(mp); 4494 } 4495 return; 4496 } 4497 /* 4498 * For a socket the T_CONN_CON is sent early thus 4499 * the peer might not yet have accepted the connection. 4500 * If we are closing queue the packet with the T_CONN_IND. 4501 * Otherwise defer processing the packet until the peer 4502 * accepts the connection. 4503 * Note that the queue is noenabled when we go into this 4504 * state. 4505 */ 4506 if (!closing) { 4507 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4508 SL_TRACE|SL_ERROR, 4509 "tl_data: ocon")); 4510 TL_PUTBQ(tep, mp); 4511 return; 4512 } 4513 if (DB_TYPE(mp) == M_PROTO) { 4514 if (msz < sizeof (t_scalar_t)) { 4515 freemsg(mp); 4516 return; 4517 } 4518 /* reuse message block - just change REQ to IND */ 4519 if (prim->type == T_DATA_REQ) 4520 prim->type = T_DATA_IND; 4521 else 4522 prim->type = T_OPTDATA_IND; 4523 } 4524 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4525 return; 4526 4527 case TS_WREQ_ORDREL: 4528 if (tep->te_conp == NULL) { 4529 /* 4530 * Other end closed - generate discon_ind 4531 * with reason 0 to cause an EPIPE but no 4532 * read side error on AF_UNIX sockets. 
4533 */ 4534 freemsg(mp); 4535 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4536 SL_TRACE|SL_ERROR, 4537 "tl_data: WREQ_ORDREL and no peer")); 4538 tl_discon_ind(tep, 0); 4539 return; 4540 } 4541 break; 4542 4543 default: 4544 /* invalid state for event TE_DATA_REQ */ 4545 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4546 "tl_data:cots:out of state")); 4547 tl_merror(wq, mp, EPROTO); 4548 return; 4549 } 4550 /* 4551 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4552 * (State stays same on this event) 4553 */ 4554 4555 /* 4556 * get connected endpoint 4557 */ 4558 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4559 freemsg(mp); 4560 /* Peer closed */ 4561 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4562 "tl_data: peer gone")); 4563 return; 4564 } 4565 4566 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4567 peer_rq = peer_tep->te_rq; 4568 4569 /* 4570 * Put it back if flow controlled 4571 * Note: Messages already on queue when we are closing is bounded 4572 * so we can ignore flow control. 
4573 */ 4574 if (!canputnext(peer_rq) && !closing) { 4575 TL_PUTBQ(tep, mp); 4576 return; 4577 } 4578 4579 /* 4580 * validate peer state 4581 */ 4582 switch (peer_tep->te_state) { 4583 case TS_DATA_XFER: 4584 case TS_WIND_ORDREL: 4585 /* valid states */ 4586 break; 4587 default: 4588 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4589 "tl_data:rx side:invalid state")); 4590 tl_merror(peer_tep->te_wq, mp, EPROTO); 4591 return; 4592 } 4593 if (DB_TYPE(mp) == M_PROTO) { 4594 /* reuse message block - just change REQ to IND */ 4595 if (prim->type == T_DATA_REQ) 4596 prim->type = T_DATA_IND; 4597 else 4598 prim->type = T_OPTDATA_IND; 4599 } 4600 /* 4601 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4602 * (peer state stays same on this event) 4603 */ 4604 /* 4605 * send data to connected peer 4606 */ 4607 putnext(peer_rq, mp); 4608 } 4609 4610 4611 4612 static void 4613 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4614 { 4615 queue_t *wq = tep->te_wq; 4616 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4617 ssize_t msz = MBLKL(mp); 4618 tl_endpt_t *peer_tep; 4619 queue_t *peer_rq; 4620 boolean_t closing = tep->te_closing; 4621 4622 if (msz < sizeof (struct T_exdata_req)) { 4623 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4624 "tl_exdata:invalid message")); 4625 if (!closing) { 4626 tl_merror(wq, mp, EPROTO); 4627 } else { 4628 freemsg(mp); 4629 } 4630 return; 4631 } 4632 4633 /* 4634 * If the endpoint is closing it should still forward any data to the 4635 * peer (if it has one). If it is not allowed to forward it can just 4636 * free the message. 4637 */ 4638 if (closing && 4639 (tep->te_state != TS_DATA_XFER) && 4640 (tep->te_state != TS_WREQ_ORDREL)) { 4641 freemsg(mp); 4642 return; 4643 } 4644 4645 /* 4646 * validate state 4647 */ 4648 switch (tep->te_state) { 4649 case TS_IDLE: 4650 /* 4651 * Other end not here - do nothing. 
4652 */ 4653 freemsg(mp); 4654 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4655 "tl_exdata:cots with endpoint idle")); 4656 return; 4657 4658 case TS_DATA_XFER: 4659 /* valid states */ 4660 if (tep->te_conp != NULL) 4661 break; 4662 4663 if (tep->te_oconp == NULL) { 4664 if (!closing) { 4665 tl_merror(wq, mp, EPROTO); 4666 } else { 4667 freemsg(mp); 4668 } 4669 return; 4670 } 4671 /* 4672 * For a socket the T_CONN_CON is sent early thus 4673 * the peer might not yet have accepted the connection. 4674 * If we are closing queue the packet with the T_CONN_IND. 4675 * Otherwise defer processing the packet until the peer 4676 * accepts the connection. 4677 * Note that the queue is noenabled when we go into this 4678 * state. 4679 */ 4680 if (!closing) { 4681 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4682 SL_TRACE|SL_ERROR, 4683 "tl_exdata: ocon")); 4684 TL_PUTBQ(tep, mp); 4685 return; 4686 } 4687 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4688 "tl_exdata: closing socket ocon")); 4689 prim->type = T_EXDATA_IND; 4690 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4691 return; 4692 4693 case TS_WREQ_ORDREL: 4694 if (tep->te_conp == NULL) { 4695 /* 4696 * Other end closed - generate discon_ind 4697 * with reason 0 to cause an EPIPE but no 4698 * read side error on AF_UNIX sockets. 
4699 */ 4700 freemsg(mp); 4701 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4702 SL_TRACE|SL_ERROR, 4703 "tl_exdata: WREQ_ORDREL and no peer")); 4704 tl_discon_ind(tep, 0); 4705 return; 4706 } 4707 break; 4708 4709 default: 4710 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4711 SL_TRACE|SL_ERROR, 4712 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4713 tep->te_state)); 4714 tl_merror(wq, mp, EPROTO); 4715 return; 4716 } 4717 /* 4718 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4719 * (state stays same on this event) 4720 */ 4721 4722 /* 4723 * get connected endpoint 4724 */ 4725 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4726 freemsg(mp); 4727 /* Peer closed */ 4728 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4729 "tl_exdata: peer gone")); 4730 return; 4731 } 4732 4733 peer_rq = peer_tep->te_rq; 4734 4735 /* 4736 * Put it back if flow controlled 4737 * Note: Messages already on queue when we are closing is bounded 4738 * so we can ignore flow control. 4739 */ 4740 if (!canputnext(peer_rq) && !closing) { 4741 TL_PUTBQ(tep, mp); 4742 return; 4743 } 4744 4745 /* 4746 * validate state on peer 4747 */ 4748 switch (peer_tep->te_state) { 4749 case TS_DATA_XFER: 4750 case TS_WIND_ORDREL: 4751 /* valid states */ 4752 break; 4753 default: 4754 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4755 "tl_exdata:rx side:invalid state")); 4756 tl_merror(peer_tep->te_wq, mp, EPROTO); 4757 return; 4758 } 4759 /* 4760 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4761 * (peer state stays same on this event) 4762 */ 4763 /* 4764 * reuse message block 4765 */ 4766 prim->type = T_EXDATA_IND; 4767 4768 /* 4769 * send data to connected peer 4770 */ 4771 putnext(peer_rq, mp); 4772 } 4773 4774 4775 4776 static void 4777 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4778 { 4779 queue_t *wq = tep->te_wq; 4780 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4781 ssize_t msz = MBLKL(mp); 4782 tl_endpt_t *peer_tep; 4783 
queue_t *peer_rq; 4784 boolean_t closing = tep->te_closing; 4785 4786 if (msz < sizeof (struct T_ordrel_req)) { 4787 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4788 "tl_ordrel:invalid message")); 4789 if (!closing) { 4790 tl_merror(wq, mp, EPROTO); 4791 } else { 4792 freemsg(mp); 4793 } 4794 return; 4795 } 4796 4797 /* 4798 * validate state 4799 */ 4800 switch (tep->te_state) { 4801 case TS_DATA_XFER: 4802 case TS_WREQ_ORDREL: 4803 /* valid states */ 4804 if (tep->te_conp != NULL) 4805 break; 4806 4807 if (tep->te_oconp == NULL) 4808 break; 4809 4810 /* 4811 * For a socket the T_CONN_CON is sent early thus 4812 * the peer might not yet have accepted the connection. 4813 * If we are closing queue the packet with the T_CONN_IND. 4814 * Otherwise defer processing the packet until the peer 4815 * accepts the connection. 4816 * Note that the queue is noenabled when we go into this 4817 * state. 4818 */ 4819 if (!closing) { 4820 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4821 SL_TRACE|SL_ERROR, 4822 "tl_ordlrel: ocon")); 4823 TL_PUTBQ(tep, mp); 4824 return; 4825 } 4826 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4827 "tl_ordlrel: closing socket ocon")); 4828 prim->type = T_ORDREL_IND; 4829 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4830 return; 4831 4832 default: 4833 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4834 SL_TRACE|SL_ERROR, 4835 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4836 tep->te_state)); 4837 if (!closing) { 4838 tl_merror(wq, mp, EPROTO); 4839 } else { 4840 freemsg(mp); 4841 } 4842 return; 4843 } 4844 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4845 4846 /* 4847 * get connected endpoint 4848 */ 4849 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4850 /* Peer closed */ 4851 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4852 "tl_ordrel: peer gone")); 4853 freemsg(mp); 4854 return; 4855 } 4856 4857 peer_rq = peer_tep->te_rq; 4858 4859 /* 4860 * Put it back if flow controlled 
except when we are closing. 4861 * Note: Messages already on queue when we are closing is bounded 4862 * so we can ignore flow control. 4863 */ 4864 if (! canputnext(peer_rq) && !closing) { 4865 TL_PUTBQ(tep, mp); 4866 return; 4867 } 4868 4869 /* 4870 * validate state on peer 4871 */ 4872 switch (peer_tep->te_state) { 4873 case TS_DATA_XFER: 4874 case TS_WIND_ORDREL: 4875 /* valid states */ 4876 break; 4877 default: 4878 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4879 "tl_ordrel:rx side:invalid state")); 4880 tl_merror(peer_tep->te_wq, mp, EPROTO); 4881 return; 4882 } 4883 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4884 4885 /* 4886 * reuse message block 4887 */ 4888 prim->type = T_ORDREL_IND; 4889 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4890 "tl_ordrel: send ordrel_ind")); 4891 4892 /* 4893 * send data to connected peer 4894 */ 4895 putnext(peer_rq, mp); 4896 } 4897 4898 4899 /* 4900 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 4901 */ 4902 static void 4903 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4904 { 4905 size_t err_sz; 4906 tl_endpt_t *tep; 4907 struct T_unitdata_req *udreq; 4908 mblk_t *err_mp; 4909 t_scalar_t alen; 4910 t_scalar_t olen; 4911 struct T_uderror_ind *uderr; 4912 uchar_t *addr_startp; 4913 4914 err_sz = sizeof (struct T_uderror_ind); 4915 tep = (tl_endpt_t *)wq->q_ptr; 4916 udreq = (struct T_unitdata_req *)mp->b_rptr; 4917 alen = udreq->DEST_length; 4918 olen = udreq->OPT_length; 4919 4920 if (alen > 0) 4921 err_sz = T_ALIGN(err_sz + alen); 4922 if (olen > 0) 4923 err_sz += olen; 4924 4925 err_mp = allocb(err_sz, BPRI_MED); 4926 if (! 
err_mp) { 4927 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4928 "tl_uderr:allocb failure")); 4929 /* 4930 * Note: no rollback of state needed as it does 4931 * not change in connectionless transport 4932 */ 4933 tl_memrecover(wq, mp, err_sz); 4934 return; 4935 } 4936 4937 DB_TYPE(err_mp) = M_PROTO; 4938 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4939 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4940 uderr->PRIM_type = T_UDERROR_IND; 4941 uderr->ERROR_type = err; 4942 uderr->DEST_length = alen; 4943 uderr->OPT_length = olen; 4944 if (alen <= 0) { 4945 uderr->DEST_offset = 0; 4946 } else { 4947 uderr->DEST_offset = 4948 (t_scalar_t)sizeof (struct T_uderror_ind); 4949 addr_startp = mp->b_rptr + udreq->DEST_offset; 4950 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4951 (size_t)alen); 4952 } 4953 if (olen <= 0) { 4954 uderr->OPT_offset = 0; 4955 } else { 4956 uderr->OPT_offset = 4957 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4958 uderr->DEST_length); 4959 addr_startp = mp->b_rptr + udreq->OPT_offset; 4960 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4961 (size_t)olen); 4962 } 4963 freemsg(mp); 4964 4965 /* 4966 * send indication message 4967 */ 4968 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4969 4970 qreply(wq, err_mp); 4971 } 4972 4973 static void 4974 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4975 { 4976 queue_t *wq = tep->te_wq; 4977 4978 if (!tep->te_closing && (wq->q_first != NULL)) { 4979 TL_PUTQ(tep, mp); 4980 } else if (tep->te_rq != NULL) 4981 tl_unitdata(mp, tep); 4982 else 4983 freemsg(mp); 4984 4985 tl_serializer_exit(tep); 4986 tl_refrele(tep); 4987 } 4988 4989 /* 4990 * Handle T_unitdata_req. 4991 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 4992 * If this is a socket pass through options unmodified. 
4993 */ 4994 static void 4995 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4996 { 4997 queue_t *wq = tep->te_wq; 4998 soux_addr_t ux_addr; 4999 tl_addr_t destaddr; 5000 uchar_t *addr_startp; 5001 tl_endpt_t *peer_tep; 5002 struct T_unitdata_ind *udind; 5003 struct T_unitdata_req *udreq; 5004 ssize_t msz, ui_sz, reuse_mb_sz; 5005 t_scalar_t alen, aoff, olen, ooff; 5006 t_scalar_t oldolen = 0; 5007 cred_t *cr = NULL; 5008 pid_t cpid; 5009 5010 udreq = (struct T_unitdata_req *)mp->b_rptr; 5011 msz = MBLKL(mp); 5012 5013 /* 5014 * validate the state 5015 */ 5016 if (tep->te_state != TS_IDLE) { 5017 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5018 SL_TRACE|SL_ERROR, 5019 "tl_wput:T_CONN_REQ:out of state")); 5020 tl_merror(wq, mp, EPROTO); 5021 return; 5022 } 5023 /* 5024 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5025 * (state does not change on this event) 5026 */ 5027 5028 /* 5029 * validate the message 5030 * Note: dereference fields in struct inside message only 5031 * after validating the message length. 
5032 */ 5033 if (msz < sizeof (struct T_unitdata_req)) { 5034 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5035 "tl_unitdata:invalid message length")); 5036 tl_merror(wq, mp, EINVAL); 5037 return; 5038 } 5039 alen = udreq->DEST_length; 5040 aoff = udreq->DEST_offset; 5041 oldolen = olen = udreq->OPT_length; 5042 ooff = udreq->OPT_offset; 5043 if (olen == 0) 5044 ooff = 0; 5045 5046 if (IS_SOCKET(tep)) { 5047 if ((alen != TL_SOUX_ADDRLEN) || 5048 (aoff < 0) || 5049 (aoff + alen > msz) || 5050 (olen < 0) || (ooff < 0) || 5051 ((olen > 0) && ((ooff + olen) > msz))) { 5052 (void) (STRLOG(TL_ID, tep->te_minor, 5053 1, SL_TRACE|SL_ERROR, 5054 "tl_unitdata_req: invalid socket addr " 5055 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5056 (int)msz, alen, aoff, olen, ooff)); 5057 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5058 return; 5059 } 5060 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5061 5062 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5063 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5064 (void) (STRLOG(TL_ID, tep->te_minor, 5065 1, SL_TRACE|SL_ERROR, 5066 "tl_conn_req: invalid socket magic")); 5067 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5068 return; 5069 } 5070 } else { 5071 if ((alen < 0) || 5072 (aoff < 0) || 5073 ((alen > 0) && ((aoff + alen) > msz)) || 5074 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5075 ((aoff + alen) < 0) || 5076 ((olen > 0) && ((ooff + olen) > msz)) || 5077 (olen < 0) || 5078 (ooff < 0) || 5079 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5080 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5081 SL_TRACE|SL_ERROR, 5082 "tl_unitdata:invalid unit data message")); 5083 tl_merror(wq, mp, EINVAL); 5084 return; 5085 } 5086 } 5087 5088 /* Options not supported unless it's a socket */ 5089 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5090 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5091 "tl_unitdata:option use(unsupported) or zero len addr")); 5092 
tl_uderr(wq, mp, EPROTO); 5093 return; 5094 } 5095 #ifdef DEBUG 5096 /* 5097 * Mild form of ASSERT()ion to detect broken TPI apps. 5098 * if (! assertion) 5099 * log warning; 5100 */ 5101 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5102 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5103 "tl_unitdata:addr overlaps TPI message")); 5104 } 5105 #endif 5106 /* 5107 * get destination endpoint 5108 */ 5109 destaddr.ta_alen = alen; 5110 destaddr.ta_abuf = mp->b_rptr + aoff; 5111 destaddr.ta_zoneid = tep->te_zoneid; 5112 5113 /* 5114 * Check whether the destination is the same that was used previously 5115 * and the destination endpoint is in the right state. If something is 5116 * wrong, find destination again and cache it. 5117 */ 5118 peer_tep = tep->te_lastep; 5119 5120 if ((peer_tep == NULL) || peer_tep->te_closing || 5121 (peer_tep->te_state != TS_IDLE) || 5122 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5123 /* 5124 * Not the same as cached destination , need to find the right 5125 * destination. 5126 */ 5127 peer_tep = (IS_SOCKET(tep) ? 5128 tl_sock_find_peer(tep, &ux_addr) : 5129 tl_find_peer(tep, &destaddr)); 5130 5131 if (peer_tep == NULL) { 5132 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5133 SL_TRACE|SL_ERROR, 5134 "tl_unitdata:no one at destination address")); 5135 tl_uderr(wq, mp, ECONNRESET); 5136 return; 5137 } 5138 5139 /* 5140 * Cache the new peer. 5141 */ 5142 if (tep->te_lastep != NULL) 5143 tl_refrele(tep->te_lastep); 5144 5145 tep->te_lastep = peer_tep; 5146 } 5147 5148 if (peer_tep->te_state != TS_IDLE) { 5149 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5150 "tl_unitdata:provider in invalid state")); 5151 tl_uderr(wq, mp, EPROTO); 5152 return; 5153 } 5154 5155 ASSERT(peer_tep->te_rq != NULL); 5156 5157 /* 5158 * Put it back if flow controlled except when we are closing. 5159 * Note: Messages already on queue when we are closing is bounded 5160 * so we can ignore flow control. 
5161 */ 5162 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5163 /* record what we are flow controlled on */ 5164 if (tep->te_flowq != NULL) { 5165 list_remove(&tep->te_flowq->te_flowlist, tep); 5166 } 5167 list_insert_head(&peer_tep->te_flowlist, tep); 5168 tep->te_flowq = peer_tep; 5169 TL_PUTBQ(tep, mp); 5170 return; 5171 } 5172 /* 5173 * prepare indication message 5174 */ 5175 5176 /* 5177 * calculate length of message 5178 */ 5179 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5180 cr = msg_getcred(mp, &cpid); 5181 ASSERT(cr != NULL); 5182 5183 if (peer_tep->te_flag & TL_SETCRED) { 5184 ASSERT(olen == 0); 5185 olen = (t_scalar_t)sizeof (struct opthdr) + 5186 OPTLEN(sizeof (tl_credopt_t)); 5187 /* 1 option only */ 5188 } else if (peer_tep->te_flag & TL_SETUCRED) { 5189 ASSERT(olen == 0); 5190 olen = (t_scalar_t)sizeof (struct opthdr) + 5191 OPTLEN(ucredminsize(cr)); 5192 /* 1 option only */ 5193 } else { 5194 /* Possibly more than one option */ 5195 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5196 OPTLEN(ucredminsize(cr)); 5197 } 5198 } 5199 5200 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen; 5201 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen; 5202 5203 /* 5204 * If the unitdata_ind fits and we are not adding options 5205 * reuse the udreq mblk. 5206 * 5207 * Otherwise, it is possible we need to append an option if one of the 5208 * te_flag bits is set. This requires extra space in the data block for 5209 * the additional option but the traditional technique used below to 5210 * allocate a new block and copy into it will not work when there is a 5211 * message block with a free pointer (since we don't know anything 5212 * about the layout of the data, pointers referencing or within the 5213 * data, etc.). To handle this possibility the upper layers may have 5214 * preallocated some space to use for appending an option. 
We check the 5215 * overall mblock size against the size we need ('reuse_mb_sz' with the 5216 * original address length [alen] to ensure we won't overrun the 5217 * current mblk data size) to see if there is free space and thus 5218 * avoid allocating a new message block. 5219 */ 5220 if (msz >= ui_sz && alen >= tep->te_alen && 5221 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5222 /* 5223 * Reuse the original mblk. Leave options in place. 5224 */ 5225 udind = (struct T_unitdata_ind *)mp->b_rptr; 5226 udind->PRIM_type = T_UNITDATA_IND; 5227 udind->SRC_length = tep->te_alen; 5228 addr_startp = mp->b_rptr + udind->SRC_offset; 5229 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5230 5231 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen && 5232 mp->b_datap->db_frtnp != NULL) { 5233 /* 5234 * We have a message block with a free pointer, but extra space 5235 * has been pre-allocated for us in case we need to append an 5236 * option. Reuse the original mblk, leaving existing options in 5237 * place. 5238 */ 5239 udind = (struct T_unitdata_ind *)mp->b_rptr; 5240 udind->PRIM_type = T_UNITDATA_IND; 5241 udind->SRC_length = tep->te_alen; 5242 addr_startp = mp->b_rptr + udind->SRC_offset; 5243 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5244 5245 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5246 ASSERT(cr != NULL); 5247 /* 5248 * We're appending one new option here after the 5249 * original ones. 5250 */ 5251 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen, 5252 cr, cpid, peer_tep->te_flag, peer_tep->te_credp); 5253 } 5254 5255 } else if (mp->b_datap->db_frtnp != NULL) { 5256 /* 5257 * The next block creates a new mp and tries to copy the data 5258 * block into it, but that cannot handle a message with a free 5259 * pointer (for more details see the comment in kstrputmsg() 5260 * where dupmsg() is called). 
Since we can never properly 5261 * duplicate the mp while also extending the data, just error 5262 * out now. 5263 */ 5264 tl_uderr(wq, mp, EPROTO); 5265 return; 5266 } else { 5267 /* Allocate a new T_unitdata_ind message */ 5268 mblk_t *ui_mp; 5269 5270 ui_mp = allocb(ui_sz, BPRI_MED); 5271 if (! ui_mp) { 5272 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5273 "tl_unitdata:allocb failure:message queued")); 5274 tl_memrecover(wq, mp, ui_sz); 5275 return; 5276 } 5277 5278 /* 5279 * fill in T_UNITDATA_IND contents 5280 */ 5281 DB_TYPE(ui_mp) = M_PROTO; 5282 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5283 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5284 udind->PRIM_type = T_UNITDATA_IND; 5285 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5286 udind->SRC_length = tep->te_alen; 5287 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5288 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5289 udind->OPT_offset = 5290 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5291 udind->OPT_length = olen; 5292 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5293 5294 if (oldolen != 0) { 5295 bcopy((void *)((uintptr_t)udreq + ooff), 5296 (void *)((uintptr_t)udind + 5297 udind->OPT_offset), 5298 oldolen); 5299 } 5300 ASSERT(cr != NULL); 5301 5302 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5303 oldolen, cr, cpid, 5304 peer_tep->te_flag, peer_tep->te_credp); 5305 } else { 5306 bcopy((void *)((uintptr_t)udreq + ooff), 5307 (void *)((uintptr_t)udind + udind->OPT_offset), 5308 olen); 5309 } 5310 5311 /* 5312 * relink data blocks from mp to ui_mp 5313 */ 5314 ui_mp->b_cont = mp->b_cont; 5315 freeb(mp); 5316 mp = ui_mp; 5317 } 5318 /* 5319 * send indication message 5320 */ 5321 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5322 putnext(peer_tep->te_rq, mp); 5323 } 5324 5325 5326 5327 /* 5328 * Check if a given addr is in use. 5329 * Endpoint ptr returned or NULL if not found. 
5330 * The name space is separate for each mode. This implies that 5331 * sockets get their own name space. 5332 */ 5333 static tl_endpt_t * 5334 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5335 { 5336 tl_endpt_t *peer_tep = NULL; 5337 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5338 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5339 5340 ASSERT(! IS_SOCKET(tep)); 5341 5342 ASSERT(ap != NULL && ap->ta_alen > 0); 5343 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5344 ASSERT(ap->ta_abuf != NULL); 5345 EQUIV(rc == 0, peer_tep != NULL); 5346 IMPLY(rc == 0, 5347 (tep->te_zoneid == peer_tep->te_zoneid) && 5348 (tep->te_transport == peer_tep->te_transport)); 5349 5350 if ((rc == 0) && (peer_tep->te_closing)) { 5351 tl_refrele(peer_tep); 5352 peer_tep = NULL; 5353 } 5354 5355 return (peer_tep); 5356 } 5357 5358 /* 5359 * Find peer for a socket based on unix domain address. 5360 * For implicit addresses our peer can be found by minor number in ai hash. For 5361 * explicit binds we look vnode address at addr_hash. 5362 */ 5363 static tl_endpt_t * 5364 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5365 { 5366 tl_endpt_t *peer_tep = NULL; 5367 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5368 tep->te_aihash : tep->te_addrhash; 5369 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5370 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5371 5372 ASSERT(IS_SOCKET(tep)); 5373 EQUIV(rc == 0, peer_tep != NULL); 5374 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); 5375 5376 if (peer_tep != NULL) { 5377 /* Don't attempt to use closing peer. */ 5378 if (peer_tep->te_closing) 5379 goto errout; 5380 5381 /* 5382 * Cross-zone unix sockets are permitted, but for Trusted 5383 * Extensions only, the "server" for these must be in the 5384 * global zone. 
5385 */ 5386 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5387 is_system_labeled() && 5388 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5389 goto errout; 5390 } 5391 5392 return (peer_tep); 5393 5394 errout: 5395 tl_refrele(peer_tep); 5396 return (NULL); 5397 } 5398 5399 /* 5400 * Generate a free addr and return it in struct pointed by ap 5401 * but allocating space for address buffer. 5402 * The generated address will be at least 4 bytes long and, if req->ta_alen 5403 * exceeds 4 bytes, be req->ta_alen bytes long. 5404 * 5405 * If address is found it will be inserted in the hash. 5406 * 5407 * If req->ta_alen is larger than the default alen (4 bytes) the last 5408 * alen-4 bytes will always be the same as in req. 5409 * 5410 * Return 0 for failure. 5411 * Return non-zero for success. 5412 */ 5413 static boolean_t 5414 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5415 { 5416 t_scalar_t alen; 5417 uint32_t loopcnt; /* Limit loop to 2^32 */ 5418 5419 ASSERT(tep->te_hash_hndl != NULL); 5420 ASSERT(! IS_SOCKET(tep)); 5421 5422 if (tep->te_hash_hndl == NULL) 5423 return (B_FALSE); 5424 5425 /* 5426 * check if default addr is in use 5427 * if it is - bump it and try again 5428 */ 5429 if (req == NULL) { 5430 alen = sizeof (uint32_t); 5431 } else { 5432 alen = max(req->ta_alen, sizeof (uint32_t)); 5433 ASSERT(tep->te_zoneid == req->ta_zoneid); 5434 } 5435 5436 if (tep->te_alen < alen) { 5437 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5438 5439 /* 5440 * Not enough space in tep->ta_ap to hold the address, 5441 * allocate a bigger space. 
5442 */ 5443 if (abuf == NULL) 5444 return (B_FALSE); 5445 5446 if (tep->te_alen > 0) 5447 kmem_free(tep->te_abuf, tep->te_alen); 5448 5449 tep->te_alen = alen; 5450 tep->te_abuf = abuf; 5451 } 5452 5453 /* Copy in the address in req */ 5454 if (req != NULL) { 5455 ASSERT(alen >= req->ta_alen); 5456 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5457 } 5458 5459 /* 5460 * First try minor number then try default addresses. 5461 */ 5462 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5463 5464 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5465 if (mod_hash_insert_reserve(tep->te_addrhash, 5466 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5467 tep->te_hash_hndl) == 0) { 5468 /* 5469 * found free address 5470 */ 5471 tep->te_flag |= TL_ADDRHASHED; 5472 tep->te_hash_hndl = NULL; 5473 5474 return (B_TRUE); /* successful return */ 5475 } 5476 /* 5477 * Use default address. 5478 */ 5479 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5480 atomic_inc_32(&tep->te_defaddr); 5481 } 5482 5483 /* 5484 * Failed to find anything. 5485 */ 5486 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5487 "tl_get_any_addr:looped 2^32 times")); 5488 return (B_FALSE); 5489 } 5490 5491 /* 5492 * reallocb + set r/w ptrs to reflect size. 5493 */ 5494 static mblk_t * 5495 tl_resizemp(mblk_t *mp, ssize_t new_size) 5496 { 5497 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5498 return (NULL); 5499 5500 mp->b_rptr = DB_BASE(mp); 5501 mp->b_wptr = mp->b_rptr + new_size; 5502 return (mp); 5503 } 5504 5505 static void 5506 tl_cl_backenable(tl_endpt_t *tep) 5507 { 5508 list_t *l = &tep->te_flowlist; 5509 tl_endpt_t *elp; 5510 5511 ASSERT(IS_CLTS(tep)); 5512 5513 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5514 ASSERT(tep->te_ser == elp->te_ser); 5515 ASSERT(elp->te_flowq == tep); 5516 if (! elp->te_closing) 5517 TL_QENABLE(elp); 5518 elp->te_flowq = NULL; 5519 list_remove(l, elp); 5520 } 5521 } 5522 5523 /* 5524 * Unconnect endpoints. 
5525 */ 5526 static void 5527 tl_co_unconnect(tl_endpt_t *tep) 5528 { 5529 tl_endpt_t *peer_tep = tep->te_conp; 5530 tl_endpt_t *srv_tep = tep->te_oconp; 5531 list_t *l; 5532 tl_icon_t *tip; 5533 tl_endpt_t *cl_tep; 5534 mblk_t *d_mp; 5535 5536 ASSERT(IS_COTS(tep)); 5537 /* 5538 * If our peer is closing, don't use it. 5539 */ 5540 if ((peer_tep != NULL) && peer_tep->te_closing) { 5541 TL_UNCONNECT(tep->te_conp); 5542 peer_tep = NULL; 5543 } 5544 if ((srv_tep != NULL) && srv_tep->te_closing) { 5545 TL_UNCONNECT(tep->te_oconp); 5546 srv_tep = NULL; 5547 } 5548 5549 if (tep->te_nicon > 0) { 5550 l = &tep->te_iconp; 5551 /* 5552 * If incoming requests pending, change state 5553 * of clients on disconnect ind event and send 5554 * discon_ind pdu to modules above them 5555 * for server: all clients get disconnect 5556 */ 5557 5558 while (tep->te_nicon > 0) { 5559 tip = list_head(l); 5560 cl_tep = tip->ti_tep; 5561 5562 if (cl_tep == NULL) { 5563 tl_freetip(tep, tip); 5564 continue; 5565 } 5566 5567 if (cl_tep->te_oconp != NULL) { 5568 ASSERT(cl_tep != cl_tep->te_oconp); 5569 TL_UNCONNECT(cl_tep->te_oconp); 5570 } 5571 5572 if (cl_tep->te_closing) { 5573 tl_freetip(tep, tip); 5574 continue; 5575 } 5576 5577 enableok(cl_tep->te_wq); 5578 TL_QENABLE(cl_tep); 5579 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5580 if (d_mp != NULL) { 5581 cl_tep->te_state = TS_IDLE; 5582 putnext(cl_tep->te_rq, d_mp); 5583 } else { 5584 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5585 SL_TRACE|SL_ERROR, 5586 "tl_co_unconnect:icmng: " 5587 "allocb failure")); 5588 } 5589 tl_freetip(tep, tip); 5590 } 5591 } else if (srv_tep != NULL) { 5592 /* 5593 * If outgoing request pending, change state 5594 * of server on discon ind event 5595 */ 5596 5597 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5598 IS_COTSORD(srv_tep) && 5599 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5600 /* 5601 * Queue ordrel_ind for server to be picked up 5602 * when the connection is accepted. 
5603 */ 5604 d_mp = tl_ordrel_ind_alloc(); 5605 } else { 5606 /* 5607 * send discon_ind to server 5608 */ 5609 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5610 } 5611 if (d_mp == NULL) { 5612 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5613 SL_TRACE|SL_ERROR, 5614 "tl_co_unconnect:outgoing:allocb failure")); 5615 TL_UNCONNECT(tep->te_oconp); 5616 goto discon_peer; 5617 } 5618 5619 /* 5620 * If this is a socket the T_DISCON_IND is queued with 5621 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5622 * from the list of pending connections. 5623 * Note that when te_oconp is set the peer better have 5624 * a t_connind_t for the client. 5625 */ 5626 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5627 /* 5628 * Queue the disconnection message. 5629 */ 5630 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5631 } else { 5632 tip = tl_icon_find(srv_tep, tep->te_seqno); 5633 if (tip == NULL) { 5634 freemsg(d_mp); 5635 } else { 5636 ASSERT(tep == tip->ti_tep); 5637 ASSERT(tep->te_ser == srv_tep->te_ser); 5638 /* 5639 * Delete tip from the server list. 
5640 */ 5641 if (srv_tep->te_nicon == 1) { 5642 srv_tep->te_state = 5643 NEXTSTATE(TE_DISCON_IND2, 5644 srv_tep->te_state); 5645 } else { 5646 srv_tep->te_state = 5647 NEXTSTATE(TE_DISCON_IND3, 5648 srv_tep->te_state); 5649 } 5650 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5651 T_DISCON_IND); 5652 putnext(srv_tep->te_rq, d_mp); 5653 tl_freetip(srv_tep, tip); 5654 } 5655 TL_UNCONNECT(tep->te_oconp); 5656 srv_tep = NULL; 5657 } 5658 } else if (peer_tep != NULL) { 5659 /* 5660 * unconnect existing connection 5661 * If connected, change state of peer on 5662 * discon ind event and send discon ind pdu 5663 * to module above it 5664 */ 5665 5666 ASSERT(tep->te_ser == peer_tep->te_ser); 5667 if (IS_COTSORD(peer_tep) && 5668 (peer_tep->te_state == TS_WIND_ORDREL || 5669 peer_tep->te_state == TS_DATA_XFER)) { 5670 /* 5671 * send ordrel ind 5672 */ 5673 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5674 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5675 peer_tep->te_state, 5676 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5677 d_mp = tl_ordrel_ind_alloc(); 5678 if (! d_mp) { 5679 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5680 SL_TRACE|SL_ERROR, 5681 "tl_co_unconnect:connected:" 5682 "allocb failure")); 5683 /* 5684 * Continue with cleaning up peer as 5685 * this side may go away with the close 5686 */ 5687 TL_QENABLE(peer_tep); 5688 goto discon_peer; 5689 } 5690 peer_tep->te_state = 5691 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5692 5693 putnext(peer_tep->te_rq, d_mp); 5694 /* 5695 * Handle flow control case. This will generate 5696 * a t_discon_ind message with reason 0 if there 5697 * is data queued on the write side. 5698 */ 5699 TL_QENABLE(peer_tep); 5700 } else if (IS_COTSORD(peer_tep) && 5701 peer_tep->te_state == TS_WREQ_ORDREL) { 5702 /* 5703 * Sent an ordrel_ind. We send a discon with 5704 * with error 0 to inform that the peer is gone. 
5705 */ 5706 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5707 SL_TRACE|SL_ERROR, 5708 "tl_co_unconnect: discon in state %d", 5709 tep->te_state)); 5710 tl_discon_ind(peer_tep, 0); 5711 } else { 5712 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5713 SL_TRACE|SL_ERROR, 5714 "tl_co_unconnect: state %d", tep->te_state)); 5715 tl_discon_ind(peer_tep, ECONNRESET); 5716 } 5717 5718 discon_peer: 5719 /* 5720 * Disconnect cross-pointers only for close 5721 */ 5722 if (tep->te_closing) { 5723 peer_tep = tep->te_conp; 5724 TL_REMOVE_PEER(peer_tep->te_conp); 5725 TL_REMOVE_PEER(tep->te_conp); 5726 } 5727 } 5728 } 5729 5730 /* 5731 * Note: The following routine does not recover from allocb() 5732 * failures 5733 * The reason should be from the <sys/errno.h> space. 5734 */ 5735 static void 5736 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5737 { 5738 mblk_t *d_mp; 5739 5740 if (tep->te_closing) 5741 return; 5742 5743 /* 5744 * flush the queues. 5745 */ 5746 flushq(tep->te_rq, FLUSHDATA); 5747 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5748 5749 /* 5750 * send discon ind 5751 */ 5752 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5753 if (! d_mp) { 5754 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5755 "tl_discon_ind:allocb failure")); 5756 return; 5757 } 5758 tep->te_state = TS_IDLE; 5759 putnext(tep->te_rq, d_mp); 5760 } 5761 5762 /* 5763 * Note: The following routine does not recover from allocb() 5764 * failures 5765 * The reason should be from the <sys/errno.h> space. 
5766 */ 5767 static mblk_t * 5768 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5769 { 5770 mblk_t *mp; 5771 struct T_discon_ind *tdi; 5772 5773 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5774 DB_TYPE(mp) = M_PROTO; 5775 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5776 tdi = (struct T_discon_ind *)mp->b_rptr; 5777 tdi->PRIM_type = T_DISCON_IND; 5778 tdi->DISCON_reason = reason; 5779 tdi->SEQ_number = seqnum; 5780 } 5781 return (mp); 5782 } 5783 5784 5785 /* 5786 * Note: The following routine does not recover from allocb() 5787 * failures 5788 */ 5789 static mblk_t * 5790 tl_ordrel_ind_alloc(void) 5791 { 5792 mblk_t *mp; 5793 struct T_ordrel_ind *toi; 5794 5795 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5796 DB_TYPE(mp) = M_PROTO; 5797 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5798 toi = (struct T_ordrel_ind *)mp->b_rptr; 5799 toi->PRIM_type = T_ORDREL_IND; 5800 } 5801 return (mp); 5802 } 5803 5804 5805 /* 5806 * Lookup the seqno in the list of queued connections. 5807 */ 5808 static tl_icon_t * 5809 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5810 { 5811 list_t *l = &tep->te_iconp; 5812 tl_icon_t *tip = list_head(l); 5813 5814 ASSERT(seqno != 0); 5815 5816 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5817 ; 5818 5819 return (tip); 5820 } 5821 5822 /* 5823 * Queue data for a given T_CONN_IND while verifying that redundant 5824 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5825 * Used when the originator of the connection closes. 
5826 */ 5827 static void 5828 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5829 { 5830 tl_icon_t *tip; 5831 mblk_t **mpp, *mp; 5832 int prim, nprim; 5833 5834 if (nmp->b_datap->db_type == M_PROTO) 5835 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5836 else 5837 nprim = -1; /* M_DATA */ 5838 5839 tip = tl_icon_find(tep, seqno); 5840 if (tip == NULL) { 5841 freemsg(nmp); 5842 return; 5843 } 5844 5845 ASSERT(tip->ti_seqno != 0); 5846 mpp = &tip->ti_mp; 5847 while (*mpp != NULL) { 5848 mp = *mpp; 5849 5850 if (mp->b_datap->db_type == M_PROTO) 5851 prim = ((union T_primitives *)mp->b_rptr)->type; 5852 else 5853 prim = -1; /* M_DATA */ 5854 5855 /* 5856 * Allow nothing after a T_DISCON_IND 5857 */ 5858 if (prim == T_DISCON_IND) { 5859 freemsg(nmp); 5860 return; 5861 } 5862 /* 5863 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5864 */ 5865 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5866 freemsg(nmp); 5867 return; 5868 } 5869 mpp = &(mp->b_next); 5870 } 5871 *mpp = nmp; 5872 } 5873 5874 /* 5875 * Verify if a certain TPI primitive exists on the connind queue. 5876 * Use prim -1 for M_DATA. 5877 * Return non-zero if found. 5878 */ 5879 static boolean_t 5880 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5881 { 5882 tl_icon_t *tip = tl_icon_find(tep, seqno); 5883 boolean_t found = B_FALSE; 5884 5885 if (tip != NULL) { 5886 mblk_t *mp; 5887 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5888 found = (DB_TYPE(mp) == M_PROTO && 5889 ((union T_primitives *)mp->b_rptr)->type == prim); 5890 } 5891 } 5892 return (found); 5893 } 5894 5895 /* 5896 * Send the b_next mblk chain that has accumulated before the connection 5897 * was accepted. Perform the necessary state transitions. 
5898 */ 5899 static void 5900 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5901 { 5902 mblk_t *mp; 5903 union T_primitives *primp; 5904 5905 if (tep->te_closing) { 5906 tl_icon_freemsgs(mpp); 5907 return; 5908 } 5909 5910 ASSERT(tep->te_state == TS_DATA_XFER); 5911 ASSERT(tep->te_rq->q_first == NULL); 5912 5913 while ((mp = *mpp) != NULL) { 5914 *mpp = mp->b_next; 5915 mp->b_next = NULL; 5916 5917 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5918 switch (DB_TYPE(mp)) { 5919 default: 5920 freemsg(mp); 5921 break; 5922 case M_DATA: 5923 putnext(tep->te_rq, mp); 5924 break; 5925 case M_PROTO: 5926 primp = (union T_primitives *)mp->b_rptr; 5927 switch (primp->type) { 5928 case T_UNITDATA_IND: 5929 case T_DATA_IND: 5930 case T_OPTDATA_IND: 5931 case T_EXDATA_IND: 5932 putnext(tep->te_rq, mp); 5933 break; 5934 case T_ORDREL_IND: 5935 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5936 tep->te_state); 5937 putnext(tep->te_rq, mp); 5938 break; 5939 case T_DISCON_IND: 5940 tep->te_state = TS_IDLE; 5941 putnext(tep->te_rq, mp); 5942 break; 5943 default: 5944 #ifdef DEBUG 5945 cmn_err(CE_PANIC, 5946 "tl_icon_sendmsgs: unknown primitive"); 5947 #endif /* DEBUG */ 5948 freemsg(mp); 5949 break; 5950 } 5951 break; 5952 } 5953 } 5954 } 5955 5956 /* 5957 * Free the b_next mblk chain that has accumulated before the connection 5958 * was accepted. 5959 */ 5960 static void 5961 tl_icon_freemsgs(mblk_t **mpp) 5962 { 5963 mblk_t *mp; 5964 5965 while ((mp = *mpp) != NULL) { 5966 *mpp = mp->b_next; 5967 mp->b_next = NULL; 5968 freemsg(mp); 5969 } 5970 } 5971 5972 /* 5973 * Send M_ERROR 5974 * Note: assumes caller ensured enough space in mp or enough 5975 * memory available. 
Does not attempt recovery from allocb() 5976 * failures 5977 */ 5978 5979 static void 5980 tl_merror(queue_t *wq, mblk_t *mp, int error) 5981 { 5982 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5983 5984 if (tep->te_closing) { 5985 freemsg(mp); 5986 return; 5987 } 5988 5989 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5990 SL_TRACE|SL_ERROR, 5991 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5992 5993 /* 5994 * flush all messages on queue. we are shutting 5995 * the stream down on fatal error 5996 */ 5997 flushq(wq, FLUSHALL); 5998 if (IS_COTS(tep)) { 5999 /* connection oriented - unconnect endpoints */ 6000 tl_co_unconnect(tep); 6001 } 6002 if (mp->b_cont) { 6003 freemsg(mp->b_cont); 6004 mp->b_cont = NULL; 6005 } 6006 6007 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 6008 freemsg(mp); 6009 mp = allocb(1, BPRI_HI); 6010 if (!mp) { 6011 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6012 SL_TRACE|SL_ERROR, 6013 "tl_merror:M_PROTO: out of memory")); 6014 return; 6015 } 6016 } 6017 if (mp) { 6018 DB_TYPE(mp) = M_ERROR; 6019 mp->b_rptr = DB_BASE(mp); 6020 *mp->b_rptr = (char)error; 6021 mp->b_wptr = mp->b_rptr + sizeof (char); 6022 qreply(wq, mp); 6023 } else { 6024 (void) putnextctl1(tep->te_rq, M_ERROR, error); 6025 } 6026 } 6027 6028 static void 6029 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 6030 { 6031 ASSERT(cr != NULL); 6032 6033 if (flag & TL_SETCRED) { 6034 struct opthdr *opt = (struct opthdr *)buf; 6035 tl_credopt_t *tlcred; 6036 6037 opt->level = TL_PROT_LEVEL; 6038 opt->name = TL_OPT_PEER_CRED; 6039 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 6040 6041 tlcred = (tl_credopt_t *)(opt + 1); 6042 tlcred->tc_uid = crgetuid(cr); 6043 tlcred->tc_gid = crgetgid(cr); 6044 tlcred->tc_ruid = crgetruid(cr); 6045 tlcred->tc_rgid = crgetrgid(cr); 6046 tlcred->tc_suid = crgetsuid(cr); 6047 tlcred->tc_sgid = crgetsgid(cr); 6048 tlcred->tc_ngroups = crgetngroups(cr); 6049 } else if (flag & TL_SETUCRED) { 6050 struct opthdr *opt = (struct 
opthdr *)buf; 6051 6052 opt->level = TL_PROT_LEVEL; 6053 opt->name = TL_OPT_PEER_UCRED; 6054 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); 6055 6056 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 6057 } else { 6058 struct T_opthdr *topt = (struct T_opthdr *)buf; 6059 ASSERT(flag & TL_SOCKUCRED); 6060 6061 topt->level = SOL_SOCKET; 6062 topt->name = SCM_UCRED; 6063 topt->len = ucredminsize(cr) + sizeof (*topt); 6064 topt->status = 0; 6065 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 6066 } 6067 } 6068 6069 /* ARGSUSED */ 6070 static int 6071 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6072 { 6073 /* no default value processed in protocol specific code currently */ 6074 return (-1); 6075 } 6076 6077 /* ARGSUSED */ 6078 static int 6079 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6080 { 6081 int len; 6082 tl_endpt_t *tep; 6083 int *valp; 6084 6085 tep = (tl_endpt_t *)wq->q_ptr; 6086 6087 len = 0; 6088 6089 /* 6090 * Assumes: option level and name sanity check done elsewhere 6091 */ 6092 6093 switch (level) { 6094 case SOL_SOCKET: 6095 if (! IS_SOCKET(tep)) 6096 break; 6097 switch (name) { 6098 case SO_RECVUCRED: 6099 len = sizeof (int); 6100 valp = (int *)ptr; 6101 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6102 break; 6103 default: 6104 break; 6105 } 6106 break; 6107 case TL_PROT_LEVEL: 6108 switch (name) { 6109 case TL_OPT_PEER_CRED: 6110 case TL_OPT_PEER_UCRED: 6111 /* 6112 * option not supposed to retrieved directly 6113 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6114 * when some internal flags set by other options 6115 * Direct retrieval always designed to fail(ignored) 6116 * for this option. 
6117 */ 6118 break; 6119 } 6120 } 6121 return (len); 6122 } 6123 6124 /* ARGSUSED */ 6125 static int 6126 tl_set_opt( 6127 queue_t *wq, 6128 uint_t mgmt_flags, 6129 int level, 6130 int name, 6131 uint_t inlen, 6132 uchar_t *invalp, 6133 uint_t *outlenp, 6134 uchar_t *outvalp, 6135 void *thisdg_attrs, 6136 cred_t *cr) 6137 { 6138 int error; 6139 tl_endpt_t *tep; 6140 6141 tep = (tl_endpt_t *)wq->q_ptr; 6142 6143 error = 0; /* NOERROR */ 6144 6145 /* 6146 * Assumes: option level and name sanity checks done elsewhere 6147 */ 6148 6149 switch (level) { 6150 case SOL_SOCKET: 6151 if (! IS_SOCKET(tep)) { 6152 error = EINVAL; 6153 break; 6154 } 6155 /* 6156 * TBD: fill in other AF_UNIX socket options and then stop 6157 * returning error. 6158 */ 6159 switch (name) { 6160 case SO_RECVUCRED: 6161 /* 6162 * We only support this for datagram sockets; 6163 * getpeerucred handles the connection oriented 6164 * transports. 6165 */ 6166 if (! IS_CLTS(tep)) { 6167 error = EINVAL; 6168 break; 6169 } 6170 if (*(int *)invalp == 0) 6171 tep->te_flag &= ~TL_SOCKUCRED; 6172 else 6173 tep->te_flag |= TL_SOCKUCRED; 6174 break; 6175 default: 6176 error = EINVAL; 6177 break; 6178 } 6179 break; 6180 case TL_PROT_LEVEL: 6181 switch (name) { 6182 case TL_OPT_PEER_CRED: 6183 case TL_OPT_PEER_UCRED: 6184 /* 6185 * option not supposed to be set directly 6186 * Its value in initialized for each endpoint at 6187 * driver open time. 6188 * Direct setting always designed to fail for this 6189 * option. 
6190 */ 6191 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6192 SL_TRACE|SL_ERROR, 6193 "tl_set_opt: option is not supported")); 6194 error = EPROTO; 6195 break; 6196 } 6197 } 6198 return (error); 6199 } 6200 6201 6202 static void 6203 tl_timer(void *arg) 6204 { 6205 queue_t *wq = arg; 6206 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6207 6208 ASSERT(tep); 6209 6210 tep->te_timoutid = 0; 6211 6212 enableok(wq); 6213 /* 6214 * Note: can call wsrv directly here and save context switch 6215 * Consider change when qtimeout (not timeout) is active 6216 */ 6217 qenable(wq); 6218 } 6219 6220 static void 6221 tl_buffer(void *arg) 6222 { 6223 queue_t *wq = arg; 6224 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6225 6226 ASSERT(tep); 6227 6228 tep->te_bufcid = 0; 6229 tep->te_nowsrv = B_FALSE; 6230 6231 enableok(wq); 6232 /* 6233 * Note: can call wsrv directly here and save context switch 6234 * Consider change when qbufcall (not bufcall) is active 6235 */ 6236 qenable(wq); 6237 } 6238 6239 static void 6240 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6241 { 6242 tl_endpt_t *tep; 6243 6244 tep = (tl_endpt_t *)wq->q_ptr; 6245 6246 if (tep->te_closing) { 6247 freemsg(mp); 6248 return; 6249 } 6250 noenable(wq); 6251 6252 (void) insq(wq, wq->q_first, mp); 6253 6254 if (tep->te_bufcid || tep->te_timoutid) { 6255 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6256 "tl_memrecover:recover %p pending", (void *)wq)); 6257 return; 6258 } 6259 6260 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6261 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6262 drv_usectohz(TL_BUFWAIT)); 6263 } 6264 } 6265 6266 static void 6267 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6268 { 6269 ASSERT(tip->ti_seqno != 0); 6270 6271 if (tip->ti_mp != NULL) { 6272 tl_icon_freemsgs(&tip->ti_mp); 6273 tip->ti_mp = NULL; 6274 } 6275 if (tip->ti_tep != NULL) { 6276 tl_refrele(tip->ti_tep); 6277 tip->ti_tep = NULL; 6278 } 6279 list_remove(&tep->te_iconp, tip); 6280 kmem_free(tip, 
sizeof (tl_icon_t)); 6281 tep->te_nicon--; 6282 } 6283 6284 /* 6285 * Remove address from address hash. 6286 */ 6287 static void 6288 tl_addr_unbind(tl_endpt_t *tep) 6289 { 6290 tl_endpt_t *elp; 6291 6292 if (tep->te_flag & TL_ADDRHASHED) { 6293 if (IS_SOCKET(tep)) { 6294 (void) mod_hash_remove(tep->te_addrhash, 6295 (mod_hash_key_t)tep->te_vp, 6296 (mod_hash_val_t *)&elp); 6297 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6298 tep->te_magic = SOU_MAGIC_IMPLICIT; 6299 } else { 6300 (void) mod_hash_remove(tep->te_addrhash, 6301 (mod_hash_key_t)&tep->te_ap, 6302 (mod_hash_val_t *)&elp); 6303 (void) kmem_free(tep->te_abuf, tep->te_alen); 6304 tep->te_alen = -1; 6305 tep->te_abuf = NULL; 6306 } 6307 tep->te_flag &= ~TL_ADDRHASHED; 6308 } 6309 } 6310