/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2018, Joyent, Inc. */ /* * Multithreaded STREAMS Local Transport Provider. * * OVERVIEW * ======== * * This driver provides TLI as well as socket semantics. It provides * connectionless, connection oriented, and connection oriented with orderly * release transports for TLI and sockets. Each transport type has separate name * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - * this removes any name space conflicts when binding to socket style transport * addresses. * * NOTE: There is one exception: Socket ticots and ticotsord transports share * the same namespace. In fact, sockets always use ticotsord type transport. * * The driver mode is specified during open() by the minor number used for * open. * * The sockets in addition have the following semantic differences: * No support for passing up credentials (TL_SET[U]CRED). 
* * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to * T_OPTDATA_IND. * * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before * a T_CONN_RES is received from the acceptor. This means that a socket * connect will complete before the peer has called accept. * * * MULTITHREADING * ============== * * The driver does not use STREAMS protection mechanisms. Instead it uses a * generic "serializer" abstraction. Most of the operations are executed behind * the serializer and are essentially single-threaded. All functions executed * behind the same serializer are strictly serialized. So if one thread calls * serializer_enter(serializer, foo, mp1, arg1); and another thread calls * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the * same time. * * Connectionless transports use a single serializer per transport type (one for * TLI and one for sockets). Connection-oriented transports use finer-grained * serializers. * * All COTS-type endpoints start their life with private serializers. During * connection request processing the endpoint serializer is switched to the * listener's serializer and the rest of T_CONN_REQ processing is done on the * listener serializer. During T_CONN_RES processing the eager serializer is * switched from listener to acceptor serializer and after that point all * processing for eager and acceptor happens on this serializer. To avoid races * with endpoint closes while its serializer may be changing closes are blocked * while serializers are manipulated. * * References accounting * --------------------- * * Endpoints are reference counted and freed when the last reference is * dropped. Functions within the serializer may access an endpoint state even * after an endpoint closed.
The te_closing being set on the endpoint indicates * that the endpoint entered its close routine. * * One reference is held for each opened endpoint instance. The reference * counter is incremented when the endpoint is linked to another endpoint and * decremented when the link disappears. It is also incremented when the * endpoint is found by the hash table lookup. This increment is atomic with the * lookup itself and happens while the hash table read lock is held. * * Close synchronization * --------------------- * * During close the endpoint is marked as closing using te_closing flag. It is * usually enough to check for te_closing flag since all other state changes * happen after this flag is set and the close entered serializer. Immediately * after setting te_closing flag tl_close() enters serializer and waits until * the callback finishes. This allows all functions called within serializer to * simply check te_closing without any locks. * * Serializer management. * --------------------- * * For COTS transports serializers are created when the endpoint is constructed * and destroyed when the endpoint is destructed. CLTS transports use global * serializers - one for sockets and one for TLI. * * COTS serializers have separate reference counts to deal with several * endpoints sharing the same serializer. There is a subtle problem related to * the serializer destruction. The serializer should never be destroyed by any * function executed inside serializer. This means that close has to wait till * all serializer activity for this endpoint is finished before it can drop the * last reference on the endpoint (which may as well free the serializer). This * is only relevant for COTS transports which manage serializers * dynamically. For CLTS transports close may complete without waiting for all * serializer activity to finish since serializer is only destroyed at driver * detach time.
* * COTS endpoints keep track of the number of outstanding requests on the * serializer for the endpoint. The code handling accept() avoids changing * client serializer if it has any pending messages on the serializer and * instead moves acceptor to listener's serializer. * * * Use of hash tables * ------------------ * * The driver uses modhash hash table implementation. Each transport uses two * hash tables - one for finding endpoints by acceptor ID and another one for * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same * pair of hash tables since sockets only use TICOTSORD. * * All hash table lookups increment a reference count for returned endpoints, * so we may safely check the endpoint state even when the endpoint is removed * from the hash by another thread immediately after it is found. * * * CLOSE processing * ================ * * The driver enters serializer twice on close(). The close sequence is the * following: * * 1) Wait until closing is safe (te_closewait becomes zero) * This step is needed to prevent close during serializer switches. In most * cases (close happening after connection establishment) te_closewait is * zero. * 2) Set te_closing. * 3) Call tl_close_ser() within serializer and wait for it to complete. * * tl_close_ser() simply marks endpoint and wakes up waiting tl_close(). * It also needs to clear write-side q_next pointers - this should be done * before qprocsoff(). * * This synchronous serializer entry during close is needed to ensure that * the queue is valid everywhere inside the serializer. * * Note that in many cases close will execute tl_close_ser() synchronously, * so it will not wait at all. * * 4) Calls qprocsoff(). * 5) Calls tl_close_finish_ser() within the serializer and waits for it to * complete (for COTS transports). For CLTS transport there is no wait. * * tl_close_finish_ser() finishes the close process and wakes up waiting * close if there is any.
* * Note that in most cases close will enter tl_close_finish_ser() * synchronously and will not wait at all. * * * Flow Control * ============ * * The driver implements both read and write side service routines. No one calls * putq() on the read queue. The read side service routine tl_rsrv() is called * when the read side stream is back-enabled. It enters serializer synchronously * (waits till serializer processing is complete). Within serializer it * back-enables all endpoints blocked by the queue for connection-less * transports and enables write side service processing for the peer for * connection-oriented transports. * * Read and write side service routines use special mblk_sized space in the * endpoint structure to enter perimeter. * * Write-side flow control * ----------------------- * * Write side flow control is a bit tricky. The driver needs to deal with two * message queues - the explicit STREAMS message queue maintained by * putq()/getq()/putbq() and the implicit queue within the serializer. These two * queues should be synchronized to preserve message ordering and should * maintain a single order determined by the order in which messages enter * tl_wput(). In order to maintain the ordering between these two queues the * STREAMS queue is only manipulated within the serializer, so the ordering is * provided by the serializer. * * Functions called from the tl_wsrv() sometimes may call putbq(). To * immediately stop any further processing of the STREAMS message queues the * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write * side service processing stops when the flag is set. * * The tl_wsrv() function enters serializer synchronously and waits for it to * complete. The serializer call-back tl_wsrv_ser() either drains all messages * on the STREAMS queue or terminates when it notices the te_nowsrv flag * set.
Note that the maximum amount of messages processed by tl_wput_ser() is * always bounded by the amount of messages on the STREAMS queue at the time * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS * queue from another serialized entry which can't happen in parallel. This * guarantees that tl_wput_ser() is complete in bounded time (there is no risk * of it draining forever while writer places new messages on the STREAMS * queue). * * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). * * * Unix Domain Sockets * =================== * * The driver knows the structure of Unix Domain sockets addresses and treats * them differently from generic TLI addresses. For sockets implicit binds are * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address * instead of using address length of zero. Explicit binds specify * SOU_MAGIC_EXPLICIT as magic. * * For implicit binds we always use minor number as soua_vp part of the address * and avoid any hash table lookups. This saves two hash table lookups per * anonymous bind. * * For explicit address we hash the vnode pointer instead of hashing the * full-scale address+zone+length. Hashing by pointer is more efficient than * hashing by the full address. * * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the * tep structure, so it should never be freed. * * Also for sockets the driver always uses minor number as acceptor id. * * TPI VIOLATIONS * -------------- * * This driver violates TPI in several respects for Unix Domain Sockets: * * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind * is requested and the endpoint is already in use. There is no point in * generating an unused address since this address will be rejected by * sockfs anyway. For implicit binds it always generates a new address * (sets soua_vp to its minor number). * * 2) It always uses minor number as acceptor ID and never uses queue * pointer.
It is ok since sockets get acceptor ID from T_CAPABILITY_REQ * message and they do not use the queue pointer. * * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog * followed by listen(). The listen() should be issued with non-zero * backlog, so sotpi_listen() issues unbind request followed by bind * request to the same address but with a non-zero qlen value. Both * tl_bind() and tl_unbind() require write lock on the hash table to * insert/remove the address. The driver does not remove the address from * the hash for endpoints that are bound to the explicit address and have * backlog of zero. During T_BIND_REQ processing if the address requested * is equal to the address the endpoint already has it updates the backlog * without reinserting the address in the hash table. This optimization * avoids two hash table updates for each listener created. It also * avoids the problem of a "stolen" address when another listener may use * the same address between the unbind and bind and suddenly listen() fails * because address is in use even though the bind() succeeded. * * * CONNECTIONLESS TRANSPORTS * ========================= * * Connectionless transports all share the same serializer (one for TLI and one * for Sockets). Functions executing behind serializer can check or modify state * of any endpoint. * * When endpoint X talks to another endpoint Y it caches the pointer to Y in the * te_lastep field. The next time X talks to some address A it checks whether A * is the same as Y's address and if it is there is no need to lookup Y. If the * address is different or the state of Y is not appropriate (e.g. closed or not * idle) X does a lookup using tl_find_peer() and caches the new address. * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold * on the endpoint found. * * During close of endpoint Y it doesn't try to remove itself from other * endpoints caches.
They will detect that Y is gone and will search the peer * endpoint again. * * Flow Control Handling. * ---------------------- * * Each connectionless endpoint keeps a list of endpoints which are * flow-controlled by its queue. It also keeps a pointer to the queue which * flow-controls itself. Whenever flow control releases for endpoint X it * enables all queues from the list. During close it also back-enables everyone * in the list. If X is flow-controlled when it is closing it removes itself from * the peer's list. * * DATA STRUCTURES * =============== * * Each endpoint is represented by the tl_endpt_t structure which keeps all the * endpoint state. For connection-oriented transports it keeps a list * of pending connections (tl_icon_t). For connectionless transports it keeps a * list of endpoints flow controlled by this one. * * Each transport type is represented by a per-transport data structure * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the * endpoint address hash tables for each transport. It also contains a pointer to * the transport serializer for connectionless transports. * * Each endpoint keeps a link to its transport structure, so the code can find * all per-transport information quickly. */ #include #include #include #include #define _SUN_TPI_VERSION 2 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* typedef int (*pfi_t)() for inet/optcom.h */ #include #include #include #include #include #include /* * TBD List * 14 Eliminate state changes through table * 16. AF_UNIX socket options * 17. connect() for ticlts * 18. support for "netstat" to show AF_UNIX plus TLI local * transport connections * 21.
sanity check to flushing on sending M_ERROR */

/*
 * CONSTANT DECLARATIONS
 * --------------------
 */

/*
 * Local declarations
 */

/* Index the TPI state table ti_statetbl by event EV and current state ST. */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */

/*
 * Hash tables size.
 */
#define	TL_HASH_SIZE 311

/*
 * Definitions for module_info
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */

/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 *
 * These values are also used as indices into the tl_transports[] table.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3	/* placeholder; "undefined" in tl_transports */
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

#define	TL_MINOR_MASK	0x7
/* Lowest id handed out by the tl_minors id space (see tl_attach()). */
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */

/* Round p up to the next multiple of sizeof (t_scalar_t). */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))

/*
 * EXTERNAL VARIABLE DECLARATIONS
 * -----------------------------
 */
/*
 * state table defined in the OS space.c
 */
extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static void tl_wput(queue_t *, mblk_t *);
static void tl_wsrv(queue_t *);
static void tl_rsrv(queue_t *);

static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);

/*
 * GLOBAL
DATA STRUCTURES AND VARIABLES
 * -----------------------------------
 */

/*
 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 * For now, we only manage the SO_RECVUCRED option but we also have
 * harmless dummy options to make things work with some common code we access.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};

/*
 * Table of all supported levels
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options so we need this info separately.
 *
 * This is needed only for topmost tpi providers.
 */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	TL_PROT_LEVEL
};

/* Number of entries in tl_valid_levels_arr. */
#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof(long) are handled automatically.
 * Others need hand job.
 *
 * The estimate is 4 bytes of data plus one struct opthdr per tl_opt_arr
 * entry, plus 64 bytes of slack and the T_optmgmt_ack header.
 */
#define	TL_MAX_OPT_BUF_LEN	\
		((A_CNT(tl_opt_arr) << 2) + \
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
		+ 64 + sizeof (struct T_optmgmt_ack))

/* Number of entries in tl_opt_arr. */
#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)

/*
 *	transport addr structure
 */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;		/* Zone scope of address */
	t_scalar_t	ta_alen;		/* length of abuf */
	void		*ta_abuf;		/* the addr itself */
} tl_addr_t;

/*
 * Refcounted version of serializer.
 */
typedef struct tl_serializer {
	uint_t		ts_refcnt;	/* reference count */
	serializer_t	*ts_serializer;	/* the wrapped serializer */
} tl_serializer_t;

/*
 * Each transport type has a separate state.
 * Per-transport state.
*/ typedef struct tl_transport_state { char *tr_name; minor_t tr_minor; uint32_t tr_defaddr; mod_hash_t *tr_ai_hash; mod_hash_t *tr_addr_hash; tl_serializer_t *tr_serializer; } tl_transport_state_t; #define TL_DFADDR 0x1000 static tl_transport_state_t tl_transports[] = { { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } }; #define TL_MAXTRANSPORT A_CNT(tl_transports) struct tl_endpt; typedef struct tl_endpt tl_endpt_t; typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); /* * Data structure used to represent pending connects. * Records enough information so that the connecting peer can close * before the connection gets accepted. */ typedef struct tl_icon { list_node_t ti_node; struct tl_endpt *ti_tep; /* NULL if peer has already closed */ mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ t_scalar_t ti_seqno; /* Sequence number */ } tl_icon_t; typedef struct so_ux_addr soux_addr_t; #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) /* * Maximum number of unaccepted connection indications allowed per listener. 
*/
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;

/*
 *	transport endpoint structure
 *
 * Several fields are reached through accessor macros defined next to them
 * (te_seqno, te_serializer, te_magic, te_addrhash, ...), which expand to
 * members of embedded structures or of the linked transport state.
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;	/* endpoint reference count */
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
	/* The minor number doubles as the sequence number. */
#define	te_seqno	te_minor
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;	/* set by TL_PUTBQ, cleared by */
					/* TL_QENABLE and TL_PUTQ */
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid te_ap.ta_zoneid
#define	te_alen	te_ap.ta_alen
#define	te_abuf	te_ap.ta_abuf

	tl_transport_state_t *te_transport;
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t  _te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
	/* Shorthand accessors for the union members above. */
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv()	*/
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv()	*/
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};

/*
 * Flag values. Lower 4 bits specify that transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 * they allow to identify the endpoint more easily.
*/
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020 /* the accepting endpoint */
#define	TL_EAGER	0x00040 /* connecting endpoint */
#define	TL_ACCEPTED	0x00080 /* accepted connection */
#define	TL_SETCRED	0x00100 /* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200 /* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400 /* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000 /* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000 /* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 * Each one tests the transport mode bits kept in te_flag.
 */
#define		IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define		IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define		IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define		IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 *
 * NOTE: all of these expand to a brace-enclosed block (not do/while (0)) and
 * evaluate their endpoint argument more than once, so the argument must be
 * free of side effects.
 *
 * TL_UNCONNECT(x)	release the reference held via x and clear x.
 * TL_REMOVE_PEER(x)	TL_UNCONNECT(x) if x is not NULL.
 * TL_PUTBQ(x, mp)	put mp back on the write queue of x and set te_nowsrv;
 *			asserts that close has not entered the serializer.
 * TL_QENABLE(x)	clear te_nowsrv and enable the write queue of x.
 * TL_PUTQ(x, mp)	clear te_nowsrv and put mp on the write queue of x.
 */
#define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

#define	TL_PUTBQ(x, mp) {		\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));	\
	(x)->te_nowsrv = B_TRUE;		\
	(void) putbq((x)->te_wq, mp);		\
}

#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }

/*
 * STREAMS driver glue data structures.
*/
static	struct	module_info	tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};

/*
 * Read-side queue: no put procedure, tl_rsrv() as the service routine,
 * and the open/close entry points.
 */
static	struct	qinit	tl_rinit = {
	NULL,			/* qi_putp */
	(int (*)())tl_rsrv,	/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

/*
 * Write-side queue: tl_wput() as the put procedure and tl_wsrv() as the
 * service routine.
 */
static	struct	qinit	tl_winit = {
	(int (*)())tl_wput,	/* qi_putp */
	(int (*)())tl_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static	struct streamtab	tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};

/* Defines tl_devops, wiring in tl_attach, tl_detach, tl_info and tlinfo. */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};

/*
 * Module linkage information for the kernel.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
*/

#define		TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
#define		TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)

/* T_INFO_ACK template for connection-oriented (T_COTS) endpoints. */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/* T_INFO_ACK template for connectionless (T_CLTS) endpoints. */
static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size  -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
	};

/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 * Computed in tl_attach() from strmsgsz, or TL_TIDUSZ when that is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
*/ static int tl_disable_early_connect = 0; static int tl_client_closing_when_accepting; static int tl_serializer_noswitch; /* * LOCAL FUNCTION PROTOTYPES * ------------------------- */ static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); static void tl_do_proto(mblk_t *, tl_endpt_t *); static void tl_do_ioctl(mblk_t *, tl_endpt_t *); static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, t_scalar_t); static void tl_bind(mblk_t *, tl_endpt_t *); static void tl_bind_ser(mblk_t *, tl_endpt_t *); static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); static void tl_unbind(mblk_t *, tl_endpt_t *); static void tl_optmgmt(queue_t *, mblk_t *); static void tl_conn_req(queue_t *, mblk_t *); static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); static void tl_conn_res(mblk_t *, tl_endpt_t *); static void tl_discon_req(mblk_t *, tl_endpt_t *); static void tl_capability_req(mblk_t *, tl_endpt_t *); static void tl_info_req_ser(mblk_t *, tl_endpt_t *); static void tl_addr_req_ser(mblk_t *, tl_endpt_t *); static void tl_info_req(mblk_t *, tl_endpt_t *); static void tl_addr_req(mblk_t *, tl_endpt_t *); static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); static void tl_data(mblk_t *, tl_endpt_t *); static void tl_exdata(mblk_t *, tl_endpt_t *); static void tl_ordrel(mblk_t *, tl_endpt_t *); static void tl_unitdata(mblk_t *, tl_endpt_t *); static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); static void tl_cl_backenable(tl_endpt_t *); static void tl_co_unconnect(tl_endpt_t *); static mblk_t *tl_resizemp(mblk_t *, ssize_t); static void tl_discon_ind(tl_endpt_t *, uint32_t); static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); static mblk_t 
*tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *,
    uint_t *, uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);

/*
 * Initialize option database object for TL
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};

/*
 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 * ---------------------------------------
 */

/*
 * Loadable module routines
 */

/* Module load entry point: installs the module described by modlinkage. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

/* Module unload entry point: removes the module described by modlinkage. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

/* Module information entry point: reports on modlinkage via mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver Entry Points and Other routines
 */

/*
 * Attach entry point.
 *
 * DDI_RESUME succeeds without doing anything; any command other than
 * DDI_ATTACH fails. For DDI_ATTACH this computes the TIDU size, creates one
 * minor node per transport, creates the endpoint kmem cache and the minor
 * number ID space, and then sets up the per-transport hash tables and (for
 * connectionless transports) the shared serializer.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
			/* Undo every minor node created so far. */
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	/* Object cache for endpoints (tl_endpt_t). */
	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Set up the hash tables and serializers for every transport.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		/*
		 * Socket transports key the address hash by pointer value;
		 * the others use tl_hash_by_addr()/tl_hash_cmp_addr().
		 */
		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	tl_dip = devi;

	return (DDI_SUCCESS);
}

/*
 * Detach entry point.
 *
 * DDI_SUSPEND succeeds without doing anything; any command other than
 * DDI_DETACH fails. For DDI_DETACH this releases the per-transport
 * serializers and hash tables, destroys the endpoint cache and the minor
 * number ID space and removes the minor nodes.
 */
static int
tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * Destroy arenas and hash tables.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		/*
		 * TL_SOCK_COTSORD only borrows the TL_SOCK_COTS tables (see
		 * tl_attach()), so it has nothing of its own to destroy.
		 */
		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
			continue;

		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
		if (t->tr_serializer != NULL) {
			tl_serializer_refrele(t->tr_serializer);
			t->tr_serializer = NULL;
		}

#ifdef _ILP32
		if (i & TL_SOCKET)
			mod_hash_destroy_idhash(t->tr_ai_hash);
		else
			mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
		mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
		t->tr_ai_hash = NULL;
		if (i & TL_SOCKET)
			mod_hash_destroy_ptrhash(t->tr_addr_hash);
		else
			mod_hash_destroy_hash(t->tr_addr_hash);
		t->tr_addr_hash = NULL;
	}

	kmem_cache_destroy(tl_cache);
	tl_cache = NULL;
	id_space_destroy(tl_minors);
	tl_minors = NULL;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/*
 * getinfo entry point.
 *
 * DDI_INFO_DEVT2DEVINFO returns the saved tl_dip (failing if it has not been
 * set); DDI_INFO_DEVT2INSTANCE always reports instance 0. Anything else
 * fails.
 */
/* ARGSUSED */
static int
tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{

	int retcode = DDI_FAILURE;

	switch (infocmd) {

	case DDI_INFO_DEVT2DEVINFO:
		if (tl_dip != NULL) {
			*result = (void *)tl_dip;
			retcode = DDI_SUCCESS;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		retcode = DDI_SUCCESS;
		break;

	default:
		break;
	}
	return (retcode);
}

/*
 * Endpoint reference management.
*/ static void tl_refhold(tl_endpt_t *tep) { atomic_inc_32(&tep->te_refcnt); } static void tl_refrele(tl_endpt_t *tep) { ASSERT(tep->te_refcnt != 0); if (atomic_dec_32_nv(&tep->te_refcnt) == 0) tl_free(tep); } /*ARGSUSED*/ static int tl_constructor(void *buf, void *cdrarg, int kmflags) { tl_endpt_t *tep = buf; bzero(tep, sizeof (tl_endpt_t)); mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } /*ARGSUSED*/ static void tl_destructor(void *buf, void *cdrarg) { tl_endpt_t *tep = buf; mutex_destroy(&tep->te_closelock); cv_destroy(&tep->te_closecv); mutex_destroy(&tep->te_srv_lock); cv_destroy(&tep->te_srv_cv); mutex_destroy(&tep->te_ser_lock); } static void tl_free(tl_endpt_t *tep) { ASSERT(tep->te_refcnt == 0); ASSERT(tep->te_transport != NULL); ASSERT(tep->te_rq == NULL); ASSERT(tep->te_wq == NULL); ASSERT(tep->te_ser != NULL); ASSERT(tep->te_ser_count == 0); ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); if (IS_SOCKET(tep)) { ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); ASSERT(tep->te_abuf == &tep->te_uxaddr); ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); } else if (tep->te_abuf != NULL) { kmem_free(tep->te_abuf, tep->te_alen); tep->te_alen = -1; /* uninitialized */ tep->te_abuf = NULL; } else { ASSERT(tep->te_alen == -1); } id_free(tl_minors, tep->te_minor); ASSERT(tep->te_credp == NULL); if (tep->te_hash_hndl != NULL) mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); if (IS_COTS(tep)) { TL_REMOVE_PEER(tep->te_conp); TL_REMOVE_PEER(tep->te_oconp); tl_serializer_refrele(tep->te_ser); tep->te_ser = NULL; ASSERT(tep->te_nicon == 0); ASSERT(list_head(&tep->te_iconp) == NULL); } else { ASSERT(tep->te_lastep == NULL); ASSERT(list_head(&tep->te_flowlist) == NULL); ASSERT(tep->te_flowq == NULL); } ASSERT(tep->te_bufcid == 0); ASSERT(tep->te_timoutid == 0); bzero(&tep->te_ap, sizeof (tep->te_ap)); tep->te_acceptor_id = 0; ASSERT(tep->te_closewait == 0); ASSERT(!tep->te_rsrv_active); ASSERT(!tep->te_wsrv_active); tep->te_closing = 0; tep->te_nowsrv = B_FALSE; tep->te_flag = 0; kmem_cache_free(tl_cache, tep); } /* * Allocate/free reference-counted wrappers for serializers. */ static tl_serializer_t * tl_serializer_alloc(int flags) { tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); serializer_t *ser; if (s == NULL) return (NULL); ser = serializer_create(flags); if (ser == NULL) { kmem_free(s, sizeof (tl_serializer_t)); return (NULL); } s->ts_refcnt = 1; s->ts_serializer = ser; return (s); } static void tl_serializer_refhold(tl_serializer_t *s) { atomic_inc_32(&s->ts_refcnt); } static void tl_serializer_refrele(tl_serializer_t *s) { if (atomic_dec_32_nv(&s->ts_refcnt) == 0) { serializer_destroy(s->ts_serializer); kmem_free(s, sizeof (tl_serializer_t)); } } /* * Post a request on the endpoint serializer. For COTS transports keep track of * the number of pending requests. 
*/ static void tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) { if (IS_COTS(tep)) { mutex_enter(&tep->te_ser_lock); tep->te_ser_count++; mutex_exit(&tep->te_ser_lock); } serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); } /* * Complete processing the request on the serializer. Decrement the counter for * pending requests for COTS transports. */ static void tl_serializer_exit(tl_endpt_t *tep) { if (IS_COTS(tep)) { mutex_enter(&tep->te_ser_lock); ASSERT(tep->te_ser_count != 0); tep->te_ser_count--; mutex_exit(&tep->te_ser_lock); } } /* * Hash management functions. */ /* * Return TRUE if two addresses are equal, false otherwise. */ static boolean_t tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) { return ((ap1->ta_alen > 0) && (ap1->ta_alen == ap2->ta_alen) && (ap1->ta_zoneid == ap2->ta_zoneid) && (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); } /* * This function is called whenever an endpoint is found in the hash table. */ /* ARGSUSED0 */ static void tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) { tl_refhold((tl_endpt_t *)val); } /* * Address hash function. */ /* ARGSUSED */ static uint_t tl_hash_by_addr(void *hash_data, mod_hash_key_t key) { tl_addr_t *ap = (tl_addr_t *)key; size_t len = ap->ta_alen; uchar_t *p = ap->ta_abuf; uint_t i, g; ASSERT((len > 0) && (p != NULL)); for (i = ap->ta_zoneid; len -- != 0; p++) { i = (i << 4) + (*p); if ((g = (i & 0xf0000000U)) != 0) { i ^= (g >> 24); i ^= g; } } return (i); } /* * This function is used by hash lookups. It compares two generic addresses. */ static int tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) { #ifdef DEBUG tl_addr_t *ap1 = (tl_addr_t *)key1; tl_addr_t *ap2 = (tl_addr_t *)key2; ASSERT(key1 != NULL); ASSERT(key2 != NULL); ASSERT(ap1->ta_abuf != NULL); ASSERT(ap2->ta_abuf != NULL); ASSERT(ap1->ta_alen > 0); ASSERT(ap2->ta_alen > 0); #endif return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); } /* * Prevent endpoint from closing if possible. 
* Return B_TRUE on success, B_FALSE on failure.
 *
 * Success means te_closewait was raised under te_closelock, which makes
 * tl_close() wait; the caller must balance it with tl_closeok(). Fails if the
 * endpoint is already marked as closing.
 */
static boolean_t
tl_noclose(tl_endpt_t *tep)
{
	boolean_t rc = B_FALSE;

	mutex_enter(&tep->te_closelock);
	if (! tep->te_closing) {
		ASSERT(tep->te_closewait == 0);
		tep->te_closewait++;
		rc = B_TRUE;
	}
	mutex_exit(&tep->te_closelock);
	return (rc);
}

/*
 * Allow endpoint to close if needed.
 *
 * Drops te_closewait back to zero and signals te_closecv so that a thread
 * blocked in tl_close() can proceed.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}

/*
 * STREAMS open entry point.
 *
 * Allocates and initializes an endpoint for the transport selected by the
 * minor number being opened, assigns it a unique minor number of its own
 * (returned through devp) and registers it in the acceptor ID hash.
 *
 * Returns 0 on success (including a re-open of an already initialized queue)
 * or ENXIO for CLONEOPEN/MODOPEN or an out-of-range minor number.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* The queue already has an endpoint attached: nothing to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* Socket opens select the socket flavor of the transport. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}

/*
 * STREAMS close entry point.
 *
 * Removes the endpoint from the acceptor ID hash, waits until nobody is
 * blocking close, then runs the close in two phases behind the serializer:
 * tl_close_ser() before qprocsoff() and tl_close_finish_ser() after it.
 * Finally drops the credential and the reference taken in tl_open().
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall and timeout for this queue. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}

/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		/* A NULL first argument marks the call as coming from close. */
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}

/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
*/ /* ARGSUSED0 */ static void tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) { ASSERT(tep->te_closing); IMPLY(IS_CLTS(tep), tep->te_closewait == 0); IMPLY(IS_COTS(tep), tep->te_closewait == 1); tep->te_state = -1; /* Uninitialized */ if (IS_COTS(tep)) { tl_co_unconnect(tep); } else { /* Connectionless specific cleanup */ TL_REMOVE_PEER(tep->te_lastep); /* * Backenable anybody that is flow controlled waiting for * this endpoint. */ tl_cl_backenable(tep); if (tep->te_flowq != NULL) { list_remove(&(tep->te_flowq->te_flowlist), tep); tep->te_flowq = NULL; } } tl_serializer_exit(tep); if (IS_COTS(tep)) tl_closeok(tep); else tl_refrele(tep); } /* * STREAMS write-side put procedure. * Enter serializer for most of the processing. * * The T_CONN_REQ is processed outside of serializer. */ static void tl_wput(queue_t *wq, mblk_t *mp) { tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; ssize_t msz = MBLKL(mp); union T_primitives *prim = (union T_primitives *)mp->b_rptr; tlproc_t *tl_proc = NULL; switch (DB_TYPE(mp)) { case M_DATA: /* Only valid for connection-oriented transports */ if (IS_CLTS(tep)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:M_DATA invalid for ticlts driver")); tl_merror(wq, mp, EPROTO); return; } tl_proc = tl_wput_data_ser; break; case M_IOCTL: switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { case TL_IOC_CREDOPT: /* FALLTHROUGH */ case TL_IOC_UCREDOPT: /* * Serialize endpoint state change. 
*/ tl_proc = tl_do_ioctl_ser; break; default: miocnak(wq, mp, 0, EINVAL); return; } break; case M_FLUSH: /* * do canonical M_FLUSH processing */ if (*mp->b_rptr & FLUSHW) { flushq(wq, FLUSHALL); *mp->b_rptr &= ~FLUSHW; } if (*mp->b_rptr & FLUSHR) { flushq(RD(wq), FLUSHALL); qreply(wq, mp); } else { freemsg(mp); } return; case M_PROTO: if (msz < sizeof (prim->type)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:M_PROTO data too short")); tl_merror(wq, mp, EPROTO); return; } switch (prim->type) { case T_OPTMGMT_REQ: case T_SVR4_OPTMGMT_REQ: /* * Process TPI option management requests immediately * in put procedure regardless of in-order processing * of already queued messages. * (Note: This driver supports AF_UNIX socket * implementation. Unless we implement this processing, * setsockopt() on socket endpoint will block on flow * controlled endpoints which it should not. That is * required for successful execution of VSU socket tests * and is consistent with BSD socket behavior). */ tl_optmgmt(wq, mp); return; case O_T_BIND_REQ: case T_BIND_REQ: tl_proc = tl_bind_ser; break; case T_CONN_REQ: if (IS_CLTS(tep)) { tl_merror(wq, mp, EPROTO); return; } tl_conn_req(wq, mp); return; case T_DATA_REQ: case T_OPTDATA_REQ: case T_EXDATA_REQ: case T_ORDREL_REQ: tl_proc = tl_putq_ser; break; case T_UNITDATA_REQ: if (IS_COTS(tep) || (msz < sizeof (struct T_unitdata_req))) { tl_merror(wq, mp, EPROTO); return; } if ((tep->te_state == TS_IDLE) && !wq->q_first) { tl_proc = tl_unitdata_ser; } else { tl_proc = tl_putq_ser; } break; default: /* * process in service procedure if message already * queued (maintain in-order processing) */ if (wq->q_first != NULL) { tl_proc = tl_putq_ser; } else { tl_proc = tl_wput_ser; } break; } break; case M_PCPROTO: /* * Check that the message has enough data to figure out TPI * primitive. 
*/ if (msz < sizeof (prim->type)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:M_PCROTO data too short")); tl_merror(wq, mp, EPROTO); return; } switch (prim->type) { case T_CAPABILITY_REQ: tl_capability_req(mp, tep); return; case T_INFO_REQ: tl_proc = tl_info_req_ser; break; case T_ADDR_REQ: tl_proc = tl_addr_req_ser; break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:unknown TPI msg primitive")); tl_merror(wq, mp, EPROTO); return; } break; default: (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:default:unexpected Streams message")); freemsg(mp); return; } /* * Continue processing via serializer. */ ASSERT(tl_proc != NULL); tl_refhold(tep); tl_serializer_enter(tep, tl_proc, mp); } /* * Place message on the queue while preserving order. */ static void tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) { if (tep->te_closing) { tl_wput_ser(mp, tep); } else { TL_PUTQ(tep, mp); tl_serializer_exit(tep); tl_refrele(tep); } } static void tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) { ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); switch (DB_TYPE(mp)) { case M_DATA: tl_data(mp, tep); break; case M_PROTO: tl_do_proto(mp, tep); break; default: freemsg(mp); break; } } /* * Write side put procedure called from serializer. */ static void tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) { tl_wput_common_ser(mp, tep); tl_serializer_exit(tep); tl_refrele(tep); } /* * M_DATA processing. Called from serializer. */ static void tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) { tl_endpt_t *peer_tep = tep->te_conp; queue_t *peer_rq; ASSERT(DB_TYPE(mp) == M_DATA); ASSERT(IS_COTS(tep)); IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer); /* * fastpath for data. Ignore flow control if tep is closing. 
*/ if ((peer_tep != NULL) && !peer_tep->te_closing && ((tep->te_state == TS_DATA_XFER) || (tep->te_state == TS_WREQ_ORDREL)) && (tep->te_wq != NULL) && (tep->te_wq->q_first == NULL) && ((peer_tep->te_state == TS_DATA_XFER) || (peer_tep->te_state == TS_WREQ_ORDREL)) && ((peer_rq = peer_tep->te_rq) != NULL) && (canputnext(peer_rq) || tep->te_closing)) { putnext(peer_rq, mp); } else if (tep->te_closing) { /* * It is possible that by the time we got here tep started to * close. If the write queue is not empty, and the state is * TS_DATA_XFER the data should be delivered in order, so we * call putq() instead of freeing the data. */ if ((tep->te_wq != NULL) && ((tep->te_state == TS_DATA_XFER) || (tep->te_state == TS_WREQ_ORDREL))) { TL_PUTQ(tep, mp); } else { freemsg(mp); } } else { TL_PUTQ(tep, mp); } tl_serializer_exit(tep); tl_refrele(tep); } /* * Write side service routine. * * All actual processing happens within serializer which is entered * synchronously. It is possible that by the time tl_wsrv() wakes up, some new * messages that need processing may have arrived, so tl_wsrv repeats until * queue is empty or te_nowsrv is set. */ static void tl_wsrv(queue_t *wq) { tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; while ((wq->q_first != NULL) && !tep->te_nowsrv) { mutex_enter(&tep->te_srv_lock); ASSERT(tep->te_wsrv_active == B_FALSE); tep->te_wsrv_active = B_TRUE; mutex_exit(&tep->te_srv_lock); tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); /* * Wait for serializer job to complete. */ mutex_enter(&tep->te_srv_lock); while (tep->te_wsrv_active) { cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); } cv_signal(&tep->te_srv_cv); mutex_exit(&tep->te_srv_lock); } } /* * Serialized write side processing of the STREAMS queue. * May be called either from tl_wsrv() or from tl_close() in which case ser_mp * is NULL. 
*/ static void tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) { mblk_t *mp; queue_t *wq = tep->te_wq; ASSERT(wq != NULL); while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { tl_wput_common_ser(mp, tep); } /* * Wakeup service routine unless called from close. * If ser_mp is specified, the caller is tl_wsrv(). * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't * call tl_serializer_enter() before calling tl_wsrv_ser(), there should * be no matching tl_serializer_exit() in this case. * Also, there is no need to wakeup anyone since tl_close_ser() is not * waiting on te_srv_cv. */ if (ser_mp != NULL) { /* * We are called from tl_wsrv. */ mutex_enter(&tep->te_srv_lock); ASSERT(tep->te_wsrv_active); tep->te_wsrv_active = B_FALSE; cv_signal(&tep->te_srv_cv); mutex_exit(&tep->te_srv_lock); tl_serializer_exit(tep); } } /* * Called when the stream is backenabled. Enter serializer and qenable everyone * flow controlled by tep. * * NOTE: The service routine should enter serializer synchronously. Otherwise it * is possible that two instances of tl_rsrv will be running reusing the same * rsrv mblk. */ static void tl_rsrv(queue_t *rq) { tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; ASSERT(rq->q_first == NULL); ASSERT(tep->te_rsrv_active == 0); tep->te_rsrv_active = B_TRUE; tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); /* * Wait for serializer job to complete. */ mutex_enter(&tep->te_srv_lock); while (tep->te_rsrv_active) { cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); } cv_signal(&tep->te_srv_cv); mutex_exit(&tep->te_srv_lock); } /* ARGSUSED */ static void tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) { tl_endpt_t *peer_tep; if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { tl_cl_backenable(tep); } else if ( IS_COTS(tep) && ((peer_tep = tep->te_conp) != NULL) && !peer_tep->te_closing && ((tep->te_state == TS_DATA_XFER) || (tep->te_state == TS_WIND_ORDREL)|| (tep->te_state == TS_WREQ_ORDREL))) { TL_QENABLE(peer_tep); } /* * Wakeup read side service routine. 
*/ mutex_enter(&tep->te_srv_lock); ASSERT(tep->te_rsrv_active); tep->te_rsrv_active = B_FALSE; cv_signal(&tep->te_srv_cv); mutex_exit(&tep->te_srv_lock); tl_serializer_exit(tep); } /* * process M_PROTO messages. Always called from serializer. */ static void tl_do_proto(mblk_t *mp, tl_endpt_t *tep) { ssize_t msz = MBLKL(mp); union T_primitives *prim = (union T_primitives *)mp->b_rptr; /* Message size was validated by tl_wput(). */ ASSERT(msz >= sizeof (prim->type)); switch (prim->type) { case T_UNBIND_REQ: tl_unbind(mp, tep); break; case T_ADDR_REQ: tl_addr_req(mp, tep); break; case O_T_CONN_RES: case T_CONN_RES: if (IS_CLTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_conn_res(mp, tep); break; case T_DISCON_REQ: if (IS_CLTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_discon_req(mp, tep); break; case T_DATA_REQ: if (IS_CLTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_data(mp, tep); break; case T_OPTDATA_REQ: if (IS_CLTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_data(mp, tep); break; case T_EXDATA_REQ: if (IS_CLTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_exdata(mp, tep); break; case T_ORDREL_REQ: if (! IS_COTSORD(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_ordrel(mp, tep); break; case T_UNITDATA_REQ: if (IS_COTS(tep)) { tl_merror(tep->te_wq, mp, EPROTO); break; } tl_unitdata(mp, tep); break; default: tl_merror(tep->te_wq, mp, EPROTO); break; } } /* * Process ioctl from serializer. * This is a wrapper around tl_do_ioctl(). */ static void tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) { if (! 
tep->te_closing) tl_do_ioctl(mp, tep); else freemsg(mp); tl_serializer_exit(tep); tl_refrele(tep); } static void tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) { struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; int cmd = iocbp->ioc_cmd; queue_t *wq = tep->te_wq; int error; int thisopt, otheropt; ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); switch (cmd) { case TL_IOC_CREDOPT: if (cmd == TL_IOC_CREDOPT) { thisopt = TL_SETCRED; otheropt = TL_SETUCRED; } else { /* FALLTHROUGH */ case TL_IOC_UCREDOPT: thisopt = TL_SETUCRED; otheropt = TL_SETCRED; } /* * The credentials passing does not apply to sockets. * Only one of the cred options can be set at a given time. */ if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { miocnak(wq, mp, 0, EINVAL); return; } /* * Turn on generation of credential options for * T_conn_req, T_conn_con, T_unidata_ind. */ error = miocpullup(mp, sizeof (uint32_t)); if (error != 0) { miocnak(wq, mp, 0, error); return; } if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { miocnak(wq, mp, 0, EINVAL); return; } if (*(uint32_t *)mp->b_cont->b_rptr) tep->te_flag |= thisopt; else tep->te_flag &= ~thisopt; miocack(wq, mp, 0, 0); break; default: /* Should not be here */ miocnak(wq, mp, 0, EINVAL); break; } } /* * send T_ERROR_ACK * Note: assumes enough memory or caller passed big enough mp * - no recovery from allocb failures */ static void tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, t_scalar_t unix_err, t_scalar_t type) { struct T_error_ack *err_ack; mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), M_PCPROTO, T_ERROR_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, "tl_error_ack:out of mblk memory")); tl_merror(wq, NULL, ENOSR); return; } err_ack = (struct T_error_ack *)ackmp->b_rptr; err_ack->ERROR_prim = type; err_ack->TLI_error = tli_err; err_ack->UNIX_error = unix_err; /* * send error ack message */ qreply(wq, ackmp); } /* * send T_OK_ACK * Note: assumes enough memory or caller 
passed big enough mp * - no recovery from allocb failures */ static void tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) { struct T_ok_ack *ok_ack; mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), M_PCPROTO, T_OK_ACK); if (ackmp == NULL) { tl_merror(wq, NULL, ENOMEM); return; } ok_ack = (struct T_ok_ack *)ackmp->b_rptr; ok_ack->CORRECT_prim = type; (void) qreply(wq, ackmp); } /* * Process T_BIND_REQ and O_T_BIND_REQ from serializer. * This is a wrapper around tl_bind(). */ static void tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) { if (! tep->te_closing) tl_bind(mp, tep); else freemsg(mp); tl_serializer_exit(tep); tl_refrele(tep); } /* * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. * Assumes that the endpoint is in the unbound. */ static void tl_bind(mblk_t *mp, tl_endpt_t *tep) { queue_t *wq = tep->te_wq; struct T_bind_ack *b_ack; struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; mblk_t *ackmp, *bamp; soux_addr_t ux_addr; t_uscalar_t qlen = 0; t_scalar_t alen, aoff; tl_addr_t addr_req; void *addr_startp; ssize_t msz = MBLKL(mp), basize; t_scalar_t tli_err = 0, unix_err = 0; t_scalar_t save_prim_type = bind->PRIM_type; t_scalar_t save_state = tep->te_state; if (tep->te_state != TS_UNBND) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:bind_request:out of state, state=%d", tep->te_state)); tli_err = TOUTSTATE; goto error; } if (msz < sizeof (struct T_bind_req)) { tli_err = TSYSERR; unix_err = EINVAL; goto error; } tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); ASSERT((bind->PRIM_type == O_T_BIND_REQ) || (bind->PRIM_type == T_BIND_REQ)); alen = bind->ADDR_length; aoff = bind->ADDR_offset; /* negotiate max conn req pending */ if (IS_COTS(tep)) { qlen = bind->CONIND_number; if (qlen > tl_maxqlen) qlen = tl_maxqlen; } /* * Reserve hash handle. It can only be NULL if the endpoint is unbound * and bound again. 
*/ if ((tep->te_hash_hndl == NULL) && ((tep->te_flag & TL_ADDRHASHED) == 0) && mod_hash_reserve_nosleep(tep->te_addrhash, &tep->te_hash_hndl) != 0) { tli_err = TSYSERR; unix_err = ENOSR; goto error; } /* * Verify address correctness. */ if (IS_SOCKET(tep)) { ASSERT(bind->PRIM_type == O_T_BIND_REQ); if ((alen != TL_SOUX_ADDRLEN) || (aoff < 0) || (aoff + alen > msz)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind: invalid socket addr")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } /* Copy address from message to local buffer. */ bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); /* * Check that we got correct address from sockets */ if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind: invalid socket magic")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && (ux_addr.soua_vp != NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind: implicit addr non-empty")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && (ux_addr.soua_vp == NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind: explicit addr empty")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } } else { if ((alen > 0) && ((aoff < 0) || ((ssize_t)(aoff + alen) > msz) || ((aoff + alen) < 0))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind: invalid message")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TSYSERR; unix_err = EINVAL; goto error; } if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, 
SL_TRACE|SL_ERROR, "tl_bind: bad addr in message")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tli_err = TBADADDR; goto error; } #ifdef DEBUG /* * Mild form of ASSERT()ion to detect broken TPI apps. * if (! assertion) * log warning; */ if (! ((alen == 0 && aoff == 0) || (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_bind: addr overlaps TPI message")); } #endif } /* * Bind the address provided or allocate one if requested. * Allow rebinds with a new qlen value. */ if (IS_SOCKET(tep)) { /* * For anonymous requests the te_ap is already set up properly * so use minor number as an address. * For explicit requests need to check whether the address is * already in use. */ if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { int rc; if (tep->te_flag & TL_ADDRHASHED) { ASSERT(IS_COTS(tep) && tep->te_qlen == 0); if (tep->te_vp == ux_addr.soua_vp) goto skip_addr_bind; else /* Rebind to a new address. */ tl_addr_unbind(tep); } /* * Insert address in the hash if it is not already * there. Since we use preallocated handle, the insert * can fail only if the key is already present. */ rc = mod_hash_insert_reserve(tep->te_addrhash, (mod_hash_key_t)ux_addr.soua_vp, (mod_hash_val_t)tep, tep->te_hash_hndl); if (rc != 0) { ASSERT(rc == MH_ERR_DUPLICATE); /* * Violate O_T_BIND_REQ semantics and fail with * TADDRBUSY - sockets will not use any address * other than supplied one for explicit binds. */ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind:requested addr %p is busy", ux_addr.soua_vp)); tli_err = TADDRBUSY; unix_err = 0; goto error; } tep->te_uxaddr = ux_addr; tep->te_flag |= TL_ADDRHASHED; tep->te_hash_hndl = NULL; } } else if (alen == 0) { /* * assign any free address */ if (! 
tl_get_any_addr(tep, NULL)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind:failed to get buffer for any " "address")); tli_err = TSYSERR; unix_err = ENOSR; goto error; } } else { addr_req.ta_alen = alen; addr_req.ta_abuf = (mp->b_rptr + aoff); addr_req.ta_zoneid = tep->te_zoneid; tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); if (tep->te_abuf == NULL) { tli_err = TSYSERR; unix_err = ENOSR; goto error; } bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); tep->te_alen = alen; if (mod_hash_insert_reserve(tep->te_addrhash, (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, tep->te_hash_hndl) != 0) { if (save_prim_type == T_BIND_REQ) { /* * The bind semantics for this primitive * require a failure if the exact address * requested is busy */ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind:requested addr is busy")); tli_err = TADDRBUSY; unix_err = 0; goto error; } /* * O_T_BIND_REQ semantics say if address if requested * address is busy, bind to any available free address */ if (! 
tl_get_any_addr(tep, &addr_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_bind:unable to get any addr buf")); tli_err = TSYSERR; unix_err = ENOMEM; goto error; } } else { tep->te_flag |= TL_ADDRHASHED; tep->te_hash_hndl = NULL; } } ASSERT(tep->te_alen >= 0); skip_addr_bind: /* * prepare T_BIND_ACK TPI message */ basize = sizeof (struct T_bind_ack) + tep->te_alen; bamp = reallocb(mp, basize, 0); if (bamp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:tl_bind: allocb failed")); /* * roll back state changes */ tl_addr_unbind(tep); tep->te_state = TS_UNBND; tl_memrecover(wq, mp, basize); return; } DB_TYPE(bamp) = M_PCPROTO; bamp->b_wptr = bamp->b_rptr + basize; b_ack = (struct T_bind_ack *)bamp->b_rptr; b_ack->PRIM_type = T_BIND_ACK; b_ack->CONIND_number = qlen; b_ack->ADDR_length = tep->te_alen; b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); addr_startp = bamp->b_rptr + b_ack->ADDR_offset; bcopy(tep->te_abuf, addr_startp, tep->te_alen); if (IS_COTS(tep)) { tep->te_qlen = qlen; if (qlen > 0) tep->te_flag |= TL_LISTENER; } tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); /* * send T_BIND_ACK message */ (void) qreply(wq, bamp); return; error: ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); if (ackmp == NULL) { /* * roll back state changes */ tep->te_state = save_state; tl_memrecover(wq, mp, sizeof (struct T_error_ack)); return; } tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); } /* * Process T_UNBIND_REQ. * Called from serializer. 
*/
/*
 * Arguments:
 *	mp  - the T_UNBIND_REQ message; it is resized in place and reused as
 *	      the T_OK_ACK / T_ERROR_ACK reply.
 *	tep - endpoint being unbound.
 *
 * A request for a closing endpoint is dropped without a reply. A request
 * arriving in any state other than TS_IDLE is answered with TOUTSTATE.
 */
static void
tl_unbind(mblk_t *mp, tl_endpt_t *tep)
{
	const size_t ack_size = sizeof (struct T_error_ack);
	queue_t *wq;
	mblk_t *replymp;

	/* Nothing is sent back once the endpoint has started closing. */
	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * Commit memory for the reply up front. T_ERROR_ACK is the known
	 * maximum of T_OK_ACK and T_ERROR_ACK, so size for it.
	 */
	replymp = reallocb(mp, ack_size, 0);
	if (replymp == NULL) {
		tl_memrecover(wq, mp, ack_size);
		return;
	}

	/*
	 * No message validation is needed here: T_UNBIND_REQ is the same
	 * size as the PRIM_type field, which was verified earlier.
	 * Only the state has to be checked.
	 */
	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, replymp, TOUTSTATE, 0, T_UNBIND_REQ);
		return;
	}
	tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);

	/*
	 * TPI requires an M_FLUSH for both the read and the write queues
	 * to be sent up on T_UNBIND_REQ.
	 */
	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/*
	 * Sockets bind with qlen == 0 and then bind() again to the same
	 * address with qlen > 0 for listeners, and such a rebind with a new
	 * qlen is allowed. The address is therefore kept only when all four
	 * tests below hold; in every other case it is released.
	 */
	if (!(IS_SOCKET(tep) && IS_CLTS(tep) && tep->te_qlen == 0 &&
	    tep->te_magic == SOU_MAGIC_EXPLICIT)) {
		tl_addr_unbind(tep);
	}

	/* Complete the state transition and send T_OK_ACK. */
	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
	tl_ok_ack(wq, replymp, T_UNBIND_REQ);
}

/*
 * Option management code from drv/ip is used here
 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
 *	database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
 *	However, that is what we want as that option is 'unorthodox'
 *	and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
 *	and not in T_SVR4_OPTMGMT_REQ/ACK
 * Note2: use of optcom_req means this routine is an exception to
 *	 recovery from allocb() failures.
*/ static void tl_optmgmt(queue_t *wq, mblk_t *mp) { tl_endpt_t *tep; mblk_t *ackmp; union T_primitives *prim; cred_t *cr; tep = (tl_endpt_t *)wq->q_ptr; prim = (union T_primitives *)mp->b_rptr; /* * All Solaris components should pass a db_credp * for this TPI message, hence we ASSERT. * But in case there is some other M_PROTO that looks * like a TPI message sent by some other kernel * component, we check and return an error. */ cr = msg_getcred(mp, NULL); ASSERT(cr != NULL); if (cr == NULL) { tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); return; } /* all states OK for AF_UNIX options ? */ if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && prim->type == T_SVR4_OPTMGMT_REQ) { /* * Broken TLI semantics that options can only be managed * in TS_IDLE state. Needed for Sparc ABI test suite that * tests this TLI (mis)feature using this device driver. */ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", tep->te_state)); /* * preallocate memory for T_ERROR_ACK */ ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); if (! ackmp) { tl_memrecover(wq, mp, sizeof (struct T_error_ack)); return; } tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); freemsg(mp); return; } /* * call common option management routine from drv/ip */ if (prim->type == T_SVR4_OPTMGMT_REQ) { svr4_optcom_req(wq, mp, cr, &tl_opt_obj); } else { ASSERT(prim->type == T_OPTMGMT_REQ); tpi_optcom_req(wq, mp, cr, &tl_opt_obj); } } /* * Handle T_conn_req - the driver part of accept(). * If TL_SET[U]CRED generate the credentials options. * If this is a socket pass through options unmodified. * For sockets generate the T_CONN_CON here instead of * waiting for the T_CONN_RES. 
 */
/*
 * Arguments:
 *	wq - write queue the T_CONN_REQ arrived on; wq->q_ptr is the
 *	     connecting endpoint.
 *	mp - the T_CONN_REQ message.
 *
 * This first half validates the endpoint state, the message layout and the
 * destination address, and looks up the listener. Validation failures are
 * answered with T_ERROR_ACK and the request is freed. If no listener is
 * found, or the listener's queue of pending connections is full, the request
 * is acknowledged with T_OK_ACK and then refused with a T_DISCON_IND.
 * On success the endpoint is moved onto the listener's serializer and the
 * rest of the work is done by tl_conn_req_ser().
 */
static void
tl_conn_req(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
	struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	t_scalar_t alen, aoff, olen, ooff, err = 0;
	tl_endpt_t *peer_tep = NULL;
	mblk_t *ackmp;
	mblk_t *dimp;
	struct T_discon_ind *di;
	soux_addr_t ux_addr;
	tl_addr_t dst;

	ASSERT(IS_COTS(tep));

	/* A closing endpoint gets no reply; the request is dropped. */
	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_IND later
	 */

	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
		freemsg(mp);
		return;
	}
	alen = creq->DEST_length;
	aoff = creq->DEST_offset;
	olen = creq->OPT_length;
	ooff = creq->OPT_offset;
	/* With no options present the offset is ignored and normalized to 0. */
	if (olen == 0)
		ooff = 0;

	if (IS_SOCKET(tep)) {
		/*
		 * Socket addresses have a fixed length (TL_SOUX_ADDRLEN) and
		 * must lie entirely inside the message.
		 */
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (alen > msz - sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_conn_req: invalid socket addr"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
		/* Copy the address out of the message into a local. */
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_conn_req: invalid socket magic"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	} else {
		/*
		 * TLI: both the address and the option ranges must fit in
		 * the message and must not wrap to a negative value.
		 */
		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
		    ooff + olen < 0)) ||
		    olen < 0 || ooff < 0) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req:invalid message"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}

		/* A destination address is mandatory for TLI connects. */
		if (alen <= 0 || aoff < 0 ||
		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req:bad addr in message, "
			    "alen=%d, msz=%ld",
			    alen, msz));
			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
#ifdef DEBUG
		/*
		 * Mild form of ASSERT()ion to detect broken TPI apps.
		 * if (! assertion)
		 *	log warning;
		 */
		if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req: addr overlaps TPI message"));
		}
#endif
		if (olen) {
			/*
			 * no opts in connect req
			 * supported in this provider except for sockets.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_req:options not supported "
			    "in message"));
			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Prevent tep from closing on us.
	 */
	if (! tl_noclose(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:endpoint is closing"));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
	/*
	 * get endpoint to connect to
	 * check that peer with DEST addr is bound to addr
	 * and has CONIND_number > 0
	 */
	dst.ta_alen = alen;
	dst.ta_abuf = mp->b_rptr + aoff;
	dst.ta_zoneid = tep->te_zoneid;

	/*
	 * Verify if remote addr is in use
	 */
	peer_tep = (IS_SOCKET(tep) ?
	    tl_sock_find_peer(tep, &ux_addr) :
	    tl_find_peer(tep, &dst));

	if (peer_tep == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_req:no one at connect address"));
		err = ECONNREFUSED;
	} else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
		/*
		 * validate that number of incoming connection is
		 * not to capacity on destination endpoint
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
		    "tl_conn_req: qlen overflow connection refused"));
		err = ECONNREFUSED;
	}

	/*
	 * Send T_DISCON_IND in case of error
	 */
	if (err != 0) {
		/* Drop the reference held on the peer that was looked up. */
		if (peer_tep != NULL)
			tl_refrele(peer_tep);
		/* We are still expected to send T_OK_ACK */
		tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
		tl_closeok(tep);
		/* The request mblk is handed over for the T_DISCON_IND. */
		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
		    M_PROTO, T_DISCON_IND);
		if (dimp == NULL) {
			tl_merror(wq, NULL, ENOSR);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(tep->te_rq, dimp);
		return;
	}

	ASSERT(IS_COTS(peer_tep));

	/*
	 * Found the listener. At this point processing will continue on
	 * listener serializer. Close of the endpoint should be blocked while we
	 * switch serializers.
	 */
	tl_serializer_refhold(peer_tep->te_ser);
	tl_serializer_refrele(tep->te_ser);
	tep->te_ser = peer_tep->te_ser;
	ASSERT(tep->te_oconp == NULL);
	tep->te_oconp = peer_tep;

	/*
	 * It is safe to close now. Close may continue on listener serializer.
	 */
	tl_closeok(tep);

	/*
	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
	 * data, so we link mp to ackmp.
	 */
	ackmp->b_cont = mp;
	mp = ackmp;

	/*
	 * Take a reference on tep for the serialized callback; every exit
	 * path of tl_conn_req_ser() accounts for it.
	 */
	tl_refhold(tep);
	tl_serializer_enter(tep, tl_conn_req_ser, mp);
}

/*
 * Finish T_CONN_REQ processing on listener serializer.
 *
 * Arguments:
 *	mp  - the preallocated ack mblk with the original T_CONN_REQ linked
 *	      on b_cont (as set up by tl_conn_req()).
 *	tep - the connecting endpoint; tep->te_oconp is the listener.
 *
 * Every return path leaves the serializer with tl_serializer_exit(tep).
 * Failure paths also drop the reference taken by tl_conn_req() with
 * tl_refrele(tep); the success path keeps it (see the comment at the end).
 */
static void
tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	tl_endpt_t *peer_tep = tep->te_oconp;
	mblk_t *confmp, *cimp, *indmp;
	void *opts = NULL;
	mblk_t *ackmp = mp;
	struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
	struct T_conn_ind *ci;
	tl_icon_t *tip;
	void *addr_startp;
	t_scalar_t olen = creq->OPT_length;
	t_scalar_t ooff = creq->OPT_offset;
	size_t ci_msz;
	size_t size;
	cred_t *cr = NULL;
	pid_t cpid;

	/* The endpoint began closing before we got here: undo and drop. */
	if (tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;
	tep->te_flag |= TL_EAGER;

	/*
	 * Extract preallocated ackmp from mp.
	 */
	mp = mp->b_cont;
	ackmp->b_cont = NULL;

	if (olen == 0)
		ooff = 0;

	/*
	 * The listener must still be open and in TS_IDLE or TS_WRES_CIND.
	 * Here the T_CONN_REQ mblk itself carries the error ack, and the
	 * preallocated ackmp is freed.
	 */
	if (peer_tep->te_closing ||
	    !((peer_tep->te_state == TS_IDLE) ||
	    (peer_tep->te_state == TS_WRES_CIND))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_req:peer in bad state (%d)",
		    peer_tep->te_state));
		TL_UNCONNECT(tep->te_oconp);
		tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
		freemsg(ackmp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	/*
	 * preallocate now for T_DISCON_IND or T_CONN_IND
	 */
	/*
	 * calculate length of T_CONN_IND message
	 */
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
		/*
		 * The listener asked for credentials: the single credential
		 * option replaces any options from the request (ooff = 0).
		 */
		cr = msg_getcred(mp, &cpid);
		ASSERT(cr != NULL);
		if (peer_tep->te_flag & TL_SETCRED) {
			ooff = 0;
			olen = (t_scalar_t) sizeof (struct opthdr) +
			    OPTLEN(sizeof (tl_credopt_t));
			/* 1 option only */
		} else {
			ooff = 0;
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(ucredminsize(cr));
			/* 1 option only */
		}
	}
	ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
	ci_msz = T_ALIGN(ci_msz) + olen;
	size = max(ci_msz, sizeof (struct T_discon_ind));

	/*
	 * Save options from mp - we'll need them for T_CONN_IND.
	 */
	if (ooff != 0) {
		opts = kmem_alloc(olen, KM_NOSLEEP);
		if (opts == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		/* Copy options to a temp buffer */
		bcopy(mp->b_rptr + ooff, opts, olen);
	}

	if (IS_SOCKET(tep) && !tl_disable_early_connect) {
		/*
		 * Generate a T_CONN_CON that has the identical address
		 * (and options) as the T_CONN_REQ.
		 * NOTE: assumes that the T_conn_req and T_conn_con structures
		 * are isomorphic.
		 */
		confmp = copyb(mp);
		if (! confmp) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_IDLE;
			tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
			freemsg(ackmp);
			if (opts != NULL)
				kmem_free(opts, olen);
			TL_UNCONNECT(tep->te_oconp);
			tl_serializer_exit(tep);
			tl_refrele(tep);
			return;
		}
		((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
		    T_CONN_CON;
	} else {
		confmp = NULL;
	}
	/* Grow the request mblk so it can become the T_CONN_IND. */
	if ((indmp = reallocb(mp, size, 0)) == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}

	/* Cell that will sit on the listener's list of pending connections. */
	tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
	if (tip == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = TS_IDLE;
		tl_memrecover(wq, indmp, sizeof (*tip));
		freemsg(ackmp);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		return;
	}
	tip->ti_mp = NULL;

	/*
	 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
	 * and tl_icon_t cell.
	 */

	/*
	 * ack validity of request and send the peer credential in the ACK.
	 */
	tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);

	if (peer_tep != NULL && peer_tep->te_credp != NULL &&
	    confmp != NULL) {
		mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
	}

	tl_ok_ack(wq, ackmp, T_CONN_REQ);

	/*
	 * prepare message to send T_CONN_IND
	 */
	/*
	 * allocate the message - original data blocks retained
	 * in the returned mblk
	 */
	cimp = tl_resizemp(indmp, size);
	if (! cimp) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_req:con_ind:allocb failure"));
		tl_merror(wq, indmp, ENOMEM);
		TL_UNCONNECT(tep->te_oconp);
		tl_serializer_exit(tep);
		tl_refrele(tep);
		if (opts != NULL)
			kmem_free(opts, olen);
		freemsg(confmp);
		ASSERT(tip->ti_mp == NULL);
		kmem_free(tip, sizeof (*tip));
		return;
	}

	/* Fill in the T_CONN_IND: source address first, then options. */
	DB_TYPE(cimp) = M_PROTO;
	ci = (struct T_conn_ind *)cimp->b_rptr;
	ci->PRIM_type = T_CONN_IND;
	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
	ci->SRC_length = tep->te_alen;
	ci->SEQ_number = tep->te_seqno;

	addr_startp = cimp->b_rptr + ci->SRC_offset;
	bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {

		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen; /* because only 1 option */
		tl_fill_option(cimp->b_rptr + ci->OPT_offset,
		    cr, cpid,
		    peer_tep->te_flag, peer_tep->te_credp);
	} else if (ooff != 0) {
		/* Copy option from T_CONN_REQ */
		ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
		    ci->SRC_length);
		ci->OPT_length = olen;
		ASSERT(opts != NULL);
		bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
	} else {
		ci->OPT_offset = 0;
		ci->OPT_length = 0;
	}
	if (opts != NULL)
		kmem_free(opts, olen);

	/*
	 * register connection request with server peer
	 * append to list of incoming connections
	 * increment references for both peer_tep and tep: peer_tep is placed on
	 * te_oconp and tep is placed on listeners queue.
	 */
	tip->ti_tep = tep;
	tip->ti_seqno = tep->te_seqno;
	list_insert_tail(&peer_tep->te_iconp, tip);
	peer_tep->te_nicon++;

	peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
	/*
	 * send the T_CONN_IND message
	 */
	putnext(peer_tep->te_rq, cimp);

	/*
	 * Send a T_CONN_CON message for sockets.
	 * Disable the queues until we have reached the correct state!
	 */
	if (confmp != NULL) {
		tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
		noenable(wq);
		putnext(tep->te_rq, confmp);
	}
	/*
	 * Now we need to increment tep reference because tep is referenced by
	 * server list of pending connections. We also need to decrement
	 * reference before exiting serializer. Two operations void each other
	 * so we don't modify reference at all.
	 */
	ASSERT(tep->te_refcnt >= 2);
	ASSERT(peer_tep->te_refcnt >= 2);
	tl_serializer_exit(tep);
}

/*
 * Handle T_conn_res on listener stream. Called on listener serializer.
 * tl_conn_req has already generated the T_CONN_CON.
 * tl_conn_res is called on listener serializer.
 * No one accesses acceptor at this point, so it is safe to modify acceptor.
 * Switch eager serializer to acceptor's.
 *
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES or O_T_CONN_RES message (asserted below).
 *	tep - the listener endpoint.
 *
 * Once the acceptor has been looked up, every return path releases it with
 * tl_refrele(acc_ep) except the successful connect at the bottom, where the
 * lookup reference is kept for the te_conp linkage. Paths that follow a
 * successful tl_noclose(acc_ep) also call tl_closeok(acc_ep).
 * client_noclose_set records whether tl_closeok(cl_ep) is owed as well.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	t_scalar_t olen, ooff, err = 0;
	t_scalar_t prim = cres->PRIM_type;
	uchar_t *addr_startp;
	tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *respmp;
	mblk_t *dimp, *ccmp = NULL;
	struct T_discon_ind *di;
	struct T_conn_con *cc;
	boolean_t client_noclose_set = B_FALSE;
	boolean_t switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */

	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * Reject sequence numbers that are at least BADSEQNUM but below
	 * TL_MINOR_START.
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Mark for deletion, the entry corresponding to client
	 * on list of pending connections made by the listener
	 *  search list to see if client is one of the
	 * recorded as a listener.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, err is set only when the client
		 * is a socket whose state is neither TS_WCON_CREQ nor
		 * TS_DATA_XFER; a non-socket client in an unexpected state
		 * is not rejected here - confirm this is intended.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		/* T_CONN_CON carries the acceptor's address as RES. */
		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		/*
		 * Socket with early connect: no T_CONN_CON is built here,
		 * so the response mblk is not needed.
		 */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor: it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client is not
			 * moved; the acceptor moves instead (below).
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark the both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}

static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	struct T_discon_req *dr;
	ssize_t msz;
	tl_endpt_t *peer_tep = tep->te_conp;
	tl_endpt_t *srv_tep = tep->te_oconp;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *dimp, *respmp;
	struct T_discon_ind *di;
	t_scalar_t save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for  T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND  later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
*/ if (tep->te_nicon > 0) { /* server */ /* * search server list for disconnect client */ tip = tl_icon_find(tep, dr->SEQ_number); if (tip == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, "tl_discon_req:no disconnect endpoint")); tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); freemsg(mp); return; } /* * If ti_tep is NULL the client has already closed. In this case * the code below will avoid any action on the client side. */ IMPLY(tip->ti_tep != NULL, tip->ti_tep->te_seqno == dr->SEQ_number); peer_tep = tip->ti_tep; } /* * preallocate now for T_DISCON_IND * ack validity of request (T_OK_ACK) after memory committed */ size = sizeof (struct T_discon_ind); if ((respmp = reallocb(mp, size, 0)) == NULL) { tl_memrecover(wq, mp, size); freemsg(ackmp); return; } /* * prepare message to ack validity of request */ if (tep->te_nicon == 0) new_state = NEXTSTATE(TE_OK_ACK1, new_state); else if (tep->te_nicon == 1) new_state = NEXTSTATE(TE_OK_ACK2, new_state); else new_state = NEXTSTATE(TE_OK_ACK4, new_state); /* * Flushing queues according to TPI. Using the old state. 
*/ if ((tep->te_nicon <= 1) && ((save_state == TS_DATA_XFER) || (save_state == TS_WIND_ORDREL) || (save_state == TS_WREQ_ORDREL))) (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); /* send T_OK_ACK up */ tl_ok_ack(wq, ackmp, T_DISCON_REQ); /* * now do disconnect business */ if (tep->te_nicon > 0) { /* listener */ if (peer_tep != NULL && !peer_tep->te_closing) { /* * disconnect incoming connect request pending to tep */ if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, "tl_discon_req: reallocb failed")); tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; } di = (struct T_discon_ind *)dimp->b_rptr; di->SEQ_number = BADSEQNUM; save_state = peer_tep->te_state; peer_tep->te_state = TS_IDLE; TL_REMOVE_PEER(peer_tep->te_oconp); enableok(peer_tep->te_wq); TL_QENABLE(peer_tep); } else { freemsg(respmp); dimp = NULL; } /* * remove endpoint from incoming connection list * - remove disconnect client from list on server */ tl_freetip(tep, tip); } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ /* * disconnect an outgoing request pending from tep */ if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, "tl_discon_req: reallocb failed")); tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; } di = (struct T_discon_ind *)dimp->b_rptr; DB_TYPE(dimp) = M_PROTO; di->PRIM_type = T_DISCON_IND; di->DISCON_reason = ECONNRESET; di->SEQ_number = tep->te_seqno; /* * If this is a socket the T_DISCON_IND is queued with * the T_CONN_IND. Otherwise the T_CONN_IND is removed * from the list of pending connections. * Note that when te_oconp is set the peer better have * a t_connind_t for the client. */ if (IS_SOCKET(tep) && !tl_disable_early_connect) { /* * No need to check that * ti_tep == NULL since the T_DISCON_IND * takes precedence over other queued * messages. 
*/ tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); peer_tep = NULL; dimp = NULL; /* * Can't clear te_oconp since tl_co_unconnect needs * it as a hint not to free the tep. * Keep the state unchanged since tl_conn_res inspects * it. */ new_state = tep->te_state; } else { /* Found - delete it */ tip = tl_icon_find(peer_tep, tep->te_seqno); if (tip != NULL) { ASSERT(tep == tip->ti_tep); save_state = peer_tep->te_state; if (peer_tep->te_nicon == 1) peer_tep->te_state = NEXTSTATE(TE_DISCON_IND2, peer_tep->te_state); else peer_tep->te_state = NEXTSTATE(TE_DISCON_IND3, peer_tep->te_state); tl_freetip(peer_tep, tip); } ASSERT(tep->te_oconp != NULL); TL_UNCONNECT(tep->te_oconp); } } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */ if ((dimp = tl_resizemp(respmp, size)) == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, "tl_discon_req: reallocb failed")); tep->te_state = new_state; tl_merror(wq, respmp, ENOMEM); return; } di = (struct T_discon_ind *)dimp->b_rptr; di->SEQ_number = BADSEQNUM; save_state = peer_tep->te_state; peer_tep->te_state = TS_IDLE; } else { /* Not connected */ tep->te_state = new_state; freemsg(respmp); return; } /* Commit state changes */ tep->te_state = new_state; if (peer_tep == NULL) { ASSERT(dimp == NULL); goto done; } /* * Flush queues on peer before sending up * T_DISCON_IND according to TPI */ if ((save_state == TS_DATA_XFER) || (save_state == TS_WIND_ORDREL) || (save_state == TS_WREQ_ORDREL)) (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); DB_TYPE(dimp) = M_PROTO; di->PRIM_type = T_DISCON_IND; di->DISCON_reason = ECONNRESET; /* * data blocks already linked into dimp by reallocb() */ /* * send indication message to peer user module */ ASSERT(dimp != NULL); putnext(peer_tep->te_rq, dimp); done: if (tep->te_conp) { /* disconnect pointers if connected */ ASSERT(! peer_tep->te_closing); /* * Messages may be queued on peer's write queue * waiting to be processed by its write service * procedure. 
Before the pointer to the peer transport * structure is set to NULL, qenable the peer's write * queue so that the queued up messages are processed. */ if ((save_state == TS_DATA_XFER) || (save_state == TS_WIND_ORDREL) || (save_state == TS_WREQ_ORDREL)) TL_QENABLE(peer_tep); ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); TL_UNCONNECT(peer_tep->te_conp); if (! IS_SOCKET(tep)) { /* * unlink the streams */ tep->te_wq->q_next = NULL; peer_tep->te_wq->q_next = NULL; } TL_UNCONNECT(tep->te_conp); } } static void tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep) { if (!tep->te_closing) tl_addr_req(mp, tep); else freemsg(mp); tl_serializer_exit(tep); tl_refrele(tep); } static void tl_addr_req(mblk_t *mp, tl_endpt_t *tep) { queue_t *wq; size_t ack_sz; mblk_t *ackmp; struct T_addr_ack *taa; if (tep->te_closing) { freemsg(mp); return; } wq = tep->te_wq; /* * Note: T_ADDR_REQ message has only PRIM_type field * so it is already validated earlier. */ if (IS_CLTS(tep) || (tep->te_state > TS_WREQ_ORDREL) || (tep->te_state < TS_DATA_XFER)) { /* * Either connectionless or connection oriented but not * in connected data transfer state or half-closed states. 
*/ ack_sz = sizeof (struct T_addr_ack); if (tep->te_state >= TS_IDLE) /* is bound */ ack_sz += tep->te_alen; ackmp = reallocb(mp, ack_sz, 0); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_addr_req: reallocb failed")); tl_memrecover(wq, mp, ack_sz); return; } taa = (struct T_addr_ack *)ackmp->b_rptr; bzero(taa, sizeof (struct T_addr_ack)); taa->PRIM_type = T_ADDR_ACK; ackmp->b_datap->db_type = M_PCPROTO; ackmp->b_wptr = (uchar_t *)&taa[1]; if (tep->te_state >= TS_IDLE) { /* endpoint is bound */ taa->LOCADDR_length = tep->te_alen; taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); bcopy(tep->te_abuf, ackmp->b_wptr, tep->te_alen); ackmp->b_wptr += tep->te_alen; ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); } (void) qreply(wq, ackmp); } else { ASSERT(tep->te_state == TS_DATA_XFER || tep->te_state == TS_WIND_ORDREL || tep->te_state == TS_WREQ_ORDREL); /* connection oriented in data transfer */ tl_connected_cots_addr_req(mp, tep); } } static void tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) { tl_endpt_t *peer_tep = tep->te_conp; size_t ack_sz; mblk_t *ackmp; struct T_addr_ack *taa; uchar_t *addr_startp; if (tep->te_closing) { freemsg(mp); return; } if (peer_tep == NULL || peer_tep->te_closing) { tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ); return; } ASSERT(tep->te_state >= TS_IDLE); ack_sz = sizeof (struct T_addr_ack); ack_sz += T_ALIGN(tep->te_alen); ack_sz += peer_tep->te_alen; ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_connected_cots_addr_req: reallocb failed")); tl_memrecover(tep->te_wq, mp, ack_sz); return; } taa = (struct T_addr_ack *)ackmp->b_rptr; /* endpoint is bound */ taa->LOCADDR_length = tep->te_alen; taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); addr_startp = (uchar_t *)&taa[1]; bcopy(tep->te_abuf, addr_startp, tep->te_alen); taa->REMADDR_length = peer_tep->te_alen; taa->REMADDR_offset = 
(t_scalar_t)T_ALIGN(taa->LOCADDR_offset + taa->LOCADDR_length); addr_startp = ackmp->b_rptr + taa->REMADDR_offset; bcopy(peer_tep->te_abuf, addr_startp, peer_tep->te_alen); ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + taa->REMADDR_offset + peer_tep->te_alen; ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); putnext(tep->te_rq, ackmp); } static void tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) { if (IS_CLTS(tep)) { *ia = tl_clts_info_ack; ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ } else { *ia = tl_cots_info_ack; if (IS_COTSORD(tep)) ia->SERV_type = T_COTS_ORD; } ia->TIDU_size = tl_tidusz; ia->CURRENT_state = tep->te_state; } /* * This routine responds to T_CAPABILITY_REQ messages. It is called by * tl_wput. */ static void tl_capability_req(mblk_t *mp, tl_endpt_t *tep) { mblk_t *ackmp; t_uscalar_t cap_bits1; struct T_capability_ack *tcap; if (tep->te_closing) { freemsg(mp); return; } cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), M_PCPROTO, T_CAPABILITY_ACK); if (ackmp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_capability_req: reallocb failed")); tl_memrecover(tep->te_wq, mp, sizeof (struct T_capability_ack)); return; } tcap = (struct T_capability_ack *)ackmp->b_rptr; tcap->CAP_bits1 = 0; if (cap_bits1 & TC1_INFO) { tl_copy_info(&tcap->INFO_ack, tep); tcap->CAP_bits1 |= TC1_INFO; } if (cap_bits1 & TC1_ACCEPTOR_ID) { tcap->ACCEPTOR_id = tep->te_acceptor_id; tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; } putnext(tep->te_rq, ackmp); } static void tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) { if (! 
tep->te_closing)
		tl_info_req(mp, tep);
	else
		freemsg(mp);

	/* Done with this request: leave the serializer and release tep. */
	tl_serializer_exit(tep);
	tl_refrele(tep);
}

/*
 * Answer a T_INFO_REQ: turn mp into a T_INFO_ACK (via tpi_ack_alloc()),
 * fill it in with tl_copy_info() and send it up tep's read side.  If the
 * ack cannot be allocated the request is handed to tl_memrecover().
 */
static void
tl_info_req(mblk_t *mp, tl_endpt_t *tep)
{
	mblk_t *ackmp;

	ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
	    M_PCPROTO, T_INFO_ACK);
	if (ackmp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_info_req: reallocb failed"));
		tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
		return;
	}

	/*
	 * fill in T_INFO_ACK contents
	 */
	tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);

	/*
	 * send ack message
	 */
	putnext(tep->te_rq, ackmp);
}

/*
 * Handle M_DATA, T_data_req and T_optdata_req.
 * If this is a socket pass through T_optdata_req options unmodified.
 *
 * On success the message block itself is forwarded to the connected peer's
 * read side, with an M_PROTO request rewritten in place into the matching
 * indication (T_DATA_IND / T_OPTDATA_IND).  Otherwise the message is put
 * back on tep's queue (flow control, or a connect that has not been
 * accepted yet), queued with the pending connection (closing endpoint with
 * a connect outstanding), freed, or turned into an M_ERROR via tl_merror().
 * While tep is closing, errors free the message instead of calling
 * tl_merror() on the validation paths below.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* Data primitives are not valid on a connectionless endpoint. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * M_PROTO messages must be large enough for the primitive they claim
	 * to be, and T_OPTDATA_REQ is only accepted on sockets.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither connected nor connecting: protocol error. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* Need at least the primitive type to rewrite it. */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		/* Note: the M_ERROR is raised on the peer's write queue. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}

/*
 * Handle T_EXDATA_REQ.  The flow mirrors tl_data(): after validating the
 * message and both endpoints' states the message block is rewritten into a
 * T_EXDATA_IND and passed to the connected peer's read side, or queued /
 * put back / freed on the exceptional paths.
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither connected nor connecting: protocol error. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		/* Note: the M_ERROR is raised on the peer's write queue. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}

/*
 * Handle T_ORDREL_REQ (orderly release).
 *
 * After validating the message length and tep's state, tep's state is
 * advanced on TE_ORDREL_REQ.  If a usable peer is connected and is in
 * TS_DATA_XFER or TS_WIND_ORDREL, the peer's state is advanced on
 * TE_ORDREL_IND and the same message block, rewritten to T_ORDREL_IND, is
 * sent up the peer's read side.  With a connect still outstanding the
 * message is put back (not closing) or queued with the pending connection
 * (closing).
 */
static void
tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	if (msz < sizeof (struct T_ordrel_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_DATA_XFER:
	case TS_WREQ_ORDREL:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No peer and no connect outstanding: fall out of the switch;
		 * the "peer gone" check below then frees the message.
		 */
		if (tep->te_oconp == NULL)
			break;

		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_ordlrel: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordlrel: closing socket ocon"));
		prim->type = T_ORDREL_IND;
		(void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_ORDREL_REQ:out of state, state=%d",
		    tep->te_state));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}
	tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_ordrel: peer gone"));
		freemsg(mp);
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 *
	 * NOTE(review): te_state has already been advanced above, so when a
	 * put-back message is processed again the state check at the top
	 * sees the new state rather than the original one - confirm that
	 * this is intended.
	 */
	if (! canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		/* Note: the M_ERROR is raised on the peer's write queue. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_ordrel:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);

	/*
	 * reuse message block
	 */
	prim->type = T_ORDREL_IND;
	(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
	    "tl_ordrel: send ordrel_ind"));

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}

/*
 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
*/ static void tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) { size_t err_sz; tl_endpt_t *tep; struct T_unitdata_req *udreq; mblk_t *err_mp; t_scalar_t alen; t_scalar_t olen; struct T_uderror_ind *uderr; uchar_t *addr_startp; err_sz = sizeof (struct T_uderror_ind); tep = (tl_endpt_t *)wq->q_ptr; udreq = (struct T_unitdata_req *)mp->b_rptr; alen = udreq->DEST_length; olen = udreq->OPT_length; if (alen > 0) err_sz = T_ALIGN(err_sz + alen); if (olen > 0) err_sz += olen; err_mp = allocb(err_sz, BPRI_MED); if (! err_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_uderr:allocb failure")); /* * Note: no rollback of state needed as it does * not change in connectionless transport */ tl_memrecover(wq, mp, err_sz); return; } DB_TYPE(err_mp) = M_PROTO; err_mp->b_wptr = err_mp->b_rptr + err_sz; uderr = (struct T_uderror_ind *)err_mp->b_rptr; uderr->PRIM_type = T_UDERROR_IND; uderr->ERROR_type = err; uderr->DEST_length = alen; uderr->OPT_length = olen; if (alen <= 0) { uderr->DEST_offset = 0; } else { uderr->DEST_offset = (t_scalar_t)sizeof (struct T_uderror_ind); addr_startp = mp->b_rptr + udreq->DEST_offset; bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, (size_t)alen); } if (olen <= 0) { uderr->OPT_offset = 0; } else { uderr->OPT_offset = (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + uderr->DEST_length); addr_startp = mp->b_rptr + udreq->OPT_offset; bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, (size_t)olen); } freemsg(mp); /* * send indication message */ tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); qreply(wq, err_mp); } static void tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) { queue_t *wq = tep->te_wq; if (!tep->te_closing && (wq->q_first != NULL)) { TL_PUTQ(tep, mp); } else if (tep->te_rq != NULL) tl_unitdata(mp, tep); else freemsg(mp); tl_serializer_exit(tep); tl_refrele(tep); } /* * Handle T_unitdata_req. * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 
* If this is a socket pass through options unmodified. */ static void tl_unitdata(mblk_t *mp, tl_endpt_t *tep) { queue_t *wq = tep->te_wq; soux_addr_t ux_addr; tl_addr_t destaddr; uchar_t *addr_startp; tl_endpt_t *peer_tep; struct T_unitdata_ind *udind; struct T_unitdata_req *udreq; ssize_t msz, ui_sz, reuse_mb_sz; t_scalar_t alen, aoff, olen, ooff; t_scalar_t oldolen = 0; cred_t *cr = NULL; pid_t cpid; udreq = (struct T_unitdata_req *)mp->b_rptr; msz = MBLKL(mp); /* * validate the state */ if (tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_wput:T_CONN_REQ:out of state")); tl_merror(wq, mp, EPROTO); return; } /* * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); * (state does not change on this event) */ /* * validate the message * Note: dereference fields in struct inside message only * after validating the message length. */ if (msz < sizeof (struct T_unitdata_req)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_unitdata:invalid message length")); tl_merror(wq, mp, EINVAL); return; } alen = udreq->DEST_length; aoff = udreq->DEST_offset; oldolen = olen = udreq->OPT_length; ooff = udreq->OPT_offset; if (olen == 0) ooff = 0; if (IS_SOCKET(tep)) { if ((alen != TL_SOUX_ADDRLEN) || (aoff < 0) || (aoff + alen > msz) || (olen < 0) || (ooff < 0) || ((olen > 0) && ((ooff + olen) > msz))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_unitdata_req: invalid socket addr " "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", (int)msz, alen, aoff, olen, ooff)); tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); return; } bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_conn_req: invalid socket magic")); tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); return; } } else { if ((alen < 0) || (aoff < 0) || ((alen > 0) && ((aoff + alen) 
> msz)) || ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || ((aoff + alen) < 0) || ((olen > 0) && ((ooff + olen) > msz)) || (olen < 0) || (ooff < 0) || ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_unitdata:invalid unit data message")); tl_merror(wq, mp, EINVAL); return; } } /* Options not supported unless it's a socket */ if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_unitdata:option use(unsupported) or zero len addr")); tl_uderr(wq, mp, EPROTO); return; } #ifdef DEBUG /* * Mild form of ASSERT()ion to detect broken TPI apps. * if (! assertion) * log warning; */ if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_unitdata:addr overlaps TPI message")); } #endif /* * get destination endpoint */ destaddr.ta_alen = alen; destaddr.ta_abuf = mp->b_rptr + aoff; destaddr.ta_zoneid = tep->te_zoneid; /* * Check whether the destination is the same that was used previously * and the destination endpoint is in the right state. If something is * wrong, find destination again and cache it. */ peer_tep = tep->te_lastep; if ((peer_tep == NULL) || peer_tep->te_closing || (peer_tep->te_state != TS_IDLE) || !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { /* * Not the same as cached destination , need to find the right * destination. */ peer_tep = (IS_SOCKET(tep) ? tl_sock_find_peer(tep, &ux_addr) : tl_find_peer(tep, &destaddr)); if (peer_tep == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_unitdata:no one at destination address")); tl_uderr(wq, mp, ECONNRESET); return; } /* * Cache the new peer. 
*/ if (tep->te_lastep != NULL) tl_refrele(tep->te_lastep); tep->te_lastep = peer_tep; } if (peer_tep->te_state != TS_IDLE) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_unitdata:provider in invalid state")); tl_uderr(wq, mp, EPROTO); return; } ASSERT(peer_tep->te_rq != NULL); /* * Put it back if flow controlled except when we are closing. * Note: Messages already on queue when we are closing is bounded * so we can ignore flow control. */ if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { /* record what we are flow controlled on */ if (tep->te_flowq != NULL) { list_remove(&tep->te_flowq->te_flowlist, tep); } list_insert_head(&peer_tep->te_flowlist, tep); tep->te_flowq = peer_tep; TL_PUTBQ(tep, mp); return; } /* * prepare indication message */ /* * calculate length of message */ if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { cr = msg_getcred(mp, &cpid); ASSERT(cr != NULL); if (peer_tep->te_flag & TL_SETCRED) { ASSERT(olen == 0); olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(sizeof (tl_credopt_t)); /* 1 option only */ } else if (peer_tep->te_flag & TL_SETUCRED) { ASSERT(olen == 0); olen = (t_scalar_t)sizeof (struct opthdr) + OPTLEN(ucredminsize(cr)); /* 1 option only */ } else { /* Possibly more than one option */ olen += (t_scalar_t)sizeof (struct T_opthdr) + OPTLEN(ucredminsize(cr)); } } ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen; reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen; /* * If the unitdata_ind fits and we are not adding options * reuse the udreq mblk. * * Otherwise, it is possible we need to append an option if one of the * te_flag bits is set. 
This requires extra space in the data block for * the additional option but the traditional technique used below to * allocate a new block and copy into it will not work when there is a * message block with a free pointer (since we don't know anything * about the layout of the data, pointers referencing or within the * data, etc.). To handle this possibility the upper layers may have * preallocated some space to use for appending an option. We check the * overall mblock size against the size we need ('reuse_mb_sz' with the * original address length [alen] to ensure we won't overrun the * current mblk data size) to see if there is free space and thus * avoid allocating a new message block. */ if (msz >= ui_sz && alen >= tep->te_alen && !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { /* * Reuse the original mblk. Leave options in place. */ udind = (struct T_unitdata_ind *)mp->b_rptr; udind->PRIM_type = T_UNITDATA_IND; udind->SRC_length = tep->te_alen; addr_startp = mp->b_rptr + udind->SRC_offset; bcopy(tep->te_abuf, addr_startp, tep->te_alen); } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen && mp->b_datap->db_frtnp != NULL) { /* * We have a message block with a free pointer, but extra space * has been pre-allocated for us in case we need to append an * option. Reuse the original mblk, leaving existing options in * place. */ udind = (struct T_unitdata_ind *)mp->b_rptr; udind->PRIM_type = T_UNITDATA_IND; udind->SRC_length = tep->te_alen; addr_startp = mp->b_rptr + udind->SRC_offset; bcopy(tep->te_abuf, addr_startp, tep->te_alen); if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { ASSERT(cr != NULL); /* * We're appending one new option here after the * original ones. 
*/ tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen, cr, cpid, peer_tep->te_flag, peer_tep->te_credp); } } else if (mp->b_datap->db_frtnp != NULL) { /* * The next block creates a new mp and tries to copy the data * block into it, but that cannot handle a message with a free * pointer (for more details see the comment in kstrputmsg() * where dupmsg() is called). Since we can never properly * duplicate the mp while also extending the data, just error * out now. */ tl_uderr(wq, mp, EPROTO); return; } else { /* Allocate a new T_unitdata_ind message */ mblk_t *ui_mp; ui_mp = allocb(ui_sz, BPRI_MED); if (! ui_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, "tl_unitdata:allocb failure:message queued")); tl_memrecover(wq, mp, ui_sz); return; } /* * fill in T_UNITDATA_IND contents */ DB_TYPE(ui_mp) = M_PROTO; ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; udind = (struct T_unitdata_ind *)ui_mp->b_rptr; udind->PRIM_type = T_UNITDATA_IND; udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); udind->SRC_length = tep->te_alen; addr_startp = ui_mp->b_rptr + udind->SRC_offset; bcopy(tep->te_abuf, addr_startp, tep->te_alen); udind->OPT_offset = (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); udind->OPT_length = olen; if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { if (oldolen != 0) { bcopy((void *)((uintptr_t)udreq + ooff), (void *)((uintptr_t)udind + udind->OPT_offset), oldolen); } ASSERT(cr != NULL); tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + oldolen, cr, cpid, peer_tep->te_flag, peer_tep->te_credp); } else { bcopy((void *)((uintptr_t)udreq + ooff), (void *)((uintptr_t)udind + udind->OPT_offset), olen); } /* * relink data blocks from mp to ui_mp */ ui_mp->b_cont = mp->b_cont; freeb(mp); mp = ui_mp; } /* * send indication message */ peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); putnext(peer_tep->te_rq, mp); } /* * Check if a given addr is in use. * Endpoint ptr returned or NULL if not found. 
* The name space is separate for each mode. This implies that * sockets get their own name space. */ static tl_endpt_t * tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) { tl_endpt_t *peer_tep = NULL; int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, (mod_hash_val_t *)&peer_tep, tl_find_callback); ASSERT(! IS_SOCKET(tep)); ASSERT(ap != NULL && ap->ta_alen > 0); ASSERT(ap->ta_zoneid == tep->te_zoneid); ASSERT(ap->ta_abuf != NULL); EQUIV(rc == 0, peer_tep != NULL); IMPLY(rc == 0, (tep->te_zoneid == peer_tep->te_zoneid) && (tep->te_transport == peer_tep->te_transport)); if ((rc == 0) && (peer_tep->te_closing)) { tl_refrele(peer_tep); peer_tep = NULL; } return (peer_tep); } /* * Find peer for a socket based on unix domain address. * For implicit addresses our peer can be found by minor number in ai hash. For * explicit binds we look vnode address at addr_hash. */ static tl_endpt_t * tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) { tl_endpt_t *peer_tep = NULL; mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? tep->te_aihash : tep->te_addrhash; int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, (mod_hash_val_t *)&peer_tep, tl_find_callback); ASSERT(IS_SOCKET(tep)); EQUIV(rc == 0, peer_tep != NULL); IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); if (peer_tep != NULL) { /* Don't attempt to use closing peer. */ if (peer_tep->te_closing) goto errout; /* * Cross-zone unix sockets are permitted, but for Trusted * Extensions only, the "server" for these must be in the * global zone. */ if ((peer_tep->te_zoneid != tep->te_zoneid) && is_system_labeled() && (peer_tep->te_zoneid != GLOBAL_ZONEID)) goto errout; } return (peer_tep); errout: tl_refrele(peer_tep); return (NULL); } /* * Generate a free addr and return it in struct pointed by ap * but allocating space for address buffer. * The generated address will be at least 4 bytes long and, if req->ta_alen * exceeds 4 bytes, be req->ta_alen bytes long. 
* * If address is found it will be inserted in the hash. * * If req->ta_alen is larger than the default alen (4 bytes) the last * alen-4 bytes will always be the same as in req. * * Return 0 for failure. * Return non-zero for success. */ static boolean_t tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) { t_scalar_t alen; uint32_t loopcnt; /* Limit loop to 2^32 */ ASSERT(tep->te_hash_hndl != NULL); ASSERT(! IS_SOCKET(tep)); if (tep->te_hash_hndl == NULL) return (B_FALSE); /* * check if default addr is in use * if it is - bump it and try again */ if (req == NULL) { alen = sizeof (uint32_t); } else { alen = max(req->ta_alen, sizeof (uint32_t)); ASSERT(tep->te_zoneid == req->ta_zoneid); } if (tep->te_alen < alen) { void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); /* * Not enough space in tep->ta_ap to hold the address, * allocate a bigger space. */ if (abuf == NULL) return (B_FALSE); if (tep->te_alen > 0) kmem_free(tep->te_abuf, tep->te_alen); tep->te_alen = alen; tep->te_abuf = abuf; } /* Copy in the address in req */ if (req != NULL) { ASSERT(alen >= req->ta_alen); bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); } /* * First try minor number then try default addresses. */ bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { if (mod_hash_insert_reserve(tep->te_addrhash, (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, tep->te_hash_hndl) == 0) { /* * found free address */ tep->te_flag |= TL_ADDRHASHED; tep->te_hash_hndl = NULL; return (B_TRUE); /* successful return */ } /* * Use default address. */ bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); atomic_inc_32(&tep->te_defaddr); } /* * Failed to find anything. */ (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, "tl_get_any_addr:looped 2^32 times")); return (B_FALSE); } /* * reallocb + set r/w ptrs to reflect size. 
*/ static mblk_t * tl_resizemp(mblk_t *mp, ssize_t new_size) { if ((mp = reallocb(mp, new_size, 0)) == NULL) return (NULL); mp->b_rptr = DB_BASE(mp); mp->b_wptr = mp->b_rptr + new_size; return (mp); } static void tl_cl_backenable(tl_endpt_t *tep) { list_t *l = &tep->te_flowlist; tl_endpt_t *elp; ASSERT(IS_CLTS(tep)); for (elp = list_head(l); elp != NULL; elp = list_head(l)) { ASSERT(tep->te_ser == elp->te_ser); ASSERT(elp->te_flowq == tep); if (! elp->te_closing) TL_QENABLE(elp); elp->te_flowq = NULL; list_remove(l, elp); } } /* * Unconnect endpoints. */ static void tl_co_unconnect(tl_endpt_t *tep) { tl_endpt_t *peer_tep = tep->te_conp; tl_endpt_t *srv_tep = tep->te_oconp; list_t *l; tl_icon_t *tip; tl_endpt_t *cl_tep; mblk_t *d_mp; ASSERT(IS_COTS(tep)); /* * If our peer is closing, don't use it. */ if ((peer_tep != NULL) && peer_tep->te_closing) { TL_UNCONNECT(tep->te_conp); peer_tep = NULL; } if ((srv_tep != NULL) && srv_tep->te_closing) { TL_UNCONNECT(tep->te_oconp); srv_tep = NULL; } if (tep->te_nicon > 0) { l = &tep->te_iconp; /* * If incoming requests pending, change state * of clients on disconnect ind event and send * discon_ind pdu to modules above them * for server: all clients get disconnect */ while (tep->te_nicon > 0) { tip = list_head(l); cl_tep = tip->ti_tep; if (cl_tep == NULL) { tl_freetip(tep, tip); continue; } if (cl_tep->te_oconp != NULL) { ASSERT(cl_tep != cl_tep->te_oconp); TL_UNCONNECT(cl_tep->te_oconp); } if (cl_tep->te_closing) { tl_freetip(tep, tip); continue; } enableok(cl_tep->te_wq); TL_QENABLE(cl_tep); d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); if (d_mp != NULL) { cl_tep->te_state = TS_IDLE; putnext(cl_tep->te_rq, d_mp); } else { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_co_unconnect:icmng: " "allocb failure")); } tl_freetip(tep, tip); } } else if (srv_tep != NULL) { /* * If outgoing request pending, change state * of server on discon ind event */ if (IS_SOCKET(tep) && !tl_disable_early_connect && 
IS_COTSORD(srv_tep) && !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { /* * Queue ordrel_ind for server to be picked up * when the connection is accepted. */ d_mp = tl_ordrel_ind_alloc(); } else { /* * send discon_ind to server */ d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); } if (d_mp == NULL) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_co_unconnect:outgoing:allocb failure")); TL_UNCONNECT(tep->te_oconp); goto discon_peer; } /* * If this is a socket the T_DISCON_IND is queued with * the T_CONN_IND. Otherwise the T_CONN_IND is removed * from the list of pending connections. * Note that when te_oconp is set the peer better have * a t_connind_t for the client. */ if (IS_SOCKET(tep) && !tl_disable_early_connect) { /* * Queue the disconnection message. */ tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); } else { tip = tl_icon_find(srv_tep, tep->te_seqno); if (tip == NULL) { freemsg(d_mp); } else { ASSERT(tep == tip->ti_tep); ASSERT(tep->te_ser == srv_tep->te_ser); /* * Delete tip from the server list. */ if (srv_tep->te_nicon == 1) { srv_tep->te_state = NEXTSTATE(TE_DISCON_IND2, srv_tep->te_state); } else { srv_tep->te_state = NEXTSTATE(TE_DISCON_IND3, srv_tep->te_state); } ASSERT(*(uint32_t *)(d_mp->b_rptr) == T_DISCON_IND); putnext(srv_tep->te_rq, d_mp); tl_freetip(srv_tep, tip); } TL_UNCONNECT(tep->te_oconp); srv_tep = NULL; } } else if (peer_tep != NULL) { /* * unconnect existing connection * If connected, change state of peer on * discon ind event and send discon ind pdu * to module above it */ ASSERT(tep->te_ser == peer_tep->te_ser); if (IS_COTSORD(peer_tep) && (peer_tep->te_state == TS_WIND_ORDREL || peer_tep->te_state == TS_DATA_XFER)) { /* * send ordrel ind */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, "tl_co_unconnect:connected: ordrel_ind state %d->%d", peer_tep->te_state, NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); d_mp = tl_ordrel_ind_alloc(); if (! 
d_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_co_unconnect:connected:" "allocb failure")); /* * Continue with cleaning up peer as * this side may go away with the close */ TL_QENABLE(peer_tep); goto discon_peer; } peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); putnext(peer_tep->te_rq, d_mp); /* * Handle flow control case. This will generate * a t_discon_ind message with reason 0 if there * is data queued on the write side. */ TL_QENABLE(peer_tep); } else if (IS_COTSORD(peer_tep) && peer_tep->te_state == TS_WREQ_ORDREL) { /* * Sent an ordrel_ind. We send a discon with * with error 0 to inform that the peer is gone. */ (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_co_unconnect: discon in state %d", tep->te_state)); tl_discon_ind(peer_tep, 0); } else { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_co_unconnect: state %d", tep->te_state)); tl_discon_ind(peer_tep, ECONNRESET); } discon_peer: /* * Disconnect cross-pointers only for close */ if (tep->te_closing) { peer_tep = tep->te_conp; TL_REMOVE_PEER(peer_tep->te_conp); TL_REMOVE_PEER(tep->te_conp); } } } /* * Note: The following routine does not recover from allocb() * failures * The reason should be from the space. */ static void tl_discon_ind(tl_endpt_t *tep, uint32_t reason) { mblk_t *d_mp; if (tep->te_closing) return; /* * flush the queues. */ flushq(tep->te_rq, FLUSHDATA); (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); /* * send discon ind */ d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); if (! d_mp) { (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, "tl_discon_ind:allocb failure")); return; } tep->te_state = TS_IDLE; putnext(tep->te_rq, d_mp); } /* * Note: The following routine does not recover from allocb() * failures * The reason should be from the space. 
*/ static mblk_t * tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) { mblk_t *mp; struct T_discon_ind *tdi; if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { DB_TYPE(mp) = M_PROTO; mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); tdi = (struct T_discon_ind *)mp->b_rptr; tdi->PRIM_type = T_DISCON_IND; tdi->DISCON_reason = reason; tdi->SEQ_number = seqnum; } return (mp); } /* * Note: The following routine does not recover from allocb() * failures */ static mblk_t * tl_ordrel_ind_alloc(void) { mblk_t *mp; struct T_ordrel_ind *toi; if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { DB_TYPE(mp) = M_PROTO; mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); toi = (struct T_ordrel_ind *)mp->b_rptr; toi->PRIM_type = T_ORDREL_IND; } return (mp); } /* * Lookup the seqno in the list of queued connections. */ static tl_icon_t * tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) { list_t *l = &tep->te_iconp; tl_icon_t *tip = list_head(l); ASSERT(seqno != 0); for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) ; return (tip); } /* * Queue data for a given T_CONN_IND while verifying that redundant * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. * Used when the originator of the connection closes. 
*/ static void tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) { tl_icon_t *tip; mblk_t **mpp, *mp; int prim, nprim; if (nmp->b_datap->db_type == M_PROTO) nprim = ((union T_primitives *)nmp->b_rptr)->type; else nprim = -1; /* M_DATA */ tip = tl_icon_find(tep, seqno); if (tip == NULL) { freemsg(nmp); return; } ASSERT(tip->ti_seqno != 0); mpp = &tip->ti_mp; while (*mpp != NULL) { mp = *mpp; if (mp->b_datap->db_type == M_PROTO) prim = ((union T_primitives *)mp->b_rptr)->type; else prim = -1; /* M_DATA */ /* * Allow nothing after a T_DISCON_IND */ if (prim == T_DISCON_IND) { freemsg(nmp); return; } /* * Only allow a T_DISCON_IND after an T_ORDREL_IND */ if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { freemsg(nmp); return; } mpp = &(mp->b_next); } *mpp = nmp; } /* * Verify if a certain TPI primitive exists on the connind queue. * Use prim -1 for M_DATA. * Return non-zero if found. */ static boolean_t tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) { tl_icon_t *tip = tl_icon_find(tep, seqno); boolean_t found = B_FALSE; if (tip != NULL) { mblk_t *mp; for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { found = (DB_TYPE(mp) == M_PROTO && ((union T_primitives *)mp->b_rptr)->type == prim); } } return (found); } /* * Send the b_next mblk chain that has accumulated before the connection * was accepted. Perform the necessary state transitions. 
*/ static void tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) { mblk_t *mp; union T_primitives *primp; if (tep->te_closing) { tl_icon_freemsgs(mpp); return; } ASSERT(tep->te_state == TS_DATA_XFER); ASSERT(tep->te_rq->q_first == NULL); while ((mp = *mpp) != NULL) { *mpp = mp->b_next; mp->b_next = NULL; ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); switch (DB_TYPE(mp)) { default: freemsg(mp); break; case M_DATA: putnext(tep->te_rq, mp); break; case M_PROTO: primp = (union T_primitives *)mp->b_rptr; switch (primp->type) { case T_UNITDATA_IND: case T_DATA_IND: case T_OPTDATA_IND: case T_EXDATA_IND: putnext(tep->te_rq, mp); break; case T_ORDREL_IND: tep->te_state = NEXTSTATE(TE_ORDREL_IND, tep->te_state); putnext(tep->te_rq, mp); break; case T_DISCON_IND: tep->te_state = TS_IDLE; putnext(tep->te_rq, mp); break; default: #ifdef DEBUG cmn_err(CE_PANIC, "tl_icon_sendmsgs: unknown primitive"); #endif /* DEBUG */ freemsg(mp); break; } break; } } } /* * Free the b_next mblk chain that has accumulated before the connection * was accepted. */ static void tl_icon_freemsgs(mblk_t **mpp) { mblk_t *mp; while ((mp = *mpp) != NULL) { *mpp = mp->b_next; mp->b_next = NULL; freemsg(mp); } } /* * Send M_ERROR * Note: assumes caller ensured enough space in mp or enough * memory available. Does not attempt recovery from allocb() * failures */ static void tl_merror(queue_t *wq, mblk_t *mp, int error) { tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; if (tep->te_closing) { freemsg(mp); return; } (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_merror: tep=%p, err=%d", (void *)tep, error)); /* * flush all messages on queue. 
we are shutting * the stream down on fatal error */ flushq(wq, FLUSHALL); if (IS_COTS(tep)) { /* connection oriented - unconnect endpoints */ tl_co_unconnect(tep); } if (mp->b_cont) { freemsg(mp->b_cont); mp->b_cont = NULL; } if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { freemsg(mp); mp = allocb(1, BPRI_HI); if (!mp) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_merror:M_PROTO: out of memory")); return; } } if (mp) { DB_TYPE(mp) = M_ERROR; mp->b_rptr = DB_BASE(mp); *mp->b_rptr = (char)error; mp->b_wptr = mp->b_rptr + sizeof (char); qreply(wq, mp); } else { (void) putnextctl1(tep->te_rq, M_ERROR, error); } } static void tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) { ASSERT(cr != NULL); if (flag & TL_SETCRED) { struct opthdr *opt = (struct opthdr *)buf; tl_credopt_t *tlcred; opt->level = TL_PROT_LEVEL; opt->name = TL_OPT_PEER_CRED; opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); tlcred = (tl_credopt_t *)(opt + 1); tlcred->tc_uid = crgetuid(cr); tlcred->tc_gid = crgetgid(cr); tlcred->tc_ruid = crgetruid(cr); tlcred->tc_rgid = crgetrgid(cr); tlcred->tc_suid = crgetsuid(cr); tlcred->tc_sgid = crgetsgid(cr); tlcred->tc_ngroups = crgetngroups(cr); } else if (flag & TL_SETUCRED) { struct opthdr *opt = (struct opthdr *)buf; opt->level = TL_PROT_LEVEL; opt->name = TL_OPT_PEER_UCRED; opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); } else { struct T_opthdr *topt = (struct T_opthdr *)buf; ASSERT(flag & TL_SOCKUCRED); topt->level = SOL_SOCKET; topt->name = SCM_UCRED; topt->len = ucredminsize(cr) + sizeof (*topt); topt->status = 0; (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); } } /* ARGSUSED */ static int tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) { /* no default value processed in protocol specific code currently */ return (-1); } /* ARGSUSED */ static int tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) { int len; tl_endpt_t 
*tep; int *valp; tep = (tl_endpt_t *)wq->q_ptr; len = 0; /* * Assumes: option level and name sanity check done elsewhere */ switch (level) { case SOL_SOCKET: if (! IS_SOCKET(tep)) break; switch (name) { case SO_RECVUCRED: len = sizeof (int); valp = (int *)ptr; *valp = (tep->te_flag & TL_SOCKUCRED) != 0; break; default: break; } break; case TL_PROT_LEVEL: switch (name) { case TL_OPT_PEER_CRED: case TL_OPT_PEER_UCRED: /* * option not supposed to retrieved directly * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND * when some internal flags set by other options * Direct retrieval always designed to fail(ignored) * for this option. */ break; } } return (len); } /* ARGSUSED */ static int tl_set_opt( queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) { int error; tl_endpt_t *tep; tep = (tl_endpt_t *)wq->q_ptr; error = 0; /* NOERROR */ /* * Assumes: option level and name sanity checks done elsewhere */ switch (level) { case SOL_SOCKET: if (! IS_SOCKET(tep)) { error = EINVAL; break; } /* * TBD: fill in other AF_UNIX socket options and then stop * returning error. */ switch (name) { case SO_RECVUCRED: /* * We only support this for datagram sockets; * getpeerucred handles the connection oriented * transports. */ if (! IS_CLTS(tep)) { error = EINVAL; break; } if (*(int *)invalp == 0) tep->te_flag &= ~TL_SOCKUCRED; else tep->te_flag |= TL_SOCKUCRED; break; default: error = EINVAL; break; } break; case TL_PROT_LEVEL: switch (name) { case TL_OPT_PEER_CRED: case TL_OPT_PEER_UCRED: /* * option not supposed to be set directly * Its value in initialized for each endpoint at * driver open time. * Direct setting always designed to fail for this * option. 
*/ (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_set_opt: option is not supported")); error = EPROTO; break; } } return (error); } static void tl_timer(void *arg) { queue_t *wq = arg; tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; ASSERT(tep); tep->te_timoutid = 0; enableok(wq); /* * Note: can call wsrv directly here and save context switch * Consider change when qtimeout (not timeout) is active */ qenable(wq); } static void tl_buffer(void *arg) { queue_t *wq = arg; tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; ASSERT(tep); tep->te_bufcid = 0; tep->te_nowsrv = B_FALSE; enableok(wq); /* * Note: can call wsrv directly here and save context switch * Consider change when qbufcall (not bufcall) is active */ qenable(wq); } static void tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) { tl_endpt_t *tep; tep = (tl_endpt_t *)wq->q_ptr; if (tep->te_closing) { freemsg(mp); return; } noenable(wq); (void) insq(wq, wq->q_first, mp); if (tep->te_bufcid || tep->te_timoutid) { (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, "tl_memrecover:recover %p pending", (void *)wq)); return; } if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { tep->te_timoutid = qtimeout(wq, tl_timer, wq, drv_usectohz(TL_BUFWAIT)); } } static void tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) { ASSERT(tip->ti_seqno != 0); if (tip->ti_mp != NULL) { tl_icon_freemsgs(&tip->ti_mp); tip->ti_mp = NULL; } if (tip->ti_tep != NULL) { tl_refrele(tip->ti_tep); tip->ti_tep = NULL; } list_remove(&tep->te_iconp, tip); kmem_free(tip, sizeof (tl_icon_t)); tep->te_nicon--; } /* * Remove address from address hash. 
*/ static void tl_addr_unbind(tl_endpt_t *tep) { tl_endpt_t *elp; if (tep->te_flag & TL_ADDRHASHED) { if (IS_SOCKET(tep)) { (void) mod_hash_remove(tep->te_addrhash, (mod_hash_key_t)tep->te_vp, (mod_hash_val_t *)&elp); tep->te_vp = (void *)(uintptr_t)tep->te_minor; tep->te_magic = SOU_MAGIC_IMPLICIT; } else { (void) mod_hash_remove(tep->te_addrhash, (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t *)&elp); (void) kmem_free(tep->te_abuf, tep->te_alen); tep->te_alen = -1; tep->te_abuf = NULL; } tep->te_flag &= ~TL_ADDRHASHED; } }