xref: /titanic_51/usr/src/uts/common/inet/udp/udp.c (revision c227543f6890bd6f2054360ec1820bfef8132431)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /* Copyright (c) 1990 Mentat Inc. */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/strlog.h>
30 #include <sys/strsun.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/timod.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/strsubr.h>
37 #include <sys/suntpi.h>
38 #include <sys/xti_inet.h>
39 #include <sys/kmem.h>
40 #include <sys/cred_impl.h>
41 #include <sys/policy.h>
42 #include <sys/priv.h>
43 #include <sys/ucred.h>
44 #include <sys/zone.h>
45 
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/sockio.h>
49 #include <sys/vtrace.h>
50 #include <sys/sdt.h>
51 #include <sys/debug.h>
52 #include <sys/isa_defs.h>
53 #include <sys/random.h>
54 #include <netinet/in.h>
55 #include <netinet/ip6.h>
56 #include <netinet/icmp6.h>
57 #include <netinet/udp.h>
58 
59 #include <inet/common.h>
60 #include <inet/ip.h>
61 #include <inet/ip_impl.h>
62 #include <inet/ipsec_impl.h>
63 #include <inet/ip6.h>
64 #include <inet/ip_ire.h>
65 #include <inet/ip_if.h>
66 #include <inet/ip_multi.h>
67 #include <inet/ip_ndp.h>
68 #include <inet/proto_set.h>
69 #include <inet/mib2.h>
70 #include <inet/optcom.h>
71 #include <inet/snmpcom.h>
72 #include <inet/kstatcom.h>
73 #include <inet/ipclassifier.h>
74 #include <sys/squeue_impl.h>
75 #include <inet/ipnet.h>
76 #include <sys/ethernet.h>
77 
78 #include <sys/tsol/label.h>
79 #include <sys/tsol/tnet.h>
80 #include <rpc/pmap_prot.h>
81 
82 #include <inet/udp_impl.h>
83 
84 /*
85  * Synchronization notes:
86  *
87  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
88  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
89  * protects the contents of the udp_t. uf_lock protects the address and the
90  * fanout information.
91  * The lock order is conn_lock -> uf_lock.
92  *
93  * The fanout lock uf_lock:
94  * When a UDP endpoint is bound to a local port, it is inserted into
95  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
96  * The size of the array is controlled by the udp_bind_fanout_size variable.
97  * This variable can be changed in /etc/system if the default value is
98  * not large enough.  Each bind hash bucket is protected by a per bucket
99  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
100  * structure and a few other fields in the udp_t. A UDP endpoint is removed
101  * from the bind hash list only when it is being unbound or being closed.
102  * The per bucket lock also protects a UDP endpoint's state changes.
103  *
104  * Plumbing notes:
105  * UDP is always a device driver. For compatibility with mibopen() code
106  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
107  * dummy module.
108  *
109  * The above implies that we don't support any intermediate module to
110  * reside in between /dev/ip and udp -- in fact, we never supported such
111  * scenario in the past as the inter-layer communication semantics have
112  * always been private.
113  */
114 
115 /* For /etc/system control */
116 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
117 
118 static void	udp_addr_req(queue_t *q, mblk_t *mp);
119 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
120 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
121 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
122 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
123     const in6_addr_t *, in_port_t, uint32_t);
124 static void	udp_capability_req(queue_t *q, mblk_t *mp);
125 static int	udp_tpi_close(queue_t *q, int flags);
126 static void	udp_close_free(conn_t *);
127 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
128 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
129 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
130     int sys_error);
131 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
132     t_scalar_t tlierr, int sys_error);
133 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
134 		    cred_t *cr);
135 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
136 		    char *value, caddr_t cp, cred_t *cr);
137 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
138 		    char *value, caddr_t cp, cred_t *cr);
139 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
140 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
141     ip_recv_attr_t *ira);
142 static void	udp_info_req(queue_t *q, mblk_t *mp);
143 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
144 static void	udp_lrput(queue_t *, mblk_t *);
145 static void	udp_lwput(queue_t *, mblk_t *);
146 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
147 		    cred_t *credp, boolean_t isv6);
148 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
149 		    cred_t *credp);
150 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
151 		    cred_t *credp);
152 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
153 int		udp_opt_set(conn_t *connp, uint_t optset_context,
154 		    int level, int name, uint_t inlen,
155 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
156 		    void *thisdg_attrs, cred_t *cr);
157 int		udp_opt_get(conn_t *connp, int level, int name,
158 		    uchar_t *ptr);
159 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
160 		    pid_t pid);
161 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
162     pid_t pid, ip_xmit_attr_t *ixa);
163 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
164 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
165 		    ip_xmit_attr_t *ixa);
166 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
167     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
168     int *);
169 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
170     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
171 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
172 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
173 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
174 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
175     boolean_t random);
176 static void	udp_wput_other(queue_t *q, mblk_t *mp);
177 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
178 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
179 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
180 
181 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
182 static void	udp_stack_fini(netstackid_t stackid, void *arg);
183 
184 static void	*udp_kstat_init(netstackid_t stackid);
185 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
186 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
187 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
188 static int	udp_kstat_update(kstat_t *kp, int rw);
189 
190 
191 /* Common routines for TPI and socket module */
192 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
193 
194 /* Common routine for TPI and socket module */
195 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
196 static void	udp_do_close(conn_t *);
197 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
198     boolean_t);
199 static int	udp_do_unbind(conn_t *);
200 
201 int		udp_getsockname(sock_lower_handle_t,
202     struct sockaddr *, socklen_t *, cred_t *);
203 int		udp_getpeername(sock_lower_handle_t,
204     struct sockaddr *, socklen_t *, cred_t *);
205 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
206     cred_t *, pid_t);
207 
208 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
209 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * cp		conn_t whose IP version, local address, local port and
 *		netstack id are handed to the hook.
 * is_outgoing	passed through to the hook unchanged.
 * faddrp	pointer to the foreign address; for an IPv4 conn only the
 *		V4 part of it is passed on.
 * fport	foreign port, cast to in_port_t.
 * err		lvalue that receives the hook's return value.
 *
 * The expansion is a bare compound statement (not do/while), so it must
 * not be used as the unbraced body of an if that has an else.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
247 
248 static struct module_info udp_mod_info =  {
249 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
250 };
251 
252 /*
253  * Entry points for UDP as a device.
254  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
255  */
256 static struct qinit udp_rinitv4 = {
257 	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
258 };
259 
260 static struct qinit udp_rinitv6 = {
261 	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
262 };
263 
264 static struct qinit udp_winit = {
265 	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
266 };
267 
268 /* UDP entry point during fallback */
269 struct qinit udp_fallback_sock_winit = {
270 	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
271 };
272 
273 /*
274  * UDP needs to handle I_LINK and I_PLINK since ifconfig
275  * likes to use it as a place to hang the various streams.
276  */
277 static struct qinit udp_lrinit = {
278 	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
279 };
280 
281 static struct qinit udp_lwinit = {
282 	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
283 };
284 
285 /* For AF_INET aka /dev/udp */
286 struct streamtab udpinfov4 = {
287 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
288 };
289 
290 /* For AF_INET6 aka /dev/udp6 */
291 struct streamtab udpinfov6 = {
292 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
293 };
294 
295 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
296 
297 /* Default structure copied into T_INFO_ACK messages */
298 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
299 	T_INFO_ACK,
300 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
301 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
302 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
303 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
304 	sizeof (sin_t),	/* ADDR_size. */
305 	0,		/* OPT_size - not initialized here */
306 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
307 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
308 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
309 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
310 };
311 
312 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
313 
314 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
315 	T_INFO_ACK,
316 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
317 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
318 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
319 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
320 	sizeof (sin6_t), /* ADDR_size. */
321 	0,		/* OPT_size - not initialized here */
322 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
323 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
324 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
325 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
326 };
327 
328 /*
329  * UDP tunables related declarations. Definitions are in udp_tunables.c
330  */
331 extern mod_prop_info_t udp_propinfo_tbl[];
332 extern int udp_propinfo_count;
333 
/* Settable in /etc/system */
335 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
336 uint32_t udp_random_anon_port = 1;
337 
338 /*
339  * Hook functions to enable cluster networking.
340  * On non-clustered systems these vectors must always be NULL
341  */
342 
343 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
344     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
345     void *args) = NULL;
346 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
347     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
348     void *args) = NULL;
349 
350 typedef union T_primitives *t_primp_t;
351 
352 /*
353  * Return the next anonymous port in the privileged port range for
354  * bind checking.
355  *
356  * Trusted Extension (TX) notes: TX allows administrator to mark or
357  * reserve ports as Multilevel ports (MLP). MLP has special function
358  * on TX systems. Once a port is made MLP, it's not available as
359  * ordinary port. This creates "holes" in the port name space. It
360  * may be necessary to skip the "holes" find a suitable anon port.
361  */
362 static in_port_t
363 udp_get_next_priv_port(udp_t *udp)
364 {
365 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
366 	in_port_t nextport;
367 	boolean_t restart = B_FALSE;
368 	udp_stack_t *us = udp->udp_us;
369 
370 retry:
371 	if (next_priv_port < us->us_min_anonpriv_port ||
372 	    next_priv_port >= IPPORT_RESERVED) {
373 		next_priv_port = IPPORT_RESERVED - 1;
374 		if (restart)
375 			return (0);
376 		restart = B_TRUE;
377 	}
378 
379 	if (is_system_labeled() &&
380 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
381 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
382 		next_priv_port = nextport;
383 		goto retry;
384 	}
385 
386 	return (next_priv_port--);
387 }
388 
389 /*
390  * Hash list removal routine for udp_t structures.
391  */
392 static void
393 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
394 {
395 	udp_t		*udpnext;
396 	kmutex_t	*lockp;
397 	udp_stack_t	*us = udp->udp_us;
398 	conn_t		*connp = udp->udp_connp;
399 
400 	if (udp->udp_ptpbhn == NULL)
401 		return;
402 
403 	/*
404 	 * Extract the lock pointer in case there are concurrent
405 	 * hash_remove's for this instance.
406 	 */
407 	ASSERT(connp->conn_lport != 0);
408 	if (!caller_holds_lock) {
409 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
410 		    us->us_bind_fanout_size)].uf_lock;
411 		ASSERT(lockp != NULL);
412 		mutex_enter(lockp);
413 	}
414 	if (udp->udp_ptpbhn != NULL) {
415 		udpnext = udp->udp_bind_hash;
416 		if (udpnext != NULL) {
417 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
418 			udp->udp_bind_hash = NULL;
419 		}
420 		*udp->udp_ptpbhn = udpnext;
421 		udp->udp_ptpbhn = NULL;
422 	}
423 	if (!caller_holds_lock) {
424 		mutex_exit(lockp);
425 	}
426 }
427 
428 static void
429 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
430 {
431 	conn_t	*connp = udp->udp_connp;
432 	udp_t	**udpp;
433 	udp_t	*udpnext;
434 	conn_t	*connext;
435 
436 	ASSERT(MUTEX_HELD(&uf->uf_lock));
437 	ASSERT(udp->udp_ptpbhn == NULL);
438 	udpp = &uf->uf_udp;
439 	udpnext = udpp[0];
440 	if (udpnext != NULL) {
441 		/*
442 		 * If the new udp bound to the INADDR_ANY address
443 		 * and the first one in the list is not bound to
444 		 * INADDR_ANY we skip all entries until we find the
445 		 * first one bound to INADDR_ANY.
446 		 * This makes sure that applications binding to a
447 		 * specific address get preference over those binding to
448 		 * INADDR_ANY.
449 		 */
450 		connext = udpnext->udp_connp;
451 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
452 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
453 			while ((udpnext = udpp[0]) != NULL &&
454 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
455 				udpp = &(udpnext->udp_bind_hash);
456 			}
457 			if (udpnext != NULL)
458 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
459 		} else {
460 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
461 		}
462 	}
463 	udp->udp_bind_hash = udpnext;
464 	udp->udp_ptpbhn = udpp;
465 	udpp[0] = udp;
466 }
467 
468 /*
469  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
470  * passed to udp_wput.
471  * It associates a port number and local address with the stream.
472  * It calls IP to verify the local IP address, and calls IP to insert
473  * the conn_t in the fanout table.
474  * If everything is ok it then sends the T_BIND_ACK back up.
475  *
476  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
477  * without setting SO_REUSEADDR. This is needed so that they
478  * can be viewed as two independent transport protocols.
479  * However, anonymouns ports are allocated from the same range to avoid
480  * duplicating the us->us_next_port_to_try.
481  */
482 static void
483 udp_tpi_bind(queue_t *q, mblk_t *mp)
484 {
485 	sin_t		*sin;
486 	sin6_t		*sin6;
487 	mblk_t		*mp1;
488 	struct T_bind_req *tbr;
489 	conn_t		*connp;
490 	udp_t		*udp;
491 	int		error;
492 	struct sockaddr	*sa;
493 	cred_t		*cr;
494 
495 	/*
496 	 * All Solaris components should pass a db_credp
497 	 * for this TPI message, hence we ASSERT.
498 	 * But in case there is some other M_PROTO that looks
499 	 * like a TPI message sent by some other kernel
500 	 * component, we check and return an error.
501 	 */
502 	cr = msg_getcred(mp, NULL);
503 	ASSERT(cr != NULL);
504 	if (cr == NULL) {
505 		udp_err_ack(q, mp, TSYSERR, EINVAL);
506 		return;
507 	}
508 
509 	connp = Q_TO_CONN(q);
510 	udp = connp->conn_udp;
511 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
512 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
513 		    "udp_bind: bad req, len %u",
514 		    (uint_t)(mp->b_wptr - mp->b_rptr));
515 		udp_err_ack(q, mp, TPROTO, 0);
516 		return;
517 	}
518 	if (udp->udp_state != TS_UNBND) {
519 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
520 		    "udp_bind: bad state, %u", udp->udp_state);
521 		udp_err_ack(q, mp, TOUTSTATE, 0);
522 		return;
523 	}
524 	/*
525 	 * Reallocate the message to make sure we have enough room for an
526 	 * address.
527 	 */
528 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
529 	if (mp1 == NULL) {
530 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
531 		return;
532 	}
533 
534 	mp = mp1;
535 
536 	/* Reset the message type in preparation for shipping it back. */
537 	DB_TYPE(mp) = M_PCPROTO;
538 
539 	tbr = (struct T_bind_req *)mp->b_rptr;
540 	switch (tbr->ADDR_length) {
541 	case 0:			/* Request for a generic port */
542 		tbr->ADDR_offset = sizeof (struct T_bind_req);
543 		if (connp->conn_family == AF_INET) {
544 			tbr->ADDR_length = sizeof (sin_t);
545 			sin = (sin_t *)&tbr[1];
546 			*sin = sin_null;
547 			sin->sin_family = AF_INET;
548 			mp->b_wptr = (uchar_t *)&sin[1];
549 			sa = (struct sockaddr *)sin;
550 		} else {
551 			ASSERT(connp->conn_family == AF_INET6);
552 			tbr->ADDR_length = sizeof (sin6_t);
553 			sin6 = (sin6_t *)&tbr[1];
554 			*sin6 = sin6_null;
555 			sin6->sin6_family = AF_INET6;
556 			mp->b_wptr = (uchar_t *)&sin6[1];
557 			sa = (struct sockaddr *)sin6;
558 		}
559 		break;
560 
561 	case sizeof (sin_t):	/* Complete IPv4 address */
562 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
563 		    sizeof (sin_t));
564 		if (sa == NULL || !OK_32PTR((char *)sa)) {
565 			udp_err_ack(q, mp, TSYSERR, EINVAL);
566 			return;
567 		}
568 		if (connp->conn_family != AF_INET ||
569 		    sa->sa_family != AF_INET) {
570 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
571 			return;
572 		}
573 		break;
574 
575 	case sizeof (sin6_t):	/* complete IPv6 address */
576 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
577 		    sizeof (sin6_t));
578 		if (sa == NULL || !OK_32PTR((char *)sa)) {
579 			udp_err_ack(q, mp, TSYSERR, EINVAL);
580 			return;
581 		}
582 		if (connp->conn_family != AF_INET6 ||
583 		    sa->sa_family != AF_INET6) {
584 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
585 			return;
586 		}
587 		break;
588 
589 	default:		/* Invalid request */
590 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
591 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
592 		udp_err_ack(q, mp, TBADADDR, 0);
593 		return;
594 	}
595 
596 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
597 	    tbr->PRIM_type != O_T_BIND_REQ);
598 
599 	if (error != 0) {
600 		if (error > 0) {
601 			udp_err_ack(q, mp, TSYSERR, error);
602 		} else {
603 			udp_err_ack(q, mp, -error, 0);
604 		}
605 	} else {
606 		tbr->PRIM_type = T_BIND_ACK;
607 		qreply(q, mp);
608 	}
609 }
610 
611 /*
612  * This routine handles each T_CONN_REQ message passed to udp.  It
613  * associates a default destination address with the stream.
614  *
615  * After various error checks are completed, udp_connect() lays
616  * the target address and port into the composite header template.
617  * Then we ask IP for information, including a source address if we didn't
618  * already have one. Finally we send up the T_OK_ACK reply message.
619  */
620 static void
621 udp_tpi_connect(queue_t *q, mblk_t *mp)
622 {
623 	conn_t	*connp = Q_TO_CONN(q);
624 	int	error;
625 	socklen_t	len;
626 	struct sockaddr		*sa;
627 	struct T_conn_req	*tcr;
628 	cred_t		*cr;
629 	pid_t		pid;
630 	/*
631 	 * All Solaris components should pass a db_credp
632 	 * for this TPI message, hence we ASSERT.
633 	 * But in case there is some other M_PROTO that looks
634 	 * like a TPI message sent by some other kernel
635 	 * component, we check and return an error.
636 	 */
637 	cr = msg_getcred(mp, &pid);
638 	ASSERT(cr != NULL);
639 	if (cr == NULL) {
640 		udp_err_ack(q, mp, TSYSERR, EINVAL);
641 		return;
642 	}
643 
644 	tcr = (struct T_conn_req *)mp->b_rptr;
645 
646 	/* A bit of sanity checking */
647 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
648 		udp_err_ack(q, mp, TPROTO, 0);
649 		return;
650 	}
651 
652 	if (tcr->OPT_length != 0) {
653 		udp_err_ack(q, mp, TBADOPT, 0);
654 		return;
655 	}
656 
657 	/*
658 	 * Determine packet type based on type of address passed in
659 	 * the request should contain an IPv4 or IPv6 address.
660 	 * Make sure that address family matches the type of
661 	 * family of the address passed down.
662 	 */
663 	len = tcr->DEST_length;
664 	switch (tcr->DEST_length) {
665 	default:
666 		udp_err_ack(q, mp, TBADADDR, 0);
667 		return;
668 
669 	case sizeof (sin_t):
670 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
671 		    sizeof (sin_t));
672 		break;
673 
674 	case sizeof (sin6_t):
675 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
676 		    sizeof (sin6_t));
677 		break;
678 	}
679 
680 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
681 	if (error != 0) {
682 		udp_err_ack(q, mp, TSYSERR, error);
683 		return;
684 	}
685 
686 	error = udp_do_connect(connp, sa, len, cr, pid);
687 	if (error != 0) {
688 		if (error < 0)
689 			udp_err_ack(q, mp, -error, 0);
690 		else
691 			udp_err_ack(q, mp, TSYSERR, error);
692 	} else {
693 		mblk_t	*mp1;
694 		/*
695 		 * We have to send a connection confirmation to
696 		 * keep TLI happy.
697 		 */
698 		if (connp->conn_family == AF_INET) {
699 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
700 			    sizeof (sin_t), NULL, 0);
701 		} else {
702 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
703 			    sizeof (sin6_t), NULL, 0);
704 		}
705 		if (mp1 == NULL) {
706 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
707 			return;
708 		}
709 
710 		/*
711 		 * Send ok_ack for T_CONN_REQ
712 		 */
713 		mp = mi_tpi_ok_ack_alloc(mp);
714 		if (mp == NULL) {
715 			/* Unable to reuse the T_CONN_REQ for the ack. */
716 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
717 			return;
718 		}
719 
720 		putnext(connp->conn_rq, mp);
721 		putnext(connp->conn_rq, mp1);
722 	}
723 }
724 
725 static int
726 udp_tpi_close(queue_t *q, int flags)
727 {
728 	conn_t	*connp;
729 
730 	if (flags & SO_FALLBACK) {
731 		/*
732 		 * stream is being closed while in fallback
733 		 * simply free the resources that were allocated
734 		 */
735 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
736 		qprocsoff(q);
737 		goto done;
738 	}
739 
740 	connp = Q_TO_CONN(q);
741 	udp_do_close(connp);
742 done:
743 	q->q_ptr = WR(q)->q_ptr = NULL;
744 	return (0);
745 }
746 
747 static void
748 udp_close_free(conn_t *connp)
749 {
750 	udp_t *udp = connp->conn_udp;
751 
752 	/* If there are any options associated with the stream, free them. */
753 	if (udp->udp_recv_ipp.ipp_fields != 0)
754 		ip_pkt_free(&udp->udp_recv_ipp);
755 
756 	/*
757 	 * Clear any fields which the kmem_cache constructor clears.
758 	 * Only udp_connp needs to be preserved.
759 	 * TBD: We should make this more efficient to avoid clearing
760 	 * everything.
761 	 */
762 	ASSERT(udp->udp_connp == connp);
763 	bzero(udp, sizeof (udp_t));
764 	udp->udp_connp = connp;
765 }
766 
767 static int
768 udp_do_disconnect(conn_t *connp)
769 {
770 	udp_t	*udp;
771 	udp_fanout_t *udpf;
772 	udp_stack_t *us;
773 	int	error;
774 
775 	udp = connp->conn_udp;
776 	us = udp->udp_us;
777 	mutex_enter(&connp->conn_lock);
778 	if (udp->udp_state != TS_DATA_XFER) {
779 		mutex_exit(&connp->conn_lock);
780 		return (-TOUTSTATE);
781 	}
782 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
783 	    us->us_bind_fanout_size)];
784 	mutex_enter(&udpf->uf_lock);
785 	if (connp->conn_mcbc_bind)
786 		connp->conn_saddr_v6 = ipv6_all_zeros;
787 	else
788 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
789 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
790 	connp->conn_faddr_v6 = ipv6_all_zeros;
791 	connp->conn_fport = 0;
792 	udp->udp_state = TS_IDLE;
793 	mutex_exit(&udpf->uf_lock);
794 
795 	/* Remove any remnants of mapped address binding */
796 	if (connp->conn_family == AF_INET6)
797 		connp->conn_ipversion = IPV6_VERSION;
798 
799 	connp->conn_v6lastdst = ipv6_all_zeros;
800 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
801 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
802 	mutex_exit(&connp->conn_lock);
803 	if (error != 0)
804 		return (error);
805 
806 	/*
807 	 * Tell IP to remove the full binding and revert
808 	 * to the local address binding.
809 	 */
810 	return (ip_laddr_fanout_insert(connp));
811 }
812 
813 static void
814 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
815 {
816 	conn_t	*connp = Q_TO_CONN(q);
817 	int	error;
818 
819 	/*
820 	 * Allocate the largest primitive we need to send back
821 	 * T_error_ack is > than T_ok_ack
822 	 */
823 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
824 	if (mp == NULL) {
825 		/* Unable to reuse the T_DISCON_REQ for the ack. */
826 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
827 		return;
828 	}
829 
830 	error = udp_do_disconnect(connp);
831 
832 	if (error != 0) {
833 		if (error < 0) {
834 			udp_err_ack(q, mp, -error, 0);
835 		} else {
836 			udp_err_ack(q, mp, TSYSERR, error);
837 		}
838 	} else {
839 		mp = mi_tpi_ok_ack_alloc(mp);
840 		ASSERT(mp != NULL);
841 		qreply(q, mp);
842 	}
843 }
844 
845 int
846 udp_disconnect(conn_t *connp)
847 {
848 	int error;
849 
850 	connp->conn_dgram_errind = B_FALSE;
851 	error = udp_do_disconnect(connp);
852 	if (error < 0)
853 		error = proto_tlitosyserr(-error);
854 
855 	return (error);
856 }
857 
858 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
859 static void
860 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
861 {
862 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
863 		qreply(q, mp);
864 }
865 
866 /* Shorthand to generate and send TPI error acks to our client */
867 static void
868 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
869     t_scalar_t t_error, int sys_error)
870 {
871 	struct T_error_ack	*teackp;
872 
873 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
874 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
875 		teackp = (struct T_error_ack *)mp->b_rptr;
876 		teackp->ERROR_prim = primitive;
877 		teackp->TLI_error = t_error;
878 		teackp->UNIX_error = sys_error;
879 		qreply(q, mp);
880 	}
881 }
882 
883 /* At minimum we need 4 bytes of UDP header */
884 #define	ICMP_MIN_UDP_HDR	4
885 
886 /*
887  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
888  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
889  * Assumes that IP has pulled up everything up to and including the ICMP header.
     *
     * arg1 is the conn_t the message is for; arg2 is unused.  mp starts at the
     * outer IP header and is always consumed: it is freed here on every IPv4
     * path, or handed to udp_icmp_error_ipv6() when the outer header is not IPv4.
     *
     * Only ICMP_PORT_UNREACHABLE and ICMP_PROTOCOL_UNREACHABLE yield an error
     * (ECONNREFUSED).  ICMP_FRAGMENTATION_NEEDED merely refreshes the DF bit in
     * the conn's cached header (conn_ht_iphc); every other type/code is dropped.
890  */
891 /* ARGSUSED2 */
892 static void
893 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
894 {
895 	conn_t		*connp = (conn_t *)arg1;
896 	icmph_t		*icmph;
897 	ipha_t		*ipha;
898 	int		iph_hdr_length;
899 	udpha_t		*udpha;
900 	sin_t		sin;
901 	sin6_t		sin6;
902 	mblk_t		*mp1;
903 	int		error = 0;
904 	udp_t		*udp = connp->conn_udp;
905 
906 	ipha = (ipha_t *)mp->b_rptr;
907 
908 	ASSERT(OK_32PTR(mp->b_rptr));
909 
    	/* Anything that is not IPv4 is treated as IPv6 and handled elsewhere. */
910 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
911 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
912 		udp_icmp_error_ipv6(connp, mp, ira);
913 		return;
914 	}
915 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
916 
917 	/* Skip past the outer IP and ICMP headers */
918 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
919 	iph_hdr_length = ira->ira_ip_hdr_length;
920 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
921 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
922 
923 	/* Skip past the inner IP and find the ULP header */
924 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
925 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
926 
    	/* Classify the ICMP message; only permanent errors set 'error'. */
927 	switch (icmph->icmph_type) {
928 	case ICMP_DEST_UNREACHABLE:
929 		switch (icmph->icmph_code) {
930 		case ICMP_FRAGMENTATION_NEEDED: {
    			/* NB: this ipha shadows the function-level ipha. */
931 			ipha_t		*ipha;
932 			ip_xmit_attr_t	*ixa;
933 			/*
934 			 * IP has already adjusted the path MTU.
935 			 * But we need to adjust DF for IPv4.
936 			 */
937 			if (connp->conn_ipversion != IPV4_VERSION)
938 				break;
939 
940 			ixa = conn_get_ixa(connp, B_FALSE);
941 			if (ixa == NULL || ixa->ixa_ire == NULL) {
942 				/*
943 				 * Some other thread holds conn_ixa. We will
944 				 * redo this on the next ICMP too big.
945 				 */
946 				if (ixa != NULL)
947 					ixa_refrele(ixa);
948 				break;
949 			}
950 			(void) ip_get_pmtu(ixa);
951 
    			/*
    			 * Mirror IXAF_PMTU_IPV4_DF into the DF bit of the
    			 * cached IPv4 header, under conn_lock.
    			 */
952 			mutex_enter(&connp->conn_lock);
953 			ipha = (ipha_t *)connp->conn_ht_iphc;
954 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
955 				ipha->ipha_fragment_offset_and_flags |=
956 				    IPH_DF_HTONS;
957 			} else {
958 				ipha->ipha_fragment_offset_and_flags &=
959 				    ~IPH_DF_HTONS;
960 			}
961 			mutex_exit(&connp->conn_lock);
962 			ixa_refrele(ixa);
963 			break;
964 		}
965 		case ICMP_PORT_UNREACHABLE:
966 		case ICMP_PROTOCOL_UNREACHABLE:
967 			error = ECONNREFUSED;
968 			break;
969 		default:
970 			/* Transient errors */
971 			break;
972 		}
973 		break;
974 	default:
975 		/* Transient errors */
976 		break;
977 	}
978 	if (error == 0) {
979 		freemsg(mp);
980 		return;
981 	}
982 
983 	/*
984 	 * Deliver T_UDERROR_IND when the application has asked for it.
985 	 * The socket layer enables this automatically when connected.
986 	 */
987 	if (!connp->conn_dgram_errind) {
988 		freemsg(mp);
989 		return;
990 	}
991 
    	/*
    	 * Report the destination of the offending datagram, taken from the
    	 * inner IP and UDP headers, in the conn's own address family.
    	 *
    	 * For IPCL_IS_NONSTR conns: in TS_DATA_XFER the error is raised via
    	 * su_set_error (after dropping conn_lock) only when that destination
    	 * equals the conn's foreign address and port; in any other state it
    	 * is recorded in udp_delayed_error/udp_delayed_addr.  Otherwise a
    	 * T_UDERROR_IND is sent up conn_rq if one can be allocated.
    	 */
992 	switch (connp->conn_family) {
993 	case AF_INET:
994 		sin = sin_null;
995 		sin.sin_family = AF_INET;
996 		sin.sin_addr.s_addr = ipha->ipha_dst;
997 		sin.sin_port = udpha->uha_dst_port;
998 		if (IPCL_IS_NONSTR(connp)) {
999 			mutex_enter(&connp->conn_lock);
1000 			if (udp->udp_state == TS_DATA_XFER) {
1001 				if (sin.sin_port == connp->conn_fport &&
1002 				    sin.sin_addr.s_addr ==
1003 				    connp->conn_faddr_v4) {
1004 					mutex_exit(&connp->conn_lock);
1005 					(*connp->conn_upcalls->su_set_error)
1006 					    (connp->conn_upper_handle, error);
1007 					goto done;
1008 				}
1009 			} else {
1010 				udp->udp_delayed_error = error;
1011 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1012 			}
1013 			mutex_exit(&connp->conn_lock);
1014 		} else {
1015 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1016 			    NULL, 0, error);
1017 			if (mp1 != NULL)
1018 				putnext(connp->conn_rq, mp1);
1019 		}
1020 		break;
1021 	case AF_INET6:
     		/* AF_INET6 conn, IPv4 packet: report a v4-mapped address. */
1022 		sin6 = sin6_null;
1023 		sin6.sin6_family = AF_INET6;
1024 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1025 		sin6.sin6_port = udpha->uha_dst_port;
1026 		if (IPCL_IS_NONSTR(connp)) {
1027 			mutex_enter(&connp->conn_lock);
1028 			if (udp->udp_state == TS_DATA_XFER) {
1029 				if (sin6.sin6_port == connp->conn_fport &&
1030 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1031 				    &connp->conn_faddr_v6)) {
1032 					mutex_exit(&connp->conn_lock);
1033 					(*connp->conn_upcalls->su_set_error)
1034 					    (connp->conn_upper_handle, error);
1035 					goto done;
1036 				}
1037 			} else {
1038 				udp->udp_delayed_error = error;
1039 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1040 			}
1041 			mutex_exit(&connp->conn_lock);
1042 		} else {
1043 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1044 			    NULL, 0, error);
1045 			if (mp1 != NULL)
1046 				putnext(connp->conn_rq, mp1);
1047 		}
1048 		break;
1049 	}
1050 done:
     	/* The original ICMP message is never passed upstream. */
1051 	freemsg(mp);
1052 }
1053 
1054 /*
1055  * udp_icmp_error_ipv6 is called by udp_icmp_input to process ICMP for IPv6.
1056  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1057  * Assumes that IP has pulled up all the extension headers as well as the
1058  * ICMPv6 header.
      *
      * mp starts at the outer IPv6 header and is always consumed by this
      * function.
      *
      * ECONNREFUSED is reported for ICMP6_DST_UNREACH_NOPORT and for an
      * ICMP6_PARAMPROB_NEXTHEADER whose pointer designates the next-header
      * field located by ip_hdr_length_nexthdr_v6().  ICMP6_PACKET_TOO_BIG
      * is not an error: when conn_ipv6_recvpathmtu is set it is turned into
      * a data-less T_UNITDATA_IND carrying an IPV6_PATHMTU option.
1059  */
1060 static void
1061 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1062 {
1063 	icmp6_t		*icmp6;
1064 	ip6_t		*ip6h, *outer_ip6h;
1065 	uint16_t	iph_hdr_length;
1066 	uint8_t		*nexthdrp;
1067 	udpha_t		*udpha;
1068 	sin6_t		sin6;
1069 	mblk_t		*mp1;
1070 	int		error = 0;
1071 	udp_t		*udp = connp->conn_udp;
1072 	udp_stack_t	*us = udp->udp_us;
1073 
1074 	outer_ip6h = (ip6_t *)mp->b_rptr;
1075 #ifdef DEBUG
     	/* Cross-check the header length recorded in ira. */
1076 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1077 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1078 	else
1079 		iph_hdr_length = IPV6_HDR_LEN;
1080 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1081 #endif
1082 	/* Skip past the outer IP and ICMP headers */
1083 	iph_hdr_length = ira->ira_ip_hdr_length;
1084 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1085 
1086 	/* Skip past the inner IP and find the ULP header */
1087 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
1088 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1089 		freemsg(mp);
1090 		return;
1091 	}
1092 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1093 
1094 	switch (icmp6->icmp6_type) {
1095 	case ICMP6_DST_UNREACH:
1096 		switch (icmp6->icmp6_code) {
1097 		case ICMP6_DST_UNREACH_NOPORT:
1098 			error = ECONNREFUSED;
1099 			break;
1100 		case ICMP6_DST_UNREACH_ADMIN:
1101 		case ICMP6_DST_UNREACH_NOROUTE:
1102 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1103 		case ICMP6_DST_UNREACH_ADDR:
1104 			/* Transient errors */
1105 			break;
1106 		default:
1107 			break;
1108 		}
1109 		break;
1110 	case ICMP6_PACKET_TOO_BIG: {
1111 		struct T_unitdata_ind	*tudi;
1112 		struct T_opthdr		*toh;
1113 		size_t			udi_size;
1114 		mblk_t			*newmp;
1115 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1116 		    sizeof (struct ip6_mtuinfo);
     		/* NB: this pointer shadows the function-level sin6. */
1117 		sin6_t			*sin6;
1118 		struct ip6_mtuinfo	*mtuinfo;
1119 
1120 		/*
1121 		 * If the application has requested to receive path mtu
1122 		 * information, send up an empty message containing an
1123 		 * IPV6_PATHMTU ancillary data item.
1124 		 */
1125 		if (!connp->conn_ipv6_recvpathmtu)
1126 			break;
1127 
     		/* Layout: T_unitdata_ind | sin6_t | T_opthdr | ip6_mtuinfo */
1128 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1129 		    opt_length;
1130 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
     			/* Counted as an input error; mp is freed below. */
1131 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
1132 			break;
1133 		}
1134 
1135 		/*
1136 		 * newmp->b_cont is left to NULL on purpose.  This is an
1137 		 * empty message containing only ancillary data.
1138 		 */
1139 		newmp->b_datap->db_type = M_PROTO;
1140 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1141 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1142 		tudi->PRIM_type = T_UNITDATA_IND;
1143 		tudi->SRC_length = sizeof (sin6_t);
1144 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1145 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1146 		tudi->OPT_length = opt_length;
1147 
     		/* The source address reported is the conn's foreign address. */
1148 		sin6 = (sin6_t *)&tudi[1];
1149 		bzero(sin6, sizeof (sin6_t));
1150 		sin6->sin6_family = AF_INET6;
1151 		sin6->sin6_addr = connp->conn_faddr_v6;
1152 
1153 		toh = (struct T_opthdr *)&sin6[1];
1154 		toh->level = IPPROTO_IPV6;
1155 		toh->name = IPV6_PATHMTU;
1156 		toh->len = opt_length;
1157 		toh->status = 0;
1158 
     		/*
     		 * The option names the inner packet's destination and carries
     		 * icmp6_mtu exactly as it appears in the ICMPv6 header.
     		 */
1159 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1160 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1161 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1162 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1163 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1164 		/*
1165 		 * We've consumed everything we need from the original
1166 		 * message.  Free it, then send our empty message.
1167 		 */
1168 		freemsg(mp);
1169 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1170 		return;
1171 	}
1172 	case ICMP6_TIME_EXCEEDED:
1173 		/* Transient errors */
1174 		break;
1175 	case ICMP6_PARAM_PROB:
1176 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1177 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1178 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1179 		    (uchar_t *)nexthdrp) {
1180 			error = ECONNREFUSED;
1181 			break;
1182 		}
1183 		break;
1184 	}
1185 	if (error == 0) {
1186 		freemsg(mp);
1187 		return;
1188 	}
1189 
1190 	/*
1191 	 * Deliver T_UDERROR_IND when the application has asked for it.
1192 	 * The socket layer enables this automatically when connected.
1193 	 */
1194 	if (!connp->conn_dgram_errind) {
1195 		freemsg(mp);
1196 		return;
1197 	}
1198 
     	/* Describe the destination of the datagram that triggered the error. */
1199 	sin6 = sin6_null;
1200 	sin6.sin6_family = AF_INET6;
1201 	sin6.sin6_addr = ip6h->ip6_dst;
1202 	sin6.sin6_port = udpha->uha_dst_port;
1203 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1204 
     	/*
     	 * For IPCL_IS_NONSTR conns: in TS_DATA_XFER the error is raised via
     	 * su_set_error (after dropping conn_lock) only when the destination
     	 * equals the conn's foreign address and port; in any other state it
     	 * is recorded in udp_delayed_error/udp_delayed_addr.  Otherwise a
     	 * T_UDERROR_IND is sent up conn_rq if one can be allocated.
     	 */
1205 	if (IPCL_IS_NONSTR(connp)) {
1206 		mutex_enter(&connp->conn_lock);
1207 		if (udp->udp_state == TS_DATA_XFER) {
1208 			if (sin6.sin6_port == connp->conn_fport &&
1209 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1210 			    &connp->conn_faddr_v6)) {
1211 				mutex_exit(&connp->conn_lock);
1212 				(*connp->conn_upcalls->su_set_error)
1213 				    (connp->conn_upper_handle, error);
1214 				goto done;
1215 			}
1216 		} else {
1217 			udp->udp_delayed_error = error;
1218 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1219 		}
1220 		mutex_exit(&connp->conn_lock);
1221 	} else {
1222 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1223 		    NULL, 0, error);
1224 		if (mp1 != NULL)
1225 			putnext(connp->conn_rq, mp1);
1226 	}
1227 done:
1228 	freemsg(mp);
1229 }
1230 
1231 /*
1232  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1233  * The local address is filled in if endpoint is bound. The remote address
1234  * is filled in if remote address has been precified ("connected endpoint")
1235  * (The concept of connected CLTS sockets is alien to published TPI
1236  *  but we support it anyway).
1237  */
1238 static void
1239 udp_addr_req(queue_t *q, mblk_t *mp)
1240 {
1241 	struct sockaddr *sa;
1242 	mblk_t	*ackmp;
1243 	struct T_addr_ack *taa;
1244 	udp_t	*udp = Q_TO_UDP(q);
1245 	conn_t	*connp = udp->udp_connp;
1246 	uint_t	addrlen;
1247 
1248 	/* Make it large enough for worst case */
1249 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1250 	    2 * sizeof (sin6_t), 1);
1251 	if (ackmp == NULL) {
1252 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1253 		return;
1254 	}
1255 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1256 
1257 	bzero(taa, sizeof (struct T_addr_ack));
1258 	ackmp->b_wptr = (uchar_t *)&taa[1];
1259 
1260 	taa->PRIM_type = T_ADDR_ACK;
1261 	ackmp->b_datap->db_type = M_PCPROTO;
1262 
1263 	if (connp->conn_family == AF_INET)
1264 		addrlen = sizeof (sin_t);
1265 	else
1266 		addrlen = sizeof (sin6_t);
1267 
1268 	mutex_enter(&connp->conn_lock);
1269 	/*
1270 	 * Note: Following code assumes 32 bit alignment of basic
1271 	 * data structures like sin_t and struct T_addr_ack.
1272 	 */
1273 	if (udp->udp_state != TS_UNBND) {
1274 		/*
1275 		 * Fill in local address first
1276 		 */
1277 		taa->LOCADDR_offset = sizeof (*taa);
1278 		taa->LOCADDR_length = addrlen;
1279 		sa = (struct sockaddr *)&taa[1];
1280 		(void) conn_getsockname(connp, sa, &addrlen);
1281 		ackmp->b_wptr += addrlen;
1282 	}
1283 	if (udp->udp_state == TS_DATA_XFER) {
1284 		/*
1285 		 * connected, fill remote address too
1286 		 */
1287 		taa->REMADDR_length = addrlen;
1288 		/* assumed 32-bit alignment */
1289 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1290 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1291 		(void) conn_getpeername(connp, sa, &addrlen);
1292 		ackmp->b_wptr += addrlen;
1293 	}
1294 	mutex_exit(&connp->conn_lock);
1295 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1296 	qreply(q, ackmp);
1297 }
1298 
1299 static void
1300 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1301 {
1302 	conn_t		*connp = udp->udp_connp;
1303 
1304 	if (connp->conn_family == AF_INET) {
1305 		*tap = udp_g_t_info_ack_ipv4;
1306 	} else {
1307 		*tap = udp_g_t_info_ack_ipv6;
1308 	}
1309 	tap->CURRENT_state = udp->udp_state;
1310 	tap->OPT_size = udp_max_optsize;
1311 }
1312 
1313 static void
1314 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1315     t_uscalar_t cap_bits1)
1316 {
1317 	tcap->CAP_bits1 = 0;
1318 
1319 	if (cap_bits1 & TC1_INFO) {
1320 		udp_copy_info(&tcap->INFO_ack, udp);
1321 		tcap->CAP_bits1 |= TC1_INFO;
1322 	}
1323 }
1324 
1325 /*
1326  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1327  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1328  * udp_g_t_info_ack.  The current state of the stream is copied from
1329  * udp_state.
1330  */
1331 static void
1332 udp_capability_req(queue_t *q, mblk_t *mp)
1333 {
1334 	t_uscalar_t		cap_bits1;
1335 	struct T_capability_ack	*tcap;
1336 	udp_t	*udp = Q_TO_UDP(q);
1337 
1338 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1339 
1340 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1341 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1342 	if (!mp)
1343 		return;
1344 
1345 	tcap = (struct T_capability_ack *)mp->b_rptr;
1346 	udp_do_capability_ack(udp, tcap, cap_bits1);
1347 
1348 	qreply(q, mp);
1349 }
1350 
1351 /*
1352  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1353  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1354  * The current state of the stream is copied from udp_state.
1355  */
1356 static void
1357 udp_info_req(queue_t *q, mblk_t *mp)
1358 {
1359 	udp_t *udp = Q_TO_UDP(q);
1360 
1361 	/* Create a T_INFO_ACK message. */
1362 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1363 	    T_INFO_ACK);
1364 	if (!mp)
1365 		return;
1366 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1367 	qreply(q, mp);
1368 }
1369 
1370 /* For /dev/udp aka AF_INET open */
1371 static int
1372 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1373 {
1374 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1375 }
1376 
1377 /* For /dev/udp6 aka AF_INET6 open */
1378 static int
1379 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1380 {
1381 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1382 }
1383 
1384 /*
1385  * This is the open routine for udp.  It allocates a udp_t structure for
1386  * the stream and, on the first open of the module, creates an ND table.
      * NOTE(review): no ND table creation is visible in this function; that
      * part of the description may be stale -- confirm against udp_do_open().
      *
      * isv6 is passed through to udp_do_open() and comes from udp_openv4() /
      * udp_openv6().  Returns 0 on success (including a re-open of a queue
      * that already has q_ptr set), EINVAL for a module open, EBUSY when no
      * minor number is available, or the error reported by udp_do_open().
1387  */
1388 static int
1389 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1390     boolean_t isv6)
1391 {
1392 	udp_t		*udp;
1393 	conn_t		*connp;
1394 	dev_t		conn_dev;
1395 	vmem_t		*minor_arena;
1396 	int		err;
1397 
1398 	/* If the stream is already open, return immediately. */
1399 	if (q->q_ptr != NULL)
1400 		return (0);
1401 
1402 	if (sflag == MODOPEN)
1403 		return (EINVAL);
1404 
     	/*
     	 * Sockets (SO_SOCKSTR) try the large minor arena first when it
     	 * exists; minor_arena records which arena conn_dev came from so it
     	 * can be returned to the right one.
     	 */
1405 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1406 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1407 		minor_arena = ip_minor_arena_la;
1408 	} else {
1409 		/*
1410 		 * Either minor numbers in the large arena were exhausted
1411 		 * or a non socket application is doing the open.
1412 		 * Try to allocate from the small arena.
1413 		 */
1414 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1415 			return (EBUSY);
1416 
1417 		minor_arena = ip_minor_arena_sa;
1418 	}
1419 
1420 	if (flag & SO_FALLBACK) {
1421 		/*
1422 		 * Non streams socket needs a stream to fallback to
1423 		 */
     		/*
     		 * No conn_t is created on this path: the read side q_ptr
     		 * holds the minor number, the write side q_ptr holds the
     		 * arena, and the write queue uses udp_fallback_sock_winit.
     		 */
1424 		RD(q)->q_ptr = (void *)conn_dev;
1425 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1426 		WR(q)->q_ptr = (void *)minor_arena;
1427 		qprocson(q);
1428 		return (0);
1429 	}
1430 
1431 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1432 	if (connp == NULL) {
     		/* Give the minor number back before reporting the failure. */
1433 		inet_minor_free(minor_arena, conn_dev);
1434 		return (err);
1435 	}
1436 	udp = connp->conn_udp;
1437 
     	/* Report the minor number that was allocated back through devp. */
1438 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1439 	connp->conn_dev = conn_dev;
1440 	connp->conn_minor_arena = minor_arena;
1441 
1442 	/*
1443 	 * Initialize the udp_t structure for this stream.
1444 	 */
1445 	q->q_ptr = connp;
1446 	WR(q)->q_ptr = connp;
1447 	connp->conn_rq = q;
1448 	connp->conn_wq = WR(q);
1449 
1450 	/*
1451 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1452 	 * need to lock anything.
1453 	 */
1454 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1455 	ASSERT(connp->conn_udp == udp);
1456 	ASSERT(udp->udp_connp == connp);
1457 
1458 	if (flag & SO_SOCKSTR) {
1459 		udp->udp_issocket = B_TRUE;
1460 	}
1461 
     	/* Write-side flow control limits come from the conn's send buffer. */
1462 	WR(q)->q_hiwat = connp->conn_sndbuf;
1463 	WR(q)->q_lowat = connp->conn_sndlowat;
1464 
1465 	qprocson(q);
1466 
1467 	/* Set the Stream head write offset and high watermark. */
1468 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1469 	(void) proto_set_rx_hiwat(q, connp,
1470 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1471 
     	/* Setup is complete: clear the incipient flag under conn_lock. */
1472 	mutex_enter(&connp->conn_lock);
1473 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1474 	mutex_exit(&connp->conn_lock);
1475 	return (0);
1476 }
1477 
1478 /*
1479  * Which UDP options OK to set through T_UNITDATA_REQ...
      *
      * Currently every level/name pair is accepted: the arguments are not
      * examined and B_TRUE is always returned.  udp_opt_set() consults this
      * for the SETFN_UD_NEGOTIATE and SETFN_CONN_NEGOTIATE contexts.
1480  */
1481 /* ARGSUSED */
1482 static boolean_t
1483 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1484 {
1485 	return (B_TRUE);
1486 }
1487 
1488 /*
1489  * This routine gets default values of certain options whose default
1490  * values are maintained by protcol specific code
1491  */
1492 int
1493 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1494 {
1495 	udp_t		*udp = Q_TO_UDP(q);
1496 	udp_stack_t *us = udp->udp_us;
1497 	int *i1 = (int *)ptr;
1498 
1499 	switch (level) {
1500 	case IPPROTO_IP:
1501 		switch (name) {
1502 		case IP_MULTICAST_TTL:
1503 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1504 			return (sizeof (uchar_t));
1505 		case IP_MULTICAST_LOOP:
1506 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1507 			return (sizeof (uchar_t));
1508 		}
1509 		break;
1510 	case IPPROTO_IPV6:
1511 		switch (name) {
1512 		case IPV6_MULTICAST_HOPS:
1513 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1514 			return (sizeof (int));
1515 		case IPV6_MULTICAST_LOOP:
1516 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1517 			return (sizeof (int));
1518 		case IPV6_UNICAST_HOPS:
1519 			*i1 = us->us_ipv6_hoplimit;
1520 			return (sizeof (int));
1521 		}
1522 		break;
1523 	}
1524 	return (-1);
1525 }
1526 
1527 /*
1528  * This routine retrieves the current status of socket options.
1529  * It returns the size of the option retrieved, or -1.
1530  */
1531 int
1532 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1533     uchar_t *ptr)
1534 {
1535 	int		*i1 = (int *)ptr;
1536 	udp_t		*udp = connp->conn_udp;
1537 	int		len;
1538 	conn_opt_arg_t	coas;
1539 	int		retval;
1540 
1541 	coas.coa_connp = connp;
1542 	coas.coa_ixa = connp->conn_ixa;
1543 	coas.coa_ipp = &connp->conn_xmit_ipp;
1544 	coas.coa_ancillary = B_FALSE;
1545 	coas.coa_changed = 0;
1546 
1547 	/*
1548 	 * We assume that the optcom framework has checked for the set
1549 	 * of levels and names that are supported, hence we don't worry
1550 	 * about rejecting based on that.
1551 	 * First check for UDP specific handling, then pass to common routine.
1552 	 */
1553 	switch (level) {
1554 	case IPPROTO_IP:
1555 		/*
1556 		 * Only allow IPv4 option processing on IPv4 sockets.
1557 		 */
1558 		if (connp->conn_family != AF_INET)
1559 			return (-1);
1560 
1561 		switch (name) {
1562 		case IP_OPTIONS:
1563 		case T_IP_OPTIONS:
1564 			mutex_enter(&connp->conn_lock);
1565 			if (!(udp->udp_recv_ipp.ipp_fields &
1566 			    IPPF_IPV4_OPTIONS)) {
1567 				mutex_exit(&connp->conn_lock);
1568 				return (0);
1569 			}
1570 
1571 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1572 			ASSERT(len != 0);
1573 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1574 			mutex_exit(&connp->conn_lock);
1575 			return (len);
1576 		}
1577 		break;
1578 	case IPPROTO_UDP:
1579 		switch (name) {
1580 		case UDP_NAT_T_ENDPOINT:
1581 			mutex_enter(&connp->conn_lock);
1582 			*i1 = udp->udp_nat_t_endpoint;
1583 			mutex_exit(&connp->conn_lock);
1584 			return (sizeof (int));
1585 		case UDP_RCVHDR:
1586 			mutex_enter(&connp->conn_lock);
1587 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1588 			mutex_exit(&connp->conn_lock);
1589 			return (sizeof (int));
1590 		}
1591 	}
1592 	mutex_enter(&connp->conn_lock);
1593 	retval = conn_opt_get(&coas, level, name, ptr);
1594 	mutex_exit(&connp->conn_lock);
1595 	return (retval);
1596 }
1597 
1598 /*
1599  * This routine retrieves the current status of socket options.
1600  * It returns the size of the option retrieved, or -1.
1601  */
1602 int
1603 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1604 {
1605 	conn_t		*connp = Q_TO_CONN(q);
1606 	int		err;
1607 
1608 	err = udp_opt_get(connp, level, name, ptr);
1609 	return (err);
1610 }
1611 
1612 /*
1613  * This routine sets socket options.
1614  */
1615 int
1616 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1617     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1618 {
1619 	conn_t		*connp = coa->coa_connp;
1620 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1621 	udp_t		*udp = connp->conn_udp;
1622 	udp_stack_t	*us = udp->udp_us;
1623 	int		*i1 = (int *)invalp;
1624 	boolean_t 	onoff = (*i1 == 0) ? 0 : 1;
1625 	int		error;
1626 
1627 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1628 	/*
1629 	 * First do UDP specific sanity checks and handle UDP specific
1630 	 * options. Note that some IPPROTO_UDP options are handled
1631 	 * by conn_opt_set.
1632 	 */
1633 	switch (level) {
1634 	case SOL_SOCKET:
1635 		switch (name) {
1636 		case SO_SNDBUF:
1637 			if (*i1 > us->us_max_buf) {
1638 				return (ENOBUFS);
1639 			}
1640 			break;
1641 		case SO_RCVBUF:
1642 			if (*i1 > us->us_max_buf) {
1643 				return (ENOBUFS);
1644 			}
1645 			break;
1646 
1647 		case SCM_UCRED: {
1648 			struct ucred_s *ucr;
1649 			cred_t *newcr;
1650 			ts_label_t *tsl;
1651 
1652 			/*
1653 			 * Only sockets that have proper privileges and are
1654 			 * bound to MLPs will have any other value here, so
1655 			 * this implicitly tests for privilege to set label.
1656 			 */
1657 			if (connp->conn_mlp_type == mlptSingle)
1658 				break;
1659 
1660 			ucr = (struct ucred_s *)invalp;
1661 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1662 			    ucr->uc_labeloff < sizeof (*ucr) ||
1663 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1664 				return (EINVAL);
1665 			if (!checkonly) {
1666 				/*
1667 				 * Set ixa_tsl to the new label.
1668 				 * We assume that crgetzoneid doesn't change
1669 				 * as part of the SCM_UCRED.
1670 				 */
1671 				ASSERT(cr != NULL);
1672 				if ((tsl = crgetlabel(cr)) == NULL)
1673 					return (EINVAL);
1674 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1675 				    tsl->tsl_doi, KM_NOSLEEP);
1676 				if (newcr == NULL)
1677 					return (ENOSR);
1678 				ASSERT(newcr->cr_label != NULL);
1679 				/*
1680 				 * Move the hold on the cr_label to ixa_tsl by
1681 				 * setting cr_label to NULL. Then release newcr.
1682 				 */
1683 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1684 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1685 				newcr->cr_label = NULL;
1686 				crfree(newcr);
1687 				coa->coa_changed |= COA_HEADER_CHANGED;
1688 				coa->coa_changed |= COA_WROFF_CHANGED;
1689 			}
1690 			/* Fully handled this option. */
1691 			return (0);
1692 		}
1693 		}
1694 		break;
1695 	case IPPROTO_UDP:
1696 		switch (name) {
1697 		case UDP_NAT_T_ENDPOINT:
1698 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1699 				return (error);
1700 			}
1701 
1702 			/*
1703 			 * Use conn_family instead so we can avoid ambiguitites
1704 			 * with AF_INET6 sockets that may switch from IPv4
1705 			 * to IPv6.
1706 			 */
1707 			if (connp->conn_family != AF_INET) {
1708 				return (EAFNOSUPPORT);
1709 			}
1710 
1711 			if (!checkonly) {
1712 				mutex_enter(&connp->conn_lock);
1713 				udp->udp_nat_t_endpoint = onoff;
1714 				mutex_exit(&connp->conn_lock);
1715 				coa->coa_changed |= COA_HEADER_CHANGED;
1716 				coa->coa_changed |= COA_WROFF_CHANGED;
1717 			}
1718 			/* Fully handled this option. */
1719 			return (0);
1720 		case UDP_RCVHDR:
1721 			mutex_enter(&connp->conn_lock);
1722 			udp->udp_rcvhdr = onoff;
1723 			mutex_exit(&connp->conn_lock);
1724 			return (0);
1725 		}
1726 		break;
1727 	}
1728 	error = conn_opt_set(coa, level, name, inlen, invalp,
1729 	    checkonly, cr);
1730 	return (error);
1731 }
1732 
1733 /*
1734  * This routine sets socket options.
      *
      * optset_context selects the mode.  SETFN_OPTCOM_CHECKONLY passes
      * checkonly = B_TRUE to udp_do_opt_set() and succeeds at once when inlen
      * is 0; SETFN_OPTCOM_NEGOTIATE, SETFN_UD_NEGOTIATE and
      * SETFN_CONN_NEGOTIATE apply the value; any other context fails with
      * EINVAL.
      *
      * thisdg_attrs, when non-NULL, is a conn_opt_arg_t for options supplied
      * with a T_UNITDATA_REQ (ancillary).  Otherwise the option is applied to
      * the conn itself, using a reference on conn_ixa that is taken here and
      * released before returning.
      *
      * Returns 0 or an errno value.  When udp_do_opt_set() and the
      * IP_DHCPINIT_IF handling succeed, the value is copied from invalp to
      * outvalp and *outlenp is set to inlen; the earlier error returns set
      * *outlenp to 0.
1735  */
1736 int
1737 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1738     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1739     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1740 {
1741 	udp_t		*udp = connp->conn_udp;
1742 	int		err;
1743 	conn_opt_arg_t	coas, *coa;
1744 	boolean_t	checkonly;
1745 	udp_stack_t	*us = udp->udp_us;
1746 
1747 	switch (optset_context) {
1748 	case SETFN_OPTCOM_CHECKONLY:
1749 		checkonly = B_TRUE;
1750 		/*
1751 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1752 		 * inlen != 0 implies value supplied and
1753 		 * 	we have to "pretend" to set it.
1754 		 * inlen == 0 implies that there is no
1755 		 * 	value part in T_CHECK request and just validation
1756 		 * done elsewhere should be enough, we just return here.
1757 		 */
1758 		if (inlen == 0) {
1759 			*outlenp = 0;
1760 			return (0);
1761 		}
1762 		break;
1763 	case SETFN_OPTCOM_NEGOTIATE:
1764 		checkonly = B_FALSE;
1765 		break;
1766 	case SETFN_UD_NEGOTIATE:
1767 	case SETFN_CONN_NEGOTIATE:
1768 		checkonly = B_FALSE;
1769 		/*
1770 		 * Negotiating local and "association-related" options
1771 		 * through T_UNITDATA_REQ.
1772 		 *
1773 		 * Following routine can filter out ones we do not
1774 		 * want to be "set" this way.
1775 		 */
1776 		if (!udp_opt_allow_udr_set(level, name)) {
1777 			*outlenp = 0;
1778 			return (EINVAL);
1779 		}
1780 		break;
1781 	default:
1782 		/*
1783 		 * We should never get here
1784 		 */
1785 		*outlenp = 0;
1786 		return (EINVAL);
1787 	}
1788 
1789 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1790 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1791 
     	/* Pick the option context: the caller's, or one built on conn_ixa. */
1792 	if (thisdg_attrs != NULL) {
1793 		/* Options from T_UNITDATA_REQ */
1794 		coa = (conn_opt_arg_t *)thisdg_attrs;
1795 		ASSERT(coa->coa_connp == connp);
1796 		ASSERT(coa->coa_ixa != NULL);
1797 		ASSERT(coa->coa_ipp != NULL);
1798 		ASSERT(coa->coa_ancillary);
1799 	} else {
1800 		coa = &coas;
1801 		coas.coa_connp = connp;
1802 		/* Get a reference on conn_ixa to prevent concurrent mods */
1803 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1804 		if (coas.coa_ixa == NULL) {
1805 			*outlenp = 0;
1806 			return (ENOMEM);
1807 		}
1808 		coas.coa_ipp = &connp->conn_xmit_ipp;
1809 		coas.coa_ancillary = B_FALSE;
1810 		coas.coa_changed = 0;
1811 	}
1812 
1813 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
1814 	    cr, checkonly);
1815 	if (err != 0) {
     		/*
     		 * errout is also the target of the gotos in the
     		 * IP_DHCPINIT_IF handling below.  Only a reference taken by
     		 * this function (non-ancillary case) is dropped.
     		 */
1816 errout:
1817 		if (!coa->coa_ancillary)
1818 			ixa_refrele(coa->coa_ixa);
1819 		*outlenp = 0;
1820 		return (err);
1821 	}
1822 	/* Handle DHCPINIT here outside of lock */
1823 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1824 		uint_t	ifindex;
1825 		ill_t	*ill;
1826 
     		/* An ifindex of 0 clears any existing association. */
1827 		ifindex = *(uint_t *)invalp;
1828 		if (ifindex == 0) {
1829 			ill = NULL;
1830 		} else {
1831 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1832 			    coa->coa_ixa->ixa_ipst);
1833 			if (ill == NULL) {
1834 				err = ENXIO;
1835 				goto errout;
1836 			}
1837 
     			/*
     			 * ill_lock is taken here and, when the checks pass,
     			 * stays held until conn_dhcpinit_ill has been
     			 * updated below.
     			 */
1838 			mutex_enter(&ill->ill_lock);
1839 			if (ill->ill_state_flags & ILL_CONDEMNED) {
1840 				mutex_exit(&ill->ill_lock);
1841 				ill_refrele(ill);
1842 				err = ENXIO;
1843 				goto errout;
1844 			}
1845 			if (IS_VNI(ill)) {
1846 				mutex_exit(&ill->ill_lock);
1847 				ill_refrele(ill);
1848 				err = EINVAL;
1849 				goto errout;
1850 			}
1851 		}
1852 		mutex_enter(&connp->conn_lock);
1853 
1854 		if (connp->conn_dhcpinit_ill != NULL) {
1855 			/*
1856 			 * We've locked the conn so conn_cleanup_ill()
1857 			 * cannot clear conn_dhcpinit_ill -- so it's
1858 			 * safe to access the ill.
1859 			 */
1860 			ill_t *oill = connp->conn_dhcpinit_ill;
1861 
1862 			ASSERT(oill->ill_dhcpinit != 0);
1863 			atomic_dec_32(&oill->ill_dhcpinit);
1864 			ill_set_inputfn(connp->conn_dhcpinit_ill);
1865 			connp->conn_dhcpinit_ill = NULL;
1866 		}
1867 
1868 		if (ill != NULL) {
     			/* Attach the new ill, then drop both locks and the ref. */
1869 			connp->conn_dhcpinit_ill = ill;
1870 			atomic_inc_32(&ill->ill_dhcpinit);
1871 			ill_set_inputfn(ill);
1872 			mutex_exit(&connp->conn_lock);
1873 			mutex_exit(&ill->ill_lock);
1874 			ill_refrele(ill);
1875 		} else {
1876 			mutex_exit(&connp->conn_lock);
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Common case of OK return with outval same as inval.
1882 	 */
1883 	if (invalp != outvalp) {
1884 		/* don't trust bcopy for identical src/dst */
1885 		(void) bcopy(invalp, outvalp, inlen);
1886 	}
1887 	*outlenp = inlen;
1888 
1889 	/*
1890 	 * If this was not ancillary data, then we rebuild the headers,
1891 	 * update the IRE/NCE, and IPsec as needed.
1892 	 * Since the label depends on the destination we go through
1893 	 * ip_set_destination first.
1894 	 */
1895 	if (coa->coa_ancillary) {
1896 		return (0);
1897 	}
1898 
1899 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
1900 		in6_addr_t saddr, faddr, nexthop;
1901 		in_port_t fport;
1902 
1903 		/*
1904 		 * We clear lastdst to make sure we pick up the change
1905 		 * next time sending.
1906 		 * If we are connected we re-cache the information.
1907 		 * We ignore errors to preserve BSD behavior.
1908 		 * Note that we don't redo IPsec policy lookup here
1909 		 * since the final destination (or source) didn't change.
1910 		 */
     		/* Snapshot the addresses under conn_lock, then use the copies. */
1911 		mutex_enter(&connp->conn_lock);
1912 		connp->conn_v6lastdst = ipv6_all_zeros;
1913 
1914 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
1915 		    &connp->conn_faddr_v6, &nexthop);
1916 		saddr = connp->conn_saddr_v6;
1917 		faddr = connp->conn_faddr_v6;
1918 		fport = connp->conn_fport;
1919 		mutex_exit(&connp->conn_lock);
1920 
1921 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
1922 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
1923 			(void) ip_attr_connect(connp, coa->coa_ixa,
1924 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
1925 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
1926 		}
1927 	}
1928 
     	/* The transmit attributes are not used past this point. */
1929 	ixa_refrele(coa->coa_ixa);
1930 
1931 	if (coa->coa_changed & COA_HEADER_CHANGED) {
1932 		/*
1933 		 * Rebuild the header template if we are connected.
1934 		 * Otherwise clear conn_v6lastdst so we rebuild the header
1935 		 * in the data path.
1936 		 */
1937 		mutex_enter(&connp->conn_lock);
1938 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1939 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1940 			err = udp_build_hdr_template(connp,
1941 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
1942 			    connp->conn_fport, connp->conn_flowinfo);
1943 			if (err != 0) {
     				/*
     				 * NOTE: *outlenp was already set to inlen
     				 * above and is left that way here.
     				 */
1944 				mutex_exit(&connp->conn_lock);
1945 				return (err);
1946 			}
1947 		} else {
1948 			connp->conn_v6lastdst = ipv6_all_zeros;
1949 		}
1950 		mutex_exit(&connp->conn_lock);
1951 	}
1952 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
1953 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
1954 		    connp->conn_rcvbuf);
1955 	}
     	/* The write queue is only touched when the conn is not IPCL_IS_NONSTR. */
1956 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1957 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1958 	}
1959 	if (coa->coa_changed & COA_WROFF_CHANGED) {
1960 		/* Increase wroff if needed */
1961 		uint_t wroff;
1962 
1963 		mutex_enter(&connp->conn_lock);
1964 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
     		/* A NAT-T endpoint gets sizeof (uint32_t) extra bytes. */
1965 		if (udp->udp_nat_t_endpoint)
1966 			wroff += sizeof (uint32_t);
     		/* conn_wroff only ever grows here. */
1967 		if (wroff > connp->conn_wroff) {
1968 			connp->conn_wroff = wroff;
1969 			mutex_exit(&connp->conn_lock);
1970 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
1971 		} else {
1972 			mutex_exit(&connp->conn_lock);
1973 		}
1974 	}
1975 	return (err);
1976 }
1977 
1978 /* This routine sets socket options. */
1979 int
1980 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
1981     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1982     void *thisdg_attrs, cred_t *cr)
1983 {
1984 	conn_t	*connp = Q_TO_CONN(q);
1985 	int error;
1986 
1987 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
1988 	    outlenp, outvalp, thisdg_attrs, cr);
1989 	return (error);
1990 }
1991 
1992 /*
1993  * Setup IP and UDP headers.
1994  * Returns NULL on allocation failure, in which case data_mp is freed.
1995  */
1996 mblk_t *
1997 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
1998     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
1999     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2000 {
2001 	mblk_t		*mp;
2002 	udpha_t		*udpha;
2003 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2004 	uint_t		data_len;
2005 	uint32_t	cksum;
2006 	udp_t		*udp = connp->conn_udp;
2007 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2008 	uint_t		ulp_hdr_len;
2009 
2010 	data_len = msgdsize(data_mp);
2011 	ulp_hdr_len = UDPH_SIZE;
2012 	if (insert_spi)
2013 		ulp_hdr_len += sizeof (uint32_t);
2014 
2015 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2016 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2017 	if (mp == NULL) {
2018 		ASSERT(*errorp != 0);
2019 		return (NULL);
2020 	}
2021 
2022 	data_len += ulp_hdr_len;
2023 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2024 
2025 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2026 	udpha->uha_src_port = connp->conn_lport;
2027 	udpha->uha_dst_port = dstport;
2028 	udpha->uha_checksum = 0;
2029 	udpha->uha_length = htons(data_len);
2030 
2031 	/*
2032 	 * If there was a routing option/header then conn_prepend_hdr
2033 	 * has massaged it and placed the pseudo-header checksum difference
2034 	 * in the cksum argument.
2035 	 *
2036 	 * Setup header length and prepare for ULP checksum done in IP.
2037 	 *
2038 	 * We make it easy for IP to include our pseudo header
2039 	 * by putting our length in uha_checksum.
2040 	 * The IP source, destination, and length have already been set by
2041 	 * conn_prepend_hdr.
2042 	 */
2043 	cksum += data_len;
2044 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2045 	ASSERT(cksum < 0x10000);
2046 
2047 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2048 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2049 
2050 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2051 
2052 		/* IP does the checksum if uha_checksum is non-zero */
2053 		if (us->us_do_checksum) {
2054 			if (cksum == 0)
2055 				udpha->uha_checksum = 0xffff;
2056 			else
2057 				udpha->uha_checksum = htons(cksum);
2058 		} else {
2059 			udpha->uha_checksum = 0;
2060 		}
2061 	} else {
2062 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2063 
2064 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2065 		if (cksum == 0)
2066 			udpha->uha_checksum = 0xffff;
2067 		else
2068 			udpha->uha_checksum = htons(cksum);
2069 	}
2070 
2071 	/* Insert all-0s SPI now. */
2072 	if (insert_spi)
2073 		*((uint32_t *)(udpha + 1)) = 0;
2074 
2075 	return (mp);
2076 }
2077 
2078 static int
2079 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2080     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2081 {
2082 	udpha_t		*udpha;
2083 	int		error;
2084 
2085 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2086 	/*
2087 	 * We clear lastdst to make sure we don't use the lastdst path
2088 	 * next time sending since we might not have set v6dst yet.
2089 	 */
2090 	connp->conn_v6lastdst = ipv6_all_zeros;
2091 
2092 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2093 	    flowinfo);
2094 	if (error != 0)
2095 		return (error);
2096 
2097 	/*
2098 	 * Any routing header/option has been massaged. The checksum difference
2099 	 * is stored in conn_sum.
2100 	 */
2101 	udpha = (udpha_t *)connp->conn_ht_ulp;
2102 	udpha->uha_src_port = connp->conn_lport;
2103 	udpha->uha_dst_port = dstport;
2104 	udpha->uha_checksum = 0;
2105 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2106 	return (0);
2107 }
2108 
2109 static mblk_t *
2110 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2111 {
2112 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2113 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2114 		/*
2115 		 * fallback has started but messages have not been moved yet
2116 		 */
2117 		if (udp->udp_fallback_queue_head == NULL) {
2118 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2119 			udp->udp_fallback_queue_head = mp;
2120 			udp->udp_fallback_queue_tail = mp;
2121 		} else {
2122 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2123 			udp->udp_fallback_queue_tail->b_next = mp;
2124 			udp->udp_fallback_queue_tail = mp;
2125 		}
2126 		return (NULL);
2127 	} else {
2128 		/*
2129 		 * Fallback completed, let the caller putnext() the mblk.
2130 		 */
2131 		return (mp);
2132 	}
2133 }
2134 
2135 /*
2136  * Deliver data to ULP. In case we have a socket, and it's falling back to
2137  * TPI, then we'll queue the mp for later processing.
2138  */
2139 static void
2140 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2141 {
2142 	if (IPCL_IS_NONSTR(connp)) {
2143 		udp_t *udp = connp->conn_udp;
2144 		int error;
2145 
2146 		ASSERT(len == msgdsize(mp));
2147 		if ((*connp->conn_upcalls->su_recv)
2148 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2149 			mutex_enter(&udp->udp_recv_lock);
2150 			if (error == ENOSPC) {
2151 				/*
2152 				 * let's confirm while holding the lock
2153 				 */
2154 				if ((*connp->conn_upcalls->su_recv)
2155 				    (connp->conn_upper_handle, NULL, 0, 0,
2156 				    &error, NULL) < 0) {
2157 					ASSERT(error == ENOSPC);
2158 					if (error == ENOSPC) {
2159 						connp->conn_flow_cntrld =
2160 						    B_TRUE;
2161 					}
2162 				}
2163 				mutex_exit(&udp->udp_recv_lock);
2164 			} else {
2165 				ASSERT(error == EOPNOTSUPP);
2166 				mp = udp_queue_fallback(udp, mp);
2167 				mutex_exit(&udp->udp_recv_lock);
2168 				if (mp != NULL)
2169 					putnext(connp->conn_rq, mp);
2170 			}
2171 		}
2172 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2173 	} else {
2174 		if (is_system_labeled()) {
2175 			ASSERT(ira->ira_cred != NULL);
2176 			/*
2177 			 * Provide for protocols above UDP such as RPC
2178 			 * NOPID leaves db_cpid unchanged.
2179 			 */
2180 			mblk_setcred(mp, ira->ira_cred, NOPID);
2181 		}
2182 
2183 		putnext(connp->conn_rq, mp);
2184 	}
2185 }
2186 
2187 /*
2188  * This is the inbound data path.
2189  * IP has already pulled up the IP plus UDP headers and verified alignment
2190  * etc.
2191  */
2192 /* ARGSUSED2 */
2193 static void
2194 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2195 {
2196 	conn_t			*connp = (conn_t *)arg1;
2197 	struct T_unitdata_ind	*tudi;
2198 	uchar_t			*rptr;		/* Pointer to IP header */
2199 	int			hdr_length;	/* Length of IP+UDP headers */
2200 	int			udi_size;	/* Size of T_unitdata_ind */
2201 	int			pkt_len;
2202 	udp_t			*udp;
2203 	udpha_t			*udpha;
2204 	ip_pkt_t		ipps;
2205 	ip6_t			*ip6h;
2206 	mblk_t			*mp1;
2207 	uint32_t		udp_ipv4_options_len;
2208 	crb_t			recv_ancillary;
2209 	udp_stack_t		*us;
2210 
2211 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2212 
2213 	udp = connp->conn_udp;
2214 	us = udp->udp_us;
2215 	rptr = mp->b_rptr;
2216 
2217 	ASSERT(DB_TYPE(mp) == M_DATA);
2218 	ASSERT(OK_32PTR(rptr));
2219 	ASSERT(ira->ira_pktlen == msgdsize(mp));
2220 	pkt_len = ira->ira_pktlen;
2221 
2222 	/*
2223 	 * Get a snapshot of these and allow other threads to change
2224 	 * them after that. We need the same recv_ancillary when determining
2225 	 * the size as when adding the ancillary data items.
2226 	 */
2227 	mutex_enter(&connp->conn_lock);
2228 	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2229 	recv_ancillary = connp->conn_recv_ancillary;
2230 	mutex_exit(&connp->conn_lock);
2231 
2232 	hdr_length = ira->ira_ip_hdr_length;
2233 
2234 	/*
2235 	 * IP inspected the UDP header thus all of it must be in the mblk.
2236 	 * UDP length check is performed for IPv6 packets and IPv4 packets
2237 	 * to check if the size of the packet as specified
2238 	 * by the UDP header is the same as the length derived from the IP
2239 	 * header.
2240 	 */
2241 	udpha = (udpha_t *)(rptr + hdr_length);
2242 	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2243 		goto tossit;
2244 
2245 	hdr_length += UDPH_SIZE;
2246 	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */
2247 
2248 	/* Initialize regardless of IP version */
2249 	ipps.ipp_fields = 0;
2250 
2251 	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2252 	    udp_ipv4_options_len > 0) &&
2253 	    connp->conn_family == AF_INET) {
2254 		int	err;
2255 
2256 		/*
2257 		 * Record/update udp_recv_ipp with the lock
2258 		 * held. Not needed for AF_INET6 sockets
2259 		 * since they don't support a getsockopt of IP_OPTIONS.
2260 		 */
2261 		mutex_enter(&connp->conn_lock);
2262 		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2263 		    B_TRUE);
2264 		if (err != 0) {
2265 			/* Allocation failed. Drop packet */
2266 			mutex_exit(&connp->conn_lock);
2267 			freemsg(mp);
2268 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
2269 			return;
2270 		}
2271 		mutex_exit(&connp->conn_lock);
2272 	}
2273 
2274 	if (recv_ancillary.crb_all != 0) {
2275 		/*
2276 		 * Record packet information in the ip_pkt_t
2277 		 */
2278 		if (ira->ira_flags & IRAF_IS_IPV4) {
2279 			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2280 			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2281 			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2282 			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2283 
2284 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2285 		} else {
2286 			uint8_t nexthdrp;
2287 
2288 			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2289 			/*
2290 			 * IPv6 packets can only be received by applications
2291 			 * that are prepared to receive IPv6 addresses.
2292 			 * The IP fanout must ensure this.
2293 			 */
2294 			ASSERT(connp->conn_family == AF_INET6);
2295 
2296 			ip6h = (ip6_t *)rptr;
2297 
2298 			/* We don't care about the length, but need the ipp */
2299 			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2300 			    &nexthdrp);
2301 			ASSERT(hdr_length == ira->ira_ip_hdr_length);
2302 			/* Restore */
2303 			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2304 			ASSERT(nexthdrp == IPPROTO_UDP);
2305 		}
2306 	}
2307 
2308 	/*
2309 	 * This is the inbound data path.  Packets are passed upstream as
2310 	 * T_UNITDATA_IND messages.
2311 	 */
2312 	if (connp->conn_family == AF_INET) {
2313 		sin_t *sin;
2314 
2315 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2316 
2317 		/*
2318 		 * Normally only send up the source address.
2319 		 * If any ancillary data items are wanted we add those.
2320 		 */
2321 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2322 		if (recv_ancillary.crb_all != 0) {
2323 			udi_size += conn_recvancillary_size(connp,
2324 			    recv_ancillary, ira, mp, &ipps);
2325 		}
2326 
2327 		/* Allocate a message block for the T_UNITDATA_IND structure. */
2328 		mp1 = allocb(udi_size, BPRI_MED);
2329 		if (mp1 == NULL) {
2330 			freemsg(mp);
2331 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
2332 			return;
2333 		}
2334 		mp1->b_cont = mp;
2335 		mp1->b_datap->db_type = M_PROTO;
2336 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2337 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2338 		tudi->PRIM_type = T_UNITDATA_IND;
2339 		tudi->SRC_length = sizeof (sin_t);
2340 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2341 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2342 		    sizeof (sin_t);
2343 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2344 		tudi->OPT_length = udi_size;
2345 		sin = (sin_t *)&tudi[1];
2346 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2347 		sin->sin_port =	udpha->uha_src_port;
2348 		sin->sin_family = connp->conn_family;
2349 		*(uint32_t *)&sin->sin_zero[0] = 0;
2350 		*(uint32_t *)&sin->sin_zero[4] = 0;
2351 
2352 		/*
2353 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2354 		 * IP_RECVTTL has been set.
2355 		 */
2356 		if (udi_size != 0) {
2357 			conn_recvancillary_add(connp, recv_ancillary, ira,
2358 			    &ipps, (uchar_t *)&sin[1], udi_size);
2359 		}
2360 	} else {
2361 		sin6_t *sin6;
2362 
2363 		/*
2364 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2365 		 *
2366 		 * Normally we only send up the address. If receiving of any
2367 		 * optional receive side information is enabled, we also send
2368 		 * that up as options.
2369 		 */
2370 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2371 
2372 		if (recv_ancillary.crb_all != 0) {
2373 			udi_size += conn_recvancillary_size(connp,
2374 			    recv_ancillary, ira, mp, &ipps);
2375 		}
2376 
2377 		mp1 = allocb(udi_size, BPRI_MED);
2378 		if (mp1 == NULL) {
2379 			freemsg(mp);
2380 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
2381 			return;
2382 		}
2383 		mp1->b_cont = mp;
2384 		mp1->b_datap->db_type = M_PROTO;
2385 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2386 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2387 		tudi->PRIM_type = T_UNITDATA_IND;
2388 		tudi->SRC_length = sizeof (sin6_t);
2389 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2390 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2391 		    sizeof (sin6_t);
2392 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2393 		tudi->OPT_length = udi_size;
2394 		sin6 = (sin6_t *)&tudi[1];
2395 		if (ira->ira_flags & IRAF_IS_IPV4) {
2396 			in6_addr_t v6dst;
2397 
2398 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2399 			    &sin6->sin6_addr);
2400 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2401 			    &v6dst);
2402 			sin6->sin6_flowinfo = 0;
2403 			sin6->sin6_scope_id = 0;
2404 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2405 			    IPCL_ZONEID(connp), us->us_netstack);
2406 		} else {
2407 			ip6h = (ip6_t *)rptr;
2408 
2409 			sin6->sin6_addr = ip6h->ip6_src;
2410 			/* No sin6_flowinfo per API */
2411 			sin6->sin6_flowinfo = 0;
2412 			/* For link-scope pass up scope id */
2413 			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2414 				sin6->sin6_scope_id = ira->ira_ruifindex;
2415 			else
2416 				sin6->sin6_scope_id = 0;
2417 			sin6->__sin6_src_id = ip_srcid_find_addr(
2418 			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
2419 			    us->us_netstack);
2420 		}
2421 		sin6->sin6_port = udpha->uha_src_port;
2422 		sin6->sin6_family = connp->conn_family;
2423 
2424 		if (udi_size != 0) {
2425 			conn_recvancillary_add(connp, recv_ancillary, ira,
2426 			    &ipps, (uchar_t *)&sin6[1], udi_size);
2427 		}
2428 	}
2429 
2430 	/* Walk past the headers unless IP_RECVHDR was set. */
2431 	if (!udp->udp_rcvhdr) {
2432 		mp->b_rptr = rptr + hdr_length;
2433 		pkt_len -= hdr_length;
2434 	}
2435 
2436 	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
2437 	udp_ulp_recv(connp, mp1, pkt_len, ira);
2438 	return;
2439 
2440 tossit:
2441 	freemsg(mp);
2442 	BUMP_MIB(&us->us_udp_mib, udpInErrors);
2443 }
2444 
2445 /*
2446  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
2447  * information that can be changing beneath us.
2448  */
2449 mblk_t *
2450 udp_snmp_get(queue_t *q, mblk_t *mpctl)
2451 {
2452 	mblk_t			*mpdata;
2453 	mblk_t			*mp_conn_ctl;
2454 	mblk_t			*mp_attr_ctl;
2455 	mblk_t			*mp6_conn_ctl;
2456 	mblk_t			*mp6_attr_ctl;
2457 	mblk_t			*mp_conn_tail;
2458 	mblk_t			*mp_attr_tail;
2459 	mblk_t			*mp6_conn_tail;
2460 	mblk_t			*mp6_attr_tail;
2461 	struct opthdr		*optp;
2462 	mib2_udpEntry_t		ude;
2463 	mib2_udp6Entry_t	ude6;
2464 	mib2_transportMLPEntry_t mlp;
2465 	int			state;
2466 	zoneid_t		zoneid;
2467 	int			i;
2468 	connf_t			*connfp;
2469 	conn_t			*connp = Q_TO_CONN(q);
2470 	int			v4_conn_idx;
2471 	int			v6_conn_idx;
2472 	boolean_t		needattr;
2473 	udp_t			*udp;
2474 	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
2475 	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
2476 	mblk_t			*mp2ctl;
2477 
2478 	/*
2479 	 * make a copy of the original message
2480 	 */
2481 	mp2ctl = copymsg(mpctl);
2482 
2483 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
2484 	if (mpctl == NULL ||
2485 	    (mpdata = mpctl->b_cont) == NULL ||
2486 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
2487 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
2488 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
2489 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
2490 		freemsg(mp_conn_ctl);
2491 		freemsg(mp_attr_ctl);
2492 		freemsg(mp6_conn_ctl);
2493 		freemsg(mpctl);
2494 		freemsg(mp2ctl);
2495 		return (0);
2496 	}
2497 
2498 	zoneid = connp->conn_zoneid;
2499 
2500 	/* fixed length structure for IPv4 and IPv6 counters */
2501 	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
2502 	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
2503 	/* synchronize 64- and 32-bit counters */
2504 	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
2505 	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
2506 
2507 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
2508 	optp->level = MIB2_UDP;
2509 	optp->name = 0;
2510 	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
2511 	    sizeof (us->us_udp_mib));
2512 	optp->len = msgdsize(mpdata);
2513 	qreply(q, mpctl);
2514 
2515 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
2516 	v4_conn_idx = v6_conn_idx = 0;
2517 
2518 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2519 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
2520 		connp = NULL;
2521 
2522 		while ((connp = ipcl_get_next_conn(connfp, connp,
2523 		    IPCL_UDPCONN))) {
2524 			udp = connp->conn_udp;
2525 			if (zoneid != connp->conn_zoneid)
2526 				continue;
2527 
2528 			/*
2529 			 * Note that the port numbers are sent in
2530 			 * host byte order
2531 			 */
2532 
2533 			if (udp->udp_state == TS_UNBND)
2534 				state = MIB2_UDP_unbound;
2535 			else if (udp->udp_state == TS_IDLE)
2536 				state = MIB2_UDP_idle;
2537 			else if (udp->udp_state == TS_DATA_XFER)
2538 				state = MIB2_UDP_connected;
2539 			else
2540 				state = MIB2_UDP_unknown;
2541 
2542 			needattr = B_FALSE;
2543 			bzero(&mlp, sizeof (mlp));
2544 			if (connp->conn_mlp_type != mlptSingle) {
2545 				if (connp->conn_mlp_type == mlptShared ||
2546 				    connp->conn_mlp_type == mlptBoth)
2547 					mlp.tme_flags |= MIB2_TMEF_SHARED;
2548 				if (connp->conn_mlp_type == mlptPrivate ||
2549 				    connp->conn_mlp_type == mlptBoth)
2550 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
2551 				needattr = B_TRUE;
2552 			}
2553 			if (connp->conn_anon_mlp) {
2554 				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
2555 				needattr = B_TRUE;
2556 			}
2557 			switch (connp->conn_mac_mode) {
2558 			case CONN_MAC_DEFAULT:
2559 				break;
2560 			case CONN_MAC_AWARE:
2561 				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
2562 				needattr = B_TRUE;
2563 				break;
2564 			case CONN_MAC_IMPLICIT:
2565 				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
2566 				needattr = B_TRUE;
2567 				break;
2568 			}
2569 			mutex_enter(&connp->conn_lock);
2570 			if (udp->udp_state == TS_DATA_XFER &&
2571 			    connp->conn_ixa->ixa_tsl != NULL) {
2572 				ts_label_t *tsl;
2573 
2574 				tsl = connp->conn_ixa->ixa_tsl;
2575 				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
2576 				mlp.tme_doi = label2doi(tsl);
2577 				mlp.tme_label = *label2bslabel(tsl);
2578 				needattr = B_TRUE;
2579 			}
2580 			mutex_exit(&connp->conn_lock);
2581 
2582 			/*
2583 			 * Create an IPv4 table entry for IPv4 entries and also
2584 			 * any IPv6 entries which are bound to in6addr_any
2585 			 * (i.e. anything a IPv4 peer could connect/send to).
2586 			 */
2587 			if (connp->conn_ipversion == IPV4_VERSION ||
2588 			    (udp->udp_state <= TS_IDLE &&
2589 			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
2590 				ude.udpEntryInfo.ue_state = state;
2591 				/*
2592 				 * If in6addr_any this will set it to
2593 				 * INADDR_ANY
2594 				 */
2595 				ude.udpLocalAddress = connp->conn_laddr_v4;
2596 				ude.udpLocalPort = ntohs(connp->conn_lport);
2597 				if (udp->udp_state == TS_DATA_XFER) {
2598 					/*
2599 					 * Can potentially get here for
2600 					 * v6 socket if another process
2601 					 * (say, ping) has just done a
2602 					 * sendto(), changing the state
2603 					 * from the TS_IDLE above to
2604 					 * TS_DATA_XFER by the time we hit
2605 					 * this part of the code.
2606 					 */
2607 					ude.udpEntryInfo.ue_RemoteAddress =
2608 					    connp->conn_faddr_v4;
2609 					ude.udpEntryInfo.ue_RemotePort =
2610 					    ntohs(connp->conn_fport);
2611 				} else {
2612 					ude.udpEntryInfo.ue_RemoteAddress = 0;
2613 					ude.udpEntryInfo.ue_RemotePort = 0;
2614 				}
2615 
2616 				/*
2617 				 * We make the assumption that all udp_t
2618 				 * structs will be created within an address
2619 				 * region no larger than 32-bits.
2620 				 */
2621 				ude.udpInstance = (uint32_t)(uintptr_t)udp;
2622 				ude.udpCreationProcess =
2623 				    (connp->conn_cpid < 0) ?
2624 				    MIB2_UNKNOWN_PROCESS :
2625 				    connp->conn_cpid;
2626 				ude.udpCreationTime = connp->conn_open_time;
2627 
2628 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
2629 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
2630 				mlp.tme_connidx = v4_conn_idx++;
2631 				if (needattr)
2632 					(void) snmp_append_data2(
2633 					    mp_attr_ctl->b_cont, &mp_attr_tail,
2634 					    (char *)&mlp, sizeof (mlp));
2635 			}
2636 			if (connp->conn_ipversion == IPV6_VERSION) {
2637 				ude6.udp6EntryInfo.ue_state  = state;
2638 				ude6.udp6LocalAddress = connp->conn_laddr_v6;
2639 				ude6.udp6LocalPort = ntohs(connp->conn_lport);
2640 				mutex_enter(&connp->conn_lock);
2641 				if (connp->conn_ixa->ixa_flags &
2642 				    IXAF_SCOPEID_SET) {
2643 					ude6.udp6IfIndex =
2644 					    connp->conn_ixa->ixa_scopeid;
2645 				} else {
2646 					ude6.udp6IfIndex = connp->conn_bound_if;
2647 				}
2648 				mutex_exit(&connp->conn_lock);
2649 				if (udp->udp_state == TS_DATA_XFER) {
2650 					ude6.udp6EntryInfo.ue_RemoteAddress =
2651 					    connp->conn_faddr_v6;
2652 					ude6.udp6EntryInfo.ue_RemotePort =
2653 					    ntohs(connp->conn_fport);
2654 				} else {
2655 					ude6.udp6EntryInfo.ue_RemoteAddress =
2656 					    sin6_null.sin6_addr;
2657 					ude6.udp6EntryInfo.ue_RemotePort = 0;
2658 				}
2659 				/*
2660 				 * We make the assumption that all udp_t
2661 				 * structs will be created within an address
2662 				 * region no larger than 32-bits.
2663 				 */
2664 				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
2665 				ude6.udp6CreationProcess =
2666 				    (connp->conn_cpid < 0) ?
2667 				    MIB2_UNKNOWN_PROCESS :
2668 				    connp->conn_cpid;
2669 				ude6.udp6CreationTime = connp->conn_open_time;
2670 
2671 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
2672 				    &mp6_conn_tail, (char *)&ude6,
2673 				    sizeof (ude6));
2674 				mlp.tme_connidx = v6_conn_idx++;
2675 				if (needattr)
2676 					(void) snmp_append_data2(
2677 					    mp6_attr_ctl->b_cont,
2678 					    &mp6_attr_tail, (char *)&mlp,
2679 					    sizeof (mlp));
2680 			}
2681 		}
2682 	}
2683 
2684 	/* IPv4 UDP endpoints */
2685 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
2686 	    sizeof (struct T_optmgmt_ack)];
2687 	optp->level = MIB2_UDP;
2688 	optp->name = MIB2_UDP_ENTRY;
2689 	optp->len = msgdsize(mp_conn_ctl->b_cont);
2690 	qreply(q, mp_conn_ctl);
2691 
2692 	/* table of MLP attributes... */
2693 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
2694 	    sizeof (struct T_optmgmt_ack)];
2695 	optp->level = MIB2_UDP;
2696 	optp->name = EXPER_XPORT_MLP;
2697 	optp->len = msgdsize(mp_attr_ctl->b_cont);
2698 	if (optp->len == 0)
2699 		freemsg(mp_attr_ctl);
2700 	else
2701 		qreply(q, mp_attr_ctl);
2702 
2703 	/* IPv6 UDP endpoints */
2704 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
2705 	    sizeof (struct T_optmgmt_ack)];
2706 	optp->level = MIB2_UDP6;
2707 	optp->name = MIB2_UDP6_ENTRY;
2708 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
2709 	qreply(q, mp6_conn_ctl);
2710 
2711 	/* table of MLP attributes... */
2712 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
2713 	    sizeof (struct T_optmgmt_ack)];
2714 	optp->level = MIB2_UDP6;
2715 	optp->name = EXPER_XPORT_MLP;
2716 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
2717 	if (optp->len == 0)
2718 		freemsg(mp6_attr_ctl);
2719 	else
2720 		qreply(q, mp6_attr_ctl);
2721 
2722 	return (mp2ctl);
2723 }
2724 
2725 /*
2726  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
2727  * NOTE: Per MIB-II, UDP has no writable data.
2728  * TODO:  If this ever actually tries to set anything, it needs to be
2729  * to do the appropriate locking.
2730  */
2731 /* ARGSUSED */
2732 int
2733 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
2734     uchar_t *ptr, int len)
2735 {
2736 	switch (level) {
2737 	case MIB2_UDP:
2738 		return (0);
2739 	default:
2740 		return (1);
2741 	}
2742 }
2743 
2744 /*
2745  * This routine creates a T_UDERROR_IND message and passes it upstream.
2746  * The address and options are copied from the T_UNITDATA_REQ message
2747  * passed in mp.  This message is freed.
2748  */
2749 static void
2750 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2751 {
2752 	struct T_unitdata_req *tudr;
2753 	mblk_t	*mp1;
2754 	uchar_t *destaddr;
2755 	t_scalar_t destlen;
2756 	uchar_t	*optaddr;
2757 	t_scalar_t optlen;
2758 
2759 	if ((mp->b_wptr < mp->b_rptr) ||
2760 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2761 		goto done;
2762 	}
2763 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2764 	destaddr = mp->b_rptr + tudr->DEST_offset;
2765 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2766 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2767 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2768 		goto done;
2769 	}
2770 	optaddr = mp->b_rptr + tudr->OPT_offset;
2771 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2772 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2773 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2774 		goto done;
2775 	}
2776 	destlen = tudr->DEST_length;
2777 	optlen = tudr->OPT_length;
2778 
2779 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2780 	    (char *)optaddr, optlen, err);
2781 	if (mp1 != NULL)
2782 		qreply(q, mp1);
2783 
2784 done:
2785 	freemsg(mp);
2786 }
2787 
2788 /*
2789  * This routine removes a port number association from a stream.  It
2790  * is called by udp_wput to handle T_UNBIND_REQ messages.
2791  */
2792 static void
2793 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2794 {
2795 	conn_t	*connp = Q_TO_CONN(q);
2796 	int	error;
2797 
2798 	error = udp_do_unbind(connp);
2799 	if (error) {
2800 		if (error < 0)
2801 			udp_err_ack(q, mp, -error, 0);
2802 		else
2803 			udp_err_ack(q, mp, TSYSERR, error);
2804 		return;
2805 	}
2806 
2807 	mp = mi_tpi_ok_ack_alloc(mp);
2808 	ASSERT(mp != NULL);
2809 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2810 	qreply(q, mp);
2811 }
2812 
2813 /*
2814  * Don't let port fall into the privileged range.
2815  * Since the extra privileged ports can be arbitrary we also
2816  * ensure that we exclude those from consideration.
2817  * us->us_epriv_ports is not sorted thus we loop over it until
2818  * there are no changes.
2819  */
2820 static in_port_t
2821 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2822 {
2823 	int i;
2824 	in_port_t nextport;
2825 	boolean_t restart = B_FALSE;
2826 	udp_stack_t *us = udp->udp_us;
2827 
2828 	if (random && udp_random_anon_port != 0) {
2829 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2830 		    sizeof (in_port_t));
2831 		/*
2832 		 * Unless changed by a sys admin, the smallest anon port
2833 		 * is 32768 and the largest anon port is 65535.  It is
2834 		 * very likely (50%) for the random port to be smaller
2835 		 * than the smallest anon port.  When that happens,
2836 		 * add port % (anon port range) to the smallest anon
2837 		 * port to get the random port.  It should fall into the
2838 		 * valid anon port range.
2839 		 */
2840 		if (port < us->us_smallest_anon_port) {
2841 			port = us->us_smallest_anon_port +
2842 			    port % (us->us_largest_anon_port -
2843 			    us->us_smallest_anon_port);
2844 		}
2845 	}
2846 
2847 retry:
2848 	if (port < us->us_smallest_anon_port)
2849 		port = us->us_smallest_anon_port;
2850 
2851 	if (port > us->us_largest_anon_port) {
2852 		port = us->us_smallest_anon_port;
2853 		if (restart)
2854 			return (0);
2855 		restart = B_TRUE;
2856 	}
2857 
2858 	if (port < us->us_smallest_nonpriv_port)
2859 		port = us->us_smallest_nonpriv_port;
2860 
2861 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2862 		if (port == us->us_epriv_ports[i]) {
2863 			port++;
2864 			/*
2865 			 * Make sure that the port is in the
2866 			 * valid range.
2867 			 */
2868 			goto retry;
2869 		}
2870 	}
2871 
2872 	if (is_system_labeled() &&
2873 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2874 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2875 		port = nextport;
2876 		goto retry;
2877 	}
2878 
2879 	return (port);
2880 }
2881 
2882 /*
2883  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2884  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2885  * the TPI options, otherwise we take them from msg_control.
2886  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2887  * Always consumes mp; never consumes tudr_mp.
2888  */
2889 static int
2890 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2891     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2892 {
2893 	udp_t		*udp = connp->conn_udp;
2894 	udp_stack_t	*us = udp->udp_us;
2895 	int		error;
2896 	ip_xmit_attr_t	*ixa;
2897 	ip_pkt_t	*ipp;
2898 	in6_addr_t	v6src;
2899 	in6_addr_t	v6dst;
2900 	in6_addr_t	v6nexthop;
2901 	in_port_t	dstport;
2902 	uint32_t	flowinfo;
2903 	uint_t		srcid;
2904 	int		is_absreq_failure = 0;
2905 	conn_opt_arg_t	coas, *coa;
2906 
2907 	ASSERT(tudr_mp != NULL || msg != NULL);
2908 
2909 	/*
2910 	 * Get ixa before checking state to handle a disconnect race.
2911 	 *
2912 	 * We need an exclusive copy of conn_ixa since the ancillary data
2913 	 * options might modify it. That copy has no pointers hence we
2914 	 * need to set them up once we've parsed the ancillary data.
2915 	 */
2916 	ixa = conn_get_ixa_exclusive(connp);
2917 	if (ixa == NULL) {
2918 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2919 		freemsg(mp);
2920 		return (ENOMEM);
2921 	}
2922 	ASSERT(cr != NULL);
2923 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2924 	ixa->ixa_cred = cr;
2925 	ixa->ixa_cpid = pid;
2926 	if (is_system_labeled()) {
2927 		/* We need to restart with a label based on the cred */
2928 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2929 	}
2930 
2931 	/* In case previous destination was multicast or multirt */
2932 	ip_attr_newdst(ixa);
2933 
2934 	/* Get a copy of conn_xmit_ipp since the options might change it */
2935 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2936 	if (ipp == NULL) {
2937 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2938 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2939 		ixa->ixa_cpid = connp->conn_cpid;
2940 		ixa_refrele(ixa);
2941 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2942 		freemsg(mp);
2943 		return (ENOMEM);
2944 	}
2945 	mutex_enter(&connp->conn_lock);
2946 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2947 	mutex_exit(&connp->conn_lock);
2948 	if (error != 0) {
2949 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2950 		freemsg(mp);
2951 		goto done;
2952 	}
2953 
2954 	/*
2955 	 * Parse the options and update ixa and ipp as a result.
2956 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2957 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2958 	 */
2959 
2960 	coa = &coas;
2961 	coa->coa_connp = connp;
2962 	coa->coa_ixa = ixa;
2963 	coa->coa_ipp = ipp;
2964 	coa->coa_ancillary = B_TRUE;
2965 	coa->coa_changed = 0;
2966 
2967 	if (msg != NULL) {
2968 		error = process_auxiliary_options(connp, msg->msg_control,
2969 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2970 	} else {
2971 		struct T_unitdata_req *tudr;
2972 
2973 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2974 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2975 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2976 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2977 		    coa, &is_absreq_failure);
2978 	}
2979 	if (error != 0) {
2980 		/*
2981 		 * Note: No special action needed in this
2982 		 * module for "is_absreq_failure"
2983 		 */
2984 		freemsg(mp);
2985 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2986 		goto done;
2987 	}
2988 	ASSERT(is_absreq_failure == 0);
2989 
2990 	mutex_enter(&connp->conn_lock);
2991 	/*
2992 	 * If laddr is unspecified then we look at sin6_src_id.
2993 	 * We will give precedence to a source address set with IPV6_PKTINFO
2994 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2995 	 * want ip_attr_connect to select a source (since it can fail) when
2996 	 * IPV6_PKTINFO is specified.
2997 	 * If this doesn't result in a source address then we get a source
2998 	 * from ip_attr_connect() below.
2999 	 */
3000 	v6src = connp->conn_saddr_v6;
3001 	if (sin != NULL) {
3002 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3003 		dstport = sin->sin_port;
3004 		flowinfo = 0;
3005 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3006 		ixa->ixa_flags |= IXAF_IS_IPV4;
3007 	} else if (sin6 != NULL) {
3008 		v6dst = sin6->sin6_addr;
3009 		dstport = sin6->sin6_port;
3010 		flowinfo = sin6->sin6_flowinfo;
3011 		srcid = sin6->__sin6_src_id;
3012 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3013 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3014 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3015 		} else {
3016 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3017 		}
3018 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3019 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3020 			    connp->conn_netstack);
3021 		}
3022 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
3023 			ixa->ixa_flags |= IXAF_IS_IPV4;
3024 		else
3025 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3026 	} else {
3027 		/* Connected case */
3028 		v6dst = connp->conn_faddr_v6;
3029 		dstport = connp->conn_fport;
3030 		flowinfo = connp->conn_flowinfo;
3031 	}
3032 	mutex_exit(&connp->conn_lock);
3033 
3034 	/* Handle IPV6_PKTINFO setting source address. */
3035 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
3036 	    (ipp->ipp_fields & IPPF_ADDR)) {
3037 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3038 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3039 				v6src = ipp->ipp_addr;
3040 		} else {
3041 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3042 				v6src = ipp->ipp_addr;
3043 		}
3044 	}
3045 
3046 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
3047 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3048 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3049 
3050 	switch (error) {
3051 	case 0:
3052 		break;
3053 	case EADDRNOTAVAIL:
3054 		/*
3055 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3056 		 * Don't have the application see that errno
3057 		 */
3058 		error = ENETUNREACH;
3059 		goto failed;
3060 	case ENETDOWN:
3061 		/*
3062 		 * Have !ipif_addr_ready address; drop packet silently
3063 		 * until we can get applications to not send until we
3064 		 * are ready.
3065 		 */
3066 		error = 0;
3067 		goto failed;
3068 	case EHOSTUNREACH:
3069 	case ENETUNREACH:
3070 		if (ixa->ixa_ire != NULL) {
3071 			/*
3072 			 * Let conn_ip_output/ire_send_noroute return
3073 			 * the error and send any local ICMP error.
3074 			 */
3075 			error = 0;
3076 			break;
3077 		}
3078 		/* FALLTHRU */
3079 	default:
3080 	failed:
3081 		freemsg(mp);
3082 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3083 		goto done;
3084 	}
3085 
3086 	/*
3087 	 * We might be going to a different destination than last time,
3088 	 * thus check that TX allows the communication and compute any
3089 	 * needed label.
3090 	 *
3091 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3092 	 * don't have to worry about concurrent threads.
3093 	 */
3094 	if (is_system_labeled()) {
3095 		/* Using UDP MLP requires SCM_UCRED from user */
3096 		if (connp->conn_mlp_type != mlptSingle &&
3097 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
3098 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3099 			error = ECONNREFUSED;
3100 			freemsg(mp);
3101 			goto done;
3102 		}
3103 		/*
3104 		 * Check whether Trusted Solaris policy allows communication
3105 		 * with this host, and pretend that the destination is
3106 		 * unreachable if not.
3107 		 * Compute any needed label and place it in ipp_label_v4/v6.
3108 		 *
3109 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
3110 		 * ipp_label_v4/v6 to form the packet.
3111 		 *
3112 		 * Tsol note: We have ipp structure local to this thread so
3113 		 * no locking is needed.
3114 		 */
3115 		error = conn_update_label(connp, ixa, &v6dst, ipp);
3116 		if (error != 0) {
3117 			freemsg(mp);
3118 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3119 			goto done;
3120 		}
3121 	}
3122 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
3123 	    flowinfo, mp, &error);
3124 	if (mp == NULL) {
3125 		ASSERT(error != 0);
3126 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3127 		goto done;
3128 	}
3129 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
3130 		error = EMSGSIZE;
3131 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3132 		freemsg(mp);
3133 		goto done;
3134 	}
3135 	/* We're done.  Pass the packet to ip. */
3136 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3137 
3138 	error = conn_ip_output(mp, ixa);
3139 	/* No udpOutErrors if an error since IP increases its error counter */
3140 	switch (error) {
3141 	case 0:
3142 		break;
3143 	case EWOULDBLOCK:
3144 		(void) ixa_check_drain_insert(connp, ixa);
3145 		error = 0;
3146 		break;
3147 	case EADDRNOTAVAIL:
3148 		/*
3149 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3150 		 * Don't have the application see that errno
3151 		 */
3152 		error = ENETUNREACH;
3153 		/* FALLTHRU */
3154 	default:
3155 		mutex_enter(&connp->conn_lock);
3156 		/*
3157 		 * Clear the source and v6lastdst so we call ip_attr_connect
3158 		 * for the next packet and try to pick a better source.
3159 		 */
3160 		if (connp->conn_mcbc_bind)
3161 			connp->conn_saddr_v6 = ipv6_all_zeros;
3162 		else
3163 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3164 		connp->conn_v6lastdst = ipv6_all_zeros;
3165 		mutex_exit(&connp->conn_lock);
3166 		break;
3167 	}
3168 done:
3169 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3170 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3171 	ixa->ixa_cpid = connp->conn_cpid;
3172 	ixa_refrele(ixa);
3173 	ip_pkt_free(ipp);
3174 	kmem_free(ipp, sizeof (*ipp));
3175 	return (error);
3176 }
3177 
3178 /*
3179  * Handle sending an M_DATA for a connected socket.
3180  * Handles both IPv4 and IPv6.
3181  */
3182 static int
3183 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3184 {
3185 	udp_t		*udp = connp->conn_udp;
3186 	udp_stack_t	*us = udp->udp_us;
3187 	int		error;
3188 	ip_xmit_attr_t	*ixa;
3189 
3190 	/*
3191 	 * If no other thread is using conn_ixa this just gets a reference to
3192 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3193 	 */
3194 	ixa = conn_get_ixa(connp, B_FALSE);
3195 	if (ixa == NULL) {
3196 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3197 		freemsg(mp);
3198 		return (ENOMEM);
3199 	}
3200 
3201 	ASSERT(cr != NULL);
3202 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3203 	ixa->ixa_cred = cr;
3204 	ixa->ixa_cpid = pid;
3205 
3206 	mutex_enter(&connp->conn_lock);
3207 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3208 	    connp->conn_fport, connp->conn_flowinfo, &error);
3209 
3210 	if (mp == NULL) {
3211 		ASSERT(error != 0);
3212 		mutex_exit(&connp->conn_lock);
3213 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3214 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3215 		ixa->ixa_cpid = connp->conn_cpid;
3216 		ixa_refrele(ixa);
3217 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3218 		freemsg(mp);
3219 		return (error);
3220 	}
3221 
3222 	/*
3223 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3224 	 * safe copy, then we need to fill in any pointers in it.
3225 	 */
3226 	if (ixa->ixa_ire == NULL) {
3227 		in6_addr_t	faddr, saddr;
3228 		in6_addr_t	nexthop;
3229 		in_port_t	fport;
3230 
3231 		saddr = connp->conn_saddr_v6;
3232 		faddr = connp->conn_faddr_v6;
3233 		fport = connp->conn_fport;
3234 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3235 		mutex_exit(&connp->conn_lock);
3236 
3237 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3238 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3239 		    IPDF_IPSEC);
3240 		switch (error) {
3241 		case 0:
3242 			break;
3243 		case EADDRNOTAVAIL:
3244 			/*
3245 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3246 			 * Don't have the application see that errno
3247 			 */
3248 			error = ENETUNREACH;
3249 			goto failed;
3250 		case ENETDOWN:
3251 			/*
3252 			 * Have !ipif_addr_ready address; drop packet silently
3253 			 * until we can get applications to not send until we
3254 			 * are ready.
3255 			 */
3256 			error = 0;
3257 			goto failed;
3258 		case EHOSTUNREACH:
3259 		case ENETUNREACH:
3260 			if (ixa->ixa_ire != NULL) {
3261 				/*
3262 				 * Let conn_ip_output/ire_send_noroute return
3263 				 * the error and send any local ICMP error.
3264 				 */
3265 				error = 0;
3266 				break;
3267 			}
3268 			/* FALLTHRU */
3269 		default:
3270 		failed:
3271 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3272 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3273 			ixa->ixa_cpid = connp->conn_cpid;
3274 			ixa_refrele(ixa);
3275 			freemsg(mp);
3276 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3277 			return (error);
3278 		}
3279 	} else {
3280 		/* Done with conn_t */
3281 		mutex_exit(&connp->conn_lock);
3282 	}
3283 	ASSERT(ixa->ixa_ire != NULL);
3284 
3285 	/* We're done.  Pass the packet to ip. */
3286 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3287 
3288 	error = conn_ip_output(mp, ixa);
3289 	/* No udpOutErrors if an error since IP increases its error counter */
3290 	switch (error) {
3291 	case 0:
3292 		break;
3293 	case EWOULDBLOCK:
3294 		(void) ixa_check_drain_insert(connp, ixa);
3295 		error = 0;
3296 		break;
3297 	case EADDRNOTAVAIL:
3298 		/*
3299 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3300 		 * Don't have the application see that errno
3301 		 */
3302 		error = ENETUNREACH;
3303 		break;
3304 	}
3305 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3306 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3307 	ixa->ixa_cpid = connp->conn_cpid;
3308 	ixa_refrele(ixa);
3309 	return (error);
3310 }
3311 
3312 /*
3313  * Handle sending an M_DATA to the last destination.
3314  * Handles both IPv4 and IPv6.
3315  *
3316  * NOTE: The caller must hold conn_lock and we drop it here.
3317  */
3318 static int
3319 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3320     ip_xmit_attr_t *ixa)
3321 {
3322 	udp_t		*udp = connp->conn_udp;
3323 	udp_stack_t	*us = udp->udp_us;
3324 	int		error;
3325 
3326 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3327 	ASSERT(ixa != NULL);
3328 
3329 	ASSERT(cr != NULL);
3330 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3331 	ixa->ixa_cred = cr;
3332 	ixa->ixa_cpid = pid;
3333 
3334 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3335 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3336 
3337 	if (mp == NULL) {
3338 		ASSERT(error != 0);
3339 		mutex_exit(&connp->conn_lock);
3340 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3341 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3342 		ixa->ixa_cpid = connp->conn_cpid;
3343 		ixa_refrele(ixa);
3344 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3345 		freemsg(mp);
3346 		return (error);
3347 	}
3348 
3349 	/*
3350 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3351 	 * safe copy, then we need to fill in any pointers in it.
3352 	 */
3353 	if (ixa->ixa_ire == NULL) {
3354 		in6_addr_t	lastdst, lastsrc;
3355 		in6_addr_t	nexthop;
3356 		in_port_t	lastport;
3357 
3358 		lastsrc = connp->conn_v6lastsrc;
3359 		lastdst = connp->conn_v6lastdst;
3360 		lastport = connp->conn_lastdstport;
3361 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3362 		mutex_exit(&connp->conn_lock);
3363 
3364 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3365 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3366 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3367 		switch (error) {
3368 		case 0:
3369 			break;
3370 		case EADDRNOTAVAIL:
3371 			/*
3372 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3373 			 * Don't have the application see that errno
3374 			 */
3375 			error = ENETUNREACH;
3376 			goto failed;
3377 		case ENETDOWN:
3378 			/*
3379 			 * Have !ipif_addr_ready address; drop packet silently
3380 			 * until we can get applications to not send until we
3381 			 * are ready.
3382 			 */
3383 			error = 0;
3384 			goto failed;
3385 		case EHOSTUNREACH:
3386 		case ENETUNREACH:
3387 			if (ixa->ixa_ire != NULL) {
3388 				/*
3389 				 * Let conn_ip_output/ire_send_noroute return
3390 				 * the error and send any local ICMP error.
3391 				 */
3392 				error = 0;
3393 				break;
3394 			}
3395 			/* FALLTHRU */
3396 		default:
3397 		failed:
3398 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3399 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3400 			ixa->ixa_cpid = connp->conn_cpid;
3401 			ixa_refrele(ixa);
3402 			freemsg(mp);
3403 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3404 			return (error);
3405 		}
3406 	} else {
3407 		/* Done with conn_t */
3408 		mutex_exit(&connp->conn_lock);
3409 	}
3410 
3411 	/* We're done.  Pass the packet to ip. */
3412 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3413 
3414 	error = conn_ip_output(mp, ixa);
3415 	/* No udpOutErrors if an error since IP increases its error counter */
3416 	switch (error) {
3417 	case 0:
3418 		break;
3419 	case EWOULDBLOCK:
3420 		(void) ixa_check_drain_insert(connp, ixa);
3421 		error = 0;
3422 		break;
3423 	case EADDRNOTAVAIL:
3424 		/*
3425 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3426 		 * Don't have the application see that errno
3427 		 */
3428 		error = ENETUNREACH;
3429 		/* FALLTHRU */
3430 	default:
3431 		mutex_enter(&connp->conn_lock);
3432 		/*
3433 		 * Clear the source and v6lastdst so we call ip_attr_connect
3434 		 * for the next packet and try to pick a better source.
3435 		 */
3436 		if (connp->conn_mcbc_bind)
3437 			connp->conn_saddr_v6 = ipv6_all_zeros;
3438 		else
3439 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3440 		connp->conn_v6lastdst = ipv6_all_zeros;
3441 		mutex_exit(&connp->conn_lock);
3442 		break;
3443 	}
3444 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3445 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3446 	ixa->ixa_cpid = connp->conn_cpid;
3447 	ixa_refrele(ixa);
3448 	return (error);
3449 }
3450 
3451 
3452 /*
3453  * Prepend the header template and then fill in the source and
3454  * flowinfo. The caller needs to handle the destination address since
3455  * it's setting is different if rthdr or source route.
3456  *
3457  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3458  * When it returns NULL it sets errorp.
3459  */
3460 static mblk_t *
3461 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3462     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3463 {
3464 	udp_t		*udp = connp->conn_udp;
3465 	udp_stack_t	*us = udp->udp_us;
3466 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3467 	uint_t		pktlen;
3468 	uint_t		alloclen;
3469 	uint_t		copylen;
3470 	uint8_t		*iph;
3471 	uint_t		ip_hdr_length;
3472 	udpha_t		*udpha;
3473 	uint32_t	cksum;
3474 	ip_pkt_t	*ipp;
3475 
3476 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3477 
3478 	/*
3479 	 * Copy the header template and leave space for an SPI
3480 	 */
3481 	copylen = connp->conn_ht_iphc_len;
3482 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3483 	pktlen = alloclen + msgdsize(mp);
3484 	if (pktlen > IP_MAXPACKET) {
3485 		freemsg(mp);
3486 		*errorp = EMSGSIZE;
3487 		return (NULL);
3488 	}
3489 	ixa->ixa_pktlen = pktlen;
3490 
3491 	/* check/fix buffer config, setup pointers into it */
3492 	iph = mp->b_rptr - alloclen;
3493 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3494 		mblk_t *mp1;
3495 
3496 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3497 		if (mp1 == NULL) {
3498 			freemsg(mp);
3499 			*errorp = ENOMEM;
3500 			return (NULL);
3501 		}
3502 		mp1->b_wptr = DB_LIM(mp1);
3503 		mp1->b_cont = mp;
3504 		mp = mp1;
3505 		iph = (mp->b_wptr - alloclen);
3506 	}
3507 	mp->b_rptr = iph;
3508 	bcopy(connp->conn_ht_iphc, iph, copylen);
3509 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3510 
3511 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3512 	udpha = (udpha_t *)(iph + ip_hdr_length);
3513 
3514 	/*
3515 	 * Setup header length and prepare for ULP checksum done in IP.
3516 	 * udp_build_hdr_template has already massaged any routing header
3517 	 * and placed the result in conn_sum.
3518 	 *
3519 	 * We make it easy for IP to include our pseudo header
3520 	 * by putting our length in uha_checksum.
3521 	 */
3522 	cksum = pktlen - ip_hdr_length;
3523 	udpha->uha_length = htons(cksum);
3524 
3525 	cksum += connp->conn_sum;
3526 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3527 	ASSERT(cksum < 0x10000);
3528 
3529 	ipp = &connp->conn_xmit_ipp;
3530 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3531 		ipha_t	*ipha = (ipha_t *)iph;
3532 
3533 		ipha->ipha_length = htons((uint16_t)pktlen);
3534 
3535 		/* IP does the checksum if uha_checksum is non-zero */
3536 		if (us->us_do_checksum)
3537 			udpha->uha_checksum = htons(cksum);
3538 
3539 		/* if IP_PKTINFO specified an addres it wins over bind() */
3540 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3541 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3542 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3543 			ipha->ipha_src = ipp->ipp_addr_v4;
3544 		} else {
3545 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3546 		}
3547 	} else {
3548 		ip6_t *ip6h = (ip6_t *)iph;
3549 
3550 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3551 		udpha->uha_checksum = htons(cksum);
3552 
3553 		/* if IP_PKTINFO specified an addres it wins over bind() */
3554 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3555 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3556 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3557 			ip6h->ip6_src = ipp->ipp_addr;
3558 		} else {
3559 			ip6h->ip6_src = *v6src;
3560 		}
3561 		ip6h->ip6_vcf =
3562 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3563 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3564 		if (ipp->ipp_fields & IPPF_TCLASS) {
3565 			/* Overrides the class part of flowinfo */
3566 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3567 			    ipp->ipp_tclass);
3568 		}
3569 	}
3570 
3571 	/* Insert all-0s SPI now. */
3572 	if (insert_spi)
3573 		*((uint32_t *)(udpha + 1)) = 0;
3574 
3575 	udpha->uha_dst_port = dstport;
3576 	return (mp);
3577 }
3578 
3579 /*
3580  * Send a T_UDERR_IND in response to an M_DATA
3581  */
3582 static void
3583 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3584 {
3585 	struct sockaddr_storage ss;
3586 	sin_t		*sin;
3587 	sin6_t		*sin6;
3588 	struct sockaddr	*addr;
3589 	socklen_t	addrlen;
3590 	mblk_t		*mp1;
3591 
3592 	mutex_enter(&connp->conn_lock);
3593 	/* Initialize addr and addrlen as if they're passed in */
3594 	if (connp->conn_family == AF_INET) {
3595 		sin = (sin_t *)&ss;
3596 		*sin = sin_null;
3597 		sin->sin_family = AF_INET;
3598 		sin->sin_port = connp->conn_fport;
3599 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3600 		addr = (struct sockaddr *)sin;
3601 		addrlen = sizeof (*sin);
3602 	} else {
3603 		sin6 = (sin6_t *)&ss;
3604 		*sin6 = sin6_null;
3605 		sin6->sin6_family = AF_INET6;
3606 		sin6->sin6_port = connp->conn_fport;
3607 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3608 		sin6->sin6_addr = connp->conn_faddr_v6;
3609 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3610 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3611 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3612 		} else {
3613 			sin6->sin6_scope_id = 0;
3614 		}
3615 		sin6->__sin6_src_id = 0;
3616 		addr = (struct sockaddr *)sin6;
3617 		addrlen = sizeof (*sin6);
3618 	}
3619 	mutex_exit(&connp->conn_lock);
3620 
3621 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3622 	if (mp1 != NULL)
3623 		putnext(connp->conn_rq, mp1);
3624 }
3625 
3626 /*
3627  * This routine handles all messages passed downstream.  It either
3628  * consumes the message or passes it downstream; it never queues a
3629  * a message.
3630  *
3631  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3632  * is valid when we are directly beneath the stream head, and thus sockfs
3633  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3634  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3635  * connected endpoints.
3636  */
3637 void
3638 udp_wput(queue_t *q, mblk_t *mp)
3639 {
3640 	sin6_t		*sin6;
3641 	sin_t		*sin = NULL;
3642 	uint_t		srcid;
3643 	conn_t		*connp = Q_TO_CONN(q);
3644 	udp_t		*udp = connp->conn_udp;
3645 	int		error = 0;
3646 	struct sockaddr	*addr = NULL;
3647 	socklen_t	addrlen;
3648 	udp_stack_t	*us = udp->udp_us;
3649 	struct T_unitdata_req *tudr;
3650 	mblk_t		*data_mp;
3651 	ushort_t	ipversion;
3652 	cred_t		*cr;
3653 	pid_t		pid;
3654 
3655 	/*
3656 	 * We directly handle several cases here: T_UNITDATA_REQ message
3657 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3658 	 * socket.
3659 	 */
3660 	switch (DB_TYPE(mp)) {
3661 	case M_DATA:
3662 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3663 			/* Not connected; address is required */
3664 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3665 			UDP_DBGSTAT(us, udp_data_notconn);
3666 			UDP_STAT(us, udp_out_err_notconn);
3667 			freemsg(mp);
3668 			return;
3669 		}
3670 		/*
3671 		 * All Solaris components should pass a db_credp
3672 		 * for this message, hence we ASSERT.
3673 		 * On production kernels we return an error to be robust against
3674 		 * random streams modules sitting on top of us.
3675 		 */
3676 		cr = msg_getcred(mp, &pid);
3677 		ASSERT(cr != NULL);
3678 		if (cr == NULL) {
3679 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3680 			freemsg(mp);
3681 			return;
3682 		}
3683 		ASSERT(udp->udp_issocket);
3684 		UDP_DBGSTAT(us, udp_data_conn);
3685 		error = udp_output_connected(connp, mp, cr, pid);
3686 		if (error != 0) {
3687 			UDP_STAT(us, udp_out_err_output);
3688 			if (connp->conn_rq != NULL)
3689 				udp_ud_err_connected(connp, (t_scalar_t)error);
3690 #ifdef DEBUG
3691 			printf("udp_output_connected returned %d\n", error);
3692 #endif
3693 		}
3694 		return;
3695 
3696 	case M_PROTO:
3697 	case M_PCPROTO:
3698 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3699 		if (MBLKL(mp) < sizeof (*tudr) ||
3700 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3701 			udp_wput_other(q, mp);
3702 			return;
3703 		}
3704 		break;
3705 
3706 	default:
3707 		udp_wput_other(q, mp);
3708 		return;
3709 	}
3710 
3711 	/* Handle valid T_UNITDATA_REQ here */
3712 	data_mp = mp->b_cont;
3713 	if (data_mp == NULL) {
3714 		error = EPROTO;
3715 		goto ud_error2;
3716 	}
3717 	mp->b_cont = NULL;
3718 
3719 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3720 		error = EADDRNOTAVAIL;
3721 		goto ud_error2;
3722 	}
3723 
3724 	/*
3725 	 * All Solaris components should pass a db_credp
3726 	 * for this TPI message, hence we should ASSERT.
3727 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3728 	 * passes the options from a T_UNITDATA_IND unchanged in a
3729 	 * T_UNITDATA_REQ. While that is the right thing to do for
3730 	 * some options, SCM_UCRED being the key one, this also makes it
3731 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3732 	 */
3733 	cr = msg_getcred(mp, &pid);
3734 	if (cr == NULL) {
3735 		cr = connp->conn_cred;
3736 		pid = connp->conn_cpid;
3737 	}
3738 
3739 	/*
3740 	 * If a port has not been bound to the stream, fail.
3741 	 * This is not a problem when sockfs is directly
3742 	 * above us, because it will ensure that the socket
3743 	 * is first bound before allowing data to be sent.
3744 	 */
3745 	if (udp->udp_state == TS_UNBND) {
3746 		error = EPROTO;
3747 		goto ud_error2;
3748 	}
3749 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3750 	addrlen = tudr->DEST_length;
3751 
3752 	switch (connp->conn_family) {
3753 	case AF_INET6:
3754 		sin6 = (sin6_t *)addr;
3755 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3756 		    (sin6->sin6_family != AF_INET6)) {
3757 			error = EADDRNOTAVAIL;
3758 			goto ud_error2;
3759 		}
3760 
3761 		srcid = sin6->__sin6_src_id;
3762 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3763 			/*
3764 			 * Destination is a non-IPv4-compatible IPv6 address.
3765 			 * Send out an IPv6 format packet.
3766 			 */
3767 
3768 			/*
3769 			 * If the local address is a mapped address return
3770 			 * an error.
3771 			 * It would be possible to send an IPv6 packet but the
3772 			 * response would never make it back to the application
3773 			 * since it is bound to a mapped address.
3774 			 */
3775 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3776 				error = EADDRNOTAVAIL;
3777 				goto ud_error2;
3778 			}
3779 
3780 			UDP_DBGSTAT(us, udp_out_ipv6);
3781 
3782 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3783 				sin6->sin6_addr = ipv6_loopback;
3784 			ipversion = IPV6_VERSION;
3785 		} else {
3786 			if (connp->conn_ipv6_v6only) {
3787 				error = EADDRNOTAVAIL;
3788 				goto ud_error2;
3789 			}
3790 
3791 			/*
3792 			 * If the local address is not zero or a mapped address
3793 			 * return an error.  It would be possible to send an
3794 			 * IPv4 packet but the response would never make it
3795 			 * back to the application since it is bound to a
3796 			 * non-mapped address.
3797 			 */
3798 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3799 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3800 				error = EADDRNOTAVAIL;
3801 				goto ud_error2;
3802 			}
3803 			UDP_DBGSTAT(us, udp_out_mapped);
3804 
3805 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3806 				V4_PART_OF_V6(sin6->sin6_addr) =
3807 				    htonl(INADDR_LOOPBACK);
3808 			}
3809 			ipversion = IPV4_VERSION;
3810 		}
3811 
3812 		if (tudr->OPT_length != 0) {
3813 			/*
3814 			 * If we are connected then the destination needs to be
3815 			 * the same as the connected one.
3816 			 */
3817 			if (udp->udp_state == TS_DATA_XFER &&
3818 			    !conn_same_as_last_v6(connp, sin6)) {
3819 				error = EISCONN;
3820 				goto ud_error2;
3821 			}
3822 			UDP_STAT(us, udp_out_opt);
3823 			error = udp_output_ancillary(connp, NULL, sin6,
3824 			    data_mp, mp, NULL, cr, pid);
3825 		} else {
3826 			ip_xmit_attr_t *ixa;
3827 
3828 			/*
3829 			 * We have to allocate an ip_xmit_attr_t before we grab
3830 			 * conn_lock and we need to hold conn_lock once we've
3831 			 * checked conn_same_as_last_v6 to handle concurrent
3832 			 * send* calls on a socket.
3833 			 */
3834 			ixa = conn_get_ixa(connp, B_FALSE);
3835 			if (ixa == NULL) {
3836 				error = ENOMEM;
3837 				goto ud_error2;
3838 			}
3839 			mutex_enter(&connp->conn_lock);
3840 
3841 			if (conn_same_as_last_v6(connp, sin6) &&
3842 			    connp->conn_lastsrcid == srcid &&
3843 			    ipsec_outbound_policy_current(ixa)) {
3844 				UDP_DBGSTAT(us, udp_out_lastdst);
3845 				/* udp_output_lastdst drops conn_lock */
3846 				error = udp_output_lastdst(connp, data_mp, cr,
3847 				    pid, ixa);
3848 			} else {
3849 				UDP_DBGSTAT(us, udp_out_diffdst);
3850 				/* udp_output_newdst drops conn_lock */
3851 				error = udp_output_newdst(connp, data_mp, NULL,
3852 				    sin6, ipversion, cr, pid, ixa);
3853 			}
3854 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3855 		}
3856 		if (error == 0) {
3857 			freeb(mp);
3858 			return;
3859 		}
3860 		break;
3861 
3862 	case AF_INET:
3863 		sin = (sin_t *)addr;
3864 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3865 		    (sin->sin_family != AF_INET)) {
3866 			error = EADDRNOTAVAIL;
3867 			goto ud_error2;
3868 		}
3869 		UDP_DBGSTAT(us, udp_out_ipv4);
3870 		if (sin->sin_addr.s_addr == INADDR_ANY)
3871 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3872 		ipversion = IPV4_VERSION;
3873 
3874 		srcid = 0;
3875 		if (tudr->OPT_length != 0) {
3876 			/*
3877 			 * If we are connected then the destination needs to be
3878 			 * the same as the connected one.
3879 			 */
3880 			if (udp->udp_state == TS_DATA_XFER &&
3881 			    !conn_same_as_last_v4(connp, sin)) {
3882 				error = EISCONN;
3883 				goto ud_error2;
3884 			}
3885 			UDP_STAT(us, udp_out_opt);
3886 			error = udp_output_ancillary(connp, sin, NULL,
3887 			    data_mp, mp, NULL, cr, pid);
3888 		} else {
3889 			ip_xmit_attr_t *ixa;
3890 
3891 			/*
3892 			 * We have to allocate an ip_xmit_attr_t before we grab
3893 			 * conn_lock and we need to hold conn_lock once we've
3894 			 * checked conn_same_as_last_v4 to handle concurrent
3895 			 * send* calls on a socket.
3896 			 */
3897 			ixa = conn_get_ixa(connp, B_FALSE);
3898 			if (ixa == NULL) {
3899 				error = ENOMEM;
3900 				goto ud_error2;
3901 			}
3902 			mutex_enter(&connp->conn_lock);
3903 
3904 			if (conn_same_as_last_v4(connp, sin) &&
3905 			    ipsec_outbound_policy_current(ixa)) {
3906 				UDP_DBGSTAT(us, udp_out_lastdst);
3907 				/* udp_output_lastdst drops conn_lock */
3908 				error = udp_output_lastdst(connp, data_mp, cr,
3909 				    pid, ixa);
3910 			} else {
3911 				UDP_DBGSTAT(us, udp_out_diffdst);
3912 				/* udp_output_newdst drops conn_lock */
3913 				error = udp_output_newdst(connp, data_mp, sin,
3914 				    NULL, ipversion, cr, pid, ixa);
3915 			}
3916 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3917 		}
3918 		if (error == 0) {
3919 			freeb(mp);
3920 			return;
3921 		}
3922 		break;
3923 	}
3924 	UDP_STAT(us, udp_out_err_output);
3925 	ASSERT(mp != NULL);
3926 	/* mp is freed by the following routine */
3927 	udp_ud_err(q, mp, (t_scalar_t)error);
3928 	return;
3929 
3930 ud_error2:
3931 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3932 	freemsg(data_mp);
3933 	UDP_STAT(us, udp_out_err_output);
3934 	ASSERT(mp != NULL);
3935 	/* mp is freed by the following routine */
3936 	udp_ud_err(q, mp, (t_scalar_t)error);
3937 }
3938 
3939 /*
3940  * Handle the case of the IP address, port, flow label being different
3941  * for both IPv4 and IPv6.
3942  *
3943  * NOTE: The caller must hold conn_lock and we drop it here.
3944  */
3945 static int
3946 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3947     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3948 {
3949 	uint_t		srcid;
3950 	uint32_t	flowinfo;
3951 	udp_t		*udp = connp->conn_udp;
3952 	int		error = 0;
3953 	ip_xmit_attr_t	*oldixa;
3954 	udp_stack_t	*us = udp->udp_us;
3955 	in6_addr_t	v6src;
3956 	in6_addr_t	v6dst;
3957 	in6_addr_t	v6nexthop;
3958 	in_port_t	dstport;
3959 
3960 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3961 	ASSERT(ixa != NULL);
3962 	/*
3963 	 * We hold conn_lock across all the use and modifications of
3964 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3965 	 * stay consistent.
3966 	 */
3967 
3968 	ASSERT(cr != NULL);
3969 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3970 	ixa->ixa_cred = cr;
3971 	ixa->ixa_cpid = pid;
3972 	if (is_system_labeled()) {
3973 		/* We need to restart with a label based on the cred */
3974 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3975 	}
3976 
3977 	/*
3978 	 * If we are connected then the destination needs to be the
3979 	 * same as the connected one, which is not the case here since we
3980 	 * checked for that above.
3981 	 */
3982 	if (udp->udp_state == TS_DATA_XFER) {
3983 		mutex_exit(&connp->conn_lock);
3984 		error = EISCONN;
3985 		goto ud_error;
3986 	}
3987 
3988 	/* In case previous destination was multicast or multirt */
3989 	ip_attr_newdst(ixa);
3990 
3991 	/*
3992 	 * If laddr is unspecified then we look at sin6_src_id.
3993 	 * We will give precedence to a source address set with IPV6_PKTINFO
3994 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3995 	 * want ip_attr_connect to select a source (since it can fail) when
3996 	 * IPV6_PKTINFO is specified.
3997 	 * If this doesn't result in a source address then we get a source
3998 	 * from ip_attr_connect() below.
3999 	 */
4000 	v6src = connp->conn_saddr_v6;
4001 	if (sin != NULL) {
4002 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
4003 		dstport = sin->sin_port;
4004 		flowinfo = 0;
4005 		srcid = 0;
4006 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4007 		if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
4008 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4009 			    connp->conn_netstack);
4010 		}
4011 		ixa->ixa_flags |= IXAF_IS_IPV4;
4012 	} else {
4013 		v6dst = sin6->sin6_addr;
4014 		dstport = sin6->sin6_port;
4015 		flowinfo = sin6->sin6_flowinfo;
4016 		srcid = sin6->__sin6_src_id;
4017 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
4018 			ixa->ixa_scopeid = sin6->sin6_scope_id;
4019 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
4020 		} else {
4021 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4022 		}
4023 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4024 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4025 			    connp->conn_netstack);
4026 		}
4027 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
4028 			ixa->ixa_flags |= IXAF_IS_IPV4;
4029 		else
4030 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
4031 	}
4032 	/* Handle IPV6_PKTINFO setting source address. */
4033 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
4034 	    (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) {
4035 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
4036 
4037 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4038 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4039 				v6src = ipp->ipp_addr;
4040 		} else {
4041 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4042 				v6src = ipp->ipp_addr;
4043 		}
4044 	}
4045 
4046 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
4047 	mutex_exit(&connp->conn_lock);
4048 
4049 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
4050 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
4051 	switch (error) {
4052 	case 0:
4053 		break;
4054 	case EADDRNOTAVAIL:
4055 		/*
4056 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4057 		 * Don't have the application see that errno
4058 		 */
4059 		error = ENETUNREACH;
4060 		goto failed;
4061 	case ENETDOWN:
4062 		/*
4063 		 * Have !ipif_addr_ready address; drop packet silently
4064 		 * until we can get applications to not send until we
4065 		 * are ready.
4066 		 */
4067 		error = 0;
4068 		goto failed;
4069 	case EHOSTUNREACH:
4070 	case ENETUNREACH:
4071 		if (ixa->ixa_ire != NULL) {
4072 			/*
4073 			 * Let conn_ip_output/ire_send_noroute return
4074 			 * the error and send any local ICMP error.
4075 			 */
4076 			error = 0;
4077 			break;
4078 		}
4079 		/* FALLTHRU */
4080 	failed:
4081 	default:
4082 		goto ud_error;
4083 	}
4084 
4085 
4086 	/*
4087 	 * Cluster note: we let the cluster hook know that we are sending to a
4088 	 * new address and/or port.
4089 	 */
4090 	if (cl_inet_connect2 != NULL) {
4091 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
4092 		if (error != 0) {
4093 			error = EHOSTUNREACH;
4094 			goto ud_error;
4095 		}
4096 	}
4097 
4098 	mutex_enter(&connp->conn_lock);
4099 	/*
4100 	 * While we dropped the lock some other thread might have connected
4101 	 * this socket. If so we bail out with EISCONN to ensure that the
4102 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
4103 	 * and conn_*last*.
4104 	 */
4105 	if (udp->udp_state == TS_DATA_XFER) {
4106 		mutex_exit(&connp->conn_lock);
4107 		error = EISCONN;
4108 		goto ud_error;
4109 	}
4110 
4111 	/*
4112 	 * We need to rebuild the headers if
4113 	 *  - we are labeling packets (could be different for different
4114 	 *    destinations)
4115 	 *  - we have a source route (or routing header) since we need to
4116 	 *    massage that to get the pseudo-header checksum
4117 	 *  - the IP version is different than the last time
4118 	 *  - a socket option with COA_HEADER_CHANGED has been set which
4119 	 *    set conn_v6lastdst to zero.
4120 	 *
4121 	 * Otherwise the prepend function will just update the src, dst,
4122 	 * dstport, and flow label.
4123 	 */
4124 	if (is_system_labeled()) {
4125 		/* TX MLP requires SCM_UCRED and don't have that here */
4126 		if (connp->conn_mlp_type != mlptSingle) {
4127 			mutex_exit(&connp->conn_lock);
4128 			error = ECONNREFUSED;
4129 			goto ud_error;
4130 		}
4131 		/*
4132 		 * Check whether Trusted Solaris policy allows communication
4133 		 * with this host, and pretend that the destination is
4134 		 * unreachable if not.
4135 		 * Compute any needed label and place it in ipp_label_v4/v6.
4136 		 *
4137 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
4138 		 * ipp_label_v4/v6 to form the packet.
4139 		 *
4140 		 * Tsol note: Since we hold conn_lock we know no other
4141 		 * thread manipulates conn_xmit_ipp.
4142 		 */
4143 		error = conn_update_label(connp, ixa, &v6dst,
4144 		    &connp->conn_xmit_ipp);
4145 		if (error != 0) {
4146 			mutex_exit(&connp->conn_lock);
4147 			goto ud_error;
4148 		}
4149 		/* Rebuild the header template */
4150 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4151 		    flowinfo);
4152 		if (error != 0) {
4153 			mutex_exit(&connp->conn_lock);
4154 			goto ud_error;
4155 		}
4156 	} else if ((connp->conn_xmit_ipp.ipp_fields &
4157 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4158 	    ipversion != connp->conn_lastipversion ||
4159 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4160 		/* Rebuild the header template */
4161 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4162 		    flowinfo);
4163 		if (error != 0) {
4164 			mutex_exit(&connp->conn_lock);
4165 			goto ud_error;
4166 		}
4167 	} else {
4168 		/* Simply update the destination address if no source route */
4169 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4170 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
4171 
4172 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4173 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4174 				ipha->ipha_fragment_offset_and_flags |=
4175 				    IPH_DF_HTONS;
4176 			} else {
4177 				ipha->ipha_fragment_offset_and_flags &=
4178 				    ~IPH_DF_HTONS;
4179 			}
4180 		} else {
4181 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4182 			ip6h->ip6_dst = v6dst;
4183 		}
4184 	}
4185 
4186 	/*
4187 	 * Remember the dst/dstport etc which corresponds to the built header
4188 	 * template and conn_ixa.
4189 	 */
4190 	oldixa = conn_replace_ixa(connp, ixa);
4191 	connp->conn_v6lastdst = v6dst;
4192 	connp->conn_lastipversion = ipversion;
4193 	connp->conn_lastdstport = dstport;
4194 	connp->conn_lastflowinfo = flowinfo;
4195 	connp->conn_lastscopeid = ixa->ixa_scopeid;
4196 	connp->conn_lastsrcid = srcid;
4197 	/* Also remember a source to use together with lastdst */
4198 	connp->conn_v6lastsrc = v6src;
4199 
4200 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4201 	    dstport, flowinfo, &error);
4202 
4203 	/* Done with conn_t */
4204 	mutex_exit(&connp->conn_lock);
4205 	ixa_refrele(oldixa);
4206 
4207 	if (data_mp == NULL) {
4208 		ASSERT(error != 0);
4209 		goto ud_error;
4210 	}
4211 
4212 	/* We're done.  Pass the packet to ip. */
4213 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
4214 
4215 	error = conn_ip_output(data_mp, ixa);
4216 	/* No udpOutErrors if an error since IP increases its error counter */
4217 	switch (error) {
4218 	case 0:
4219 		break;
4220 	case EWOULDBLOCK:
4221 		(void) ixa_check_drain_insert(connp, ixa);
4222 		error = 0;
4223 		break;
4224 	case EADDRNOTAVAIL:
4225 		/*
4226 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4227 		 * Don't have the application see that errno
4228 		 */
4229 		error = ENETUNREACH;
4230 		/* FALLTHRU */
4231 	default:
4232 		mutex_enter(&connp->conn_lock);
4233 		/*
4234 		 * Clear the source and v6lastdst so we call ip_attr_connect
4235 		 * for the next packet and try to pick a better source.
4236 		 */
4237 		if (connp->conn_mcbc_bind)
4238 			connp->conn_saddr_v6 = ipv6_all_zeros;
4239 		else
4240 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4241 		connp->conn_v6lastdst = ipv6_all_zeros;
4242 		mutex_exit(&connp->conn_lock);
4243 		break;
4244 	}
4245 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4246 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4247 	ixa->ixa_cpid = connp->conn_cpid;
4248 	ixa_refrele(ixa);
4249 	return (error);
4250 
4251 ud_error:
4252 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4253 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4254 	ixa->ixa_cpid = connp->conn_cpid;
4255 	ixa_refrele(ixa);
4256 
4257 	freemsg(data_mp);
4258 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
4259 	UDP_STAT(us, udp_out_err_output);
4260 	return (error);
4261 }
4262 
4263 /* ARGSUSED */
4264 static void
4265 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4266 {
4267 #ifdef DEBUG
4268 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4269 #endif
4270 	freemsg(mp);
4271 }
4272 
4273 
4274 /*
4275  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4276  */
4277 static void
4278 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4279 {
4280 	void	*data;
4281 	mblk_t	*datamp = mp->b_cont;
4282 	conn_t	*connp = Q_TO_CONN(q);
4283 	udp_t	*udp = connp->conn_udp;
4284 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4285 
4286 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4287 		cmdp->cb_error = EPROTO;
4288 		qreply(q, mp);
4289 		return;
4290 	}
4291 	data = datamp->b_rptr;
4292 
4293 	mutex_enter(&connp->conn_lock);
4294 	switch (cmdp->cb_cmd) {
4295 	case TI_GETPEERNAME:
4296 		if (udp->udp_state != TS_DATA_XFER)
4297 			cmdp->cb_error = ENOTCONN;
4298 		else
4299 			cmdp->cb_error = conn_getpeername(connp, data,
4300 			    &cmdp->cb_len);
4301 		break;
4302 	case TI_GETMYNAME:
4303 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4304 		break;
4305 	default:
4306 		cmdp->cb_error = EINVAL;
4307 		break;
4308 	}
4309 	mutex_exit(&connp->conn_lock);
4310 
4311 	qreply(q, mp);
4312 }
4313 
4314 static void
4315 udp_use_pure_tpi(udp_t *udp)
4316 {
4317 	conn_t	*connp = udp->udp_connp;
4318 
4319 	mutex_enter(&connp->conn_lock);
4320 	udp->udp_issocket = B_FALSE;
4321 	mutex_exit(&connp->conn_lock);
4322 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4323 }
4324 
4325 static void
4326 udp_wput_other(queue_t *q, mblk_t *mp)
4327 {
4328 	uchar_t	*rptr = mp->b_rptr;
4329 	struct iocblk *iocp;
4330 	conn_t	*connp = Q_TO_CONN(q);
4331 	udp_t	*udp = connp->conn_udp;
4332 	cred_t	*cr;
4333 
4334 	switch (mp->b_datap->db_type) {
4335 	case M_CMD:
4336 		udp_wput_cmdblk(q, mp);
4337 		return;
4338 
4339 	case M_PROTO:
4340 	case M_PCPROTO:
4341 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4342 			/*
4343 			 * If the message does not contain a PRIM_type,
4344 			 * throw it away.
4345 			 */
4346 			freemsg(mp);
4347 			return;
4348 		}
4349 		switch (((t_primp_t)rptr)->type) {
4350 		case T_ADDR_REQ:
4351 			udp_addr_req(q, mp);
4352 			return;
4353 		case O_T_BIND_REQ:
4354 		case T_BIND_REQ:
4355 			udp_tpi_bind(q, mp);
4356 			return;
4357 		case T_CONN_REQ:
4358 			udp_tpi_connect(q, mp);
4359 			return;
4360 		case T_CAPABILITY_REQ:
4361 			udp_capability_req(q, mp);
4362 			return;
4363 		case T_INFO_REQ:
4364 			udp_info_req(q, mp);
4365 			return;
4366 		case T_UNITDATA_REQ:
4367 			/*
4368 			 * If a T_UNITDATA_REQ gets here, the address must
4369 			 * be bad.  Valid T_UNITDATA_REQs are handled
4370 			 * in udp_wput.
4371 			 */
4372 			udp_ud_err(q, mp, EADDRNOTAVAIL);
4373 			return;
4374 		case T_UNBIND_REQ:
4375 			udp_tpi_unbind(q, mp);
4376 			return;
4377 		case T_SVR4_OPTMGMT_REQ:
4378 			/*
4379 			 * All Solaris components should pass a db_credp
4380 			 * for this TPI message, hence we ASSERT.
4381 			 * But in case there is some other M_PROTO that looks
4382 			 * like a TPI message sent by some other kernel
4383 			 * component, we check and return an error.
4384 			 */
4385 			cr = msg_getcred(mp, NULL);
4386 			ASSERT(cr != NULL);
4387 			if (cr == NULL) {
4388 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4389 				return;
4390 			}
4391 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4392 			    cr)) {
4393 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4394 			}
4395 			return;
4396 
4397 		case T_OPTMGMT_REQ:
4398 			/*
4399 			 * All Solaris components should pass a db_credp
4400 			 * for this TPI message, hence we ASSERT.
4401 			 * But in case there is some other M_PROTO that looks
4402 			 * like a TPI message sent by some other kernel
4403 			 * component, we check and return an error.
4404 			 */
4405 			cr = msg_getcred(mp, NULL);
4406 			ASSERT(cr != NULL);
4407 			if (cr == NULL) {
4408 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4409 				return;
4410 			}
4411 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4412 			return;
4413 
4414 		case T_DISCON_REQ:
4415 			udp_tpi_disconnect(q, mp);
4416 			return;
4417 
4418 		/* The following TPI message is not supported by udp. */
4419 		case O_T_CONN_RES:
4420 		case T_CONN_RES:
4421 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4422 			return;
4423 
4424 		/* The following 3 TPI requests are illegal for udp. */
4425 		case T_DATA_REQ:
4426 		case T_EXDATA_REQ:
4427 		case T_ORDREL_REQ:
4428 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4429 			return;
4430 		default:
4431 			break;
4432 		}
4433 		break;
4434 	case M_FLUSH:
4435 		if (*rptr & FLUSHW)
4436 			flushq(q, FLUSHDATA);
4437 		break;
4438 	case M_IOCTL:
4439 		iocp = (struct iocblk *)mp->b_rptr;
4440 		switch (iocp->ioc_cmd) {
4441 		case TI_GETPEERNAME:
4442 			if (udp->udp_state != TS_DATA_XFER) {
4443 				/*
4444 				 * If a default destination address has not
4445 				 * been associated with the stream, then we
4446 				 * don't know the peer's name.
4447 				 */
4448 				iocp->ioc_error = ENOTCONN;
4449 				iocp->ioc_count = 0;
4450 				mp->b_datap->db_type = M_IOCACK;
4451 				qreply(q, mp);
4452 				return;
4453 			}
4454 			/* FALLTHRU */
4455 		case TI_GETMYNAME:
4456 			/*
4457 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4458 			 * need to copyin the user's strbuf structure.
4459 			 * Processing will continue in the M_IOCDATA case
4460 			 * below.
4461 			 */
4462 			mi_copyin(q, mp, NULL,
4463 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4464 			return;
4465 		case _SIOCSOCKFALLBACK:
4466 			/*
4467 			 * Either sockmod is about to be popped and the
4468 			 * socket would now be treated as a plain stream,
4469 			 * or a module is about to be pushed so we have
4470 			 * to follow pure TPI semantics.
4471 			 */
4472 			if (!udp->udp_issocket) {
4473 				DB_TYPE(mp) = M_IOCNAK;
4474 				iocp->ioc_error = EINVAL;
4475 			} else {
4476 				udp_use_pure_tpi(udp);
4477 
4478 				DB_TYPE(mp) = M_IOCACK;
4479 				iocp->ioc_error = 0;
4480 			}
4481 			iocp->ioc_count = 0;
4482 			iocp->ioc_rval = 0;
4483 			qreply(q, mp);
4484 			return;
4485 		default:
4486 			break;
4487 		}
4488 		break;
4489 	case M_IOCDATA:
4490 		udp_wput_iocdata(q, mp);
4491 		return;
4492 	default:
4493 		/* Unrecognized messages are passed through without change. */
4494 		break;
4495 	}
4496 	ip_wput_nondata(q, mp);
4497 }
4498 
4499 /*
4500  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4501  * messages.
4502  */
4503 static void
4504 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4505 {
4506 	mblk_t		*mp1;
4507 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
4508 	STRUCT_HANDLE(strbuf, sb);
4509 	uint_t		addrlen;
4510 	conn_t		*connp = Q_TO_CONN(q);
4511 	udp_t		*udp = connp->conn_udp;
4512 
4513 	/* Make sure it is one of ours. */
4514 	switch (iocp->ioc_cmd) {
4515 	case TI_GETMYNAME:
4516 	case TI_GETPEERNAME:
4517 		break;
4518 	default:
4519 		ip_wput_nondata(q, mp);
4520 		return;
4521 	}
4522 
4523 	switch (mi_copy_state(q, mp, &mp1)) {
4524 	case -1:
4525 		return;
4526 	case MI_COPY_CASE(MI_COPY_IN, 1):
4527 		break;
4528 	case MI_COPY_CASE(MI_COPY_OUT, 1):
4529 		/*
4530 		 * The address has been copied out, so now
4531 		 * copyout the strbuf.
4532 		 */
4533 		mi_copyout(q, mp);
4534 		return;
4535 	case MI_COPY_CASE(MI_COPY_OUT, 2):
4536 		/*
4537 		 * The address and strbuf have been copied out.
4538 		 * We're done, so just acknowledge the original
4539 		 * M_IOCTL.
4540 		 */
4541 		mi_copy_done(q, mp, 0);
4542 		return;
4543 	default:
4544 		/*
4545 		 * Something strange has happened, so acknowledge
4546 		 * the original M_IOCTL with an EPROTO error.
4547 		 */
4548 		mi_copy_done(q, mp, EPROTO);
4549 		return;
4550 	}
4551 
4552 	/*
4553 	 * Now we have the strbuf structure for TI_GETMYNAME
4554 	 * and TI_GETPEERNAME.  Next we copyout the requested
4555 	 * address and then we'll copyout the strbuf.
4556 	 */
4557 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4558 
4559 	if (connp->conn_family == AF_INET)
4560 		addrlen = sizeof (sin_t);
4561 	else
4562 		addrlen = sizeof (sin6_t);
4563 
4564 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
4565 		mi_copy_done(q, mp, EINVAL);
4566 		return;
4567 	}
4568 
4569 	switch (iocp->ioc_cmd) {
4570 	case TI_GETMYNAME:
4571 		break;
4572 	case TI_GETPEERNAME:
4573 		if (udp->udp_state != TS_DATA_XFER) {
4574 			mi_copy_done(q, mp, ENOTCONN);
4575 			return;
4576 		}
4577 		break;
4578 	}
4579 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4580 	if (!mp1)
4581 		return;
4582 
4583 	STRUCT_FSET(sb, len, addrlen);
4584 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4585 	case TI_GETMYNAME:
4586 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4587 		    &addrlen);
4588 		break;
4589 	case TI_GETPEERNAME:
4590 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4591 		    &addrlen);
4592 		break;
4593 	}
4594 	mp1->b_wptr += addrlen;
4595 	/* Copy out the address */
4596 	mi_copyout(q, mp);
4597 }
4598 
4599 void
4600 udp_ddi_g_init(void)
4601 {
4602 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4603 	    udp_opt_obj.odb_opt_arr_cnt);
4604 
4605 	/*
4606 	 * We want to be informed each time a stack is created or
4607 	 * destroyed in the kernel, so we can maintain the
4608 	 * set of udp_stack_t's.
4609 	 */
4610 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4611 }
4612 
4613 void
4614 udp_ddi_g_destroy(void)
4615 {
4616 	netstack_unregister(NS_UDP);
4617 }
4618 
4619 #define	INET_NAME	"ip"
4620 
4621 /*
4622  * Initialize the UDP stack instance.
4623  */
4624 static void *
4625 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4626 {
4627 	udp_stack_t	*us;
4628 	int		i;
4629 	int		error = 0;
4630 	major_t		major;
4631 	size_t		arrsz;
4632 
4633 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4634 	us->us_netstack = ns;
4635 
4636 	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4637 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4638 	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4639 	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4640 
4641 	/*
4642 	 * The smallest anonymous port in the priviledged port range which UDP
4643 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4644 	 */
4645 	us->us_min_anonpriv_port = 512;
4646 
4647 	us->us_bind_fanout_size = udp_bind_fanout_size;
4648 
4649 	/* Roundup variable that might have been modified in /etc/system */
4650 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
4651 		/* Not a power of two. Round up to nearest power of two */
4652 		for (i = 0; i < 31; i++) {
4653 			if (us->us_bind_fanout_size < (1 << i))
4654 				break;
4655 		}
4656 		us->us_bind_fanout_size = 1 << i;
4657 	}
4658 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4659 	    sizeof (udp_fanout_t), KM_SLEEP);
4660 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4661 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4662 		    NULL);
4663 	}
4664 
4665 	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4666 	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4667 	    KM_SLEEP);
4668 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4669 
4670 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
4671 	us->us_mibkp = udp_kstat_init(stackid);
4672 
4673 	major = mod_name_to_major(INET_NAME);
4674 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
4675 	ASSERT(error == 0);
4676 	return (us);
4677 }
4678 
4679 /*
4680  * Free the UDP stack instance.
4681  */
4682 static void
4683 udp_stack_fini(netstackid_t stackid, void *arg)
4684 {
4685 	udp_stack_t *us = (udp_stack_t *)arg;
4686 	int i;
4687 
4688 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4689 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4690 	}
4691 
4692 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4693 	    sizeof (udp_fanout_t));
4694 
4695 	us->us_bind_fanout = NULL;
4696 
4697 	kmem_free(us->us_propinfo_tbl,
4698 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4699 	us->us_propinfo_tbl = NULL;
4700 
4701 	udp_kstat_fini(stackid, us->us_mibkp);
4702 	us->us_mibkp = NULL;
4703 
4704 	udp_kstat2_fini(stackid, us->us_kstat);
4705 	us->us_kstat = NULL;
4706 	bzero(&us->us_statistics, sizeof (us->us_statistics));
4707 
4708 	mutex_destroy(&us->us_epriv_port_lock);
4709 	ldi_ident_release(us->us_ldi_ident);
4710 	kmem_free(us, sizeof (*us));
4711 }
4712 
4713 static void *
4714 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
4715 {
4716 	kstat_t *ksp;
4717 
4718 	udp_stat_t template = {
4719 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
4720 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
4721 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
4722 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
4723 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
4724 #ifdef DEBUG
4725 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
4726 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
4727 		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
4728 		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
4729 		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
4730 		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
4731 		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
4732 #endif
4733 	};
4734 
4735 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
4736 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4737 	    KSTAT_FLAG_VIRTUAL, stackid);
4738 
4739 	if (ksp == NULL)
4740 		return (NULL);
4741 
4742 	bcopy(&template, us_statisticsp, sizeof (template));
4743 	ksp->ks_data = (void *)us_statisticsp;
4744 	ksp->ks_private = (void *)(uintptr_t)stackid;
4745 
4746 	kstat_install(ksp);
4747 	return (ksp);
4748 }
4749 
4750 static void
4751 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
4752 {
4753 	if (ksp != NULL) {
4754 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4755 		kstat_delete_netstack(ksp, stackid);
4756 	}
4757 }
4758 
4759 static void *
4760 udp_kstat_init(netstackid_t stackid)
4761 {
4762 	kstat_t	*ksp;
4763 
4764 	udp_named_kstat_t template = {
4765 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
4766 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
4767 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
4768 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
4769 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
4770 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
4771 	};
4772 
4773 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
4774 	    KSTAT_TYPE_NAMED,
4775 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
4776 
4777 	if (ksp == NULL || ksp->ks_data == NULL)
4778 		return (NULL);
4779 
4780 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
4781 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
4782 
4783 	bcopy(&template, ksp->ks_data, sizeof (template));
4784 	ksp->ks_update = udp_kstat_update;
4785 	ksp->ks_private = (void *)(uintptr_t)stackid;
4786 
4787 	kstat_install(ksp);
4788 	return (ksp);
4789 }
4790 
4791 static void
4792 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4793 {
4794 	if (ksp != NULL) {
4795 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4796 		kstat_delete_netstack(ksp, stackid);
4797 	}
4798 }
4799 
4800 static int
4801 udp_kstat_update(kstat_t *kp, int rw)
4802 {
4803 	udp_named_kstat_t *udpkp;
4804 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
4805 	netstack_t	*ns;
4806 	udp_stack_t	*us;
4807 
4808 	if ((kp == NULL) || (kp->ks_data == NULL))
4809 		return (EIO);
4810 
4811 	if (rw == KSTAT_WRITE)
4812 		return (EACCES);
4813 
4814 	ns = netstack_find_by_stackid(stackid);
4815 	if (ns == NULL)
4816 		return (-1);
4817 	us = ns->netstack_udp;
4818 	if (us == NULL) {
4819 		netstack_rele(ns);
4820 		return (-1);
4821 	}
4822 	udpkp = (udp_named_kstat_t *)kp->ks_data;
4823 
4824 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
4825 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
4826 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
4827 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
4828 	netstack_rele(ns);
4829 	return (0);
4830 }
4831 
4832 static size_t
4833 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4834 {
4835 	udp_stack_t *us = udp->udp_us;
4836 
4837 	/* We add a bit of extra buffering */
4838 	size += size >> 1;
4839 	if (size > us->us_max_buf)
4840 		size = us->us_max_buf;
4841 
4842 	udp->udp_rcv_hiwat = size;
4843 	return (size);
4844 }
4845 
4846 /*
4847  * For the lower queue so that UDP can be a dummy mux.
4848  * Nobody should be sending
4849  * packets up this stream
4850  */
4851 static void
4852 udp_lrput(queue_t *q, mblk_t *mp)
4853 {
4854 	switch (mp->b_datap->db_type) {
4855 	case M_FLUSH:
4856 		/* Turn around */
4857 		if (*mp->b_rptr & FLUSHW) {
4858 			*mp->b_rptr &= ~FLUSHR;
4859 			qreply(q, mp);
4860 			return;
4861 		}
4862 		break;
4863 	}
4864 	freemsg(mp);
4865 }
4866 
4867 /*
4868  * For the lower queue so that UDP can be a dummy mux.
4869  * Nobody should be sending packets down this stream.
4870  */
4871 /* ARGSUSED */
4872 void
4873 udp_lwput(queue_t *q, mblk_t *mp)
4874 {
4875 	freemsg(mp);
4876 }
4877 
4878 /*
4879  * Below routines for UDP socket module.
4880  */
4881 
4882 static conn_t *
4883 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4884 {
4885 	udp_t		*udp;
4886 	conn_t		*connp;
4887 	zoneid_t 	zoneid;
4888 	netstack_t 	*ns;
4889 	udp_stack_t 	*us;
4890 	int		len;
4891 
4892 	ASSERT(errorp != NULL);
4893 
4894 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4895 		return (NULL);
4896 
4897 	ns = netstack_find_by_cred(credp);
4898 	ASSERT(ns != NULL);
4899 	us = ns->netstack_udp;
4900 	ASSERT(us != NULL);
4901 
4902 	/*
4903 	 * For exclusive stacks we set the zoneid to zero
4904 	 * to make UDP operate as if in the global zone.
4905 	 */
4906 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4907 		zoneid = GLOBAL_ZONEID;
4908 	else
4909 		zoneid = crgetzoneid(credp);
4910 
4911 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4912 
4913 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4914 	if (connp == NULL) {
4915 		netstack_rele(ns);
4916 		*errorp = ENOMEM;
4917 		return (NULL);
4918 	}
4919 	udp = connp->conn_udp;
4920 
4921 	/*
4922 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4923 	 * done by netstack_find_by_cred()
4924 	 */
4925 	netstack_rele(ns);
4926 
4927 	/*
4928 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4929 	 * need to lock anything.
4930 	 */
4931 	ASSERT(connp->conn_proto == IPPROTO_UDP);
4932 	ASSERT(connp->conn_udp == udp);
4933 	ASSERT(udp->udp_connp == connp);
4934 
4935 	/* Set the initial state of the stream and the privilege status. */
4936 	udp->udp_state = TS_UNBND;
4937 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4938 	if (isv6) {
4939 		connp->conn_family = AF_INET6;
4940 		connp->conn_ipversion = IPV6_VERSION;
4941 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4942 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
4943 		len = sizeof (ip6_t) + UDPH_SIZE;
4944 	} else {
4945 		connp->conn_family = AF_INET;
4946 		connp->conn_ipversion = IPV4_VERSION;
4947 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4948 		connp->conn_default_ttl = us->us_ipv4_ttl;
4949 		len = sizeof (ipha_t) + UDPH_SIZE;
4950 	}
4951 
4952 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4953 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4954 
4955 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4956 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4957 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4958 	connp->conn_ixa->ixa_zoneid = zoneid;
4959 
4960 	connp->conn_zoneid = zoneid;
4961 
4962 	/*
4963 	 * If the caller has the process-wide flag set, then default to MAC
4964 	 * exempt mode.  This allows read-down to unlabeled hosts.
4965 	 */
4966 	if (getpflags(NET_MAC_AWARE, credp) != 0)
4967 		connp->conn_mac_mode = CONN_MAC_AWARE;
4968 
4969 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4970 
4971 	udp->udp_us = us;
4972 
4973 	connp->conn_rcvbuf = us->us_recv_hiwat;
4974 	connp->conn_sndbuf = us->us_xmit_hiwat;
4975 	connp->conn_sndlowat = us->us_xmit_lowat;
4976 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4977 
4978 	connp->conn_wroff = len + us->us_wroff_extra;
4979 	connp->conn_so_type = SOCK_DGRAM;
4980 
4981 	connp->conn_recv = udp_input;
4982 	connp->conn_recvicmp = udp_icmp_input;
4983 	crhold(credp);
4984 	connp->conn_cred = credp;
4985 	connp->conn_cpid = curproc->p_pid;
4986 	connp->conn_open_time = ddi_get_lbolt64();
4987 	/* Cache things in ixa without an extra refhold */
4988 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4989 	connp->conn_ixa->ixa_cred = connp->conn_cred;
4990 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4991 	if (is_system_labeled())
4992 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4993 
4994 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4995 
4996 	if (us->us_pmtu_discovery)
4997 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4998 
4999 	return (connp);
5000 }
5001 
5002 sock_lower_handle_t
5003 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
5004     uint_t *smodep, int *errorp, int flags, cred_t *credp)
5005 {
5006 	udp_t		*udp = NULL;
5007 	udp_stack_t	*us;
5008 	conn_t		*connp;
5009 	boolean_t	isv6;
5010 
5011 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
5012 	    (proto != 0 && proto != IPPROTO_UDP)) {
5013 		*errorp = EPROTONOSUPPORT;
5014 		return (NULL);
5015 	}
5016 
5017 	if (family == AF_INET6)
5018 		isv6 = B_TRUE;
5019 	else
5020 		isv6 = B_FALSE;
5021 
5022 	connp = udp_do_open(credp, isv6, flags, errorp);
5023 	if (connp == NULL)
5024 		return (NULL);
5025 
5026 	udp = connp->conn_udp;
5027 	ASSERT(udp != NULL);
5028 	us = udp->udp_us;
5029 	ASSERT(us != NULL);
5030 
5031 	udp->udp_issocket = B_TRUE;
5032 	connp->conn_flags |= IPCL_NONSTR;
5033 
5034 	/*
5035 	 * Set flow control
5036 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
5037 	 * need to lock anything.
5038 	 */
5039 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
5040 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
5041 
5042 	connp->conn_flow_cntrld = B_FALSE;
5043 
5044 	mutex_enter(&connp->conn_lock);
5045 	connp->conn_state_flags &= ~CONN_INCIPIENT;
5046 	mutex_exit(&connp->conn_lock);
5047 
5048 	*errorp = 0;
5049 	*smodep = SM_ATOMIC;
5050 	*sock_downcalls = &sock_udp_downcalls;
5051 	return ((sock_lower_handle_t)connp);
5052 }
5053 
5054 /* ARGSUSED3 */
5055 void
5056 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
5057     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
5058 {
5059 	conn_t 		*connp = (conn_t *)proto_handle;
5060 	struct sock_proto_props sopp;
5061 
5062 	/* All Solaris components should pass a cred for this operation. */
5063 	ASSERT(cr != NULL);
5064 
5065 	connp->conn_upcalls = sock_upcalls;
5066 	connp->conn_upper_handle = sock_handle;
5067 
5068 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
5069 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
5070 	sopp.sopp_wroff = connp->conn_wroff;
5071 	sopp.sopp_maxblk = INFPSZ;
5072 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
5073 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
5074 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
5075 	sopp.sopp_maxpsz =
5076 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
5077 	    UDP_MAXPACKET_IPV6;
5078 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
5079 	    udp_mod_info.mi_minpsz;
5080 
5081 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
5082 	    &sopp);
5083 }
5084 
5085 static void
5086 udp_do_close(conn_t *connp)
5087 {
5088 	udp_t	*udp;
5089 
5090 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
5091 	udp = connp->conn_udp;
5092 
5093 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
5094 		/*
5095 		 * Running in cluster mode - register unbind information
5096 		 */
5097 		if (connp->conn_ipversion == IPV4_VERSION) {
5098 			(*cl_inet_unbind)(
5099 			    connp->conn_netstack->netstack_stackid,
5100 			    IPPROTO_UDP, AF_INET,
5101 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5102 			    (in_port_t)connp->conn_lport, NULL);
5103 		} else {
5104 			(*cl_inet_unbind)(
5105 			    connp->conn_netstack->netstack_stackid,
5106 			    IPPROTO_UDP, AF_INET6,
5107 			    (uint8_t *)&(connp->conn_laddr_v6),
5108 			    (in_port_t)connp->conn_lport, NULL);
5109 		}
5110 	}
5111 
5112 	udp_bind_hash_remove(udp, B_FALSE);
5113 
5114 	ip_quiesce_conn(connp);
5115 
5116 	if (!IPCL_IS_NONSTR(connp)) {
5117 		ASSERT(connp->conn_wq != NULL);
5118 		ASSERT(connp->conn_rq != NULL);
5119 		qprocsoff(connp->conn_rq);
5120 	}
5121 
5122 	udp_close_free(connp);
5123 
5124 	/*
5125 	 * Now we are truly single threaded on this stream, and can
5126 	 * delete the things hanging off the connp, and finally the connp.
5127 	 * We removed this connp from the fanout list, it cannot be
5128 	 * accessed thru the fanouts, and we already waited for the
5129 	 * conn_ref to drop to 0. We are already in close, so
5130 	 * there cannot be any other thread from the top. qprocsoff
5131 	 * has completed, and service has completed or won't run in
5132 	 * future.
5133 	 */
5134 	ASSERT(connp->conn_ref == 1);
5135 
5136 	if (!IPCL_IS_NONSTR(connp)) {
5137 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
5138 	} else {
5139 		ip_free_helper_stream(connp);
5140 	}
5141 
5142 	connp->conn_ref--;
5143 	ipcl_conn_destroy(connp);
5144 }
5145 
5146 /* ARGSUSED1 */
5147 int
5148 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
5149 {
5150 	conn_t	*connp = (conn_t *)proto_handle;
5151 
5152 	/* All Solaris components should pass a cred for this operation. */
5153 	ASSERT(cr != NULL);
5154 
5155 	udp_do_close(connp);
5156 	return (0);
5157 }
5158 
/*
 * Bind the UDP endpoint `connp' to the local address and port given in `sa'.
 *
 * sa, len: a sin_t (AF_INET endpoints) or a sin6_t (AF_INET6 endpoints);
 *	any other length fails with -TBADADDR.  On success the port that
 *	was bound is written back into sa in network byte order.
 * cr: credential used for the privileged-port and multilevel-port checks.
 * bind_to_req_port_only: when B_TRUE and a non-zero port was requested, a
 *	conflicting binding fails the request with -TADDRBUSY; otherwise
 *	another port is searched for.
 *
 * Returns 0 on success.  Failures are reported either as a positive errno
 * value (EINVAL, EAFNOSUPPORT, EADDRNOTAVAIL, ...) or as a negated TLI
 * error (-TBADADDR, -TACCES, -TNOADDR, -TOUTSTATE, -TADDRBUSY); udp_bind()
 * and udp_implicit_bind() translate the negative values.
 *
 * Failures detected after the endpoint has been inserted in the bind hash
 * are unwound at the late_error label, which removes it from the hash and
 * puts it back in TS_UNBND.
 */
5159 static int
5160 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
5161     boolean_t bind_to_req_port_only)
5162 {
5163 	sin_t		*sin;
5164 	sin6_t		*sin6;
5165 	udp_t		*udp = connp->conn_udp;
5166 	int		error = 0;
5167 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
5168 	in_port_t	port;		/* Host byte order */
5169 	in_port_t	requested_port;	/* Host byte order */
5170 	int		count;
5171 	ipaddr_t	v4src;		/* Set if AF_INET */
5172 	in6_addr_t	v6src;
5173 	int		loopmax;
5174 	udp_fanout_t	*udpf;
5175 	in_port_t	lport;		/* Network byte order */
5176 	uint_t		scopeid = 0;
5177 	zoneid_t	zoneid = IPCL_ZONEID(connp);
5178 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5179 	boolean_t	is_inaddr_any;
5180 	mlp_type_t	addrtype, mlptype;
5181 	udp_stack_t	*us = udp->udp_us;
5182 
5183 	switch (len) {
5184 	case sizeof (sin_t):	/* Complete IPv4 address */
5185 		sin = (sin_t *)sa;
5186 
5187 		if (sin == NULL || !OK_32PTR((char *)sin))
5188 			return (EINVAL);
5189 
5190 		if (connp->conn_family != AF_INET ||
5191 		    sin->sin_family != AF_INET) {
5192 			return (EAFNOSUPPORT);
5193 		}
5194 		v4src = sin->sin_addr.s_addr;
5195 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
5196 		if (v4src != INADDR_ANY) {
5197 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
5198 			    B_TRUE);
5199 		}
5200 		port = ntohs(sin->sin_port);
5201 		break;
5202 
5203 	case sizeof (sin6_t):	/* complete IPv6 address */
5204 		sin6 = (sin6_t *)sa;
5205 
5206 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
5207 			return (EINVAL);
5208 
5209 		if (connp->conn_family != AF_INET6 ||
5210 		    sin6->sin6_family != AF_INET6) {
5211 			return (EAFNOSUPPORT);
5212 		}
5213 		v6src = sin6->sin6_addr;
5214 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5215 			if (connp->conn_ipv6_v6only)
5216 				return (EADDRNOTAVAIL);
5217 
5218 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5219 			if (v4src != INADDR_ANY) {
5220 				laddr_type = ip_laddr_verify_v4(v4src,
5221 				    zoneid, ipst, B_FALSE);
5222 			}
5223 		} else {
5224 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5225 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5226 					scopeid = sin6->sin6_scope_id;
5227 				laddr_type = ip_laddr_verify_v6(&v6src,
5228 				    zoneid, ipst, B_TRUE, scopeid);
5229 			}
5230 		}
5231 		port = ntohs(sin6->sin6_port);
5232 		break;
5233 
5234 	default:		/* Invalid request */
5235 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5236 		    "udp_bind: bad ADDR_length length %u", len);
5237 		return (-TBADADDR);
5238 	}
5239 
5240 	/* Is the local address a valid unicast, multicast, or broadcast? */
5241 	if (laddr_type == IPVL_BAD)
5242 		return (EADDRNOTAVAIL);
5243 
5244 	requested_port = port;
5245 
5246 	if (requested_port == 0 || !bind_to_req_port_only)
5247 		bind_to_req_port_only = B_FALSE;
5248 	else		/* T_BIND_REQ and requested_port != 0 */
5249 		bind_to_req_port_only = B_TRUE;
5250 
5251 	if (requested_port == 0) {
5252 		/*
5253 		 * If the application passed in zero for the port number, it
5254 		 * doesn't care which port number we bind to. Get one in the
5255 		 * valid range.
5256 		 */
5257 		if (connp->conn_anon_priv_bind) {
5258 			port = udp_get_next_priv_port(udp);
5259 		} else {
5260 			port = udp_update_next_port(udp,
5261 			    us->us_next_port_to_try, B_TRUE);
5262 		}
5263 	} else {
5264 		/*
5265 		 * If the port is in the well-known privileged range,
5266 		 * make sure the caller was privileged.
5267 		 */
5268 		int i;
5269 		boolean_t priv = B_FALSE;
5270 
5271 		if (port < us->us_smallest_nonpriv_port) {
5272 			priv = B_TRUE;
5273 		} else {
5274 			for (i = 0; i < us->us_num_epriv_ports; i++) {
5275 				if (port == us->us_epriv_ports[i]) {
5276 					priv = B_TRUE;
5277 					break;
5278 				}
5279 			}
5280 		}
5281 
5282 		if (priv) {
5283 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5284 				return (-TACCES);
5285 		}
5286 	}
5287 
5288 	if (port == 0)
5289 		return (-TNOADDR);
5290 
5291 	/*
5292 	 * The state must be TS_UNBND. TPI mandates that users must send
5293 	 * TPI primitives only 1 at a time and wait for the response before
5294 	 * sending the next primitive.
5295 	 */
5296 	mutex_enter(&connp->conn_lock);
5297 	if (udp->udp_state != TS_UNBND) {
5298 		mutex_exit(&connp->conn_lock);
5299 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5300 		    "udp_bind: bad state, %u", udp->udp_state);
5301 		return (-TOUTSTATE);
5302 	}
5303 	/*
5304 	 * Copy the source address into our udp structure. This address
5305 	 * may still be zero; if so, IP will fill in the correct address
5306 	 * each time an outbound packet is passed to it. Since the udp is
5307 	 * not yet in the bind hash list, we don't grab the uf_lock to
5308 	 * change conn_ipversion
5309 	 */
5310 	if (connp->conn_family == AF_INET) {
5311 		ASSERT(sin != NULL);
5312 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5313 	} else {
5314 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5315 			/*
5316 			 * no need to hold the uf_lock to set the conn_ipversion
5317 			 * since we are not yet in the fanout list
5318 			 */
5319 			connp->conn_ipversion = IPV4_VERSION;
5320 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5321 		} else {
5322 			connp->conn_ipversion = IPV6_VERSION;
5323 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5324 		}
5325 	}
5326 
5327 	/*
5328 	 * If conn_reuseaddr is not set, then we have to make sure that
5329 	 * the IP address and port number the application requested
5330 	 * (or we selected for the application) is not being used by
5331 	 * another stream.  If another stream is already using the
5332 	 * requested IP address and port, the behavior depends on
5333 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
5334 	 * search for any unused port to bind to the stream.
5335 	 *
5336 	 * As per the BSD semantics, as modified by the Deering multicast
5337 	 * changes, if conn_reuseaddr is set, then we allow multiple binds
5338 	 * to the same port independent of the local IP address.
5339 	 *
5340 	 * This is slightly different than in SunOS 4.X which did not
5341 	 * support IP multicast. Note that the change implemented by the
5342 	 * Deering multicast code affects all binds - not only binding
5343 	 * to IP multicast addresses.
5344 	 *
5345 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
5346 	 * order to guarantee a unique port.
5347 	 */
5348 
5349 	count = 0;
5350 	if (connp->conn_anon_priv_bind) {
5351 		/*
5352 		 * loopmax = (IPPORT_RESERVED-1) -
5353 		 *    us->us_min_anonpriv_port + 1
5354 		 */
5355 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5356 	} else {
5357 		loopmax = us->us_largest_anon_port -
5358 		    us->us_smallest_anon_port + 1;
5359 	}
5360 
5361 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5362 
5363 	for (;;) {
5364 		udp_t		*udp1;
5365 		boolean_t	found_exclbind = B_FALSE;
5366 		conn_t		*connp1;
5367 
5368 		/*
5369 		 * Walk through the list of udp streams bound to
5370 		 * requested port with the same IP address.
5371 		 */
5372 		lport = htons(port);
5373 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5374 		    us->us_bind_fanout_size)];
5375 		mutex_enter(&udpf->uf_lock);
5376 		for (udp1 = udpf->uf_udp; udp1 != NULL;
5377 		    udp1 = udp1->udp_bind_hash) {
5378 			connp1 = udp1->udp_connp;
5379 
5380 			if (lport != connp1->conn_lport)
5381 				continue;
5382 
5383 			/*
5384 			 * On a labeled system, we must treat bindings to ports
5385 			 * on shared IP addresses by sockets with MAC exemption
5386 			 * privilege as being in all zones, as there's
5387 			 * otherwise no way to identify the right receiver.
5388 			 */
5389 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5390 				continue;
5391 
5392 			/*
5393 			 * If UDP_EXCLBIND is set for either the bound or
5394 			 * binding endpoint, the semantics of bind
5395 			 * is changed according to the following chart.
5396 			 *
5397 			 * spec = specified address (v4 or v6)
5398 			 * unspec = unspecified address (v4 or v6)
5399 			 * A = specified addresses are different for endpoints
5400 			 *
5401 			 * bound	bind to		allowed?
5402 			 * -------------------------------------
5403 			 * unspec	unspec		no
5404 			 * unspec	spec		no
5405 			 * spec		unspec		no
5406 			 * spec		spec		yes if A
5407 			 *
5408 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
5409 			 * as UDP_EXCLBIND, except that zoneid is ignored.
5410 			 */
5411 			if (connp1->conn_exclbind || connp->conn_exclbind ||
5412 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5413 				if (V6_OR_V4_INADDR_ANY(
5414 				    connp1->conn_bound_addr_v6) ||
5415 				    is_inaddr_any ||
5416 				    IN6_ARE_ADDR_EQUAL(
5417 				    &connp1->conn_bound_addr_v6,
5418 				    &v6src)) {
5419 					found_exclbind = B_TRUE;
5420 					break;
5421 				}
5422 				continue;
5423 			}
5424 
5425 			/*
5426 			 * Check ipversion to allow IPv4 and IPv6 sockets to
5427 			 * have disjoint port number spaces.
5428 			 */
5429 			if (connp->conn_ipversion != connp1->conn_ipversion) {
5430 
5431 				/*
5432 				 * On the first time through the loop, if the
5433 				 * user intentionally specified a
5434 				 * particular port number, then ignore any
5435 				 * bindings of the other protocol that may
5436 				 * conflict. This allows the user to bind IPv6
5437 				 * alone and get both v4 and v6, or bind
5438 				 * both and get each separately. On subsequent
5439 				 * times through the loop, we're checking a
5440 				 * port that we chose (not the user) and thus
5441 				 * we do not allow casual duplicate bindings.
5442 				 */
5443 				if (count == 0 && requested_port != 0)
5444 					continue;
5445 			}
5446 
5447 			/*
5448 			 * No difference depending on SO_REUSEADDR.
5449 			 *
5450 			 * If existing port is bound to a
5451 			 * non-wildcard IP address and
5452 			 * the requesting stream is bound to
5453 			 * a distinct different IP addresses
5454 			 * (non-wildcard, also), keep going.
5455 			 */
5456 			if (!is_inaddr_any &&
5457 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5458 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5459 			    &v6src)) {
5460 				continue;
5461 			}
5462 			break;
5463 		}
5464 
5465 		if (!found_exclbind &&
5466 		    (connp->conn_reuseaddr && requested_port != 0)) {
5467 			break;
5468 		}
5469 
5470 		if (udp1 == NULL) {
5471 			/*
5472 			 * No other stream has this IP address
5473 			 * and port number. We can use it.
5474 			 */
5475 			break;
5476 		}
5477 		mutex_exit(&udpf->uf_lock);
5478 		if (bind_to_req_port_only) {
5479 			/*
5480 			 * We get here only when requested port
5481 			 * is bound (and only on the first iteration
5482 			 * of the for() loop).
5483 			 *
5484 			 * The semantics of this bind request
5485 			 * require it to fail so we return from
5486 			 * the routine (and exit the loop).
5487 			 *
5488 			 */
5489 			mutex_exit(&connp->conn_lock);
5490 			return (-TADDRBUSY);
5491 		}
5492 
5493 		if (connp->conn_anon_priv_bind) {
5494 			port = udp_get_next_priv_port(udp);
5495 		} else {
5496 			if ((count == 0) && (requested_port != 0)) {
5497 				/*
5498 				 * If the application wants us to find
5499 				 * a port, get one to start with. Set
5500 				 * requested_port to 0, so that we will
5501 				 * update us->us_next_port_to_try below.
5502 				 */
5503 				port = udp_update_next_port(udp,
5504 				    us->us_next_port_to_try, B_TRUE);
5505 				requested_port = 0;
5506 			} else {
5507 				port = udp_update_next_port(udp, port + 1,
5508 				    B_FALSE);
5509 			}
5510 		}
5511 
5512 		if (port == 0 || ++count >= loopmax) {
5513 			/*
5514 			 * We've tried every possible port number and
5515 			 * there are none available, so send an error
5516 			 * to the user.
5517 			 */
5518 			mutex_exit(&connp->conn_lock);
5519 			return (-TNOADDR);
5520 		}
5521 	}
5522 
5523 	/*
5524 	 * Copy the source address into our udp structure.  This address
5525 	 * may still be zero; if so, ip_attr_connect will fill in the correct
5526 	 * address when a packet is about to be sent.
5527 	 * If we are binding to a broadcast or multicast address then
5528 	 * we just set the conn_bound_addr since we don't want to use
5529 	 * that as the source address when sending.
5530 	 */
5531 	connp->conn_bound_addr_v6 = v6src;
5532 	connp->conn_laddr_v6 = v6src;
5533 	if (scopeid != 0) {
5534 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5535 		connp->conn_ixa->ixa_scopeid = scopeid;
5536 		connp->conn_incoming_ifindex = scopeid;
5537 	} else {
5538 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5539 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5540 	}
5541 
5542 	switch (laddr_type) {
5543 	case IPVL_UNICAST_UP:
5544 	case IPVL_UNICAST_DOWN:
5545 		connp->conn_saddr_v6 = v6src;
5546 		connp->conn_mcbc_bind = B_FALSE;
5547 		break;
5548 	case IPVL_MCAST:
5549 	case IPVL_BCAST:
5550 		/* ip_set_destination will pick a source address later */
5551 		connp->conn_saddr_v6 = ipv6_all_zeros;
5552 		connp->conn_mcbc_bind = B_TRUE;
5553 		break;
5554 	}
5555 
5556 	/* Any errors after this point should use late_error */
5557 	connp->conn_lport = lport;
5558 
5559 	/*
5560 	 * Now reset the next anonymous port if the application requested
5561 	 * an anonymous port, or we handed out the next anonymous port.
5562 	 */
5563 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5564 		us->us_next_port_to_try = port + 1;
5565 	}
5566 
5567 	/* Initialize the T_BIND_ACK. */
5568 	if (connp->conn_family == AF_INET) {
5569 		sin->sin_port = connp->conn_lport;
5570 	} else {
5571 		sin6->sin6_port = connp->conn_lport;
5572 	}
5573 	udp->udp_state = TS_IDLE;
5574 	udp_bind_hash_insert(udpf, udp);
5575 	mutex_exit(&udpf->uf_lock);
5576 	mutex_exit(&connp->conn_lock);
5577 
5578 	if (cl_inet_bind) {
5579 		/*
5580 		 * Running in cluster mode - register bind information
5581 		 */
5582 		if (connp->conn_ipversion == IPV4_VERSION) {
5583 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5584 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5585 			    (in_port_t)connp->conn_lport, NULL);
5586 		} else {
5587 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5588 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5589 			    (in_port_t)connp->conn_lport, NULL);
5590 		}
5591 	}
5592 
5593 	mutex_enter(&connp->conn_lock);
5594 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5595 	if (is_system_labeled() && (!connp->conn_anon_port ||
5596 	    connp->conn_anon_mlp)) {
5597 		uint16_t mlpport;
5598 		zone_t *zone;
5599 
5600 		zone = crgetzone(cr);
5601 		connp->conn_mlp_type =
5602 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5603 		    mlptSingle;
5604 		addrtype = tsol_mlp_addr_type(
5605 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5606 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5607 		if (addrtype == mlptSingle) {
5608 			error = -TNOADDR;
5609 			mutex_exit(&connp->conn_lock);
5610 			goto late_error;
5611 		}
5612 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
5613 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5614 		    addrtype);
5615 
5616 		/*
5617 		 * It is a coding error to attempt to bind an MLP port
5618 		 * without first setting SOL_SOCKET/SCM_UCRED.
5619 		 */
5620 		if (mlptype != mlptSingle &&
5621 		    connp->conn_mlp_type == mlptSingle) {
5622 			error = EINVAL;
5623 			mutex_exit(&connp->conn_lock);
5624 			goto late_error;
5625 		}
5626 
5627 		/*
5628 		 * It is an access violation to attempt to bind an MLP port
5629 		 * without NET_BINDMLP privilege.
5630 		 */
5631 		if (mlptype != mlptSingle &&
5632 		    secpolicy_net_bindmlp(cr) != 0) {
5633 			if (connp->conn_debug) {
5634 				(void) strlog(UDP_MOD_ID, 0, 1,
5635 				    SL_ERROR|SL_TRACE,
5636 				    "udp_bind: no priv for multilevel port %d",
5637 				    mlpport);
5638 			}
5639 			error = -TACCES;
5640 			mutex_exit(&connp->conn_lock);
5641 			goto late_error;
5642 		}
5643 
5644 		/*
5645 		 * If we're specifically binding a shared IP address and the
5646 		 * port is MLP on shared addresses, then check to see if this
5647 		 * zone actually owns the MLP.  Reject if not.
5648 		 */
5649 		if (mlptype == mlptShared && addrtype == mlptShared) {
5650 			/*
5651 			 * No need to handle exclusive-stack zones since
5652 			 * ALL_ZONES only applies to the shared stack.
5653 			 */
5654 			zoneid_t mlpzone;
5655 
5656 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5657 			    htons(mlpport));
5658 			if (connp->conn_zoneid != mlpzone) {
5659 				if (connp->conn_debug) {
5660 					(void) strlog(UDP_MOD_ID, 0, 1,
5661 					    SL_ERROR|SL_TRACE,
5662 					    "udp_bind: attempt to bind port "
5663 					    "%d on shared addr in zone %d "
5664 					    "(should be %d)",
5665 					    mlpport, connp->conn_zoneid,
5666 					    mlpzone);
5667 				}
5668 				error = -TACCES;
5669 				mutex_exit(&connp->conn_lock);
5670 				goto late_error;
5671 			}
5672 		}
5673 		if (connp->conn_anon_port) {
5674 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5675 			    port, B_TRUE);
5676 			if (error != 0) {
5677 				if (connp->conn_debug) {
5678 					(void) strlog(UDP_MOD_ID, 0, 1,
5679 					    SL_ERROR|SL_TRACE,
5680 					    "udp_bind: cannot establish anon "
5681 					    "MLP for port %d", port);
5682 				}
5683 				error = -TACCES;
5684 				mutex_exit(&connp->conn_lock);
5685 				goto late_error;
5686 			}
5687 		}
5688 		connp->conn_mlp_type = mlptype;
5689 	}
5690 
5691 	/*
5692 	 * We create an initial header template here to make a subsequent
5693 	 * sendto have a starting point. Since conn_last_dst is zero the
5694 	 * first sendto will always follow the 'dst changed' code path.
5695 	 * Note that we defer massaging options and the related checksum
5696 	 * adjustment until we have a destination address.
5697 	 */
5698 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5699 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5700 	if (error != 0) {
5701 		mutex_exit(&connp->conn_lock);
5702 		goto late_error;
5703 	}
5704 	/* Just in case */
5705 	connp->conn_faddr_v6 = ipv6_all_zeros;
5706 	connp->conn_fport = 0;
5707 	connp->conn_v6lastdst = ipv6_all_zeros;
5708 	mutex_exit(&connp->conn_lock);
5709 
5710 	error = ip_laddr_fanout_insert(connp);
5711 	if (error != 0)
5712 		goto late_error;
5713 
5714 	/* Bind succeeded */
5715 	return (0);
5716 
5717 late_error:
5718 	/* We had already picked the port number, and then the bind failed */
5719 	mutex_enter(&connp->conn_lock);
5720 	udpf = &us->us_bind_fanout[
5721 	    UDP_BIND_HASH(connp->conn_lport,
5722 	    us->us_bind_fanout_size)];
5723 	mutex_enter(&udpf->uf_lock);
5724 	connp->conn_saddr_v6 = ipv6_all_zeros;
5725 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5726 	connp->conn_laddr_v6 = ipv6_all_zeros;
5727 	if (scopeid != 0) {
5728 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5729 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5730 	}
5731 	udp->udp_state = TS_UNBND;
5732 	udp_bind_hash_remove(udp, B_TRUE);
5733 	connp->conn_lport = 0;
5734 	mutex_exit(&udpf->uf_lock);
5735 	connp->conn_anon_port = B_FALSE;
5736 	connp->conn_mlp_type = mlptSingle;
5737 
5738 	connp->conn_v6lastdst = ipv6_all_zeros;
5739 
5740 	/* Restore the header that was built above - different source address */
5741 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5742 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5743 	mutex_exit(&connp->conn_lock);
5744 	return (error);
5745 }
5746 
5747 int
5748 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5749     socklen_t len, cred_t *cr)
5750 {
5751 	int		error;
5752 	conn_t		*connp;
5753 
5754 	/* All Solaris components should pass a cred for this operation. */
5755 	ASSERT(cr != NULL);
5756 
5757 	connp = (conn_t *)proto_handle;
5758 
5759 	if (sa == NULL)
5760 		error = udp_do_unbind(connp);
5761 	else
5762 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5763 
5764 	if (error < 0) {
5765 		if (error == -TOUTSTATE)
5766 			error = EINVAL;
5767 		else
5768 			error = proto_tlitosyserr(-error);
5769 	}
5770 
5771 	return (error);
5772 }
5773 
5774 static int
5775 udp_implicit_bind(conn_t *connp, cred_t *cr)
5776 {
5777 	sin6_t sin6addr;
5778 	sin_t *sin;
5779 	sin6_t *sin6;
5780 	socklen_t len;
5781 	int error;
5782 
5783 	/* All Solaris components should pass a cred for this operation. */
5784 	ASSERT(cr != NULL);
5785 
5786 	if (connp->conn_family == AF_INET) {
5787 		len = sizeof (struct sockaddr_in);
5788 		sin = (sin_t *)&sin6addr;
5789 		*sin = sin_null;
5790 		sin->sin_family = AF_INET;
5791 		sin->sin_addr.s_addr = INADDR_ANY;
5792 	} else {
5793 		ASSERT(connp->conn_family == AF_INET6);
5794 		len = sizeof (sin6_t);
5795 		sin6 = (sin6_t *)&sin6addr;
5796 		*sin6 = sin6_null;
5797 		sin6->sin6_family = AF_INET6;
5798 		V6_SET_ZERO(sin6->sin6_addr);
5799 	}
5800 
5801 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5802 	    cr, B_FALSE);
5803 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5804 }
5805 
5806 /*
5807  * This routine removes a port number association from a stream. It
5808  * is called by udp_unbind and udp_tpi_unbind.
5809  */
5810 static int
5811 udp_do_unbind(conn_t *connp)
5812 {
5813 	udp_t 		*udp = connp->conn_udp;
5814 	udp_fanout_t	*udpf;
5815 	udp_stack_t	*us = udp->udp_us;
5816 
5817 	if (cl_inet_unbind != NULL) {
5818 		/*
5819 		 * Running in cluster mode - register unbind information
5820 		 */
5821 		if (connp->conn_ipversion == IPV4_VERSION) {
5822 			(*cl_inet_unbind)(
5823 			    connp->conn_netstack->netstack_stackid,
5824 			    IPPROTO_UDP, AF_INET,
5825 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5826 			    (in_port_t)connp->conn_lport, NULL);
5827 		} else {
5828 			(*cl_inet_unbind)(
5829 			    connp->conn_netstack->netstack_stackid,
5830 			    IPPROTO_UDP, AF_INET6,
5831 			    (uint8_t *)&(connp->conn_laddr_v6),
5832 			    (in_port_t)connp->conn_lport, NULL);
5833 		}
5834 	}
5835 
5836 	mutex_enter(&connp->conn_lock);
5837 	/* If a bind has not been done, we can't unbind. */
5838 	if (udp->udp_state == TS_UNBND) {
5839 		mutex_exit(&connp->conn_lock);
5840 		return (-TOUTSTATE);
5841 	}
5842 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5843 	    us->us_bind_fanout_size)];
5844 	mutex_enter(&udpf->uf_lock);
5845 	udp_bind_hash_remove(udp, B_TRUE);
5846 	connp->conn_saddr_v6 = ipv6_all_zeros;
5847 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5848 	connp->conn_laddr_v6 = ipv6_all_zeros;
5849 	connp->conn_mcbc_bind = B_FALSE;
5850 	connp->conn_lport = 0;
5851 	/* In case we were also connected */
5852 	connp->conn_faddr_v6 = ipv6_all_zeros;
5853 	connp->conn_fport = 0;
5854 	mutex_exit(&udpf->uf_lock);
5855 
5856 	connp->conn_v6lastdst = ipv6_all_zeros;
5857 	udp->udp_state = TS_UNBND;
5858 
5859 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5860 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5861 	mutex_exit(&connp->conn_lock);
5862 
5863 	ip_unbind(connp);
5864 
5865 	return (0);
5866 }
5867 
5868 /*
5869  * It associates a default destination address with the stream.
5870  */
5871 static int
5872 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5873     cred_t *cr, pid_t pid)
5874 {
5875 	sin6_t		*sin6;
5876 	sin_t		*sin;
5877 	in6_addr_t 	v6dst;
5878 	ipaddr_t 	v4dst;
5879 	uint16_t 	dstport;
5880 	uint32_t 	flowinfo;
5881 	udp_fanout_t	*udpf;
5882 	udp_t		*udp, *udp1;
5883 	ushort_t	ipversion;
5884 	udp_stack_t	*us;
5885 	int		error;
5886 	conn_t		*connp1;
5887 	ip_xmit_attr_t	*ixa;
5888 	ip_xmit_attr_t	*oldixa;
5889 	uint_t		scopeid = 0;
5890 	uint_t		srcid = 0;
5891 	in6_addr_t	v6src = connp->conn_saddr_v6;
5892 
5893 	udp = connp->conn_udp;
5894 	us = udp->udp_us;
5895 
5896 	/*
5897 	 * Address has been verified by the caller
5898 	 */
5899 	switch (len) {
5900 	default:
5901 		/*
5902 		 * Should never happen
5903 		 */
5904 		return (EINVAL);
5905 
5906 	case sizeof (sin_t):
5907 		sin = (sin_t *)sa;
5908 		v4dst = sin->sin_addr.s_addr;
5909 		dstport = sin->sin_port;
5910 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5911 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5912 		ipversion = IPV4_VERSION;
5913 		break;
5914 
5915 	case sizeof (sin6_t):
5916 		sin6 = (sin6_t *)sa;
5917 		v6dst = sin6->sin6_addr;
5918 		dstport = sin6->sin6_port;
5919 		srcid = sin6->__sin6_src_id;
5920 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5921 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5922 			    connp->conn_netstack);
5923 		}
5924 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
5925 			if (connp->conn_ipv6_v6only)
5926 				return (EADDRNOTAVAIL);
5927 
5928 			/*
5929 			 * Destination address is a mapped IPv6 address.
5930 			 * Source bound address should be unspecified or
5931 			 * IPv6 mapped address as well.
5932 			 */
5933 			if (!IN6_IS_ADDR_UNSPECIFIED(
5934 			    &connp->conn_bound_addr_v6) &&
5935 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5936 				return (EADDRNOTAVAIL);
5937 			}
5938 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5939 			ipversion = IPV4_VERSION;
5940 			flowinfo = 0;
5941 		} else {
5942 			ipversion = IPV6_VERSION;
5943 			flowinfo = sin6->sin6_flowinfo;
5944 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5945 				scopeid = sin6->sin6_scope_id;
5946 		}
5947 		break;
5948 	}
5949 
5950 	if (dstport == 0)
5951 		return (-TBADADDR);
5952 
5953 	/*
5954 	 * If there is a different thread using conn_ixa then we get a new
5955 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5956 	 * conn_ixa and prevent any other thread from using/changing it.
5957 	 * Once connect() is done other threads can use conn_ixa since the
5958 	 * refcnt will be back at one.
5959 	 * We defer updating conn_ixa until later to handle any concurrent
5960 	 * conn_ixa_cleanup thread.
5961 	 */
5962 	ixa = conn_get_ixa(connp, B_FALSE);
5963 	if (ixa == NULL)
5964 		return (ENOMEM);
5965 
5966 	ASSERT(ixa->ixa_refcnt >= 2);
5967 	ASSERT(ixa == connp->conn_ixa);
5968 
5969 	mutex_enter(&connp->conn_lock);
5970 	/*
5971 	 * This udp_t must have bound to a port already before doing a connect.
5972 	 * Reject if a connect is in progress (we drop conn_lock during
5973 	 * udp_do_connect).
5974 	 */
5975 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5976 		mutex_exit(&connp->conn_lock);
5977 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5978 		    "udp_connect: bad state, %u", udp->udp_state);
5979 		ixa_refrele(ixa);
5980 		return (-TOUTSTATE);
5981 	}
5982 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5983 
5984 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5985 	    us->us_bind_fanout_size)];
5986 
5987 	mutex_enter(&udpf->uf_lock);
5988 	if (udp->udp_state == TS_DATA_XFER) {
5989 		/* Already connected - clear out state */
5990 		if (connp->conn_mcbc_bind)
5991 			connp->conn_saddr_v6 = ipv6_all_zeros;
5992 		else
5993 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5994 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5995 		connp->conn_faddr_v6 = ipv6_all_zeros;
5996 		connp->conn_fport = 0;
5997 		udp->udp_state = TS_IDLE;
5998 	}
5999 
6000 	connp->conn_fport = dstport;
6001 	connp->conn_ipversion = ipversion;
6002 	if (ipversion == IPV4_VERSION) {
6003 		/*
6004 		 * Interpret a zero destination to mean loopback.
6005 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
6006 		 * generate the T_CONN_CON.
6007 		 */
6008 		if (v4dst == INADDR_ANY) {
6009 			v4dst = htonl(INADDR_LOOPBACK);
6010 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
6011 			if (connp->conn_family == AF_INET) {
6012 				sin->sin_addr.s_addr = v4dst;
6013 			} else {
6014 				sin6->sin6_addr = v6dst;
6015 			}
6016 		}
6017 		connp->conn_faddr_v6 = v6dst;
6018 		connp->conn_flowinfo = 0;
6019 	} else {
6020 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
6021 		/*
6022 		 * Interpret a zero destination to mean loopback.
6023 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
6024 		 * generate the T_CONN_CON.
6025 		 */
6026 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
6027 			v6dst = ipv6_loopback;
6028 			sin6->sin6_addr = v6dst;
6029 		}
6030 		connp->conn_faddr_v6 = v6dst;
6031 		connp->conn_flowinfo = flowinfo;
6032 	}
6033 	mutex_exit(&udpf->uf_lock);
6034 
6035 	/*
6036 	 * We update our cred/cpid based on the caller of connect
6037 	 */
6038 	if (connp->conn_cred != cr) {
6039 		crhold(cr);
6040 		crfree(connp->conn_cred);
6041 		connp->conn_cred = cr;
6042 	}
6043 	connp->conn_cpid = pid;
6044 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
6045 	ixa->ixa_cred = cr;
6046 	ixa->ixa_cpid = pid;
6047 	if (is_system_labeled()) {
6048 		/* We need to restart with a label based on the cred */
6049 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
6050 	}
6051 
6052 	if (scopeid != 0) {
6053 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
6054 		ixa->ixa_scopeid = scopeid;
6055 		connp->conn_incoming_ifindex = scopeid;
6056 	} else {
6057 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
6058 		connp->conn_incoming_ifindex = connp->conn_bound_if;
6059 	}
6060 	/*
6061 	 * conn_connect will drop conn_lock and reacquire it.
6062 	 * To prevent a send* from messing with this udp_t while the lock
6063 	 * is dropped we set udp_state and clear conn_v6lastdst.
6064 	 * That will make all send* fail with EISCONN.
6065 	 */
6066 	connp->conn_v6lastdst = ipv6_all_zeros;
6067 	udp->udp_state = TS_WCON_CREQ;
6068 
6069 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
6070 	mutex_exit(&connp->conn_lock);
6071 	if (error != 0)
6072 		goto connect_failed;
6073 
6074 	/*
6075 	 * The addresses have been verified. Time to insert in
6076 	 * the correct fanout list.
6077 	 */
6078 	error = ipcl_conn_insert(connp);
6079 	if (error != 0)
6080 		goto connect_failed;
6081 
6082 	mutex_enter(&connp->conn_lock);
6083 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
6084 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
6085 	if (error != 0) {
6086 		mutex_exit(&connp->conn_lock);
6087 		goto connect_failed;
6088 	}
6089 
6090 	udp->udp_state = TS_DATA_XFER;
6091 	/* Record this as the "last" send even though we haven't sent any */
6092 	connp->conn_v6lastdst = connp->conn_faddr_v6;
6093 	connp->conn_lastipversion = connp->conn_ipversion;
6094 	connp->conn_lastdstport = connp->conn_fport;
6095 	connp->conn_lastflowinfo = connp->conn_flowinfo;
6096 	connp->conn_lastscopeid = scopeid;
6097 	connp->conn_lastsrcid = srcid;
6098 	/* Also remember a source to use together with lastdst */
6099 	connp->conn_v6lastsrc = v6src;
6100 
6101 	oldixa = conn_replace_ixa(connp, ixa);
6102 	mutex_exit(&connp->conn_lock);
6103 	ixa_refrele(oldixa);
6104 
6105 	/*
6106 	 * We've picked a source address above. Now we can
6107 	 * verify that the src/port/dst/port is unique for all
6108 	 * connections in TS_DATA_XFER, skipping ourselves.
6109 	 */
6110 	mutex_enter(&udpf->uf_lock);
6111 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
6112 		if (udp1->udp_state != TS_DATA_XFER)
6113 			continue;
6114 
6115 		if (udp1 == udp)
6116 			continue;
6117 
6118 		connp1 = udp1->udp_connp;
6119 		if (connp->conn_lport != connp1->conn_lport ||
6120 		    connp->conn_ipversion != connp1->conn_ipversion ||
6121 		    dstport != connp1->conn_fport ||
6122 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6123 		    &connp1->conn_laddr_v6) ||
6124 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
6125 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
6126 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
6127 			continue;
6128 		mutex_exit(&udpf->uf_lock);
6129 		error = -TBADADDR;
6130 		goto connect_failed;
6131 	}
6132 	if (cl_inet_connect2 != NULL) {
6133 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
6134 		if (error != 0) {
6135 			mutex_exit(&udpf->uf_lock);
6136 			error = -TBADADDR;
6137 			goto connect_failed;
6138 		}
6139 	}
6140 	mutex_exit(&udpf->uf_lock);
6141 
6142 	ixa_refrele(ixa);
6143 	return (0);
6144 
6145 connect_failed:
6146 	if (ixa != NULL)
6147 		ixa_refrele(ixa);
6148 	mutex_enter(&connp->conn_lock);
6149 	mutex_enter(&udpf->uf_lock);
6150 	udp->udp_state = TS_IDLE;
6151 	connp->conn_faddr_v6 = ipv6_all_zeros;
6152 	connp->conn_fport = 0;
6153 	/* In case the source address was set above */
6154 	if (connp->conn_mcbc_bind)
6155 		connp->conn_saddr_v6 = ipv6_all_zeros;
6156 	else
6157 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
6158 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
6159 	mutex_exit(&udpf->uf_lock);
6160 
6161 	connp->conn_v6lastdst = ipv6_all_zeros;
6162 	connp->conn_flowinfo = 0;
6163 
6164 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
6165 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
6166 	mutex_exit(&connp->conn_lock);
6167 	return (error);
6168 }
6169 
6170 static int
6171 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
6172     socklen_t len, sock_connid_t *id, cred_t *cr)
6173 {
6174 	conn_t	*connp = (conn_t *)proto_handle;
6175 	udp_t	*udp = connp->conn_udp;
6176 	int	error;
6177 	boolean_t did_bind = B_FALSE;
6178 	pid_t	pid = curproc->p_pid;
6179 
6180 	/* All Solaris components should pass a cred for this operation. */
6181 	ASSERT(cr != NULL);
6182 
6183 	if (sa == NULL) {
6184 		/*
6185 		 * Disconnect
6186 		 * Make sure we are connected
6187 		 */
6188 		if (udp->udp_state != TS_DATA_XFER)
6189 			return (EINVAL);
6190 
6191 		error = udp_disconnect(connp);
6192 		return (error);
6193 	}
6194 
6195 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
6196 	if (error != 0)
6197 		goto done;
6198 
6199 	/* do an implicit bind if necessary */
6200 	if (udp->udp_state == TS_UNBND) {
6201 		error = udp_implicit_bind(connp, cr);
6202 		/*
6203 		 * We could be racing with an actual bind, in which case
6204 		 * we would see EPROTO. We cross our fingers and try
6205 		 * to connect.
6206 		 */
6207 		if (!(error == 0 || error == EPROTO))
6208 			goto done;
6209 		did_bind = B_TRUE;
6210 	}
6211 	/*
6212 	 * set SO_DGRAM_ERRIND
6213 	 */
6214 	connp->conn_dgram_errind = B_TRUE;
6215 
6216 	error = udp_do_connect(connp, sa, len, cr, pid);
6217 
6218 	if (error != 0 && did_bind) {
6219 		int unbind_err;
6220 
6221 		unbind_err = udp_do_unbind(connp);
6222 		ASSERT(unbind_err == 0);
6223 	}
6224 
6225 	if (error == 0) {
6226 		*id = 0;
6227 		(*connp->conn_upcalls->su_connected)
6228 		    (connp->conn_upper_handle, 0, NULL, -1);
6229 	} else if (error < 0) {
6230 		error = proto_tlitosyserr(-error);
6231 	}
6232 
6233 done:
6234 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6235 		/*
6236 		 * No need to hold locks to set state
6237 		 * after connect failure socket state is undefined
6238 		 * We set the state only to imitate old sockfs behavior
6239 		 */
6240 		udp->udp_state = TS_IDLE;
6241 	}
6242 	return (error);
6243 }
6244 
6245 int
6246 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6247     cred_t *cr)
6248 {
6249 	sin6_t		*sin6;
6250 	sin_t		*sin = NULL;
6251 	uint_t		srcid;
6252 	conn_t		*connp = (conn_t *)proto_handle;
6253 	udp_t		*udp = connp->conn_udp;
6254 	int		error = 0;
6255 	udp_stack_t	*us = udp->udp_us;
6256 	ushort_t	ipversion;
6257 	pid_t		pid = curproc->p_pid;
6258 	ip_xmit_attr_t	*ixa;
6259 
6260 	ASSERT(DB_TYPE(mp) == M_DATA);
6261 
6262 	/* All Solaris components should pass a cred for this operation. */
6263 	ASSERT(cr != NULL);
6264 
6265 	/* do an implicit bind if necessary */
6266 	if (udp->udp_state == TS_UNBND) {
6267 		error = udp_implicit_bind(connp, cr);
6268 		/*
6269 		 * We could be racing with an actual bind, in which case
6270 		 * we would see EPROTO. We cross our fingers and try
6271 		 * to connect.
6272 		 */
6273 		if (!(error == 0 || error == EPROTO)) {
6274 			freemsg(mp);
6275 			return (error);
6276 		}
6277 	}
6278 
6279 	/* Connected? */
6280 	if (msg->msg_name == NULL) {
6281 		if (udp->udp_state != TS_DATA_XFER) {
6282 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6283 			return (EDESTADDRREQ);
6284 		}
6285 		if (msg->msg_controllen != 0) {
6286 			error = udp_output_ancillary(connp, NULL, NULL, mp,
6287 			    NULL, msg, cr, pid);
6288 		} else {
6289 			error = udp_output_connected(connp, mp, cr, pid);
6290 		}
6291 		if (us->us_sendto_ignerr)
6292 			return (0);
6293 		else
6294 			return (error);
6295 	}
6296 	if (udp->udp_state == TS_DATA_XFER) {
6297 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6298 		return (EISCONN);
6299 	}
6300 	error = proto_verify_ip_addr(connp->conn_family,
6301 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6302 	if (error != 0) {
6303 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6304 		return (error);
6305 	}
6306 	switch (connp->conn_family) {
6307 	case AF_INET6:
6308 		sin6 = (sin6_t *)msg->msg_name;
6309 
6310 		srcid = sin6->__sin6_src_id;
6311 
6312 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6313 			/*
6314 			 * Destination is a non-IPv4-compatible IPv6 address.
6315 			 * Send out an IPv6 format packet.
6316 			 */
6317 
6318 			/*
6319 			 * If the local address is a mapped address return
6320 			 * an error.
6321 			 * It would be possible to send an IPv6 packet but the
6322 			 * response would never make it back to the application
6323 			 * since it is bound to a mapped address.
6324 			 */
6325 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6326 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6327 				return (EADDRNOTAVAIL);
6328 			}
6329 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6330 				sin6->sin6_addr = ipv6_loopback;
6331 			ipversion = IPV6_VERSION;
6332 		} else {
6333 			if (connp->conn_ipv6_v6only) {
6334 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6335 				return (EADDRNOTAVAIL);
6336 			}
6337 
6338 			/*
6339 			 * If the local address is not zero or a mapped address
6340 			 * return an error.  It would be possible to send an
6341 			 * IPv4 packet but the response would never make it
6342 			 * back to the application since it is bound to a
6343 			 * non-mapped address.
6344 			 */
6345 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6346 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6347 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6348 				return (EADDRNOTAVAIL);
6349 			}
6350 
6351 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6352 				V4_PART_OF_V6(sin6->sin6_addr) =
6353 				    htonl(INADDR_LOOPBACK);
6354 			}
6355 			ipversion = IPV4_VERSION;
6356 		}
6357 
6358 		/*
6359 		 * We have to allocate an ip_xmit_attr_t before we grab
6360 		 * conn_lock and we need to hold conn_lock once we've check
6361 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
6362 		 * socket.
6363 		 */
6364 		if (msg->msg_controllen == 0) {
6365 			ixa = conn_get_ixa(connp, B_FALSE);
6366 			if (ixa == NULL) {
6367 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6368 				return (ENOMEM);
6369 			}
6370 		} else {
6371 			ixa = NULL;
6372 		}
6373 		mutex_enter(&connp->conn_lock);
6374 		if (udp->udp_delayed_error != 0) {
6375 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6376 
6377 			error = udp->udp_delayed_error;
6378 			udp->udp_delayed_error = 0;
6379 
6380 			/* Compare IP address, port, and family */
6381 
6382 			if (sin6->sin6_port == sin2->sin6_port &&
6383 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6384 			    &sin2->sin6_addr) &&
6385 			    sin6->sin6_family == sin2->sin6_family) {
6386 				mutex_exit(&connp->conn_lock);
6387 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6388 				if (ixa != NULL)
6389 					ixa_refrele(ixa);
6390 				return (error);
6391 			}
6392 		}
6393 
6394 		if (msg->msg_controllen != 0) {
6395 			mutex_exit(&connp->conn_lock);
6396 			ASSERT(ixa == NULL);
6397 			error = udp_output_ancillary(connp, NULL, sin6, mp,
6398 			    NULL, msg, cr, pid);
6399 		} else if (conn_same_as_last_v6(connp, sin6) &&
6400 		    connp->conn_lastsrcid == srcid &&
6401 		    ipsec_outbound_policy_current(ixa)) {
6402 			/* udp_output_lastdst drops conn_lock */
6403 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6404 		} else {
6405 			/* udp_output_newdst drops conn_lock */
6406 			error = udp_output_newdst(connp, mp, NULL, sin6,
6407 			    ipversion, cr, pid, ixa);
6408 		}
6409 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6410 		if (us->us_sendto_ignerr)
6411 			return (0);
6412 		else
6413 			return (error);
6414 	case AF_INET:
6415 		sin = (sin_t *)msg->msg_name;
6416 
6417 		ipversion = IPV4_VERSION;
6418 
6419 		if (sin->sin_addr.s_addr == INADDR_ANY)
6420 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6421 
6422 		/*
6423 		 * We have to allocate an ip_xmit_attr_t before we grab
6424 		 * conn_lock and we need to hold conn_lock once we've check
6425 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6426 		 */
6427 		if (msg->msg_controllen == 0) {
6428 			ixa = conn_get_ixa(connp, B_FALSE);
6429 			if (ixa == NULL) {
6430 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6431 				return (ENOMEM);
6432 			}
6433 		} else {
6434 			ixa = NULL;
6435 		}
6436 		mutex_enter(&connp->conn_lock);
6437 		if (udp->udp_delayed_error != 0) {
6438 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6439 
6440 			error = udp->udp_delayed_error;
6441 			udp->udp_delayed_error = 0;
6442 
6443 			/* Compare IP address and port */
6444 
6445 			if (sin->sin_port == sin2->sin_port &&
6446 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6447 				mutex_exit(&connp->conn_lock);
6448 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6449 				if (ixa != NULL)
6450 					ixa_refrele(ixa);
6451 				return (error);
6452 			}
6453 		}
6454 		if (msg->msg_controllen != 0) {
6455 			mutex_exit(&connp->conn_lock);
6456 			ASSERT(ixa == NULL);
6457 			error = udp_output_ancillary(connp, sin, NULL, mp,
6458 			    NULL, msg, cr, pid);
6459 		} else if (conn_same_as_last_v4(connp, sin) &&
6460 		    ipsec_outbound_policy_current(ixa)) {
6461 			/* udp_output_lastdst drops conn_lock */
6462 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6463 		} else {
6464 			/* udp_output_newdst drops conn_lock */
6465 			error = udp_output_newdst(connp, mp, sin, NULL,
6466 			    ipversion, cr, pid, ixa);
6467 		}
6468 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6469 		if (us->us_sendto_ignerr)
6470 			return (0);
6471 		else
6472 			return (error);
6473 	default:
6474 		return (EINVAL);
6475 	}
6476 }
6477 
6478 int
6479 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6480     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
6481 {
6482 	conn_t 	*connp = (conn_t *)proto_handle;
6483 	udp_t	*udp;
6484 	struct T_capability_ack tca;
6485 	struct sockaddr_in6 laddr, faddr;
6486 	socklen_t laddrlen, faddrlen;
6487 	short opts;
6488 	struct stroptions *stropt;
6489 	mblk_t *stropt_mp;
6490 	int error;
6491 
6492 	udp = connp->conn_udp;
6493 
6494 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6495 
6496 	/*
6497 	 * setup the fallback stream that was allocated
6498 	 */
6499 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
6500 	connp->conn_minor_arena = WR(q)->q_ptr;
6501 
6502 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
6503 
6504 	WR(q)->q_qinfo = &udp_winit;
6505 
6506 	connp->conn_rq = RD(q);
6507 	connp->conn_wq = WR(q);
6508 
6509 	/* Notify stream head about options before sending up data */
6510 	stropt_mp->b_datap->db_type = M_SETOPTS;
6511 	stropt_mp->b_wptr += sizeof (*stropt);
6512 	stropt = (struct stroptions *)stropt_mp->b_rptr;
6513 	stropt->so_flags = SO_WROFF | SO_HIWAT;
6514 	stropt->so_wroff = connp->conn_wroff;
6515 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6516 	putnext(RD(q), stropt_mp);
6517 
6518 	/*
6519 	 * Free the helper stream
6520 	 */
6521 	ip_free_helper_stream(connp);
6522 
6523 	if (!issocket)
6524 		udp_use_pure_tpi(udp);
6525 
6526 	/*
6527 	 * Collect the information needed to sync with the sonode
6528 	 */
6529 	udp_do_capability_ack(udp, &tca, TC1_INFO);
6530 
6531 	laddrlen = faddrlen = sizeof (sin6_t);
6532 	(void) udp_getsockname((sock_lower_handle_t)connp,
6533 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
6534 	error = udp_getpeername((sock_lower_handle_t)connp,
6535 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
6536 	if (error != 0)
6537 		faddrlen = 0;
6538 
6539 	opts = 0;
6540 	if (connp->conn_dgram_errind)
6541 		opts |= SO_DGRAM_ERRIND;
6542 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6543 		opts |= SO_DONTROUTE;
6544 
6545 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
6546 	    (struct sockaddr *)&laddr, laddrlen,
6547 	    (struct sockaddr *)&faddr, faddrlen, opts);
6548 
6549 	mutex_enter(&udp->udp_recv_lock);
6550 	/*
6551 	 * Attempts to send data up during fallback will result in it being
6552 	 * queued in udp_t. Now we push up any queued packets.
6553 	 */
6554 	while (udp->udp_fallback_queue_head != NULL) {
6555 		mblk_t *mp;
6556 		mp = udp->udp_fallback_queue_head;
6557 		udp->udp_fallback_queue_head = mp->b_next;
6558 		mutex_exit(&udp->udp_recv_lock);
6559 		mp->b_next = NULL;
6560 		putnext(RD(q), mp);
6561 		mutex_enter(&udp->udp_recv_lock);
6562 	}
6563 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6564 	/*
6565 	 * No longer a streams less socket
6566 	 */
6567 	mutex_enter(&connp->conn_lock);
6568 	connp->conn_flags &= ~IPCL_NONSTR;
6569 	mutex_exit(&connp->conn_lock);
6570 
6571 	mutex_exit(&udp->udp_recv_lock);
6572 
6573 	ASSERT(connp->conn_ref >= 1);
6574 
6575 	return (0);
6576 }
6577 
6578 /* ARGSUSED3 */
6579 int
6580 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
6581     socklen_t *salenp, cred_t *cr)
6582 {
6583 	conn_t	*connp = (conn_t *)proto_handle;
6584 	udp_t	*udp = connp->conn_udp;
6585 	int error;
6586 
6587 	/* All Solaris components should pass a cred for this operation. */
6588 	ASSERT(cr != NULL);
6589 
6590 	mutex_enter(&connp->conn_lock);
6591 	if (udp->udp_state != TS_DATA_XFER)
6592 		error = ENOTCONN;
6593 	else
6594 		error = conn_getpeername(connp, sa, salenp);
6595 	mutex_exit(&connp->conn_lock);
6596 	return (error);
6597 }
6598 
6599 /* ARGSUSED3 */
6600 int
6601 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6602     socklen_t *salenp, cred_t *cr)
6603 {
6604 	conn_t	*connp = (conn_t *)proto_handle;
6605 	int error;
6606 
6607 	/* All Solaris components should pass a cred for this operation. */
6608 	ASSERT(cr != NULL);
6609 
6610 	mutex_enter(&connp->conn_lock);
6611 	error = conn_getsockname(connp, sa, salenp);
6612 	mutex_exit(&connp->conn_lock);
6613 	return (error);
6614 }
6615 
6616 int
6617 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6618     void *optvalp, socklen_t *optlen, cred_t *cr)
6619 {
6620 	conn_t		*connp = (conn_t *)proto_handle;
6621 	int		error;
6622 	t_uscalar_t	max_optbuf_len;
6623 	void		*optvalp_buf;
6624 	int		len;
6625 
6626 	/* All Solaris components should pass a cred for this operation. */
6627 	ASSERT(cr != NULL);
6628 
6629 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6630 	    udp_opt_obj.odb_opt_des_arr,
6631 	    udp_opt_obj.odb_opt_arr_cnt,
6632 	    B_FALSE, B_TRUE, cr);
6633 	if (error != 0) {
6634 		if (error < 0)
6635 			error = proto_tlitosyserr(-error);
6636 		return (error);
6637 	}
6638 
6639 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6640 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6641 	if (len == -1) {
6642 		kmem_free(optvalp_buf, max_optbuf_len);
6643 		return (EINVAL);
6644 	}
6645 
6646 	/*
6647 	 * update optlen and copy option value
6648 	 */
6649 	t_uscalar_t size = MIN(len, *optlen);
6650 
6651 	bcopy(optvalp_buf, optvalp, size);
6652 	bcopy(&size, optlen, sizeof (size));
6653 
6654 	kmem_free(optvalp_buf, max_optbuf_len);
6655 	return (0);
6656 }
6657 
6658 int
6659 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6660     const void *optvalp, socklen_t optlen, cred_t *cr)
6661 {
6662 	conn_t		*connp = (conn_t *)proto_handle;
6663 	int		error;
6664 
6665 	/* All Solaris components should pass a cred for this operation. */
6666 	ASSERT(cr != NULL);
6667 
6668 	error = proto_opt_check(level, option_name, optlen, NULL,
6669 	    udp_opt_obj.odb_opt_des_arr,
6670 	    udp_opt_obj.odb_opt_arr_cnt,
6671 	    B_TRUE, B_FALSE, cr);
6672 
6673 	if (error != 0) {
6674 		if (error < 0)
6675 			error = proto_tlitosyserr(-error);
6676 		return (error);
6677 	}
6678 
6679 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6680 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6681 	    NULL, cr);
6682 
6683 	ASSERT(error >= 0);
6684 
6685 	return (error);
6686 }
6687 
6688 void
6689 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6690 {
6691 	conn_t	*connp = (conn_t *)proto_handle;
6692 	udp_t	*udp = connp->conn_udp;
6693 
6694 	mutex_enter(&udp->udp_recv_lock);
6695 	connp->conn_flow_cntrld = B_FALSE;
6696 	mutex_exit(&udp->udp_recv_lock);
6697 }
6698 
6699 /* ARGSUSED2 */
6700 int
6701 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6702 {
6703 	conn_t	*connp = (conn_t *)proto_handle;
6704 
6705 	/* All Solaris components should pass a cred for this operation. */
6706 	ASSERT(cr != NULL);
6707 
6708 	/* shut down the send side */
6709 	if (how != SHUT_RD)
6710 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6711 		    SOCK_OPCTL_SHUT_SEND, 0);
6712 	/* shut down the recv side */
6713 	if (how != SHUT_WR)
6714 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6715 		    SOCK_OPCTL_SHUT_RECV, 0);
6716 	return (0);
6717 }
6718 
6719 int
6720 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6721     int mode, int32_t *rvalp, cred_t *cr)
6722 {
6723 	conn_t  	*connp = (conn_t *)proto_handle;
6724 	int		error;
6725 
6726 	/* All Solaris components should pass a cred for this operation. */
6727 	ASSERT(cr != NULL);
6728 
6729 	/*
6730 	 * If we don't have a helper stream then create one.
6731 	 * ip_create_helper_stream takes care of locking the conn_t,
6732 	 * so this check for NULL is just a performance optimization.
6733 	 */
6734 	if (connp->conn_helper_info == NULL) {
6735 		udp_stack_t *us = connp->conn_udp->udp_us;
6736 
6737 		ASSERT(us->us_ldi_ident != NULL);
6738 
6739 		/*
6740 		 * Create a helper stream for non-STREAMS socket.
6741 		 */
6742 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6743 		if (error != 0) {
6744 			ip0dbg(("tcp_ioctl: create of IP helper stream "
6745 			    "failed %d\n", error));
6746 			return (error);
6747 		}
6748 	}
6749 
6750 	switch (cmd) {
6751 		case _SIOCSOCKFALLBACK:
6752 		case TI_GETPEERNAME:
6753 		case TI_GETMYNAME:
6754 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6755 			    cmd));
6756 			error = EINVAL;
6757 			break;
6758 		default:
6759 			/*
6760 			 * Pass on to IP using helper stream
6761 			 */
6762 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6763 			    cmd, arg, mode, cr, rvalp);
6764 			break;
6765 	}
6766 	return (error);
6767 }
6768 
6769 /* ARGSUSED */
6770 int
6771 udp_accept(sock_lower_handle_t lproto_handle,
6772     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6773     cred_t *cr)
6774 {
6775 	return (EOPNOTSUPP);
6776 }
6777 
6778 /* ARGSUSED */
6779 int
6780 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6781 {
6782 	return (EOPNOTSUPP);
6783 }
6784 
6785 sock_downcalls_t sock_udp_downcalls = {
6786 	udp_activate,		/* sd_activate */
6787 	udp_accept,		/* sd_accept */
6788 	udp_bind,		/* sd_bind */
6789 	udp_listen,		/* sd_listen */
6790 	udp_connect,		/* sd_connect */
6791 	udp_getpeername,	/* sd_getpeername */
6792 	udp_getsockname,	/* sd_getsockname */
6793 	udp_getsockopt,		/* sd_getsockopt */
6794 	udp_setsockopt,		/* sd_setsockopt */
6795 	udp_send,		/* sd_send */
6796 	NULL,			/* sd_send_uio */
6797 	NULL,			/* sd_recv_uio */
6798 	NULL,			/* sd_poll */
6799 	udp_shutdown,		/* sd_shutdown */
6800 	udp_clr_flowctrl,	/* sd_setflowctrl */
6801 	udp_ioctl,		/* sd_ioctl */
6802 	udp_close		/* sd_close */
6803 };
6804