xref: /illumos-gate/usr/src/uts/common/inet/udp/udp.c (revision fb2a9bae0030340ad72b9c26ba1ffee2ee3cafec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsubr.h>
38 #include <sys/suntpi.h>
39 #include <sys/xti_inet.h>
40 #include <sys/kmem.h>
41 #include <sys/cred_impl.h>
42 #include <sys/policy.h>
43 #include <sys/priv.h>
44 #include <sys/ucred.h>
45 #include <sys/zone.h>
46 
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/sockio.h>
50 #include <sys/vtrace.h>
51 #include <sys/sdt.h>
52 #include <sys/debug.h>
53 #include <sys/isa_defs.h>
54 #include <sys/random.h>
55 #include <netinet/in.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/udp.h>
59 
60 #include <inet/common.h>
61 #include <inet/ip.h>
62 #include <inet/ip_impl.h>
63 #include <inet/ipsec_impl.h>
64 #include <inet/ip6.h>
65 #include <inet/ip_ire.h>
66 #include <inet/ip_if.h>
67 #include <inet/ip_multi.h>
68 #include <inet/ip_ndp.h>
69 #include <inet/proto_set.h>
70 #include <inet/mib2.h>
71 #include <inet/optcom.h>
72 #include <inet/snmpcom.h>
73 #include <inet/kstatcom.h>
74 #include <inet/ipclassifier.h>
75 #include <sys/squeue_impl.h>
76 #include <inet/ipnet.h>
77 #include <sys/ethernet.h>
78 
79 #include <sys/tsol/label.h>
80 #include <sys/tsol/tnet.h>
81 #include <rpc/pmap_prot.h>
82 
83 #include <inet/udp_impl.h>
84 
85 /*
86  * Synchronization notes:
87  *
88  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
89  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
90  * protects the contents of the udp_t. uf_lock protects the address and the
91  * fanout information.
92  * The lock order is conn_lock -> uf_lock.
93  *
94  * The fanout lock uf_lock:
95  * When a UDP endpoint is bound to a local port, it is inserted into
96  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
97  * The size of the array is controlled by the udp_bind_fanout_size variable.
98  * This variable can be changed in /etc/system if the default value is
99  * not large enough.  Each bind hash bucket is protected by a per bucket
100  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
101  * structure and a few other fields in the udp_t. A UDP endpoint is removed
102  * from the bind hash list only when it is being unbound or being closed.
103  * The per bucket lock also protects a UDP endpoint's state changes.
104  *
105  * Plumbing notes:
106  * UDP is always a device driver. For compatibility with mibopen() code
107  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
108  * dummy module.
109  *
110  * The above implies that we don't support any intermediate module to
111  * reside in between /dev/ip and udp -- in fact, we never supported such
112  * scenario in the past as the inter-layer communication semantics have
113  * always been private.
114  */
115 
116 /* For /etc/system control */
117 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
118 
119 static void	udp_addr_req(queue_t *q, mblk_t *mp);
120 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
121 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
122 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
123 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
124     const in6_addr_t *, in_port_t, uint32_t);
125 static void	udp_capability_req(queue_t *q, mblk_t *mp);
126 static int	udp_tpi_close(queue_t *q, int flags);
127 static void	udp_close_free(conn_t *);
128 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
129 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
130 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
131     int sys_error);
132 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
133     t_scalar_t tlierr, int sys_error);
134 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
135 		    cred_t *cr);
136 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
137 		    char *value, caddr_t cp, cred_t *cr);
138 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
139 		    char *value, caddr_t cp, cred_t *cr);
140 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
141 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
142     ip_recv_attr_t *ira);
143 static void	udp_info_req(queue_t *q, mblk_t *mp);
144 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
145 static void	udp_lrput(queue_t *, mblk_t *);
146 static void	udp_lwput(queue_t *, mblk_t *);
147 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
148 		    cred_t *credp, boolean_t isv6);
149 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
150 		    cred_t *credp);
151 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
152 		    cred_t *credp);
153 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
154 int		udp_opt_set(conn_t *connp, uint_t optset_context,
155 		    int level, int name, uint_t inlen,
156 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
157 		    void *thisdg_attrs, cred_t *cr);
158 int		udp_opt_get(conn_t *connp, int level, int name,
159 		    uchar_t *ptr);
160 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
161 		    pid_t pid);
162 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
163     pid_t pid, ip_xmit_attr_t *ixa);
164 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
165 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
166 		    ip_xmit_attr_t *ixa);
167 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
168     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
169     int *);
170 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
171     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
172 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
173 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
174 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
175 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
176     boolean_t random);
177 static void	udp_wput_other(queue_t *q, mblk_t *mp);
178 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
179 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
180 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
181 
182 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
183 static void	udp_stack_fini(netstackid_t stackid, void *arg);
184 
185 static void	*udp_kstat_init(netstackid_t stackid);
186 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
187 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
188 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
189 static int	udp_kstat_update(kstat_t *kp, int rw);
190 
191 
192 /* Common routines for TPI and socket module */
193 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
194 
195 /* Common routine for TPI and socket module */
196 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
197 static void	udp_do_close(conn_t *);
198 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
199     boolean_t);
200 static int	udp_do_unbind(conn_t *);
201 
202 int		udp_getsockname(sock_lower_handle_t,
203     struct sockaddr *, socklen_t *, cred_t *);
204 int		udp_getpeername(sock_lower_handle_t,
205     struct sockaddr *, socklen_t *, cred_t *);
206 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
207     cred_t *, pid_t);
208 
209 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
210 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * Usage notes:
 *  - "err" must be an lvalue; it is first cleared and then assigned the
 *    value returned by the cl_inet_connect2 hook.
 *  - The hook is called with AF_INET and the IPv4 parts of the local and
 *    foreign addresses when cp->conn_ipversion is IPV4_VERSION, and with
 *    AF_INET6 and the full IPv6 addresses otherwise.
 *  - "faddrp" is dereferenced without surrounding parentheses in the IPv4
 *    branch, so pass a simple pointer expression.
 *  - The expansion is a bare brace-enclosed block, not a
 *    do { } while (0) statement.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET,			\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*faddrp)),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
248 
/*
 * STREAMS module information shared by every queue initializer below.
 * NOTE(review): the positional initializer presumably follows the
 * module_info(9S) field order (id, name, min packet size, max packet size,
 * high water mark, low water mark) -- confirm against <sys/stream.h>.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The two read-side tables differ only in the open routine they name;
 * both use udp_tpi_close for close.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side, shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 * These lower-side tables route messages to udp_lrput and udp_lwput.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
295 
296 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
297 
/*
 * Default structure copied into T_INFO_ACK messages for IPv4 endpoints.
 * OPT_size and CURRENT_state are placeholders here; per the field comments
 * they are filled in when the ack is generated.
 */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
312 
313 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
314 
/*
 * IPv6 counterpart of the default T_INFO_ACK template: same layout, but
 * sized for sin6_t addresses and the IPv6 maximum payload.
 */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
328 
329 /*
330  * UDP tunables related declarations. Definitions are in udp_tunables.c
331  */
332 extern mod_prop_info_t udp_propinfo_tbl[];
333 extern int udp_propinfo_count;
334 
335 /* Setable in /etc/system */
336 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
337 uint32_t udp_random_anon_port = 1;
338 
339 /*
340  * Hook functions to enable cluster networking.
341  * On non-clustered systems these vectors must always be NULL
342  */
343 
344 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
345     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
346     void *args) = NULL;
347 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
348     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
349     void *args) = NULL;
350 
351 typedef union T_primitives *t_primp_t;
352 
353 /*
354  * Return the next anonymous port in the privileged port range for
355  * bind checking.
356  *
357  * Trusted Extension (TX) notes: TX allows administrator to mark or
358  * reserve ports as Multilevel ports (MLP). MLP has special function
359  * on TX systems. Once a port is made MLP, it's not available as
360  * ordinary port. This creates "holes" in the port name space. It
361  * may be necessary to skip the "holes" find a suitable anon port.
362  */
363 static in_port_t
364 udp_get_next_priv_port(udp_t *udp)
365 {
366 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
367 	in_port_t nextport;
368 	boolean_t restart = B_FALSE;
369 	udp_stack_t *us = udp->udp_us;
370 
371 retry:
372 	if (next_priv_port < us->us_min_anonpriv_port ||
373 	    next_priv_port >= IPPORT_RESERVED) {
374 		next_priv_port = IPPORT_RESERVED - 1;
375 		if (restart)
376 			return (0);
377 		restart = B_TRUE;
378 	}
379 
380 	if (is_system_labeled() &&
381 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
382 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
383 		next_priv_port = nextport;
384 		goto retry;
385 	}
386 
387 	return (next_priv_port--);
388 }
389 
390 /*
391  * Hash list removal routine for udp_t structures.
392  */
393 static void
394 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
395 {
396 	udp_t		*udpnext;
397 	kmutex_t	*lockp;
398 	udp_stack_t	*us = udp->udp_us;
399 	conn_t		*connp = udp->udp_connp;
400 
401 	if (udp->udp_ptpbhn == NULL)
402 		return;
403 
404 	/*
405 	 * Extract the lock pointer in case there are concurrent
406 	 * hash_remove's for this instance.
407 	 */
408 	ASSERT(connp->conn_lport != 0);
409 	if (!caller_holds_lock) {
410 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
411 		    us->us_bind_fanout_size)].uf_lock;
412 		ASSERT(lockp != NULL);
413 		mutex_enter(lockp);
414 	}
415 	if (udp->udp_ptpbhn != NULL) {
416 		udpnext = udp->udp_bind_hash;
417 		if (udpnext != NULL) {
418 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
419 			udp->udp_bind_hash = NULL;
420 		}
421 		*udp->udp_ptpbhn = udpnext;
422 		udp->udp_ptpbhn = NULL;
423 	}
424 	if (!caller_holds_lock) {
425 		mutex_exit(lockp);
426 	}
427 }
428 
429 static void
430 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
431 {
432 	conn_t	*connp = udp->udp_connp;
433 	udp_t	**udpp;
434 	udp_t	*udpnext;
435 	conn_t	*connext;
436 
437 	ASSERT(MUTEX_HELD(&uf->uf_lock));
438 	ASSERT(udp->udp_ptpbhn == NULL);
439 	udpp = &uf->uf_udp;
440 	udpnext = udpp[0];
441 	if (udpnext != NULL) {
442 		/*
443 		 * If the new udp bound to the INADDR_ANY address
444 		 * and the first one in the list is not bound to
445 		 * INADDR_ANY we skip all entries until we find the
446 		 * first one bound to INADDR_ANY.
447 		 * This makes sure that applications binding to a
448 		 * specific address get preference over those binding to
449 		 * INADDR_ANY.
450 		 */
451 		connext = udpnext->udp_connp;
452 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
453 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
454 			while ((udpnext = udpp[0]) != NULL &&
455 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
456 				udpp = &(udpnext->udp_bind_hash);
457 			}
458 			if (udpnext != NULL)
459 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
460 		} else {
461 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
462 		}
463 	}
464 	udp->udp_bind_hash = udpnext;
465 	udp->udp_ptpbhn = udpp;
466 	udpp[0] = udp;
467 }
468 
/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The actual bind work is delegated to udp_do_bind(); this routine only
 * validates the TPI request and turns the result into an ack.
 * If everything is ok it then sends the T_BIND_ACK back up.
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 *
 * The request mblk is reused for the reply: on success it is relabelled
 * as a T_BIND_ACK and sent with qreply(); on failure it is handed to
 * udp_err_ack().
 */
static void
udp_tpi_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	struct T_bind_req *tbr;
	conn_t		*connp;
	udp_t		*udp;
	int		error;
	struct sockaddr	*sa;
	cred_t		*cr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this TPI message, hence we ASSERT.
	 * But in case there is some other M_PROTO that looks
	 * like a TPI message sent by some other kernel
	 * component, we check and return an error.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL) {
		udp_err_ack(q, mp, TSYSERR, EINVAL);
		return;
	}

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	/* The fixed-size part of the request must be present. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/* Binding is only legal on an endpoint that is still unbound. */
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address.
	 * The result goes into mp1 so that the original mp is still
	 * available for the error ack if the reallocation fails.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;

	/* Reset the message type in preparation for shipping it back. */
	DB_TYPE(mp) = M_PCPROTO;

	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		/*
		 * No address supplied: build an all-zero sockaddr of the
		 * endpoint's family directly after the T_bind_req header.
		 */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (connp->conn_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
			sa = (struct sockaddr *)sin;
		} else {
			ASSERT(connp->conn_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
			sa = (struct sockaddr *)sin6;
		}
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		/* Both the endpoint and the address must be IPv4. */
		if (connp->conn_family != AF_INET ||
		    sa->sa_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		/* Both the endpoint and the address must be IPv6. */
		if (connp->conn_family != AF_INET6 ||
		    sa->sa_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * The last argument is true for T_BIND_REQ and false for the older
	 * O_T_BIND_REQ primitive.
	 */
	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
	    tbr->PRIM_type != O_T_BIND_REQ);

	/*
	 * A positive result is reported as TSYSERR with that value as the
	 * UNIX error; a negative result is a negated TLI error code.
	 */
	if (error != 0) {
		if (error > 0) {
			udp_err_ack(q, mp, TSYSERR, error);
		} else {
			udp_err_ack(q, mp, -error, 0);
		}
	} else {
		tbr->PRIM_type = T_BIND_ACK;
		qreply(q, mp);
	}
}
611 
612 /*
613  * This routine handles each T_CONN_REQ message passed to udp.  It
614  * associates a default destination address with the stream.
615  *
616  * After various error checks are completed, udp_connect() lays
617  * the target address and port into the composite header template.
618  * Then we ask IP for information, including a source address if we didn't
619  * already have one. Finally we send up the T_OK_ACK reply message.
620  */
621 static void
622 udp_tpi_connect(queue_t *q, mblk_t *mp)
623 {
624 	conn_t	*connp = Q_TO_CONN(q);
625 	int	error;
626 	socklen_t	len;
627 	struct sockaddr		*sa;
628 	struct T_conn_req	*tcr;
629 	cred_t		*cr;
630 	pid_t		pid;
631 	/*
632 	 * All Solaris components should pass a db_credp
633 	 * for this TPI message, hence we ASSERT.
634 	 * But in case there is some other M_PROTO that looks
635 	 * like a TPI message sent by some other kernel
636 	 * component, we check and return an error.
637 	 */
638 	cr = msg_getcred(mp, &pid);
639 	ASSERT(cr != NULL);
640 	if (cr == NULL) {
641 		udp_err_ack(q, mp, TSYSERR, EINVAL);
642 		return;
643 	}
644 
645 	tcr = (struct T_conn_req *)mp->b_rptr;
646 
647 	/* A bit of sanity checking */
648 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
649 		udp_err_ack(q, mp, TPROTO, 0);
650 		return;
651 	}
652 
653 	if (tcr->OPT_length != 0) {
654 		udp_err_ack(q, mp, TBADOPT, 0);
655 		return;
656 	}
657 
658 	/*
659 	 * Determine packet type based on type of address passed in
660 	 * the request should contain an IPv4 or IPv6 address.
661 	 * Make sure that address family matches the type of
662 	 * family of the address passed down.
663 	 */
664 	len = tcr->DEST_length;
665 	switch (tcr->DEST_length) {
666 	default:
667 		udp_err_ack(q, mp, TBADADDR, 0);
668 		return;
669 
670 	case sizeof (sin_t):
671 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
672 		    sizeof (sin_t));
673 		break;
674 
675 	case sizeof (sin6_t):
676 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
677 		    sizeof (sin6_t));
678 		break;
679 	}
680 
681 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
682 	if (error != 0) {
683 		udp_err_ack(q, mp, TSYSERR, error);
684 		return;
685 	}
686 
687 	error = udp_do_connect(connp, sa, len, cr, pid);
688 	if (error != 0) {
689 		if (error < 0)
690 			udp_err_ack(q, mp, -error, 0);
691 		else
692 			udp_err_ack(q, mp, TSYSERR, error);
693 	} else {
694 		mblk_t	*mp1;
695 		/*
696 		 * We have to send a connection confirmation to
697 		 * keep TLI happy.
698 		 */
699 		if (connp->conn_family == AF_INET) {
700 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
701 			    sizeof (sin_t), NULL, 0);
702 		} else {
703 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
704 			    sizeof (sin6_t), NULL, 0);
705 		}
706 		if (mp1 == NULL) {
707 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
708 			return;
709 		}
710 
711 		/*
712 		 * Send ok_ack for T_CONN_REQ
713 		 */
714 		mp = mi_tpi_ok_ack_alloc(mp);
715 		if (mp == NULL) {
716 			/* Unable to reuse the T_CONN_REQ for the ack. */
717 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
718 			return;
719 		}
720 
721 		putnext(connp->conn_rq, mp);
722 		putnext(connp->conn_rq, mp1);
723 	}
724 }
725 
726 static int
727 udp_tpi_close(queue_t *q, int flags)
728 {
729 	conn_t	*connp;
730 
731 	if (flags & SO_FALLBACK) {
732 		/*
733 		 * stream is being closed while in fallback
734 		 * simply free the resources that were allocated
735 		 */
736 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
737 		qprocsoff(q);
738 		goto done;
739 	}
740 
741 	connp = Q_TO_CONN(q);
742 	udp_do_close(connp);
743 done:
744 	q->q_ptr = WR(q)->q_ptr = NULL;
745 	return (0);
746 }
747 
748 static void
749 udp_close_free(conn_t *connp)
750 {
751 	udp_t *udp = connp->conn_udp;
752 
753 	/* If there are any options associated with the stream, free them. */
754 	if (udp->udp_recv_ipp.ipp_fields != 0)
755 		ip_pkt_free(&udp->udp_recv_ipp);
756 
757 	/*
758 	 * Clear any fields which the kmem_cache constructor clears.
759 	 * Only udp_connp needs to be preserved.
760 	 * TBD: We should make this more efficient to avoid clearing
761 	 * everything.
762 	 */
763 	ASSERT(udp->udp_connp == connp);
764 	bzero(udp, sizeof (udp_t));
765 	udp->udp_connp = connp;
766 }
767 
/*
 * Common disconnect code for the TPI and socket paths.
 *
 * Drops the endpoint from TS_DATA_XFER back to TS_IDLE, clears the
 * foreign address and port, rebuilds the header template and asks IP to
 * reinsert the conn with only its local address binding.
 *
 * Returns -TOUTSTATE if the endpoint is not in TS_DATA_XFER; otherwise
 * returns whatever udp_build_hdr_template() (if non-zero) or
 * ip_laddr_fanout_insert() returns.  Callers in this file treat negative
 * values as negated TLI errors and positive values as errnos.
 */
static int
udp_do_disconnect(conn_t *connp)
{
	udp_t	*udp;
	udp_fanout_t *udpf;
	udp_stack_t *us;
	int	error;

	udp = connp->conn_udp;
	us = udp->udp_us;
	mutex_enter(&connp->conn_lock);
	if (udp->udp_state != TS_DATA_XFER) {
		mutex_exit(&connp->conn_lock);
		return (-TOUTSTATE);
	}
	/* Take the bucket lock inside conn_lock (conn_lock -> uf_lock). */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	/* Source address: unspecified for mcbc binds, else the bound one. */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	/* Forget the peer. */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/* Remove any remnants of mapped address binding */
	if (connp->conn_family == AF_INET6)
		connp->conn_ipversion = IPV6_VERSION;

	connp->conn_v6lastdst = ipv6_all_zeros;
	/* Rebuild the header template while still holding conn_lock. */
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		return (error);

	/*
	 * Tell IP to remove the full binding and revert
	 * to the local address binding.
	 */
	return (ip_laddr_fanout_insert(connp));
}
813 
814 static void
815 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
816 {
817 	conn_t	*connp = Q_TO_CONN(q);
818 	int	error;
819 
820 	/*
821 	 * Allocate the largest primitive we need to send back
822 	 * T_error_ack is > than T_ok_ack
823 	 */
824 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
825 	if (mp == NULL) {
826 		/* Unable to reuse the T_DISCON_REQ for the ack. */
827 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
828 		return;
829 	}
830 
831 	error = udp_do_disconnect(connp);
832 
833 	if (error != 0) {
834 		if (error < 0) {
835 			udp_err_ack(q, mp, -error, 0);
836 		} else {
837 			udp_err_ack(q, mp, TSYSERR, error);
838 		}
839 	} else {
840 		mp = mi_tpi_ok_ack_alloc(mp);
841 		ASSERT(mp != NULL);
842 		qreply(q, mp);
843 	}
844 }
845 
846 int
847 udp_disconnect(conn_t *connp)
848 {
849 	int error;
850 
851 	connp->conn_dgram_errind = B_FALSE;
852 	error = udp_do_disconnect(connp);
853 	if (error < 0)
854 		error = proto_tlitosyserr(-error);
855 
856 	return (error);
857 }
858 
859 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
860 static void
861 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
862 {
863 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
864 		qreply(q, mp);
865 }
866 
867 /* Shorthand to generate and send TPI error acks to our client */
868 static void
869 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
870     t_scalar_t t_error, int sys_error)
871 {
872 	struct T_error_ack	*teackp;
873 
874 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
875 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
876 		teackp = (struct T_error_ack *)mp->b_rptr;
877 		teackp->ERROR_prim = primitive;
878 		teackp->TLI_error = t_error;
879 		teackp->UNIX_error = sys_error;
880 		qreply(q, mp);
881 	}
882 }
883 
884 /* At minimum we need 4 bytes of UDP header */
885 #define	ICMP_MIN_UDP_HDR	4
886 
887 /*
888  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
889  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
890  * Assumes that IP has pulled up everything up to and including the ICMP header.
891  */
892 /* ARGSUSED2 */
893 static void
894 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
895 {
896 	conn_t		*connp = (conn_t *)arg1;
897 	icmph_t		*icmph;
898 	ipha_t		*ipha;
899 	int		iph_hdr_length;
900 	udpha_t		*udpha;
901 	sin_t		sin;
902 	sin6_t		sin6;
903 	mblk_t		*mp1;
904 	int		error = 0;
905 	udp_t		*udp = connp->conn_udp;
906 
907 	ipha = (ipha_t *)mp->b_rptr;
908 
909 	ASSERT(OK_32PTR(mp->b_rptr));
910 
911 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
912 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
913 		udp_icmp_error_ipv6(connp, mp, ira);
914 		return;
915 	}
916 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
917 
918 	/* Skip past the outer IP and ICMP headers */
919 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
920 	iph_hdr_length = ira->ira_ip_hdr_length;
921 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
922 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
923 
924 	/* Skip past the inner IP and find the ULP header */
925 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
926 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
927 
928 	switch (icmph->icmph_type) {
929 	case ICMP_DEST_UNREACHABLE:
930 		switch (icmph->icmph_code) {
931 		case ICMP_FRAGMENTATION_NEEDED: {
932 			ipha_t		*ipha;
933 			ip_xmit_attr_t	*ixa;
934 			/*
935 			 * IP has already adjusted the path MTU.
936 			 * But we need to adjust DF for IPv4.
937 			 */
938 			if (connp->conn_ipversion != IPV4_VERSION)
939 				break;
940 
941 			ixa = conn_get_ixa(connp, B_FALSE);
942 			if (ixa == NULL || ixa->ixa_ire == NULL) {
943 				/*
944 				 * Some other thread holds conn_ixa. We will
945 				 * redo this on the next ICMP too big.
946 				 */
947 				if (ixa != NULL)
948 					ixa_refrele(ixa);
949 				break;
950 			}
951 			(void) ip_get_pmtu(ixa);
952 
953 			mutex_enter(&connp->conn_lock);
954 			ipha = (ipha_t *)connp->conn_ht_iphc;
955 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
956 				ipha->ipha_fragment_offset_and_flags |=
957 				    IPH_DF_HTONS;
958 			} else {
959 				ipha->ipha_fragment_offset_and_flags &=
960 				    ~IPH_DF_HTONS;
961 			}
962 			mutex_exit(&connp->conn_lock);
963 			ixa_refrele(ixa);
964 			break;
965 		}
966 		case ICMP_PORT_UNREACHABLE:
967 		case ICMP_PROTOCOL_UNREACHABLE:
968 			error = ECONNREFUSED;
969 			break;
970 		default:
971 			/* Transient errors */
972 			break;
973 		}
974 		break;
975 	default:
976 		/* Transient errors */
977 		break;
978 	}
979 	if (error == 0) {
980 		freemsg(mp);
981 		return;
982 	}
983 
984 	/*
985 	 * Deliver T_UDERROR_IND when the application has asked for it.
986 	 * The socket layer enables this automatically when connected.
987 	 */
988 	if (!connp->conn_dgram_errind) {
989 		freemsg(mp);
990 		return;
991 	}
992 
993 	switch (connp->conn_family) {
994 	case AF_INET:
995 		sin = sin_null;
996 		sin.sin_family = AF_INET;
997 		sin.sin_addr.s_addr = ipha->ipha_dst;
998 		sin.sin_port = udpha->uha_dst_port;
999 		if (IPCL_IS_NONSTR(connp)) {
1000 			mutex_enter(&connp->conn_lock);
1001 			if (udp->udp_state == TS_DATA_XFER) {
1002 				if (sin.sin_port == connp->conn_fport &&
1003 				    sin.sin_addr.s_addr ==
1004 				    connp->conn_faddr_v4) {
1005 					mutex_exit(&connp->conn_lock);
1006 					(*connp->conn_upcalls->su_set_error)
1007 					    (connp->conn_upper_handle, error);
1008 					goto done;
1009 				}
1010 			} else {
1011 				udp->udp_delayed_error = error;
1012 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1013 			}
1014 			mutex_exit(&connp->conn_lock);
1015 		} else {
1016 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1017 			    NULL, 0, error);
1018 			if (mp1 != NULL)
1019 				putnext(connp->conn_rq, mp1);
1020 		}
1021 		break;
1022 	case AF_INET6:
1023 		sin6 = sin6_null;
1024 		sin6.sin6_family = AF_INET6;
1025 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1026 		sin6.sin6_port = udpha->uha_dst_port;
1027 		if (IPCL_IS_NONSTR(connp)) {
1028 			mutex_enter(&connp->conn_lock);
1029 			if (udp->udp_state == TS_DATA_XFER) {
1030 				if (sin6.sin6_port == connp->conn_fport &&
1031 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1032 				    &connp->conn_faddr_v6)) {
1033 					mutex_exit(&connp->conn_lock);
1034 					(*connp->conn_upcalls->su_set_error)
1035 					    (connp->conn_upper_handle, error);
1036 					goto done;
1037 				}
1038 			} else {
1039 				udp->udp_delayed_error = error;
1040 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1041 			}
1042 			mutex_exit(&connp->conn_lock);
1043 		} else {
1044 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1045 			    NULL, 0, error);
1046 			if (mp1 != NULL)
1047 				putnext(connp->conn_rq, mp1);
1048 		}
1049 		break;
1050 	}
1051 done:
1052 	freemsg(mp);
1053 }
1054 
1055 /*
1056  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1057  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1058  * Assumes that IP has pulled up all the extension headers as well as the
1059  * ICMPv6 header.
1060  */
1061 static void
1062 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1063 {
1064 	icmp6_t		*icmp6;
1065 	ip6_t		*ip6h, *outer_ip6h;
1066 	uint16_t	iph_hdr_length;
1067 	uint8_t		*nexthdrp;
1068 	udpha_t		*udpha;
1069 	sin6_t		sin6;
1070 	mblk_t		*mp1;
1071 	int		error = 0;
1072 	udp_t		*udp = connp->conn_udp;
1073 	udp_stack_t	*us = udp->udp_us;
1074 
1075 	outer_ip6h = (ip6_t *)mp->b_rptr;
1076 #ifdef DEBUG
1077 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1078 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1079 	else
1080 		iph_hdr_length = IPV6_HDR_LEN;
1081 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1082 #endif
1083 	/* Skip past the outer IP and ICMP headers */
1084 	iph_hdr_length = ira->ira_ip_hdr_length;
1085 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1086 
1087 	/* Skip past the inner IP and find the ULP header */
1088 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
1089 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1090 		freemsg(mp);
1091 		return;
1092 	}
1093 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1094 
1095 	switch (icmp6->icmp6_type) {
1096 	case ICMP6_DST_UNREACH:
1097 		switch (icmp6->icmp6_code) {
1098 		case ICMP6_DST_UNREACH_NOPORT:
1099 			error = ECONNREFUSED;
1100 			break;
1101 		case ICMP6_DST_UNREACH_ADMIN:
1102 		case ICMP6_DST_UNREACH_NOROUTE:
1103 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1104 		case ICMP6_DST_UNREACH_ADDR:
1105 			/* Transient errors */
1106 			break;
1107 		default:
1108 			break;
1109 		}
1110 		break;
1111 	case ICMP6_PACKET_TOO_BIG: {
1112 		struct T_unitdata_ind	*tudi;
1113 		struct T_opthdr		*toh;
1114 		size_t			udi_size;
1115 		mblk_t			*newmp;
1116 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1117 		    sizeof (struct ip6_mtuinfo);
1118 		sin6_t			*sin6;
1119 		struct ip6_mtuinfo	*mtuinfo;
1120 
1121 		/*
1122 		 * If the application has requested to receive path mtu
1123 		 * information, send up an empty message containing an
1124 		 * IPV6_PATHMTU ancillary data item.
1125 		 */
1126 		if (!connp->conn_ipv6_recvpathmtu)
1127 			break;
1128 
1129 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1130 		    opt_length;
1131 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1132 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
1133 			break;
1134 		}
1135 
1136 		/*
1137 		 * newmp->b_cont is left to NULL on purpose.  This is an
1138 		 * empty message containing only ancillary data.
1139 		 */
1140 		newmp->b_datap->db_type = M_PROTO;
1141 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1142 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1143 		tudi->PRIM_type = T_UNITDATA_IND;
1144 		tudi->SRC_length = sizeof (sin6_t);
1145 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1146 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1147 		tudi->OPT_length = opt_length;
1148 
1149 		sin6 = (sin6_t *)&tudi[1];
1150 		bzero(sin6, sizeof (sin6_t));
1151 		sin6->sin6_family = AF_INET6;
1152 		sin6->sin6_addr = connp->conn_faddr_v6;
1153 
1154 		toh = (struct T_opthdr *)&sin6[1];
1155 		toh->level = IPPROTO_IPV6;
1156 		toh->name = IPV6_PATHMTU;
1157 		toh->len = opt_length;
1158 		toh->status = 0;
1159 
1160 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1161 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1162 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1163 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1164 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1165 		/*
1166 		 * We've consumed everything we need from the original
1167 		 * message.  Free it, then send our empty message.
1168 		 */
1169 		freemsg(mp);
1170 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1171 		return;
1172 	}
1173 	case ICMP6_TIME_EXCEEDED:
1174 		/* Transient errors */
1175 		break;
1176 	case ICMP6_PARAM_PROB:
1177 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1178 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1179 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1180 		    (uchar_t *)nexthdrp) {
1181 			error = ECONNREFUSED;
1182 			break;
1183 		}
1184 		break;
1185 	}
1186 	if (error == 0) {
1187 		freemsg(mp);
1188 		return;
1189 	}
1190 
1191 	/*
1192 	 * Deliver T_UDERROR_IND when the application has asked for it.
1193 	 * The socket layer enables this automatically when connected.
1194 	 */
1195 	if (!connp->conn_dgram_errind) {
1196 		freemsg(mp);
1197 		return;
1198 	}
1199 
1200 	sin6 = sin6_null;
1201 	sin6.sin6_family = AF_INET6;
1202 	sin6.sin6_addr = ip6h->ip6_dst;
1203 	sin6.sin6_port = udpha->uha_dst_port;
1204 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1205 
1206 	if (IPCL_IS_NONSTR(connp)) {
1207 		mutex_enter(&connp->conn_lock);
1208 		if (udp->udp_state == TS_DATA_XFER) {
1209 			if (sin6.sin6_port == connp->conn_fport &&
1210 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1211 			    &connp->conn_faddr_v6)) {
1212 				mutex_exit(&connp->conn_lock);
1213 				(*connp->conn_upcalls->su_set_error)
1214 				    (connp->conn_upper_handle, error);
1215 				goto done;
1216 			}
1217 		} else {
1218 			udp->udp_delayed_error = error;
1219 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1220 		}
1221 		mutex_exit(&connp->conn_lock);
1222 	} else {
1223 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1224 		    NULL, 0, error);
1225 		if (mp1 != NULL)
1226 			putnext(connp->conn_rq, mp1);
1227 	}
1228 done:
1229 	freemsg(mp);
1230 }
1231 
1232 /*
1233  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1234  * The local address is filled in if endpoint is bound. The remote address
1235  * is filled in if remote address has been precified ("connected endpoint")
1236  * (The concept of connected CLTS sockets is alien to published TPI
1237  *  but we support it anyway).
1238  */
1239 static void
1240 udp_addr_req(queue_t *q, mblk_t *mp)
1241 {
1242 	struct sockaddr *sa;
1243 	mblk_t	*ackmp;
1244 	struct T_addr_ack *taa;
1245 	udp_t	*udp = Q_TO_UDP(q);
1246 	conn_t	*connp = udp->udp_connp;
1247 	uint_t	addrlen;
1248 
1249 	/* Make it large enough for worst case */
1250 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1251 	    2 * sizeof (sin6_t), 1);
1252 	if (ackmp == NULL) {
1253 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1254 		return;
1255 	}
1256 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1257 
1258 	bzero(taa, sizeof (struct T_addr_ack));
1259 	ackmp->b_wptr = (uchar_t *)&taa[1];
1260 
1261 	taa->PRIM_type = T_ADDR_ACK;
1262 	ackmp->b_datap->db_type = M_PCPROTO;
1263 
1264 	if (connp->conn_family == AF_INET)
1265 		addrlen = sizeof (sin_t);
1266 	else
1267 		addrlen = sizeof (sin6_t);
1268 
1269 	mutex_enter(&connp->conn_lock);
1270 	/*
1271 	 * Note: Following code assumes 32 bit alignment of basic
1272 	 * data structures like sin_t and struct T_addr_ack.
1273 	 */
1274 	if (udp->udp_state != TS_UNBND) {
1275 		/*
1276 		 * Fill in local address first
1277 		 */
1278 		taa->LOCADDR_offset = sizeof (*taa);
1279 		taa->LOCADDR_length = addrlen;
1280 		sa = (struct sockaddr *)&taa[1];
1281 		(void) conn_getsockname(connp, sa, &addrlen);
1282 		ackmp->b_wptr += addrlen;
1283 	}
1284 	if (udp->udp_state == TS_DATA_XFER) {
1285 		/*
1286 		 * connected, fill remote address too
1287 		 */
1288 		taa->REMADDR_length = addrlen;
1289 		/* assumed 32-bit alignment */
1290 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1291 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1292 		(void) conn_getpeername(connp, sa, &addrlen);
1293 		ackmp->b_wptr += addrlen;
1294 	}
1295 	mutex_exit(&connp->conn_lock);
1296 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1297 	qreply(q, ackmp);
1298 }
1299 
1300 static void
1301 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1302 {
1303 	conn_t		*connp = udp->udp_connp;
1304 
1305 	if (connp->conn_family == AF_INET) {
1306 		*tap = udp_g_t_info_ack_ipv4;
1307 	} else {
1308 		*tap = udp_g_t_info_ack_ipv6;
1309 	}
1310 	tap->CURRENT_state = udp->udp_state;
1311 	tap->OPT_size = udp_max_optsize;
1312 }
1313 
1314 static void
1315 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1316     t_uscalar_t cap_bits1)
1317 {
1318 	tcap->CAP_bits1 = 0;
1319 
1320 	if (cap_bits1 & TC1_INFO) {
1321 		udp_copy_info(&tcap->INFO_ack, udp);
1322 		tcap->CAP_bits1 |= TC1_INFO;
1323 	}
1324 }
1325 
1326 /*
1327  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1328  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1329  * udp_g_t_info_ack.  The current state of the stream is copied from
1330  * udp_state.
1331  */
1332 static void
1333 udp_capability_req(queue_t *q, mblk_t *mp)
1334 {
1335 	t_uscalar_t		cap_bits1;
1336 	struct T_capability_ack	*tcap;
1337 	udp_t	*udp = Q_TO_UDP(q);
1338 
1339 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1340 
1341 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1342 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1343 	if (!mp)
1344 		return;
1345 
1346 	tcap = (struct T_capability_ack *)mp->b_rptr;
1347 	udp_do_capability_ack(udp, tcap, cap_bits1);
1348 
1349 	qreply(q, mp);
1350 }
1351 
1352 /*
1353  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1354  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1355  * The current state of the stream is copied from udp_state.
1356  */
1357 static void
1358 udp_info_req(queue_t *q, mblk_t *mp)
1359 {
1360 	udp_t *udp = Q_TO_UDP(q);
1361 
1362 	/* Create a T_INFO_ACK message. */
1363 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1364 	    T_INFO_ACK);
1365 	if (!mp)
1366 		return;
1367 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1368 	qreply(q, mp);
1369 }
1370 
1371 /* For /dev/udp aka AF_INET open */
1372 static int
1373 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1374 {
1375 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1376 }
1377 
1378 /* For /dev/udp6 aka AF_INET6 open */
1379 static int
1380 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1381 {
1382 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1383 }
1384 
1385 /*
1386  * This is the open routine for udp.  It allocates a udp_t structure for
1387  * the stream and, on the first open of the module, creates an ND table.
1388  */
1389 static int
1390 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1391     boolean_t isv6)
1392 {
1393 	udp_t		*udp;
1394 	conn_t		*connp;
1395 	dev_t		conn_dev;
1396 	vmem_t		*minor_arena;
1397 	int		err;
1398 
1399 	/* If the stream is already open, return immediately. */
1400 	if (q->q_ptr != NULL)
1401 		return (0);
1402 
1403 	if (sflag == MODOPEN)
1404 		return (EINVAL);
1405 
1406 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1407 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1408 		minor_arena = ip_minor_arena_la;
1409 	} else {
1410 		/*
1411 		 * Either minor numbers in the large arena were exhausted
1412 		 * or a non socket application is doing the open.
1413 		 * Try to allocate from the small arena.
1414 		 */
1415 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1416 			return (EBUSY);
1417 
1418 		minor_arena = ip_minor_arena_sa;
1419 	}
1420 
1421 	if (flag & SO_FALLBACK) {
1422 		/*
1423 		 * Non streams socket needs a stream to fallback to
1424 		 */
1425 		RD(q)->q_ptr = (void *)conn_dev;
1426 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1427 		WR(q)->q_ptr = (void *)minor_arena;
1428 		qprocson(q);
1429 		return (0);
1430 	}
1431 
1432 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1433 	if (connp == NULL) {
1434 		inet_minor_free(minor_arena, conn_dev);
1435 		return (err);
1436 	}
1437 	udp = connp->conn_udp;
1438 
1439 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1440 	connp->conn_dev = conn_dev;
1441 	connp->conn_minor_arena = minor_arena;
1442 
1443 	/*
1444 	 * Initialize the udp_t structure for this stream.
1445 	 */
1446 	q->q_ptr = connp;
1447 	WR(q)->q_ptr = connp;
1448 	connp->conn_rq = q;
1449 	connp->conn_wq = WR(q);
1450 
1451 	/*
1452 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1453 	 * need to lock anything.
1454 	 */
1455 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1456 	ASSERT(connp->conn_udp == udp);
1457 	ASSERT(udp->udp_connp == connp);
1458 
1459 	if (flag & SO_SOCKSTR) {
1460 		udp->udp_issocket = B_TRUE;
1461 	}
1462 
1463 	WR(q)->q_hiwat = connp->conn_sndbuf;
1464 	WR(q)->q_lowat = connp->conn_sndlowat;
1465 
1466 	qprocson(q);
1467 
1468 	/* Set the Stream head write offset and high watermark. */
1469 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1470 	(void) proto_set_rx_hiwat(q, connp,
1471 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1472 
1473 	mutex_enter(&connp->conn_lock);
1474 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1475 	mutex_exit(&connp->conn_lock);
1476 	return (0);
1477 }
1478 
1479 /*
1480  * Which UDP options OK to set through T_UNITDATA_REQ...
1481  */
1482 /* ARGSUSED */
1483 static boolean_t
1484 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1485 {
1486 	return (B_TRUE);
1487 }
1488 
1489 /*
1490  * This routine gets default values of certain options whose default
1491  * values are maintained by protcol specific code
1492  */
1493 int
1494 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1495 {
1496 	udp_t		*udp = Q_TO_UDP(q);
1497 	udp_stack_t *us = udp->udp_us;
1498 	int *i1 = (int *)ptr;
1499 
1500 	switch (level) {
1501 	case IPPROTO_IP:
1502 		switch (name) {
1503 		case IP_MULTICAST_TTL:
1504 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1505 			return (sizeof (uchar_t));
1506 		case IP_MULTICAST_LOOP:
1507 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1508 			return (sizeof (uchar_t));
1509 		}
1510 		break;
1511 	case IPPROTO_IPV6:
1512 		switch (name) {
1513 		case IPV6_MULTICAST_HOPS:
1514 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1515 			return (sizeof (int));
1516 		case IPV6_MULTICAST_LOOP:
1517 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1518 			return (sizeof (int));
1519 		case IPV6_UNICAST_HOPS:
1520 			*i1 = us->us_ipv6_hoplimit;
1521 			return (sizeof (int));
1522 		}
1523 		break;
1524 	}
1525 	return (-1);
1526 }
1527 
1528 /*
1529  * This routine retrieves the current status of socket options.
1530  * It returns the size of the option retrieved, or -1.
1531  */
1532 int
1533 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1534     uchar_t *ptr)
1535 {
1536 	int		*i1 = (int *)ptr;
1537 	udp_t		*udp = connp->conn_udp;
1538 	int		len;
1539 	conn_opt_arg_t	coas;
1540 	int		retval;
1541 
1542 	coas.coa_connp = connp;
1543 	coas.coa_ixa = connp->conn_ixa;
1544 	coas.coa_ipp = &connp->conn_xmit_ipp;
1545 	coas.coa_ancillary = B_FALSE;
1546 	coas.coa_changed = 0;
1547 
1548 	/*
1549 	 * We assume that the optcom framework has checked for the set
1550 	 * of levels and names that are supported, hence we don't worry
1551 	 * about rejecting based on that.
1552 	 * First check for UDP specific handling, then pass to common routine.
1553 	 */
1554 	switch (level) {
1555 	case IPPROTO_IP:
1556 		/*
1557 		 * Only allow IPv4 option processing on IPv4 sockets.
1558 		 */
1559 		if (connp->conn_family != AF_INET)
1560 			return (-1);
1561 
1562 		switch (name) {
1563 		case IP_OPTIONS:
1564 		case T_IP_OPTIONS:
1565 			mutex_enter(&connp->conn_lock);
1566 			if (!(udp->udp_recv_ipp.ipp_fields &
1567 			    IPPF_IPV4_OPTIONS)) {
1568 				mutex_exit(&connp->conn_lock);
1569 				return (0);
1570 			}
1571 
1572 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1573 			ASSERT(len != 0);
1574 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1575 			mutex_exit(&connp->conn_lock);
1576 			return (len);
1577 		}
1578 		break;
1579 	case IPPROTO_UDP:
1580 		switch (name) {
1581 		case UDP_NAT_T_ENDPOINT:
1582 			mutex_enter(&connp->conn_lock);
1583 			*i1 = udp->udp_nat_t_endpoint;
1584 			mutex_exit(&connp->conn_lock);
1585 			return (sizeof (int));
1586 		case UDP_RCVHDR:
1587 			mutex_enter(&connp->conn_lock);
1588 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1589 			mutex_exit(&connp->conn_lock);
1590 			return (sizeof (int));
1591 		}
1592 	}
1593 	mutex_enter(&connp->conn_lock);
1594 	retval = conn_opt_get(&coas, level, name, ptr);
1595 	mutex_exit(&connp->conn_lock);
1596 	return (retval);
1597 }
1598 
1599 /*
1600  * This routine retrieves the current status of socket options.
1601  * It returns the size of the option retrieved, or -1.
1602  */
1603 int
1604 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1605 {
1606 	conn_t		*connp = Q_TO_CONN(q);
1607 	int		err;
1608 
1609 	err = udp_opt_get(connp, level, name, ptr);
1610 	return (err);
1611 }
1612 
1613 /*
1614  * This routine sets socket options.
1615  */
1616 int
1617 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1618     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1619 {
1620 	conn_t		*connp = coa->coa_connp;
1621 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1622 	udp_t		*udp = connp->conn_udp;
1623 	udp_stack_t	*us = udp->udp_us;
1624 	int		*i1 = (int *)invalp;
1625 	boolean_t 	onoff = (*i1 == 0) ? 0 : 1;
1626 	int		error;
1627 
1628 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1629 	/*
1630 	 * First do UDP specific sanity checks and handle UDP specific
1631 	 * options. Note that some IPPROTO_UDP options are handled
1632 	 * by conn_opt_set.
1633 	 */
1634 	switch (level) {
1635 	case SOL_SOCKET:
1636 		switch (name) {
1637 		case SO_SNDBUF:
1638 			if (*i1 > us->us_max_buf) {
1639 				return (ENOBUFS);
1640 			}
1641 			break;
1642 		case SO_RCVBUF:
1643 			if (*i1 > us->us_max_buf) {
1644 				return (ENOBUFS);
1645 			}
1646 			break;
1647 
1648 		case SCM_UCRED: {
1649 			struct ucred_s *ucr;
1650 			cred_t *newcr;
1651 			ts_label_t *tsl;
1652 
1653 			/*
1654 			 * Only sockets that have proper privileges and are
1655 			 * bound to MLPs will have any other value here, so
1656 			 * this implicitly tests for privilege to set label.
1657 			 */
1658 			if (connp->conn_mlp_type == mlptSingle)
1659 				break;
1660 
1661 			ucr = (struct ucred_s *)invalp;
1662 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1663 			    ucr->uc_labeloff < sizeof (*ucr) ||
1664 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1665 				return (EINVAL);
1666 			if (!checkonly) {
1667 				/*
1668 				 * Set ixa_tsl to the new label.
1669 				 * We assume that crgetzoneid doesn't change
1670 				 * as part of the SCM_UCRED.
1671 				 */
1672 				ASSERT(cr != NULL);
1673 				if ((tsl = crgetlabel(cr)) == NULL)
1674 					return (EINVAL);
1675 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1676 				    tsl->tsl_doi, KM_NOSLEEP);
1677 				if (newcr == NULL)
1678 					return (ENOSR);
1679 				ASSERT(newcr->cr_label != NULL);
1680 				/*
1681 				 * Move the hold on the cr_label to ixa_tsl by
1682 				 * setting cr_label to NULL. Then release newcr.
1683 				 */
1684 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1685 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1686 				newcr->cr_label = NULL;
1687 				crfree(newcr);
1688 				coa->coa_changed |= COA_HEADER_CHANGED;
1689 				coa->coa_changed |= COA_WROFF_CHANGED;
1690 			}
1691 			/* Fully handled this option. */
1692 			return (0);
1693 		}
1694 		}
1695 		break;
1696 	case IPPROTO_UDP:
1697 		switch (name) {
1698 		case UDP_NAT_T_ENDPOINT:
1699 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1700 				return (error);
1701 			}
1702 
1703 			/*
1704 			 * Use conn_family instead so we can avoid ambiguitites
1705 			 * with AF_INET6 sockets that may switch from IPv4
1706 			 * to IPv6.
1707 			 */
1708 			if (connp->conn_family != AF_INET) {
1709 				return (EAFNOSUPPORT);
1710 			}
1711 
1712 			if (!checkonly) {
1713 				mutex_enter(&connp->conn_lock);
1714 				udp->udp_nat_t_endpoint = onoff;
1715 				mutex_exit(&connp->conn_lock);
1716 				coa->coa_changed |= COA_HEADER_CHANGED;
1717 				coa->coa_changed |= COA_WROFF_CHANGED;
1718 			}
1719 			/* Fully handled this option. */
1720 			return (0);
1721 		case UDP_RCVHDR:
1722 			mutex_enter(&connp->conn_lock);
1723 			udp->udp_rcvhdr = onoff;
1724 			mutex_exit(&connp->conn_lock);
1725 			return (0);
1726 		}
1727 		break;
1728 	}
1729 	error = conn_opt_set(coa, level, name, inlen, invalp,
1730 	    checkonly, cr);
1731 	return (error);
1732 }
1733 
1734 /*
1735  * This routine sets socket options.
1736  */
1737 int
1738 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1739     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1740     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1741 {
1742 	udp_t		*udp = connp->conn_udp;
1743 	int		err;
1744 	conn_opt_arg_t	coas, *coa;
1745 	boolean_t	checkonly;
1746 	udp_stack_t	*us = udp->udp_us;
1747 
1748 	switch (optset_context) {
1749 	case SETFN_OPTCOM_CHECKONLY:
1750 		checkonly = B_TRUE;
1751 		/*
1752 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1753 		 * inlen != 0 implies value supplied and
1754 		 * 	we have to "pretend" to set it.
1755 		 * inlen == 0 implies that there is no
1756 		 * 	value part in T_CHECK request and just validation
1757 		 * done elsewhere should be enough, we just return here.
1758 		 */
1759 		if (inlen == 0) {
1760 			*outlenp = 0;
1761 			return (0);
1762 		}
1763 		break;
1764 	case SETFN_OPTCOM_NEGOTIATE:
1765 		checkonly = B_FALSE;
1766 		break;
1767 	case SETFN_UD_NEGOTIATE:
1768 	case SETFN_CONN_NEGOTIATE:
1769 		checkonly = B_FALSE;
1770 		/*
1771 		 * Negotiating local and "association-related" options
1772 		 * through T_UNITDATA_REQ.
1773 		 *
1774 		 * Following routine can filter out ones we do not
1775 		 * want to be "set" this way.
1776 		 */
1777 		if (!udp_opt_allow_udr_set(level, name)) {
1778 			*outlenp = 0;
1779 			return (EINVAL);
1780 		}
1781 		break;
1782 	default:
1783 		/*
1784 		 * We should never get here
1785 		 */
1786 		*outlenp = 0;
1787 		return (EINVAL);
1788 	}
1789 
1790 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1791 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1792 
1793 	if (thisdg_attrs != NULL) {
1794 		/* Options from T_UNITDATA_REQ */
1795 		coa = (conn_opt_arg_t *)thisdg_attrs;
1796 		ASSERT(coa->coa_connp == connp);
1797 		ASSERT(coa->coa_ixa != NULL);
1798 		ASSERT(coa->coa_ipp != NULL);
1799 		ASSERT(coa->coa_ancillary);
1800 	} else {
1801 		coa = &coas;
1802 		coas.coa_connp = connp;
1803 		/* Get a reference on conn_ixa to prevent concurrent mods */
1804 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1805 		if (coas.coa_ixa == NULL) {
1806 			*outlenp = 0;
1807 			return (ENOMEM);
1808 		}
1809 		coas.coa_ipp = &connp->conn_xmit_ipp;
1810 		coas.coa_ancillary = B_FALSE;
1811 		coas.coa_changed = 0;
1812 	}
1813 
1814 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
1815 	    cr, checkonly);
1816 	if (err != 0) {
1817 errout:
1818 		if (!coa->coa_ancillary)
1819 			ixa_refrele(coa->coa_ixa);
1820 		*outlenp = 0;
1821 		return (err);
1822 	}
1823 	/* Handle DHCPINIT here outside of lock */
1824 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1825 		uint_t	ifindex;
1826 		ill_t	*ill;
1827 
1828 		ifindex = *(uint_t *)invalp;
1829 		if (ifindex == 0) {
1830 			ill = NULL;
1831 		} else {
1832 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1833 			    coa->coa_ixa->ixa_ipst);
1834 			if (ill == NULL) {
1835 				err = ENXIO;
1836 				goto errout;
1837 			}
1838 
1839 			mutex_enter(&ill->ill_lock);
1840 			if (ill->ill_state_flags & ILL_CONDEMNED) {
1841 				mutex_exit(&ill->ill_lock);
1842 				ill_refrele(ill);
1843 				err = ENXIO;
1844 				goto errout;
1845 			}
1846 			if (IS_VNI(ill)) {
1847 				mutex_exit(&ill->ill_lock);
1848 				ill_refrele(ill);
1849 				err = EINVAL;
1850 				goto errout;
1851 			}
1852 		}
1853 		mutex_enter(&connp->conn_lock);
1854 
1855 		if (connp->conn_dhcpinit_ill != NULL) {
1856 			/*
1857 			 * We've locked the conn so conn_cleanup_ill()
1858 			 * cannot clear conn_dhcpinit_ill -- so it's
1859 			 * safe to access the ill.
1860 			 */
1861 			ill_t *oill = connp->conn_dhcpinit_ill;
1862 
1863 			ASSERT(oill->ill_dhcpinit != 0);
1864 			atomic_dec_32(&oill->ill_dhcpinit);
1865 			ill_set_inputfn(connp->conn_dhcpinit_ill);
1866 			connp->conn_dhcpinit_ill = NULL;
1867 		}
1868 
1869 		if (ill != NULL) {
1870 			connp->conn_dhcpinit_ill = ill;
1871 			atomic_inc_32(&ill->ill_dhcpinit);
1872 			ill_set_inputfn(ill);
1873 			mutex_exit(&connp->conn_lock);
1874 			mutex_exit(&ill->ill_lock);
1875 			ill_refrele(ill);
1876 		} else {
1877 			mutex_exit(&connp->conn_lock);
1878 		}
1879 	}
1880 
1881 	/*
1882 	 * Common case of OK return with outval same as inval.
1883 	 */
1884 	if (invalp != outvalp) {
1885 		/* don't trust bcopy for identical src/dst */
1886 		(void) bcopy(invalp, outvalp, inlen);
1887 	}
1888 	*outlenp = inlen;
1889 
1890 	/*
1891 	 * If this was not ancillary data, then we rebuild the headers,
1892 	 * update the IRE/NCE, and IPsec as needed.
1893 	 * Since the label depends on the destination we go through
1894 	 * ip_set_destination first.
1895 	 */
1896 	if (coa->coa_ancillary) {
1897 		return (0);
1898 	}
1899 
1900 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
1901 		in6_addr_t saddr, faddr, nexthop;
1902 		in_port_t fport;
1903 
1904 		/*
1905 		 * We clear lastdst to make sure we pick up the change
1906 		 * next time sending.
1907 		 * If we are connected we re-cache the information.
1908 		 * We ignore errors to preserve BSD behavior.
1909 		 * Note that we don't redo IPsec policy lookup here
1910 		 * since the final destination (or source) didn't change.
1911 		 */
1912 		mutex_enter(&connp->conn_lock);
1913 		connp->conn_v6lastdst = ipv6_all_zeros;
1914 
1915 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
1916 		    &connp->conn_faddr_v6, &nexthop);
1917 		saddr = connp->conn_saddr_v6;
1918 		faddr = connp->conn_faddr_v6;
1919 		fport = connp->conn_fport;
1920 		mutex_exit(&connp->conn_lock);
1921 
1922 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
1923 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
1924 			(void) ip_attr_connect(connp, coa->coa_ixa,
1925 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
1926 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
1927 		}
1928 	}
1929 
1930 	ixa_refrele(coa->coa_ixa);
1931 
1932 	if (coa->coa_changed & COA_HEADER_CHANGED) {
1933 		/*
1934 		 * Rebuild the header template if we are connected.
1935 		 * Otherwise clear conn_v6lastdst so we rebuild the header
1936 		 * in the data path.
1937 		 */
1938 		mutex_enter(&connp->conn_lock);
1939 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1940 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1941 			err = udp_build_hdr_template(connp,
1942 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
1943 			    connp->conn_fport, connp->conn_flowinfo);
1944 			if (err != 0) {
1945 				mutex_exit(&connp->conn_lock);
1946 				return (err);
1947 			}
1948 		} else {
1949 			connp->conn_v6lastdst = ipv6_all_zeros;
1950 		}
1951 		mutex_exit(&connp->conn_lock);
1952 	}
1953 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
1954 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
1955 		    connp->conn_rcvbuf);
1956 	}
1957 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1958 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1959 	}
1960 	if (coa->coa_changed & COA_WROFF_CHANGED) {
1961 		/* Increase wroff if needed */
1962 		uint_t wroff;
1963 
1964 		mutex_enter(&connp->conn_lock);
1965 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
1966 		if (udp->udp_nat_t_endpoint)
1967 			wroff += sizeof (uint32_t);
1968 		if (wroff > connp->conn_wroff) {
1969 			connp->conn_wroff = wroff;
1970 			mutex_exit(&connp->conn_lock);
1971 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
1972 		} else {
1973 			mutex_exit(&connp->conn_lock);
1974 		}
1975 	}
1976 	return (err);
1977 }
1978 
1979 /* This routine sets socket options. */
1980 int
1981 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
1982     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1983     void *thisdg_attrs, cred_t *cr)
1984 {
1985 	conn_t	*connp = Q_TO_CONN(q);
1986 	int error;
1987 
1988 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
1989 	    outlenp, outvalp, thisdg_attrs, cr);
1990 	return (error);
1991 }
1992 
1993 /*
1994  * Setup IP and UDP headers.
1995  * Returns NULL on allocation failure, in which case data_mp is freed.
1996  */
1997 mblk_t *
1998 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
1999     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2000     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2001 {
2002 	mblk_t		*mp;
2003 	udpha_t		*udpha;
2004 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2005 	uint_t		data_len;
2006 	uint32_t	cksum;
2007 	udp_t		*udp = connp->conn_udp;
2008 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2009 	uint_t		ulp_hdr_len;
2010 
2011 	data_len = msgdsize(data_mp);
2012 	ulp_hdr_len = UDPH_SIZE;
2013 	if (insert_spi)
2014 		ulp_hdr_len += sizeof (uint32_t);
2015 
2016 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2017 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2018 	if (mp == NULL) {
2019 		ASSERT(*errorp != 0);
2020 		return (NULL);
2021 	}
2022 
2023 	data_len += ulp_hdr_len;
2024 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2025 
2026 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2027 	udpha->uha_src_port = connp->conn_lport;
2028 	udpha->uha_dst_port = dstport;
2029 	udpha->uha_checksum = 0;
2030 	udpha->uha_length = htons(data_len);
2031 
2032 	/*
2033 	 * If there was a routing option/header then conn_prepend_hdr
2034 	 * has massaged it and placed the pseudo-header checksum difference
2035 	 * in the cksum argument.
2036 	 *
2037 	 * Setup header length and prepare for ULP checksum done in IP.
2038 	 *
2039 	 * We make it easy for IP to include our pseudo header
2040 	 * by putting our length in uha_checksum.
2041 	 * The IP source, destination, and length have already been set by
2042 	 * conn_prepend_hdr.
2043 	 */
2044 	cksum += data_len;
2045 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2046 	ASSERT(cksum < 0x10000);
2047 
2048 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2049 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2050 
2051 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2052 
2053 		/* IP does the checksum if uha_checksum is non-zero */
2054 		if (us->us_do_checksum) {
2055 			if (cksum == 0)
2056 				udpha->uha_checksum = 0xffff;
2057 			else
2058 				udpha->uha_checksum = htons(cksum);
2059 		} else {
2060 			udpha->uha_checksum = 0;
2061 		}
2062 	} else {
2063 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2064 
2065 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2066 		if (cksum == 0)
2067 			udpha->uha_checksum = 0xffff;
2068 		else
2069 			udpha->uha_checksum = htons(cksum);
2070 	}
2071 
2072 	/* Insert all-0s SPI now. */
2073 	if (insert_spi)
2074 		*((uint32_t *)(udpha + 1)) = 0;
2075 
2076 	return (mp);
2077 }
2078 
2079 static int
2080 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2081     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2082 {
2083 	udpha_t		*udpha;
2084 	int		error;
2085 
2086 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2087 	/*
2088 	 * We clear lastdst to make sure we don't use the lastdst path
2089 	 * next time sending since we might not have set v6dst yet.
2090 	 */
2091 	connp->conn_v6lastdst = ipv6_all_zeros;
2092 
2093 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2094 	    flowinfo);
2095 	if (error != 0)
2096 		return (error);
2097 
2098 	/*
2099 	 * Any routing header/option has been massaged. The checksum difference
2100 	 * is stored in conn_sum.
2101 	 */
2102 	udpha = (udpha_t *)connp->conn_ht_ulp;
2103 	udpha->uha_src_port = connp->conn_lport;
2104 	udpha->uha_dst_port = dstport;
2105 	udpha->uha_checksum = 0;
2106 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2107 	return (0);
2108 }
2109 
2110 static mblk_t *
2111 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2112 {
2113 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2114 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2115 		/*
2116 		 * fallback has started but messages have not been moved yet
2117 		 */
2118 		if (udp->udp_fallback_queue_head == NULL) {
2119 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2120 			udp->udp_fallback_queue_head = mp;
2121 			udp->udp_fallback_queue_tail = mp;
2122 		} else {
2123 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2124 			udp->udp_fallback_queue_tail->b_next = mp;
2125 			udp->udp_fallback_queue_tail = mp;
2126 		}
2127 		return (NULL);
2128 	} else {
2129 		/*
2130 		 * Fallback completed, let the caller putnext() the mblk.
2131 		 */
2132 		return (mp);
2133 	}
2134 }
2135 
2136 /*
2137  * Deliver data to ULP. In case we have a socket, and it's falling back to
2138  * TPI, then we'll queue the mp for later processing.
2139  */
2140 static void
2141 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2142 {
2143 	if (IPCL_IS_NONSTR(connp)) {
2144 		udp_t *udp = connp->conn_udp;
2145 		int error;
2146 
2147 		ASSERT(len == msgdsize(mp));
2148 		if ((*connp->conn_upcalls->su_recv)
2149 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2150 			mutex_enter(&udp->udp_recv_lock);
2151 			if (error == ENOSPC) {
2152 				/*
2153 				 * let's confirm while holding the lock
2154 				 */
2155 				if ((*connp->conn_upcalls->su_recv)
2156 				    (connp->conn_upper_handle, NULL, 0, 0,
2157 				    &error, NULL) < 0) {
2158 					ASSERT(error == ENOSPC);
2159 					if (error == ENOSPC) {
2160 						connp->conn_flow_cntrld =
2161 						    B_TRUE;
2162 					}
2163 				}
2164 				mutex_exit(&udp->udp_recv_lock);
2165 			} else {
2166 				ASSERT(error == EOPNOTSUPP);
2167 				mp = udp_queue_fallback(udp, mp);
2168 				mutex_exit(&udp->udp_recv_lock);
2169 				if (mp != NULL)
2170 					putnext(connp->conn_rq, mp);
2171 			}
2172 		}
2173 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2174 	} else {
2175 		if (is_system_labeled()) {
2176 			ASSERT(ira->ira_cred != NULL);
2177 			/*
2178 			 * Provide for protocols above UDP such as RPC
2179 			 * NOPID leaves db_cpid unchanged.
2180 			 */
2181 			mblk_setcred(mp, ira->ira_cred, NOPID);
2182 		}
2183 
2184 		putnext(connp->conn_rq, mp);
2185 	}
2186 }
2187 
/*
 * This is the inbound data path.
 * IP has already pulled up the IP plus UDP headers and verified alignment
 * etc.
 *
 * arg1 is the conn_t the packet is for; arg2 is unused.  mp is the M_DATA
 * message, starting at the IP header, and ira describes how it was
 * received.
 *
 * The packet is freed and counted in udpInErrors when the UDP length does
 * not agree with the length derived from the IP header, when recording the
 * IPv4 options in udp_recv_ipp fails, or when the T_UNITDATA_IND block
 * cannot be allocated.  Otherwise an M_PROTO T_UNITDATA_IND carrying the
 * source address (a sin_t for AF_INET sockets, a sin6_t otherwise) plus any
 * requested ancillary data is linked in front of mp, udpHCInDatagrams is
 * bumped, and the result is handed to udp_ulp_recv().
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	/* For now hdr_length is the IP header only; UDPH_SIZE is added below */
	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	/*
	 * Done when this packet carries IPv4 options, and also when options
	 * were recorded from an earlier packet (the snapshot length is
	 * non-zero), so that udp_recv_ipp is brought up to date either way.
	 */
	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		/* The received packet becomes the data part of the indication */
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* From here on udi_size is the size of the options area only */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port =	udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Zero sin_zero[0..7] with two 32-bit stores */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		/* The received packet becomes the data part of the indication */
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* From here on udi_size is the size of the options area only */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/*
			 * IPv4 packet on an IPv6 socket: the source is passed
			 * up as a v4-mapped address, and the v4-mapped
			 * destination is used for the source id lookup.
			 */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/*
	 * Walk past the headers unless IP_RECVHDR was set.
	 * pkt_len is reduced by the same amount so that the length passed
	 * to udp_ulp_recv() still describes the data left in mp.
	 */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

	/* Common drop path for a packet that failed the length check */
tossit:
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}
2445 
/*
 * Return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * mpctl is the control message of the request; q is the queue the replies
 * are sent on with qreply().  Up to five replies are produced, in this
 * order:
 *	- the UDP MIB counters (mpctl itself, level MIB2_UDP, name 0),
 *	- the IPv4 endpoint table (MIB2_UDP / MIB2_UDP_ENTRY),
 *	- the MLP attributes of those entries (MIB2_UDP / EXPER_XPORT_MLP),
 *	  only if that table is not empty,
 *	- the IPv6 endpoint table (MIB2_UDP6 / MIB2_UDP6_ENTRY),
 *	- the MLP attributes of those entries (MIB2_UDP6 / EXPER_XPORT_MLP),
 *	  only if that table is not empty.
 * Only endpoints in the same zone as the requesting conn are reported.
 *
 * Returns the copy of mpctl that was made on entry.  That copy is not
 * checked here, so the return value can be NULL even though the replies
 * were sent.  If mpctl is NULL, has no b_cont, or one of the working copies
 * cannot be allocated, everything (including mpctl) is freed and 0 is
 * returned without any reply.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * One further copy of the request is needed for each table that is
	 * reported.  mp6_attr_ctl is the last one allocated, so it never
	 * needs to be freed on the failure path.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Remember the requester's zone; connp is reused as a cursor below */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the per-stack UDP MIB counters, sent in mpctl itself */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash and build the tables */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Collect the MLP/label attributes.  An attribute
			 * row is emitted only if one of the checks below
			 * sets needattr.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			/* conn_ixa's label is read with conn_lock held */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/*
				 * tme_connidx is the position of the entry
				 * just appended to the IPv4 table; presumably
				 * consumers use it to match attribute rows to
				 * connection rows.
				 */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state  = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				/*
				 * Report the scope id when one is set on the
				 * transmit attributes, otherwise the bound
				 * interface.
				 */
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Same as above, but for the IPv6 table */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... (dropped rather than sent if empty) */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... (dropped rather than sent if empty) */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	/* Hand back the copy of the request that was made on entry */
	return (mp2ctl);
}
2725 
2726 /*
2727  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
2728  * NOTE: Per MIB-II, UDP has no writable data.
2729  * TODO:  If this ever actually tries to set anything, it needs to be
2730  * to do the appropriate locking.
2731  */
2732 /* ARGSUSED */
2733 int
2734 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
2735     uchar_t *ptr, int len)
2736 {
2737 	switch (level) {
2738 	case MIB2_UDP:
2739 		return (0);
2740 	default:
2741 		return (1);
2742 	}
2743 }
2744 
2745 /*
2746  * This routine creates a T_UDERROR_IND message and passes it upstream.
2747  * The address and options are copied from the T_UNITDATA_REQ message
2748  * passed in mp.  This message is freed.
2749  */
2750 static void
2751 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2752 {
2753 	struct T_unitdata_req *tudr;
2754 	mblk_t	*mp1;
2755 	uchar_t *destaddr;
2756 	t_scalar_t destlen;
2757 	uchar_t	*optaddr;
2758 	t_scalar_t optlen;
2759 
2760 	if ((mp->b_wptr < mp->b_rptr) ||
2761 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2762 		goto done;
2763 	}
2764 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2765 	destaddr = mp->b_rptr + tudr->DEST_offset;
2766 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2767 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2768 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2769 		goto done;
2770 	}
2771 	optaddr = mp->b_rptr + tudr->OPT_offset;
2772 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2773 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2774 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2775 		goto done;
2776 	}
2777 	destlen = tudr->DEST_length;
2778 	optlen = tudr->OPT_length;
2779 
2780 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2781 	    (char *)optaddr, optlen, err);
2782 	if (mp1 != NULL)
2783 		qreply(q, mp1);
2784 
2785 done:
2786 	freemsg(mp);
2787 }
2788 
2789 /*
2790  * This routine removes a port number association from a stream.  It
2791  * is called by udp_wput to handle T_UNBIND_REQ messages.
2792  */
2793 static void
2794 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2795 {
2796 	conn_t	*connp = Q_TO_CONN(q);
2797 	int	error;
2798 
2799 	error = udp_do_unbind(connp);
2800 	if (error) {
2801 		if (error < 0)
2802 			udp_err_ack(q, mp, -error, 0);
2803 		else
2804 			udp_err_ack(q, mp, TSYSERR, error);
2805 		return;
2806 	}
2807 
2808 	mp = mi_tpi_ok_ack_alloc(mp);
2809 	ASSERT(mp != NULL);
2810 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2811 	qreply(q, mp);
2812 }
2813 
2814 /*
2815  * Don't let port fall into the privileged range.
2816  * Since the extra privileged ports can be arbitrary we also
2817  * ensure that we exclude those from consideration.
2818  * us->us_epriv_ports is not sorted thus we loop over it until
2819  * there are no changes.
2820  */
2821 static in_port_t
2822 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2823 {
2824 	int i;
2825 	in_port_t nextport;
2826 	boolean_t restart = B_FALSE;
2827 	udp_stack_t *us = udp->udp_us;
2828 
2829 	if (random && udp_random_anon_port != 0) {
2830 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2831 		    sizeof (in_port_t));
2832 		/*
2833 		 * Unless changed by a sys admin, the smallest anon port
2834 		 * is 32768 and the largest anon port is 65535.  It is
2835 		 * very likely (50%) for the random port to be smaller
2836 		 * than the smallest anon port.  When that happens,
2837 		 * add port % (anon port range) to the smallest anon
2838 		 * port to get the random port.  It should fall into the
2839 		 * valid anon port range.
2840 		 */
2841 		if (port < us->us_smallest_anon_port) {
2842 			port = us->us_smallest_anon_port +
2843 			    port % (us->us_largest_anon_port -
2844 			    us->us_smallest_anon_port);
2845 		}
2846 	}
2847 
2848 retry:
2849 	if (port < us->us_smallest_anon_port)
2850 		port = us->us_smallest_anon_port;
2851 
2852 	if (port > us->us_largest_anon_port) {
2853 		port = us->us_smallest_anon_port;
2854 		if (restart)
2855 			return (0);
2856 		restart = B_TRUE;
2857 	}
2858 
2859 	if (port < us->us_smallest_nonpriv_port)
2860 		port = us->us_smallest_nonpriv_port;
2861 
2862 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2863 		if (port == us->us_epriv_ports[i]) {
2864 			port++;
2865 			/*
2866 			 * Make sure that the port is in the
2867 			 * valid range.
2868 			 */
2869 			goto retry;
2870 		}
2871 	}
2872 
2873 	if (is_system_labeled() &&
2874 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2875 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2876 		port = nextport;
2877 		goto retry;
2878 	}
2879 
2880 	return (port);
2881 }
2882 
2883 /*
2884  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2885  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2886  * the TPI options, otherwise we take them from msg_control.
2887  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2888  * Always consumes mp; never consumes tudr_mp.
2889  */
2890 static int
2891 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2892     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2893 {
2894 	udp_t		*udp = connp->conn_udp;
2895 	udp_stack_t	*us = udp->udp_us;
2896 	int		error;
2897 	ip_xmit_attr_t	*ixa;
2898 	ip_pkt_t	*ipp;
2899 	in6_addr_t	v6src;
2900 	in6_addr_t	v6dst;
2901 	in6_addr_t	v6nexthop;
2902 	in_port_t	dstport;
2903 	uint32_t	flowinfo;
2904 	uint_t		srcid;
2905 	int		is_absreq_failure = 0;
2906 	conn_opt_arg_t	coas, *coa;
2907 
2908 	ASSERT(tudr_mp != NULL || msg != NULL);
2909 
2910 	/*
2911 	 * Get ixa before checking state to handle a disconnect race.
2912 	 *
2913 	 * We need an exclusive copy of conn_ixa since the ancillary data
2914 	 * options might modify it. That copy has no pointers hence we
2915 	 * need to set them up once we've parsed the ancillary data.
2916 	 */
2917 	ixa = conn_get_ixa_exclusive(connp);
2918 	if (ixa == NULL) {
2919 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2920 		freemsg(mp);
2921 		return (ENOMEM);
2922 	}
2923 	ASSERT(cr != NULL);
2924 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2925 	ixa->ixa_cred = cr;
2926 	ixa->ixa_cpid = pid;
2927 	if (is_system_labeled()) {
2928 		/* We need to restart with a label based on the cred */
2929 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2930 	}
2931 
2932 	/* In case previous destination was multicast or multirt */
2933 	ip_attr_newdst(ixa);
2934 
2935 	/* Get a copy of conn_xmit_ipp since the options might change it */
2936 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2937 	if (ipp == NULL) {
2938 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2939 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2940 		ixa->ixa_cpid = connp->conn_cpid;
2941 		ixa_refrele(ixa);
2942 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2943 		freemsg(mp);
2944 		return (ENOMEM);
2945 	}
2946 	mutex_enter(&connp->conn_lock);
2947 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2948 	mutex_exit(&connp->conn_lock);
2949 	if (error != 0) {
2950 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2951 		freemsg(mp);
2952 		goto done;
2953 	}
2954 
2955 	/*
2956 	 * Parse the options and update ixa and ipp as a result.
2957 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2958 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2959 	 */
2960 
2961 	coa = &coas;
2962 	coa->coa_connp = connp;
2963 	coa->coa_ixa = ixa;
2964 	coa->coa_ipp = ipp;
2965 	coa->coa_ancillary = B_TRUE;
2966 	coa->coa_changed = 0;
2967 
2968 	if (msg != NULL) {
2969 		error = process_auxiliary_options(connp, msg->msg_control,
2970 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2971 	} else {
2972 		struct T_unitdata_req *tudr;
2973 
2974 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2975 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2976 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2977 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2978 		    coa, &is_absreq_failure);
2979 	}
2980 	if (error != 0) {
2981 		/*
2982 		 * Note: No special action needed in this
2983 		 * module for "is_absreq_failure"
2984 		 */
2985 		freemsg(mp);
2986 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
2987 		goto done;
2988 	}
2989 	ASSERT(is_absreq_failure == 0);
2990 
2991 	mutex_enter(&connp->conn_lock);
2992 	/*
2993 	 * If laddr is unspecified then we look at sin6_src_id.
2994 	 * We will give precedence to a source address set with IPV6_PKTINFO
2995 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2996 	 * want ip_attr_connect to select a source (since it can fail) when
2997 	 * IPV6_PKTINFO is specified.
2998 	 * If this doesn't result in a source address then we get a source
2999 	 * from ip_attr_connect() below.
3000 	 */
3001 	v6src = connp->conn_saddr_v6;
3002 	if (sin != NULL) {
3003 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3004 		dstport = sin->sin_port;
3005 		flowinfo = 0;
3006 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3007 		ixa->ixa_flags |= IXAF_IS_IPV4;
3008 	} else if (sin6 != NULL) {
3009 		v6dst = sin6->sin6_addr;
3010 		dstport = sin6->sin6_port;
3011 		flowinfo = sin6->sin6_flowinfo;
3012 		srcid = sin6->__sin6_src_id;
3013 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3014 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3015 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3016 		} else {
3017 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3018 		}
3019 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3020 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3021 			    connp->conn_netstack);
3022 		}
3023 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
3024 			ixa->ixa_flags |= IXAF_IS_IPV4;
3025 		else
3026 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3027 	} else {
3028 		/* Connected case */
3029 		v6dst = connp->conn_faddr_v6;
3030 		dstport = connp->conn_fport;
3031 		flowinfo = connp->conn_flowinfo;
3032 	}
3033 	mutex_exit(&connp->conn_lock);
3034 
3035 	/* Handle IPV6_PKTINFO setting source address. */
3036 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
3037 	    (ipp->ipp_fields & IPPF_ADDR)) {
3038 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3039 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3040 				v6src = ipp->ipp_addr;
3041 		} else {
3042 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3043 				v6src = ipp->ipp_addr;
3044 		}
3045 	}
3046 
3047 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
3048 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3049 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3050 
3051 	switch (error) {
3052 	case 0:
3053 		break;
3054 	case EADDRNOTAVAIL:
3055 		/*
3056 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3057 		 * Don't have the application see that errno
3058 		 */
3059 		error = ENETUNREACH;
3060 		goto failed;
3061 	case ENETDOWN:
3062 		/*
3063 		 * Have !ipif_addr_ready address; drop packet silently
3064 		 * until we can get applications to not send until we
3065 		 * are ready.
3066 		 */
3067 		error = 0;
3068 		goto failed;
3069 	case EHOSTUNREACH:
3070 	case ENETUNREACH:
3071 		if (ixa->ixa_ire != NULL) {
3072 			/*
3073 			 * Let conn_ip_output/ire_send_noroute return
3074 			 * the error and send any local ICMP error.
3075 			 */
3076 			error = 0;
3077 			break;
3078 		}
3079 		/* FALLTHRU */
3080 	default:
3081 	failed:
3082 		freemsg(mp);
3083 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3084 		goto done;
3085 	}
3086 
3087 	/*
3088 	 * We might be going to a different destination than last time,
3089 	 * thus check that TX allows the communication and compute any
3090 	 * needed label.
3091 	 *
3092 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3093 	 * don't have to worry about concurrent threads.
3094 	 */
3095 	if (is_system_labeled()) {
3096 		/* Using UDP MLP requires SCM_UCRED from user */
3097 		if (connp->conn_mlp_type != mlptSingle &&
3098 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
3099 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3100 			error = ECONNREFUSED;
3101 			freemsg(mp);
3102 			goto done;
3103 		}
3104 		/*
3105 		 * Check whether Trusted Solaris policy allows communication
3106 		 * with this host, and pretend that the destination is
3107 		 * unreachable if not.
3108 		 * Compute any needed label and place it in ipp_label_v4/v6.
3109 		 *
3110 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
3111 		 * ipp_label_v4/v6 to form the packet.
3112 		 *
3113 		 * Tsol note: We have ipp structure local to this thread so
3114 		 * no locking is needed.
3115 		 */
3116 		error = conn_update_label(connp, ixa, &v6dst, ipp);
3117 		if (error != 0) {
3118 			freemsg(mp);
3119 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3120 			goto done;
3121 		}
3122 	}
3123 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
3124 	    flowinfo, mp, &error);
3125 	if (mp == NULL) {
3126 		ASSERT(error != 0);
3127 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3128 		goto done;
3129 	}
3130 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
3131 		error = EMSGSIZE;
3132 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3133 		freemsg(mp);
3134 		goto done;
3135 	}
3136 	/* We're done.  Pass the packet to ip. */
3137 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3138 
3139 	error = conn_ip_output(mp, ixa);
3140 	/* No udpOutErrors if an error since IP increases its error counter */
3141 	switch (error) {
3142 	case 0:
3143 		break;
3144 	case EWOULDBLOCK:
3145 		(void) ixa_check_drain_insert(connp, ixa);
3146 		error = 0;
3147 		break;
3148 	case EADDRNOTAVAIL:
3149 		/*
3150 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3151 		 * Don't have the application see that errno
3152 		 */
3153 		error = ENETUNREACH;
3154 		/* FALLTHRU */
3155 	default:
3156 		mutex_enter(&connp->conn_lock);
3157 		/*
3158 		 * Clear the source and v6lastdst so we call ip_attr_connect
3159 		 * for the next packet and try to pick a better source.
3160 		 */
3161 		if (connp->conn_mcbc_bind)
3162 			connp->conn_saddr_v6 = ipv6_all_zeros;
3163 		else
3164 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3165 		connp->conn_v6lastdst = ipv6_all_zeros;
3166 		mutex_exit(&connp->conn_lock);
3167 		break;
3168 	}
3169 done:
3170 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3171 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3172 	ixa->ixa_cpid = connp->conn_cpid;
3173 	ixa_refrele(ixa);
3174 	ip_pkt_free(ipp);
3175 	kmem_free(ipp, sizeof (*ipp));
3176 	return (error);
3177 }
3178 
3179 /*
3180  * Handle sending an M_DATA for a connected socket.
3181  * Handles both IPv4 and IPv6.
3182  */
3183 static int
3184 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3185 {
3186 	udp_t		*udp = connp->conn_udp;
3187 	udp_stack_t	*us = udp->udp_us;
3188 	int		error;
3189 	ip_xmit_attr_t	*ixa;
3190 
3191 	/*
3192 	 * If no other thread is using conn_ixa this just gets a reference to
3193 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3194 	 */
3195 	ixa = conn_get_ixa(connp, B_FALSE);
3196 	if (ixa == NULL) {
3197 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3198 		freemsg(mp);
3199 		return (ENOMEM);
3200 	}
3201 
3202 	ASSERT(cr != NULL);
3203 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3204 	ixa->ixa_cred = cr;
3205 	ixa->ixa_cpid = pid;
3206 
3207 	mutex_enter(&connp->conn_lock);
3208 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3209 	    connp->conn_fport, connp->conn_flowinfo, &error);
3210 
3211 	if (mp == NULL) {
3212 		ASSERT(error != 0);
3213 		mutex_exit(&connp->conn_lock);
3214 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3215 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3216 		ixa->ixa_cpid = connp->conn_cpid;
3217 		ixa_refrele(ixa);
3218 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3219 		freemsg(mp);
3220 		return (error);
3221 	}
3222 
3223 	/*
3224 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3225 	 * safe copy, then we need to fill in any pointers in it.
3226 	 */
3227 	if (ixa->ixa_ire == NULL) {
3228 		in6_addr_t	faddr, saddr;
3229 		in6_addr_t	nexthop;
3230 		in_port_t	fport;
3231 
3232 		saddr = connp->conn_saddr_v6;
3233 		faddr = connp->conn_faddr_v6;
3234 		fport = connp->conn_fport;
3235 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3236 		mutex_exit(&connp->conn_lock);
3237 
3238 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3239 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3240 		    IPDF_IPSEC);
3241 		switch (error) {
3242 		case 0:
3243 			break;
3244 		case EADDRNOTAVAIL:
3245 			/*
3246 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3247 			 * Don't have the application see that errno
3248 			 */
3249 			error = ENETUNREACH;
3250 			goto failed;
3251 		case ENETDOWN:
3252 			/*
3253 			 * Have !ipif_addr_ready address; drop packet silently
3254 			 * until we can get applications to not send until we
3255 			 * are ready.
3256 			 */
3257 			error = 0;
3258 			goto failed;
3259 		case EHOSTUNREACH:
3260 		case ENETUNREACH:
3261 			if (ixa->ixa_ire != NULL) {
3262 				/*
3263 				 * Let conn_ip_output/ire_send_noroute return
3264 				 * the error and send any local ICMP error.
3265 				 */
3266 				error = 0;
3267 				break;
3268 			}
3269 			/* FALLTHRU */
3270 		default:
3271 		failed:
3272 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3273 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3274 			ixa->ixa_cpid = connp->conn_cpid;
3275 			ixa_refrele(ixa);
3276 			freemsg(mp);
3277 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3278 			return (error);
3279 		}
3280 	} else {
3281 		/* Done with conn_t */
3282 		mutex_exit(&connp->conn_lock);
3283 	}
3284 	ASSERT(ixa->ixa_ire != NULL);
3285 
3286 	/* We're done.  Pass the packet to ip. */
3287 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3288 
3289 	error = conn_ip_output(mp, ixa);
3290 	/* No udpOutErrors if an error since IP increases its error counter */
3291 	switch (error) {
3292 	case 0:
3293 		break;
3294 	case EWOULDBLOCK:
3295 		(void) ixa_check_drain_insert(connp, ixa);
3296 		error = 0;
3297 		break;
3298 	case EADDRNOTAVAIL:
3299 		/*
3300 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3301 		 * Don't have the application see that errno
3302 		 */
3303 		error = ENETUNREACH;
3304 		break;
3305 	}
3306 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3307 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3308 	ixa->ixa_cpid = connp->conn_cpid;
3309 	ixa_refrele(ixa);
3310 	return (error);
3311 }
3312 
3313 /*
3314  * Handle sending an M_DATA to the last destination.
3315  * Handles both IPv4 and IPv6.
3316  *
3317  * NOTE: The caller must hold conn_lock and we drop it here.
3318  */
3319 static int
3320 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3321     ip_xmit_attr_t *ixa)
3322 {
3323 	udp_t		*udp = connp->conn_udp;
3324 	udp_stack_t	*us = udp->udp_us;
3325 	int		error;
3326 
3327 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3328 	ASSERT(ixa != NULL);
3329 
3330 	ASSERT(cr != NULL);
3331 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3332 	ixa->ixa_cred = cr;
3333 	ixa->ixa_cpid = pid;
3334 
3335 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3336 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3337 
3338 	if (mp == NULL) {
3339 		ASSERT(error != 0);
3340 		mutex_exit(&connp->conn_lock);
3341 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3342 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3343 		ixa->ixa_cpid = connp->conn_cpid;
3344 		ixa_refrele(ixa);
3345 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3346 		freemsg(mp);
3347 		return (error);
3348 	}
3349 
3350 	/*
3351 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3352 	 * safe copy, then we need to fill in any pointers in it.
3353 	 */
3354 	if (ixa->ixa_ire == NULL) {
3355 		in6_addr_t	lastdst, lastsrc;
3356 		in6_addr_t	nexthop;
3357 		in_port_t	lastport;
3358 
3359 		lastsrc = connp->conn_v6lastsrc;
3360 		lastdst = connp->conn_v6lastdst;
3361 		lastport = connp->conn_lastdstport;
3362 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3363 		mutex_exit(&connp->conn_lock);
3364 
3365 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3366 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3367 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3368 		switch (error) {
3369 		case 0:
3370 			break;
3371 		case EADDRNOTAVAIL:
3372 			/*
3373 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3374 			 * Don't have the application see that errno
3375 			 */
3376 			error = ENETUNREACH;
3377 			goto failed;
3378 		case ENETDOWN:
3379 			/*
3380 			 * Have !ipif_addr_ready address; drop packet silently
3381 			 * until we can get applications to not send until we
3382 			 * are ready.
3383 			 */
3384 			error = 0;
3385 			goto failed;
3386 		case EHOSTUNREACH:
3387 		case ENETUNREACH:
3388 			if (ixa->ixa_ire != NULL) {
3389 				/*
3390 				 * Let conn_ip_output/ire_send_noroute return
3391 				 * the error and send any local ICMP error.
3392 				 */
3393 				error = 0;
3394 				break;
3395 			}
3396 			/* FALLTHRU */
3397 		default:
3398 		failed:
3399 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3400 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3401 			ixa->ixa_cpid = connp->conn_cpid;
3402 			ixa_refrele(ixa);
3403 			freemsg(mp);
3404 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3405 			return (error);
3406 		}
3407 	} else {
3408 		/* Done with conn_t */
3409 		mutex_exit(&connp->conn_lock);
3410 	}
3411 
3412 	/* We're done.  Pass the packet to ip. */
3413 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
3414 
3415 	error = conn_ip_output(mp, ixa);
3416 	/* No udpOutErrors if an error since IP increases its error counter */
3417 	switch (error) {
3418 	case 0:
3419 		break;
3420 	case EWOULDBLOCK:
3421 		(void) ixa_check_drain_insert(connp, ixa);
3422 		error = 0;
3423 		break;
3424 	case EADDRNOTAVAIL:
3425 		/*
3426 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3427 		 * Don't have the application see that errno
3428 		 */
3429 		error = ENETUNREACH;
3430 		/* FALLTHRU */
3431 	default:
3432 		mutex_enter(&connp->conn_lock);
3433 		/*
3434 		 * Clear the source and v6lastdst so we call ip_attr_connect
3435 		 * for the next packet and try to pick a better source.
3436 		 */
3437 		if (connp->conn_mcbc_bind)
3438 			connp->conn_saddr_v6 = ipv6_all_zeros;
3439 		else
3440 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3441 		connp->conn_v6lastdst = ipv6_all_zeros;
3442 		mutex_exit(&connp->conn_lock);
3443 		break;
3444 	}
3445 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3446 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3447 	ixa->ixa_cpid = connp->conn_cpid;
3448 	ixa_refrele(ixa);
3449 	return (error);
3450 }
3451 
3452 
3453 /*
3454  * Prepend the header template and then fill in the source and
3455  * flowinfo. The caller needs to handle the destination address since
3456  * it's setting is different if rthdr or source route.
3457  *
3458  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3459  * When it returns NULL it sets errorp.
3460  */
3461 static mblk_t *
3462 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3463     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3464 {
3465 	udp_t		*udp = connp->conn_udp;
3466 	udp_stack_t	*us = udp->udp_us;
3467 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3468 	uint_t		pktlen;
3469 	uint_t		alloclen;
3470 	uint_t		copylen;
3471 	uint8_t		*iph;
3472 	uint_t		ip_hdr_length;
3473 	udpha_t		*udpha;
3474 	uint32_t	cksum;
3475 	ip_pkt_t	*ipp;
3476 
3477 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3478 
3479 	/*
3480 	 * Copy the header template and leave space for an SPI
3481 	 */
3482 	copylen = connp->conn_ht_iphc_len;
3483 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3484 	pktlen = alloclen + msgdsize(mp);
3485 	if (pktlen > IP_MAXPACKET) {
3486 		freemsg(mp);
3487 		*errorp = EMSGSIZE;
3488 		return (NULL);
3489 	}
3490 	ixa->ixa_pktlen = pktlen;
3491 
3492 	/* check/fix buffer config, setup pointers into it */
3493 	iph = mp->b_rptr - alloclen;
3494 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3495 		mblk_t *mp1;
3496 
3497 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3498 		if (mp1 == NULL) {
3499 			freemsg(mp);
3500 			*errorp = ENOMEM;
3501 			return (NULL);
3502 		}
3503 		mp1->b_wptr = DB_LIM(mp1);
3504 		mp1->b_cont = mp;
3505 		mp = mp1;
3506 		iph = (mp->b_wptr - alloclen);
3507 	}
3508 	mp->b_rptr = iph;
3509 	bcopy(connp->conn_ht_iphc, iph, copylen);
3510 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3511 
3512 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3513 	udpha = (udpha_t *)(iph + ip_hdr_length);
3514 
3515 	/*
3516 	 * Setup header length and prepare for ULP checksum done in IP.
3517 	 * udp_build_hdr_template has already massaged any routing header
3518 	 * and placed the result in conn_sum.
3519 	 *
3520 	 * We make it easy for IP to include our pseudo header
3521 	 * by putting our length in uha_checksum.
3522 	 */
3523 	cksum = pktlen - ip_hdr_length;
3524 	udpha->uha_length = htons(cksum);
3525 
3526 	cksum += connp->conn_sum;
3527 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3528 	ASSERT(cksum < 0x10000);
3529 
3530 	ipp = &connp->conn_xmit_ipp;
3531 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3532 		ipha_t	*ipha = (ipha_t *)iph;
3533 
3534 		ipha->ipha_length = htons((uint16_t)pktlen);
3535 
3536 		/* IP does the checksum if uha_checksum is non-zero */
3537 		if (us->us_do_checksum)
3538 			udpha->uha_checksum = htons(cksum);
3539 
3540 		/* if IP_PKTINFO specified an addres it wins over bind() */
3541 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3542 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3543 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3544 			ipha->ipha_src = ipp->ipp_addr_v4;
3545 		} else {
3546 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3547 		}
3548 	} else {
3549 		ip6_t *ip6h = (ip6_t *)iph;
3550 
3551 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3552 		udpha->uha_checksum = htons(cksum);
3553 
3554 		/* if IP_PKTINFO specified an addres it wins over bind() */
3555 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3556 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3557 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3558 			ip6h->ip6_src = ipp->ipp_addr;
3559 		} else {
3560 			ip6h->ip6_src = *v6src;
3561 		}
3562 		ip6h->ip6_vcf =
3563 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3564 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3565 		if (ipp->ipp_fields & IPPF_TCLASS) {
3566 			/* Overrides the class part of flowinfo */
3567 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3568 			    ipp->ipp_tclass);
3569 		}
3570 	}
3571 
3572 	/* Insert all-0s SPI now. */
3573 	if (insert_spi)
3574 		*((uint32_t *)(udpha + 1)) = 0;
3575 
3576 	udpha->uha_dst_port = dstport;
3577 	return (mp);
3578 }
3579 
3580 /*
3581  * Send a T_UDERR_IND in response to an M_DATA
3582  */
3583 static void
3584 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3585 {
3586 	struct sockaddr_storage ss;
3587 	sin_t		*sin;
3588 	sin6_t		*sin6;
3589 	struct sockaddr	*addr;
3590 	socklen_t	addrlen;
3591 	mblk_t		*mp1;
3592 
3593 	mutex_enter(&connp->conn_lock);
3594 	/* Initialize addr and addrlen as if they're passed in */
3595 	if (connp->conn_family == AF_INET) {
3596 		sin = (sin_t *)&ss;
3597 		*sin = sin_null;
3598 		sin->sin_family = AF_INET;
3599 		sin->sin_port = connp->conn_fport;
3600 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3601 		addr = (struct sockaddr *)sin;
3602 		addrlen = sizeof (*sin);
3603 	} else {
3604 		sin6 = (sin6_t *)&ss;
3605 		*sin6 = sin6_null;
3606 		sin6->sin6_family = AF_INET6;
3607 		sin6->sin6_port = connp->conn_fport;
3608 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3609 		sin6->sin6_addr = connp->conn_faddr_v6;
3610 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3611 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3612 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3613 		} else {
3614 			sin6->sin6_scope_id = 0;
3615 		}
3616 		sin6->__sin6_src_id = 0;
3617 		addr = (struct sockaddr *)sin6;
3618 		addrlen = sizeof (*sin6);
3619 	}
3620 	mutex_exit(&connp->conn_lock);
3621 
3622 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3623 	if (mp1 != NULL)
3624 		putnext(connp->conn_rq, mp1);
3625 }
3626 
3627 /*
3628  * This routine handles all messages passed downstream.  It either
3629  * consumes the message or passes it downstream; it never queues a
3630  * a message.
3631  *
3632  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3633  * is valid when we are directly beneath the stream head, and thus sockfs
3634  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3635  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3636  * connected endpoints.
3637  */
3638 void
3639 udp_wput(queue_t *q, mblk_t *mp)
3640 {
3641 	sin6_t		*sin6;
3642 	sin_t		*sin = NULL;
3643 	uint_t		srcid;
3644 	conn_t		*connp = Q_TO_CONN(q);
3645 	udp_t		*udp = connp->conn_udp;
3646 	int		error = 0;
3647 	struct sockaddr	*addr = NULL;
3648 	socklen_t	addrlen;
3649 	udp_stack_t	*us = udp->udp_us;
3650 	struct T_unitdata_req *tudr;
3651 	mblk_t		*data_mp;
3652 	ushort_t	ipversion;
3653 	cred_t		*cr;
3654 	pid_t		pid;
3655 
3656 	/*
3657 	 * We directly handle several cases here: T_UNITDATA_REQ message
3658 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3659 	 * socket.
3660 	 */
3661 	switch (DB_TYPE(mp)) {
3662 	case M_DATA:
3663 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3664 			/* Not connected; address is required */
3665 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3666 			UDP_DBGSTAT(us, udp_data_notconn);
3667 			UDP_STAT(us, udp_out_err_notconn);
3668 			freemsg(mp);
3669 			return;
3670 		}
3671 		/*
3672 		 * All Solaris components should pass a db_credp
3673 		 * for this message, hence we ASSERT.
3674 		 * On production kernels we return an error to be robust against
3675 		 * random streams modules sitting on top of us.
3676 		 */
3677 		cr = msg_getcred(mp, &pid);
3678 		ASSERT(cr != NULL);
3679 		if (cr == NULL) {
3680 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3681 			freemsg(mp);
3682 			return;
3683 		}
3684 		ASSERT(udp->udp_issocket);
3685 		UDP_DBGSTAT(us, udp_data_conn);
3686 		error = udp_output_connected(connp, mp, cr, pid);
3687 		if (error != 0) {
3688 			UDP_STAT(us, udp_out_err_output);
3689 			if (connp->conn_rq != NULL)
3690 				udp_ud_err_connected(connp, (t_scalar_t)error);
3691 #ifdef DEBUG
3692 			printf("udp_output_connected returned %d\n", error);
3693 #endif
3694 		}
3695 		return;
3696 
3697 	case M_PROTO:
3698 	case M_PCPROTO:
3699 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3700 		if (MBLKL(mp) < sizeof (*tudr) ||
3701 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3702 			udp_wput_other(q, mp);
3703 			return;
3704 		}
3705 		break;
3706 
3707 	default:
3708 		udp_wput_other(q, mp);
3709 		return;
3710 	}
3711 
3712 	/* Handle valid T_UNITDATA_REQ here */
3713 	data_mp = mp->b_cont;
3714 	if (data_mp == NULL) {
3715 		error = EPROTO;
3716 		goto ud_error2;
3717 	}
3718 	mp->b_cont = NULL;
3719 
3720 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3721 		error = EADDRNOTAVAIL;
3722 		goto ud_error2;
3723 	}
3724 
3725 	/*
3726 	 * All Solaris components should pass a db_credp
3727 	 * for this TPI message, hence we should ASSERT.
3728 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3729 	 * passes the options from a T_UNITDATA_IND unchanged in a
3730 	 * T_UNITDATA_REQ. While that is the right thing to do for
3731 	 * some options, SCM_UCRED being the key one, this also makes it
3732 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3733 	 */
3734 	cr = msg_getcred(mp, &pid);
3735 	if (cr == NULL) {
3736 		cr = connp->conn_cred;
3737 		pid = connp->conn_cpid;
3738 	}
3739 
3740 	/*
3741 	 * If a port has not been bound to the stream, fail.
3742 	 * This is not a problem when sockfs is directly
3743 	 * above us, because it will ensure that the socket
3744 	 * is first bound before allowing data to be sent.
3745 	 */
3746 	if (udp->udp_state == TS_UNBND) {
3747 		error = EPROTO;
3748 		goto ud_error2;
3749 	}
3750 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3751 	addrlen = tudr->DEST_length;
3752 
3753 	switch (connp->conn_family) {
3754 	case AF_INET6:
3755 		sin6 = (sin6_t *)addr;
3756 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3757 		    (sin6->sin6_family != AF_INET6)) {
3758 			error = EADDRNOTAVAIL;
3759 			goto ud_error2;
3760 		}
3761 
3762 		srcid = sin6->__sin6_src_id;
3763 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3764 			/*
3765 			 * Destination is a non-IPv4-compatible IPv6 address.
3766 			 * Send out an IPv6 format packet.
3767 			 */
3768 
3769 			/*
3770 			 * If the local address is a mapped address return
3771 			 * an error.
3772 			 * It would be possible to send an IPv6 packet but the
3773 			 * response would never make it back to the application
3774 			 * since it is bound to a mapped address.
3775 			 */
3776 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3777 				error = EADDRNOTAVAIL;
3778 				goto ud_error2;
3779 			}
3780 
3781 			UDP_DBGSTAT(us, udp_out_ipv6);
3782 
3783 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3784 				sin6->sin6_addr = ipv6_loopback;
3785 			ipversion = IPV6_VERSION;
3786 		} else {
3787 			if (connp->conn_ipv6_v6only) {
3788 				error = EADDRNOTAVAIL;
3789 				goto ud_error2;
3790 			}
3791 
3792 			/*
3793 			 * If the local address is not zero or a mapped address
3794 			 * return an error.  It would be possible to send an
3795 			 * IPv4 packet but the response would never make it
3796 			 * back to the application since it is bound to a
3797 			 * non-mapped address.
3798 			 */
3799 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3800 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3801 				error = EADDRNOTAVAIL;
3802 				goto ud_error2;
3803 			}
3804 			UDP_DBGSTAT(us, udp_out_mapped);
3805 
3806 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3807 				V4_PART_OF_V6(sin6->sin6_addr) =
3808 				    htonl(INADDR_LOOPBACK);
3809 			}
3810 			ipversion = IPV4_VERSION;
3811 		}
3812 
3813 		if (tudr->OPT_length != 0) {
3814 			/*
3815 			 * If we are connected then the destination needs to be
3816 			 * the same as the connected one.
3817 			 */
3818 			if (udp->udp_state == TS_DATA_XFER &&
3819 			    !conn_same_as_last_v6(connp, sin6)) {
3820 				error = EISCONN;
3821 				goto ud_error2;
3822 			}
3823 			UDP_STAT(us, udp_out_opt);
3824 			error = udp_output_ancillary(connp, NULL, sin6,
3825 			    data_mp, mp, NULL, cr, pid);
3826 		} else {
3827 			ip_xmit_attr_t *ixa;
3828 
3829 			/*
3830 			 * We have to allocate an ip_xmit_attr_t before we grab
3831 			 * conn_lock and we need to hold conn_lock once we've
3832 			 * checked conn_same_as_last_v6 to handle concurrent
3833 			 * send* calls on a socket.
3834 			 */
3835 			ixa = conn_get_ixa(connp, B_FALSE);
3836 			if (ixa == NULL) {
3837 				error = ENOMEM;
3838 				goto ud_error2;
3839 			}
3840 			mutex_enter(&connp->conn_lock);
3841 
3842 			if (conn_same_as_last_v6(connp, sin6) &&
3843 			    connp->conn_lastsrcid == srcid &&
3844 			    ipsec_outbound_policy_current(ixa)) {
3845 				UDP_DBGSTAT(us, udp_out_lastdst);
3846 				/* udp_output_lastdst drops conn_lock */
3847 				error = udp_output_lastdst(connp, data_mp, cr,
3848 				    pid, ixa);
3849 			} else {
3850 				UDP_DBGSTAT(us, udp_out_diffdst);
3851 				/* udp_output_newdst drops conn_lock */
3852 				error = udp_output_newdst(connp, data_mp, NULL,
3853 				    sin6, ipversion, cr, pid, ixa);
3854 			}
3855 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3856 		}
3857 		if (error == 0) {
3858 			freeb(mp);
3859 			return;
3860 		}
3861 		break;
3862 
3863 	case AF_INET:
3864 		sin = (sin_t *)addr;
3865 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3866 		    (sin->sin_family != AF_INET)) {
3867 			error = EADDRNOTAVAIL;
3868 			goto ud_error2;
3869 		}
3870 		UDP_DBGSTAT(us, udp_out_ipv4);
3871 		if (sin->sin_addr.s_addr == INADDR_ANY)
3872 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3873 		ipversion = IPV4_VERSION;
3874 
3875 		srcid = 0;
3876 		if (tudr->OPT_length != 0) {
3877 			/*
3878 			 * If we are connected then the destination needs to be
3879 			 * the same as the connected one.
3880 			 */
3881 			if (udp->udp_state == TS_DATA_XFER &&
3882 			    !conn_same_as_last_v4(connp, sin)) {
3883 				error = EISCONN;
3884 				goto ud_error2;
3885 			}
3886 			UDP_STAT(us, udp_out_opt);
3887 			error = udp_output_ancillary(connp, sin, NULL,
3888 			    data_mp, mp, NULL, cr, pid);
3889 		} else {
3890 			ip_xmit_attr_t *ixa;
3891 
3892 			/*
3893 			 * We have to allocate an ip_xmit_attr_t before we grab
3894 			 * conn_lock and we need to hold conn_lock once we've
3895 			 * checked conn_same_as_last_v4 to handle concurrent
3896 			 * send* calls on a socket.
3897 			 */
3898 			ixa = conn_get_ixa(connp, B_FALSE);
3899 			if (ixa == NULL) {
3900 				error = ENOMEM;
3901 				goto ud_error2;
3902 			}
3903 			mutex_enter(&connp->conn_lock);
3904 
3905 			if (conn_same_as_last_v4(connp, sin) &&
3906 			    ipsec_outbound_policy_current(ixa)) {
3907 				UDP_DBGSTAT(us, udp_out_lastdst);
3908 				/* udp_output_lastdst drops conn_lock */
3909 				error = udp_output_lastdst(connp, data_mp, cr,
3910 				    pid, ixa);
3911 			} else {
3912 				UDP_DBGSTAT(us, udp_out_diffdst);
3913 				/* udp_output_newdst drops conn_lock */
3914 				error = udp_output_newdst(connp, data_mp, sin,
3915 				    NULL, ipversion, cr, pid, ixa);
3916 			}
3917 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3918 		}
3919 		if (error == 0) {
3920 			freeb(mp);
3921 			return;
3922 		}
3923 		break;
3924 	}
3925 	UDP_STAT(us, udp_out_err_output);
3926 	ASSERT(mp != NULL);
3927 	/* mp is freed by the following routine */
3928 	udp_ud_err(q, mp, (t_scalar_t)error);
3929 	return;
3930 
3931 ud_error2:
3932 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
3933 	freemsg(data_mp);
3934 	UDP_STAT(us, udp_out_err_output);
3935 	ASSERT(mp != NULL);
3936 	/* mp is freed by the following routine */
3937 	udp_ud_err(q, mp, (t_scalar_t)error);
3938 }
3939 
3940 /*
3941  * Handle the case of the IP address, port, flow label being different
3942  * for both IPv4 and IPv6.
3943  *
3944  * NOTE: The caller must hold conn_lock and we drop it here.
3945  */
3946 static int
3947 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3948     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3949 {
3950 	uint_t		srcid;
3951 	uint32_t	flowinfo;
3952 	udp_t		*udp = connp->conn_udp;
3953 	int		error = 0;
3954 	ip_xmit_attr_t	*oldixa;
3955 	udp_stack_t	*us = udp->udp_us;
3956 	in6_addr_t	v6src;
3957 	in6_addr_t	v6dst;
3958 	in6_addr_t	v6nexthop;
3959 	in_port_t	dstport;
3960 
3961 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3962 	ASSERT(ixa != NULL);
3963 	/*
3964 	 * We hold conn_lock across all the use and modifications of
3965 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3966 	 * stay consistent.
3967 	 */
3968 
3969 	ASSERT(cr != NULL);
3970 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3971 	ixa->ixa_cred = cr;
3972 	ixa->ixa_cpid = pid;
3973 	if (is_system_labeled()) {
3974 		/* We need to restart with a label based on the cred */
3975 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3976 	}
3977 
3978 	/*
3979 	 * If we are connected then the destination needs to be the
3980 	 * same as the connected one, which is not the case here since we
3981 	 * checked for that above.
3982 	 */
3983 	if (udp->udp_state == TS_DATA_XFER) {
3984 		mutex_exit(&connp->conn_lock);
3985 		error = EISCONN;
3986 		goto ud_error;
3987 	}
3988 
3989 	/* In case previous destination was multicast or multirt */
3990 	ip_attr_newdst(ixa);
3991 
3992 	/*
3993 	 * If laddr is unspecified then we look at sin6_src_id.
3994 	 * We will give precedence to a source address set with IPV6_PKTINFO
3995 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3996 	 * want ip_attr_connect to select a source (since it can fail) when
3997 	 * IPV6_PKTINFO is specified.
3998 	 * If this doesn't result in a source address then we get a source
3999 	 * from ip_attr_connect() below.
4000 	 */
4001 	v6src = connp->conn_saddr_v6;
4002 	if (sin != NULL) {
4003 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
4004 		dstport = sin->sin_port;
4005 		flowinfo = 0;
4006 		srcid = 0;
4007 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4008 		if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
4009 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4010 			    connp->conn_netstack);
4011 		}
4012 		ixa->ixa_flags |= IXAF_IS_IPV4;
4013 	} else {
4014 		v6dst = sin6->sin6_addr;
4015 		dstport = sin6->sin6_port;
4016 		flowinfo = sin6->sin6_flowinfo;
4017 		srcid = sin6->__sin6_src_id;
4018 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
4019 			ixa->ixa_scopeid = sin6->sin6_scope_id;
4020 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
4021 		} else {
4022 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
4023 		}
4024 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4025 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
4026 			    connp->conn_netstack);
4027 		}
4028 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
4029 			ixa->ixa_flags |= IXAF_IS_IPV4;
4030 		else
4031 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
4032 	}
4033 	/* Handle IPV6_PKTINFO setting source address. */
4034 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
4035 	    (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) {
4036 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
4037 
4038 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4039 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4040 				v6src = ipp->ipp_addr;
4041 		} else {
4042 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4043 				v6src = ipp->ipp_addr;
4044 		}
4045 	}
4046 
4047 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
4048 	mutex_exit(&connp->conn_lock);
4049 
4050 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
4051 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
4052 	switch (error) {
4053 	case 0:
4054 		break;
4055 	case EADDRNOTAVAIL:
4056 		/*
4057 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4058 		 * Don't have the application see that errno
4059 		 */
4060 		error = ENETUNREACH;
4061 		goto failed;
4062 	case ENETDOWN:
4063 		/*
4064 		 * Have !ipif_addr_ready address; drop packet silently
4065 		 * until we can get applications to not send until we
4066 		 * are ready.
4067 		 */
4068 		error = 0;
4069 		goto failed;
4070 	case EHOSTUNREACH:
4071 	case ENETUNREACH:
4072 		if (ixa->ixa_ire != NULL) {
4073 			/*
4074 			 * Let conn_ip_output/ire_send_noroute return
4075 			 * the error and send any local ICMP error.
4076 			 */
4077 			error = 0;
4078 			break;
4079 		}
4080 		/* FALLTHRU */
4081 	failed:
4082 	default:
4083 		goto ud_error;
4084 	}
4085 
4086 
4087 	/*
4088 	 * Cluster note: we let the cluster hook know that we are sending to a
4089 	 * new address and/or port.
4090 	 */
4091 	if (cl_inet_connect2 != NULL) {
4092 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
4093 		if (error != 0) {
4094 			error = EHOSTUNREACH;
4095 			goto ud_error;
4096 		}
4097 	}
4098 
4099 	mutex_enter(&connp->conn_lock);
4100 	/*
4101 	 * While we dropped the lock some other thread might have connected
4102 	 * this socket. If so we bail out with EISCONN to ensure that the
4103 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
4104 	 * and conn_*last*.
4105 	 */
4106 	if (udp->udp_state == TS_DATA_XFER) {
4107 		mutex_exit(&connp->conn_lock);
4108 		error = EISCONN;
4109 		goto ud_error;
4110 	}
4111 
4112 	/*
4113 	 * We need to rebuild the headers if
4114 	 *  - we are labeling packets (could be different for different
4115 	 *    destinations)
4116 	 *  - we have a source route (or routing header) since we need to
4117 	 *    massage that to get the pseudo-header checksum
4118 	 *  - the IP version is different than the last time
4119 	 *  - a socket option with COA_HEADER_CHANGED has been set which
4120 	 *    set conn_v6lastdst to zero.
4121 	 *
4122 	 * Otherwise the prepend function will just update the src, dst,
4123 	 * dstport, and flow label.
4124 	 */
4125 	if (is_system_labeled()) {
4126 		/* TX MLP requires SCM_UCRED and don't have that here */
4127 		if (connp->conn_mlp_type != mlptSingle) {
4128 			mutex_exit(&connp->conn_lock);
4129 			error = ECONNREFUSED;
4130 			goto ud_error;
4131 		}
4132 		/*
4133 		 * Check whether Trusted Solaris policy allows communication
4134 		 * with this host, and pretend that the destination is
4135 		 * unreachable if not.
4136 		 * Compute any needed label and place it in ipp_label_v4/v6.
4137 		 *
4138 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
4139 		 * ipp_label_v4/v6 to form the packet.
4140 		 *
4141 		 * Tsol note: Since we hold conn_lock we know no other
4142 		 * thread manipulates conn_xmit_ipp.
4143 		 */
4144 		error = conn_update_label(connp, ixa, &v6dst,
4145 		    &connp->conn_xmit_ipp);
4146 		if (error != 0) {
4147 			mutex_exit(&connp->conn_lock);
4148 			goto ud_error;
4149 		}
4150 		/* Rebuild the header template */
4151 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4152 		    flowinfo);
4153 		if (error != 0) {
4154 			mutex_exit(&connp->conn_lock);
4155 			goto ud_error;
4156 		}
4157 	} else if ((connp->conn_xmit_ipp.ipp_fields &
4158 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4159 	    ipversion != connp->conn_lastipversion ||
4160 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4161 		/* Rebuild the header template */
4162 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4163 		    flowinfo);
4164 		if (error != 0) {
4165 			mutex_exit(&connp->conn_lock);
4166 			goto ud_error;
4167 		}
4168 	} else {
4169 		/* Simply update the destination address if no source route */
4170 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4171 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
4172 
4173 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4174 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4175 				ipha->ipha_fragment_offset_and_flags |=
4176 				    IPH_DF_HTONS;
4177 			} else {
4178 				ipha->ipha_fragment_offset_and_flags &=
4179 				    ~IPH_DF_HTONS;
4180 			}
4181 		} else {
4182 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4183 			ip6h->ip6_dst = v6dst;
4184 		}
4185 	}
4186 
4187 	/*
4188 	 * Remember the dst/dstport etc which corresponds to the built header
4189 	 * template and conn_ixa.
4190 	 */
4191 	oldixa = conn_replace_ixa(connp, ixa);
4192 	connp->conn_v6lastdst = v6dst;
4193 	connp->conn_lastipversion = ipversion;
4194 	connp->conn_lastdstport = dstport;
4195 	connp->conn_lastflowinfo = flowinfo;
4196 	connp->conn_lastscopeid = ixa->ixa_scopeid;
4197 	connp->conn_lastsrcid = srcid;
4198 	/* Also remember a source to use together with lastdst */
4199 	connp->conn_v6lastsrc = v6src;
4200 
4201 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4202 	    dstport, flowinfo, &error);
4203 
4204 	/* Done with conn_t */
4205 	mutex_exit(&connp->conn_lock);
4206 	ixa_refrele(oldixa);
4207 
4208 	if (data_mp == NULL) {
4209 		ASSERT(error != 0);
4210 		goto ud_error;
4211 	}
4212 
4213 	/* We're done.  Pass the packet to ip. */
4214 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
4215 
4216 	error = conn_ip_output(data_mp, ixa);
4217 	/* No udpOutErrors if an error since IP increases its error counter */
4218 	switch (error) {
4219 	case 0:
4220 		break;
4221 	case EWOULDBLOCK:
4222 		(void) ixa_check_drain_insert(connp, ixa);
4223 		error = 0;
4224 		break;
4225 	case EADDRNOTAVAIL:
4226 		/*
4227 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4228 		 * Don't have the application see that errno
4229 		 */
4230 		error = ENETUNREACH;
4231 		/* FALLTHRU */
4232 	default:
4233 		mutex_enter(&connp->conn_lock);
4234 		/*
4235 		 * Clear the source and v6lastdst so we call ip_attr_connect
4236 		 * for the next packet and try to pick a better source.
4237 		 */
4238 		if (connp->conn_mcbc_bind)
4239 			connp->conn_saddr_v6 = ipv6_all_zeros;
4240 		else
4241 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4242 		connp->conn_v6lastdst = ipv6_all_zeros;
4243 		mutex_exit(&connp->conn_lock);
4244 		break;
4245 	}
4246 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4247 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4248 	ixa->ixa_cpid = connp->conn_cpid;
4249 	ixa_refrele(ixa);
4250 	return (error);
4251 
4252 ud_error:
4253 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4254 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4255 	ixa->ixa_cpid = connp->conn_cpid;
4256 	ixa_refrele(ixa);
4257 
4258 	freemsg(data_mp);
4259 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
4260 	UDP_STAT(us, udp_out_err_output);
4261 	return (error);
4262 }
4263 
4264 /* ARGSUSED */
4265 static void
4266 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4267 {
4268 #ifdef DEBUG
4269 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4270 #endif
4271 	freemsg(mp);
4272 }
4273 
4274 
4275 /*
4276  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4277  */
4278 static void
4279 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4280 {
4281 	void	*data;
4282 	mblk_t	*datamp = mp->b_cont;
4283 	conn_t	*connp = Q_TO_CONN(q);
4284 	udp_t	*udp = connp->conn_udp;
4285 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4286 
4287 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4288 		cmdp->cb_error = EPROTO;
4289 		qreply(q, mp);
4290 		return;
4291 	}
4292 	data = datamp->b_rptr;
4293 
4294 	mutex_enter(&connp->conn_lock);
4295 	switch (cmdp->cb_cmd) {
4296 	case TI_GETPEERNAME:
4297 		if (udp->udp_state != TS_DATA_XFER)
4298 			cmdp->cb_error = ENOTCONN;
4299 		else
4300 			cmdp->cb_error = conn_getpeername(connp, data,
4301 			    &cmdp->cb_len);
4302 		break;
4303 	case TI_GETMYNAME:
4304 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4305 		break;
4306 	default:
4307 		cmdp->cb_error = EINVAL;
4308 		break;
4309 	}
4310 	mutex_exit(&connp->conn_lock);
4311 
4312 	qreply(q, mp);
4313 }
4314 
4315 static void
4316 udp_use_pure_tpi(udp_t *udp)
4317 {
4318 	conn_t	*connp = udp->udp_connp;
4319 
4320 	mutex_enter(&connp->conn_lock);
4321 	udp->udp_issocket = B_FALSE;
4322 	mutex_exit(&connp->conn_lock);
4323 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4324 }
4325 
4326 static void
4327 udp_wput_other(queue_t *q, mblk_t *mp)
4328 {
4329 	uchar_t	*rptr = mp->b_rptr;
4330 	struct iocblk *iocp;
4331 	conn_t	*connp = Q_TO_CONN(q);
4332 	udp_t	*udp = connp->conn_udp;
4333 	cred_t	*cr;
4334 
4335 	switch (mp->b_datap->db_type) {
4336 	case M_CMD:
4337 		udp_wput_cmdblk(q, mp);
4338 		return;
4339 
4340 	case M_PROTO:
4341 	case M_PCPROTO:
4342 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4343 			/*
4344 			 * If the message does not contain a PRIM_type,
4345 			 * throw it away.
4346 			 */
4347 			freemsg(mp);
4348 			return;
4349 		}
4350 		switch (((t_primp_t)rptr)->type) {
4351 		case T_ADDR_REQ:
4352 			udp_addr_req(q, mp);
4353 			return;
4354 		case O_T_BIND_REQ:
4355 		case T_BIND_REQ:
4356 			udp_tpi_bind(q, mp);
4357 			return;
4358 		case T_CONN_REQ:
4359 			udp_tpi_connect(q, mp);
4360 			return;
4361 		case T_CAPABILITY_REQ:
4362 			udp_capability_req(q, mp);
4363 			return;
4364 		case T_INFO_REQ:
4365 			udp_info_req(q, mp);
4366 			return;
4367 		case T_UNITDATA_REQ:
4368 			/*
4369 			 * If a T_UNITDATA_REQ gets here, the address must
4370 			 * be bad.  Valid T_UNITDATA_REQs are handled
4371 			 * in udp_wput.
4372 			 */
4373 			udp_ud_err(q, mp, EADDRNOTAVAIL);
4374 			return;
4375 		case T_UNBIND_REQ:
4376 			udp_tpi_unbind(q, mp);
4377 			return;
4378 		case T_SVR4_OPTMGMT_REQ:
4379 			/*
4380 			 * All Solaris components should pass a db_credp
4381 			 * for this TPI message, hence we ASSERT.
4382 			 * But in case there is some other M_PROTO that looks
4383 			 * like a TPI message sent by some other kernel
4384 			 * component, we check and return an error.
4385 			 */
4386 			cr = msg_getcred(mp, NULL);
4387 			ASSERT(cr != NULL);
4388 			if (cr == NULL) {
4389 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4390 				return;
4391 			}
4392 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4393 			    cr)) {
4394 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4395 			}
4396 			return;
4397 
4398 		case T_OPTMGMT_REQ:
4399 			/*
4400 			 * All Solaris components should pass a db_credp
4401 			 * for this TPI message, hence we ASSERT.
4402 			 * But in case there is some other M_PROTO that looks
4403 			 * like a TPI message sent by some other kernel
4404 			 * component, we check and return an error.
4405 			 */
4406 			cr = msg_getcred(mp, NULL);
4407 			ASSERT(cr != NULL);
4408 			if (cr == NULL) {
4409 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4410 				return;
4411 			}
4412 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4413 			return;
4414 
4415 		case T_DISCON_REQ:
4416 			udp_tpi_disconnect(q, mp);
4417 			return;
4418 
4419 		/* The following TPI message is not supported by udp. */
4420 		case O_T_CONN_RES:
4421 		case T_CONN_RES:
4422 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4423 			return;
4424 
4425 		/* The following 3 TPI requests are illegal for udp. */
4426 		case T_DATA_REQ:
4427 		case T_EXDATA_REQ:
4428 		case T_ORDREL_REQ:
4429 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4430 			return;
4431 		default:
4432 			break;
4433 		}
4434 		break;
4435 	case M_FLUSH:
4436 		if (*rptr & FLUSHW)
4437 			flushq(q, FLUSHDATA);
4438 		break;
4439 	case M_IOCTL:
4440 		iocp = (struct iocblk *)mp->b_rptr;
4441 		switch (iocp->ioc_cmd) {
4442 		case TI_GETPEERNAME:
4443 			if (udp->udp_state != TS_DATA_XFER) {
4444 				/*
4445 				 * If a default destination address has not
4446 				 * been associated with the stream, then we
4447 				 * don't know the peer's name.
4448 				 */
4449 				iocp->ioc_error = ENOTCONN;
4450 				iocp->ioc_count = 0;
4451 				mp->b_datap->db_type = M_IOCACK;
4452 				qreply(q, mp);
4453 				return;
4454 			}
4455 			/* FALLTHRU */
4456 		case TI_GETMYNAME:
4457 			/*
4458 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4459 			 * need to copyin the user's strbuf structure.
4460 			 * Processing will continue in the M_IOCDATA case
4461 			 * below.
4462 			 */
4463 			mi_copyin(q, mp, NULL,
4464 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4465 			return;
4466 		case _SIOCSOCKFALLBACK:
4467 			/*
4468 			 * Either sockmod is about to be popped and the
4469 			 * socket would now be treated as a plain stream,
4470 			 * or a module is about to be pushed so we have
4471 			 * to follow pure TPI semantics.
4472 			 */
4473 			if (!udp->udp_issocket) {
4474 				DB_TYPE(mp) = M_IOCNAK;
4475 				iocp->ioc_error = EINVAL;
4476 			} else {
4477 				udp_use_pure_tpi(udp);
4478 
4479 				DB_TYPE(mp) = M_IOCACK;
4480 				iocp->ioc_error = 0;
4481 			}
4482 			iocp->ioc_count = 0;
4483 			iocp->ioc_rval = 0;
4484 			qreply(q, mp);
4485 			return;
4486 		default:
4487 			break;
4488 		}
4489 		break;
4490 	case M_IOCDATA:
4491 		udp_wput_iocdata(q, mp);
4492 		return;
4493 	default:
4494 		/* Unrecognized messages are passed through without change. */
4495 		break;
4496 	}
4497 	ip_wput_nondata(q, mp);
4498 }
4499 
4500 /*
4501  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4502  * messages.
4503  */
4504 static void
4505 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4506 {
4507 	mblk_t		*mp1;
4508 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
4509 	STRUCT_HANDLE(strbuf, sb);
4510 	uint_t		addrlen;
4511 	conn_t		*connp = Q_TO_CONN(q);
4512 	udp_t		*udp = connp->conn_udp;
4513 
4514 	/* Make sure it is one of ours. */
4515 	switch (iocp->ioc_cmd) {
4516 	case TI_GETMYNAME:
4517 	case TI_GETPEERNAME:
4518 		break;
4519 	default:
4520 		ip_wput_nondata(q, mp);
4521 		return;
4522 	}
4523 
4524 	switch (mi_copy_state(q, mp, &mp1)) {
4525 	case -1:
4526 		return;
4527 	case MI_COPY_CASE(MI_COPY_IN, 1):
4528 		break;
4529 	case MI_COPY_CASE(MI_COPY_OUT, 1):
4530 		/*
4531 		 * The address has been copied out, so now
4532 		 * copyout the strbuf.
4533 		 */
4534 		mi_copyout(q, mp);
4535 		return;
4536 	case MI_COPY_CASE(MI_COPY_OUT, 2):
4537 		/*
4538 		 * The address and strbuf have been copied out.
4539 		 * We're done, so just acknowledge the original
4540 		 * M_IOCTL.
4541 		 */
4542 		mi_copy_done(q, mp, 0);
4543 		return;
4544 	default:
4545 		/*
4546 		 * Something strange has happened, so acknowledge
4547 		 * the original M_IOCTL with an EPROTO error.
4548 		 */
4549 		mi_copy_done(q, mp, EPROTO);
4550 		return;
4551 	}
4552 
4553 	/*
4554 	 * Now we have the strbuf structure for TI_GETMYNAME
4555 	 * and TI_GETPEERNAME.  Next we copyout the requested
4556 	 * address and then we'll copyout the strbuf.
4557 	 */
4558 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4559 
4560 	if (connp->conn_family == AF_INET)
4561 		addrlen = sizeof (sin_t);
4562 	else
4563 		addrlen = sizeof (sin6_t);
4564 
4565 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
4566 		mi_copy_done(q, mp, EINVAL);
4567 		return;
4568 	}
4569 
4570 	switch (iocp->ioc_cmd) {
4571 	case TI_GETMYNAME:
4572 		break;
4573 	case TI_GETPEERNAME:
4574 		if (udp->udp_state != TS_DATA_XFER) {
4575 			mi_copy_done(q, mp, ENOTCONN);
4576 			return;
4577 		}
4578 		break;
4579 	}
4580 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4581 	if (!mp1)
4582 		return;
4583 
4584 	STRUCT_FSET(sb, len, addrlen);
4585 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4586 	case TI_GETMYNAME:
4587 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4588 		    &addrlen);
4589 		break;
4590 	case TI_GETPEERNAME:
4591 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4592 		    &addrlen);
4593 		break;
4594 	}
4595 	mp1->b_wptr += addrlen;
4596 	/* Copy out the address */
4597 	mi_copyout(q, mp);
4598 }
4599 
/*
 * Global UDP initialization: record the maximum option size derived from
 * the UDP option table in udp_max_optsize and register the per-netstack
 * create/destroy callbacks.
 */
void
udp_ddi_g_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}
4613 
/*
 * Global UDP teardown: drop the netstack registration made by
 * udp_ddi_g_init().
 */
void
udp_ddi_g_destroy(void)
{
	netstack_unregister(NS_UDP);
}
4619 
4620 #define	INET_NAME	"ip"
4621 
4622 /*
4623  * Initialize the UDP stack instance.
4624  */
4625 static void *
4626 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4627 {
4628 	udp_stack_t	*us;
4629 	int		i;
4630 	int		error = 0;
4631 	major_t		major;
4632 	size_t		arrsz;
4633 
4634 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4635 	us->us_netstack = ns;
4636 
4637 	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4638 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4639 	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4640 	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4641 
4642 	/*
4643 	 * The smallest anonymous port in the priviledged port range which UDP
4644 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4645 	 */
4646 	us->us_min_anonpriv_port = 512;
4647 
4648 	us->us_bind_fanout_size = udp_bind_fanout_size;
4649 
4650 	/* Roundup variable that might have been modified in /etc/system */
4651 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
4652 		/* Not a power of two. Round up to nearest power of two */
4653 		for (i = 0; i < 31; i++) {
4654 			if (us->us_bind_fanout_size < (1 << i))
4655 				break;
4656 		}
4657 		us->us_bind_fanout_size = 1 << i;
4658 	}
4659 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4660 	    sizeof (udp_fanout_t), KM_SLEEP);
4661 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4662 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4663 		    NULL);
4664 	}
4665 
4666 	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4667 	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4668 	    KM_SLEEP);
4669 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4670 
4671 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
4672 	us->us_mibkp = udp_kstat_init(stackid);
4673 
4674 	major = mod_name_to_major(INET_NAME);
4675 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
4676 	ASSERT(error == 0);
4677 	return (us);
4678 }
4679 
4680 /*
4681  * Free the UDP stack instance.
4682  */
4683 static void
4684 udp_stack_fini(netstackid_t stackid, void *arg)
4685 {
4686 	udp_stack_t *us = (udp_stack_t *)arg;
4687 	int i;
4688 
4689 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4690 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4691 	}
4692 
4693 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4694 	    sizeof (udp_fanout_t));
4695 
4696 	us->us_bind_fanout = NULL;
4697 
4698 	kmem_free(us->us_propinfo_tbl,
4699 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4700 	us->us_propinfo_tbl = NULL;
4701 
4702 	udp_kstat_fini(stackid, us->us_mibkp);
4703 	us->us_mibkp = NULL;
4704 
4705 	udp_kstat2_fini(stackid, us->us_kstat);
4706 	us->us_kstat = NULL;
4707 	bzero(&us->us_statistics, sizeof (us->us_statistics));
4708 
4709 	mutex_destroy(&us->us_epriv_port_lock);
4710 	ldi_ident_release(us->us_ldi_ident);
4711 	kmem_free(us, sizeof (*us));
4712 }
4713 
4714 static void *
4715 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
4716 {
4717 	kstat_t *ksp;
4718 
4719 	udp_stat_t template = {
4720 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
4721 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
4722 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
4723 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
4724 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
4725 #ifdef DEBUG
4726 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
4727 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
4728 		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
4729 		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
4730 		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
4731 		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
4732 		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
4733 #endif
4734 	};
4735 
4736 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
4737 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4738 	    KSTAT_FLAG_VIRTUAL, stackid);
4739 
4740 	if (ksp == NULL)
4741 		return (NULL);
4742 
4743 	bcopy(&template, us_statisticsp, sizeof (template));
4744 	ksp->ks_data = (void *)us_statisticsp;
4745 	ksp->ks_private = (void *)(uintptr_t)stackid;
4746 
4747 	kstat_install(ksp);
4748 	return (ksp);
4749 }
4750 
4751 static void
4752 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
4753 {
4754 	if (ksp != NULL) {
4755 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4756 		kstat_delete_netstack(ksp, stackid);
4757 	}
4758 }
4759 
4760 static void *
4761 udp_kstat_init(netstackid_t stackid)
4762 {
4763 	kstat_t	*ksp;
4764 
4765 	udp_named_kstat_t template = {
4766 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
4767 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
4768 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
4769 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
4770 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
4771 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
4772 	};
4773 
4774 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
4775 	    KSTAT_TYPE_NAMED,
4776 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
4777 
4778 	if (ksp == NULL || ksp->ks_data == NULL)
4779 		return (NULL);
4780 
4781 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
4782 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
4783 
4784 	bcopy(&template, ksp->ks_data, sizeof (template));
4785 	ksp->ks_update = udp_kstat_update;
4786 	ksp->ks_private = (void *)(uintptr_t)stackid;
4787 
4788 	kstat_install(ksp);
4789 	return (ksp);
4790 }
4791 
4792 static void
4793 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4794 {
4795 	if (ksp != NULL) {
4796 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4797 		kstat_delete_netstack(ksp, stackid);
4798 	}
4799 }
4800 
4801 static int
4802 udp_kstat_update(kstat_t *kp, int rw)
4803 {
4804 	udp_named_kstat_t *udpkp;
4805 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
4806 	netstack_t	*ns;
4807 	udp_stack_t	*us;
4808 
4809 	if ((kp == NULL) || (kp->ks_data == NULL))
4810 		return (EIO);
4811 
4812 	if (rw == KSTAT_WRITE)
4813 		return (EACCES);
4814 
4815 	ns = netstack_find_by_stackid(stackid);
4816 	if (ns == NULL)
4817 		return (-1);
4818 	us = ns->netstack_udp;
4819 	if (us == NULL) {
4820 		netstack_rele(ns);
4821 		return (-1);
4822 	}
4823 	udpkp = (udp_named_kstat_t *)kp->ks_data;
4824 
4825 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
4826 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
4827 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
4828 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
4829 	netstack_rele(ns);
4830 	return (0);
4831 }
4832 
4833 static size_t
4834 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4835 {
4836 	udp_stack_t *us = udp->udp_us;
4837 
4838 	/* We add a bit of extra buffering */
4839 	size += size >> 1;
4840 	if (size > us->us_max_buf)
4841 		size = us->us_max_buf;
4842 
4843 	udp->udp_rcv_hiwat = size;
4844 	return (size);
4845 }
4846 
4847 /*
4848  * For the lower queue so that UDP can be a dummy mux.
4849  * Nobody should be sending
4850  * packets up this stream
4851  */
4852 static void
4853 udp_lrput(queue_t *q, mblk_t *mp)
4854 {
4855 	switch (mp->b_datap->db_type) {
4856 	case M_FLUSH:
4857 		/* Turn around */
4858 		if (*mp->b_rptr & FLUSHW) {
4859 			*mp->b_rptr &= ~FLUSHR;
4860 			qreply(q, mp);
4861 			return;
4862 		}
4863 		break;
4864 	}
4865 	freemsg(mp);
4866 }
4867 
/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets down this stream.
 * Any message that does arrive is simply freed.
 */
/* ARGSUSED */
void
udp_lwput(queue_t *q, mblk_t *mp)
{
	freemsg(mp);
}
4878 
/*
 * The routines below implement the UDP socket module interface.
 */
4882 
4883 static conn_t *
4884 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4885 {
4886 	udp_t		*udp;
4887 	conn_t		*connp;
4888 	zoneid_t 	zoneid;
4889 	netstack_t 	*ns;
4890 	udp_stack_t 	*us;
4891 	int		len;
4892 
4893 	ASSERT(errorp != NULL);
4894 
4895 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4896 		return (NULL);
4897 
4898 	ns = netstack_find_by_cred(credp);
4899 	ASSERT(ns != NULL);
4900 	us = ns->netstack_udp;
4901 	ASSERT(us != NULL);
4902 
4903 	/*
4904 	 * For exclusive stacks we set the zoneid to zero
4905 	 * to make UDP operate as if in the global zone.
4906 	 */
4907 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4908 		zoneid = GLOBAL_ZONEID;
4909 	else
4910 		zoneid = crgetzoneid(credp);
4911 
4912 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4913 
4914 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4915 	if (connp == NULL) {
4916 		netstack_rele(ns);
4917 		*errorp = ENOMEM;
4918 		return (NULL);
4919 	}
4920 	udp = connp->conn_udp;
4921 
4922 	/*
4923 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4924 	 * done by netstack_find_by_cred()
4925 	 */
4926 	netstack_rele(ns);
4927 
4928 	/*
4929 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4930 	 * need to lock anything.
4931 	 */
4932 	ASSERT(connp->conn_proto == IPPROTO_UDP);
4933 	ASSERT(connp->conn_udp == udp);
4934 	ASSERT(udp->udp_connp == connp);
4935 
4936 	/* Set the initial state of the stream and the privilege status. */
4937 	udp->udp_state = TS_UNBND;
4938 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4939 	if (isv6) {
4940 		connp->conn_family = AF_INET6;
4941 		connp->conn_ipversion = IPV6_VERSION;
4942 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4943 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
4944 		len = sizeof (ip6_t) + UDPH_SIZE;
4945 	} else {
4946 		connp->conn_family = AF_INET;
4947 		connp->conn_ipversion = IPV4_VERSION;
4948 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4949 		connp->conn_default_ttl = us->us_ipv4_ttl;
4950 		len = sizeof (ipha_t) + UDPH_SIZE;
4951 	}
4952 
4953 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4954 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4955 
4956 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4957 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4958 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4959 	connp->conn_ixa->ixa_zoneid = zoneid;
4960 
4961 	connp->conn_zoneid = zoneid;
4962 
4963 	/*
4964 	 * If the caller has the process-wide flag set, then default to MAC
4965 	 * exempt mode.  This allows read-down to unlabeled hosts.
4966 	 */
4967 	if (getpflags(NET_MAC_AWARE, credp) != 0)
4968 		connp->conn_mac_mode = CONN_MAC_AWARE;
4969 
4970 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4971 
4972 	udp->udp_us = us;
4973 
4974 	connp->conn_rcvbuf = us->us_recv_hiwat;
4975 	connp->conn_sndbuf = us->us_xmit_hiwat;
4976 	connp->conn_sndlowat = us->us_xmit_lowat;
4977 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4978 
4979 	connp->conn_wroff = len + us->us_wroff_extra;
4980 	connp->conn_so_type = SOCK_DGRAM;
4981 
4982 	connp->conn_recv = udp_input;
4983 	connp->conn_recvicmp = udp_icmp_input;
4984 	crhold(credp);
4985 	connp->conn_cred = credp;
4986 	connp->conn_cpid = curproc->p_pid;
4987 	connp->conn_open_time = ddi_get_lbolt64();
4988 	/* Cache things in ixa without an extra refhold */
4989 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4990 	connp->conn_ixa->ixa_cred = connp->conn_cred;
4991 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4992 	if (is_system_labeled())
4993 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4994 
4995 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4996 
4997 	if (us->us_pmtu_discovery)
4998 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4999 
5000 	return (connp);
5001 }
5002 
5003 sock_lower_handle_t
5004 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
5005     uint_t *smodep, int *errorp, int flags, cred_t *credp)
5006 {
5007 	udp_t		*udp = NULL;
5008 	udp_stack_t	*us;
5009 	conn_t		*connp;
5010 	boolean_t	isv6;
5011 
5012 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
5013 	    (proto != 0 && proto != IPPROTO_UDP)) {
5014 		*errorp = EPROTONOSUPPORT;
5015 		return (NULL);
5016 	}
5017 
5018 	if (family == AF_INET6)
5019 		isv6 = B_TRUE;
5020 	else
5021 		isv6 = B_FALSE;
5022 
5023 	connp = udp_do_open(credp, isv6, flags, errorp);
5024 	if (connp == NULL)
5025 		return (NULL);
5026 
5027 	udp = connp->conn_udp;
5028 	ASSERT(udp != NULL);
5029 	us = udp->udp_us;
5030 	ASSERT(us != NULL);
5031 
5032 	udp->udp_issocket = B_TRUE;
5033 	connp->conn_flags |= IPCL_NONSTR;
5034 
5035 	/*
5036 	 * Set flow control
5037 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
5038 	 * need to lock anything.
5039 	 */
5040 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
5041 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
5042 
5043 	connp->conn_flow_cntrld = B_FALSE;
5044 
5045 	mutex_enter(&connp->conn_lock);
5046 	connp->conn_state_flags &= ~CONN_INCIPIENT;
5047 	mutex_exit(&connp->conn_lock);
5048 
5049 	*errorp = 0;
5050 	*smodep = SM_ATOMIC;
5051 	*sock_downcalls = &sock_udp_downcalls;
5052 	return ((sock_lower_handle_t)connp);
5053 }
5054 
5055 /* ARGSUSED3 */
5056 void
5057 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
5058     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
5059 {
5060 	conn_t 		*connp = (conn_t *)proto_handle;
5061 	struct sock_proto_props sopp;
5062 
5063 	/* All Solaris components should pass a cred for this operation. */
5064 	ASSERT(cr != NULL);
5065 
5066 	connp->conn_upcalls = sock_upcalls;
5067 	connp->conn_upper_handle = sock_handle;
5068 
5069 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
5070 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
5071 	sopp.sopp_wroff = connp->conn_wroff;
5072 	sopp.sopp_maxblk = INFPSZ;
5073 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
5074 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
5075 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
5076 	sopp.sopp_maxpsz =
5077 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
5078 	    UDP_MAXPACKET_IPV6;
5079 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
5080 	    udp_mod_info.mi_minpsz;
5081 
5082 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
5083 	    &sopp);
5084 }
5085 
5086 static void
5087 udp_do_close(conn_t *connp)
5088 {
5089 	udp_t	*udp;
5090 
5091 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
5092 	udp = connp->conn_udp;
5093 
5094 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
5095 		/*
5096 		 * Running in cluster mode - register unbind information
5097 		 */
5098 		if (connp->conn_ipversion == IPV4_VERSION) {
5099 			(*cl_inet_unbind)(
5100 			    connp->conn_netstack->netstack_stackid,
5101 			    IPPROTO_UDP, AF_INET,
5102 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5103 			    (in_port_t)connp->conn_lport, NULL);
5104 		} else {
5105 			(*cl_inet_unbind)(
5106 			    connp->conn_netstack->netstack_stackid,
5107 			    IPPROTO_UDP, AF_INET6,
5108 			    (uint8_t *)&(connp->conn_laddr_v6),
5109 			    (in_port_t)connp->conn_lport, NULL);
5110 		}
5111 	}
5112 
5113 	udp_bind_hash_remove(udp, B_FALSE);
5114 
5115 	ip_quiesce_conn(connp);
5116 
5117 	if (!IPCL_IS_NONSTR(connp)) {
5118 		ASSERT(connp->conn_wq != NULL);
5119 		ASSERT(connp->conn_rq != NULL);
5120 		qprocsoff(connp->conn_rq);
5121 	}
5122 
5123 	udp_close_free(connp);
5124 
5125 	/*
5126 	 * Now we are truly single threaded on this stream, and can
5127 	 * delete the things hanging off the connp, and finally the connp.
5128 	 * We removed this connp from the fanout list, it cannot be
5129 	 * accessed thru the fanouts, and we already waited for the
5130 	 * conn_ref to drop to 0. We are already in close, so
5131 	 * there cannot be any other thread from the top. qprocsoff
5132 	 * has completed, and service has completed or won't run in
5133 	 * future.
5134 	 */
5135 	ASSERT(connp->conn_ref == 1);
5136 
5137 	if (!IPCL_IS_NONSTR(connp)) {
5138 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
5139 	} else {
5140 		ip_free_helper_stream(connp);
5141 	}
5142 
5143 	connp->conn_ref--;
5144 	ipcl_conn_destroy(connp);
5145 }
5146 
5147 /* ARGSUSED1 */
5148 int
5149 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
5150 {
5151 	conn_t	*connp = (conn_t *)proto_handle;
5152 
5153 	/* All Solaris components should pass a cred for this operation. */
5154 	ASSERT(cr != NULL);
5155 
5156 	udp_do_close(connp);
5157 	return (0);
5158 }
5159 
5160 static int
5161 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
5162     boolean_t bind_to_req_port_only)
5163 {
5164 	sin_t		*sin;
5165 	sin6_t		*sin6;
5166 	udp_t		*udp = connp->conn_udp;
5167 	int		error = 0;
5168 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
5169 	in_port_t	port;		/* Host byte order */
5170 	in_port_t	requested_port;	/* Host byte order */
5171 	int		count;
5172 	ipaddr_t	v4src;		/* Set if AF_INET */
5173 	in6_addr_t	v6src;
5174 	int		loopmax;
5175 	udp_fanout_t	*udpf;
5176 	in_port_t	lport;		/* Network byte order */
5177 	uint_t		scopeid = 0;
5178 	zoneid_t	zoneid = IPCL_ZONEID(connp);
5179 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5180 	boolean_t	is_inaddr_any;
5181 	mlp_type_t	addrtype, mlptype;
5182 	udp_stack_t	*us = udp->udp_us;
5183 
5184 	switch (len) {
5185 	case sizeof (sin_t):	/* Complete IPv4 address */
5186 		sin = (sin_t *)sa;
5187 
5188 		if (sin == NULL || !OK_32PTR((char *)sin))
5189 			return (EINVAL);
5190 
5191 		if (connp->conn_family != AF_INET ||
5192 		    sin->sin_family != AF_INET) {
5193 			return (EAFNOSUPPORT);
5194 		}
5195 		v4src = sin->sin_addr.s_addr;
5196 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
5197 		if (v4src != INADDR_ANY) {
5198 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
5199 			    B_TRUE);
5200 		}
5201 		port = ntohs(sin->sin_port);
5202 		break;
5203 
5204 	case sizeof (sin6_t):	/* complete IPv6 address */
5205 		sin6 = (sin6_t *)sa;
5206 
5207 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
5208 			return (EINVAL);
5209 
5210 		if (connp->conn_family != AF_INET6 ||
5211 		    sin6->sin6_family != AF_INET6) {
5212 			return (EAFNOSUPPORT);
5213 		}
5214 		v6src = sin6->sin6_addr;
5215 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5216 			if (connp->conn_ipv6_v6only)
5217 				return (EADDRNOTAVAIL);
5218 
5219 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5220 			if (v4src != INADDR_ANY) {
5221 				laddr_type = ip_laddr_verify_v4(v4src,
5222 				    zoneid, ipst, B_FALSE);
5223 			}
5224 		} else {
5225 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5226 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5227 					scopeid = sin6->sin6_scope_id;
5228 				laddr_type = ip_laddr_verify_v6(&v6src,
5229 				    zoneid, ipst, B_TRUE, scopeid);
5230 			}
5231 		}
5232 		port = ntohs(sin6->sin6_port);
5233 		break;
5234 
5235 	default:		/* Invalid request */
5236 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5237 		    "udp_bind: bad ADDR_length length %u", len);
5238 		return (-TBADADDR);
5239 	}
5240 
5241 	/* Is the local address a valid unicast, multicast, or broadcast? */
5242 	if (laddr_type == IPVL_BAD)
5243 		return (EADDRNOTAVAIL);
5244 
5245 	requested_port = port;
5246 
5247 	if (requested_port == 0 || !bind_to_req_port_only)
5248 		bind_to_req_port_only = B_FALSE;
5249 	else		/* T_BIND_REQ and requested_port != 0 */
5250 		bind_to_req_port_only = B_TRUE;
5251 
5252 	if (requested_port == 0) {
5253 		/*
5254 		 * If the application passed in zero for the port number, it
5255 		 * doesn't care which port number we bind to. Get one in the
5256 		 * valid range.
5257 		 */
5258 		if (connp->conn_anon_priv_bind) {
5259 			port = udp_get_next_priv_port(udp);
5260 		} else {
5261 			port = udp_update_next_port(udp,
5262 			    us->us_next_port_to_try, B_TRUE);
5263 		}
5264 	} else {
5265 		/*
5266 		 * If the port is in the well-known privileged range,
5267 		 * make sure the caller was privileged.
5268 		 */
5269 		int i;
5270 		boolean_t priv = B_FALSE;
5271 
5272 		if (port < us->us_smallest_nonpriv_port) {
5273 			priv = B_TRUE;
5274 		} else {
5275 			for (i = 0; i < us->us_num_epriv_ports; i++) {
5276 				if (port == us->us_epriv_ports[i]) {
5277 					priv = B_TRUE;
5278 					break;
5279 				}
5280 			}
5281 		}
5282 
5283 		if (priv) {
5284 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5285 				return (-TACCES);
5286 		}
5287 	}
5288 
5289 	if (port == 0)
5290 		return (-TNOADDR);
5291 
5292 	/*
5293 	 * The state must be TS_UNBND. TPI mandates that users must send
5294 	 * TPI primitives only 1 at a time and wait for the response before
5295 	 * sending the next primitive.
5296 	 */
5297 	mutex_enter(&connp->conn_lock);
5298 	if (udp->udp_state != TS_UNBND) {
5299 		mutex_exit(&connp->conn_lock);
5300 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5301 		    "udp_bind: bad state, %u", udp->udp_state);
5302 		return (-TOUTSTATE);
5303 	}
5304 	/*
5305 	 * Copy the source address into our udp structure. This address
5306 	 * may still be zero; if so, IP will fill in the correct address
5307 	 * each time an outbound packet is passed to it. Since the udp is
5308 	 * not yet in the bind hash list, we don't grab the uf_lock to
5309 	 * change conn_ipversion
5310 	 */
5311 	if (connp->conn_family == AF_INET) {
5312 		ASSERT(sin != NULL);
5313 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5314 	} else {
5315 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5316 			/*
5317 			 * no need to hold the uf_lock to set the conn_ipversion
5318 			 * since we are not yet in the fanout list
5319 			 */
5320 			connp->conn_ipversion = IPV4_VERSION;
5321 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5322 		} else {
5323 			connp->conn_ipversion = IPV6_VERSION;
5324 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5325 		}
5326 	}
5327 
5328 	/*
5329 	 * If conn_reuseaddr is not set, then we have to make sure that
5330 	 * the IP address and port number the application requested
5331 	 * (or we selected for the application) is not being used by
5332 	 * another stream.  If another stream is already using the
5333 	 * requested IP address and port, the behavior depends on
5334 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
5335 	 * search for any an unused port to bind to the stream.
5336 	 *
5337 	 * As per the BSD semantics, as modified by the Deering multicast
5338 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
5339 	 * to the same port independent of the local IP address.
5340 	 *
5341 	 * This is slightly different than in SunOS 4.X which did not
5342 	 * support IP multicast. Note that the change implemented by the
5343 	 * Deering multicast code effects all binds - not only binding
5344 	 * to IP multicast addresses.
5345 	 *
5346 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
5347 	 * order to guarantee a unique port.
5348 	 */
5349 
5350 	count = 0;
5351 	if (connp->conn_anon_priv_bind) {
5352 		/*
5353 		 * loopmax = (IPPORT_RESERVED-1) -
5354 		 *    us->us_min_anonpriv_port + 1
5355 		 */
5356 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5357 	} else {
5358 		loopmax = us->us_largest_anon_port -
5359 		    us->us_smallest_anon_port + 1;
5360 	}
5361 
5362 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5363 
5364 	for (;;) {
5365 		udp_t		*udp1;
5366 		boolean_t	found_exclbind = B_FALSE;
5367 		conn_t		*connp1;
5368 
5369 		/*
5370 		 * Walk through the list of udp streams bound to
5371 		 * requested port with the same IP address.
5372 		 */
5373 		lport = htons(port);
5374 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5375 		    us->us_bind_fanout_size)];
5376 		mutex_enter(&udpf->uf_lock);
5377 		for (udp1 = udpf->uf_udp; udp1 != NULL;
5378 		    udp1 = udp1->udp_bind_hash) {
5379 			connp1 = udp1->udp_connp;
5380 
5381 			if (lport != connp1->conn_lport)
5382 				continue;
5383 
5384 			/*
5385 			 * On a labeled system, we must treat bindings to ports
5386 			 * on shared IP addresses by sockets with MAC exemption
5387 			 * privilege as being in all zones, as there's
5388 			 * otherwise no way to identify the right receiver.
5389 			 */
5390 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5391 				continue;
5392 
5393 			/*
5394 			 * If UDP_EXCLBIND is set for either the bound or
5395 			 * binding endpoint, the semantics of bind
5396 			 * is changed according to the following chart.
5397 			 *
5398 			 * spec = specified address (v4 or v6)
5399 			 * unspec = unspecified address (v4 or v6)
5400 			 * A = specified addresses are different for endpoints
5401 			 *
5402 			 * bound	bind to		allowed?
5403 			 * -------------------------------------
5404 			 * unspec	unspec		no
5405 			 * unspec	spec		no
5406 			 * spec		unspec		no
5407 			 * spec		spec		yes if A
5408 			 *
5409 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
5410 			 * as UDP_EXCLBIND, except that zoneid is ignored.
5411 			 */
5412 			if (connp1->conn_exclbind || connp->conn_exclbind ||
5413 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5414 				if (V6_OR_V4_INADDR_ANY(
5415 				    connp1->conn_bound_addr_v6) ||
5416 				    is_inaddr_any ||
5417 				    IN6_ARE_ADDR_EQUAL(
5418 				    &connp1->conn_bound_addr_v6,
5419 				    &v6src)) {
5420 					found_exclbind = B_TRUE;
5421 					break;
5422 				}
5423 				continue;
5424 			}
5425 
5426 			/*
5427 			 * Check ipversion to allow IPv4 and IPv6 sockets to
5428 			 * have disjoint port number spaces.
5429 			 */
5430 			if (connp->conn_ipversion != connp1->conn_ipversion) {
5431 
5432 				/*
5433 				 * On the first time through the loop, if the
5434 				 * the user intentionally specified a
5435 				 * particular port number, then ignore any
5436 				 * bindings of the other protocol that may
5437 				 * conflict. This allows the user to bind IPv6
5438 				 * alone and get both v4 and v6, or bind both
5439 				 * both and get each seperately. On subsequent
5440 				 * times through the loop, we're checking a
5441 				 * port that we chose (not the user) and thus
5442 				 * we do not allow casual duplicate bindings.
5443 				 */
5444 				if (count == 0 && requested_port != 0)
5445 					continue;
5446 			}
5447 
5448 			/*
5449 			 * No difference depending on SO_REUSEADDR.
5450 			 *
5451 			 * If existing port is bound to a
5452 			 * non-wildcard IP address and
5453 			 * the requesting stream is bound to
5454 			 * a distinct different IP addresses
5455 			 * (non-wildcard, also), keep going.
5456 			 */
5457 			if (!is_inaddr_any &&
5458 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5459 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5460 			    &v6src)) {
5461 				continue;
5462 			}
5463 			break;
5464 		}
5465 
5466 		if (!found_exclbind &&
5467 		    (connp->conn_reuseaddr && requested_port != 0)) {
5468 			break;
5469 		}
5470 
5471 		if (udp1 == NULL) {
5472 			/*
5473 			 * No other stream has this IP address
5474 			 * and port number. We can use it.
5475 			 */
5476 			break;
5477 		}
5478 		mutex_exit(&udpf->uf_lock);
5479 		if (bind_to_req_port_only) {
5480 			/*
5481 			 * We get here only when requested port
5482 			 * is bound (and only first  of the for()
5483 			 * loop iteration).
5484 			 *
5485 			 * The semantics of this bind request
5486 			 * require it to fail so we return from
5487 			 * the routine (and exit the loop).
5488 			 *
5489 			 */
5490 			mutex_exit(&connp->conn_lock);
5491 			return (-TADDRBUSY);
5492 		}
5493 
5494 		if (connp->conn_anon_priv_bind) {
5495 			port = udp_get_next_priv_port(udp);
5496 		} else {
5497 			if ((count == 0) && (requested_port != 0)) {
5498 				/*
5499 				 * If the application wants us to find
5500 				 * a port, get one to start with. Set
5501 				 * requested_port to 0, so that we will
5502 				 * update us->us_next_port_to_try below.
5503 				 */
5504 				port = udp_update_next_port(udp,
5505 				    us->us_next_port_to_try, B_TRUE);
5506 				requested_port = 0;
5507 			} else {
5508 				port = udp_update_next_port(udp, port + 1,
5509 				    B_FALSE);
5510 			}
5511 		}
5512 
5513 		if (port == 0 || ++count >= loopmax) {
5514 			/*
5515 			 * We've tried every possible port number and
5516 			 * there are none available, so send an error
5517 			 * to the user.
5518 			 */
5519 			mutex_exit(&connp->conn_lock);
5520 			return (-TNOADDR);
5521 		}
5522 	}
5523 
5524 	/*
5525 	 * Copy the source address into our udp structure.  This address
5526 	 * may still be zero; if so, ip_attr_connect will fill in the correct
5527 	 * address when a packet is about to be sent.
5528 	 * If we are binding to a broadcast or multicast address then
5529 	 * we just set the conn_bound_addr since we don't want to use
5530 	 * that as the source address when sending.
5531 	 */
5532 	connp->conn_bound_addr_v6 = v6src;
5533 	connp->conn_laddr_v6 = v6src;
5534 	if (scopeid != 0) {
5535 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5536 		connp->conn_ixa->ixa_scopeid = scopeid;
5537 		connp->conn_incoming_ifindex = scopeid;
5538 	} else {
5539 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5540 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5541 	}
5542 
5543 	switch (laddr_type) {
5544 	case IPVL_UNICAST_UP:
5545 	case IPVL_UNICAST_DOWN:
5546 		connp->conn_saddr_v6 = v6src;
5547 		connp->conn_mcbc_bind = B_FALSE;
5548 		break;
5549 	case IPVL_MCAST:
5550 	case IPVL_BCAST:
5551 		/* ip_set_destination will pick a source address later */
5552 		connp->conn_saddr_v6 = ipv6_all_zeros;
5553 		connp->conn_mcbc_bind = B_TRUE;
5554 		break;
5555 	}
5556 
5557 	/* Any errors after this point should use late_error */
5558 	connp->conn_lport = lport;
5559 
5560 	/*
5561 	 * Now reset the next anonymous port if the application requested
5562 	 * an anonymous port, or we handed out the next anonymous port.
5563 	 */
5564 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5565 		us->us_next_port_to_try = port + 1;
5566 	}
5567 
5568 	/* Initialize the T_BIND_ACK. */
5569 	if (connp->conn_family == AF_INET) {
5570 		sin->sin_port = connp->conn_lport;
5571 	} else {
5572 		sin6->sin6_port = connp->conn_lport;
5573 	}
5574 	udp->udp_state = TS_IDLE;
5575 	udp_bind_hash_insert(udpf, udp);
5576 	mutex_exit(&udpf->uf_lock);
5577 	mutex_exit(&connp->conn_lock);
5578 
5579 	if (cl_inet_bind) {
5580 		/*
5581 		 * Running in cluster mode - register bind information
5582 		 */
5583 		if (connp->conn_ipversion == IPV4_VERSION) {
5584 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5585 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5586 			    (in_port_t)connp->conn_lport, NULL);
5587 		} else {
5588 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5589 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5590 			    (in_port_t)connp->conn_lport, NULL);
5591 		}
5592 	}
5593 
5594 	mutex_enter(&connp->conn_lock);
5595 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5596 	if (is_system_labeled() && (!connp->conn_anon_port ||
5597 	    connp->conn_anon_mlp)) {
5598 		uint16_t mlpport;
5599 		zone_t *zone;
5600 
5601 		zone = crgetzone(cr);
5602 		connp->conn_mlp_type =
5603 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5604 		    mlptSingle;
5605 		addrtype = tsol_mlp_addr_type(
5606 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5607 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5608 		if (addrtype == mlptSingle) {
5609 			error = -TNOADDR;
5610 			mutex_exit(&connp->conn_lock);
5611 			goto late_error;
5612 		}
5613 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
5614 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5615 		    addrtype);
5616 
5617 		/*
5618 		 * It is a coding error to attempt to bind an MLP port
5619 		 * without first setting SOL_SOCKET/SCM_UCRED.
5620 		 */
5621 		if (mlptype != mlptSingle &&
5622 		    connp->conn_mlp_type == mlptSingle) {
5623 			error = EINVAL;
5624 			mutex_exit(&connp->conn_lock);
5625 			goto late_error;
5626 		}
5627 
5628 		/*
5629 		 * It is an access violation to attempt to bind an MLP port
5630 		 * without NET_BINDMLP privilege.
5631 		 */
5632 		if (mlptype != mlptSingle &&
5633 		    secpolicy_net_bindmlp(cr) != 0) {
5634 			if (connp->conn_debug) {
5635 				(void) strlog(UDP_MOD_ID, 0, 1,
5636 				    SL_ERROR|SL_TRACE,
5637 				    "udp_bind: no priv for multilevel port %d",
5638 				    mlpport);
5639 			}
5640 			error = -TACCES;
5641 			mutex_exit(&connp->conn_lock);
5642 			goto late_error;
5643 		}
5644 
5645 		/*
5646 		 * If we're specifically binding a shared IP address and the
5647 		 * port is MLP on shared addresses, then check to see if this
5648 		 * zone actually owns the MLP.  Reject if not.
5649 		 */
5650 		if (mlptype == mlptShared && addrtype == mlptShared) {
5651 			/*
5652 			 * No need to handle exclusive-stack zones since
5653 			 * ALL_ZONES only applies to the shared stack.
5654 			 */
5655 			zoneid_t mlpzone;
5656 
5657 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5658 			    htons(mlpport));
5659 			if (connp->conn_zoneid != mlpzone) {
5660 				if (connp->conn_debug) {
5661 					(void) strlog(UDP_MOD_ID, 0, 1,
5662 					    SL_ERROR|SL_TRACE,
5663 					    "udp_bind: attempt to bind port "
5664 					    "%d on shared addr in zone %d "
5665 					    "(should be %d)",
5666 					    mlpport, connp->conn_zoneid,
5667 					    mlpzone);
5668 				}
5669 				error = -TACCES;
5670 				mutex_exit(&connp->conn_lock);
5671 				goto late_error;
5672 			}
5673 		}
5674 		if (connp->conn_anon_port) {
5675 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5676 			    port, B_TRUE);
5677 			if (error != 0) {
5678 				if (connp->conn_debug) {
5679 					(void) strlog(UDP_MOD_ID, 0, 1,
5680 					    SL_ERROR|SL_TRACE,
5681 					    "udp_bind: cannot establish anon "
5682 					    "MLP for port %d", port);
5683 				}
5684 				error = -TACCES;
5685 				mutex_exit(&connp->conn_lock);
5686 				goto late_error;
5687 			}
5688 		}
5689 		connp->conn_mlp_type = mlptype;
5690 	}
5691 
5692 	/*
5693 	 * We create an initial header template here to make a subsequent
5694 	 * sendto have a starting point. Since conn_last_dst is zero the
5695 	 * first sendto will always follow the 'dst changed' code path.
5696 	 * Note that we defer massaging options and the related checksum
5697 	 * adjustment until we have a destination address.
5698 	 */
5699 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5700 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5701 	if (error != 0) {
5702 		mutex_exit(&connp->conn_lock);
5703 		goto late_error;
5704 	}
5705 	/* Just in case */
5706 	connp->conn_faddr_v6 = ipv6_all_zeros;
5707 	connp->conn_fport = 0;
5708 	connp->conn_v6lastdst = ipv6_all_zeros;
5709 	mutex_exit(&connp->conn_lock);
5710 
5711 	error = ip_laddr_fanout_insert(connp);
5712 	if (error != 0)
5713 		goto late_error;
5714 
5715 	/* Bind succeeded */
5716 	return (0);
5717 
5718 late_error:
5719 	/* We had already picked the port number, and then the bind failed */
5720 	mutex_enter(&connp->conn_lock);
5721 	udpf = &us->us_bind_fanout[
5722 	    UDP_BIND_HASH(connp->conn_lport,
5723 	    us->us_bind_fanout_size)];
5724 	mutex_enter(&udpf->uf_lock);
5725 	connp->conn_saddr_v6 = ipv6_all_zeros;
5726 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5727 	connp->conn_laddr_v6 = ipv6_all_zeros;
5728 	if (scopeid != 0) {
5729 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5730 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5731 	}
5732 	udp->udp_state = TS_UNBND;
5733 	udp_bind_hash_remove(udp, B_TRUE);
5734 	connp->conn_lport = 0;
5735 	mutex_exit(&udpf->uf_lock);
5736 	connp->conn_anon_port = B_FALSE;
5737 	connp->conn_mlp_type = mlptSingle;
5738 
5739 	connp->conn_v6lastdst = ipv6_all_zeros;
5740 
5741 	/* Restore the header that was built above - different source address */
5742 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5743 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5744 	mutex_exit(&connp->conn_lock);
5745 	return (error);
5746 }
5747 
5748 int
5749 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5750     socklen_t len, cred_t *cr)
5751 {
5752 	int		error;
5753 	conn_t		*connp;
5754 
5755 	/* All Solaris components should pass a cred for this operation. */
5756 	ASSERT(cr != NULL);
5757 
5758 	connp = (conn_t *)proto_handle;
5759 
5760 	if (sa == NULL)
5761 		error = udp_do_unbind(connp);
5762 	else
5763 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5764 
5765 	if (error < 0) {
5766 		if (error == -TOUTSTATE)
5767 			error = EINVAL;
5768 		else
5769 			error = proto_tlitosyserr(-error);
5770 	}
5771 
5772 	return (error);
5773 }
5774 
5775 static int
5776 udp_implicit_bind(conn_t *connp, cred_t *cr)
5777 {
5778 	sin6_t sin6addr;
5779 	sin_t *sin;
5780 	sin6_t *sin6;
5781 	socklen_t len;
5782 	int error;
5783 
5784 	/* All Solaris components should pass a cred for this operation. */
5785 	ASSERT(cr != NULL);
5786 
5787 	if (connp->conn_family == AF_INET) {
5788 		len = sizeof (struct sockaddr_in);
5789 		sin = (sin_t *)&sin6addr;
5790 		*sin = sin_null;
5791 		sin->sin_family = AF_INET;
5792 		sin->sin_addr.s_addr = INADDR_ANY;
5793 	} else {
5794 		ASSERT(connp->conn_family == AF_INET6);
5795 		len = sizeof (sin6_t);
5796 		sin6 = (sin6_t *)&sin6addr;
5797 		*sin6 = sin6_null;
5798 		sin6->sin6_family = AF_INET6;
5799 		V6_SET_ZERO(sin6->sin6_addr);
5800 	}
5801 
5802 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5803 	    cr, B_FALSE);
5804 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5805 }
5806 
5807 /*
5808  * This routine removes a port number association from a stream. It
5809  * is called by udp_unbind and udp_tpi_unbind.
5810  */
5811 static int
5812 udp_do_unbind(conn_t *connp)
5813 {
5814 	udp_t 		*udp = connp->conn_udp;
5815 	udp_fanout_t	*udpf;
5816 	udp_stack_t	*us = udp->udp_us;
5817 
5818 	if (cl_inet_unbind != NULL) {
5819 		/*
5820 		 * Running in cluster mode - register unbind information
5821 		 */
5822 		if (connp->conn_ipversion == IPV4_VERSION) {
5823 			(*cl_inet_unbind)(
5824 			    connp->conn_netstack->netstack_stackid,
5825 			    IPPROTO_UDP, AF_INET,
5826 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5827 			    (in_port_t)connp->conn_lport, NULL);
5828 		} else {
5829 			(*cl_inet_unbind)(
5830 			    connp->conn_netstack->netstack_stackid,
5831 			    IPPROTO_UDP, AF_INET6,
5832 			    (uint8_t *)&(connp->conn_laddr_v6),
5833 			    (in_port_t)connp->conn_lport, NULL);
5834 		}
5835 	}
5836 
5837 	mutex_enter(&connp->conn_lock);
5838 	/* If a bind has not been done, we can't unbind. */
5839 	if (udp->udp_state == TS_UNBND) {
5840 		mutex_exit(&connp->conn_lock);
5841 		return (-TOUTSTATE);
5842 	}
5843 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5844 	    us->us_bind_fanout_size)];
5845 	mutex_enter(&udpf->uf_lock);
5846 	udp_bind_hash_remove(udp, B_TRUE);
5847 	connp->conn_saddr_v6 = ipv6_all_zeros;
5848 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5849 	connp->conn_laddr_v6 = ipv6_all_zeros;
5850 	connp->conn_mcbc_bind = B_FALSE;
5851 	connp->conn_lport = 0;
5852 	/* In case we were also connected */
5853 	connp->conn_faddr_v6 = ipv6_all_zeros;
5854 	connp->conn_fport = 0;
5855 	mutex_exit(&udpf->uf_lock);
5856 
5857 	connp->conn_v6lastdst = ipv6_all_zeros;
5858 	udp->udp_state = TS_UNBND;
5859 
5860 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5861 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5862 	mutex_exit(&connp->conn_lock);
5863 
5864 	ip_unbind(connp);
5865 
5866 	return (0);
5867 }
5868 
5869 /*
5870  * It associates a default destination address with the stream.
5871  */
5872 static int
5873 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5874     cred_t *cr, pid_t pid)
5875 {
5876 	sin6_t		*sin6;
5877 	sin_t		*sin;
5878 	in6_addr_t 	v6dst;
5879 	ipaddr_t 	v4dst;
5880 	uint16_t 	dstport;
5881 	uint32_t 	flowinfo;
5882 	udp_fanout_t	*udpf;
5883 	udp_t		*udp, *udp1;
5884 	ushort_t	ipversion;
5885 	udp_stack_t	*us;
5886 	int		error;
5887 	conn_t		*connp1;
5888 	ip_xmit_attr_t	*ixa;
5889 	uint_t		scopeid = 0;
5890 	uint_t		srcid = 0;
5891 	in6_addr_t	v6src = connp->conn_saddr_v6;
5892 
5893 	udp = connp->conn_udp;
5894 	us = udp->udp_us;
5895 
5896 	/*
5897 	 * Address has been verified by the caller
5898 	 */
5899 	switch (len) {
5900 	default:
5901 		/*
5902 		 * Should never happen
5903 		 */
5904 		return (EINVAL);
5905 
5906 	case sizeof (sin_t):
5907 		sin = (sin_t *)sa;
5908 		v4dst = sin->sin_addr.s_addr;
5909 		dstport = sin->sin_port;
5910 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5911 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5912 		ipversion = IPV4_VERSION;
5913 		break;
5914 
5915 	case sizeof (sin6_t):
5916 		sin6 = (sin6_t *)sa;
5917 		v6dst = sin6->sin6_addr;
5918 		dstport = sin6->sin6_port;
5919 		srcid = sin6->__sin6_src_id;
5920 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5921 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5922 			    connp->conn_netstack);
5923 		}
5924 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
5925 			if (connp->conn_ipv6_v6only)
5926 				return (EADDRNOTAVAIL);
5927 
5928 			/*
5929 			 * Destination adress is mapped IPv6 address.
5930 			 * Source bound address should be unspecified or
5931 			 * IPv6 mapped address as well.
5932 			 */
5933 			if (!IN6_IS_ADDR_UNSPECIFIED(
5934 			    &connp->conn_bound_addr_v6) &&
5935 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5936 				return (EADDRNOTAVAIL);
5937 			}
5938 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5939 			ipversion = IPV4_VERSION;
5940 			flowinfo = 0;
5941 		} else {
5942 			ipversion = IPV6_VERSION;
5943 			flowinfo = sin6->sin6_flowinfo;
5944 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5945 				scopeid = sin6->sin6_scope_id;
5946 		}
5947 		break;
5948 	}
5949 
5950 	if (dstport == 0)
5951 		return (-TBADADDR);
5952 
5953 	/*
5954 	 * If there is a different thread using conn_ixa then we get a new
5955 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5956 	 * conn_ixa and prevent any other thread from using/changing it.
5957 	 * Once connect() is done other threads can use conn_ixa since the
5958 	 * refcnt will be back at one.
5959 	 */
5960 	ixa = conn_get_ixa(connp, B_TRUE);
5961 	if (ixa == NULL)
5962 		return (ENOMEM);
5963 
5964 	ASSERT(ixa->ixa_refcnt >= 2);
5965 	ASSERT(ixa == connp->conn_ixa);
5966 
5967 	mutex_enter(&connp->conn_lock);
5968 	/*
5969 	 * This udp_t must have bound to a port already before doing a connect.
5970 	 * Reject if a connect is in progress (we drop conn_lock during
5971 	 * udp_do_connect).
5972 	 */
5973 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5974 		mutex_exit(&connp->conn_lock);
5975 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5976 		    "udp_connect: bad state, %u", udp->udp_state);
5977 		ixa_refrele(ixa);
5978 		return (-TOUTSTATE);
5979 	}
5980 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5981 
5982 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5983 	    us->us_bind_fanout_size)];
5984 
5985 	mutex_enter(&udpf->uf_lock);
5986 	if (udp->udp_state == TS_DATA_XFER) {
5987 		/* Already connected - clear out state */
5988 		if (connp->conn_mcbc_bind)
5989 			connp->conn_saddr_v6 = ipv6_all_zeros;
5990 		else
5991 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5992 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5993 		connp->conn_faddr_v6 = ipv6_all_zeros;
5994 		connp->conn_fport = 0;
5995 		udp->udp_state = TS_IDLE;
5996 	}
5997 
5998 	connp->conn_fport = dstport;
5999 	connp->conn_ipversion = ipversion;
6000 	if (ipversion == IPV4_VERSION) {
6001 		/*
6002 		 * Interpret a zero destination to mean loopback.
6003 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
6004 		 * generate the T_CONN_CON.
6005 		 */
6006 		if (v4dst == INADDR_ANY) {
6007 			v4dst = htonl(INADDR_LOOPBACK);
6008 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
6009 			if (connp->conn_family == AF_INET) {
6010 				sin->sin_addr.s_addr = v4dst;
6011 			} else {
6012 				sin6->sin6_addr = v6dst;
6013 			}
6014 		}
6015 		connp->conn_faddr_v6 = v6dst;
6016 		connp->conn_flowinfo = 0;
6017 	} else {
6018 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
6019 		/*
6020 		 * Interpret a zero destination to mean loopback.
6021 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
6022 		 * generate the T_CONN_CON.
6023 		 */
6024 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
6025 			v6dst = ipv6_loopback;
6026 			sin6->sin6_addr = v6dst;
6027 		}
6028 		connp->conn_faddr_v6 = v6dst;
6029 		connp->conn_flowinfo = flowinfo;
6030 	}
6031 	mutex_exit(&udpf->uf_lock);
6032 
6033 	/*
6034 	 * We update our cred/cpid based on the caller of connect
6035 	 */
6036 	if (connp->conn_cred != cr) {
6037 		crhold(cr);
6038 		crfree(connp->conn_cred);
6039 		connp->conn_cred = cr;
6040 	}
6041 	connp->conn_cpid = pid;
6042 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
6043 	ixa->ixa_cred = cr;
6044 	ixa->ixa_cpid = pid;
6045 	if (is_system_labeled()) {
6046 		/* We need to restart with a label based on the cred */
6047 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
6048 	}
6049 
6050 	if (scopeid != 0) {
6051 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
6052 		ixa->ixa_scopeid = scopeid;
6053 		connp->conn_incoming_ifindex = scopeid;
6054 	} else {
6055 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
6056 		connp->conn_incoming_ifindex = connp->conn_bound_if;
6057 	}
6058 	/*
6059 	 * conn_connect will drop conn_lock and reacquire it.
6060 	 * To prevent a send* from messing with this udp_t while the lock
6061 	 * is dropped we set udp_state and clear conn_v6lastdst.
6062 	 * That will make all send* fail with EISCONN.
6063 	 */
6064 	connp->conn_v6lastdst = ipv6_all_zeros;
6065 	udp->udp_state = TS_WCON_CREQ;
6066 
6067 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
6068 	mutex_exit(&connp->conn_lock);
6069 	if (error != 0)
6070 		goto connect_failed;
6071 
6072 	/*
6073 	 * The addresses have been verified. Time to insert in
6074 	 * the correct fanout list.
6075 	 */
6076 	error = ipcl_conn_insert(connp);
6077 	if (error != 0)
6078 		goto connect_failed;
6079 
6080 	mutex_enter(&connp->conn_lock);
6081 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
6082 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
6083 	if (error != 0) {
6084 		mutex_exit(&connp->conn_lock);
6085 		goto connect_failed;
6086 	}
6087 
6088 	udp->udp_state = TS_DATA_XFER;
6089 	/* Record this as the "last" send even though we haven't sent any */
6090 	connp->conn_v6lastdst = connp->conn_faddr_v6;
6091 	connp->conn_lastipversion = connp->conn_ipversion;
6092 	connp->conn_lastdstport = connp->conn_fport;
6093 	connp->conn_lastflowinfo = connp->conn_flowinfo;
6094 	connp->conn_lastscopeid = scopeid;
6095 	connp->conn_lastsrcid = srcid;
6096 	/* Also remember a source to use together with lastdst */
6097 	connp->conn_v6lastsrc = v6src;
6098 	mutex_exit(&connp->conn_lock);
6099 
6100 	/*
6101 	 * We've picked a source address above. Now we can
6102 	 * verify that the src/port/dst/port is unique for all
6103 	 * connections in TS_DATA_XFER, skipping ourselves.
6104 	 */
6105 	mutex_enter(&udpf->uf_lock);
6106 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
6107 		if (udp1->udp_state != TS_DATA_XFER)
6108 			continue;
6109 
6110 		if (udp1 == udp)
6111 			continue;
6112 
6113 		connp1 = udp1->udp_connp;
6114 		if (connp->conn_lport != connp1->conn_lport ||
6115 		    connp->conn_ipversion != connp1->conn_ipversion ||
6116 		    dstport != connp1->conn_fport ||
6117 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6118 		    &connp1->conn_laddr_v6) ||
6119 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
6120 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
6121 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
6122 			continue;
6123 		mutex_exit(&udpf->uf_lock);
6124 		error = -TBADADDR;
6125 		goto connect_failed;
6126 	}
6127 	if (cl_inet_connect2 != NULL) {
6128 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
6129 		if (error != 0) {
6130 			mutex_exit(&udpf->uf_lock);
6131 			error = -TBADADDR;
6132 			goto connect_failed;
6133 		}
6134 	}
6135 	mutex_exit(&udpf->uf_lock);
6136 
6137 	ixa_refrele(ixa);
6138 	return (0);
6139 
6140 connect_failed:
6141 	if (ixa != NULL)
6142 		ixa_refrele(ixa);
6143 	mutex_enter(&connp->conn_lock);
6144 	mutex_enter(&udpf->uf_lock);
6145 	udp->udp_state = TS_IDLE;
6146 	connp->conn_faddr_v6 = ipv6_all_zeros;
6147 	connp->conn_fport = 0;
6148 	/* In case the source address was set above */
6149 	if (connp->conn_mcbc_bind)
6150 		connp->conn_saddr_v6 = ipv6_all_zeros;
6151 	else
6152 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
6153 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
6154 	mutex_exit(&udpf->uf_lock);
6155 
6156 	connp->conn_v6lastdst = ipv6_all_zeros;
6157 	connp->conn_flowinfo = 0;
6158 
6159 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
6160 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
6161 	mutex_exit(&connp->conn_lock);
6162 	return (error);
6163 }
6164 
6165 static int
6166 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
6167     socklen_t len, sock_connid_t *id, cred_t *cr)
6168 {
6169 	conn_t	*connp = (conn_t *)proto_handle;
6170 	udp_t	*udp = connp->conn_udp;
6171 	int	error;
6172 	boolean_t did_bind = B_FALSE;
6173 	pid_t	pid = curproc->p_pid;
6174 
6175 	/* All Solaris components should pass a cred for this operation. */
6176 	ASSERT(cr != NULL);
6177 
6178 	if (sa == NULL) {
6179 		/*
6180 		 * Disconnect
6181 		 * Make sure we are connected
6182 		 */
6183 		if (udp->udp_state != TS_DATA_XFER)
6184 			return (EINVAL);
6185 
6186 		error = udp_disconnect(connp);
6187 		return (error);
6188 	}
6189 
6190 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
6191 	if (error != 0)
6192 		goto done;
6193 
6194 	/* do an implicit bind if necessary */
6195 	if (udp->udp_state == TS_UNBND) {
6196 		error = udp_implicit_bind(connp, cr);
6197 		/*
6198 		 * We could be racing with an actual bind, in which case
6199 		 * we would see EPROTO. We cross our fingers and try
6200 		 * to connect.
6201 		 */
6202 		if (!(error == 0 || error == EPROTO))
6203 			goto done;
6204 		did_bind = B_TRUE;
6205 	}
6206 	/*
6207 	 * set SO_DGRAM_ERRIND
6208 	 */
6209 	connp->conn_dgram_errind = B_TRUE;
6210 
6211 	error = udp_do_connect(connp, sa, len, cr, pid);
6212 
6213 	if (error != 0 && did_bind) {
6214 		int unbind_err;
6215 
6216 		unbind_err = udp_do_unbind(connp);
6217 		ASSERT(unbind_err == 0);
6218 	}
6219 
6220 	if (error == 0) {
6221 		*id = 0;
6222 		(*connp->conn_upcalls->su_connected)
6223 		    (connp->conn_upper_handle, 0, NULL, -1);
6224 	} else if (error < 0) {
6225 		error = proto_tlitosyserr(-error);
6226 	}
6227 
6228 done:
6229 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6230 		/*
6231 		 * No need to hold locks to set state
6232 		 * after connect failure socket state is undefined
6233 		 * We set the state only to imitate old sockfs behavior
6234 		 */
6235 		udp->udp_state = TS_IDLE;
6236 	}
6237 	return (error);
6238 }
6239 
6240 int
6241 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6242     cred_t *cr)
6243 {
6244 	sin6_t		*sin6;
6245 	sin_t		*sin = NULL;
6246 	uint_t		srcid;
6247 	conn_t		*connp = (conn_t *)proto_handle;
6248 	udp_t		*udp = connp->conn_udp;
6249 	int		error = 0;
6250 	udp_stack_t	*us = udp->udp_us;
6251 	ushort_t	ipversion;
6252 	pid_t		pid = curproc->p_pid;
6253 	ip_xmit_attr_t	*ixa;
6254 
6255 	ASSERT(DB_TYPE(mp) == M_DATA);
6256 
6257 	/* All Solaris components should pass a cred for this operation. */
6258 	ASSERT(cr != NULL);
6259 
6260 	/* do an implicit bind if necessary */
6261 	if (udp->udp_state == TS_UNBND) {
6262 		error = udp_implicit_bind(connp, cr);
6263 		/*
6264 		 * We could be racing with an actual bind, in which case
6265 		 * we would see EPROTO. We cross our fingers and try
6266 		 * to connect.
6267 		 */
6268 		if (!(error == 0 || error == EPROTO)) {
6269 			freemsg(mp);
6270 			return (error);
6271 		}
6272 	}
6273 
6274 	/* Connected? */
6275 	if (msg->msg_name == NULL) {
6276 		if (udp->udp_state != TS_DATA_XFER) {
6277 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6278 			return (EDESTADDRREQ);
6279 		}
6280 		if (msg->msg_controllen != 0) {
6281 			error = udp_output_ancillary(connp, NULL, NULL, mp,
6282 			    NULL, msg, cr, pid);
6283 		} else {
6284 			error = udp_output_connected(connp, mp, cr, pid);
6285 		}
6286 		if (us->us_sendto_ignerr)
6287 			return (0);
6288 		else
6289 			return (error);
6290 	}
6291 	if (udp->udp_state == TS_DATA_XFER) {
6292 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6293 		return (EISCONN);
6294 	}
6295 	error = proto_verify_ip_addr(connp->conn_family,
6296 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6297 	if (error != 0) {
6298 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6299 		return (error);
6300 	}
6301 	switch (connp->conn_family) {
6302 	case AF_INET6:
6303 		sin6 = (sin6_t *)msg->msg_name;
6304 
6305 		srcid = sin6->__sin6_src_id;
6306 
6307 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6308 			/*
6309 			 * Destination is a non-IPv4-compatible IPv6 address.
6310 			 * Send out an IPv6 format packet.
6311 			 */
6312 
6313 			/*
6314 			 * If the local address is a mapped address return
6315 			 * an error.
6316 			 * It would be possible to send an IPv6 packet but the
6317 			 * response would never make it back to the application
6318 			 * since it is bound to a mapped address.
6319 			 */
6320 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6321 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6322 				return (EADDRNOTAVAIL);
6323 			}
6324 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6325 				sin6->sin6_addr = ipv6_loopback;
6326 			ipversion = IPV6_VERSION;
6327 		} else {
6328 			if (connp->conn_ipv6_v6only) {
6329 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6330 				return (EADDRNOTAVAIL);
6331 			}
6332 
6333 			/*
6334 			 * If the local address is not zero or a mapped address
6335 			 * return an error.  It would be possible to send an
6336 			 * IPv4 packet but the response would never make it
6337 			 * back to the application since it is bound to a
6338 			 * non-mapped address.
6339 			 */
6340 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6341 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6342 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6343 				return (EADDRNOTAVAIL);
6344 			}
6345 
6346 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6347 				V4_PART_OF_V6(sin6->sin6_addr) =
6348 				    htonl(INADDR_LOOPBACK);
6349 			}
6350 			ipversion = IPV4_VERSION;
6351 		}
6352 
6353 		/*
6354 		 * We have to allocate an ip_xmit_attr_t before we grab
6355 		 * conn_lock and we need to hold conn_lock once we've check
6356 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
6357 		 * socket.
6358 		 */
6359 		if (msg->msg_controllen == 0) {
6360 			ixa = conn_get_ixa(connp, B_FALSE);
6361 			if (ixa == NULL) {
6362 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6363 				return (ENOMEM);
6364 			}
6365 		} else {
6366 			ixa = NULL;
6367 		}
6368 		mutex_enter(&connp->conn_lock);
6369 		if (udp->udp_delayed_error != 0) {
6370 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6371 
6372 			error = udp->udp_delayed_error;
6373 			udp->udp_delayed_error = 0;
6374 
6375 			/* Compare IP address, port, and family */
6376 
6377 			if (sin6->sin6_port == sin2->sin6_port &&
6378 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6379 			    &sin2->sin6_addr) &&
6380 			    sin6->sin6_family == sin2->sin6_family) {
6381 				mutex_exit(&connp->conn_lock);
6382 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6383 				if (ixa != NULL)
6384 					ixa_refrele(ixa);
6385 				return (error);
6386 			}
6387 		}
6388 
6389 		if (msg->msg_controllen != 0) {
6390 			mutex_exit(&connp->conn_lock);
6391 			ASSERT(ixa == NULL);
6392 			error = udp_output_ancillary(connp, NULL, sin6, mp,
6393 			    NULL, msg, cr, pid);
6394 		} else if (conn_same_as_last_v6(connp, sin6) &&
6395 		    connp->conn_lastsrcid == srcid &&
6396 		    ipsec_outbound_policy_current(ixa)) {
6397 			/* udp_output_lastdst drops conn_lock */
6398 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6399 		} else {
6400 			/* udp_output_newdst drops conn_lock */
6401 			error = udp_output_newdst(connp, mp, NULL, sin6,
6402 			    ipversion, cr, pid, ixa);
6403 		}
6404 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6405 		if (us->us_sendto_ignerr)
6406 			return (0);
6407 		else
6408 			return (error);
6409 	case AF_INET:
6410 		sin = (sin_t *)msg->msg_name;
6411 
6412 		ipversion = IPV4_VERSION;
6413 
6414 		if (sin->sin_addr.s_addr == INADDR_ANY)
6415 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6416 
6417 		/*
6418 		 * We have to allocate an ip_xmit_attr_t before we grab
6419 		 * conn_lock and we need to hold conn_lock once we've check
6420 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6421 		 */
6422 		if (msg->msg_controllen == 0) {
6423 			ixa = conn_get_ixa(connp, B_FALSE);
6424 			if (ixa == NULL) {
6425 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6426 				return (ENOMEM);
6427 			}
6428 		} else {
6429 			ixa = NULL;
6430 		}
6431 		mutex_enter(&connp->conn_lock);
6432 		if (udp->udp_delayed_error != 0) {
6433 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6434 
6435 			error = udp->udp_delayed_error;
6436 			udp->udp_delayed_error = 0;
6437 
6438 			/* Compare IP address and port */
6439 
6440 			if (sin->sin_port == sin2->sin_port &&
6441 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6442 				mutex_exit(&connp->conn_lock);
6443 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6444 				if (ixa != NULL)
6445 					ixa_refrele(ixa);
6446 				return (error);
6447 			}
6448 		}
6449 		if (msg->msg_controllen != 0) {
6450 			mutex_exit(&connp->conn_lock);
6451 			ASSERT(ixa == NULL);
6452 			error = udp_output_ancillary(connp, sin, NULL, mp,
6453 			    NULL, msg, cr, pid);
6454 		} else if (conn_same_as_last_v4(connp, sin) &&
6455 		    ipsec_outbound_policy_current(ixa)) {
6456 			/* udp_output_lastdst drops conn_lock */
6457 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6458 		} else {
6459 			/* udp_output_newdst drops conn_lock */
6460 			error = udp_output_newdst(connp, mp, sin, NULL,
6461 			    ipversion, cr, pid, ixa);
6462 		}
6463 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6464 		if (us->us_sendto_ignerr)
6465 			return (0);
6466 		else
6467 			return (error);
6468 	default:
6469 		return (EINVAL);
6470 	}
6471 }
6472 
6473 int
6474 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6475     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
6476 {
6477 	conn_t 	*connp = (conn_t *)proto_handle;
6478 	udp_t	*udp;
6479 	struct T_capability_ack tca;
6480 	struct sockaddr_in6 laddr, faddr;
6481 	socklen_t laddrlen, faddrlen;
6482 	short opts;
6483 	struct stroptions *stropt;
6484 	mblk_t *stropt_mp;
6485 	int error;
6486 
6487 	udp = connp->conn_udp;
6488 
6489 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6490 
6491 	/*
6492 	 * setup the fallback stream that was allocated
6493 	 */
6494 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
6495 	connp->conn_minor_arena = WR(q)->q_ptr;
6496 
6497 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
6498 
6499 	WR(q)->q_qinfo = &udp_winit;
6500 
6501 	connp->conn_rq = RD(q);
6502 	connp->conn_wq = WR(q);
6503 
6504 	/* Notify stream head about options before sending up data */
6505 	stropt_mp->b_datap->db_type = M_SETOPTS;
6506 	stropt_mp->b_wptr += sizeof (*stropt);
6507 	stropt = (struct stroptions *)stropt_mp->b_rptr;
6508 	stropt->so_flags = SO_WROFF | SO_HIWAT;
6509 	stropt->so_wroff = connp->conn_wroff;
6510 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6511 	putnext(RD(q), stropt_mp);
6512 
6513 	/*
6514 	 * Free the helper stream
6515 	 */
6516 	ip_free_helper_stream(connp);
6517 
6518 	if (!issocket)
6519 		udp_use_pure_tpi(udp);
6520 
6521 	/*
6522 	 * Collect the information needed to sync with the sonode
6523 	 */
6524 	udp_do_capability_ack(udp, &tca, TC1_INFO);
6525 
6526 	laddrlen = faddrlen = sizeof (sin6_t);
6527 	(void) udp_getsockname((sock_lower_handle_t)connp,
6528 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
6529 	error = udp_getpeername((sock_lower_handle_t)connp,
6530 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
6531 	if (error != 0)
6532 		faddrlen = 0;
6533 
6534 	opts = 0;
6535 	if (connp->conn_dgram_errind)
6536 		opts |= SO_DGRAM_ERRIND;
6537 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6538 		opts |= SO_DONTROUTE;
6539 
6540 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
6541 	    (struct sockaddr *)&laddr, laddrlen,
6542 	    (struct sockaddr *)&faddr, faddrlen, opts);
6543 
6544 	mutex_enter(&udp->udp_recv_lock);
6545 	/*
6546 	 * Attempts to send data up during fallback will result in it being
6547 	 * queued in udp_t. Now we push up any queued packets.
6548 	 */
6549 	while (udp->udp_fallback_queue_head != NULL) {
6550 		mblk_t *mp;
6551 		mp = udp->udp_fallback_queue_head;
6552 		udp->udp_fallback_queue_head = mp->b_next;
6553 		mutex_exit(&udp->udp_recv_lock);
6554 		mp->b_next = NULL;
6555 		putnext(RD(q), mp);
6556 		mutex_enter(&udp->udp_recv_lock);
6557 	}
6558 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6559 	/*
6560 	 * No longer a streams less socket
6561 	 */
6562 	mutex_enter(&connp->conn_lock);
6563 	connp->conn_flags &= ~IPCL_NONSTR;
6564 	mutex_exit(&connp->conn_lock);
6565 
6566 	mutex_exit(&udp->udp_recv_lock);
6567 
6568 	ASSERT(connp->conn_ref >= 1);
6569 
6570 	return (0);
6571 }
6572 
6573 /* ARGSUSED3 */
6574 int
6575 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
6576     socklen_t *salenp, cred_t *cr)
6577 {
6578 	conn_t	*connp = (conn_t *)proto_handle;
6579 	udp_t	*udp = connp->conn_udp;
6580 	int error;
6581 
6582 	/* All Solaris components should pass a cred for this operation. */
6583 	ASSERT(cr != NULL);
6584 
6585 	mutex_enter(&connp->conn_lock);
6586 	if (udp->udp_state != TS_DATA_XFER)
6587 		error = ENOTCONN;
6588 	else
6589 		error = conn_getpeername(connp, sa, salenp);
6590 	mutex_exit(&connp->conn_lock);
6591 	return (error);
6592 }
6593 
6594 /* ARGSUSED3 */
6595 int
6596 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6597     socklen_t *salenp, cred_t *cr)
6598 {
6599 	conn_t	*connp = (conn_t *)proto_handle;
6600 	int error;
6601 
6602 	/* All Solaris components should pass a cred for this operation. */
6603 	ASSERT(cr != NULL);
6604 
6605 	mutex_enter(&connp->conn_lock);
6606 	error = conn_getsockname(connp, sa, salenp);
6607 	mutex_exit(&connp->conn_lock);
6608 	return (error);
6609 }
6610 
6611 int
6612 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6613     void *optvalp, socklen_t *optlen, cred_t *cr)
6614 {
6615 	conn_t		*connp = (conn_t *)proto_handle;
6616 	int		error;
6617 	t_uscalar_t	max_optbuf_len;
6618 	void		*optvalp_buf;
6619 	int		len;
6620 
6621 	/* All Solaris components should pass a cred for this operation. */
6622 	ASSERT(cr != NULL);
6623 
6624 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6625 	    udp_opt_obj.odb_opt_des_arr,
6626 	    udp_opt_obj.odb_opt_arr_cnt,
6627 	    B_FALSE, B_TRUE, cr);
6628 	if (error != 0) {
6629 		if (error < 0)
6630 			error = proto_tlitosyserr(-error);
6631 		return (error);
6632 	}
6633 
6634 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6635 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6636 	if (len == -1) {
6637 		kmem_free(optvalp_buf, max_optbuf_len);
6638 		return (EINVAL);
6639 	}
6640 
6641 	/*
6642 	 * update optlen and copy option value
6643 	 */
6644 	t_uscalar_t size = MIN(len, *optlen);
6645 
6646 	bcopy(optvalp_buf, optvalp, size);
6647 	bcopy(&size, optlen, sizeof (size));
6648 
6649 	kmem_free(optvalp_buf, max_optbuf_len);
6650 	return (0);
6651 }
6652 
6653 int
6654 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6655     const void *optvalp, socklen_t optlen, cred_t *cr)
6656 {
6657 	conn_t		*connp = (conn_t *)proto_handle;
6658 	int		error;
6659 
6660 	/* All Solaris components should pass a cred for this operation. */
6661 	ASSERT(cr != NULL);
6662 
6663 	error = proto_opt_check(level, option_name, optlen, NULL,
6664 	    udp_opt_obj.odb_opt_des_arr,
6665 	    udp_opt_obj.odb_opt_arr_cnt,
6666 	    B_TRUE, B_FALSE, cr);
6667 
6668 	if (error != 0) {
6669 		if (error < 0)
6670 			error = proto_tlitosyserr(-error);
6671 		return (error);
6672 	}
6673 
6674 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6675 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6676 	    NULL, cr);
6677 
6678 	ASSERT(error >= 0);
6679 
6680 	return (error);
6681 }
6682 
6683 void
6684 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6685 {
6686 	conn_t	*connp = (conn_t *)proto_handle;
6687 	udp_t	*udp = connp->conn_udp;
6688 
6689 	mutex_enter(&udp->udp_recv_lock);
6690 	connp->conn_flow_cntrld = B_FALSE;
6691 	mutex_exit(&udp->udp_recv_lock);
6692 }
6693 
6694 /* ARGSUSED2 */
6695 int
6696 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6697 {
6698 	conn_t	*connp = (conn_t *)proto_handle;
6699 
6700 	/* All Solaris components should pass a cred for this operation. */
6701 	ASSERT(cr != NULL);
6702 
6703 	/* shut down the send side */
6704 	if (how != SHUT_RD)
6705 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6706 		    SOCK_OPCTL_SHUT_SEND, 0);
6707 	/* shut down the recv side */
6708 	if (how != SHUT_WR)
6709 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6710 		    SOCK_OPCTL_SHUT_RECV, 0);
6711 	return (0);
6712 }
6713 
6714 int
6715 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6716     int mode, int32_t *rvalp, cred_t *cr)
6717 {
6718 	conn_t  	*connp = (conn_t *)proto_handle;
6719 	int		error;
6720 
6721 	/* All Solaris components should pass a cred for this operation. */
6722 	ASSERT(cr != NULL);
6723 
6724 	/*
6725 	 * If we don't have a helper stream then create one.
6726 	 * ip_create_helper_stream takes care of locking the conn_t,
6727 	 * so this check for NULL is just a performance optimization.
6728 	 */
6729 	if (connp->conn_helper_info == NULL) {
6730 		udp_stack_t *us = connp->conn_udp->udp_us;
6731 
6732 		ASSERT(us->us_ldi_ident != NULL);
6733 
6734 		/*
6735 		 * Create a helper stream for non-STREAMS socket.
6736 		 */
6737 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6738 		if (error != 0) {
6739 			ip0dbg(("tcp_ioctl: create of IP helper stream "
6740 			    "failed %d\n", error));
6741 			return (error);
6742 		}
6743 	}
6744 
6745 	switch (cmd) {
6746 		case _SIOCSOCKFALLBACK:
6747 		case TI_GETPEERNAME:
6748 		case TI_GETMYNAME:
6749 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6750 			    cmd));
6751 			error = EINVAL;
6752 			break;
6753 		default:
6754 			/*
6755 			 * Pass on to IP using helper stream
6756 			 */
6757 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6758 			    cmd, arg, mode, cr, rvalp);
6759 			break;
6760 	}
6761 	return (error);
6762 }
6763 
6764 /* ARGSUSED */
6765 int
6766 udp_accept(sock_lower_handle_t lproto_handle,
6767     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6768     cred_t *cr)
6769 {
6770 	return (EOPNOTSUPP);
6771 }
6772 
6773 /* ARGSUSED */
6774 int
6775 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6776 {
6777 	return (EOPNOTSUPP);
6778 }
6779 
6780 sock_downcalls_t sock_udp_downcalls = {
6781 	udp_activate,		/* sd_activate */
6782 	udp_accept,		/* sd_accept */
6783 	udp_bind,		/* sd_bind */
6784 	udp_listen,		/* sd_listen */
6785 	udp_connect,		/* sd_connect */
6786 	udp_getpeername,	/* sd_getpeername */
6787 	udp_getsockname,	/* sd_getsockname */
6788 	udp_getsockopt,		/* sd_getsockopt */
6789 	udp_setsockopt,		/* sd_setsockopt */
6790 	udp_send,		/* sd_send */
6791 	NULL,			/* sd_send_uio */
6792 	NULL,			/* sd_recv_uio */
6793 	NULL,			/* sd_poll */
6794 	udp_shutdown,		/* sd_shutdown */
6795 	udp_clr_flowctrl,	/* sd_setflowctrl */
6796 	udp_ioctl,		/* sd_ioctl */
6797 	udp_close		/* sd_close */
6798 };
6799