xref: /titanic_50/usr/src/uts/common/inet/udp/udp.c (revision db3659e514c8bf3f03bcca6dd082e363bd7b466a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/pattr.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #include <sys/strsun.h>
34 #include <sys/time.h>
35 #define	_SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/kmem.h>
44 #include <sys/policy.h>
45 #include <sys/ucred.h>
46 #include <sys/zone.h>
47 
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sockio.h>
51 #include <sys/vtrace.h>
52 #include <sys/sdt.h>
53 #include <sys/debug.h>
54 #include <sys/isa_defs.h>
55 #include <sys/random.h>
56 #include <netinet/in.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 #include <netinet/udp.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_ire.h>
68 #include <inet/ip_if.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_ndp.h>
71 #include <inet/proto_set.h>
72 #include <inet/mib2.h>
73 #include <inet/nd.h>
74 #include <inet/optcom.h>
75 #include <inet/snmpcom.h>
76 #include <inet/kstatcom.h>
77 #include <inet/udp_impl.h>
78 #include <inet/ipclassifier.h>
79 #include <inet/ipsec_impl.h>
80 #include <inet/ipp_common.h>
81 #include <sys/squeue_impl.h>
82 #include <inet/ipnet.h>
83 #include <sys/ethernet.h>
84 
85 /*
86  * The ipsec_info.h header file is here since it has the definition for the
87  * M_CTL message types used by IP to convey information to the ULP. The
88  * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
89  */
90 #include <net/pfkeyv2.h>
91 #include <inet/ipsec_info.h>
92 
93 #include <sys/tsol/label.h>
94 #include <sys/tsol/tnet.h>
95 #include <rpc/pmap_prot.h>
96 
97 /*
98  * Synchronization notes:
99  *
100  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
101  * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
102  * We also use conn_lock when updating things that affect the IP classifier
103  * lookup.
104  * The lock order is udp_rwlock -> uf_lock and udp_rwlock -> conn_lock.
105  *
106  * The fanout lock uf_lock:
107  * When a UDP endpoint is bound to a local port, it is inserted into
108  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
109  * The size of the array is controlled by the udp_bind_fanout_size variable.
110  * This variable can be changed in /etc/system if the default value is
111  * not large enough.  Each bind hash bucket is protected by a per bucket
112  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
113  * structure and a few other fields in the udp_t. A UDP endpoint is removed
114  * from the bind hash list only when it is being unbound or being closed.
115  * The per bucket lock also protects a UDP endpoint's state changes.
116  *
117  * The udp_rwlock:
118  * This protects most of the other fields in the udp_t. The exact list of
119  * fields which are protected by each of the above locks is documented in
120  * the udp_t structure definition.
121  *
122  * Plumbing notes:
123  * UDP is always a device driver. For compatibility with mibopen() code
124  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
125  * dummy module.
126  *
127  * The above implies that we don't support any intermediate module to
128  * reside in between /dev/ip and udp -- in fact, we never supported such
129  * scenario in the past as the inter-layer communication semantics have
130  * always been private.
131  */
132 
133 /* For /etc/system control */
134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
135 
136 #define	NDD_TOO_QUICK_MSG \
137 	"ndd get info rate too high for non-privileged users, try again " \
138 	"later.\n"
139 #define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"
140 
141 /* Option processing attrs */
142 typedef struct udpattrs_s {
143 	union {
144 		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
145 		ip4_pkt_t 	*udpattr_ipp4;	/* For V4 */
146 	} udpattr_ippu;
147 #define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
148 #define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
149 	mblk_t		*udpattr_mb;
150 	boolean_t	udpattr_credset;
151 } udpattrs_t;
152 
153 static void	udp_addr_req(queue_t *q, mblk_t *mp);
154 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
155 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
156 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
157 static int	udp_build_hdrs(udp_t *udp);
158 static void	udp_capability_req(queue_t *q, mblk_t *mp);
159 static int	udp_tpi_close(queue_t *q, int flags);
160 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
161 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
162 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
163 		    int sys_error);
164 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
165 		    t_scalar_t tlierr, int unixerr);
166 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
167 		    cred_t *cr);
168 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
169 		    char *value, caddr_t cp, cred_t *cr);
170 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
171 		    char *value, caddr_t cp, cred_t *cr);
172 static void	udp_icmp_error(conn_t *, mblk_t *);
173 static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
174 static void	udp_info_req(queue_t *q, mblk_t *mp);
175 static void	udp_input(void *, mblk_t *, void *);
176 static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
177 		    t_scalar_t addr_length);
178 static void	udp_lrput(queue_t *, mblk_t *);
179 static void	udp_lwput(queue_t *, mblk_t *);
180 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
181 		    cred_t *credp, boolean_t isv6);
182 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
183 		    cred_t *credp);
184 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
185 		    cred_t *credp);
186 static  int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
187 		    int *errorp, udpattrs_t *udpattrs);
188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
189 static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
191 static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
192 		    cred_t *cr);
193 static void	udp_report_item(mblk_t *mp, udp_t *udp);
194 static int	udp_rinfop(queue_t *q, infod_t *dp);
195 static int	udp_rrw(queue_t *q, struiod_t *dp);
196 static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
197 		    cred_t *cr);
198 static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
199 		    ipha_t *ipha);
200 static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
201 		    t_scalar_t destlen, t_scalar_t err);
202 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
204     boolean_t random);
205 static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
206 		    int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
207 static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
208 		    int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
209 static void	udp_wput_other(queue_t *q, mblk_t *mp);
210 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
211 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
212 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
213 
214 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
215 static void	udp_stack_fini(netstackid_t stackid, void *arg);
216 
217 static void	*udp_kstat_init(netstackid_t stackid);
218 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
219 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
220 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
221 static int	udp_kstat_update(kstat_t *kp, int rw);
222 
223 static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
224 		    uint_t pkt_len);
225 static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
226 static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
227 
228 static int	udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
229 		    cred_t *, pid_t);
230 
231 /* Common routine for TPI and socket module */
232 static conn_t	*udp_do_open(cred_t *, boolean_t, int);
233 static void	udp_do_close(conn_t *);
234 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
235     boolean_t);
236 static int	udp_do_unbind(conn_t *);
237 static int	udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
238 static int	udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);
239 
240 int		udp_getsockname(sock_lower_handle_t,
241     struct sockaddr *, socklen_t *, cred_t *);
242 int		udp_getpeername(sock_lower_handle_t,
243     struct sockaddr *, socklen_t *, cred_t *);
244 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t);
245 static int	udp_post_ip_bind_connect(udp_t *, mblk_t *, int);
246 
247 #define	UDP_RECV_HIWATER	(56 * 1024)
248 #define	UDP_RECV_LOWATER	128
249 #define	UDP_XMIT_HIWATER	(56 * 1024)
250 #define	UDP_XMIT_LOWATER	1024
251 
252 /*
253  * The following is defined in tcp.c
254  */
255 extern int	(*cl_inet_connect2)(netstackid_t stack_id,
256 		    uint8_t protocol, boolean_t is_outgoing,
257 		    sa_family_t addr_family,
258 		    uint8_t *laddrp, in_port_t lport,
259 		    uint8_t *faddrp, in_port_t fport, void *args);
260 
261 /*
262  * Checks if the given destination addr/port is allowed out.
263  * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
264  * Called for each connect() and for sendto()/sendmsg() to a different
265  * destination.
266  * For connect(), called in udp_connect().
267  * For sendto()/sendmsg(), called in udp_output_v{4,6}().
268  *
269  * This macro assumes that the cl_inet_connect2 hook is not NULL.
270  * Please check this before calling this macro.
271  *
272  * void
273  * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing,
274  *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
275  */
276 #define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
277 	(err) = 0;							\
278 	/*								\
279 	 * Running in cluster mode - check and register active		\
280 	 * "connection" information					\
281 	 */								\
282 	if ((udp)->udp_ipversion == IPV4_VERSION)			\
283 		(err) = (*cl_inet_connect2)(				\
284 		    (cp)->conn_netstack->netstack_stackid,		\
285 		    IPPROTO_UDP, is_outgoing, AF_INET,			\
286 		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
287 		    (udp)->udp_port,					\
288 		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
289 		    (in_port_t)(fport), NULL);				\
290 	else								\
291 		(err) = (*cl_inet_connect2)(				\
292 		    (cp)->conn_netstack->netstack_stackid,		\
293 		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
294 		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
295 		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
296 }
297 
298 static struct module_info udp_mod_info =  {
299 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
300 };
301 
302 /*
303  * Entry points for UDP as a device.
304  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
305  */
306 static struct qinit udp_rinitv4 = {
307 	NULL, NULL, udp_openv4, udp_tpi_close, NULL,
308 	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
309 };
310 
311 static struct qinit udp_rinitv6 = {
312 	NULL, NULL, udp_openv6, udp_tpi_close, NULL,
313 	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
314 };
315 
316 static struct qinit udp_winit = {
317 	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
318 	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
319 };
320 
321 /* UDP entry point during fallback */
322 struct qinit udp_fallback_sock_winit = {
323 	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
324 };
325 
326 /*
327  * UDP needs to handle I_LINK and I_PLINK since ifconfig
328  * likes to use it as a place to hang the various streams.
329  */
330 static struct qinit udp_lrinit = {
331 	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
332 	&udp_mod_info
333 };
334 
335 static struct qinit udp_lwinit = {
336 	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
337 	&udp_mod_info
338 };
339 
340 /* For AF_INET aka /dev/udp */
341 struct streamtab udpinfov4 = {
342 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
343 };
344 
345 /* For AF_INET6 aka /dev/udp6 */
346 struct streamtab udpinfov6 = {
347 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
348 };
349 
350 static	sin_t	sin_null;	/* Zero address for quick clears */
351 static	sin6_t	sin6_null;	/* Zero address for quick clears */
352 
353 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
354 
355 /* Default structure copied into T_INFO_ACK messages */
356 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
357 	T_INFO_ACK,
358 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
359 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
360 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
361 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
362 	sizeof (sin_t),	/* ADDR_size. */
363 	0,		/* OPT_size - not initialized here */
364 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
365 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
366 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
367 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
368 };
369 
370 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
371 
372 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
373 	T_INFO_ACK,
374 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
375 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
376 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
377 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
378 	sizeof (sin6_t), /* ADDR_size. */
379 	0,		/* OPT_size - not initialized here */
380 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
381 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
382 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
383 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
384 };
385 
386 /* largest UDP port number */
387 #define	UDP_MAX_PORT	65535
388 
389 /*
390  * Table of ND variables supported by udp.  These are loaded into us_nd
391  * in udp_open.
392  * All of these are alterable, within the min/max values given, at run time.
393  */
394 /* BEGIN CSTYLED */
395 udpparam_t udp_param_arr[] = {
396  /*min		max		value		name */
397  { 0L,		256,		32,		"udp_wroff_extra" },
398  { 1L,		255,		255,		"udp_ipv4_ttl" },
399  { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
400  { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
401  { 0,		1,		1,		"udp_do_checksum" },
402  { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
403  { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
404  { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
405  { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
406  { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
407  { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
408  { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
409 };
410 /* END CSTYLED */
411 
412 /* Settable in /etc/system */
413 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
414 uint32_t udp_random_anon_port = 1;
415 
416 /*
417  * Hook functions to enable cluster networking.
418  * On non-clustered systems these vectors must always be NULL
419  */
420 
421 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
422     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
423     void *args) = NULL;
424 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
425     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
426     void *args) = NULL;
427 
428 typedef union T_primitives *t_primp_t;
429 
430 /*
431  * Return the next anonymous port in the privileged port range for
432  * bind checking.
433  *
434  * Trusted Extension (TX) notes: TX allows administrator to mark or
435  * reserve ports as Multilevel ports (MLP). MLP has special function
436  * on TX systems. Once a port is made MLP, it's not available as
437  * ordinary port. This creates "holes" in the port name space. It
438  * may be necessary to skip the "holes" find a suitable anon port.
439  */
440 static in_port_t
441 udp_get_next_priv_port(udp_t *udp)
442 {
443 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
444 	in_port_t nextport;
445 	boolean_t restart = B_FALSE;
446 	udp_stack_t *us = udp->udp_us;
447 
448 retry:
449 	if (next_priv_port < us->us_min_anonpriv_port ||
450 	    next_priv_port >= IPPORT_RESERVED) {
451 		next_priv_port = IPPORT_RESERVED - 1;
452 		if (restart)
453 			return (0);
454 		restart = B_TRUE;
455 	}
456 
457 	if (is_system_labeled() &&
458 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
459 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
460 		next_priv_port = nextport;
461 		goto retry;
462 	}
463 
464 	return (next_priv_port--);
465 }
466 
467 /* UDP bind hash report triggered via the Named Dispatch mechanism. */
468 /* ARGSUSED */
469 static int
470 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
471 {
472 	udp_fanout_t	*udpf;
473 	int		i;
474 	zoneid_t	zoneid;
475 	conn_t		*connp;
476 	udp_t		*udp;
477 	udp_stack_t	*us;
478 
479 	connp = Q_TO_CONN(q);
480 	udp = connp->conn_udp;
481 	us = udp->udp_us;
482 
483 	/* Refer to comments in udp_status_report(). */
484 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
485 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
486 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
487 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
488 			return (0);
489 		}
490 	}
491 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
492 		/* The following may work even if we cannot get a large buf. */
493 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
494 		return (0);
495 	}
496 
497 	(void) mi_mpprintf(mp,
498 	    "UDP     " MI_COL_HDRPAD_STR
499 	/*   12345678[89ABCDEF] */
500 	    " zone lport src addr        dest addr       port  state");
501 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
502 
503 	zoneid = connp->conn_zoneid;
504 
505 	for (i = 0; i < us->us_bind_fanout_size; i++) {
506 		udpf = &us->us_bind_fanout[i];
507 		mutex_enter(&udpf->uf_lock);
508 
509 		/* Print the hash index. */
510 		udp = udpf->uf_udp;
511 		if (zoneid != GLOBAL_ZONEID) {
512 			/* skip to first entry in this zone; might be none */
513 			while (udp != NULL &&
514 			    udp->udp_connp->conn_zoneid != zoneid)
515 				udp = udp->udp_bind_hash;
516 		}
517 		if (udp != NULL) {
518 			uint_t print_len, buf_len;
519 
520 			buf_len = mp->b_cont->b_datap->db_lim -
521 			    mp->b_cont->b_wptr;
522 			print_len = snprintf((char *)mp->b_cont->b_wptr,
523 			    buf_len, "%d\n", i);
524 			if (print_len < buf_len) {
525 				mp->b_cont->b_wptr += print_len;
526 			} else {
527 				mp->b_cont->b_wptr += buf_len;
528 			}
529 			for (; udp != NULL; udp = udp->udp_bind_hash) {
530 				if (zoneid == GLOBAL_ZONEID ||
531 				    zoneid == udp->udp_connp->conn_zoneid)
532 					udp_report_item(mp->b_cont, udp);
533 			}
534 		}
535 		mutex_exit(&udpf->uf_lock);
536 	}
537 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
538 	return (0);
539 }
540 
541 /*
542  * Hash list removal routine for udp_t structures.
543  */
544 static void
545 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
546 {
547 	udp_t	*udpnext;
548 	kmutex_t *lockp;
549 	udp_stack_t *us = udp->udp_us;
550 
551 	if (udp->udp_ptpbhn == NULL)
552 		return;
553 
554 	/*
555 	 * Extract the lock pointer in case there are concurrent
556 	 * hash_remove's for this instance.
557 	 */
558 	ASSERT(udp->udp_port != 0);
559 	if (!caller_holds_lock) {
560 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
561 		    us->us_bind_fanout_size)].uf_lock;
562 		ASSERT(lockp != NULL);
563 		mutex_enter(lockp);
564 	}
565 	if (udp->udp_ptpbhn != NULL) {
566 		udpnext = udp->udp_bind_hash;
567 		if (udpnext != NULL) {
568 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
569 			udp->udp_bind_hash = NULL;
570 		}
571 		*udp->udp_ptpbhn = udpnext;
572 		udp->udp_ptpbhn = NULL;
573 	}
574 	if (!caller_holds_lock) {
575 		mutex_exit(lockp);
576 	}
577 }
578 
579 static void
580 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
581 {
582 	udp_t	**udpp;
583 	udp_t	*udpnext;
584 
585 	ASSERT(MUTEX_HELD(&uf->uf_lock));
586 	ASSERT(udp->udp_ptpbhn == NULL);
587 	udpp = &uf->uf_udp;
588 	udpnext = udpp[0];
589 	if (udpnext != NULL) {
590 		/*
591 		 * If the new udp bound to the INADDR_ANY address
592 		 * and the first one in the list is not bound to
593 		 * INADDR_ANY we skip all entries until we find the
594 		 * first one bound to INADDR_ANY.
595 		 * This makes sure that applications binding to a
596 		 * specific address get preference over those binding to
597 		 * INADDR_ANY.
598 		 */
599 		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
600 		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
601 			while ((udpnext = udpp[0]) != NULL &&
602 			    !V6_OR_V4_INADDR_ANY(
603 			    udpnext->udp_bound_v6src)) {
604 				udpp = &(udpnext->udp_bind_hash);
605 			}
606 			if (udpnext != NULL)
607 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
608 		} else {
609 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
610 		}
611 	}
612 	udp->udp_bind_hash = udpnext;
613 	udp->udp_ptpbhn = udpp;
614 	udpp[0] = udp;
615 }
616 
617 /*
618  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
619  * passed to udp_wput.
620  * It associates a port number and local address with the stream.
621  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
622  * protocol type (IPPROTO_UDP) placed in the message following the address.
623  * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
624  * (Called as writer.)
625  *
626  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
627  * without setting SO_REUSEADDR. This is needed so that they
628  * can be viewed as two independent transport protocols.
629  * However, anonymouns ports are allocated from the same range to avoid
630  * duplicating the us->us_next_port_to_try.
631  */
632 static void
633 udp_tpi_bind(queue_t *q, mblk_t *mp)
634 {
635 	sin_t		*sin;
636 	sin6_t		*sin6;
637 	mblk_t		*mp1;
638 	struct T_bind_req *tbr;
639 	conn_t		*connp;
640 	udp_t		*udp;
641 	int		error;
642 	struct sockaddr	*sa;
643 
644 	connp = Q_TO_CONN(q);
645 	udp = connp->conn_udp;
646 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
647 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
648 		    "udp_bind: bad req, len %u",
649 		    (uint_t)(mp->b_wptr - mp->b_rptr));
650 		udp_err_ack(q, mp, TPROTO, 0);
651 		return;
652 	}
653 	if (udp->udp_state != TS_UNBND) {
654 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
655 		    "udp_bind: bad state, %u", udp->udp_state);
656 		udp_err_ack(q, mp, TOUTSTATE, 0);
657 		return;
658 	}
659 	/*
660 	 * Reallocate the message to make sure we have enough room for an
661 	 * address and the protocol type.
662 	 */
663 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
664 	if (!mp1) {
665 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
666 		return;
667 	}
668 
669 	mp = mp1;
670 
671 	/* Reset the message type in preparation for shipping it back. */
672 	DB_TYPE(mp) = M_PCPROTO;
673 
674 	tbr = (struct T_bind_req *)mp->b_rptr;
675 	switch (tbr->ADDR_length) {
676 	case 0:			/* Request for a generic port */
677 		tbr->ADDR_offset = sizeof (struct T_bind_req);
678 		if (udp->udp_family == AF_INET) {
679 			tbr->ADDR_length = sizeof (sin_t);
680 			sin = (sin_t *)&tbr[1];
681 			*sin = sin_null;
682 			sin->sin_family = AF_INET;
683 			mp->b_wptr = (uchar_t *)&sin[1];
684 			sa = (struct sockaddr *)sin;
685 		} else {
686 			ASSERT(udp->udp_family == AF_INET6);
687 			tbr->ADDR_length = sizeof (sin6_t);
688 			sin6 = (sin6_t *)&tbr[1];
689 			*sin6 = sin6_null;
690 			sin6->sin6_family = AF_INET6;
691 			mp->b_wptr = (uchar_t *)&sin6[1];
692 			sa = (struct sockaddr *)sin6;
693 		}
694 		break;
695 
696 	case sizeof (sin_t):	/* Complete IPv4 address */
697 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
698 		    sizeof (sin_t));
699 		if (sa == NULL || !OK_32PTR((char *)sa)) {
700 			udp_err_ack(q, mp, TSYSERR, EINVAL);
701 			return;
702 		}
703 		if (udp->udp_family != AF_INET ||
704 		    sa->sa_family != AF_INET) {
705 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
706 			return;
707 		}
708 		break;
709 
710 	case sizeof (sin6_t):	/* complete IPv6 address */
711 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
712 		    sizeof (sin6_t));
713 		if (sa == NULL || !OK_32PTR((char *)sa)) {
714 			udp_err_ack(q, mp, TSYSERR, EINVAL);
715 			return;
716 		}
717 		if (udp->udp_family != AF_INET6 ||
718 		    sa->sa_family != AF_INET6) {
719 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
720 			return;
721 		}
722 		break;
723 
724 	default:		/* Invalid request */
725 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
726 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
727 		udp_err_ack(q, mp, TBADADDR, 0);
728 		return;
729 	}
730 
731 
732 	cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
733 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
734 	    tbr->PRIM_type != O_T_BIND_REQ);
735 
736 	if (error != 0) {
737 		if (error > 0) {
738 			udp_err_ack(q, mp, TSYSERR, error);
739 		} else {
740 			udp_err_ack(q, mp, -error, 0);
741 		}
742 	} else {
743 		tbr->PRIM_type = T_BIND_ACK;
744 		qreply(q, mp);
745 	}
746 }
747 
748 /*
749  * This routine handles each T_CONN_REQ message passed to udp.  It
750  * associates a default destination address with the stream.
751  *
752  * This routine sends down a T_BIND_REQ to IP with the following mblks:
753  *	T_BIND_REQ	- specifying local and remote address/port
754  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
755  *	T_OK_ACK	- for the T_CONN_REQ
756  *	T_CONN_CON	- to keep the TPI user happy
757  *
758  * The connect completes in udp_do_connect.
759  * When a T_BIND_ACK is received information is extracted from the IRE
760  * and the two appended messages are sent to the TPI user.
761  * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
762  * convert it to an error ack for the appropriate primitive.
763  */
764 static void
765 udp_tpi_connect(queue_t *q, mblk_t *mp)
766 {
767 	mblk_t	*mp1;
768 	udp_t	*udp;
769 	conn_t	*connp = Q_TO_CONN(q);
770 	int	error;
771 	socklen_t	len;
772 	struct sockaddr		*sa;
773 	struct T_conn_req	*tcr;
774 
775 	udp = connp->conn_udp;
776 	tcr = (struct T_conn_req *)mp->b_rptr;
777 
778 	/* A bit of sanity checking */
779 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
780 		udp_err_ack(q, mp, TPROTO, 0);
781 		return;
782 	}
783 
784 	if (tcr->OPT_length != 0) {
785 		udp_err_ack(q, mp, TBADOPT, 0);
786 		return;
787 	}
788 
789 	/*
790 	 * Determine packet type based on type of address passed in
791 	 * the request should contain an IPv4 or IPv6 address.
792 	 * Make sure that address family matches the type of
793 	 * family of the the address passed down
794 	 */
795 	len = tcr->DEST_length;
796 	switch (tcr->DEST_length) {
797 	default:
798 		udp_err_ack(q, mp, TBADADDR, 0);
799 		return;
800 
801 	case sizeof (sin_t):
802 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
803 		    sizeof (sin_t));
804 		break;
805 
806 	case sizeof (sin6_t):
807 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
808 		    sizeof (sin6_t));
809 		break;
810 	}
811 
812 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
813 	if (error != 0) {
814 		udp_err_ack(q, mp, TSYSERR, error);
815 		return;
816 	}
817 
818 	/*
819 	 * We have to send a connection confirmation to
820 	 * keep TLI happy.
821 	 */
822 	if (udp->udp_family == AF_INET) {
823 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
824 		    sizeof (sin_t), NULL, 0);
825 	} else {
826 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
827 		    sizeof (sin6_t), NULL, 0);
828 	}
829 	if (mp1 == NULL) {
830 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
831 		return;
832 	}
833 
834 	/*
835 	 * ok_ack for T_CONN_REQ
836 	 */
837 	mp = mi_tpi_ok_ack_alloc(mp);
838 	if (mp == NULL) {
839 		/* Unable to reuse the T_CONN_REQ for the ack. */
840 		freemsg(mp1);
841 		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
842 		return;
843 	}
844 
845 	error = udp_do_connect(connp, sa, len);
846 	if (error != 0) {
847 		freeb(mp1);
848 		if (error < 0)
849 			udp_err_ack(q, mp, -error, 0);
850 		else
851 			udp_err_ack(q, mp, TSYSERR, error);
852 	} else {
853 		putnext(connp->conn_rq, mp);
854 		putnext(connp->conn_rq, mp1);
855 	}
856 }
857 
858 static int
859 udp_tpi_close(queue_t *q, int flags)
860 {
861 	conn_t	*connp;
862 
863 	if (flags & SO_FALLBACK) {
864 		/*
865 		 * stream is being closed while in fallback
866 		 * simply free the resources that were allocated
867 		 */
868 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
869 		qprocsoff(q);
870 		goto done;
871 	}
872 
873 	connp = Q_TO_CONN(q);
874 	udp_do_close(connp);
875 done:
876 	q->q_ptr = WR(q)->q_ptr = NULL;
877 	return (0);
878 }
879 
880 /*
881  * Called in the close path to quiesce the conn
882  */
883 void
884 udp_quiesce_conn(conn_t *connp)
885 {
886 	udp_t	*udp = connp->conn_udp;
887 
888 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
889 		/*
890 		 * Running in cluster mode - register unbind information
891 		 */
892 		if (udp->udp_ipversion == IPV4_VERSION) {
893 			(*cl_inet_unbind)(
894 			    connp->conn_netstack->netstack_stackid,
895 			    IPPROTO_UDP, AF_INET,
896 			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
897 			    (in_port_t)udp->udp_port, NULL);
898 		} else {
899 			(*cl_inet_unbind)(
900 			    connp->conn_netstack->netstack_stackid,
901 			    IPPROTO_UDP, AF_INET6,
902 			    (uint8_t *)(&(udp->udp_v6src)),
903 			    (in_port_t)udp->udp_port, NULL);
904 		}
905 	}
906 
907 	udp_bind_hash_remove(udp, B_FALSE);
908 
909 }
910 
911 void
912 udp_close_free(conn_t *connp)
913 {
914 	udp_t *udp = connp->conn_udp;
915 
916 	/* If there are any options associated with the stream, free them. */
917 	if (udp->udp_ip_snd_options != NULL) {
918 		mi_free((char *)udp->udp_ip_snd_options);
919 		udp->udp_ip_snd_options = NULL;
920 		udp->udp_ip_snd_options_len = 0;
921 	}
922 
923 	if (udp->udp_ip_rcv_options != NULL) {
924 		mi_free((char *)udp->udp_ip_rcv_options);
925 		udp->udp_ip_rcv_options = NULL;
926 		udp->udp_ip_rcv_options_len = 0;
927 	}
928 
929 	/* Free memory associated with sticky options */
930 	if (udp->udp_sticky_hdrs_len != 0) {
931 		kmem_free(udp->udp_sticky_hdrs,
932 		    udp->udp_sticky_hdrs_len);
933 		udp->udp_sticky_hdrs = NULL;
934 		udp->udp_sticky_hdrs_len = 0;
935 	}
936 
937 	ip6_pkt_free(&udp->udp_sticky_ipp);
938 
939 	/*
940 	 * Clear any fields which the kmem_cache constructor clears.
941 	 * Only udp_connp needs to be preserved.
942 	 * TBD: We should make this more efficient to avoid clearing
943 	 * everything.
944 	 */
945 	ASSERT(udp->udp_connp == connp);
946 	bzero(udp, sizeof (udp_t));
947 	udp->udp_connp = connp;
948 }
949 
950 static int
951 udp_do_disconnect(conn_t *connp)
952 {
953 	udp_t	*udp;
954 	mblk_t	*ire_mp;
955 	udp_fanout_t *udpf;
956 	udp_stack_t *us;
957 	int	error;
958 
959 	udp = connp->conn_udp;
960 	us = udp->udp_us;
961 	rw_enter(&udp->udp_rwlock, RW_WRITER);
962 	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
963 		rw_exit(&udp->udp_rwlock);
964 		return (-TOUTSTATE);
965 	}
966 	udp->udp_pending_op = T_DISCON_REQ;
967 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
968 	    us->us_bind_fanout_size)];
969 	mutex_enter(&udpf->uf_lock);
970 	udp->udp_v6src = udp->udp_bound_v6src;
971 	udp->udp_state = TS_IDLE;
972 	mutex_exit(&udpf->uf_lock);
973 
974 	if (udp->udp_family == AF_INET6) {
975 		/* Rebuild the header template */
976 		error = udp_build_hdrs(udp);
977 		if (error != 0) {
978 			udp->udp_pending_op = -1;
979 			rw_exit(&udp->udp_rwlock);
980 			return (error);
981 		}
982 	}
983 
984 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
985 	if (ire_mp == NULL) {
986 		mutex_enter(&udpf->uf_lock);
987 		udp->udp_pending_op = -1;
988 		mutex_exit(&udpf->uf_lock);
989 		rw_exit(&udp->udp_rwlock);
990 		return (ENOMEM);
991 	}
992 
993 	rw_exit(&udp->udp_rwlock);
994 
995 	if (udp->udp_family == AF_INET6) {
996 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
997 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
998 	} else {
999 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
1000 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE);
1001 	}
1002 
1003 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
1004 }
1005 
1006 
1007 static void
1008 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
1009 {
1010 	conn_t	*connp = Q_TO_CONN(q);
1011 	int	error;
1012 
1013 	/*
1014 	 * Allocate the largest primitive we need to send back
1015 	 * T_error_ack is > than T_ok_ack
1016 	 */
1017 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
1018 	if (mp == NULL) {
1019 		/* Unable to reuse the T_DISCON_REQ for the ack. */
1020 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
1021 		return;
1022 	}
1023 
1024 	error = udp_do_disconnect(connp);
1025 
1026 	if (error != 0) {
1027 		if (error < 0) {
1028 			udp_err_ack(q, mp, -error, 0);
1029 		} else {
1030 			udp_err_ack(q, mp, TSYSERR, error);
1031 		}
1032 	} else {
1033 		mp = mi_tpi_ok_ack_alloc(mp);
1034 		ASSERT(mp != NULL);
1035 		qreply(q, mp);
1036 	}
1037 }
1038 
1039 int
1040 udp_disconnect(conn_t *connp)
1041 {
1042 	int error;
1043 	udp_t *udp = connp->conn_udp;
1044 
1045 	udp->udp_dgram_errind = B_FALSE;
1046 
1047 	error = udp_do_disconnect(connp);
1048 
1049 	if (error < 0)
1050 		error = proto_tlitosyserr(-error);
1051 
1052 	return (error);
1053 }
1054 
1055 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1056 static void
1057 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1058 {
1059 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1060 		qreply(q, mp);
1061 }
1062 
1063 /* Shorthand to generate and send TPI error acks to our client */
1064 static void
1065 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
1066     int sys_error)
1067 {
1068 	struct T_error_ack	*teackp;
1069 
1070 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1071 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
1072 		teackp = (struct T_error_ack *)mp->b_rptr;
1073 		teackp->ERROR_prim = primitive;
1074 		teackp->TLI_error = t_error;
1075 		teackp->UNIX_error = sys_error;
1076 		qreply(q, mp);
1077 	}
1078 }
1079 
1080 /*ARGSUSED*/
1081 static int
1082 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
1083 {
1084 	int i;
1085 	udp_t		*udp = Q_TO_UDP(q);
1086 	udp_stack_t *us = udp->udp_us;
1087 
1088 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1089 		if (us->us_epriv_ports[i] != 0)
1090 			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
1091 	}
1092 	return (0);
1093 }
1094 
1095 /* ARGSUSED */
1096 static int
1097 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1098     cred_t *cr)
1099 {
1100 	long	new_value;
1101 	int	i;
1102 	udp_t		*udp = Q_TO_UDP(q);
1103 	udp_stack_t *us = udp->udp_us;
1104 
1105 	/*
1106 	 * Fail the request if the new value does not lie within the
1107 	 * port number limits.
1108 	 */
1109 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1110 	    new_value <= 0 || new_value >= 65536) {
1111 		return (EINVAL);
1112 	}
1113 
1114 	/* Check if the value is already in the list */
1115 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1116 		if (new_value == us->us_epriv_ports[i]) {
1117 			return (EEXIST);
1118 		}
1119 	}
1120 	/* Find an empty slot */
1121 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1122 		if (us->us_epriv_ports[i] == 0)
1123 			break;
1124 	}
1125 	if (i == us->us_num_epriv_ports) {
1126 		return (EOVERFLOW);
1127 	}
1128 
1129 	/* Set the new value */
1130 	us->us_epriv_ports[i] = (in_port_t)new_value;
1131 	return (0);
1132 }
1133 
1134 /* ARGSUSED */
1135 static int
1136 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1137     cred_t *cr)
1138 {
1139 	long	new_value;
1140 	int	i;
1141 	udp_t		*udp = Q_TO_UDP(q);
1142 	udp_stack_t *us = udp->udp_us;
1143 
1144 	/*
1145 	 * Fail the request if the new value does not lie within the
1146 	 * port number limits.
1147 	 */
1148 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1149 	    new_value <= 0 || new_value >= 65536) {
1150 		return (EINVAL);
1151 	}
1152 
1153 	/* Check that the value is already in the list */
1154 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1155 		if (us->us_epriv_ports[i] == new_value)
1156 			break;
1157 	}
1158 	if (i == us->us_num_epriv_ports) {
1159 		return (ESRCH);
1160 	}
1161 
1162 	/* Clear the value */
1163 	us->us_epriv_ports[i] = 0;
1164 	return (0);
1165 }
1166 
1167 /* At minimum we need 4 bytes of UDP header */
1168 #define	ICMP_MIN_UDP_HDR	4
1169 
1170 /*
1171  * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP.
1172  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1173  * Assumes that IP has pulled up everything up to and including the ICMP header.
1174  */
1175 static void
1176 udp_icmp_error(conn_t *connp, mblk_t *mp)
1177 			    {
1178 	icmph_t *icmph;
1179 	ipha_t	*ipha;
1180 	int	iph_hdr_length;
1181 	udpha_t	*udpha;
1182 	sin_t	sin;
1183 	sin6_t	sin6;
1184 	mblk_t	*mp1;
1185 	int	error = 0;
1186 	udp_t	*udp = connp->conn_udp;
1187 
1188 	mp1 = NULL;
1189 	ipha = (ipha_t *)mp->b_rptr;
1190 
1191 	ASSERT(OK_32PTR(mp->b_rptr));
1192 
1193 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1194 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1195 		udp_icmp_error_ipv6(connp, mp);
1196 		return;
1197 	}
1198 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
1199 
1200 	/* Skip past the outer IP and ICMP headers */
1201 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1202 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1203 	ipha = (ipha_t *)&icmph[1];
1204 
1205 	/* Skip past the inner IP and find the ULP header */
1206 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1207 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1208 
1209 	switch (icmph->icmph_type) {
1210 	case ICMP_DEST_UNREACHABLE:
1211 		switch (icmph->icmph_code) {
1212 		case ICMP_FRAGMENTATION_NEEDED:
1213 			/*
1214 			 * IP has already adjusted the path MTU.
1215 			 */
1216 			break;
1217 		case ICMP_PORT_UNREACHABLE:
1218 		case ICMP_PROTOCOL_UNREACHABLE:
1219 			error = ECONNREFUSED;
1220 			break;
1221 		default:
1222 			/* Transient errors */
1223 			break;
1224 		}
1225 		break;
1226 	default:
1227 		/* Transient errors */
1228 		break;
1229 	}
1230 	if (error == 0) {
1231 		freemsg(mp);
1232 		return;
1233 	}
1234 
1235 	/*
1236 	 * Deliver T_UDERROR_IND when the application has asked for it.
1237 	 * The socket layer enables this automatically when connected.
1238 	 */
1239 	if (!udp->udp_dgram_errind) {
1240 		freemsg(mp);
1241 		return;
1242 	}
1243 
1244 
1245 	switch (udp->udp_family) {
1246 	case AF_INET:
1247 		sin = sin_null;
1248 		sin.sin_family = AF_INET;
1249 		sin.sin_addr.s_addr = ipha->ipha_dst;
1250 		sin.sin_port = udpha->uha_dst_port;
1251 		if (IPCL_IS_NONSTR(connp)) {
1252 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1253 			if (udp->udp_state == TS_DATA_XFER) {
1254 				if (sin.sin_port == udp->udp_dstport &&
1255 				    sin.sin_addr.s_addr ==
1256 				    V4_PART_OF_V6(udp->udp_v6dst)) {
1257 
1258 					rw_exit(&udp->udp_rwlock);
1259 					(*connp->conn_upcalls->su_set_error)
1260 					    (connp->conn_upper_handle, error);
1261 					goto done;
1262 				}
1263 			} else {
1264 				udp->udp_delayed_error = error;
1265 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1266 			}
1267 			rw_exit(&udp->udp_rwlock);
1268 		} else {
1269 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1270 			    NULL, 0, error);
1271 		}
1272 		break;
1273 	case AF_INET6:
1274 		sin6 = sin6_null;
1275 		sin6.sin6_family = AF_INET6;
1276 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1277 		sin6.sin6_port = udpha->uha_dst_port;
1278 		if (IPCL_IS_NONSTR(connp)) {
1279 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1280 			if (udp->udp_state == TS_DATA_XFER) {
1281 				if (sin6.sin6_port == udp->udp_dstport &&
1282 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1283 				    &udp->udp_v6dst)) {
1284 					rw_exit(&udp->udp_rwlock);
1285 					(*connp->conn_upcalls->su_set_error)
1286 					    (connp->conn_upper_handle, error);
1287 					goto done;
1288 				}
1289 			} else {
1290 				udp->udp_delayed_error = error;
1291 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1292 			}
1293 			rw_exit(&udp->udp_rwlock);
1294 		} else {
1295 
1296 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1297 			    NULL, 0, error);
1298 		}
1299 		break;
1300 	}
1301 	if (mp1 != NULL)
1302 		putnext(connp->conn_rq, mp1);
1303 done:
1304 	freemsg(mp);
1305 }
1306 
1307 /*
1308  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1309  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1310  * Assumes that IP has pulled up all the extension headers as well as the
1311  * ICMPv6 header.
1312  */
1313 static void
1314 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
1315 {
1316 	icmp6_t		*icmp6;
1317 	ip6_t		*ip6h, *outer_ip6h;
1318 	uint16_t	iph_hdr_length;
1319 	uint8_t		*nexthdrp;
1320 	udpha_t		*udpha;
1321 	sin6_t		sin6;
1322 	mblk_t		*mp1;
1323 	int		error = 0;
1324 	udp_t		*udp = connp->conn_udp;
1325 	udp_stack_t	*us = udp->udp_us;
1326 
1327 	outer_ip6h = (ip6_t *)mp->b_rptr;
1328 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1329 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1330 	else
1331 		iph_hdr_length = IPV6_HDR_LEN;
1332 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1333 	ip6h = (ip6_t *)&icmp6[1];
1334 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1335 		freemsg(mp);
1336 		return;
1337 	}
1338 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1339 
1340 	switch (icmp6->icmp6_type) {
1341 	case ICMP6_DST_UNREACH:
1342 		switch (icmp6->icmp6_code) {
1343 		case ICMP6_DST_UNREACH_NOPORT:
1344 			error = ECONNREFUSED;
1345 			break;
1346 		case ICMP6_DST_UNREACH_ADMIN:
1347 		case ICMP6_DST_UNREACH_NOROUTE:
1348 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1349 		case ICMP6_DST_UNREACH_ADDR:
1350 			/* Transient errors */
1351 			break;
1352 		default:
1353 			break;
1354 		}
1355 		break;
1356 	case ICMP6_PACKET_TOO_BIG: {
1357 		struct T_unitdata_ind	*tudi;
1358 		struct T_opthdr		*toh;
1359 		size_t			udi_size;
1360 		mblk_t			*newmp;
1361 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1362 		    sizeof (struct ip6_mtuinfo);
1363 		sin6_t			*sin6;
1364 		struct ip6_mtuinfo	*mtuinfo;
1365 
1366 		/*
1367 		 * If the application has requested to receive path mtu
1368 		 * information, send up an empty message containing an
1369 		 * IPV6_PATHMTU ancillary data item.
1370 		 */
1371 		if (!udp->udp_ipv6_recvpathmtu)
1372 			break;
1373 
1374 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1375 		    opt_length;
1376 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1377 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
1378 			break;
1379 		}
1380 
1381 		/*
1382 		 * newmp->b_cont is left to NULL on purpose.  This is an
1383 		 * empty message containing only ancillary data.
1384 		 */
1385 		newmp->b_datap->db_type = M_PROTO;
1386 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1387 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1388 		tudi->PRIM_type = T_UNITDATA_IND;
1389 		tudi->SRC_length = sizeof (sin6_t);
1390 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1391 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1392 		tudi->OPT_length = opt_length;
1393 
1394 		sin6 = (sin6_t *)&tudi[1];
1395 		bzero(sin6, sizeof (sin6_t));
1396 		sin6->sin6_family = AF_INET6;
1397 		sin6->sin6_addr = udp->udp_v6dst;
1398 
1399 		toh = (struct T_opthdr *)&sin6[1];
1400 		toh->level = IPPROTO_IPV6;
1401 		toh->name = IPV6_PATHMTU;
1402 		toh->len = opt_length;
1403 		toh->status = 0;
1404 
1405 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1406 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1407 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1408 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1409 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1410 		/*
1411 		 * We've consumed everything we need from the original
1412 		 * message.  Free it, then send our empty message.
1413 		 */
1414 		freemsg(mp);
1415 		if (!IPCL_IS_NONSTR(connp)) {
1416 			putnext(connp->conn_rq, newmp);
1417 		} else {
1418 			(*connp->conn_upcalls->su_recv)
1419 			    (connp->conn_upper_handle, newmp, 0, 0, &error,
1420 			    NULL);
1421 		}
1422 		return;
1423 	}
1424 	case ICMP6_TIME_EXCEEDED:
1425 		/* Transient errors */
1426 		break;
1427 	case ICMP6_PARAM_PROB:
1428 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1429 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1430 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1431 		    (uchar_t *)nexthdrp) {
1432 			error = ECONNREFUSED;
1433 			break;
1434 		}
1435 		break;
1436 	}
1437 	if (error == 0) {
1438 		freemsg(mp);
1439 		return;
1440 	}
1441 
1442 	/*
1443 	 * Deliver T_UDERROR_IND when the application has asked for it.
1444 	 * The socket layer enables this automatically when connected.
1445 	 */
1446 	if (!udp->udp_dgram_errind) {
1447 		freemsg(mp);
1448 		return;
1449 	}
1450 
1451 	sin6 = sin6_null;
1452 	sin6.sin6_family = AF_INET6;
1453 	sin6.sin6_addr = ip6h->ip6_dst;
1454 	sin6.sin6_port = udpha->uha_dst_port;
1455 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1456 
1457 	if (IPCL_IS_NONSTR(connp)) {
1458 		rw_enter(&udp->udp_rwlock, RW_WRITER);
1459 		if (udp->udp_state == TS_DATA_XFER) {
1460 			if (sin6.sin6_port == udp->udp_dstport &&
1461 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1462 			    &udp->udp_v6dst)) {
1463 				rw_exit(&udp->udp_rwlock);
1464 				(*connp->conn_upcalls->su_set_error)
1465 				    (connp->conn_upper_handle, error);
1466 				goto done;
1467 			}
1468 		} else {
1469 			udp->udp_delayed_error = error;
1470 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1471 		}
1472 		rw_exit(&udp->udp_rwlock);
1473 	} else {
1474 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1475 		    NULL, 0, error);
1476 		if (mp1 != NULL)
1477 			putnext(connp->conn_rq, mp1);
1478 	}
1479 
1480 done:
1481 	freemsg(mp);
1482 }
1483 
1484 /*
1485  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1486  * The local address is filled in if endpoint is bound. The remote address
1487  * is filled in if remote address has been precified ("connected endpoint")
1488  * (The concept of connected CLTS sockets is alien to published TPI
1489  *  but we support it anyway).
1490  */
1491 static void
1492 udp_addr_req(queue_t *q, mblk_t *mp)
1493 {
1494 	sin_t	*sin;
1495 	sin6_t	*sin6;
1496 	mblk_t	*ackmp;
1497 	struct T_addr_ack *taa;
1498 	udp_t	*udp = Q_TO_UDP(q);
1499 
1500 	/* Make it large enough for worst case */
1501 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1502 	    2 * sizeof (sin6_t), 1);
1503 	if (ackmp == NULL) {
1504 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1505 		return;
1506 	}
1507 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1508 
1509 	bzero(taa, sizeof (struct T_addr_ack));
1510 	ackmp->b_wptr = (uchar_t *)&taa[1];
1511 
1512 	taa->PRIM_type = T_ADDR_ACK;
1513 	ackmp->b_datap->db_type = M_PCPROTO;
1514 	rw_enter(&udp->udp_rwlock, RW_READER);
1515 	/*
1516 	 * Note: Following code assumes 32 bit alignment of basic
1517 	 * data structures like sin_t and struct T_addr_ack.
1518 	 */
1519 	if (udp->udp_state != TS_UNBND) {
1520 		/*
1521 		 * Fill in local address first
1522 		 */
1523 		taa->LOCADDR_offset = sizeof (*taa);
1524 		if (udp->udp_family == AF_INET) {
1525 			taa->LOCADDR_length = sizeof (sin_t);
1526 			sin = (sin_t *)&taa[1];
1527 			/* Fill zeroes and then initialize non-zero fields */
1528 			*sin = sin_null;
1529 			sin->sin_family = AF_INET;
1530 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
1531 			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1532 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
1533 				    sin->sin_addr.s_addr);
1534 			} else {
1535 				/*
1536 				 * INADDR_ANY
1537 				 * udp_v6src is not set, we might be bound to
1538 				 * broadcast/multicast. Use udp_bound_v6src as
1539 				 * local address instead (that could
1540 				 * also still be INADDR_ANY)
1541 				 */
1542 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
1543 				    sin->sin_addr.s_addr);
1544 			}
1545 			sin->sin_port = udp->udp_port;
1546 			ackmp->b_wptr = (uchar_t *)&sin[1];
1547 			if (udp->udp_state == TS_DATA_XFER) {
1548 				/*
1549 				 * connected, fill remote address too
1550 				 */
1551 				taa->REMADDR_length = sizeof (sin_t);
1552 				/* assumed 32-bit alignment */
1553 				taa->REMADDR_offset = taa->LOCADDR_offset +
1554 				    taa->LOCADDR_length;
1555 
1556 				sin = (sin_t *)(ackmp->b_rptr +
1557 				    taa->REMADDR_offset);
1558 				/* initialize */
1559 				*sin = sin_null;
1560 				sin->sin_family = AF_INET;
1561 				sin->sin_addr.s_addr =
1562 				    V4_PART_OF_V6(udp->udp_v6dst);
1563 				sin->sin_port = udp->udp_dstport;
1564 				ackmp->b_wptr = (uchar_t *)&sin[1];
1565 			}
1566 		} else {
1567 			taa->LOCADDR_length = sizeof (sin6_t);
1568 			sin6 = (sin6_t *)&taa[1];
1569 			/* Fill zeroes and then initialize non-zero fields */
1570 			*sin6 = sin6_null;
1571 			sin6->sin6_family = AF_INET6;
1572 			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1573 				sin6->sin6_addr = udp->udp_v6src;
1574 			} else {
1575 				/*
1576 				 * UNSPECIFIED
1577 				 * udp_v6src is not set, we might be bound to
1578 				 * broadcast/multicast. Use udp_bound_v6src as
1579 				 * local address instead (that could
1580 				 * also still be UNSPECIFIED)
1581 				 */
1582 				sin6->sin6_addr =
1583 				    udp->udp_bound_v6src;
1584 			}
1585 			sin6->sin6_port = udp->udp_port;
1586 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1587 			if (udp->udp_state == TS_DATA_XFER) {
1588 				/*
1589 				 * connected, fill remote address too
1590 				 */
1591 				taa->REMADDR_length = sizeof (sin6_t);
1592 				/* assumed 32-bit alignment */
1593 				taa->REMADDR_offset = taa->LOCADDR_offset +
1594 				    taa->LOCADDR_length;
1595 
1596 				sin6 = (sin6_t *)(ackmp->b_rptr +
1597 				    taa->REMADDR_offset);
1598 				/* initialize */
1599 				*sin6 = sin6_null;
1600 				sin6->sin6_family = AF_INET6;
1601 				sin6->sin6_addr = udp->udp_v6dst;
1602 				sin6->sin6_port =  udp->udp_dstport;
1603 				ackmp->b_wptr = (uchar_t *)&sin6[1];
1604 			}
1605 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1606 		}
1607 	}
1608 	rw_exit(&udp->udp_rwlock);
1609 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1610 	qreply(q, ackmp);
1611 }
1612 
1613 static void
1614 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1615 {
1616 	if (udp->udp_family == AF_INET) {
1617 		*tap = udp_g_t_info_ack_ipv4;
1618 	} else {
1619 		*tap = udp_g_t_info_ack_ipv6;
1620 	}
1621 	tap->CURRENT_state = udp->udp_state;
1622 	tap->OPT_size = udp_max_optsize;
1623 }
1624 
1625 static void
1626 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1627     t_uscalar_t cap_bits1)
1628 {
1629 	tcap->CAP_bits1 = 0;
1630 
1631 	if (cap_bits1 & TC1_INFO) {
1632 		udp_copy_info(&tcap->INFO_ack, udp);
1633 		tcap->CAP_bits1 |= TC1_INFO;
1634 	}
1635 }
1636 
1637 /*
1638  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1639  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1640  * udp_g_t_info_ack.  The current state of the stream is copied from
1641  * udp_state.
1642  */
1643 static void
1644 udp_capability_req(queue_t *q, mblk_t *mp)
1645 {
1646 	t_uscalar_t		cap_bits1;
1647 	struct T_capability_ack	*tcap;
1648 	udp_t	*udp = Q_TO_UDP(q);
1649 
1650 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1651 
1652 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1653 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1654 	if (!mp)
1655 		return;
1656 
1657 	tcap = (struct T_capability_ack *)mp->b_rptr;
1658 	udp_do_capability_ack(udp, tcap, cap_bits1);
1659 
1660 	qreply(q, mp);
1661 }
1662 
1663 /*
1664  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1665  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1666  * The current state of the stream is copied from udp_state.
1667  */
1668 static void
1669 udp_info_req(queue_t *q, mblk_t *mp)
1670 {
1671 	udp_t *udp = Q_TO_UDP(q);
1672 
1673 	/* Create a T_INFO_ACK message. */
1674 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1675 	    T_INFO_ACK);
1676 	if (!mp)
1677 		return;
1678 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1679 	qreply(q, mp);
1680 }
1681 
1682 /*
1683  * IP recognizes seven kinds of bind requests:
1684  *
1685  * - A zero-length address binds only to the protocol number.
1686  *
1687  * - A 4-byte address is treated as a request to
1688  * validate that the address is a valid local IPv4
1689  * address, appropriate for an application to bind to.
1690  * IP does the verification, but does not make any note
1691  * of the address at this time.
1692  *
1693  * - A 16-byte address contains is treated as a request
1694  * to validate a local IPv6 address, as the 4-byte
1695  * address case above.
1696  *
1697  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
1698  * use it for the inbound fanout of packets.
1699  *
1700  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
1701  * use it for the inbound fanout of packets.
1702  *
1703  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
1704  * information consisting of local and remote addresses
1705  * and ports.  In this case, the addresses are both
1706  * validated as appropriate for this operation, and, if
1707  * so, the information is retained for use in the
1708  * inbound fanout.
1709  *
1710  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
1711  * fanout information, like the 12-byte case above.
1712  *
1713  * IP will also fill in the IRE request mblk with information
1714  * regarding our peer.  In all cases, we notify IP of our protocol
1715  * type by appending a single protocol byte to the bind request.
1716  */
1717 static mblk_t *
1718 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
1719 {
1720 	char	*cp;
1721 	mblk_t	*mp;
1722 	struct T_bind_req *tbr;
1723 	ipa_conn_t	*ac;
1724 	ipa6_conn_t	*ac6;
1725 	sin_t		*sin;
1726 	sin6_t		*sin6;
1727 
1728 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
1729 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
1730 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
1731 	if (!mp)
1732 		return (mp);
1733 	mp->b_datap->db_type = M_PROTO;
1734 	tbr = (struct T_bind_req *)mp->b_rptr;
1735 	tbr->PRIM_type = bind_prim;
1736 	tbr->ADDR_offset = sizeof (*tbr);
1737 	tbr->CONIND_number = 0;
1738 	tbr->ADDR_length = addr_length;
1739 	cp = (char *)&tbr[1];
1740 	switch (addr_length) {
1741 	case sizeof (ipa_conn_t):
1742 		ASSERT(udp->udp_family == AF_INET);
1743 		/* Append a request for an IRE */
1744 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1745 		if (!mp->b_cont) {
1746 			freemsg(mp);
1747 			return (NULL);
1748 		}
1749 		mp->b_cont->b_wptr += sizeof (ire_t);
1750 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1751 
1752 		/* cp known to be 32 bit aligned */
1753 		ac = (ipa_conn_t *)cp;
1754 		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
1755 		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
1756 		ac->ac_fport = udp->udp_dstport;
1757 		ac->ac_lport = udp->udp_port;
1758 		break;
1759 
1760 	case sizeof (ipa6_conn_t):
1761 		ASSERT(udp->udp_family == AF_INET6);
1762 		/* Append a request for an IRE */
1763 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1764 		if (!mp->b_cont) {
1765 			freemsg(mp);
1766 			return (NULL);
1767 		}
1768 		mp->b_cont->b_wptr += sizeof (ire_t);
1769 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1770 
1771 		/* cp known to be 32 bit aligned */
1772 		ac6 = (ipa6_conn_t *)cp;
1773 		ac6->ac6_laddr = udp->udp_v6src;
1774 		ac6->ac6_faddr = udp->udp_v6dst;
1775 		ac6->ac6_fport = udp->udp_dstport;
1776 		ac6->ac6_lport = udp->udp_port;
1777 		break;
1778 
1779 	case sizeof (sin_t):
1780 		ASSERT(udp->udp_family == AF_INET);
1781 		/* Append a request for an IRE */
1782 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1783 		if (!mp->b_cont) {
1784 			freemsg(mp);
1785 			return (NULL);
1786 		}
1787 		mp->b_cont->b_wptr += sizeof (ire_t);
1788 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1789 
1790 		sin = (sin_t *)cp;
1791 		*sin = sin_null;
1792 		sin->sin_family = AF_INET;
1793 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
1794 		sin->sin_port = udp->udp_port;
1795 		break;
1796 
1797 	case sizeof (sin6_t):
1798 		ASSERT(udp->udp_family == AF_INET6);
1799 		/* Append a request for an IRE */
1800 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1801 		if (!mp->b_cont) {
1802 			freemsg(mp);
1803 			return (NULL);
1804 		}
1805 		mp->b_cont->b_wptr += sizeof (ire_t);
1806 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1807 
1808 		sin6 = (sin6_t *)cp;
1809 		*sin6 = sin6_null;
1810 		sin6->sin6_family = AF_INET6;
1811 		sin6->sin6_addr = udp->udp_bound_v6src;
1812 		sin6->sin6_port = udp->udp_port;
1813 		break;
1814 	}
1815 	/* Add protocol number to end */
1816 	cp[addr_length] = (char)IPPROTO_UDP;
1817 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
1818 	return (mp);
1819 }
1820 
1821 /* For /dev/udp aka AF_INET open */
1822 static int
1823 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1824 {
1825 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1826 }
1827 
1828 /* For /dev/udp6 aka AF_INET6 open */
1829 static int
1830 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1831 {
1832 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1833 }
1834 
1835 /*
1836  * This is the open routine for udp.  It allocates a udp_t structure for
1837  * the stream and, on the first open of the module, creates an ND table.
1838  */
1839 /*ARGSUSED2*/
1840 static int
1841 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1842     boolean_t isv6)
1843 {
1844 	int		error;
1845 	udp_t		*udp;
1846 	conn_t		*connp;
1847 	dev_t		conn_dev;
1848 	udp_stack_t	*us;
1849 	vmem_t		*minor_arena;
1850 
1851 	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);
1852 
1853 	/* If the stream is already open, return immediately. */
1854 	if (q->q_ptr != NULL)
1855 		return (0);
1856 
1857 	if (sflag == MODOPEN)
1858 		return (EINVAL);
1859 
1860 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1861 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1862 		minor_arena = ip_minor_arena_la;
1863 	} else {
1864 		/*
1865 		 * Either minor numbers in the large arena were exhausted
1866 		 * or a non socket application is doing the open.
1867 		 * Try to allocate from the small arena.
1868 		 */
1869 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1870 			return (EBUSY);
1871 
1872 		minor_arena = ip_minor_arena_sa;
1873 	}
1874 
1875 	if (flag & SO_FALLBACK) {
1876 		/*
1877 		 * Non streams socket needs a stream to fallback to
1878 		 */
1879 		RD(q)->q_ptr = (void *)conn_dev;
1880 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1881 		WR(q)->q_ptr = (void *)minor_arena;
1882 		qprocson(q);
1883 		return (0);
1884 	}
1885 
1886 	connp = udp_do_open(credp, isv6, KM_SLEEP);
1887 	if (connp == NULL) {
1888 		inet_minor_free(minor_arena, conn_dev);
1889 		return (ENOMEM);
1890 	}
1891 	udp = connp->conn_udp;
1892 	us = udp->udp_us;
1893 
1894 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1895 	connp->conn_dev = conn_dev;
1896 	connp->conn_minor_arena = minor_arena;
1897 
1898 	/*
1899 	 * Initialize the udp_t structure for this stream.
1900 	 */
1901 	q->q_ptr = connp;
1902 	WR(q)->q_ptr = connp;
1903 	connp->conn_rq = q;
1904 	connp->conn_wq = WR(q);
1905 
1906 	rw_enter(&udp->udp_rwlock, RW_WRITER);
1907 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
1908 	ASSERT(connp->conn_udp == udp);
1909 	ASSERT(udp->udp_connp == connp);
1910 
1911 	if (flag & SO_SOCKSTR) {
1912 		connp->conn_flags |= IPCL_SOCKET;
1913 		udp->udp_issocket = B_TRUE;
1914 		udp->udp_direct_sockfs = B_TRUE;
1915 	}
1916 
1917 	q->q_hiwat = us->us_recv_hiwat;
1918 	WR(q)->q_hiwat = us->us_xmit_hiwat;
1919 	WR(q)->q_lowat = us->us_xmit_lowat;
1920 
1921 	qprocson(q);
1922 
1923 	if (udp->udp_family == AF_INET6) {
1924 		/* Build initial header template for transmit */
1925 		if ((error = udp_build_hdrs(udp)) != 0) {
1926 			rw_exit(&udp->udp_rwlock);
1927 			qprocsoff(q);
1928 			inet_minor_free(minor_arena, conn_dev);
1929 			ipcl_conn_destroy(connp);
1930 			return (error);
1931 		}
1932 	}
1933 	rw_exit(&udp->udp_rwlock);
1934 
1935 	/* Set the Stream head write offset and high watermark. */
1936 	(void) proto_set_tx_wroff(q, connp,
1937 	    udp->udp_max_hdr_len + us->us_wroff_extra);
1938 	/* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */
1939 	(void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat));
1940 
1941 	mutex_enter(&connp->conn_lock);
1942 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1943 	mutex_exit(&connp->conn_lock);
1944 	return (0);
1945 }
1946 
1947 /*
1948  * Which UDP options OK to set through T_UNITDATA_REQ...
1949  */
1950 /* ARGSUSED */
1951 static boolean_t
1952 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1953 {
1954 	return (B_TRUE);
1955 }
1956 
1957 /*
1958  * This routine gets default values of certain options whose default
1959  * values are maintained by protcol specific code
1960  */
1961 /* ARGSUSED */
1962 int
1963 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1964 {
1965 	udp_t		*udp = Q_TO_UDP(q);
1966 	udp_stack_t *us = udp->udp_us;
1967 	int *i1 = (int *)ptr;
1968 
1969 	switch (level) {
1970 	case IPPROTO_IP:
1971 		switch (name) {
1972 		case IP_MULTICAST_TTL:
1973 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1974 			return (sizeof (uchar_t));
1975 		case IP_MULTICAST_LOOP:
1976 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1977 			return (sizeof (uchar_t));
1978 		}
1979 		break;
1980 	case IPPROTO_IPV6:
1981 		switch (name) {
1982 		case IPV6_MULTICAST_HOPS:
1983 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1984 			return (sizeof (int));
1985 		case IPV6_MULTICAST_LOOP:
1986 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1987 			return (sizeof (int));
1988 		case IPV6_UNICAST_HOPS:
1989 			*i1 = us->us_ipv6_hoplimit;
1990 			return (sizeof (int));
1991 		}
1992 		break;
1993 	}
1994 	return (-1);
1995 }
1996 
1997 /*
1998  * This routine retrieves the current status of socket options.
1999  * It returns the size of the option retrieved.
2000  */
2001 static int
2002 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
2003 {
2004 	udp_t		*udp = connp->conn_udp;
2005 	udp_stack_t	*us = udp->udp_us;
2006 	int		*i1 = (int *)ptr;
2007 	ip6_pkt_t 	*ipp = &udp->udp_sticky_ipp;
2008 	int		len;
2009 
2010 	ASSERT(RW_READ_HELD(&udp->udp_rwlock));
2011 	switch (level) {
2012 	case SOL_SOCKET:
2013 		switch (name) {
2014 		case SO_DEBUG:
2015 			*i1 = udp->udp_debug;
2016 			break;	/* goto sizeof (int) option return */
2017 		case SO_REUSEADDR:
2018 			*i1 = udp->udp_reuseaddr;
2019 			break;	/* goto sizeof (int) option return */
2020 		case SO_TYPE:
2021 			*i1 = SOCK_DGRAM;
2022 			break;	/* goto sizeof (int) option return */
2023 
2024 		/*
2025 		 * The following three items are available here,
2026 		 * but are only meaningful to IP.
2027 		 */
2028 		case SO_DONTROUTE:
2029 			*i1 = udp->udp_dontroute;
2030 			break;	/* goto sizeof (int) option return */
2031 		case SO_USELOOPBACK:
2032 			*i1 = udp->udp_useloopback;
2033 			break;	/* goto sizeof (int) option return */
2034 		case SO_BROADCAST:
2035 			*i1 = udp->udp_broadcast;
2036 			break;	/* goto sizeof (int) option return */
2037 
2038 		case SO_SNDBUF:
2039 			*i1 = udp->udp_xmit_hiwat;
2040 			break;	/* goto sizeof (int) option return */
2041 		case SO_RCVBUF:
2042 			*i1 = udp->udp_rcv_disply_hiwat;
2043 			break;	/* goto sizeof (int) option return */
2044 		case SO_DGRAM_ERRIND:
2045 			*i1 = udp->udp_dgram_errind;
2046 			break;	/* goto sizeof (int) option return */
2047 		case SO_RECVUCRED:
2048 			*i1 = udp->udp_recvucred;
2049 			break;	/* goto sizeof (int) option return */
2050 		case SO_TIMESTAMP:
2051 			*i1 = udp->udp_timestamp;
2052 			break;	/* goto sizeof (int) option return */
2053 		case SO_ANON_MLP:
2054 			*i1 = connp->conn_anon_mlp;
2055 			break;	/* goto sizeof (int) option return */
2056 		case SO_MAC_EXEMPT:
2057 			*i1 = connp->conn_mac_exempt;
2058 			break;	/* goto sizeof (int) option return */
2059 		case SO_ALLZONES:
2060 			*i1 = connp->conn_allzones;
2061 			break;	/* goto sizeof (int) option return */
2062 		case SO_EXCLBIND:
2063 			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
2064 			break;
2065 		case SO_PROTOTYPE:
2066 			*i1 = IPPROTO_UDP;
2067 			break;
2068 		case SO_DOMAIN:
2069 			*i1 = udp->udp_family;
2070 			break;
2071 		default:
2072 			return (-1);
2073 		}
2074 		break;
2075 	case IPPROTO_IP:
2076 		if (udp->udp_family != AF_INET)
2077 			return (-1);
2078 		switch (name) {
2079 		case IP_OPTIONS:
2080 		case T_IP_OPTIONS:
2081 			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
2082 			if (len > 0) {
2083 				bcopy(udp->udp_ip_rcv_options +
2084 				    udp->udp_label_len, ptr, len);
2085 			}
2086 			return (len);
2087 		case IP_TOS:
2088 		case T_IP_TOS:
2089 			*i1 = (int)udp->udp_type_of_service;
2090 			break;	/* goto sizeof (int) option return */
2091 		case IP_TTL:
2092 			*i1 = (int)udp->udp_ttl;
2093 			break;	/* goto sizeof (int) option return */
2094 		case IP_DHCPINIT_IF:
2095 			return (-EINVAL);
2096 		case IP_NEXTHOP:
2097 		case IP_RECVPKTINFO:
2098 			/*
2099 			 * This also handles IP_PKTINFO.
2100 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2101 			 * Differentiation is based on the size of the argument
2102 			 * passed in.
2103 			 * This option is handled in IP which will return an
2104 			 * error for IP_PKTINFO as it's not supported as a
2105 			 * sticky option.
2106 			 */
2107 			return (-EINVAL);
2108 		case IP_MULTICAST_IF:
2109 			/* 0 address if not set */
2110 			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
2111 			return (sizeof (ipaddr_t));
2112 		case IP_MULTICAST_TTL:
2113 			*(uchar_t *)ptr = udp->udp_multicast_ttl;
2114 			return (sizeof (uchar_t));
2115 		case IP_MULTICAST_LOOP:
2116 			*ptr = connp->conn_multicast_loop;
2117 			return (sizeof (uint8_t));
2118 		case IP_RECVOPTS:
2119 			*i1 = udp->udp_recvopts;
2120 			break;	/* goto sizeof (int) option return */
2121 		case IP_RECVDSTADDR:
2122 			*i1 = udp->udp_recvdstaddr;
2123 			break;	/* goto sizeof (int) option return */
2124 		case IP_RECVIF:
2125 			*i1 = udp->udp_recvif;
2126 			break;	/* goto sizeof (int) option return */
2127 		case IP_RECVSLLA:
2128 			*i1 = udp->udp_recvslla;
2129 			break;	/* goto sizeof (int) option return */
2130 		case IP_RECVTTL:
2131 			*i1 = udp->udp_recvttl;
2132 			break;	/* goto sizeof (int) option return */
2133 		case IP_ADD_MEMBERSHIP:
2134 		case IP_DROP_MEMBERSHIP:
2135 		case IP_BLOCK_SOURCE:
2136 		case IP_UNBLOCK_SOURCE:
2137 		case IP_ADD_SOURCE_MEMBERSHIP:
2138 		case IP_DROP_SOURCE_MEMBERSHIP:
2139 		case MCAST_JOIN_GROUP:
2140 		case MCAST_LEAVE_GROUP:
2141 		case MCAST_BLOCK_SOURCE:
2142 		case MCAST_UNBLOCK_SOURCE:
2143 		case MCAST_JOIN_SOURCE_GROUP:
2144 		case MCAST_LEAVE_SOURCE_GROUP:
2145 			/* cannot "get" the value for these */
2146 			return (-1);
2147 		case IP_BOUND_IF:
2148 			/* Zero if not set */
2149 			*i1 = udp->udp_bound_if;
2150 			break;	/* goto sizeof (int) option return */
2151 		case IP_UNSPEC_SRC:
2152 			*i1 = udp->udp_unspec_source;
2153 			break;	/* goto sizeof (int) option return */
2154 		case IP_BROADCAST_TTL:
2155 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
2156 			return (sizeof (uchar_t));
2157 		default:
2158 			return (-1);
2159 		}
2160 		break;
2161 	case IPPROTO_IPV6:
2162 		if (udp->udp_family != AF_INET6)
2163 			return (-1);
2164 		switch (name) {
2165 		case IPV6_UNICAST_HOPS:
2166 			*i1 = (unsigned int)udp->udp_ttl;
2167 			break;	/* goto sizeof (int) option return */
2168 		case IPV6_MULTICAST_IF:
2169 			/* 0 index if not set */
2170 			*i1 = udp->udp_multicast_if_index;
2171 			break;	/* goto sizeof (int) option return */
2172 		case IPV6_MULTICAST_HOPS:
2173 			*i1 = udp->udp_multicast_ttl;
2174 			break;	/* goto sizeof (int) option return */
2175 		case IPV6_MULTICAST_LOOP:
2176 			*i1 = connp->conn_multicast_loop;
2177 			break;	/* goto sizeof (int) option return */
2178 		case IPV6_JOIN_GROUP:
2179 		case IPV6_LEAVE_GROUP:
2180 		case MCAST_JOIN_GROUP:
2181 		case MCAST_LEAVE_GROUP:
2182 		case MCAST_BLOCK_SOURCE:
2183 		case MCAST_UNBLOCK_SOURCE:
2184 		case MCAST_JOIN_SOURCE_GROUP:
2185 		case MCAST_LEAVE_SOURCE_GROUP:
2186 			/* cannot "get" the value for these */
2187 			return (-1);
2188 		case IPV6_BOUND_IF:
2189 			/* Zero if not set */
2190 			*i1 = udp->udp_bound_if;
2191 			break;	/* goto sizeof (int) option return */
2192 		case IPV6_UNSPEC_SRC:
2193 			*i1 = udp->udp_unspec_source;
2194 			break;	/* goto sizeof (int) option return */
2195 		case IPV6_RECVPKTINFO:
2196 			*i1 = udp->udp_ip_recvpktinfo;
2197 			break;	/* goto sizeof (int) option return */
2198 		case IPV6_RECVTCLASS:
2199 			*i1 = udp->udp_ipv6_recvtclass;
2200 			break;	/* goto sizeof (int) option return */
2201 		case IPV6_RECVPATHMTU:
2202 			*i1 = udp->udp_ipv6_recvpathmtu;
2203 			break;	/* goto sizeof (int) option return */
2204 		case IPV6_RECVHOPLIMIT:
2205 			*i1 = udp->udp_ipv6_recvhoplimit;
2206 			break;	/* goto sizeof (int) option return */
2207 		case IPV6_RECVHOPOPTS:
2208 			*i1 = udp->udp_ipv6_recvhopopts;
2209 			break;	/* goto sizeof (int) option return */
2210 		case IPV6_RECVDSTOPTS:
2211 			*i1 = udp->udp_ipv6_recvdstopts;
2212 			break;	/* goto sizeof (int) option return */
2213 		case _OLD_IPV6_RECVDSTOPTS:
2214 			*i1 = udp->udp_old_ipv6_recvdstopts;
2215 			break;	/* goto sizeof (int) option return */
2216 		case IPV6_RECVRTHDRDSTOPTS:
2217 			*i1 = udp->udp_ipv6_recvrthdrdstopts;
2218 			break;	/* goto sizeof (int) option return */
2219 		case IPV6_RECVRTHDR:
2220 			*i1 = udp->udp_ipv6_recvrthdr;
2221 			break;	/* goto sizeof (int) option return */
2222 		case IPV6_PKTINFO: {
2223 			/* XXX assumes that caller has room for max size! */
2224 			struct in6_pktinfo *pkti;
2225 
2226 			pkti = (struct in6_pktinfo *)ptr;
2227 			if (ipp->ipp_fields & IPPF_IFINDEX)
2228 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
2229 			else
2230 				pkti->ipi6_ifindex = 0;
2231 			if (ipp->ipp_fields & IPPF_ADDR)
2232 				pkti->ipi6_addr = ipp->ipp_addr;
2233 			else
2234 				pkti->ipi6_addr = ipv6_all_zeros;
2235 			return (sizeof (struct in6_pktinfo));
2236 		}
2237 		case IPV6_TCLASS:
2238 			if (ipp->ipp_fields & IPPF_TCLASS)
2239 				*i1 = ipp->ipp_tclass;
2240 			else
2241 				*i1 = IPV6_FLOW_TCLASS(
2242 				    IPV6_DEFAULT_VERS_AND_FLOW);
2243 			break;	/* goto sizeof (int) option return */
2244 		case IPV6_NEXTHOP: {
2245 			sin6_t *sin6 = (sin6_t *)ptr;
2246 
2247 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
2248 				return (0);
2249 			*sin6 = sin6_null;
2250 			sin6->sin6_family = AF_INET6;
2251 			sin6->sin6_addr = ipp->ipp_nexthop;
2252 			return (sizeof (sin6_t));
2253 		}
2254 		case IPV6_HOPOPTS:
2255 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
2256 				return (0);
2257 			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
2258 				return (0);
2259 			/*
2260 			 * The cipso/label option is added by kernel.
2261 			 * User is not usually aware of this option.
2262 			 * We copy out the hbh opt after the label option.
2263 			 */
2264 			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
2265 			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
2266 			if (udp->udp_label_len_v6 > 0) {
2267 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
2268 				ptr[1] = (ipp->ipp_hopoptslen -
2269 				    udp->udp_label_len_v6 + 7) / 8 - 1;
2270 			}
2271 			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
2272 		case IPV6_RTHDRDSTOPTS:
2273 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
2274 				return (0);
2275 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
2276 			return (ipp->ipp_rtdstoptslen);
2277 		case IPV6_RTHDR:
2278 			if (!(ipp->ipp_fields & IPPF_RTHDR))
2279 				return (0);
2280 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
2281 			return (ipp->ipp_rthdrlen);
2282 		case IPV6_DSTOPTS:
2283 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
2284 				return (0);
2285 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
2286 			return (ipp->ipp_dstoptslen);
2287 		case IPV6_PATHMTU:
2288 			return (ip_fill_mtuinfo(&udp->udp_v6dst,
2289 			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
2290 			    us->us_netstack));
2291 		default:
2292 			return (-1);
2293 		}
2294 		break;
2295 	case IPPROTO_UDP:
2296 		switch (name) {
2297 		case UDP_ANONPRIVBIND:
2298 			*i1 = udp->udp_anon_priv_bind;
2299 			break;
2300 		case UDP_EXCLBIND:
2301 			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
2302 			break;
2303 		case UDP_RCVHDR:
2304 			*i1 = udp->udp_rcvhdr ? 1 : 0;
2305 			break;
2306 		case UDP_NAT_T_ENDPOINT:
2307 			*i1 = udp->udp_nat_t_endpoint;
2308 			break;
2309 		default:
2310 			return (-1);
2311 		}
2312 		break;
2313 	default:
2314 		return (-1);
2315 	}
2316 	return (sizeof (int));
2317 }
2318 
2319 int
2320 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
2321 {
2322 	udp_t   *udp;
2323 	int	err;
2324 
2325 	udp = Q_TO_UDP(q);
2326 
2327 	rw_enter(&udp->udp_rwlock, RW_READER);
2328 	err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
2329 	rw_exit(&udp->udp_rwlock);
2330 	return (err);
2331 }
2332 
2333 /*
2334  * This routine sets socket options.
2335  */
2336 /* ARGSUSED */
2337 static int
2338 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
2339     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
2340     void *thisdg_attrs, boolean_t checkonly)
2341 {
2342 	udpattrs_t *attrs = thisdg_attrs;
2343 	int	*i1 = (int *)invalp;
2344 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2345 	udp_t	*udp = connp->conn_udp;
2346 	udp_stack_t	*us = udp->udp_us;
2347 	int	error;
2348 	uint_t	newlen;
2349 	size_t	sth_wroff;
2350 
2351 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
2352 	/*
2353 	 * For fixed length options, no sanity check
2354 	 * of passed in length is done. It is assumed *_optcom_req()
2355 	 * routines do the right thing.
2356 	 */
2357 	switch (level) {
2358 	case SOL_SOCKET:
2359 		switch (name) {
2360 		case SO_REUSEADDR:
2361 			if (!checkonly) {
2362 				udp->udp_reuseaddr = onoff;
2363 				PASS_OPT_TO_IP(connp);
2364 			}
2365 			break;
2366 		case SO_DEBUG:
2367 			if (!checkonly)
2368 				udp->udp_debug = onoff;
2369 			break;
2370 		/*
2371 		 * The following three items are available here,
2372 		 * but are only meaningful to IP.
2373 		 */
2374 		case SO_DONTROUTE:
2375 			if (!checkonly) {
2376 				udp->udp_dontroute = onoff;
2377 				PASS_OPT_TO_IP(connp);
2378 			}
2379 			break;
2380 		case SO_USELOOPBACK:
2381 			if (!checkonly) {
2382 				udp->udp_useloopback = onoff;
2383 				PASS_OPT_TO_IP(connp);
2384 			}
2385 			break;
2386 		case SO_BROADCAST:
2387 			if (!checkonly) {
2388 				udp->udp_broadcast = onoff;
2389 				PASS_OPT_TO_IP(connp);
2390 			}
2391 			break;
2392 
2393 		case SO_SNDBUF:
2394 			if (*i1 > us->us_max_buf) {
2395 				*outlenp = 0;
2396 				return (ENOBUFS);
2397 			}
2398 			if (!checkonly) {
2399 				udp->udp_xmit_hiwat = *i1;
2400 				connp->conn_wq->q_hiwat = *i1;
2401 			}
2402 			break;
2403 		case SO_RCVBUF:
2404 			if (*i1 > us->us_max_buf) {
2405 				*outlenp = 0;
2406 				return (ENOBUFS);
2407 			}
2408 			if (!checkonly) {
2409 				int size;
2410 
2411 				udp->udp_rcv_disply_hiwat = *i1;
2412 				size = udp_set_rcv_hiwat(udp, *i1);
2413 				rw_exit(&udp->udp_rwlock);
2414 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2415 				    size);
2416 				rw_enter(&udp->udp_rwlock, RW_WRITER);
2417 			}
2418 			break;
2419 		case SO_DGRAM_ERRIND:
2420 			if (!checkonly)
2421 				udp->udp_dgram_errind = onoff;
2422 			break;
2423 		case SO_RECVUCRED:
2424 			if (!checkonly)
2425 				udp->udp_recvucred = onoff;
2426 			break;
2427 		case SO_ALLZONES:
2428 			/*
2429 			 * "soft" error (negative)
2430 			 * option not handled at this level
2431 			 * Do not modify *outlenp.
2432 			 */
2433 			return (-EINVAL);
2434 		case SO_TIMESTAMP:
2435 			if (!checkonly)
2436 				udp->udp_timestamp = onoff;
2437 			break;
2438 		case SO_ANON_MLP:
2439 			if (!checkonly) {
2440 				connp->conn_anon_mlp = onoff;
2441 				PASS_OPT_TO_IP(connp);
2442 			}
2443 			break;
2444 		case SO_MAC_EXEMPT:
2445 			if (secpolicy_net_mac_aware(cr) != 0 ||
2446 			    udp->udp_state != TS_UNBND)
2447 				return (EACCES);
2448 			if (!checkonly) {
2449 				connp->conn_mac_exempt = onoff;
2450 				PASS_OPT_TO_IP(connp);
2451 			}
2452 			break;
2453 		case SCM_UCRED: {
2454 			struct ucred_s *ucr;
2455 			cred_t *cr, *newcr;
2456 			ts_label_t *tsl;
2457 
2458 			/*
2459 			 * Only sockets that have proper privileges and are
2460 			 * bound to MLPs will have any other value here, so
2461 			 * this implicitly tests for privilege to set label.
2462 			 */
2463 			if (connp->conn_mlp_type == mlptSingle)
2464 				break;
2465 			ucr = (struct ucred_s *)invalp;
2466 			if (inlen != ucredsize ||
2467 			    ucr->uc_labeloff < sizeof (*ucr) ||
2468 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
2469 				return (EINVAL);
2470 			if (!checkonly) {
2471 				mblk_t *mb;
2472 
2473 				if (attrs == NULL ||
2474 				    (mb = attrs->udpattr_mb) == NULL)
2475 					return (EINVAL);
2476 				if ((cr = DB_CRED(mb)) == NULL)
2477 					cr = udp->udp_connp->conn_cred;
2478 				ASSERT(cr != NULL);
2479 				if ((tsl = crgetlabel(cr)) == NULL)
2480 					return (EINVAL);
2481 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
2482 				    tsl->tsl_doi, KM_NOSLEEP);
2483 				if (newcr == NULL)
2484 					return (ENOSR);
2485 				mblk_setcred(mb, newcr);
2486 				attrs->udpattr_credset = B_TRUE;
2487 				crfree(newcr);
2488 			}
2489 			break;
2490 		}
2491 		case SO_EXCLBIND:
2492 			if (!checkonly)
2493 				udp->udp_exclbind = onoff;
2494 			break;
2495 		case SO_RCVTIMEO:
2496 		case SO_SNDTIMEO:
2497 			/*
2498 			 * Pass these two options in order for third part
2499 			 * protocol usage. Here just return directly.
2500 			 */
2501 			return (0);
2502 		default:
2503 			*outlenp = 0;
2504 			return (EINVAL);
2505 		}
2506 		break;
2507 	case IPPROTO_IP:
2508 		if (udp->udp_family != AF_INET) {
2509 			*outlenp = 0;
2510 			return (ENOPROTOOPT);
2511 		}
2512 		switch (name) {
2513 		case IP_OPTIONS:
2514 		case T_IP_OPTIONS:
2515 			/* Save options for use by IP. */
2516 			newlen = inlen + udp->udp_label_len;
2517 			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
2518 				*outlenp = 0;
2519 				return (EINVAL);
2520 			}
2521 			if (checkonly)
2522 				break;
2523 
2524 			/*
2525 			 * Update the stored options taking into account
2526 			 * any CIPSO option which we should not overwrite.
2527 			 */
2528 			if (!tsol_option_set(&udp->udp_ip_snd_options,
2529 			    &udp->udp_ip_snd_options_len,
2530 			    udp->udp_label_len, invalp, inlen)) {
2531 				*outlenp = 0;
2532 				return (ENOMEM);
2533 			}
2534 
2535 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2536 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
2537 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
2538 			rw_exit(&udp->udp_rwlock);
2539 			(void) proto_set_tx_wroff(connp->conn_rq, connp,
2540 			    sth_wroff);
2541 			rw_enter(&udp->udp_rwlock, RW_WRITER);
2542 			break;
2543 
2544 		case IP_TTL:
2545 			if (!checkonly) {
2546 				udp->udp_ttl = (uchar_t)*i1;
2547 			}
2548 			break;
2549 		case IP_TOS:
2550 		case T_IP_TOS:
2551 			if (!checkonly) {
2552 				udp->udp_type_of_service = (uchar_t)*i1;
2553 			}
2554 			break;
2555 		case IP_MULTICAST_IF: {
2556 			/*
2557 			 * TODO should check OPTMGMT reply and undo this if
2558 			 * there is an error.
2559 			 */
2560 			struct in_addr *inap = (struct in_addr *)invalp;
2561 			if (!checkonly) {
2562 				udp->udp_multicast_if_addr =
2563 				    inap->s_addr;
2564 				PASS_OPT_TO_IP(connp);
2565 			}
2566 			break;
2567 		}
2568 		case IP_MULTICAST_TTL:
2569 			if (!checkonly)
2570 				udp->udp_multicast_ttl = *invalp;
2571 			break;
2572 		case IP_MULTICAST_LOOP:
2573 			if (!checkonly) {
2574 				connp->conn_multicast_loop = *invalp;
2575 				PASS_OPT_TO_IP(connp);
2576 			}
2577 			break;
2578 		case IP_RECVOPTS:
2579 			if (!checkonly)
2580 				udp->udp_recvopts = onoff;
2581 			break;
2582 		case IP_RECVDSTADDR:
2583 			if (!checkonly)
2584 				udp->udp_recvdstaddr = onoff;
2585 			break;
2586 		case IP_RECVIF:
2587 			if (!checkonly) {
2588 				udp->udp_recvif = onoff;
2589 				PASS_OPT_TO_IP(connp);
2590 			}
2591 			break;
2592 		case IP_RECVSLLA:
2593 			if (!checkonly) {
2594 				udp->udp_recvslla = onoff;
2595 				PASS_OPT_TO_IP(connp);
2596 			}
2597 			break;
2598 		case IP_RECVTTL:
2599 			if (!checkonly)
2600 				udp->udp_recvttl = onoff;
2601 			break;
2602 		case IP_PKTINFO: {
2603 			/*
2604 			 * This also handles IP_RECVPKTINFO.
2605 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
2606 			 * Differentiation is based on the size of the
2607 			 * argument passed in.
2608 			 */
2609 			struct in_pktinfo *pktinfop;
2610 			ip4_pkt_t *attr_pktinfop;
2611 
2612 			if (checkonly)
2613 				break;
2614 
2615 			if (inlen == sizeof (int)) {
2616 				/*
2617 				 * This is IP_RECVPKTINFO option.
2618 				 * Keep a local copy of whether this option is
2619 				 * set or not and pass it down to IP for
2620 				 * processing.
2621 				 */
2622 
2623 				udp->udp_ip_recvpktinfo = onoff;
2624 				return (-EINVAL);
2625 			}
2626 
2627 			if (attrs == NULL ||
2628 			    (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
2629 				/*
2630 				 * sticky option or no buffer to return
2631 				 * the results.
2632 				 */
2633 				return (EINVAL);
2634 			}
2635 
2636 			if (inlen != sizeof (struct in_pktinfo))
2637 				return (EINVAL);
2638 
2639 			pktinfop = (struct in_pktinfo *)invalp;
2640 
2641 			/*
2642 			 * At least one of the values should be specified
2643 			 */
2644 			if (pktinfop->ipi_ifindex == 0 &&
2645 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2646 				return (EINVAL);
2647 			}
2648 
2649 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2650 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2651 
2652 			break;
2653 		}
2654 		case IP_ADD_MEMBERSHIP:
2655 		case IP_DROP_MEMBERSHIP:
2656 		case IP_BLOCK_SOURCE:
2657 		case IP_UNBLOCK_SOURCE:
2658 		case IP_ADD_SOURCE_MEMBERSHIP:
2659 		case IP_DROP_SOURCE_MEMBERSHIP:
2660 		case MCAST_JOIN_GROUP:
2661 		case MCAST_LEAVE_GROUP:
2662 		case MCAST_BLOCK_SOURCE:
2663 		case MCAST_UNBLOCK_SOURCE:
2664 		case MCAST_JOIN_SOURCE_GROUP:
2665 		case MCAST_LEAVE_SOURCE_GROUP:
2666 		case IP_SEC_OPT:
2667 		case IP_NEXTHOP:
2668 		case IP_DHCPINIT_IF:
2669 			/*
2670 			 * "soft" error (negative)
2671 			 * option not handled at this level
2672 			 * Do not modify *outlenp.
2673 			 */
2674 			return (-EINVAL);
2675 		case IP_BOUND_IF:
2676 			if (!checkonly) {
2677 				udp->udp_bound_if = *i1;
2678 				PASS_OPT_TO_IP(connp);
2679 			}
2680 			break;
2681 		case IP_UNSPEC_SRC:
2682 			if (!checkonly) {
2683 				udp->udp_unspec_source = onoff;
2684 				PASS_OPT_TO_IP(connp);
2685 			}
2686 			break;
2687 		case IP_BROADCAST_TTL:
2688 			if (!checkonly)
2689 				connp->conn_broadcast_ttl = *invalp;
2690 			break;
2691 		default:
2692 			*outlenp = 0;
2693 			return (EINVAL);
2694 		}
2695 		break;
2696 	case IPPROTO_IPV6: {
2697 		ip6_pkt_t		*ipp;
2698 		boolean_t		sticky;
2699 
2700 		if (udp->udp_family != AF_INET6) {
2701 			*outlenp = 0;
2702 			return (ENOPROTOOPT);
2703 		}
2704 		/*
2705 		 * Deal with both sticky options and ancillary data
2706 		 */
2707 		sticky = B_FALSE;
2708 		if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
2709 		    NULL) {
2710 			/* sticky options, or none */
2711 			ipp = &udp->udp_sticky_ipp;
2712 			sticky = B_TRUE;
2713 		}
2714 
2715 		switch (name) {
2716 		case IPV6_MULTICAST_IF:
2717 			if (!checkonly) {
2718 				udp->udp_multicast_if_index = *i1;
2719 				PASS_OPT_TO_IP(connp);
2720 			}
2721 			break;
2722 		case IPV6_UNICAST_HOPS:
2723 			/* -1 means use default */
2724 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2725 				*outlenp = 0;
2726 				return (EINVAL);
2727 			}
2728 			if (!checkonly) {
2729 				if (*i1 == -1) {
2730 					udp->udp_ttl = ipp->ipp_unicast_hops =
2731 					    us->us_ipv6_hoplimit;
2732 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2733 					/* Pass modified value to IP. */
2734 					*i1 = udp->udp_ttl;
2735 				} else {
2736 					udp->udp_ttl = ipp->ipp_unicast_hops =
2737 					    (uint8_t)*i1;
2738 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2739 				}
2740 				/* Rebuild the header template */
2741 				error = udp_build_hdrs(udp);
2742 				if (error != 0) {
2743 					*outlenp = 0;
2744 					return (error);
2745 				}
2746 			}
2747 			break;
2748 		case IPV6_MULTICAST_HOPS:
2749 			/* -1 means use default */
2750 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2751 				*outlenp = 0;
2752 				return (EINVAL);
2753 			}
2754 			if (!checkonly) {
2755 				if (*i1 == -1) {
2756 					udp->udp_multicast_ttl =
2757 					    ipp->ipp_multicast_hops =
2758 					    IP_DEFAULT_MULTICAST_TTL;
2759 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2760 					/* Pass modified value to IP. */
2761 					*i1 = udp->udp_multicast_ttl;
2762 				} else {
2763 					udp->udp_multicast_ttl =
2764 					    ipp->ipp_multicast_hops =
2765 					    (uint8_t)*i1;
2766 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2767 				}
2768 			}
2769 			break;
2770 		case IPV6_MULTICAST_LOOP:
2771 			if (*i1 != 0 && *i1 != 1) {
2772 				*outlenp = 0;
2773 				return (EINVAL);
2774 			}
2775 			if (!checkonly) {
2776 				connp->conn_multicast_loop = *i1;
2777 				PASS_OPT_TO_IP(connp);
2778 			}
2779 			break;
2780 		case IPV6_JOIN_GROUP:
2781 		case IPV6_LEAVE_GROUP:
2782 		case MCAST_JOIN_GROUP:
2783 		case MCAST_LEAVE_GROUP:
2784 		case MCAST_BLOCK_SOURCE:
2785 		case MCAST_UNBLOCK_SOURCE:
2786 		case MCAST_JOIN_SOURCE_GROUP:
2787 		case MCAST_LEAVE_SOURCE_GROUP:
2788 			/*
2789 			 * "soft" error (negative)
2790 			 * option not handled at this level
2791 			 * Note: Do not modify *outlenp
2792 			 */
2793 			return (-EINVAL);
2794 		case IPV6_BOUND_IF:
2795 			if (!checkonly) {
2796 				udp->udp_bound_if = *i1;
2797 				PASS_OPT_TO_IP(connp);
2798 			}
2799 			break;
2800 		case IPV6_UNSPEC_SRC:
2801 			if (!checkonly) {
2802 				udp->udp_unspec_source = onoff;
2803 				PASS_OPT_TO_IP(connp);
2804 			}
2805 			break;
2806 		/*
2807 		 * Set boolean switches for ancillary data delivery
2808 		 */
2809 		case IPV6_RECVPKTINFO:
2810 			if (!checkonly) {
2811 				udp->udp_ip_recvpktinfo = onoff;
2812 				PASS_OPT_TO_IP(connp);
2813 			}
2814 			break;
2815 		case IPV6_RECVTCLASS:
2816 			if (!checkonly) {
2817 				udp->udp_ipv6_recvtclass = onoff;
2818 				PASS_OPT_TO_IP(connp);
2819 			}
2820 			break;
2821 		case IPV6_RECVPATHMTU:
2822 			if (!checkonly) {
2823 				udp->udp_ipv6_recvpathmtu = onoff;
2824 				PASS_OPT_TO_IP(connp);
2825 			}
2826 			break;
2827 		case IPV6_RECVHOPLIMIT:
2828 			if (!checkonly) {
2829 				udp->udp_ipv6_recvhoplimit = onoff;
2830 				PASS_OPT_TO_IP(connp);
2831 			}
2832 			break;
2833 		case IPV6_RECVHOPOPTS:
2834 			if (!checkonly) {
2835 				udp->udp_ipv6_recvhopopts = onoff;
2836 				PASS_OPT_TO_IP(connp);
2837 			}
2838 			break;
2839 		case IPV6_RECVDSTOPTS:
2840 			if (!checkonly) {
2841 				udp->udp_ipv6_recvdstopts = onoff;
2842 				PASS_OPT_TO_IP(connp);
2843 			}
2844 			break;
2845 		case _OLD_IPV6_RECVDSTOPTS:
2846 			if (!checkonly)
2847 				udp->udp_old_ipv6_recvdstopts = onoff;
2848 			break;
2849 		case IPV6_RECVRTHDRDSTOPTS:
2850 			if (!checkonly) {
2851 				udp->udp_ipv6_recvrthdrdstopts = onoff;
2852 				PASS_OPT_TO_IP(connp);
2853 			}
2854 			break;
2855 		case IPV6_RECVRTHDR:
2856 			if (!checkonly) {
2857 				udp->udp_ipv6_recvrthdr = onoff;
2858 				PASS_OPT_TO_IP(connp);
2859 			}
2860 			break;
2861 		/*
2862 		 * Set sticky options or ancillary data.
2863 		 * If sticky options, (re)build any extension headers
2864 		 * that might be needed as a result.
2865 		 */
2866 		case IPV6_PKTINFO:
2867 			/*
2868 			 * The source address and ifindex are verified
2869 			 * in ip_opt_set(). For ancillary data the
2870 			 * source address is checked in ip_wput_v6.
2871 			 */
2872 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
2873 				return (EINVAL);
2874 			if (checkonly)
2875 				break;
2876 
2877 			if (inlen == 0) {
2878 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2879 				ipp->ipp_sticky_ignored |=
2880 				    (IPPF_IFINDEX|IPPF_ADDR);
2881 			} else {
2882 				struct in6_pktinfo *pkti;
2883 
2884 				pkti = (struct in6_pktinfo *)invalp;
2885 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2886 				ipp->ipp_addr = pkti->ipi6_addr;
2887 				if (ipp->ipp_ifindex != 0)
2888 					ipp->ipp_fields |= IPPF_IFINDEX;
2889 				else
2890 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2891 				if (!IN6_IS_ADDR_UNSPECIFIED(
2892 				    &ipp->ipp_addr))
2893 					ipp->ipp_fields |= IPPF_ADDR;
2894 				else
2895 					ipp->ipp_fields &= ~IPPF_ADDR;
2896 			}
2897 			if (sticky) {
2898 				error = udp_build_hdrs(udp);
2899 				if (error != 0)
2900 					return (error);
2901 				PASS_OPT_TO_IP(connp);
2902 			}
2903 			break;
2904 		case IPV6_HOPLIMIT:
2905 			if (sticky)
2906 				return (EINVAL);
2907 			if (inlen != 0 && inlen != sizeof (int))
2908 				return (EINVAL);
2909 			if (checkonly)
2910 				break;
2911 
2912 			if (inlen == 0) {
2913 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2914 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2915 			} else {
2916 				if (*i1 > 255 || *i1 < -1)
2917 					return (EINVAL);
2918 				if (*i1 == -1)
2919 					ipp->ipp_hoplimit =
2920 					    us->us_ipv6_hoplimit;
2921 				else
2922 					ipp->ipp_hoplimit = *i1;
2923 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2924 			}
2925 			break;
2926 		case IPV6_TCLASS:
2927 			if (inlen != 0 && inlen != sizeof (int))
2928 				return (EINVAL);
2929 			if (checkonly)
2930 				break;
2931 
2932 			if (inlen == 0) {
2933 				ipp->ipp_fields &= ~IPPF_TCLASS;
2934 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2935 			} else {
2936 				if (*i1 > 255 || *i1 < -1)
2937 					return (EINVAL);
2938 				if (*i1 == -1)
2939 					ipp->ipp_tclass = 0;
2940 				else
2941 					ipp->ipp_tclass = *i1;
2942 				ipp->ipp_fields |= IPPF_TCLASS;
2943 			}
2944 			if (sticky) {
2945 				error = udp_build_hdrs(udp);
2946 				if (error != 0)
2947 					return (error);
2948 			}
2949 			break;
2950 		case IPV6_NEXTHOP:
2951 			/*
2952 			 * IP will verify that the nexthop is reachable
2953 			 * and fail for sticky options.
2954 			 */
2955 			if (inlen != 0 && inlen != sizeof (sin6_t))
2956 				return (EINVAL);
2957 			if (checkonly)
2958 				break;
2959 
2960 			if (inlen == 0) {
2961 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2962 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2963 			} else {
2964 				sin6_t *sin6 = (sin6_t *)invalp;
2965 
2966 				if (sin6->sin6_family != AF_INET6) {
2967 					return (EAFNOSUPPORT);
2968 				}
2969 				if (IN6_IS_ADDR_V4MAPPED(
2970 				    &sin6->sin6_addr))
2971 					return (EADDRNOTAVAIL);
2972 				ipp->ipp_nexthop = sin6->sin6_addr;
2973 				if (!IN6_IS_ADDR_UNSPECIFIED(
2974 				    &ipp->ipp_nexthop))
2975 					ipp->ipp_fields |= IPPF_NEXTHOP;
2976 				else
2977 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2978 			}
2979 			if (sticky) {
2980 				error = udp_build_hdrs(udp);
2981 				if (error != 0)
2982 					return (error);
2983 				PASS_OPT_TO_IP(connp);
2984 			}
2985 			break;
2986 		case IPV6_HOPOPTS: {
2987 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2988 			/*
2989 			 * Sanity checks - minimum size, size a multiple of
2990 			 * eight bytes, and matching size passed in.
2991 			 */
2992 			if (inlen != 0 &&
2993 			    inlen != (8 * (hopts->ip6h_len + 1)))
2994 				return (EINVAL);
2995 
2996 			if (checkonly)
2997 				break;
2998 
2999 			error = optcom_pkt_set(invalp, inlen, sticky,
3000 			    (uchar_t **)&ipp->ipp_hopopts,
3001 			    &ipp->ipp_hopoptslen,
3002 			    sticky ? udp->udp_label_len_v6 : 0);
3003 			if (error != 0)
3004 				return (error);
3005 			if (ipp->ipp_hopoptslen == 0) {
3006 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
3007 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
3008 			} else {
3009 				ipp->ipp_fields |= IPPF_HOPOPTS;
3010 			}
3011 			if (sticky) {
3012 				error = udp_build_hdrs(udp);
3013 				if (error != 0)
3014 					return (error);
3015 			}
3016 			break;
3017 		}
3018 		case IPV6_RTHDRDSTOPTS: {
3019 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3020 
3021 			/*
3022 			 * Sanity checks - minimum size, size a multiple of
3023 			 * eight bytes, and matching size passed in.
3024 			 */
3025 			if (inlen != 0 &&
3026 			    inlen != (8 * (dopts->ip6d_len + 1)))
3027 				return (EINVAL);
3028 
3029 			if (checkonly)
3030 				break;
3031 
3032 			if (inlen == 0) {
3033 				if (sticky &&
3034 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
3035 					kmem_free(ipp->ipp_rtdstopts,
3036 					    ipp->ipp_rtdstoptslen);
3037 					ipp->ipp_rtdstopts = NULL;
3038 					ipp->ipp_rtdstoptslen = 0;
3039 				}
3040 
3041 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
3042 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
3043 			} else {
3044 				error = optcom_pkt_set(invalp, inlen, sticky,
3045 				    (uchar_t **)&ipp->ipp_rtdstopts,
3046 				    &ipp->ipp_rtdstoptslen, 0);
3047 				if (error != 0)
3048 					return (error);
3049 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
3050 			}
3051 			if (sticky) {
3052 				error = udp_build_hdrs(udp);
3053 				if (error != 0)
3054 					return (error);
3055 			}
3056 			break;
3057 		}
3058 		case IPV6_DSTOPTS: {
3059 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3060 
3061 			/*
3062 			 * Sanity checks - minimum size, size a multiple of
3063 			 * eight bytes, and matching size passed in.
3064 			 */
3065 			if (inlen != 0 &&
3066 			    inlen != (8 * (dopts->ip6d_len + 1)))
3067 				return (EINVAL);
3068 
3069 			if (checkonly)
3070 				break;
3071 
3072 			if (inlen == 0) {
3073 				if (sticky &&
3074 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
3075 					kmem_free(ipp->ipp_dstopts,
3076 					    ipp->ipp_dstoptslen);
3077 					ipp->ipp_dstopts = NULL;
3078 					ipp->ipp_dstoptslen = 0;
3079 				}
3080 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
3081 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
3082 			} else {
3083 				error = optcom_pkt_set(invalp, inlen, sticky,
3084 				    (uchar_t **)&ipp->ipp_dstopts,
3085 				    &ipp->ipp_dstoptslen, 0);
3086 				if (error != 0)
3087 					return (error);
3088 				ipp->ipp_fields |= IPPF_DSTOPTS;
3089 			}
3090 			if (sticky) {
3091 				error = udp_build_hdrs(udp);
3092 				if (error != 0)
3093 					return (error);
3094 			}
3095 			break;
3096 		}
3097 		case IPV6_RTHDR: {
3098 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
3099 
3100 			/*
3101 			 * Sanity checks - minimum size, size a multiple of
3102 			 * eight bytes, and matching size passed in.
3103 			 */
3104 			if (inlen != 0 &&
3105 			    inlen != (8 * (rt->ip6r_len + 1)))
3106 				return (EINVAL);
3107 
3108 			if (checkonly)
3109 				break;
3110 
3111 			if (inlen == 0) {
3112 				if (sticky &&
3113 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
3114 					kmem_free(ipp->ipp_rthdr,
3115 					    ipp->ipp_rthdrlen);
3116 					ipp->ipp_rthdr = NULL;
3117 					ipp->ipp_rthdrlen = 0;
3118 				}
3119 				ipp->ipp_fields &= ~IPPF_RTHDR;
3120 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
3121 			} else {
3122 				error = optcom_pkt_set(invalp, inlen, sticky,
3123 				    (uchar_t **)&ipp->ipp_rthdr,
3124 				    &ipp->ipp_rthdrlen, 0);
3125 				if (error != 0)
3126 					return (error);
3127 				ipp->ipp_fields |= IPPF_RTHDR;
3128 			}
3129 			if (sticky) {
3130 				error = udp_build_hdrs(udp);
3131 				if (error != 0)
3132 					return (error);
3133 			}
3134 			break;
3135 		}
3136 
3137 		case IPV6_DONTFRAG:
3138 			if (checkonly)
3139 				break;
3140 
3141 			if (onoff) {
3142 				ipp->ipp_fields |= IPPF_DONTFRAG;
3143 			} else {
3144 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
3145 			}
3146 			break;
3147 
3148 		case IPV6_USE_MIN_MTU:
3149 			if (inlen != sizeof (int))
3150 				return (EINVAL);
3151 
3152 			if (*i1 < -1 || *i1 > 1)
3153 				return (EINVAL);
3154 
3155 			if (checkonly)
3156 				break;
3157 
3158 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
3159 			ipp->ipp_use_min_mtu = *i1;
3160 			break;
3161 
3162 		case IPV6_SEC_OPT:
3163 		case IPV6_SRC_PREFERENCES:
3164 		case IPV6_V6ONLY:
3165 			/* Handled at the IP level */
3166 			return (-EINVAL);
3167 		default:
3168 			*outlenp = 0;
3169 			return (EINVAL);
3170 		}
3171 		break;
3172 		}		/* end IPPROTO_IPV6 */
3173 	case IPPROTO_UDP:
3174 		switch (name) {
3175 		case UDP_ANONPRIVBIND:
3176 			if ((error = secpolicy_net_privaddr(cr, 0,
3177 			    IPPROTO_UDP)) != 0) {
3178 				*outlenp = 0;
3179 				return (error);
3180 			}
3181 			if (!checkonly) {
3182 				udp->udp_anon_priv_bind = onoff;
3183 			}
3184 			break;
3185 		case UDP_EXCLBIND:
3186 			if (!checkonly)
3187 				udp->udp_exclbind = onoff;
3188 			break;
3189 		case UDP_RCVHDR:
3190 			if (!checkonly)
3191 				udp->udp_rcvhdr = onoff;
3192 			break;
3193 		case UDP_NAT_T_ENDPOINT:
3194 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
3195 				*outlenp = 0;
3196 				return (error);
3197 			}
3198 
3199 			/*
3200 			 * Use udp_family instead so we can avoid ambiguitites
3201 			 * with AF_INET6 sockets that may switch from IPv4
3202 			 * to IPv6.
3203 			 */
3204 			if (udp->udp_family != AF_INET) {
3205 				*outlenp = 0;
3206 				return (EAFNOSUPPORT);
3207 			}
3208 
3209 			if (!checkonly) {
3210 				int size;
3211 
3212 				udp->udp_nat_t_endpoint = onoff;
3213 
3214 				udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
3215 				    UDPH_SIZE + udp->udp_ip_snd_options_len;
3216 
3217 				/* Also, adjust wroff */
3218 				if (onoff) {
3219 					udp->udp_max_hdr_len +=
3220 					    sizeof (uint32_t);
3221 				}
3222 				size = udp->udp_max_hdr_len +
3223 				    us->us_wroff_extra;
3224 				(void) proto_set_tx_wroff(connp->conn_rq, connp,
3225 				    size);
3226 			}
3227 			break;
3228 		default:
3229 			*outlenp = 0;
3230 			return (EINVAL);
3231 		}
3232 		break;
3233 	default:
3234 		*outlenp = 0;
3235 		return (EINVAL);
3236 	}
3237 	/*
3238 	 * Common case of OK return with outval same as inval.
3239 	 */
3240 	if (invalp != outvalp) {
3241 		/* don't trust bcopy for identical src/dst */
3242 		(void) bcopy(invalp, outvalp, inlen);
3243 	}
3244 	*outlenp = inlen;
3245 	return (0);
3246 }
3247 
3248 int
3249 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
3250     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3251     void *thisdg_attrs, cred_t *cr)
3252 {
3253 	int		error;
3254 	boolean_t	checkonly;
3255 
3256 	error = 0;
3257 	switch (optset_context) {
3258 	case SETFN_OPTCOM_CHECKONLY:
3259 		checkonly = B_TRUE;
3260 		/*
3261 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3262 		 * inlen != 0 implies value supplied and
3263 		 * 	we have to "pretend" to set it.
3264 		 * inlen == 0 implies that there is no
3265 		 * 	value part in T_CHECK request and just validation
3266 		 * done elsewhere should be enough, we just return here.
3267 		 */
3268 		if (inlen == 0) {
3269 			*outlenp = 0;
3270 			goto done;
3271 		}
3272 		break;
3273 	case SETFN_OPTCOM_NEGOTIATE:
3274 		checkonly = B_FALSE;
3275 		break;
3276 	case SETFN_UD_NEGOTIATE:
3277 	case SETFN_CONN_NEGOTIATE:
3278 		checkonly = B_FALSE;
3279 		/*
3280 		 * Negotiating local and "association-related" options
3281 		 * through T_UNITDATA_REQ.
3282 		 *
3283 		 * Following routine can filter out ones we do not
3284 		 * want to be "set" this way.
3285 		 */
3286 		if (!udp_opt_allow_udr_set(level, name)) {
3287 			*outlenp = 0;
3288 			error = EINVAL;
3289 			goto done;
3290 		}
3291 		break;
3292 	default:
3293 		/*
3294 		 * We should never get here
3295 		 */
3296 		*outlenp = 0;
3297 		error = EINVAL;
3298 		goto done;
3299 	}
3300 
3301 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3302 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3303 
3304 	error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
3305 	    outvalp, cr, thisdg_attrs, checkonly);
3306 done:
3307 	return (error);
3308 }
3309 
3310 /* ARGSUSED */
3311 int
3312 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
3313     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3314     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3315 {
3316 	conn_t  *connp =  Q_TO_CONN(q);
3317 	int error;
3318 	udp_t	*udp = connp->conn_udp;
3319 
3320 	rw_enter(&udp->udp_rwlock, RW_WRITER);
3321 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
3322 	    outlenp, outvalp, thisdg_attrs, cr);
3323 	rw_exit(&udp->udp_rwlock);
3324 	return (error);
3325 }
3326 
3327 /*
3328  * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
3329  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
3330  * headers, and the udp header.
3331  * Returns failure if can't allocate memory.
3332  */
3333 static int
3334 udp_build_hdrs(udp_t *udp)
3335 {
3336 	udp_stack_t *us = udp->udp_us;
3337 	uchar_t	*hdrs;
3338 	uint_t	hdrs_len;
3339 	ip6_t	*ip6h;
3340 	ip6i_t	*ip6i;
3341 	udpha_t	*udpha;
3342 	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
3343 	size_t	sth_wroff;
3344 	conn_t	*connp = udp->udp_connp;
3345 
3346 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
3347 	ASSERT(connp != NULL);
3348 
3349 	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
3350 	ASSERT(hdrs_len != 0);
3351 	if (hdrs_len != udp->udp_sticky_hdrs_len) {
3352 		/* Need to reallocate */
3353 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3354 		if (hdrs == NULL)
3355 			return (ENOMEM);
3356 
3357 		if (udp->udp_sticky_hdrs_len != 0) {
3358 			kmem_free(udp->udp_sticky_hdrs,
3359 			    udp->udp_sticky_hdrs_len);
3360 		}
3361 		udp->udp_sticky_hdrs = hdrs;
3362 		udp->udp_sticky_hdrs_len = hdrs_len;
3363 	}
3364 	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
3365 	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);
3366 
3367 	/* Set header fields not in ipp */
3368 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3369 		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
3370 		ip6h = (ip6_t *)&ip6i[1];
3371 	} else {
3372 		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
3373 	}
3374 
3375 	if (!(ipp->ipp_fields & IPPF_ADDR))
3376 		ip6h->ip6_src = udp->udp_v6src;
3377 
3378 	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
3379 	udpha->uha_src_port = udp->udp_port;
3380 
3381 	/* Try to get everything in a single mblk */
3382 	if (hdrs_len > udp->udp_max_hdr_len) {
3383 		udp->udp_max_hdr_len = hdrs_len;
3384 		sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
3385 		rw_exit(&udp->udp_rwlock);
3386 		(void) proto_set_tx_wroff(udp->udp_connp->conn_rq,
3387 		    udp->udp_connp, sth_wroff);
3388 		rw_enter(&udp->udp_rwlock, RW_WRITER);
3389 	}
3390 	return (0);
3391 }
3392 
3393 /*
3394  * This routine retrieves the value of an ND variable in a udpparam_t
3395  * structure.  It is called through nd_getset when a user reads the
3396  * variable.
3397  */
3398 /* ARGSUSED */
3399 static int
3400 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3401 {
3402 	udpparam_t *udppa = (udpparam_t *)cp;
3403 
3404 	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
3405 	return (0);
3406 }
3407 
3408 /*
3409  * Walk through the param array specified registering each element with the
3410  * named dispatch (ND) handler.
3411  */
3412 static boolean_t
3413 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
3414 {
3415 	for (; cnt-- > 0; udppa++) {
3416 		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
3417 			if (!nd_load(ndp, udppa->udp_param_name,
3418 			    udp_param_get, udp_param_set,
3419 			    (caddr_t)udppa)) {
3420 				nd_free(ndp);
3421 				return (B_FALSE);
3422 			}
3423 		}
3424 	}
3425 	if (!nd_load(ndp, "udp_extra_priv_ports",
3426 	    udp_extra_priv_ports_get, NULL, NULL)) {
3427 		nd_free(ndp);
3428 		return (B_FALSE);
3429 	}
3430 	if (!nd_load(ndp, "udp_extra_priv_ports_add",
3431 	    NULL, udp_extra_priv_ports_add, NULL)) {
3432 		nd_free(ndp);
3433 		return (B_FALSE);
3434 	}
3435 	if (!nd_load(ndp, "udp_extra_priv_ports_del",
3436 	    NULL, udp_extra_priv_ports_del, NULL)) {
3437 		nd_free(ndp);
3438 		return (B_FALSE);
3439 	}
3440 	if (!nd_load(ndp, "udp_status", udp_status_report, NULL,
3441 	    NULL)) {
3442 		nd_free(ndp);
3443 		return (B_FALSE);
3444 	}
3445 	if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL,
3446 	    NULL)) {
3447 		nd_free(ndp);
3448 		return (B_FALSE);
3449 	}
3450 	return (B_TRUE);
3451 }
3452 
3453 /* This routine sets an ND variable in a udpparam_t structure. */
3454 /* ARGSUSED */
3455 static int
3456 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3457 {
3458 	long		new_value;
3459 	udpparam_t	*udppa = (udpparam_t *)cp;
3460 
3461 	/*
3462 	 * Fail the request if the new value does not lie within the
3463 	 * required bounds.
3464 	 */
3465 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3466 	    new_value < udppa->udp_param_min ||
3467 	    new_value > udppa->udp_param_max) {
3468 		return (EINVAL);
3469 	}
3470 
3471 	/* Set the new value */
3472 	udppa->udp_param_value = new_value;
3473 	return (0);
3474 }
3475 
3476 /*
3477  * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
3478  * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
3479  * just count the length needed for allocation.  If 'dbuf' is non-NULL,
3480  * then it's assumed to be allocated to be large enough.
3481  *
3482  * Returns zero if trimming of the security option causes all options to go
3483  * away.
3484  */
3485 static size_t
3486 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
3487 {
3488 	struct T_opthdr *toh;
3489 	size_t hol = ipp->ipp_hopoptslen;
3490 	ip6_hbh_t *dstopt = NULL;
3491 	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
3492 	size_t tlen, olen, plen;
3493 	boolean_t deleting;
3494 	const struct ip6_opt *sopt, *lastpad;
3495 	struct ip6_opt *dopt;
3496 
3497 	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
3498 		toh->level = IPPROTO_IPV6;
3499 		toh->name = IPV6_HOPOPTS;
3500 		toh->status = 0;
3501 		dstopt = (ip6_hbh_t *)(toh + 1);
3502 	}
3503 
3504 	/*
3505 	 * If labeling is enabled, then skip the label option
3506 	 * but get other options if there are any.
3507 	 */
3508 	if (is_system_labeled()) {
3509 		dopt = NULL;
3510 		if (dstopt != NULL) {
3511 			/* will fill in ip6h_len later */
3512 			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
3513 			dopt = (struct ip6_opt *)(dstopt + 1);
3514 		}
3515 		sopt = (const struct ip6_opt *)(srcopt + 1);
3516 		hol -= sizeof (*srcopt);
3517 		tlen = sizeof (*dstopt);
3518 		lastpad = NULL;
3519 		deleting = B_FALSE;
3520 		/*
3521 		 * This loop finds the first (lastpad pointer) of any number of
3522 		 * pads that preceeds the security option, then treats the
3523 		 * security option as though it were a pad, and then finds the
3524 		 * next non-pad option (or end of list).
3525 		 *
3526 		 * It then treats the entire block as one big pad.  To preserve
3527 		 * alignment of any options that follow, or just the end of the
3528 		 * list, it computes a minimal new padding size that keeps the
3529 		 * same alignment for the next option.
3530 		 *
3531 		 * If it encounters just a sequence of pads with no security
3532 		 * option, those are copied as-is rather than collapsed.
3533 		 *
3534 		 * Note that to handle the end of list case, the code makes one
3535 		 * loop with 'hol' set to zero.
3536 		 */
3537 		for (;;) {
3538 			if (hol > 0) {
3539 				if (sopt->ip6o_type == IP6OPT_PAD1) {
3540 					if (lastpad == NULL)
3541 						lastpad = sopt;
3542 					sopt = (const struct ip6_opt *)
3543 					    &sopt->ip6o_len;
3544 					hol--;
3545 					continue;
3546 				}
3547 				olen = sopt->ip6o_len + sizeof (*sopt);
3548 				if (olen > hol)
3549 					olen = hol;
3550 				if (sopt->ip6o_type == IP6OPT_PADN ||
3551 				    sopt->ip6o_type == ip6opt_ls) {
3552 					if (sopt->ip6o_type == ip6opt_ls)
3553 						deleting = B_TRUE;
3554 					if (lastpad == NULL)
3555 						lastpad = sopt;
3556 					sopt = (const struct ip6_opt *)
3557 					    ((const char *)sopt + olen);
3558 					hol -= olen;
3559 					continue;
3560 				}
3561 			} else {
3562 				/* if nothing was copied at all, then delete */
3563 				if (tlen == sizeof (*dstopt))
3564 					return (0);
3565 				/* last pass; pick up any trailing padding */
3566 				olen = 0;
3567 			}
3568 			if (deleting) {
3569 				/*
3570 				 * compute aligning effect of deleted material
3571 				 * to reproduce with pad.
3572 				 */
3573 				plen = ((const char *)sopt -
3574 				    (const char *)lastpad) & 7;
3575 				tlen += plen;
3576 				if (dopt != NULL) {
3577 					if (plen == 1) {
3578 						dopt->ip6o_type = IP6OPT_PAD1;
3579 					} else if (plen > 1) {
3580 						plen -= sizeof (*dopt);
3581 						dopt->ip6o_type = IP6OPT_PADN;
3582 						dopt->ip6o_len = plen;
3583 						if (plen > 0)
3584 							bzero(dopt + 1, plen);
3585 					}
3586 					dopt = (struct ip6_opt *)
3587 					    ((char *)dopt + plen);
3588 				}
3589 				deleting = B_FALSE;
3590 				lastpad = NULL;
3591 			}
3592 			/* if there's uncopied padding, then copy that now */
3593 			if (lastpad != NULL) {
3594 				olen += (const char *)sopt -
3595 				    (const char *)lastpad;
3596 				sopt = lastpad;
3597 				lastpad = NULL;
3598 			}
3599 			if (dopt != NULL && olen > 0) {
3600 				bcopy(sopt, dopt, olen);
3601 				dopt = (struct ip6_opt *)((char *)dopt + olen);
3602 			}
3603 			if (hol == 0)
3604 				break;
3605 			tlen += olen;
3606 			sopt = (const struct ip6_opt *)
3607 			    ((const char *)sopt + olen);
3608 			hol -= olen;
3609 		}
3610 		/* go back and patch up the length value, rounded upward */
3611 		if (dstopt != NULL)
3612 			dstopt->ip6h_len = (tlen - 1) >> 3;
3613 	} else {
3614 		tlen = hol;
3615 		if (dstopt != NULL)
3616 			bcopy(srcopt, dstopt, hol);
3617 	}
3618 
3619 	tlen += sizeof (*toh);
3620 	if (toh != NULL)
3621 		toh->len = tlen;
3622 
3623 	return (tlen);
3624 }
3625 
3626 /*
3627  * Update udp_rcv_opt_len from the packet.
3628  * Called when options received, and when no options received but
3629  * udp_ip_recv_opt_len has previously recorded options.
3630  */
3631 static void
3632 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
3633 {
3634 	/* Save the options if any */
3635 	if (opt_len > 0) {
3636 		if (opt_len > udp->udp_ip_rcv_options_len) {
3637 			/* Need to allocate larger buffer */
3638 			if (udp->udp_ip_rcv_options_len != 0)
3639 				mi_free((char *)udp->udp_ip_rcv_options);
3640 			udp->udp_ip_rcv_options_len = 0;
3641 			udp->udp_ip_rcv_options =
3642 			    (uchar_t *)mi_alloc(opt_len, BPRI_HI);
3643 			if (udp->udp_ip_rcv_options != NULL)
3644 				udp->udp_ip_rcv_options_len = opt_len;
3645 		}
3646 		if (udp->udp_ip_rcv_options_len != 0) {
3647 			bcopy(opt, udp->udp_ip_rcv_options, opt_len);
3648 			/* Adjust length if we are resusing the space */
3649 			udp->udp_ip_rcv_options_len = opt_len;
3650 		}
3651 	} else if (udp->udp_ip_rcv_options_len != 0) {
3652 		/* Clear out previously recorded options */
3653 		mi_free((char *)udp->udp_ip_rcv_options);
3654 		udp->udp_ip_rcv_options = NULL;
3655 		udp->udp_ip_rcv_options_len = 0;
3656 	}
3657 }
3658 
3659 static void
3660 udp_queue_fallback(udp_t *udp, mblk_t *mp)
3661 {
3662 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
3663 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
3664 		/*
3665 		 * fallback has started but messages have not been moved yet
3666 		 */
3667 		if (udp->udp_fallback_queue_head == NULL) {
3668 			ASSERT(udp->udp_fallback_queue_tail == NULL);
3669 			udp->udp_fallback_queue_head = mp;
3670 			udp->udp_fallback_queue_tail = mp;
3671 		} else {
3672 			ASSERT(udp->udp_fallback_queue_tail != NULL);
3673 			udp->udp_fallback_queue_tail->b_next = mp;
3674 			udp->udp_fallback_queue_tail = mp;
3675 		}
3676 		mutex_exit(&udp->udp_recv_lock);
3677 	} else {
3678 		/*
3679 		 * no more fallbacks possible, ok to drop lock.
3680 		 */
3681 		mutex_exit(&udp->udp_recv_lock);
3682 		putnext(udp->udp_connp->conn_rq, mp);
3683 	}
3684 }
3685 
3686 /* ARGSUSED2 */
3687 static void
3688 udp_input(void *arg1, mblk_t *mp, void *arg2)
3689 {
3690 	conn_t *connp = (conn_t *)arg1;
3691 	struct T_unitdata_ind	*tudi;
3692 	uchar_t			*rptr;		/* Pointer to IP header */
3693 	int			hdr_length;	/* Length of IP+UDP headers */
3694 	int			opt_len;
3695 	int			udi_size;	/* Size of T_unitdata_ind */
3696 	int			mp_len;
3697 	udp_t			*udp;
3698 	udpha_t			*udpha;
3699 	int			ipversion;
3700 	ip6_pkt_t		ipp;
3701 	ip6_t			*ip6h;
3702 	ip6i_t			*ip6i;
3703 	mblk_t			*mp1;
3704 	mblk_t			*options_mp = NULL;
3705 	ip_pktinfo_t		*pinfo = NULL;
3706 	cred_t			*cr = NULL;
3707 	pid_t			cpid;
3708 	uint32_t		udp_ip_rcv_options_len;
3709 	udp_bits_t		udp_bits;
3710 	cred_t			*rcr = connp->conn_cred;
3711 	udp_stack_t *us;
3712 
3713 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
3714 
3715 	udp = connp->conn_udp;
3716 	us = udp->udp_us;
3717 	rptr = mp->b_rptr;
3718 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3719 	ASSERT(OK_32PTR(rptr));
3720 
3721 	/*
3722 	 * IP should have prepended the options data in an M_CTL
3723 	 * Check M_CTL "type" to make sure are not here bcos of
3724 	 * a valid ICMP message
3725 	 */
3726 	if (DB_TYPE(mp) == M_CTL) {
3727 		if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3728 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3729 		    IN_PKTINFO) {
3730 			/*
3731 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3732 			 * has been prepended to the packet by IP. We need to
3733 			 * extract the mblk and adjust the rptr
3734 			 */
3735 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3736 			options_mp = mp;
3737 			mp = mp->b_cont;
3738 			rptr = mp->b_rptr;
3739 			UDP_STAT(us, udp_in_pktinfo);
3740 		} else {
3741 			/*
3742 			 * ICMP messages.
3743 			 */
3744 			udp_icmp_error(connp, mp);
3745 			return;
3746 		}
3747 	}
3748 
3749 	mp_len = msgdsize(mp);
3750 	/*
3751 	 * This is the inbound data path.
3752 	 * First, we check to make sure the IP version number is correct,
3753 	 * and then pull the IP and UDP headers into the first mblk.
3754 	 */
3755 
3756 	/* Initialize regardless if ipversion is IPv4 or IPv6 */
3757 	ipp.ipp_fields = 0;
3758 
3759 	ipversion = IPH_HDR_VERSION(rptr);
3760 
3761 	rw_enter(&udp->udp_rwlock, RW_READER);
3762 	udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len;
3763 	udp_bits = udp->udp_bits;
3764 	rw_exit(&udp->udp_rwlock);
3765 
3766 	switch (ipversion) {
3767 	case IPV4_VERSION:
3768 		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
3769 		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
3770 		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
3771 		opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
3772 		if ((opt_len > 0 || udp_ip_rcv_options_len > 0) &&
3773 		    udp->udp_family == AF_INET) {
3774 			/*
3775 			 * Record/update udp_ip_rcv_options with the lock
3776 			 * held. Not needed for AF_INET6 sockets
3777 			 * since they don't support a getsockopt of IP_OPTIONS.
3778 			 */
3779 			rw_enter(&udp->udp_rwlock, RW_WRITER);
3780 			udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH,
3781 			    opt_len);
3782 			rw_exit(&udp->udp_rwlock);
3783 		}
3784 		/* Handle IPV6_RECVPKTINFO even for IPv4 packet. */
3785 		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
3786 		    udp->udp_ip_recvpktinfo) {
3787 			if (pinfo->ip_pkt_flags & IPF_RECVIF) {
3788 				ipp.ipp_fields |= IPPF_IFINDEX;
3789 				ipp.ipp_ifindex = pinfo->ip_pkt_ifindex;
3790 			}
3791 		}
3792 		break;
3793 	case IPV6_VERSION:
3794 		/*
3795 		 * IPv6 packets can only be received by applications
3796 		 * that are prepared to receive IPv6 addresses.
3797 		 * The IP fanout must ensure this.
3798 		 */
3799 		ASSERT(udp->udp_family == AF_INET6);
3800 
3801 		ip6h = (ip6_t *)rptr;
3802 		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
3803 
3804 		if (ip6h->ip6_nxt != IPPROTO_UDP) {
3805 			uint8_t nexthdrp;
3806 			/* Look for ifindex information */
3807 			if (ip6h->ip6_nxt == IPPROTO_RAW) {
3808 				ip6i = (ip6i_t *)ip6h;
3809 				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
3810 					goto tossit;
3811 
3812 				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3813 					ASSERT(ip6i->ip6i_ifindex != 0);
3814 					ipp.ipp_fields |= IPPF_IFINDEX;
3815 					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3816 				}
3817 				rptr = (uchar_t *)&ip6i[1];
3818 				mp->b_rptr = rptr;
3819 				if (rptr == mp->b_wptr) {
3820 					mp1 = mp->b_cont;
3821 					freeb(mp);
3822 					mp = mp1;
3823 					rptr = mp->b_rptr;
3824 				}
3825 				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
3826 					goto tossit;
3827 				ip6h = (ip6_t *)rptr;
3828 				mp_len = msgdsize(mp);
3829 			}
3830 			/*
3831 			 * Find any potentially interesting extension headers
3832 			 * as well as the length of the IPv6 + extension
3833 			 * headers.
3834 			 */
3835 			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
3836 			    UDPH_SIZE;
3837 			ASSERT(nexthdrp == IPPROTO_UDP);
3838 		} else {
3839 			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
3840 			ip6i = NULL;
3841 		}
3842 		break;
3843 	default:
3844 		ASSERT(0);
3845 	}
3846 
3847 	/*
3848 	 * IP inspected the UDP header thus all of it must be in the mblk.
3849 	 * UDP length check is performed for IPv6 packets and IPv4 packets
3850 	 * to check if the size of the packet as specified
3851 	 * by the header is the same as the physical size of the packet.
3852 	 * FIXME? Didn't IP already check this?
3853 	 */
3854 	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
3855 	if ((MBLKL(mp) < hdr_length) ||
3856 	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
3857 		goto tossit;
3858 	}
3859 
3860 
3861 	/* Walk past the headers unless IP_RECVHDR was set. */
3862 	if (!udp_bits.udpb_rcvhdr) {
3863 		mp->b_rptr = rptr + hdr_length;
3864 		mp_len -= hdr_length;
3865 	}
3866 
3867 	/*
3868 	 * This is the inbound data path.  Packets are passed upstream as
3869 	 * T_UNITDATA_IND messages with full IP headers still attached.
3870 	 */
3871 	if (udp->udp_family == AF_INET) {
3872 		sin_t *sin;
3873 
3874 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
3875 
3876 		/*
3877 		 * Normally only send up the source address.
3878 		 * If IP_RECVDSTADDR is set we include the destination IP
3879 		 * address as an option. With IP_RECVOPTS we include all
3880 		 * the IP options.
3881 		 */
3882 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3883 		if (udp_bits.udpb_recvdstaddr) {
3884 			udi_size += sizeof (struct T_opthdr) +
3885 			    sizeof (struct in_addr);
3886 			UDP_STAT(us, udp_in_recvdstaddr);
3887 		}
3888 
3889 		if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) &&
3890 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3891 			udi_size += sizeof (struct T_opthdr) +
3892 			    sizeof (struct in_pktinfo);
3893 			UDP_STAT(us, udp_ip_rcvpktinfo);
3894 		}
3895 
3896 		if ((udp_bits.udpb_recvopts) && opt_len > 0) {
3897 			udi_size += sizeof (struct T_opthdr) + opt_len;
3898 			UDP_STAT(us, udp_in_recvopts);
3899 		}
3900 
3901 		/*
3902 		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
3903 		 * space accordingly
3904 		 */
3905 		if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
3906 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3907 			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
3908 			UDP_STAT(us, udp_in_recvif);
3909 		}
3910 
3911 		if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
3912 		    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
3913 			udi_size += sizeof (struct T_opthdr) +
3914 			    sizeof (struct sockaddr_dl);
3915 			UDP_STAT(us, udp_in_recvslla);
3916 		}
3917 
3918 		if ((udp_bits.udpb_recvucred) &&
3919 		    (cr = DB_CRED(mp)) != NULL) {
3920 			udi_size += sizeof (struct T_opthdr) + ucredsize;
3921 			cpid = DB_CPID(mp);
3922 			UDP_STAT(us, udp_in_recvucred);
3923 		}
3924 
3925 		/*
3926 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3927 		 * buffer. Since gethrestime() expects a pointer aligned
3928 		 * argument, we allocate space necessary for extra
3929 		 * alignment (even though it might not be used).
3930 		 */
3931 		if (udp_bits.udpb_timestamp) {
3932 			udi_size += sizeof (struct T_opthdr) +
3933 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3934 			UDP_STAT(us, udp_in_timestamp);
3935 		}
3936 
3937 		/*
3938 		 * If IP_RECVTTL is set allocate the appropriate sized buffer
3939 		 */
3940 		if (udp_bits.udpb_recvttl) {
3941 			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
3942 			UDP_STAT(us, udp_in_recvttl);
3943 		}
3944 
3945 		/* Allocate a message block for the T_UNITDATA_IND structure. */
3946 		mp1 = allocb(udi_size, BPRI_MED);
3947 		if (mp1 == NULL) {
3948 			freemsg(mp);
3949 			if (options_mp != NULL)
3950 				freeb(options_mp);
3951 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
3952 			return;
3953 		}
3954 		mp1->b_cont = mp;
3955 		mp = mp1;
3956 		mp->b_datap->db_type = M_PROTO;
3957 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3958 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3959 		tudi->PRIM_type = T_UNITDATA_IND;
3960 		tudi->SRC_length = sizeof (sin_t);
3961 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3962 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
3963 		    sizeof (sin_t);
3964 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3965 		tudi->OPT_length = udi_size;
3966 		sin = (sin_t *)&tudi[1];
3967 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
3968 		sin->sin_port =	udpha->uha_src_port;
3969 		sin->sin_family = udp->udp_family;
3970 		*(uint32_t *)&sin->sin_zero[0] = 0;
3971 		*(uint32_t *)&sin->sin_zero[4] = 0;
3972 
3973 		/*
3974 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
3975 		 * IP_RECVTTL has been set.
3976 		 */
3977 		if (udi_size != 0) {
3978 			/*
3979 			 * Copy in destination address before options to avoid
3980 			 * any padding issues.
3981 			 */
3982 			char *dstopt;
3983 
3984 			dstopt = (char *)&sin[1];
3985 			if (udp_bits.udpb_recvdstaddr) {
3986 				struct T_opthdr *toh;
3987 				ipaddr_t *dstptr;
3988 
3989 				toh = (struct T_opthdr *)dstopt;
3990 				toh->level = IPPROTO_IP;
3991 				toh->name = IP_RECVDSTADDR;
3992 				toh->len = sizeof (struct T_opthdr) +
3993 				    sizeof (ipaddr_t);
3994 				toh->status = 0;
3995 				dstopt += sizeof (struct T_opthdr);
3996 				dstptr = (ipaddr_t *)dstopt;
3997 				*dstptr = ((ipha_t *)rptr)->ipha_dst;
3998 				dstopt += sizeof (ipaddr_t);
3999 				udi_size -= toh->len;
4000 			}
4001 
4002 			if (udp_bits.udpb_recvopts && opt_len > 0) {
4003 				struct T_opthdr *toh;
4004 
4005 				toh = (struct T_opthdr *)dstopt;
4006 				toh->level = IPPROTO_IP;
4007 				toh->name = IP_RECVOPTS;
4008 				toh->len = sizeof (struct T_opthdr) + opt_len;
4009 				toh->status = 0;
4010 				dstopt += sizeof (struct T_opthdr);
4011 				bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt,
4012 				    opt_len);
4013 				dstopt += opt_len;
4014 				udi_size -= toh->len;
4015 			}
4016 
4017 			if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) &&
4018 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
4019 				struct T_opthdr *toh;
4020 				struct in_pktinfo *pktinfop;
4021 
4022 				toh = (struct T_opthdr *)dstopt;
4023 				toh->level = IPPROTO_IP;
4024 				toh->name = IP_PKTINFO;
4025 				toh->len = sizeof (struct T_opthdr) +
4026 				    sizeof (*pktinfop);
4027 				toh->status = 0;
4028 				dstopt += sizeof (struct T_opthdr);
4029 				pktinfop = (struct in_pktinfo *)dstopt;
4030 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
4031 				pktinfop->ipi_spec_dst =
4032 				    pinfo->ip_pkt_match_addr;
4033 				pktinfop->ipi_addr.s_addr =
4034 				    ((ipha_t *)rptr)->ipha_dst;
4035 
4036 				dstopt += sizeof (struct in_pktinfo);
4037 				udi_size -= toh->len;
4038 			}
4039 
4040 			if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
4041 			    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
4042 
4043 				struct T_opthdr *toh;
4044 				struct sockaddr_dl	*dstptr;
4045 
4046 				toh = (struct T_opthdr *)dstopt;
4047 				toh->level = IPPROTO_IP;
4048 				toh->name = IP_RECVSLLA;
4049 				toh->len = sizeof (struct T_opthdr) +
4050 				    sizeof (struct sockaddr_dl);
4051 				toh->status = 0;
4052 				dstopt += sizeof (struct T_opthdr);
4053 				dstptr = (struct sockaddr_dl *)dstopt;
4054 				bcopy(&pinfo->ip_pkt_slla, dstptr,
4055 				    sizeof (struct sockaddr_dl));
4056 				dstopt += sizeof (struct sockaddr_dl);
4057 				udi_size -= toh->len;
4058 			}
4059 
4060 			if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
4061 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
4062 
4063 				struct T_opthdr *toh;
4064 				uint_t		*dstptr;
4065 
4066 				toh = (struct T_opthdr *)dstopt;
4067 				toh->level = IPPROTO_IP;
4068 				toh->name = IP_RECVIF;
4069 				toh->len = sizeof (struct T_opthdr) +
4070 				    sizeof (uint_t);
4071 				toh->status = 0;
4072 				dstopt += sizeof (struct T_opthdr);
4073 				dstptr = (uint_t *)dstopt;
4074 				*dstptr = pinfo->ip_pkt_ifindex;
4075 				dstopt += sizeof (uint_t);
4076 				udi_size -= toh->len;
4077 			}
4078 
4079 			if (cr != NULL) {
4080 				struct T_opthdr *toh;
4081 
4082 				toh = (struct T_opthdr *)dstopt;
4083 				toh->level = SOL_SOCKET;
4084 				toh->name = SCM_UCRED;
4085 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4086 				toh->status = 0;
4087 				dstopt += sizeof (struct T_opthdr);
4088 				(void) cred2ucred(cr, cpid, dstopt, rcr);
4089 				dstopt += ucredsize;
4090 				udi_size -= toh->len;
4091 			}
4092 
4093 			if (udp_bits.udpb_timestamp) {
4094 				struct	T_opthdr *toh;
4095 
4096 				toh = (struct T_opthdr *)dstopt;
4097 				toh->level = SOL_SOCKET;
4098 				toh->name = SCM_TIMESTAMP;
4099 				toh->len = sizeof (struct T_opthdr) +
4100 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4101 				toh->status = 0;
4102 				dstopt += sizeof (struct T_opthdr);
4103 				/* Align for gethrestime() */
4104 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
4105 				    sizeof (intptr_t));
4106 				gethrestime((timestruc_t *)dstopt);
4107 				dstopt = (char *)toh + toh->len;
4108 				udi_size -= toh->len;
4109 			}
4110 
4111 			/*
4112 			 * CAUTION:
4113 			 * Due to aligment issues
4114 			 * Processing of IP_RECVTTL option
4115 			 * should always be the last. Adding
4116 			 * any option processing after this will
4117 			 * cause alignment panic.
4118 			 */
4119 			if (udp_bits.udpb_recvttl) {
4120 				struct	T_opthdr *toh;
4121 				uint8_t	*dstptr;
4122 
4123 				toh = (struct T_opthdr *)dstopt;
4124 				toh->level = IPPROTO_IP;
4125 				toh->name = IP_RECVTTL;
4126 				toh->len = sizeof (struct T_opthdr) +
4127 				    sizeof (uint8_t);
4128 				toh->status = 0;
4129 				dstopt += sizeof (struct T_opthdr);
4130 				dstptr = (uint8_t *)dstopt;
4131 				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
4132 				dstopt += sizeof (uint8_t);
4133 				udi_size -= toh->len;
4134 			}
4135 
4136 			/* Consumed all of allocated space */
4137 			ASSERT(udi_size == 0);
4138 		}
4139 	} else {
4140 		sin6_t *sin6;
4141 
4142 		/*
4143 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
4144 		 *
4145 		 * Normally we only send up the address. If receiving of any
4146 		 * optional receive side information is enabled, we also send
4147 		 * that up as options.
4148 		 */
4149 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
4150 
4151 		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
4152 		    IPPF_RTHDR|IPPF_IFINDEX)) {
4153 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4154 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4155 				size_t hlen;
4156 
4157 				UDP_STAT(us, udp_in_recvhopopts);
4158 				hlen = copy_hop_opts(&ipp, NULL);
4159 				if (hlen == 0)
4160 					ipp.ipp_fields &= ~IPPF_HOPOPTS;
4161 				udi_size += hlen;
4162 			}
4163 			if (((udp_bits.udpb_ipv6_recvdstopts) ||
4164 			    udp_bits.udpb_old_ipv6_recvdstopts) &&
4165 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4166 				udi_size += sizeof (struct T_opthdr) +
4167 				    ipp.ipp_dstoptslen;
4168 				UDP_STAT(us, udp_in_recvdstopts);
4169 			}
4170 			if ((((udp_bits.udpb_ipv6_recvdstopts) &&
4171 			    udp_bits.udpb_ipv6_recvrthdr &&
4172 			    (ipp.ipp_fields & IPPF_RTHDR)) ||
4173 			    (udp_bits.udpb_ipv6_recvrthdrdstopts)) &&
4174 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4175 				udi_size += sizeof (struct T_opthdr) +
4176 				    ipp.ipp_rtdstoptslen;
4177 				UDP_STAT(us, udp_in_recvrtdstopts);
4178 			}
4179 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4180 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4181 				udi_size += sizeof (struct T_opthdr) +
4182 				    ipp.ipp_rthdrlen;
4183 				UDP_STAT(us, udp_in_recvrthdr);
4184 			}
4185 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4186 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4187 				udi_size += sizeof (struct T_opthdr) +
4188 				    sizeof (struct in6_pktinfo);
4189 				UDP_STAT(us, udp_in_recvpktinfo);
4190 			}
4191 
4192 		}
4193 		if ((udp_bits.udpb_recvucred) &&
4194 		    (cr = DB_CRED(mp)) != NULL) {
4195 			udi_size += sizeof (struct T_opthdr) + ucredsize;
4196 			cpid = DB_CPID(mp);
4197 			UDP_STAT(us, udp_in_recvucred);
4198 		}
4199 
4200 		/*
4201 		 * If SO_TIMESTAMP is set allocate the appropriate sized
4202 		 * buffer. Since gethrestime() expects a pointer aligned
4203 		 * argument, we allocate space necessary for extra
4204 		 * alignment (even though it might not be used).
4205 		 */
4206 		if (udp_bits.udpb_timestamp) {
4207 			udi_size += sizeof (struct T_opthdr) +
4208 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4209 			UDP_STAT(us, udp_in_timestamp);
4210 		}
4211 
4212 		if (udp_bits.udpb_ipv6_recvhoplimit) {
4213 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4214 			UDP_STAT(us, udp_in_recvhoplimit);
4215 		}
4216 
4217 		if (udp_bits.udpb_ipv6_recvtclass) {
4218 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4219 			UDP_STAT(us, udp_in_recvtclass);
4220 		}
4221 
4222 		mp1 = allocb(udi_size, BPRI_MED);
4223 		if (mp1 == NULL) {
4224 			freemsg(mp);
4225 			if (options_mp != NULL)
4226 				freeb(options_mp);
4227 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
4228 			return;
4229 		}
4230 		mp1->b_cont = mp;
4231 		mp = mp1;
4232 		mp->b_datap->db_type = M_PROTO;
4233 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
4234 		mp->b_wptr = (uchar_t *)tudi + udi_size;
4235 		tudi->PRIM_type = T_UNITDATA_IND;
4236 		tudi->SRC_length = sizeof (sin6_t);
4237 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
4238 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
4239 		    sizeof (sin6_t);
4240 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
4241 		tudi->OPT_length = udi_size;
4242 		sin6 = (sin6_t *)&tudi[1];
4243 		if (ipversion == IPV4_VERSION) {
4244 			in6_addr_t v6dst;
4245 
4246 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
4247 			    &sin6->sin6_addr);
4248 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
4249 			    &v6dst);
4250 			sin6->sin6_flowinfo = 0;
4251 			sin6->sin6_scope_id = 0;
4252 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
4253 			    connp->conn_zoneid, us->us_netstack);
4254 		} else {
4255 			sin6->sin6_addr = ip6h->ip6_src;
4256 			/* No sin6_flowinfo per API */
4257 			sin6->sin6_flowinfo = 0;
4258 			/* For link-scope source pass up scope id */
4259 			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
4260 			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
4261 				sin6->sin6_scope_id = ipp.ipp_ifindex;
4262 			else
4263 				sin6->sin6_scope_id = 0;
4264 			sin6->__sin6_src_id = ip_srcid_find_addr(
4265 			    &ip6h->ip6_dst, connp->conn_zoneid,
4266 			    us->us_netstack);
4267 		}
4268 		sin6->sin6_port = udpha->uha_src_port;
4269 		sin6->sin6_family = udp->udp_family;
4270 
4271 		if (udi_size != 0) {
4272 			uchar_t *dstopt;
4273 
4274 			dstopt = (uchar_t *)&sin6[1];
4275 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4276 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4277 				struct T_opthdr *toh;
4278 				struct in6_pktinfo *pkti;
4279 
4280 				toh = (struct T_opthdr *)dstopt;
4281 				toh->level = IPPROTO_IPV6;
4282 				toh->name = IPV6_PKTINFO;
4283 				toh->len = sizeof (struct T_opthdr) +
4284 				    sizeof (*pkti);
4285 				toh->status = 0;
4286 				dstopt += sizeof (struct T_opthdr);
4287 				pkti = (struct in6_pktinfo *)dstopt;
4288 				if (ipversion == IPV6_VERSION)
4289 					pkti->ipi6_addr = ip6h->ip6_dst;
4290 				else
4291 					IN6_IPADDR_TO_V4MAPPED(
4292 					    ((ipha_t *)rptr)->ipha_dst,
4293 					    &pkti->ipi6_addr);
4294 				pkti->ipi6_ifindex = ipp.ipp_ifindex;
4295 				dstopt += sizeof (*pkti);
4296 				udi_size -= toh->len;
4297 			}
4298 			if (udp_bits.udpb_ipv6_recvhoplimit) {
4299 				struct T_opthdr *toh;
4300 
4301 				toh = (struct T_opthdr *)dstopt;
4302 				toh->level = IPPROTO_IPV6;
4303 				toh->name = IPV6_HOPLIMIT;
4304 				toh->len = sizeof (struct T_opthdr) +
4305 				    sizeof (uint_t);
4306 				toh->status = 0;
4307 				dstopt += sizeof (struct T_opthdr);
4308 				if (ipversion == IPV6_VERSION)
4309 					*(uint_t *)dstopt = ip6h->ip6_hops;
4310 				else
4311 					*(uint_t *)dstopt =
4312 					    ((ipha_t *)rptr)->ipha_ttl;
4313 				dstopt += sizeof (uint_t);
4314 				udi_size -= toh->len;
4315 			}
4316 			if (udp_bits.udpb_ipv6_recvtclass) {
4317 				struct T_opthdr *toh;
4318 
4319 				toh = (struct T_opthdr *)dstopt;
4320 				toh->level = IPPROTO_IPV6;
4321 				toh->name = IPV6_TCLASS;
4322 				toh->len = sizeof (struct T_opthdr) +
4323 				    sizeof (uint_t);
4324 				toh->status = 0;
4325 				dstopt += sizeof (struct T_opthdr);
4326 				if (ipversion == IPV6_VERSION) {
4327 					*(uint_t *)dstopt =
4328 					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
4329 				} else {
4330 					ipha_t *ipha = (ipha_t *)rptr;
4331 					*(uint_t *)dstopt =
4332 					    ipha->ipha_type_of_service;
4333 				}
4334 				dstopt += sizeof (uint_t);
4335 				udi_size -= toh->len;
4336 			}
4337 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4338 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4339 				size_t hlen;
4340 
4341 				hlen = copy_hop_opts(&ipp, dstopt);
4342 				dstopt += hlen;
4343 				udi_size -= hlen;
4344 			}
4345 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4346 			    (udp_bits.udpb_ipv6_recvrthdr) &&
4347 			    (ipp.ipp_fields & IPPF_RTHDR) &&
4348 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4349 				struct T_opthdr *toh;
4350 
4351 				toh = (struct T_opthdr *)dstopt;
4352 				toh->level = IPPROTO_IPV6;
4353 				toh->name = IPV6_DSTOPTS;
4354 				toh->len = sizeof (struct T_opthdr) +
4355 				    ipp.ipp_rtdstoptslen;
4356 				toh->status = 0;
4357 				dstopt += sizeof (struct T_opthdr);
4358 				bcopy(ipp.ipp_rtdstopts, dstopt,
4359 				    ipp.ipp_rtdstoptslen);
4360 				dstopt += ipp.ipp_rtdstoptslen;
4361 				udi_size -= toh->len;
4362 			}
4363 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4364 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4365 				struct T_opthdr *toh;
4366 
4367 				toh = (struct T_opthdr *)dstopt;
4368 				toh->level = IPPROTO_IPV6;
4369 				toh->name = IPV6_RTHDR;
4370 				toh->len = sizeof (struct T_opthdr) +
4371 				    ipp.ipp_rthdrlen;
4372 				toh->status = 0;
4373 				dstopt += sizeof (struct T_opthdr);
4374 				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
4375 				dstopt += ipp.ipp_rthdrlen;
4376 				udi_size -= toh->len;
4377 			}
4378 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4379 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4380 				struct T_opthdr *toh;
4381 
4382 				toh = (struct T_opthdr *)dstopt;
4383 				toh->level = IPPROTO_IPV6;
4384 				toh->name = IPV6_DSTOPTS;
4385 				toh->len = sizeof (struct T_opthdr) +
4386 				    ipp.ipp_dstoptslen;
4387 				toh->status = 0;
4388 				dstopt += sizeof (struct T_opthdr);
4389 				bcopy(ipp.ipp_dstopts, dstopt,
4390 				    ipp.ipp_dstoptslen);
4391 				dstopt += ipp.ipp_dstoptslen;
4392 				udi_size -= toh->len;
4393 			}
4394 			if (cr != NULL) {
4395 				struct T_opthdr *toh;
4396 
4397 				toh = (struct T_opthdr *)dstopt;
4398 				toh->level = SOL_SOCKET;
4399 				toh->name = SCM_UCRED;
4400 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4401 				toh->status = 0;
4402 				(void) cred2ucred(cr, cpid, &toh[1], rcr);
4403 				dstopt += toh->len;
4404 				udi_size -= toh->len;
4405 			}
4406 			if (udp_bits.udpb_timestamp) {
4407 				struct	T_opthdr *toh;
4408 
4409 				toh = (struct T_opthdr *)dstopt;
4410 				toh->level = SOL_SOCKET;
4411 				toh->name = SCM_TIMESTAMP;
4412 				toh->len = sizeof (struct T_opthdr) +
4413 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4414 				toh->status = 0;
4415 				dstopt += sizeof (struct T_opthdr);
4416 				/* Align for gethrestime() */
4417 				dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
4418 				    sizeof (intptr_t));
4419 				gethrestime((timestruc_t *)dstopt);
4420 				dstopt = (uchar_t *)toh + toh->len;
4421 				udi_size -= toh->len;
4422 			}
4423 
4424 			/* Consumed all of allocated space */
4425 			ASSERT(udi_size == 0);
4426 		}
4427 #undef	sin6
4428 		/* No IP_RECVDSTADDR for IPv6. */
4429 	}
4430 
4431 	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
4432 	if (options_mp != NULL)
4433 		freeb(options_mp);
4434 
4435 	if (IPCL_IS_NONSTR(connp)) {
4436 		int error;
4437 
4438 		if ((*connp->conn_upcalls->su_recv)
4439 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
4440 		    NULL) < 0) {
4441 			mutex_enter(&udp->udp_recv_lock);
4442 			if (error == ENOSPC) {
4443 				/*
4444 				 * let's confirm while holding the lock
4445 				 */
4446 				if ((*connp->conn_upcalls->su_recv)
4447 				    (connp->conn_upper_handle, NULL, 0, 0,
4448 				    &error, NULL) < 0) {
4449 					if (error == ENOSPC) {
4450 						connp->conn_flow_cntrld =
4451 						    B_TRUE;
4452 					} else {
4453 						ASSERT(error == EOPNOTSUPP);
4454 					}
4455 				}
4456 				mutex_exit(&udp->udp_recv_lock);
4457 			} else {
4458 				ASSERT(error == EOPNOTSUPP);
4459 				udp_queue_fallback(udp, mp);
4460 			}
4461 		}
4462 	} else {
4463 		putnext(connp->conn_rq, mp);
4464 	}
4465 	ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
4466 	return;
4467 
4468 tossit:
4469 	freemsg(mp);
4470 	if (options_mp != NULL)
4471 		freeb(options_mp);
4472 	BUMP_MIB(&us->us_udp_mib, udpInErrors);
4473 }
4474 
4475 /*
4476  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
4477  * information that can be changing beneath us.
4478  */
4479 mblk_t *
4480 udp_snmp_get(queue_t *q, mblk_t *mpctl)
4481 {
4482 	mblk_t			*mpdata;
4483 	mblk_t			*mp_conn_ctl;
4484 	mblk_t			*mp_attr_ctl;
4485 	mblk_t			*mp6_conn_ctl;
4486 	mblk_t			*mp6_attr_ctl;
4487 	mblk_t			*mp_conn_tail;
4488 	mblk_t			*mp_attr_tail;
4489 	mblk_t			*mp6_conn_tail;
4490 	mblk_t			*mp6_attr_tail;
4491 	struct opthdr		*optp;
4492 	mib2_udpEntry_t		ude;
4493 	mib2_udp6Entry_t	ude6;
4494 	mib2_transportMLPEntry_t mlp;
4495 	int			state;
4496 	zoneid_t		zoneid;
4497 	int			i;
4498 	connf_t			*connfp;
4499 	conn_t			*connp = Q_TO_CONN(q);
4500 	int			v4_conn_idx;
4501 	int			v6_conn_idx;
4502 	boolean_t		needattr;
4503 	udp_t			*udp;
4504 	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
4505 	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
4506 	mblk_t			*mp2ctl;
4507 
4508 	/*
4509 	 * make a copy of the original message
4510 	 */
4511 	mp2ctl = copymsg(mpctl);
4512 
4513 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
4514 	if (mpctl == NULL ||
4515 	    (mpdata = mpctl->b_cont) == NULL ||
4516 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
4517 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
4518 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
4519 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
4520 		freemsg(mp_conn_ctl);
4521 		freemsg(mp_attr_ctl);
4522 		freemsg(mp6_conn_ctl);
4523 		freemsg(mpctl);
4524 		freemsg(mp2ctl);
4525 		return (0);
4526 	}
4527 
4528 	zoneid = connp->conn_zoneid;
4529 
4530 	/* fixed length structure for IPv4 and IPv6 counters */
4531 	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
4532 	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
4533 	/* synchronize 64- and 32-bit counters */
4534 	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
4535 	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
4536 
4537 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4538 	optp->level = MIB2_UDP;
4539 	optp->name = 0;
4540 	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
4541 	    sizeof (us->us_udp_mib));
4542 	optp->len = msgdsize(mpdata);
4543 	qreply(q, mpctl);
4544 
4545 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
4546 	v4_conn_idx = v6_conn_idx = 0;
4547 
4548 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4549 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4550 		connp = NULL;
4551 
4552 		while ((connp = ipcl_get_next_conn(connfp, connp,
4553 		    IPCL_UDPCONN))) {
4554 			udp = connp->conn_udp;
4555 			if (zoneid != connp->conn_zoneid)
4556 				continue;
4557 
4558 			/*
4559 			 * Note that the port numbers are sent in
4560 			 * host byte order
4561 			 */
4562 
4563 			if (udp->udp_state == TS_UNBND)
4564 				state = MIB2_UDP_unbound;
4565 			else if (udp->udp_state == TS_IDLE)
4566 				state = MIB2_UDP_idle;
4567 			else if (udp->udp_state == TS_DATA_XFER)
4568 				state = MIB2_UDP_connected;
4569 			else
4570 				state = MIB2_UDP_unknown;
4571 
4572 			needattr = B_FALSE;
4573 			bzero(&mlp, sizeof (mlp));
4574 			if (connp->conn_mlp_type != mlptSingle) {
4575 				if (connp->conn_mlp_type == mlptShared ||
4576 				    connp->conn_mlp_type == mlptBoth)
4577 					mlp.tme_flags |= MIB2_TMEF_SHARED;
4578 				if (connp->conn_mlp_type == mlptPrivate ||
4579 				    connp->conn_mlp_type == mlptBoth)
4580 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
4581 				needattr = B_TRUE;
4582 			}
4583 
4584 			/*
4585 			 * Create an IPv4 table entry for IPv4 entries and also
4586 			 * any IPv6 entries which are bound to in6addr_any
4587 			 * (i.e. anything a IPv4 peer could connect/send to).
4588 			 */
4589 			if (udp->udp_ipversion == IPV4_VERSION ||
4590 			    (udp->udp_state <= TS_IDLE &&
4591 			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
4592 				ude.udpEntryInfo.ue_state = state;
4593 				/*
4594 				 * If in6addr_any this will set it to
4595 				 * INADDR_ANY
4596 				 */
4597 				ude.udpLocalAddress =
4598 				    V4_PART_OF_V6(udp->udp_v6src);
4599 				ude.udpLocalPort = ntohs(udp->udp_port);
4600 				if (udp->udp_state == TS_DATA_XFER) {
4601 					/*
4602 					 * Can potentially get here for
4603 					 * v6 socket if another process
4604 					 * (say, ping) has just done a
4605 					 * sendto(), changing the state
4606 					 * from the TS_IDLE above to
4607 					 * TS_DATA_XFER by the time we hit
4608 					 * this part of the code.
4609 					 */
4610 					ude.udpEntryInfo.ue_RemoteAddress =
4611 					    V4_PART_OF_V6(udp->udp_v6dst);
4612 					ude.udpEntryInfo.ue_RemotePort =
4613 					    ntohs(udp->udp_dstport);
4614 				} else {
4615 					ude.udpEntryInfo.ue_RemoteAddress = 0;
4616 					ude.udpEntryInfo.ue_RemotePort = 0;
4617 				}
4618 
4619 				/*
4620 				 * We make the assumption that all udp_t
4621 				 * structs will be created within an address
4622 				 * region no larger than 32-bits.
4623 				 */
4624 				ude.udpInstance = (uint32_t)(uintptr_t)udp;
4625 				ude.udpCreationProcess =
4626 				    (udp->udp_open_pid < 0) ?
4627 				    MIB2_UNKNOWN_PROCESS :
4628 				    udp->udp_open_pid;
4629 				ude.udpCreationTime = udp->udp_open_time;
4630 
4631 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
4632 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
4633 				mlp.tme_connidx = v4_conn_idx++;
4634 				if (needattr)
4635 					(void) snmp_append_data2(
4636 					    mp_attr_ctl->b_cont, &mp_attr_tail,
4637 					    (char *)&mlp, sizeof (mlp));
4638 			}
4639 			if (udp->udp_ipversion == IPV6_VERSION) {
4640 				ude6.udp6EntryInfo.ue_state  = state;
4641 				ude6.udp6LocalAddress = udp->udp_v6src;
4642 				ude6.udp6LocalPort = ntohs(udp->udp_port);
4643 				ude6.udp6IfIndex = udp->udp_bound_if;
4644 				if (udp->udp_state == TS_DATA_XFER) {
4645 					ude6.udp6EntryInfo.ue_RemoteAddress =
4646 					    udp->udp_v6dst;
4647 					ude6.udp6EntryInfo.ue_RemotePort =
4648 					    ntohs(udp->udp_dstport);
4649 				} else {
4650 					ude6.udp6EntryInfo.ue_RemoteAddress =
4651 					    sin6_null.sin6_addr;
4652 					ude6.udp6EntryInfo.ue_RemotePort = 0;
4653 				}
4654 				/*
4655 				 * We make the assumption that all udp_t
4656 				 * structs will be created within an address
4657 				 * region no larger than 32-bits.
4658 				 */
4659 				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
4660 				ude6.udp6CreationProcess =
4661 				    (udp->udp_open_pid < 0) ?
4662 				    MIB2_UNKNOWN_PROCESS :
4663 				    udp->udp_open_pid;
4664 				ude6.udp6CreationTime = udp->udp_open_time;
4665 
4666 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
4667 				    &mp6_conn_tail, (char *)&ude6,
4668 				    sizeof (ude6));
4669 				mlp.tme_connidx = v6_conn_idx++;
4670 				if (needattr)
4671 					(void) snmp_append_data2(
4672 					    mp6_attr_ctl->b_cont,
4673 					    &mp6_attr_tail, (char *)&mlp,
4674 					    sizeof (mlp));
4675 			}
4676 		}
4677 	}
4678 
4679 	/* IPv4 UDP endpoints */
4680 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
4681 	    sizeof (struct T_optmgmt_ack)];
4682 	optp->level = MIB2_UDP;
4683 	optp->name = MIB2_UDP_ENTRY;
4684 	optp->len = msgdsize(mp_conn_ctl->b_cont);
4685 	qreply(q, mp_conn_ctl);
4686 
4687 	/* table of MLP attributes... */
4688 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
4689 	    sizeof (struct T_optmgmt_ack)];
4690 	optp->level = MIB2_UDP;
4691 	optp->name = EXPER_XPORT_MLP;
4692 	optp->len = msgdsize(mp_attr_ctl->b_cont);
4693 	if (optp->len == 0)
4694 		freemsg(mp_attr_ctl);
4695 	else
4696 		qreply(q, mp_attr_ctl);
4697 
4698 	/* IPv6 UDP endpoints */
4699 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
4700 	    sizeof (struct T_optmgmt_ack)];
4701 	optp->level = MIB2_UDP6;
4702 	optp->name = MIB2_UDP6_ENTRY;
4703 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
4704 	qreply(q, mp6_conn_ctl);
4705 
4706 	/* table of MLP attributes... */
4707 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
4708 	    sizeof (struct T_optmgmt_ack)];
4709 	optp->level = MIB2_UDP6;
4710 	optp->name = EXPER_XPORT_MLP;
4711 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
4712 	if (optp->len == 0)
4713 		freemsg(mp6_attr_ctl);
4714 	else
4715 		qreply(q, mp6_attr_ctl);
4716 
4717 	return (mp2ctl);
4718 }
4719 
4720 /*
4721  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
4722  * NOTE: Per MIB-II, UDP has no writable data.
4723  * TODO:  If this ever actually tries to set anything, it needs to be
4724  * to do the appropriate locking.
4725  */
4726 /* ARGSUSED */
4727 int
4728 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4729     uchar_t *ptr, int len)
4730 {
4731 	switch (level) {
4732 	case MIB2_UDP:
4733 		return (0);
4734 	default:
4735 		return (1);
4736 	}
4737 }
4738 
4739 static void
4740 udp_report_item(mblk_t *mp, udp_t *udp)
4741 {
4742 	char *state;
4743 	char addrbuf1[INET6_ADDRSTRLEN];
4744 	char addrbuf2[INET6_ADDRSTRLEN];
4745 	uint_t print_len, buf_len;
4746 
4747 	buf_len = mp->b_datap->db_lim - mp->b_wptr;
4748 	ASSERT(buf_len >= 0);
4749 	if (buf_len == 0)
4750 		return;
4751 
4752 	if (udp->udp_state == TS_UNBND)
4753 		state = "UNBOUND";
4754 	else if (udp->udp_state == TS_IDLE)
4755 		state = "IDLE";
4756 	else if (udp->udp_state == TS_DATA_XFER)
4757 		state = "CONNECTED";
4758 	else
4759 		state = "UnkState";
4760 	print_len = snprintf((char *)mp->b_wptr, buf_len,
4761 	    MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n",
4762 	    (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port),
4763 	    inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)),
4764 	    inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)),
4765 	    ntohs(udp->udp_dstport), state);
4766 	if (print_len < buf_len) {
4767 		mp->b_wptr += print_len;
4768 	} else {
4769 		mp->b_wptr += buf_len;
4770 	}
4771 }
4772 
4773 /* Report for ndd "udp_status" */
4774 /* ARGSUSED */
4775 static int
4776 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4777 {
4778 	zoneid_t zoneid;
4779 	connf_t	*connfp;
4780 	conn_t	*connp = Q_TO_CONN(q);
4781 	udp_t	*udp = connp->conn_udp;
4782 	int	i;
4783 	udp_stack_t *us = udp->udp_us;
4784 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4785 
4786 	/*
4787 	 * Because of the ndd constraint, at most we can have 64K buffer
4788 	 * to put in all UDP info.  So to be more efficient, just
4789 	 * allocate a 64K buffer here, assuming we need that large buffer.
4790 	 * This may be a problem as any user can read udp_status.  Therefore
4791 	 * we limit the rate of doing this using us_ndd_get_info_interval.
4792 	 * This should be OK as normal users should not do this too often.
4793 	 */
4794 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
4795 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
4796 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
4797 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
4798 			return (0);
4799 		}
4800 	}
4801 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
4802 		/* The following may work even if we cannot get a large buf. */
4803 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
4804 		return (0);
4805 	}
4806 	(void) mi_mpprintf(mp,
4807 	    "UDP     " MI_COL_HDRPAD_STR
4808 	/*   12345678[89ABCDEF] */
4809 	    " zone lport src addr        dest addr       port  state");
4810 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
4811 
4812 	zoneid = connp->conn_zoneid;
4813 
4814 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4815 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4816 		connp = NULL;
4817 
4818 		while ((connp = ipcl_get_next_conn(connfp, connp,
4819 		    IPCL_UDPCONN))) {
4820 			udp = connp->conn_udp;
4821 			if (zoneid != GLOBAL_ZONEID &&
4822 			    zoneid != connp->conn_zoneid)
4823 				continue;
4824 
4825 			udp_report_item(mp->b_cont, udp);
4826 		}
4827 	}
4828 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
4829 	return (0);
4830 }
4831 
4832 /*
4833  * This routine creates a T_UDERROR_IND message and passes it upstream.
4834  * The address and options are copied from the T_UNITDATA_REQ message
4835  * passed in mp.  This message is freed.
4836  */
4837 static void
4838 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
4839     t_scalar_t err)
4840 {
4841 	struct T_unitdata_req *tudr;
4842 	mblk_t	*mp1;
4843 	uchar_t	*optaddr;
4844 	t_scalar_t optlen;
4845 
4846 	if (DB_TYPE(mp) == M_DATA) {
4847 		ASSERT(destaddr != NULL && destlen != 0);
4848 		optaddr = NULL;
4849 		optlen = 0;
4850 	} else {
4851 		if ((mp->b_wptr < mp->b_rptr) ||
4852 		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
4853 			goto done;
4854 		}
4855 		tudr = (struct T_unitdata_req *)mp->b_rptr;
4856 		destaddr = mp->b_rptr + tudr->DEST_offset;
4857 		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
4858 		    destaddr + tudr->DEST_length < mp->b_rptr ||
4859 		    destaddr + tudr->DEST_length > mp->b_wptr) {
4860 			goto done;
4861 		}
4862 		optaddr = mp->b_rptr + tudr->OPT_offset;
4863 		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
4864 		    optaddr + tudr->OPT_length < mp->b_rptr ||
4865 		    optaddr + tudr->OPT_length > mp->b_wptr) {
4866 			goto done;
4867 		}
4868 		destlen = tudr->DEST_length;
4869 		optlen = tudr->OPT_length;
4870 	}
4871 
4872 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
4873 	    (char *)optaddr, optlen, err);
4874 	if (mp1 != NULL)
4875 		qreply(q, mp1);
4876 
4877 done:
4878 	freemsg(mp);
4879 }
4880 
4881 /*
4882  * This routine removes a port number association from a stream.  It
4883  * is called by udp_wput to handle T_UNBIND_REQ messages.
4884  */
4885 static void
4886 udp_tpi_unbind(queue_t *q, mblk_t *mp)
4887 {
4888 	conn_t	*connp = Q_TO_CONN(q);
4889 	int	error;
4890 
4891 	error = udp_do_unbind(connp);
4892 	if (error) {
4893 		if (error < 0)
4894 			udp_err_ack(q, mp, -error, 0);
4895 		else
4896 			udp_err_ack(q, mp, TSYSERR, error);
4897 		return;
4898 	}
4899 
4900 	mp = mi_tpi_ok_ack_alloc(mp);
4901 	ASSERT(mp != NULL);
4902 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4903 	qreply(q, mp);
4904 }
4905 
4906 /*
4907  * Don't let port fall into the privileged range.
4908  * Since the extra privileged ports can be arbitrary we also
4909  * ensure that we exclude those from consideration.
4910  * us->us_epriv_ports is not sorted thus we loop over it until
4911  * there are no changes.
4912  */
4913 static in_port_t
4914 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
4915 {
4916 	int i;
4917 	in_port_t nextport;
4918 	boolean_t restart = B_FALSE;
4919 	udp_stack_t *us = udp->udp_us;
4920 
4921 	if (random && udp_random_anon_port != 0) {
4922 		(void) random_get_pseudo_bytes((uint8_t *)&port,
4923 		    sizeof (in_port_t));
4924 		/*
4925 		 * Unless changed by a sys admin, the smallest anon port
4926 		 * is 32768 and the largest anon port is 65535.  It is
4927 		 * very likely (50%) for the random port to be smaller
4928 		 * than the smallest anon port.  When that happens,
4929 		 * add port % (anon port range) to the smallest anon
4930 		 * port to get the random port.  It should fall into the
4931 		 * valid anon port range.
4932 		 */
4933 		if (port < us->us_smallest_anon_port) {
4934 			port = us->us_smallest_anon_port +
4935 			    port % (us->us_largest_anon_port -
4936 			    us->us_smallest_anon_port);
4937 		}
4938 	}
4939 
4940 retry:
4941 	if (port < us->us_smallest_anon_port)
4942 		port = us->us_smallest_anon_port;
4943 
4944 	if (port > us->us_largest_anon_port) {
4945 		port = us->us_smallest_anon_port;
4946 		if (restart)
4947 			return (0);
4948 		restart = B_TRUE;
4949 	}
4950 
4951 	if (port < us->us_smallest_nonpriv_port)
4952 		port = us->us_smallest_nonpriv_port;
4953 
4954 	for (i = 0; i < us->us_num_epriv_ports; i++) {
4955 		if (port == us->us_epriv_ports[i]) {
4956 			port++;
4957 			/*
4958 			 * Make sure that the port is in the
4959 			 * valid range.
4960 			 */
4961 			goto retry;
4962 		}
4963 	}
4964 
4965 	if (is_system_labeled() &&
4966 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
4967 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
4968 		port = nextport;
4969 		goto retry;
4970 	}
4971 
4972 	return (port);
4973 }
4974 
4975 static int
4976 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst,
4977     boolean_t *update_lastdst)
4978 {
4979 	int err;
4980 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4981 	udp_t *udp = Q_TO_UDP(wq);
4982 	udp_stack_t	*us = udp->udp_us;
4983 
4984 	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
4985 	    opt_storage, udp->udp_connp->conn_mac_exempt,
4986 	    us->us_netstack->netstack_ip);
4987 	if (err == 0) {
4988 		err = tsol_update_options(&udp->udp_ip_snd_options,
4989 		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
4990 		    opt_storage);
4991 	}
4992 	if (err != 0) {
4993 		DTRACE_PROBE4(
4994 		    tx__ip__log__info__updatelabel__udp,
4995 		    char *, "queue(1) failed to update options(2) on mp(3)",
4996 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
4997 	} else {
4998 		*update_lastdst = B_TRUE;
4999 	}
5000 	return (err);
5001 }
5002 
/*
 * udp_output_v4():
 * Build an IPv4 + UDP header (plus any saved IP options and, when
 * insert_spi is set, a zeroed 32-bit word right after the UDP header) in
 * front of the payload and pass the packet down to IP.
 *
 *	connp		conn_t of the sending endpoint
 *	mp		either the M_DATA payload itself, or a non-M_DATA
 *			mblk (T_UNITDATA_REQ) whose b_cont is the payload
 *	v4dst		destination address; INADDR_ANY is replaced by the
 *			loopback address
 *	port		destination port, stored unchanged in uha_dst_port
 *	srcid		source-id hint, looked up only when the endpoint's
 *			own source address is INADDR_ANY
 *	error		out: 0 on success, an errno value on failure
 *	insert_spi	reserve and zero a uint32_t after the UDP header
 *	msg		only dereferenced when IPCL_IS_NONSTR(connp)
 *	cr, pid		attached to mp with msg_setcredpid() when mp carries
 *			no credential yet and the checks below ask for one
 *
 * Returns NULL once the message has been handed to IP.  On failure the
 * (non-NULL) message is returned with *error set and udpOutErrors bumped;
 * note that the returned pointer can differ from the mp passed in, because
 * a freshly allocated header mblk may already have been prepended to an
 * M_DATA message.
 *
 * Locking: udp_rwlock is taken as WRITER around ancillary-option processing
 * and as READER while the header is built; it is always released before the
 * packet is passed down.  conn_lock covers udp_v6lastdst, udp_lastdstport
 * and the saved send options.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
    cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	ipha_t		*ipha;
	int		ip_hdr_length;
	uint32_t 	ip_len;
	udpha_t		*udpha;
	boolean_t 	lock_held = B_FALSE;
	in_port_t	uha_src_port;
	udpattrs_t	attrs;
	uchar_t		ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;
	ip4_pkt_t  	pktinfo;
	ip4_pkt_t  	*pktinfop = &pktinfo;
	ip_opt_info_t	optinfo;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t	*us = udp->udp_us;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
	queue_t		*q = connp->conn_wq;
	ire_t		*ire;
	in6_addr_t	v6dst;
	boolean_t	update_lastdst = B_FALSE;

	*error = 0;
	pktinfop->ip4_ill_index = 0;
	pktinfop->ip4_addr = INADDR_ANY;
	optinfo.ip_opt_flags = 0;
	optinfo.ip_opt_ill_index = 0;

	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (IPCL_IS_NONSTR(connp)) {
		/* Non-STREAMS caller: options arrive as ancillary data */
		if (msg->msg_controllen != 0) {
			attrs.udpattr_ipp4 = pktinfop;
			attrs.udpattr_mb = mp;

			rw_enter(&udp->udp_rwlock, RW_WRITER);
			*error = process_auxiliary_options(connp,
			    msg->msg_control, msg->msg_controllen,
			    &attrs, &udp_opt_obj, udp_opt_set);
			rw_exit(&udp->udp_rwlock);
			if (*error)
				goto done;
		}
	} else {
		/* STREAMS caller: options live in the T_UNITDATA_REQ mblk */
		if (DB_TYPE(mp) != M_DATA) {
			mp1 = mp->b_cont;
			if (((struct T_unitdata_req *)
			    mp->b_rptr)->OPT_length != 0) {
				attrs.udpattr_ipp4 = pktinfop;
				attrs.udpattr_mb = mp;
				if (udp_unitdata_opt_process(q, mp, error,
				    &attrs) < 0)
					goto done;
				/*
				 * Note: success in processing options.
				 * mp option buffer represented by
				 * OPT_length/offset now potentially modified
				 * and contain option setting results
				 */
				ASSERT(*error == 0);
			}
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Determine whether we need to mark the mblk with the user's
	 * credentials.
	 */
	ire = connp->conn_ire_cache;
	if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) ||
	    (ire->ire_addr != v4dst) ||
	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
		if (cr != NULL && DB_CRED(mp) == NULL)
			msg_setcredpid(mp, cr, pid);
	}

	rw_enter(&udp->udp_rwlock, RW_READER);
	lock_held = B_TRUE;

	/*
	 * Cluster and TSOL note:
	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
	 *    udp.udp_lastdstport	is used by Cluster
	 *
	 * Both Cluster and TSOL need to update the dest addr and/or port.
	 * Updating is done after both Cluster and TSOL checks, protected
	 * by conn_lock.
	 */
	mutex_enter(&connp->conn_lock);

	if (cl_inet_connect2 != NULL &&
	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
	    udp->udp_lastdstport != port)) {
		/* conn_lock is dropped around the cluster callback */
		mutex_exit(&connp->conn_lock);
		*error = 0;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
		if (*error != 0) {
			*error = EHOSTUNREACH;
			goto done;
		}
		update_lastdst = B_TRUE;
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * Check if our saved options are valid; update if not.
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels,
	 * or worse, UDP packets to same IP address may require
	 * different labels due to use of shared all-zones address.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/*
		 * update label option for this UDP socket if
		 * - the destination has changed, or
		 * - the UDP socket is MLP
		 */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
		    connp->conn_mlp_type != mlptSingle) &&
		    (*error = udp_update_label(q, mp, v4dst, &update_lastdst))
		    != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	if (update_lastdst) {
		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
		udp->udp_lastdstport = port;
	}
	/*
	 * Snapshot the saved IP options into a local buffer while conn_lock
	 * is held; the header below is built from this copy.
	 */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
	    (insert_spi ? sizeof (uint32_t) : 0);
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * The headers are written in place in front of the payload unless
	 * the dblk is shared, there is not enough room before b_rptr, or the
	 * resulting header would not be 32-bit aligned; in those cases a new
	 * mblk is allocated and the headers are placed at its end.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		/* Keep mp as the head of the (possibly re-linked) message */
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	/* From here on ip_hdr_length covers the IP header and options only */
	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	if (pktinfop->ip4_addr != INADDR_ANY) {
		/* Source address supplied through the per-packet options */
		ipha->ipha_src = pktinfop->ip4_addr;
		optinfo.ip_opt_flags = IP_VERIFY_SRC;
	} else {
		/*
		 * Copy our address into the packet.  If this is zero,
		 * first look at __sin6_src_id for a hint. If we leave the
		 * source as INADDR_ANY then ip will fill in the real source
		 * address.
		 */
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
			in6_addr_t v6src;

			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
			    us->us_netstack);
			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
		}
	}
	uha_src_port = udp->udp_port;
	/*
	 * Without IP options the rwlock is released here; with options it
	 * is released in the options branch further down.
	 */
	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
		rw_exit(&udp->udp_rwlock);
		lock_held = B_FALSE;
	}

	if (pktinfop->ip4_ill_index != 0) {
		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* ip_len becomes the UDP length, in network byte order */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = uha_src_port;

	if (ip_snd_opt_len > 0) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		lock_held = B_FALSE;
		rw_exit(&udp->udp_rwlock);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha, us->us_netstack);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (us->us_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (us->us_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (us->us_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	ASSERT(!lock_held);
	/*
	 * Set UDP length and checksum: one 32-bit store starting at
	 * uha_length, with the two 16-bit halves packed above per endianness.
	 */
	*((uint32_t *)&udpha->uha_length) = ip_len;
	/* Carry the credential over to the mblk that is actually sent */
	if (DB_CRED(mp) != NULL)
		mblk_setcred(mp1, DB_CRED(mp));

	/* The non-M_DATA (T_UNITDATA_REQ) header mblk is no longer needed */
	if (DB_TYPE(mp) != M_DATA) {
		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Any of the conditions below sends the packet through
	 * ip_output_options(); only when none applies (which includes the IP
	 * header being a simple one without options) is udp_send_data() used.
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	/* Common exit: drop the rwlock if an error path left it held */
	if (lock_held)
		rw_exit(&udp->udp_rwlock);
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}
5380 
5381 static void
5382 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
5383 {
5384 	conn_t	*connp = udp->udp_connp;
5385 	ipaddr_t src, dst;
5386 	ire_t	*ire;
5387 	ipif_t	*ipif = NULL;
5388 	mblk_t	*ire_fp_mp;
5389 	boolean_t retry_caching;
5390 	udp_stack_t *us = udp->udp_us;
5391 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5392 
5393 	dst = ipha->ipha_dst;
5394 	src = ipha->ipha_src;
5395 	ASSERT(ipha->ipha_ident == 0);
5396 
5397 	if (CLASSD(dst)) {
5398 		int err;
5399 
5400 		ipif = conn_get_held_ipif(connp,
5401 		    &connp->conn_multicast_ipif, &err);
5402 
5403 		if (ipif == NULL || ipif->ipif_isv6 ||
5404 		    (ipif->ipif_ill->ill_phyint->phyint_flags &
5405 		    PHYI_LOOPBACK)) {
5406 			if (ipif != NULL)
5407 				ipif_refrele(ipif);
5408 			UDP_STAT(us, udp_ip_send);
5409 			ip_output(connp, mp, q, IP_WPUT);
5410 			return;
5411 		}
5412 	}
5413 
5414 	retry_caching = B_FALSE;
5415 	mutex_enter(&connp->conn_lock);
5416 	ire = connp->conn_ire_cache;
5417 	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));
5418 
5419 	if (ire == NULL || ire->ire_addr != dst ||
5420 	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
5421 		retry_caching = B_TRUE;
5422 	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
5423 		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;
5424 
5425 		ASSERT(ipif != NULL);
5426 		if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill))
5427 			retry_caching = B_TRUE;
5428 	}
5429 
5430 	if (!retry_caching) {
5431 		ASSERT(ire != NULL);
5432 		IRE_REFHOLD(ire);
5433 		mutex_exit(&connp->conn_lock);
5434 	} else {
5435 		boolean_t cached = B_FALSE;
5436 
5437 		connp->conn_ire_cache = NULL;
5438 		mutex_exit(&connp->conn_lock);
5439 
5440 		/* Release the old ire */
5441 		if (ire != NULL) {
5442 			IRE_REFRELE_NOTR(ire);
5443 			ire = NULL;
5444 		}
5445 
5446 		if (CLASSD(dst)) {
5447 			ASSERT(ipif != NULL);
5448 			ire = ire_ctable_lookup(dst, 0, 0, ipif,
5449 			    connp->conn_zoneid, MBLK_GETLABEL(mp),
5450 			    MATCH_IRE_ILL, ipst);
5451 		} else {
5452 			ASSERT(ipif == NULL);
5453 			ire = ire_cache_lookup(dst, connp->conn_zoneid,
5454 			    MBLK_GETLABEL(mp), ipst);
5455 		}
5456 
5457 		if (ire == NULL) {
5458 			if (ipif != NULL)
5459 				ipif_refrele(ipif);
5460 			UDP_STAT(us, udp_ire_null);
5461 			ip_output(connp, mp, q, IP_WPUT);
5462 			return;
5463 		}
5464 		IRE_REFHOLD_NOTR(ire);
5465 
5466 		mutex_enter(&connp->conn_lock);
5467 		if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL &&
5468 		    !(ire->ire_marks & IRE_MARK_CONDEMNED)) {
5469 			irb_t		*irb = ire->ire_bucket;
5470 
5471 			/*
5472 			 * IRE's created for non-connection oriented transports
5473 			 * are normally initialized with IRE_MARK_TEMPORARY set
5474 			 * in the ire_marks. These IRE's are preferentially
5475 			 * reaped when the hash chain length in the cache
5476 			 * bucket exceeds the maximum value specified in
5477 			 * ip[6]_ire_max_bucket_cnt. This can severely affect
5478 			 * UDP performance if IRE cache entries that we need
5479 			 * to reuse are continually removed. To remedy this,
5480 			 * when we cache the IRE in the conn_t, we remove the
5481 			 * IRE_MARK_TEMPORARY bit from the ire_marks if it was
5482 			 * set.
5483 			 */
5484 			if (ire->ire_marks & IRE_MARK_TEMPORARY) {
5485 				rw_enter(&irb->irb_lock, RW_WRITER);
5486 				if (ire->ire_marks & IRE_MARK_TEMPORARY) {
5487 					ire->ire_marks &= ~IRE_MARK_TEMPORARY;
5488 					irb->irb_tmp_ire_cnt--;
5489 				}
5490 				rw_exit(&irb->irb_lock);
5491 			}
5492 			connp->conn_ire_cache = ire;
5493 			cached = B_TRUE;
5494 		}
5495 		mutex_exit(&connp->conn_lock);
5496 
5497 		/*
5498 		 * We can continue to use the ire but since it was not
5499 		 * cached, we should drop the extra reference.
5500 		 */
5501 		if (!cached)
5502 			IRE_REFRELE_NOTR(ire);
5503 	}
5504 	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
5505 	ASSERT(!CLASSD(dst) || ipif != NULL);
5506 
5507 	/*
5508 	 * Check if we can take the fast-path.
5509 	 * Note that "incomplete" ire's (where the link-layer for next hop
5510 	 * is not resolved, or where the fast-path header in nce_fp_mp is not
5511 	 * available yet) are sent down the legacy (slow) path
5512 	 */
5513 	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
5514 	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
5515 	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
5516 	    ((ire->ire_nce == NULL) ||
5517 	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
5518 	    connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
5519 		if (ipif != NULL)
5520 			ipif_refrele(ipif);
5521 		UDP_STAT(us, udp_ip_ire_send);
5522 		IRE_REFRELE(ire);
5523 		ip_output(connp, mp, q, IP_WPUT);
5524 		return;
5525 	}
5526 
5527 	if (src == INADDR_ANY && !connp->conn_unspec_src) {
5528 		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
5529 			ipha->ipha_src = ipif->ipif_src_addr;
5530 		else
5531 			ipha->ipha_src = ire->ire_src_addr;
5532 	}
5533 
5534 	if (ipif != NULL)
5535 		ipif_refrele(ipif);
5536 
5537 	udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
5538 }
5539 
/*
 * udp_xmit():
 * Fast-path transmit of an IPv4 UDP packet with a simple (option-less) IP
 * header: finish the IP/UDP headers, prepend the link-layer header found in
 * ire->ire_nce->nce_fp_mp and hand the packet to the driver.
 *
 *	q	write queue checked for already queued messages (q_first)
 *	mp	packet; b_rptr points at the IPv4 header
 *	ire	route entry to transmit on; one reference on it is released
 *		on every return path of this function
 *	connp	sending conn
 *	zoneid	zone passed to ip_multicast_loopback()
 *
 * mp is not returned to the caller on any path: it is queued or freed when
 * flow controlled, freed when a multicast packet has a TTL of 0, and
 * otherwise sent (unless the firewall hooks left mp NULL).
 */
static void
udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
{
	ipaddr_t src, dst;
	ill_t	*ill;
	mblk_t	*ire_fp_mp;
	uint_t	ire_fp_mp_len;
	uint16_t *up;
	uint32_t cksum, hcksum_txflags;
	queue_t	*dev_q;
	udp_t	*udp = connp->conn_udp;
	ipha_t	*ipha = (ipha_t *)mp->b_rptr;
	udp_stack_t	*us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	ll_multicast = B_FALSE;

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	/* is queue flow controlled? */
	if (q->q_first != NULL || connp->conn_draining ||
	    DEV_Q_FLOW_BLOCKED(dev_q)) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);

		/* Either park the packet on the conn's queue or drop it */
		if (ipst->ips_ip_output_queue)
			(void) putq(connp->conn_wq, mp);
		else
			freemsg(mp);
		ire_refrele(ire);
		return;
	}

	ire_fp_mp = ire->ire_nce->nce_fp_mp;
	ire_fp_mp_len = MBLKL(ire_fp_mp);
	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;


	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	/*
	 * Take the next IP ident from the ire; on little-endian machines
	 * the two bytes are swapped before it stays in the header.
	 */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	/* Hardware checksum flags, if the ill offers them and dohwcksum is set */
	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means no UDP checksum is to be computed */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	/* Unicast/broadcast packets pick up the ire's fragmentation flag */
	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		/*
		 * Loop the packet back when ilm_lookup_ill() finds the group
		 * on this ill; IP_FF_NO_MCAST_LOOP is passed along when the
		 * conn has multicast loopback turned off.
		 */
		if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			freemsg(mp);
			ire_refrele(ire);
			return;
		}
		ll_multicast = B_TRUE;
	}

	/* Prepend the link-layer (fast-path) header in the mblk's headroom */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	/*
	 * Run the physical-out firewall hooks.  mp is tested against NULL
	 * afterwards, so the hooks presumably may consume the packet.
	 */
	DTRACE_PROBE4(ip4__physical__out__start,
	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
	    ll_multicast, ipst);
	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	if (ipst->ips_ipobs_enabled && mp != NULL) {
		zoneid_t szone;

		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
		    ipst, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
	}

	if (mp != NULL) {
		DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
		    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
		    ipha_t *, ipha, ip6_t *, NULL, int, 0);

		/* Use the direct-call transmit entry point when available */
		if (ILL_DIRECT_CAPABLE(ill)) {
			ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;

			(void) idd->idd_tx_df(idd->idd_tx_dh, mp,
			    (uintptr_t)connp, 0);
		} else {
			putnext(ire->ire_stq, mp);
		}
	}
	IRE_REFRELE(ire);
}
5685 
5686 static boolean_t
5687 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst,
5688     boolean_t *update_lastdst)
5689 {
5690 	udp_t *udp = Q_TO_UDP(wq);
5691 	int err;
5692 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
5693 	udp_stack_t		*us = udp->udp_us;
5694 
5695 	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
5696 	    dst, opt_storage, udp->udp_connp->conn_mac_exempt,
5697 	    us->us_netstack->netstack_ip);
5698 	if (err == 0) {
5699 		err = tsol_update_sticky(&udp->udp_sticky_ipp,
5700 		    &udp->udp_label_len_v6, opt_storage);
5701 	}
5702 	if (err != 0) {
5703 		DTRACE_PROBE4(
5704 		    tx__ip__log__drop__updatelabel__udp6,
5705 		    char *, "queue(1) failed to update options(2) on mp(3)",
5706 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
5707 	} else {
5708 		*update_lastdst = B_TRUE;
5709 	}
5710 	return (err);
5711 }
5712 
5713 static int
5714 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr,
5715     pid_t pid)
5716 {
5717 	udp_t		*udp = connp->conn_udp;
5718 	udp_stack_t	*us = udp->udp_us;
5719 	ipaddr_t	v4dst;
5720 	in_port_t	dstport;
5721 	boolean_t	mapped_addr;
5722 	struct sockaddr_storage ss;
5723 	sin_t		*sin;
5724 	sin6_t		*sin6;
5725 	struct sockaddr	*addr;
5726 	socklen_t	addrlen;
5727 	int		error;
5728 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5729 
5730 	/* M_DATA for connected socket */
5731 
5732 	ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp));
5733 	UDP_DBGSTAT(us, udp_data_conn);
5734 
5735 	mutex_enter(&connp->conn_lock);
5736 	if (udp->udp_state != TS_DATA_XFER) {
5737 		mutex_exit(&connp->conn_lock);
5738 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5739 		UDP_STAT(us, udp_out_err_notconn);
5740 		freemsg(mp);
5741 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5742 		    "udp_wput_end: connp %p (%S)", connp,
5743 		    "not-connected; address required");
5744 		return (EDESTADDRREQ);
5745 	}
5746 
5747 	mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
5748 	if (mapped_addr)
5749 		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
5750 
5751 	/* Initialize addr and addrlen as if they're passed in */
5752 	if (udp->udp_family == AF_INET) {
5753 		sin = (sin_t *)&ss;
5754 		sin->sin_family = AF_INET;
5755 		dstport = sin->sin_port = udp->udp_dstport;
5756 		ASSERT(mapped_addr);
5757 		sin->sin_addr.s_addr = v4dst;
5758 		addr = (struct sockaddr *)sin;
5759 		addrlen = sizeof (*sin);
5760 	} else {
5761 		sin6 = (sin6_t *)&ss;
5762 		sin6->sin6_family = AF_INET6;
5763 		dstport = sin6->sin6_port = udp->udp_dstport;
5764 		sin6->sin6_flowinfo = udp->udp_flowinfo;
5765 		sin6->sin6_addr = udp->udp_v6dst;
5766 		sin6->sin6_scope_id = 0;
5767 		sin6->__sin6_src_id = 0;
5768 		addr = (struct sockaddr *)sin6;
5769 		addrlen = sizeof (*sin6);
5770 	}
5771 	mutex_exit(&connp->conn_lock);
5772 
5773 	if (mapped_addr) {
5774 		/*
5775 		 * Handle both AF_INET and AF_INET6; the latter
5776 		 * for IPV4 mapped destination addresses.  Note
5777 		 * here that both addr and addrlen point to the
5778 		 * corresponding struct depending on the address
5779 		 * family of the socket.
5780 		 */
5781 		mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
5782 		    insert_spi, msg, cr, pid);
5783 	} else {
5784 		mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid);
5785 	}
5786 	if (error == 0) {
5787 		ASSERT(mp == NULL);
5788 		return (0);
5789 	}
5790 
5791 	UDP_STAT(us, udp_out_err_output);
5792 	ASSERT(mp != NULL);
5793 	if (IPCL_IS_NONSTR(connp)) {
5794 		freemsg(mp);
5795 		return (error);
5796 	} else {
5797 		/* mp is freed by the following routine */
5798 		udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr,
5799 		    (t_scalar_t)addrlen, (t_scalar_t)error);
5800 		return (0);
5801 	}
5802 }
5803 
5804 /* ARGSUSED */
5805 static int
5806 udp_send_not_connected(conn_t *connp,  mblk_t *mp, struct sockaddr *addr,
5807     socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid)
5808 {
5809 
5810 	udp_t		*udp = connp->conn_udp;
5811 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5812 	int		error = 0;
5813 	sin6_t		*sin6;
5814 	sin_t		*sin;
5815 	uint_t		srcid;
5816 	uint16_t	port;
5817 	ipaddr_t	v4dst;
5818 
5819 
5820 	ASSERT(addr != NULL);
5821 
5822 	switch (udp->udp_family) {
5823 	case AF_INET6:
5824 		sin6 = (sin6_t *)addr;
5825 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5826 			/*
5827 			 * Destination is a non-IPv4-compatible IPv6 address.
5828 			 * Send out an IPv6 format packet.
5829 			 */
5830 			mp = udp_output_v6(connp, mp, sin6, &error, msg, cr,
5831 			    pid);
5832 			if (error != 0)
5833 				goto ud_error;
5834 
5835 			return (0);
5836 		}
5837 		/*
5838 		 * If the local address is not zero or a mapped address
5839 		 * return an error.  It would be possible to send an IPv4
5840 		 * packet but the response would never make it back to the
5841 		 * application since it is bound to a non-mapped address.
5842 		 */
5843 		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
5844 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
5845 			error = EADDRNOTAVAIL;
5846 			goto ud_error;
5847 		}
5848 		/* Send IPv4 packet without modifying udp_ipversion */
5849 		/* Extract port and ipaddr */
5850 		port = sin6->sin6_port;
5851 		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
5852 		srcid = sin6->__sin6_src_id;
5853 		break;
5854 
5855 	case AF_INET:
5856 		sin = (sin_t *)addr;
5857 		/* Extract port and ipaddr */
5858 		port = sin->sin_port;
5859 		v4dst = sin->sin_addr.s_addr;
5860 		srcid = 0;
5861 		break;
5862 	}
5863 
5864 	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
5865 	    msg, cr, pid);
5866 
5867 	if (error == 0) {
5868 		ASSERT(mp == NULL);
5869 		return (0);
5870 	}
5871 
5872 ud_error:
5873 	ASSERT(mp != NULL);
5874 
5875 	return (error);
5876 }
5877 
5878 /*
5879  * This routine handles all messages passed downstream.  It either
5880  * consumes the message or passes it downstream; it never queues a
5881  * a message.
5882  *
5883  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
5884  * is valid when we are directly beneath the stream head, and thus sockfs
5885  * is able to bypass STREAMS and directly call us, passing along the sockaddr
5886  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
5887  * connected endpoints.
5888  */
5889 void
5890 udp_wput(queue_t *q, mblk_t *mp)
5891 {
5892 	conn_t		*connp = Q_TO_CONN(q);
5893 	udp_t		*udp = connp->conn_udp;
5894 	int		error = 0;
5895 	struct sockaddr	*addr;
5896 	socklen_t	addrlen;
5897 	udp_stack_t	*us = udp->udp_us;
5898 
5899 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
5900 	    "udp_wput_start: queue %p mp %p", q, mp);
5901 
5902 	/*
5903 	 * We directly handle several cases here: T_UNITDATA_REQ message
5904 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
5905 	 * socket.
5906 	 */
5907 	switch (DB_TYPE(mp)) {
5908 	case M_DATA:
5909 		/*
5910 		 * Quick check for error cases. Checks will be done again
5911 		 * under the lock later on
5912 		 */
5913 		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
5914 			/* Not connected; address is required */
5915 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5916 			UDP_STAT(us, udp_out_err_notconn);
5917 			freemsg(mp);
5918 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5919 			    "udp_wput_end: connp %p (%S)", connp,
5920 			    "not-connected; address required");
5921 			return;
5922 		}
5923 		(void) udp_send_connected(connp, mp, NULL, NULL, -1);
5924 		return;
5925 
5926 	case M_PROTO:
5927 	case M_PCPROTO: {
5928 		struct T_unitdata_req *tudr;
5929 
5930 		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
5931 		tudr = (struct T_unitdata_req *)mp->b_rptr;
5932 
5933 		/* Handle valid T_UNITDATA_REQ here */
5934 		if (MBLKL(mp) >= sizeof (*tudr) &&
5935 		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
5936 			if (mp->b_cont == NULL) {
5937 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5938 				    "udp_wput_end: q %p (%S)", q, "badaddr");
5939 				error = EPROTO;
5940 				goto ud_error;
5941 			}
5942 
5943 			if (!MBLKIN(mp, 0, tudr->DEST_offset +
5944 			    tudr->DEST_length)) {
5945 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5946 				    "udp_wput_end: q %p (%S)", q, "badaddr");
5947 				error = EADDRNOTAVAIL;
5948 				goto ud_error;
5949 			}
5950 			/*
5951 			 * If a port has not been bound to the stream, fail.
5952 			 * This is not a problem when sockfs is directly
5953 			 * above us, because it will ensure that the socket
5954 			 * is first bound before allowing data to be sent.
5955 			 */
5956 			if (udp->udp_state == TS_UNBND) {
5957 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5958 				    "udp_wput_end: q %p (%S)", q, "outstate");
5959 				error = EPROTO;
5960 				goto ud_error;
5961 			}
5962 			addr = (struct sockaddr *)
5963 			    &mp->b_rptr[tudr->DEST_offset];
5964 			addrlen = tudr->DEST_length;
5965 			if (tudr->OPT_length != 0)
5966 				UDP_STAT(us, udp_out_opt);
5967 			break;
5968 		}
5969 		/* FALLTHRU */
5970 	}
5971 	default:
5972 		udp_wput_other(q, mp);
5973 		return;
5974 	}
5975 	ASSERT(addr != NULL);
5976 
5977 	error = udp_send_not_connected(connp,  mp, addr, addrlen, NULL, NULL,
5978 	    -1);
5979 	if (error != 0) {
5980 ud_error:
5981 		UDP_STAT(us, udp_out_err_output);
5982 		ASSERT(mp != NULL);
5983 		/* mp is freed by the following routine */
5984 		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
5985 		    (t_scalar_t)error);
5986 	}
5987 }
5988 
/*
 * udp_wput_fallback():
 * Discard mp with freemsg().  DEBUG kernels additionally log a notice
 * through cmn_err().  wq is not used.
 */
/* ARGSUSED */
static void
udp_wput_fallback(queue_t *wq, mblk_t *mp)
{
#ifdef DEBUG
	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
#endif
	/* The message is dropped unconditionally */
	freemsg(mp);
}
5998 
5999 
6000 /*
6001  * udp_output_v6():
6002  * Assumes that udp_wput did some sanity checking on the destination
6003  * address.
6004  */
6005 static mblk_t *
6006 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error,
6007     struct nmsghdr *msg, cred_t *cr, pid_t pid)
6008 {
6009 	ip6_t		*ip6h;
6010 	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
6011 	mblk_t		*mp1 = mp;
6012 	mblk_t		*mp2;
6013 	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
6014 	size_t		ip_len;
6015 	udpha_t		*udph;
6016 	udp_t		*udp = connp->conn_udp;
6017 	udp_stack_t	*us = udp->udp_us;
6018 	queue_t		*q = connp->conn_wq;
6019 	ip6_pkt_t	ipp_s;	/* For ancillary data options */
6020 	ip6_pkt_t	*ipp = &ipp_s;
6021 	ip6_pkt_t	*tipp;	/* temporary ipp */
6022 	uint32_t	csum = 0;
6023 	uint_t		ignore = 0;
6024 	uint_t		option_exists = 0, is_sticky = 0;
6025 	uint8_t		*cp;
6026 	uint8_t		*nxthdr_ptr;
6027 	in6_addr_t	ip6_dst;
6028 	in_port_t	port;
6029 	udpattrs_t	attrs;
6030 	boolean_t	opt_present;
6031 	ip6_hbh_t	*hopoptsptr = NULL;
6032 	uint_t		hopoptslen = 0;
6033 	boolean_t	is_ancillary = B_FALSE;
6034 	size_t		sth_wroff = 0;
6035 	ire_t		*ire;
6036 	boolean_t	update_lastdst = B_FALSE;
6037 
6038 	*error = 0;
6039 
6040 	/*
6041 	 * If the local address is a mapped address return
6042 	 * an error.
6043 	 * It would be possible to send an IPv6 packet but the
6044 	 * response would never make it back to the application
6045 	 * since it is bound to a mapped address.
6046 	 */
6047 	if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
6048 		*error = EADDRNOTAVAIL;
6049 		goto done;
6050 	}
6051 
6052 	ipp->ipp_fields = 0;
6053 	ipp->ipp_sticky_ignored = 0;
6054 
6055 	/*
6056 	 * If TPI options passed in, feed it for verification and handling
6057 	 */
6058 	attrs.udpattr_credset = B_FALSE;
6059 	opt_present = B_FALSE;
6060 	if (IPCL_IS_NONSTR(connp)) {
6061 		if (msg->msg_controllen != 0) {
6062 			attrs.udpattr_ipp6 = ipp;
6063 			attrs.udpattr_mb = mp;
6064 
6065 			rw_enter(&udp->udp_rwlock, RW_WRITER);
6066 			*error = process_auxiliary_options(connp,
6067 			    msg->msg_control, msg->msg_controllen,
6068 			    &attrs, &udp_opt_obj, udp_opt_set);
6069 			rw_exit(&udp->udp_rwlock);
6070 			if (*error)
6071 				goto done;
6072 			ASSERT(*error == 0);
6073 			opt_present = B_TRUE;
6074 		}
6075 	} else {
6076 		if (DB_TYPE(mp) != M_DATA) {
6077 			mp1 = mp->b_cont;
6078 			if (((struct T_unitdata_req *)
6079 			    mp->b_rptr)->OPT_length != 0) {
6080 				attrs.udpattr_ipp6 = ipp;
6081 				attrs.udpattr_mb = mp;
6082 				if (udp_unitdata_opt_process(q, mp, error,
6083 				    &attrs) < 0) {
6084 					goto done;
6085 				}
6086 				ASSERT(*error == 0);
6087 				opt_present = B_TRUE;
6088 			}
6089 		}
6090 	}
6091 
6092 	/*
6093 	 * Determine whether we need to mark the mblk with the user's
6094 	 * credentials.
6095 	 */
6096 	ire = connp->conn_ire_cache;
6097 	if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
6098 	    (ire == NULL) ||
6099 	    (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) ||
6100 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) {
6101 		if (cr != NULL && DB_CRED(mp) == NULL)
6102 			msg_setcredpid(mp, cr, pid);
6103 	}
6104 
6105 	rw_enter(&udp->udp_rwlock, RW_READER);
6106 	ignore = ipp->ipp_sticky_ignored;
6107 
6108 	/* mp1 points to the M_DATA mblk carrying the packet */
6109 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
6110 
6111 	if (sin6->sin6_scope_id != 0 &&
6112 	    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
6113 		/*
6114 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
6115 		 * option nor ancillary data.  It needs to be
6116 		 * explicitly set in options_exists.
6117 		 */
6118 		option_exists |= IPPF_SCOPE_ID;
6119 	}
6120 
6121 	/*
6122 	 * Compute the destination address
6123 	 */
6124 	ip6_dst = sin6->sin6_addr;
6125 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6126 		ip6_dst = ipv6_loopback;
6127 
6128 	port = sin6->sin6_port;
6129 
6130 	/*
6131 	 * Cluster and TSOL notes, Cluster check:
6132 	 * see comments in udp_output_v4().
6133 	 */
6134 	mutex_enter(&connp->conn_lock);
6135 
6136 	if (cl_inet_connect2 != NULL &&
6137 	    (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) ||
6138 	    port != udp->udp_lastdstport)) {
6139 		mutex_exit(&connp->conn_lock);
6140 		*error = 0;
6141 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error);
6142 		if (*error != 0) {
6143 			*error = EHOSTUNREACH;
6144 			rw_exit(&udp->udp_rwlock);
6145 			goto done;
6146 		}
6147 		update_lastdst = B_TRUE;
6148 		mutex_enter(&connp->conn_lock);
6149 	}
6150 
6151 	/*
6152 	 * If we're not going to the same destination as last time, then
6153 	 * recompute the label required.  This is done in a separate routine to
6154 	 * avoid blowing up our stack here.
6155 	 *
6156 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
6157 	 * to different destination may require different labels,
6158 	 * or worse, UDP packets to same IP address may require
6159 	 * different labels due to use of shared all-zones address.
6160 	 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
6161 	 * and sticky ipp_hopoptslen are consistent for the current
6162 	 * destination and are updated atomically.
6163 	 */
6164 	if (is_system_labeled()) {
6165 		/* Using UDP MLP requires SCM_UCRED from user */
6166 		if (connp->conn_mlp_type != mlptSingle &&
6167 		    !attrs.udpattr_credset) {
6168 			DTRACE_PROBE4(
6169 			    tx__ip__log__info__output__udp6,
6170 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
6171 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
6172 			*error = ECONNREFUSED;
6173 			rw_exit(&udp->udp_rwlock);
6174 			mutex_exit(&connp->conn_lock);
6175 			goto done;
6176 		}
6177 		/*
6178 		 * update label option for this UDP socket if
6179 		 * - the destination has changed, or
6180 		 * - the UDP socket is MLP
6181 		 */
6182 		if ((opt_present ||
6183 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
6184 		    connp->conn_mlp_type != mlptSingle) &&
6185 		    (*error = udp_update_label_v6(q, mp, &ip6_dst,
6186 		    &update_lastdst)) != 0) {
6187 			rw_exit(&udp->udp_rwlock);
6188 			mutex_exit(&connp->conn_lock);
6189 			goto done;
6190 		}
6191 	}
6192 
6193 	if (update_lastdst) {
6194 		udp->udp_v6lastdst = ip6_dst;
6195 		udp->udp_lastdstport = port;
6196 	}
6197 
6198 	/*
6199 	 * If there's a security label here, then we ignore any options the
6200 	 * user may try to set.  We keep the peer's label as a hidden sticky
6201 	 * option. We make a private copy of this label before releasing the
6202 	 * lock so that label is kept consistent with the destination addr.
6203 	 */
6204 	if (udp->udp_label_len_v6 > 0) {
6205 		ignore &= ~IPPF_HOPOPTS;
6206 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
6207 	}
6208 
6209 	if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
6210 		/* No sticky options nor ancillary data. */
6211 		mutex_exit(&connp->conn_lock);
6212 		goto no_options;
6213 	}
6214 
6215 	/*
6216 	 * Go through the options figuring out where each is going to
6217 	 * come from and build two masks.  The first mask indicates if
6218 	 * the option exists at all.  The second mask indicates if the
6219 	 * option is sticky or ancillary.
6220 	 */
6221 	if (!(ignore & IPPF_HOPOPTS)) {
6222 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
6223 			option_exists |= IPPF_HOPOPTS;
6224 			udp_ip_hdr_len += ipp->ipp_hopoptslen;
6225 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
6226 			option_exists |= IPPF_HOPOPTS;
6227 			is_sticky |= IPPF_HOPOPTS;
6228 			ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
6229 			hopoptsptr = kmem_alloc(
6230 			    udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
6231 			if (hopoptsptr == NULL) {
6232 				*error = ENOMEM;
6233 				mutex_exit(&connp->conn_lock);
6234 				goto done;
6235 			}
6236 			hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
6237 			bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
6238 			    hopoptslen);
6239 			udp_ip_hdr_len += hopoptslen;
6240 		}
6241 	}
6242 	mutex_exit(&connp->conn_lock);
6243 
6244 	if (!(ignore & IPPF_RTHDR)) {
6245 		if (ipp->ipp_fields & IPPF_RTHDR) {
6246 			option_exists |= IPPF_RTHDR;
6247 			udp_ip_hdr_len += ipp->ipp_rthdrlen;
6248 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
6249 			option_exists |= IPPF_RTHDR;
6250 			is_sticky |= IPPF_RTHDR;
6251 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
6252 		}
6253 	}
6254 
6255 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
6256 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
6257 			option_exists |= IPPF_RTDSTOPTS;
6258 			udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
6259 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
6260 			option_exists |= IPPF_RTDSTOPTS;
6261 			is_sticky |= IPPF_RTDSTOPTS;
6262 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
6263 		}
6264 	}
6265 
6266 	if (!(ignore & IPPF_DSTOPTS)) {
6267 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
6268 			option_exists |= IPPF_DSTOPTS;
6269 			udp_ip_hdr_len += ipp->ipp_dstoptslen;
6270 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
6271 			option_exists |= IPPF_DSTOPTS;
6272 			is_sticky |= IPPF_DSTOPTS;
6273 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
6274 		}
6275 	}
6276 
6277 	if (!(ignore & IPPF_IFINDEX)) {
6278 		if (ipp->ipp_fields & IPPF_IFINDEX) {
6279 			option_exists |= IPPF_IFINDEX;
6280 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
6281 			option_exists |= IPPF_IFINDEX;
6282 			is_sticky |= IPPF_IFINDEX;
6283 		}
6284 	}
6285 
6286 	if (!(ignore & IPPF_ADDR)) {
6287 		if (ipp->ipp_fields & IPPF_ADDR) {
6288 			option_exists |= IPPF_ADDR;
6289 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
6290 			option_exists |= IPPF_ADDR;
6291 			is_sticky |= IPPF_ADDR;
6292 		}
6293 	}
6294 
6295 	if (!(ignore & IPPF_DONTFRAG)) {
6296 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
6297 			option_exists |= IPPF_DONTFRAG;
6298 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
6299 			option_exists |= IPPF_DONTFRAG;
6300 			is_sticky |= IPPF_DONTFRAG;
6301 		}
6302 	}
6303 
6304 	if (!(ignore & IPPF_USE_MIN_MTU)) {
6305 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
6306 			option_exists |= IPPF_USE_MIN_MTU;
6307 		} else if (udp->udp_sticky_ipp.ipp_fields &
6308 		    IPPF_USE_MIN_MTU) {
6309 			option_exists |= IPPF_USE_MIN_MTU;
6310 			is_sticky |= IPPF_USE_MIN_MTU;
6311 		}
6312 	}
6313 
6314 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
6315 		option_exists |= IPPF_HOPLIMIT;
6316 	/* IPV6_HOPLIMIT can never be sticky */
6317 	ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
6318 
6319 	if (!(ignore & IPPF_UNICAST_HOPS) &&
6320 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
6321 		option_exists |= IPPF_UNICAST_HOPS;
6322 		is_sticky |= IPPF_UNICAST_HOPS;
6323 	}
6324 
6325 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
6326 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
6327 		option_exists |= IPPF_MULTICAST_HOPS;
6328 		is_sticky |= IPPF_MULTICAST_HOPS;
6329 	}
6330 
6331 	if (!(ignore & IPPF_TCLASS)) {
6332 		if (ipp->ipp_fields & IPPF_TCLASS) {
6333 			option_exists |= IPPF_TCLASS;
6334 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
6335 			option_exists |= IPPF_TCLASS;
6336 			is_sticky |= IPPF_TCLASS;
6337 		}
6338 	}
6339 
6340 	if (!(ignore & IPPF_NEXTHOP) &&
6341 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
6342 		option_exists |= IPPF_NEXTHOP;
6343 		is_sticky |= IPPF_NEXTHOP;
6344 	}
6345 
6346 no_options:
6347 
6348 	/*
6349 	 * If any options carried in the ip6i_t were specified, we
6350 	 * need to account for the ip6i_t in the data we'll be sending
6351 	 * down.
6352 	 */
6353 	if (option_exists & IPPF_HAS_IP6I)
6354 		udp_ip_hdr_len += sizeof (ip6i_t);
6355 
6356 	/* check/fix buffer config, setup pointers into it */
6357 	ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
6358 	if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
6359 	    !OK_32PTR(ip6h)) {
6360 
6361 		/* Try to get everything in a single mblk next time */
6362 		if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
6363 			udp->udp_max_hdr_len = udp_ip_hdr_len;
6364 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
6365 		}
6366 
6367 		mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
6368 		if (mp2 == NULL) {
6369 			*error = ENOMEM;
6370 			rw_exit(&udp->udp_rwlock);
6371 			goto done;
6372 		}
6373 		mp2->b_wptr = DB_LIM(mp2);
6374 		mp2->b_cont = mp1;
6375 		mp1 = mp2;
6376 		if (DB_TYPE(mp) != M_DATA)
6377 			mp->b_cont = mp1;
6378 		else
6379 			mp = mp1;
6380 
6381 		ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
6382 	}
6383 	mp1->b_rptr = (unsigned char *)ip6h;
6384 	ip6i = (ip6i_t *)ip6h;
6385 
6386 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
6387 	if (option_exists & IPPF_HAS_IP6I) {
6388 		ip6h = (ip6_t *)&ip6i[1];
6389 		ip6i->ip6i_flags = 0;
6390 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6391 
6392 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
6393 		if (option_exists & IPPF_SCOPE_ID) {
6394 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6395 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
6396 		} else if (option_exists & IPPF_IFINDEX) {
6397 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
6398 			ASSERT(tipp->ipp_ifindex != 0);
6399 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6400 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
6401 		}
6402 
6403 		if (option_exists & IPPF_ADDR) {
6404 			/*
6405 			 * Enable per-packet source address verification if
6406 			 * IPV6_PKTINFO specified the source address.
6407 			 * ip6_src is set in the transport's _wput function.
6408 			 */
6409 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
6410 		}
6411 
6412 		if (option_exists & IPPF_DONTFRAG) {
6413 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
6414 		}
6415 
6416 		if (option_exists & IPPF_USE_MIN_MTU) {
6417 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
6418 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
6419 		}
6420 
6421 		if (option_exists & IPPF_NEXTHOP) {
6422 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
6423 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
6424 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
6425 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
6426 		}
6427 
6428 		/*
6429 		 * tell IP this is an ip6i_t private header
6430 		 */
6431 		ip6i->ip6i_nxt = IPPROTO_RAW;
6432 	}
6433 
6434 	/* Initialize IPv6 header */
6435 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6436 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
6437 
6438 	/* Set the hoplimit of the outgoing packet. */
6439 	if (option_exists & IPPF_HOPLIMIT) {
6440 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
6441 		ip6h->ip6_hops = ipp->ipp_hoplimit;
6442 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6443 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
6444 		ip6h->ip6_hops = udp->udp_multicast_ttl;
6445 		if (option_exists & IPPF_MULTICAST_HOPS)
6446 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6447 	} else {
6448 		ip6h->ip6_hops = udp->udp_ttl;
6449 		if (option_exists & IPPF_UNICAST_HOPS)
6450 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6451 	}
6452 
6453 	if (option_exists & IPPF_ADDR) {
6454 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
6455 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
6456 		ip6h->ip6_src = tipp->ipp_addr;
6457 	} else {
6458 		/*
6459 		 * The source address was not set using IPV6_PKTINFO.
6460 		 * First look at the bound source.
6461 		 * If unspecified fallback to __sin6_src_id.
6462 		 */
6463 		ip6h->ip6_src = udp->udp_v6src;
6464 		if (sin6->__sin6_src_id != 0 &&
6465 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
6466 			ip_srcid_find_id(sin6->__sin6_src_id,
6467 			    &ip6h->ip6_src, connp->conn_zoneid,
6468 			    us->us_netstack);
6469 		}
6470 	}
6471 
6472 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
6473 	cp = (uint8_t *)&ip6h[1];
6474 
6475 	/*
6476 	 * Here's where we have to start stringing together
6477 	 * any extension headers in the right order:
6478 	 * Hop-by-hop, destination, routing, and final destination opts.
6479 	 */
6480 	if (option_exists & IPPF_HOPOPTS) {
6481 		/* Hop-by-hop options */
6482 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
6483 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
6484 		if (hopoptslen == 0) {
6485 			hopoptsptr = tipp->ipp_hopopts;
6486 			hopoptslen = tipp->ipp_hopoptslen;
6487 			is_ancillary = B_TRUE;
6488 		}
6489 
6490 		*nxthdr_ptr = IPPROTO_HOPOPTS;
6491 		nxthdr_ptr = &hbh->ip6h_nxt;
6492 
6493 		bcopy(hopoptsptr, cp, hopoptslen);
6494 		cp += hopoptslen;
6495 
6496 		if (hopoptsptr != NULL && !is_ancillary) {
6497 			kmem_free(hopoptsptr, hopoptslen);
6498 			hopoptsptr = NULL;
6499 			hopoptslen = 0;
6500 		}
6501 	}
6502 	/*
6503 	 * En-route destination options
6504 	 * Only do them if there's a routing header as well
6505 	 */
6506 	if (option_exists & IPPF_RTDSTOPTS) {
6507 		ip6_dest_t *dst = (ip6_dest_t *)cp;
6508 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
6509 
6510 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6511 		nxthdr_ptr = &dst->ip6d_nxt;
6512 
6513 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
6514 		cp += tipp->ipp_rtdstoptslen;
6515 	}
6516 	/*
6517 	 * Routing header next
6518 	 */
6519 	if (option_exists & IPPF_RTHDR) {
6520 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
6521 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
6522 
6523 		*nxthdr_ptr = IPPROTO_ROUTING;
6524 		nxthdr_ptr = &rt->ip6r_nxt;
6525 
6526 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
6527 		cp += tipp->ipp_rthdrlen;
6528 	}
6529 	/*
6530 	 * Do ultimate destination options
6531 	 */
6532 	if (option_exists & IPPF_DSTOPTS) {
6533 		ip6_dest_t *dest = (ip6_dest_t *)cp;
6534 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
6535 
6536 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6537 		nxthdr_ptr = &dest->ip6d_nxt;
6538 
6539 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
6540 		cp += tipp->ipp_dstoptslen;
6541 	}
6542 	/*
6543 	 * Now set the last header pointer to the proto passed in
6544 	 */
6545 	ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
6546 	*nxthdr_ptr = IPPROTO_UDP;
6547 
6548 	/* Update UDP header */
6549 	udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
6550 	udph->uha_dst_port = sin6->sin6_port;
6551 	udph->uha_src_port = udp->udp_port;
6552 
6553 	/*
6554 	 * Copy in the destination address
6555 	 */
6556 	ip6h->ip6_dst = ip6_dst;
6557 
6558 	ip6h->ip6_vcf =
6559 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
6560 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
6561 
6562 	if (option_exists & IPPF_TCLASS) {
6563 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
6564 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
6565 		    tipp->ipp_tclass);
6566 	}
6567 	rw_exit(&udp->udp_rwlock);
6568 
6569 	if (option_exists & IPPF_RTHDR) {
6570 		ip6_rthdr_t	*rth;
6571 
6572 		/*
6573 		 * Perform any processing needed for source routing.
6574 		 * We know that all extension headers will be in the same mblk
6575 		 * as the IPv6 header.
6576 		 */
6577 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
6578 		if (rth != NULL && rth->ip6r_segleft != 0) {
6579 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
6580 				/*
6581 				 * Drop packet - only support Type 0 routing.
6582 				 * Notify the application as well.
6583 				 */
6584 				*error = EPROTO;
6585 				goto done;
6586 			}
6587 
6588 			/*
6589 			 * rth->ip6r_len is twice the number of
6590 			 * addresses in the header. Thus it must be even.
6591 			 */
6592 			if (rth->ip6r_len & 0x1) {
6593 				*error = EPROTO;
6594 				goto done;
6595 			}
6596 			/*
6597 			 * Shuffle the routing header and ip6_dst
6598 			 * addresses, and get the checksum difference
6599 			 * between the first hop (in ip6_dst) and
6600 			 * the destination (in the last routing hdr entry).
6601 			 */
6602 			csum = ip_massage_options_v6(ip6h, rth,
6603 			    us->us_netstack);
6604 			/*
6605 			 * Verify that the first hop isn't a mapped address.
6606 			 * Routers along the path need to do this verification
6607 			 * for subsequent hops.
6608 			 */
6609 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
6610 				*error = EADDRNOTAVAIL;
6611 				goto done;
6612 			}
6613 
6614 			cp += (rth->ip6r_len + 1)*8;
6615 		}
6616 	}
6617 
6618 	/* count up length of UDP packet */
6619 	ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
6620 	if ((mp2 = mp1->b_cont) != NULL) {
6621 		do {
6622 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
6623 			ip_len += (uint32_t)MBLKL(mp2);
6624 		} while ((mp2 = mp2->b_cont) != NULL);
6625 	}
6626 
6627 	/*
6628 	 * If the size of the packet is greater than the maximum allowed by
6629 	 * ip, return an error. Passing this down could cause panics because
6630 	 * the size will have wrapped and be inconsistent with the msg size.
6631 	 */
6632 	if (ip_len > IP_MAXPACKET) {
6633 		*error = EMSGSIZE;
6634 		goto done;
6635 	}
6636 
6637 	/* Store the UDP length. Subtract length of extension hdrs */
6638 	udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
6639 	    (int)((uchar_t *)udph - (uchar_t *)ip6h));
6640 
6641 	/*
6642 	 * We make it easy for IP to include our pseudo header
6643 	 * by putting our length in uh_checksum, modified (if
6644 	 * we have a routing header) by the checksum difference
6645 	 * between the ultimate destination and first hop addresses.
6646 	 * Note: UDP over IPv6 must always checksum the packet.
6647 	 */
6648 	csum += udph->uha_length;
6649 	csum = (csum & 0xFFFF) + (csum >> 16);
6650 	udph->uha_checksum = (uint16_t)csum;
6651 
6652 #ifdef _LITTLE_ENDIAN
6653 	ip_len = htons(ip_len);
6654 #endif
6655 	ip6h->ip6_plen = ip_len;
6656 	if (DB_CRED(mp) != NULL)
6657 		mblk_setcred(mp1, DB_CRED(mp));
6658 
6659 	if (DB_TYPE(mp) != M_DATA) {
6660 		ASSERT(mp != mp1);
6661 		freeb(mp);
6662 	}
6663 
6664 	/* mp has been consumed and we'll return success */
6665 	ASSERT(*error == 0);
6666 	mp = NULL;
6667 
6668 	/* We're done. Pass the packet to IP */
6669 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
6670 	ip_output_v6(connp, mp1, q, IP_WPUT);
6671 
6672 done:
6673 	if (sth_wroff != 0) {
6674 		(void) proto_set_tx_wroff(RD(q), connp,
6675 		    udp->udp_max_hdr_len + us->us_wroff_extra);
6676 	}
6677 	if (hopoptsptr != NULL && !is_ancillary) {
6678 		kmem_free(hopoptsptr, hopoptslen);
6679 		hopoptsptr = NULL;
6680 	}
6681 	if (*error != 0) {
6682 		ASSERT(mp != NULL);
6683 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6684 	}
6685 	return (mp);
6686 }
6687 
6688 
6689 static int
6690 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6691 {
6692 	sin_t *sin = (sin_t *)sa;
6693 	sin6_t *sin6 = (sin6_t *)sa;
6694 
6695 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6696 
6697 	if (udp->udp_state != TS_DATA_XFER)
6698 		return (ENOTCONN);
6699 
6700 	switch (udp->udp_family) {
6701 	case AF_INET:
6702 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6703 
6704 		if (*salenp < sizeof (sin_t))
6705 			return (EINVAL);
6706 
6707 		*salenp = sizeof (sin_t);
6708 		*sin = sin_null;
6709 		sin->sin_family = AF_INET;
6710 		sin->sin_port = udp->udp_dstport;
6711 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
6712 		break;
6713 
6714 	case AF_INET6:
6715 		if (*salenp < sizeof (sin6_t))
6716 			return (EINVAL);
6717 
6718 		*salenp = sizeof (sin6_t);
6719 		*sin6 = sin6_null;
6720 		sin6->sin6_family = AF_INET6;
6721 		sin6->sin6_port = udp->udp_dstport;
6722 		sin6->sin6_addr = udp->udp_v6dst;
6723 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6724 		break;
6725 	}
6726 
6727 	return (0);
6728 }
6729 
6730 static int
6731 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6732 {
6733 	sin_t *sin = (sin_t *)sa;
6734 	sin6_t *sin6 = (sin6_t *)sa;
6735 
6736 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6737 
6738 	switch (udp->udp_family) {
6739 	case AF_INET:
6740 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6741 
6742 		if (*salenp < sizeof (sin_t))
6743 			return (EINVAL);
6744 
6745 		*salenp = sizeof (sin_t);
6746 		*sin = sin_null;
6747 		sin->sin_family = AF_INET;
6748 		sin->sin_port = udp->udp_port;
6749 
6750 		/*
6751 		 * If udp_v6src is unspecified, we might be bound to broadcast
6752 		 * / multicast.  Use udp_bound_v6src as local address instead
6753 		 * (that could also still be unspecified).
6754 		 */
6755 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
6756 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
6757 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
6758 		} else {
6759 			sin->sin_addr.s_addr =
6760 			    V4_PART_OF_V6(udp->udp_bound_v6src);
6761 		}
6762 		break;
6763 
6764 	case AF_INET6:
6765 		if (*salenp < sizeof (sin6_t))
6766 			return (EINVAL);
6767 
6768 		*salenp = sizeof (sin6_t);
6769 		*sin6 = sin6_null;
6770 		sin6->sin6_family = AF_INET6;
6771 		sin6->sin6_port = udp->udp_port;
6772 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6773 
6774 		/*
6775 		 * If udp_v6src is unspecified, we might be bound to broadcast
6776 		 * / multicast.  Use udp_bound_v6src as local address instead
6777 		 * (that could also still be unspecified).
6778 		 */
6779 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))
6780 			sin6->sin6_addr = udp->udp_v6src;
6781 		else
6782 			sin6->sin6_addr = udp->udp_bound_v6src;
6783 		break;
6784 	}
6785 
6786 	return (0);
6787 }
6788 
6789 /*
6790  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
6791  */
6792 static void
6793 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
6794 {
6795 	void	*data;
6796 	mblk_t	*datamp = mp->b_cont;
6797 	udp_t	*udp = Q_TO_UDP(q);
6798 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
6799 
6800 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
6801 		cmdp->cb_error = EPROTO;
6802 		qreply(q, mp);
6803 		return;
6804 	}
6805 	data = datamp->b_rptr;
6806 
6807 	rw_enter(&udp->udp_rwlock, RW_READER);
6808 	switch (cmdp->cb_cmd) {
6809 	case TI_GETPEERNAME:
6810 		cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
6811 		break;
6812 	case TI_GETMYNAME:
6813 		cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
6814 		break;
6815 	default:
6816 		cmdp->cb_error = EINVAL;
6817 		break;
6818 	}
6819 	rw_exit(&udp->udp_rwlock);
6820 
6821 	qreply(q, mp);
6822 }
6823 
6824 static void
6825 udp_disable_direct_sockfs(udp_t *udp)
6826 {
6827 	udp->udp_issocket = B_FALSE;
6828 	if (udp->udp_direct_sockfs) {
6829 		/*
6830 		 * Disable read-side synchronous stream interface and
6831 		 * drain any queued data.
6832 		 */
6833 		udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE);
6834 		ASSERT(!udp->udp_direct_sockfs);
6835 		UDP_STAT(udp->udp_us, udp_sock_fallback);
6836 	}
6837 }
6838 
6839 static void
6840 udp_wput_other(queue_t *q, mblk_t *mp)
6841 {
6842 	uchar_t	*rptr = mp->b_rptr;
6843 	struct datab *db;
6844 	struct iocblk *iocp;
6845 	cred_t	*cr;
6846 	conn_t	*connp = Q_TO_CONN(q);
6847 	udp_t	*udp = connp->conn_udp;
6848 	udp_stack_t *us;
6849 
6850 	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
6851 	    "udp_wput_other_start: q %p", q);
6852 
6853 	us = udp->udp_us;
6854 	db = mp->b_datap;
6855 
6856 	cr = DB_CREDDEF(mp, connp->conn_cred);
6857 
6858 	switch (db->db_type) {
6859 	case M_CMD:
6860 		udp_wput_cmdblk(q, mp);
6861 		return;
6862 
6863 	case M_PROTO:
6864 	case M_PCPROTO:
6865 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
6866 			freemsg(mp);
6867 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6868 			    "udp_wput_other_end: q %p (%S)", q, "protoshort");
6869 			return;
6870 		}
6871 		switch (((t_primp_t)rptr)->type) {
6872 		case T_ADDR_REQ:
6873 			udp_addr_req(q, mp);
6874 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6875 			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
6876 			return;
6877 		case O_T_BIND_REQ:
6878 		case T_BIND_REQ:
6879 			udp_tpi_bind(q, mp);
6880 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6881 			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
6882 			return;
6883 		case T_CONN_REQ:
6884 			udp_tpi_connect(q, mp);
6885 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6886 			    "udp_wput_other_end: q %p (%S)", q, "connreq");
6887 			return;
6888 		case T_CAPABILITY_REQ:
6889 			udp_capability_req(q, mp);
6890 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6891 			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
6892 			return;
6893 		case T_INFO_REQ:
6894 			udp_info_req(q, mp);
6895 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6896 			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
6897 			return;
6898 		case T_UNITDATA_REQ:
6899 			/*
6900 			 * If a T_UNITDATA_REQ gets here, the address must
6901 			 * be bad.  Valid T_UNITDATA_REQs are handled
6902 			 * in udp_wput.
6903 			 */
6904 			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
6905 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6906 			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
6907 			return;
6908 		case T_UNBIND_REQ:
6909 			udp_tpi_unbind(q, mp);
6910 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6911 			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
6912 			return;
6913 		case T_SVR4_OPTMGMT_REQ:
6914 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
6915 			    cr)) {
6916 				(void) svr4_optcom_req(q,
6917 				    mp, cr, &udp_opt_obj, B_TRUE);
6918 			}
6919 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6920 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
6921 			return;
6922 
6923 		case T_OPTMGMT_REQ:
6924 			(void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
6925 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6926 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
6927 			return;
6928 
6929 		case T_DISCON_REQ:
6930 			udp_tpi_disconnect(q, mp);
6931 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6932 			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
6933 			return;
6934 
6935 		/* The following TPI message is not supported by udp. */
6936 		case O_T_CONN_RES:
6937 		case T_CONN_RES:
6938 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
6939 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6940 			    "udp_wput_other_end: q %p (%S)", q,
6941 			    "connres/disconreq");
6942 			return;
6943 
6944 		/* The following 3 TPI messages are illegal for udp. */
6945 		case T_DATA_REQ:
6946 		case T_EXDATA_REQ:
6947 		case T_ORDREL_REQ:
6948 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
6949 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6950 			    "udp_wput_other_end: q %p (%S)", q,
6951 			    "data/exdata/ordrel");
6952 			return;
6953 		default:
6954 			break;
6955 		}
6956 		break;
6957 	case M_FLUSH:
6958 		if (*rptr & FLUSHW)
6959 			flushq(q, FLUSHDATA);
6960 		break;
6961 	case M_IOCTL:
6962 		iocp = (struct iocblk *)mp->b_rptr;
6963 		switch (iocp->ioc_cmd) {
6964 		case TI_GETPEERNAME:
6965 			if (udp->udp_state != TS_DATA_XFER) {
6966 				/*
6967 				 * If a default destination address has not
6968 				 * been associated with the stream, then we
6969 				 * don't know the peer's name.
6970 				 */
6971 				iocp->ioc_error = ENOTCONN;
6972 				iocp->ioc_count = 0;
6973 				mp->b_datap->db_type = M_IOCACK;
6974 				qreply(q, mp);
6975 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6976 				    "udp_wput_other_end: q %p (%S)", q,
6977 				    "getpeername");
6978 				return;
6979 			}
6980 			/* FALLTHRU */
6981 		case TI_GETMYNAME: {
6982 			/*
6983 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
6984 			 * need to copyin the user's strbuf structure.
6985 			 * Processing will continue in the M_IOCDATA case
6986 			 * below.
6987 			 */
6988 			mi_copyin(q, mp, NULL,
6989 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
6990 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6991 			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
6992 			return;
6993 			}
6994 		case ND_SET:
6995 			/* nd_getset performs the necessary checking */
6996 		case ND_GET:
6997 			if (nd_getset(q, us->us_nd, mp)) {
6998 				qreply(q, mp);
6999 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7000 				    "udp_wput_other_end: q %p (%S)", q, "get");
7001 				return;
7002 			}
7003 			break;
7004 		case _SIOCSOCKFALLBACK:
7005 			/*
7006 			 * Either sockmod is about to be popped and the
7007 			 * socket would now be treated as a plain stream,
7008 			 * or a module is about to be pushed so we could
7009 			 * no longer use read-side synchronous stream.
7010 			 * Drain any queued data and disable direct sockfs
7011 			 * interface from now on.
7012 			 */
7013 			if (!udp->udp_issocket) {
7014 				DB_TYPE(mp) = M_IOCNAK;
7015 				iocp->ioc_error = EINVAL;
7016 			} else {
7017 				udp_disable_direct_sockfs(udp);
7018 
7019 				DB_TYPE(mp) = M_IOCACK;
7020 				iocp->ioc_error = 0;
7021 			}
7022 			iocp->ioc_count = 0;
7023 			iocp->ioc_rval = 0;
7024 			qreply(q, mp);
7025 			return;
7026 		default:
7027 			break;
7028 		}
7029 		break;
7030 	case M_IOCDATA:
7031 		udp_wput_iocdata(q, mp);
7032 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7033 		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
7034 		return;
7035 	default:
7036 		/* Unrecognized messages are passed through without change. */
7037 		break;
7038 	}
7039 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7040 	    "udp_wput_other_end: q %p (%S)", q, "end");
7041 	ip_output(connp, mp, q, IP_WPUT);
7042 }
7043 
7044 /*
7045  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
7046  * messages.
7047  */
7048 static void
7049 udp_wput_iocdata(queue_t *q, mblk_t *mp)
7050 {
7051 	mblk_t		*mp1;
7052 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
7053 	STRUCT_HANDLE(strbuf, sb);
7054 	udp_t		*udp = Q_TO_UDP(q);
7055 	int		error;
7056 	uint_t		addrlen;
7057 
7058 	/* Make sure it is one of ours. */
7059 	switch (iocp->ioc_cmd) {
7060 	case TI_GETMYNAME:
7061 	case TI_GETPEERNAME:
7062 		break;
7063 	default:
7064 		ip_output(udp->udp_connp, mp, q, IP_WPUT);
7065 		return;
7066 	}
7067 
7068 	switch (mi_copy_state(q, mp, &mp1)) {
7069 	case -1:
7070 		return;
7071 	case MI_COPY_CASE(MI_COPY_IN, 1):
7072 		break;
7073 	case MI_COPY_CASE(MI_COPY_OUT, 1):
7074 		/*
7075 		 * The address has been copied out, so now
7076 		 * copyout the strbuf.
7077 		 */
7078 		mi_copyout(q, mp);
7079 		return;
7080 	case MI_COPY_CASE(MI_COPY_OUT, 2):
7081 		/*
7082 		 * The address and strbuf have been copied out.
7083 		 * We're done, so just acknowledge the original
7084 		 * M_IOCTL.
7085 		 */
7086 		mi_copy_done(q, mp, 0);
7087 		return;
7088 	default:
7089 		/*
7090 		 * Something strange has happened, so acknowledge
7091 		 * the original M_IOCTL with an EPROTO error.
7092 		 */
7093 		mi_copy_done(q, mp, EPROTO);
7094 		return;
7095 	}
7096 
7097 	/*
7098 	 * Now we have the strbuf structure for TI_GETMYNAME
7099 	 * and TI_GETPEERNAME.  Next we copyout the requested
7100 	 * address and then we'll copyout the strbuf.
7101 	 */
7102 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
7103 	addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t);
7104 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
7105 		mi_copy_done(q, mp, EINVAL);
7106 		return;
7107 	}
7108 
7109 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
7110 
7111 	if (mp1 == NULL)
7112 		return;
7113 
7114 	rw_enter(&udp->udp_rwlock, RW_READER);
7115 	switch (iocp->ioc_cmd) {
7116 	case TI_GETMYNAME:
7117 		error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
7118 		break;
7119 	case TI_GETPEERNAME:
7120 		error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
7121 		break;
7122 	}
7123 	rw_exit(&udp->udp_rwlock);
7124 
7125 	if (error != 0) {
7126 		mi_copy_done(q, mp, error);
7127 	} else {
7128 		mp1->b_wptr += addrlen;
7129 		STRUCT_FSET(sb, len, addrlen);
7130 
7131 		/* Copy out the address */
7132 		mi_copyout(q, mp);
7133 	}
7134 }
7135 
7136 static int
7137 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
7138     udpattrs_t *udpattrs)
7139 {
7140 	struct T_unitdata_req *udreqp;
7141 	int is_absreq_failure;
7142 	cred_t *cr;
7143 	conn_t	*connp = Q_TO_CONN(q);
7144 
7145 	ASSERT(((t_primp_t)mp->b_rptr)->type);
7146 
7147 	cr = DB_CREDDEF(mp, connp->conn_cred);
7148 
7149 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
7150 
7151 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
7152 	    udreqp->OPT_offset, cr, &udp_opt_obj,
7153 	    udpattrs, &is_absreq_failure);
7154 
7155 	if (*errorp != 0) {
7156 		/*
7157 		 * Note: No special action needed in this
7158 		 * module for "is_absreq_failure"
7159 		 */
7160 		return (-1);		/* failure */
7161 	}
7162 	ASSERT(is_absreq_failure == 0);
7163 	return (0);	/* success */
7164 }
7165 
7166 void
7167 udp_ddi_g_init(void)
7168 {
7169 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
7170 	    udp_opt_obj.odb_opt_arr_cnt);
7171 
7172 	/*
7173 	 * We want to be informed each time a stack is created or
7174 	 * destroyed in the kernel, so we can maintain the
7175 	 * set of udp_stack_t's.
7176 	 */
7177 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
7178 }
7179 
7180 void
7181 udp_ddi_g_destroy(void)
7182 {
7183 	netstack_unregister(NS_UDP);
7184 }
7185 
7186 #define	INET_NAME	"ip"
7187 
7188 /*
7189  * Initialize the UDP stack instance.
7190  */
7191 static void *
7192 udp_stack_init(netstackid_t stackid, netstack_t *ns)
7193 {
7194 	udp_stack_t	*us;
7195 	udpparam_t	*pa;
7196 	int		i;
7197 	int		error = 0;
7198 	major_t		major;
7199 
7200 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
7201 	us->us_netstack = ns;
7202 
7203 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
7204 	us->us_epriv_ports[0] = 2049;
7205 	us->us_epriv_ports[1] = 4045;
7206 
7207 	/*
7208 	 * The smallest anonymous port in the priviledged port range which UDP
7209 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
7210 	 */
7211 	us->us_min_anonpriv_port = 512;
7212 
7213 	us->us_bind_fanout_size = udp_bind_fanout_size;
7214 
7215 	/* Roundup variable that might have been modified in /etc/system */
7216 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
7217 		/* Not a power of two. Round up to nearest power of two */
7218 		for (i = 0; i < 31; i++) {
7219 			if (us->us_bind_fanout_size < (1 << i))
7220 				break;
7221 		}
7222 		us->us_bind_fanout_size = 1 << i;
7223 	}
7224 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
7225 	    sizeof (udp_fanout_t), KM_SLEEP);
7226 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7227 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
7228 		    NULL);
7229 	}
7230 
7231 	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);
7232 
7233 	us->us_param_arr = pa;
7234 	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));
7235 
7236 	(void) udp_param_register(&us->us_nd,
7237 	    us->us_param_arr, A_CNT(udp_param_arr));
7238 
7239 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
7240 	us->us_mibkp = udp_kstat_init(stackid);
7241 
7242 	major = mod_name_to_major(INET_NAME);
7243 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
7244 	ASSERT(error == 0);
7245 	return (us);
7246 }
7247 
7248 /*
7249  * Free the UDP stack instance.
7250  */
7251 static void
7252 udp_stack_fini(netstackid_t stackid, void *arg)
7253 {
7254 	udp_stack_t *us = (udp_stack_t *)arg;
7255 	int i;
7256 
7257 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7258 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
7259 	}
7260 
7261 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
7262 	    sizeof (udp_fanout_t));
7263 
7264 	us->us_bind_fanout = NULL;
7265 
7266 	nd_free(&us->us_nd);
7267 	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
7268 	us->us_param_arr = NULL;
7269 
7270 	udp_kstat_fini(stackid, us->us_mibkp);
7271 	us->us_mibkp = NULL;
7272 
7273 	udp_kstat2_fini(stackid, us->us_kstat);
7274 	us->us_kstat = NULL;
7275 	bzero(&us->us_statistics, sizeof (us->us_statistics));
7276 
7277 	ldi_ident_release(us->us_ldi_ident);
7278 	kmem_free(us, sizeof (*us));
7279 }
7280 
7281 static void *
7282 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
7283 {
7284 	kstat_t *ksp;
7285 
7286 	udp_stat_t template = {
7287 		{ "udp_ip_send",		KSTAT_DATA_UINT64 },
7288 		{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
7289 		{ "udp_ire_null",		KSTAT_DATA_UINT64 },
7290 		{ "udp_drain",			KSTAT_DATA_UINT64 },
7291 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
7292 		{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
7293 		{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
7294 		{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
7295 		{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
7296 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
7297 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
7298 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
7299 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
7300 		{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
7301 		{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
7302 		{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
7303 		{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
7304 		{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
7305 		{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
7306 		{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
7307 		{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
7308 		{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
7309 		{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
7310 		{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
7311 		{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
7312 		{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
7313 		{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
7314 		{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
7315 #ifdef DEBUG
7316 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
7317 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
7318 #endif
7319 	};
7320 
7321 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
7322 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
7323 	    KSTAT_FLAG_VIRTUAL, stackid);
7324 
7325 	if (ksp == NULL)
7326 		return (NULL);
7327 
7328 	bcopy(&template, us_statisticsp, sizeof (template));
7329 	ksp->ks_data = (void *)us_statisticsp;
7330 	ksp->ks_private = (void *)(uintptr_t)stackid;
7331 
7332 	kstat_install(ksp);
7333 	return (ksp);
7334 }
7335 
7336 static void
7337 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
7338 {
7339 	if (ksp != NULL) {
7340 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7341 		kstat_delete_netstack(ksp, stackid);
7342 	}
7343 }
7344 
7345 static void *
7346 udp_kstat_init(netstackid_t stackid)
7347 {
7348 	kstat_t	*ksp;
7349 
7350 	udp_named_kstat_t template = {
7351 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
7352 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
7353 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
7354 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
7355 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
7356 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
7357 	};
7358 
7359 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
7360 	    KSTAT_TYPE_NAMED,
7361 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
7362 
7363 	if (ksp == NULL || ksp->ks_data == NULL)
7364 		return (NULL);
7365 
7366 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
7367 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
7368 
7369 	bcopy(&template, ksp->ks_data, sizeof (template));
7370 	ksp->ks_update = udp_kstat_update;
7371 	ksp->ks_private = (void *)(uintptr_t)stackid;
7372 
7373 	kstat_install(ksp);
7374 	return (ksp);
7375 }
7376 
7377 static void
7378 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
7379 {
7380 	if (ksp != NULL) {
7381 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7382 		kstat_delete_netstack(ksp, stackid);
7383 	}
7384 }
7385 
7386 static int
7387 udp_kstat_update(kstat_t *kp, int rw)
7388 {
7389 	udp_named_kstat_t *udpkp;
7390 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
7391 	netstack_t	*ns;
7392 	udp_stack_t	*us;
7393 
7394 	if ((kp == NULL) || (kp->ks_data == NULL))
7395 		return (EIO);
7396 
7397 	if (rw == KSTAT_WRITE)
7398 		return (EACCES);
7399 
7400 	ns = netstack_find_by_stackid(stackid);
7401 	if (ns == NULL)
7402 		return (-1);
7403 	us = ns->netstack_udp;
7404 	if (us == NULL) {
7405 		netstack_rele(ns);
7406 		return (-1);
7407 	}
7408 	udpkp = (udp_named_kstat_t *)kp->ks_data;
7409 
7410 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
7411 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
7412 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
7413 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
7414 	netstack_rele(ns);
7415 	return (0);
7416 }
7417 
7418 /*
7419  * Read-side synchronous stream info entry point, called as a
7420  * result of handling certain STREAMS ioctl operations.
7421  */
7422 static int
7423 udp_rinfop(queue_t *q, infod_t *dp)
7424 {
7425 	mblk_t	*mp;
7426 	uint_t	cmd = dp->d_cmd;
7427 	int	res = 0;
7428 	int	error = 0;
7429 	udp_t	*udp = Q_TO_UDP(q);
7430 	struct stdata *stp = STREAM(q);
7431 
7432 	mutex_enter(&udp->udp_drain_lock);
7433 	/* If shutdown on read has happened, return nothing */
7434 	mutex_enter(&stp->sd_lock);
7435 	if (stp->sd_flag & STREOF) {
7436 		mutex_exit(&stp->sd_lock);
7437 		goto done;
7438 	}
7439 	mutex_exit(&stp->sd_lock);
7440 
7441 	if ((mp = udp->udp_rcv_list_head) == NULL)
7442 		goto done;
7443 
7444 	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);
7445 
7446 	if (cmd & INFOD_COUNT) {
7447 		/*
7448 		 * Return the number of messages.
7449 		 */
7450 		dp->d_count += udp->udp_rcv_msgcnt;
7451 		res |= INFOD_COUNT;
7452 	}
7453 	if (cmd & INFOD_BYTES) {
7454 		/*
7455 		 * Return size of all data messages.
7456 		 */
7457 		dp->d_bytes += udp->udp_rcv_cnt;
7458 		res |= INFOD_BYTES;
7459 	}
7460 	if (cmd & INFOD_FIRSTBYTES) {
7461 		/*
7462 		 * Return size of first data message.
7463 		 */
7464 		dp->d_bytes = msgdsize(mp);
7465 		res |= INFOD_FIRSTBYTES;
7466 		dp->d_cmd &= ~INFOD_FIRSTBYTES;
7467 	}
7468 	if (cmd & INFOD_COPYOUT) {
7469 		mblk_t *mp1 = mp->b_cont;
7470 		int n;
7471 		/*
7472 		 * Return data contents of first message.
7473 		 */
7474 		ASSERT(DB_TYPE(mp1) == M_DATA);
7475 		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
7476 			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
7477 			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
7478 			    UIO_READ, dp->d_uiop)) != 0) {
7479 				goto done;
7480 			}
7481 			mp1 = mp1->b_cont;
7482 		}
7483 		res |= INFOD_COPYOUT;
7484 		dp->d_cmd &= ~INFOD_COPYOUT;
7485 	}
7486 done:
7487 	mutex_exit(&udp->udp_drain_lock);
7488 
7489 	dp->d_res |= res;
7490 
7491 	return (error);
7492 }
7493 
7494 /*
7495  * Read-side synchronous stream entry point.  This is called as a result
7496  * of recv/read operation done at sockfs, and is guaranteed to execute
7497  * outside of the interrupt thread context.  It returns a single datagram
7498  * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
7499  */
7500 static int
7501 udp_rrw(queue_t *q, struiod_t *dp)
7502 {
7503 	mblk_t	*mp;
7504 	udp_t	*udp = Q_TO_UDP(q);
7505 	udp_stack_t *us = udp->udp_us;
7506 
7507 	/*
7508 	 * Dequeue datagram from the head of the list and return
7509 	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
7510 	 * set/cleared depending on whether or not there's data
7511 	 * remaining in the list.
7512 	 */
7513 	mutex_enter(&udp->udp_drain_lock);
7514 	if (!udp->udp_direct_sockfs) {
7515 		mutex_exit(&udp->udp_drain_lock);
7516 		UDP_STAT(us, udp_rrw_busy);
7517 		return (EBUSY);
7518 	}
7519 	if ((mp = udp->udp_rcv_list_head) != NULL) {
7520 		uint_t size = msgdsize(mp);
7521 
7522 		/* Last datagram in the list? */
7523 		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
7524 			udp->udp_rcv_list_tail = NULL;
7525 		mp->b_next = NULL;
7526 
7527 		udp->udp_rcv_cnt -= size;
7528 		udp->udp_rcv_msgcnt--;
7529 		UDP_STAT(us, udp_rrw_msgcnt);
7530 
7531 		/* No longer flow-controlling? */
7532 		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
7533 		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
7534 			udp->udp_drain_qfull = B_FALSE;
7535 	}
7536 	if (udp->udp_rcv_list_head == NULL) {
7537 		/*
7538 		 * Either we just dequeued the last datagram or
7539 		 * we get here from sockfs and have nothing to
7540 		 * return; in this case clear RSLEEP.
7541 		 */
7542 		ASSERT(udp->udp_rcv_cnt == 0);
7543 		ASSERT(udp->udp_rcv_msgcnt == 0);
7544 		ASSERT(udp->udp_rcv_list_tail == NULL);
7545 		STR_WAKEUP_CLEAR(STREAM(q));
7546 	} else {
7547 		/*
7548 		 * More data follows; we need udp_rrw() to be
7549 		 * called in future to pick up the rest.
7550 		 */
7551 		STR_WAKEUP_SET(STREAM(q));
7552 	}
7553 	mutex_exit(&udp->udp_drain_lock);
7554 	dp->d_mp = mp;
7555 	return (0);
7556 }
7557 
7558 /*
7559  * Enqueue a completely-built T_UNITDATA_IND message into the receive
7560  * list; this is typically executed within the interrupt thread context
7561  * and so we do things as quickly as possible.
7562  */
7563 static void
7564 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
7565 {
7566 	ASSERT(q == RD(q));
7567 	ASSERT(pkt_len == msgdsize(mp));
7568 	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
7569 	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
7570 	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));
7571 
7572 	mutex_enter(&udp->udp_drain_lock);
7573 	/*
7574 	 * Wake up and signal the receiving app; it is okay to do this
7575 	 * before enqueueing the mp because we are holding the drain lock.
7576 	 * One of the advantages of synchronous stream is the ability for
7577 	 * us to find out when the application performs a read on the
7578 	 * socket by way of udp_rrw() entry point being called.  We need
7579 	 * to generate SIGPOLL/SIGIO for each received data in the case
7580 	 * of asynchronous socket just as in the strrput() case.  However,
7581 	 * we only wake the application up when necessary, i.e. during the
7582 	 * first enqueue.  When udp_rrw() is called, we send up a single
7583 	 * datagram upstream and call STR_WAKEUP_SET() again when there
7584 	 * are still data remaining in our receive queue.
7585 	 */
7586 	STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
7587 	if (udp->udp_rcv_list_head == NULL)
7588 		udp->udp_rcv_list_head = mp;
7589 	else
7590 		udp->udp_rcv_list_tail->b_next = mp;
7591 	udp->udp_rcv_list_tail = mp;
7592 	udp->udp_rcv_cnt += pkt_len;
7593 	udp->udp_rcv_msgcnt++;
7594 
7595 	/* Need to flow-control? */
7596 	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
7597 	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
7598 		udp->udp_drain_qfull = B_TRUE;
7599 
7600 	mutex_exit(&udp->udp_drain_lock);
7601 }
7602 
7603 /*
7604  * Drain the contents of receive list to the module upstream; we do
7605  * this during close or when we fallback to the slow mode due to
7606  * sockmod being popped or a module being pushed on top of us.
7607  */
7608 static void
7609 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
7610 {
7611 	mblk_t *mp;
7612 	udp_stack_t *us = udp->udp_us;
7613 
7614 	mutex_enter(&udp->udp_drain_lock);
7615 	/*
7616 	 * There is no race with a concurrent udp_input() sending
7617 	 * up packets using putnext() after we have cleared the
7618 	 * udp_direct_sockfs flag but before we have completed
7619 	 * sending up the packets in udp_rcv_list, since we are
7620 	 * either a writer or we have quiesced the conn.
7621 	 */
7622 	udp->udp_direct_sockfs = B_FALSE;
7623 	mutex_exit(&udp->udp_drain_lock);
7624 
7625 	if (udp->udp_rcv_list_head != NULL)
7626 		UDP_STAT(us, udp_drain);
7627 
7628 	/*
7629 	 * Send up everything via putnext(); note here that we
7630 	 * don't need the udp_drain_lock to protect us since
7631 	 * nothing can enter udp_rrw() and that we currently
7632 	 * have exclusive access to this udp.
7633 	 */
7634 	while ((mp = udp->udp_rcv_list_head) != NULL) {
7635 		udp->udp_rcv_list_head = mp->b_next;
7636 		mp->b_next = NULL;
7637 		udp->udp_rcv_cnt -= msgdsize(mp);
7638 		udp->udp_rcv_msgcnt--;
7639 		if (closing) {
7640 			freemsg(mp);
7641 		} else {
7642 			ASSERT(q == RD(q));
7643 			putnext(q, mp);
7644 		}
7645 	}
7646 	ASSERT(udp->udp_rcv_cnt == 0);
7647 	ASSERT(udp->udp_rcv_msgcnt == 0);
7648 	ASSERT(udp->udp_rcv_list_head == NULL);
7649 	udp->udp_rcv_list_tail = NULL;
7650 	udp->udp_drain_qfull = B_FALSE;
7651 }
7652 
7653 static size_t
7654 udp_set_rcv_hiwat(udp_t *udp, size_t size)
7655 {
7656 	udp_stack_t *us = udp->udp_us;
7657 
7658 	/* We add a bit of extra buffering */
7659 	size += size >> 1;
7660 	if (size > us->us_max_buf)
7661 		size = us->us_max_buf;
7662 
7663 	udp->udp_rcv_hiwat = size;
7664 	return (size);
7665 }
7666 
7667 /*
7668  * For the lower queue so that UDP can be a dummy mux.
7669  * Nobody should be sending
7670  * packets up this stream
7671  */
7672 static void
7673 udp_lrput(queue_t *q, mblk_t *mp)
7674 {
7675 	mblk_t *mp1;
7676 
7677 	switch (mp->b_datap->db_type) {
7678 	case M_FLUSH:
7679 		/* Turn around */
7680 		if (*mp->b_rptr & FLUSHW) {
7681 			*mp->b_rptr &= ~FLUSHR;
7682 			qreply(q, mp);
7683 			return;
7684 		}
7685 		break;
7686 	}
7687 	/* Could receive messages that passed through ar_rput */
7688 	for (mp1 = mp; mp1; mp1 = mp1->b_cont)
7689 		mp1->b_prev = mp1->b_next = NULL;
7690 	freemsg(mp);
7691 }
7692 
7693 /*
7694  * For the lower queue so that UDP can be a dummy mux.
7695  * Nobody should be sending packets down this stream.
7696  */
7697 /* ARGSUSED */
7698 void
7699 udp_lwput(queue_t *q, mblk_t *mp)
7700 {
7701 	freemsg(mp);
7702 }
7703 
/*
 * The routines below are for the UDP socket module.
 */
7707 
7708 static conn_t *
7709 udp_do_open(cred_t *credp, boolean_t isv6, int flags)
7710 {
7711 	udp_t		*udp;
7712 	conn_t		*connp;
7713 	zoneid_t 	zoneid;
7714 	netstack_t 	*ns;
7715 	udp_stack_t 	*us;
7716 
7717 	ns = netstack_find_by_cred(credp);
7718 	ASSERT(ns != NULL);
7719 	us = ns->netstack_udp;
7720 	ASSERT(us != NULL);
7721 
7722 	/*
7723 	 * For exclusive stacks we set the zoneid to zero
7724 	 * to make UDP operate as if in the global zone.
7725 	 */
7726 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
7727 		zoneid = GLOBAL_ZONEID;
7728 	else
7729 		zoneid = crgetzoneid(credp);
7730 
7731 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
7732 
7733 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
7734 	if (connp == NULL) {
7735 		netstack_rele(ns);
7736 		return (NULL);
7737 	}
7738 	udp = connp->conn_udp;
7739 
7740 	/*
7741 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
7742 	 * done by netstack_find_by_cred()
7743 	 */
7744 	netstack_rele(ns);
7745 
7746 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7747 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
7748 	ASSERT(connp->conn_udp == udp);
7749 	ASSERT(udp->udp_connp == connp);
7750 
7751 	/* Set the initial state of the stream and the privilege status. */
7752 	udp->udp_state = TS_UNBND;
7753 	if (isv6) {
7754 		udp->udp_family = AF_INET6;
7755 		udp->udp_ipversion = IPV6_VERSION;
7756 		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
7757 		udp->udp_ttl = us->us_ipv6_hoplimit;
7758 		connp->conn_af_isv6 = B_TRUE;
7759 		connp->conn_flags |= IPCL_ISV6;
7760 	} else {
7761 		udp->udp_family = AF_INET;
7762 		udp->udp_ipversion = IPV4_VERSION;
7763 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
7764 		udp->udp_ttl = us->us_ipv4_ttl;
7765 		connp->conn_af_isv6 = B_FALSE;
7766 		connp->conn_flags &= ~IPCL_ISV6;
7767 	}
7768 
7769 	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
7770 	udp->udp_pending_op = -1;
7771 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
7772 	connp->conn_zoneid = zoneid;
7773 
7774 	udp->udp_open_time = lbolt64;
7775 	udp->udp_open_pid = curproc->p_pid;
7776 
7777 	/*
7778 	 * If the caller has the process-wide flag set, then default to MAC
7779 	 * exempt mode.  This allows read-down to unlabeled hosts.
7780 	 */
7781 	if (getpflags(NET_MAC_AWARE, credp) != 0)
7782 		connp->conn_mac_exempt = B_TRUE;
7783 
7784 	connp->conn_ulp_labeled = is_system_labeled();
7785 
7786 	udp->udp_us = us;
7787 
7788 	connp->conn_recv = udp_input;
7789 	crhold(credp);
7790 	connp->conn_cred = credp;
7791 
7792 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
7793 
7794 	rw_exit(&udp->udp_rwlock);
7795 
7796 	return (connp);
7797 }
7798 
7799 /* ARGSUSED */
7800 sock_lower_handle_t
7801 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
7802     uint_t *smodep, int *errorp, int flags, cred_t *credp)
7803 {
7804 	udp_t		*udp = NULL;
7805 	udp_stack_t	*us;
7806 	conn_t		*connp;
7807 	boolean_t	isv6;
7808 
7809 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
7810 	    (proto != 0 && proto != IPPROTO_UDP)) {
7811 		*errorp = EPROTONOSUPPORT;
7812 		return (NULL);
7813 	}
7814 
7815 	if (family == AF_INET6)
7816 		isv6 = B_TRUE;
7817 	else
7818 		isv6 = B_FALSE;
7819 
7820 	connp = udp_do_open(credp, isv6, flags);
7821 	if (connp == NULL) {
7822 		*errorp = ENOMEM;
7823 		return (NULL);
7824 	}
7825 
7826 	udp = connp->conn_udp;
7827 	ASSERT(udp != NULL);
7828 	us = udp->udp_us;
7829 	ASSERT(us != NULL);
7830 
7831 	connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;
7832 
7833 	/* Set flow control */
7834 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7835 	(void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
7836 	udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
7837 	udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
7838 	udp->udp_xmit_hiwat = us->us_xmit_hiwat;
7839 	udp->udp_xmit_lowat = us->us_xmit_lowat;
7840 
7841 	if (udp->udp_family == AF_INET6) {
7842 		/* Build initial header template for transmit */
7843 		if ((*errorp = udp_build_hdrs(udp)) != 0) {
7844 			rw_exit(&udp->udp_rwlock);
7845 			ipcl_conn_destroy(connp);
7846 			return (NULL);
7847 		}
7848 	}
7849 	rw_exit(&udp->udp_rwlock);
7850 
7851 	connp->conn_flow_cntrld = B_FALSE;
7852 
7853 	ASSERT(us->us_ldi_ident != NULL);
7854 
7855 	if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
7856 		ip1dbg(("udp_create: create of IP helper stream failed\n"));
7857 		udp_do_close(connp);
7858 		return (NULL);
7859 	}
7860 
7861 	/* Set the send flow control */
7862 	connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
7863 	connp->conn_wq->q_lowat = us->us_xmit_lowat;
7864 
7865 	mutex_enter(&connp->conn_lock);
7866 	connp->conn_state_flags &= ~CONN_INCIPIENT;
7867 	mutex_exit(&connp->conn_lock);
7868 
7869 	*errorp = 0;
7870 	*smodep = SM_ATOMIC;
7871 	*sock_downcalls = &sock_udp_downcalls;
7872 	return ((sock_lower_handle_t)connp);
7873 }
7874 
7875 /* ARGSUSED */
7876 void
7877 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
7878     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
7879 {
7880 	conn_t 		*connp = (conn_t *)proto_handle;
7881 	udp_t 		*udp = connp->conn_udp;
7882 	udp_stack_t	*us = udp->udp_us;
7883 	struct sock_proto_props sopp;
7884 
7885 	connp->conn_upcalls = sock_upcalls;
7886 	connp->conn_upper_handle = sock_handle;
7887 
7888 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
7889 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
7890 	sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
7891 	sopp.sopp_maxblk = INFPSZ;
7892 	sopp.sopp_rxhiwat = udp->udp_rcv_hiwat;
7893 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
7894 	sopp.sopp_maxpsz =
7895 	    (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
7896 	    UDP_MAXPACKET_IPV6;
7897 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
7898 	    udp_mod_info.mi_minpsz;
7899 
7900 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
7901 	    &sopp);
7902 }
7903 
7904 static void
7905 udp_do_close(conn_t *connp)
7906 {
7907 	udp_t	*udp;
7908 
7909 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
7910 	udp = connp->conn_udp;
7911 
7912 	udp_quiesce_conn(connp);
7913 	ip_quiesce_conn(connp);
7914 
7915 	if (!IPCL_IS_NONSTR(connp)) {
7916 		/*
7917 		 * Disable read-side synchronous stream
7918 		 * interface and drain any queued data.
7919 		 */
7920 		ASSERT(connp->conn_wq != NULL);
7921 		udp_rcv_drain(connp->conn_wq, udp, B_TRUE);
7922 		ASSERT(!udp->udp_direct_sockfs);
7923 
7924 		ASSERT(connp->conn_rq != NULL);
7925 		qprocsoff(connp->conn_rq);
7926 	}
7927 
7928 	ASSERT(udp->udp_rcv_cnt == 0);
7929 	ASSERT(udp->udp_rcv_msgcnt == 0);
7930 	ASSERT(udp->udp_rcv_list_head == NULL);
7931 	ASSERT(udp->udp_rcv_list_tail == NULL);
7932 
7933 	udp_close_free(connp);
7934 
7935 	/*
7936 	 * Now we are truly single threaded on this stream, and can
7937 	 * delete the things hanging off the connp, and finally the connp.
7938 	 * We removed this connp from the fanout list, it cannot be
7939 	 * accessed thru the fanouts, and we already waited for the
7940 	 * conn_ref to drop to 0. We are already in close, so
7941 	 * there cannot be any other thread from the top. qprocsoff
7942 	 * has completed, and service has completed or won't run in
7943 	 * future.
7944 	 */
7945 	ASSERT(connp->conn_ref == 1);
7946 	if (!IPCL_IS_NONSTR(connp)) {
7947 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
7948 	} else {
7949 		ip_free_helper_stream(connp);
7950 	}
7951 
7952 	connp->conn_ref--;
7953 	ipcl_conn_destroy(connp);
7954 }
7955 
7956 /* ARGSUSED */
7957 int
7958 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
7959 {
7960 	conn_t	*connp = (conn_t *)proto_handle;
7961 
7962 	udp_do_close(connp);
7963 	return (0);
7964 }
7965 
7966 static int
7967 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
7968     boolean_t bind_to_req_port_only)
7969 {
7970 	sin_t		*sin;
7971 	sin6_t		*sin6;
7972 	sin6_t		sin6addr;
7973 	in_port_t	port;		/* Host byte order */
7974 	in_port_t	requested_port;	/* Host byte order */
7975 	int		count;
7976 	in6_addr_t	v6src;
7977 	int		loopmax;
7978 	udp_fanout_t	*udpf;
7979 	in_port_t	lport;		/* Network byte order */
7980 	zoneid_t	zoneid;
7981 	udp_t		*udp;
7982 	boolean_t	is_inaddr_any;
7983 	mlp_type_t	addrtype, mlptype;
7984 	udp_stack_t	*us;
7985 	int		error = 0;
7986 	mblk_t		*mp = NULL;
7987 
7988 	udp = connp->conn_udp;
7989 	us = udp->udp_us;
7990 
7991 	if (udp->udp_state != TS_UNBND) {
7992 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
7993 		    "udp_bind: bad state, %u", udp->udp_state);
7994 		return (-TOUTSTATE);
7995 	}
7996 
7997 	switch (len) {
7998 	case 0:
7999 		if (udp->udp_family == AF_INET) {
8000 			sin = (sin_t *)&sin6addr;
8001 			*sin = sin_null;
8002 			sin->sin_family = AF_INET;
8003 			sin->sin_addr.s_addr = INADDR_ANY;
8004 			udp->udp_ipversion = IPV4_VERSION;
8005 		} else {
8006 			ASSERT(udp->udp_family == AF_INET6);
8007 			sin6 = (sin6_t *)&sin6addr;
8008 			*sin6 = sin6_null;
8009 			sin6->sin6_family = AF_INET6;
8010 			V6_SET_ZERO(sin6->sin6_addr);
8011 			udp->udp_ipversion = IPV6_VERSION;
8012 		}
8013 		port = 0;
8014 		break;
8015 
8016 	case sizeof (sin_t):	/* Complete IPv4 address */
8017 		sin = (sin_t *)sa;
8018 
8019 		if (sin == NULL || !OK_32PTR((char *)sin))
8020 			return (EINVAL);
8021 
8022 		if (udp->udp_family != AF_INET ||
8023 		    sin->sin_family != AF_INET) {
8024 			return (EAFNOSUPPORT);
8025 		}
8026 		port = ntohs(sin->sin_port);
8027 		break;
8028 
8029 	case sizeof (sin6_t):	/* complete IPv6 address */
8030 		sin6 = (sin6_t *)sa;
8031 
8032 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
8033 			return (EINVAL);
8034 
8035 		if (udp->udp_family != AF_INET6 ||
8036 		    sin6->sin6_family != AF_INET6) {
8037 			return (EAFNOSUPPORT);
8038 		}
8039 		port = ntohs(sin6->sin6_port);
8040 		break;
8041 
8042 	default:		/* Invalid request */
8043 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8044 		    "udp_bind: bad ADDR_length length %u", len);
8045 		return (-TBADADDR);
8046 	}
8047 
8048 	requested_port = port;
8049 
8050 	if (requested_port == 0 || !bind_to_req_port_only)
8051 		bind_to_req_port_only = B_FALSE;
8052 	else		/* T_BIND_REQ and requested_port != 0 */
8053 		bind_to_req_port_only = B_TRUE;
8054 
8055 	if (requested_port == 0) {
8056 		/*
8057 		 * If the application passed in zero for the port number, it
8058 		 * doesn't care which port number we bind to. Get one in the
8059 		 * valid range.
8060 		 */
8061 		if (udp->udp_anon_priv_bind) {
8062 			port = udp_get_next_priv_port(udp);
8063 		} else {
8064 			port = udp_update_next_port(udp,
8065 			    us->us_next_port_to_try, B_TRUE);
8066 		}
8067 	} else {
8068 		/*
8069 		 * If the port is in the well-known privileged range,
8070 		 * make sure the caller was privileged.
8071 		 */
8072 		int i;
8073 		boolean_t priv = B_FALSE;
8074 
8075 		if (port < us->us_smallest_nonpriv_port) {
8076 			priv = B_TRUE;
8077 		} else {
8078 			for (i = 0; i < us->us_num_epriv_ports; i++) {
8079 				if (port == us->us_epriv_ports[i]) {
8080 					priv = B_TRUE;
8081 					break;
8082 				}
8083 			}
8084 		}
8085 
8086 		if (priv) {
8087 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
8088 				return (-TACCES);
8089 		}
8090 	}
8091 
8092 	if (port == 0)
8093 		return (-TNOADDR);
8094 
8095 	/*
8096 	 * The state must be TS_UNBND. TPI mandates that users must send
8097 	 * TPI primitives only 1 at a time and wait for the response before
8098 	 * sending the next primitive.
8099 	 */
8100 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8101 	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
8102 		rw_exit(&udp->udp_rwlock);
8103 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8104 		    "udp_bind: bad state, %u", udp->udp_state);
8105 		return (-TOUTSTATE);
8106 	}
8107 	/* XXX how to remove the T_BIND_REQ? Should set it before calling */
8108 	udp->udp_pending_op = T_BIND_REQ;
8109 	/*
8110 	 * Copy the source address into our udp structure. This address
8111 	 * may still be zero; if so, IP will fill in the correct address
8112 	 * each time an outbound packet is passed to it. Since the udp is
8113 	 * not yet in the bind hash list, we don't grab the uf_lock to
8114 	 * change udp_ipversion
8115 	 */
8116 	if (udp->udp_family == AF_INET) {
8117 		ASSERT(sin != NULL);
8118 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8119 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8120 		    udp->udp_ip_snd_options_len;
8121 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
8122 	} else {
8123 		ASSERT(sin6 != NULL);
8124 		v6src = sin6->sin6_addr;
8125 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
8126 			/*
8127 			 * no need to hold the uf_lock to set the udp_ipversion
8128 			 * since we are not yet in the fanout list
8129 			 */
8130 			udp->udp_ipversion = IPV4_VERSION;
8131 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
8132 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
8133 		} else {
8134 			udp->udp_ipversion = IPV6_VERSION;
8135 			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8136 		}
8137 	}
8138 
8139 	/*
8140 	 * If udp_reuseaddr is not set, then we have to make sure that
8141 	 * the IP address and port number the application requested
8142 	 * (or we selected for the application) is not being used by
8143 	 * another stream.  If another stream is already using the
8144 	 * requested IP address and port, the behavior depends on
8145 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
8146 	 * search for an unused port to bind to the stream.
8147 	 *
8148 	 * As per the BSD semantics, as modified by the Deering multicast
8149 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
8150 	 * to the same port independent of the local IP address.
8151 	 *
8152 	 * This is slightly different than in SunOS 4.X which did not
8153 	 * support IP multicast. Note that the change implemented by the
8154 	 * Deering multicast code effects all binds - not only binding
8155 	 * to IP multicast addresses.
8156 	 *
8157 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
8158 	 * order to guarantee a unique port.
8159 	 */
8160 
8161 	count = 0;
8162 	if (udp->udp_anon_priv_bind) {
8163 		/*
8164 		 * loopmax = (IPPORT_RESERVED-1) -
8165 		 *    us->us_min_anonpriv_port + 1
8166 		 */
8167 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
8168 	} else {
8169 		loopmax = us->us_largest_anon_port -
8170 		    us->us_smallest_anon_port + 1;
8171 	}
8172 
8173 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
8174 	zoneid = connp->conn_zoneid;
8175 
8176 	for (;;) {
8177 		udp_t		*udp1;
8178 		boolean_t	found_exclbind = B_FALSE;
8179 
8180 		/*
8181 		 * Walk through the list of udp streams bound to
8182 		 * requested port with the same IP address.
8183 		 */
8184 		lport = htons(port);
8185 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
8186 		    us->us_bind_fanout_size)];
8187 		mutex_enter(&udpf->uf_lock);
8188 		for (udp1 = udpf->uf_udp; udp1 != NULL;
8189 		    udp1 = udp1->udp_bind_hash) {
8190 			if (lport != udp1->udp_port)
8191 				continue;
8192 
8193 			/*
8194 			 * On a labeled system, we must treat bindings to ports
8195 			 * on shared IP addresses by sockets with MAC exemption
8196 			 * privilege as being in all zones, as there's
8197 			 * otherwise no way to identify the right receiver.
8198 			 */
8199 			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
8200 			    IPCL_ZONE_MATCH(connp,
8201 			    udp1->udp_connp->conn_zoneid)) &&
8202 			    !connp->conn_mac_exempt && \
8203 			    !udp1->udp_connp->conn_mac_exempt)
8204 				continue;
8205 
8206 			/*
8207 			 * If UDP_EXCLBIND is set for either the bound or
8208 			 * binding endpoint, the semantics of bind
8209 			 * is changed according to the following chart.
8210 			 *
8211 			 * spec = specified address (v4 or v6)
8212 			 * unspec = unspecified address (v4 or v6)
8213 			 * A = specified addresses are different for endpoints
8214 			 *
8215 			 * bound	bind to		allowed?
8216 			 * -------------------------------------
8217 			 * unspec	unspec		no
8218 			 * unspec	spec		no
8219 			 * spec		unspec		no
8220 			 * spec		spec		yes if A
8221 			 *
8222 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
8223 			 * as UDP_EXCLBIND, except that zoneid is ignored.
8224 			 */
8225 			if (udp1->udp_exclbind || udp->udp_exclbind ||
8226 			    udp1->udp_connp->conn_mac_exempt ||
8227 			    connp->conn_mac_exempt) {
8228 				if (V6_OR_V4_INADDR_ANY(
8229 				    udp1->udp_bound_v6src) ||
8230 				    is_inaddr_any ||
8231 				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8232 				    &v6src)) {
8233 					found_exclbind = B_TRUE;
8234 					break;
8235 				}
8236 				continue;
8237 			}
8238 
8239 			/*
8240 			 * Check ipversion to allow IPv4 and IPv6 sockets to
8241 			 * have disjoint port number spaces.
8242 			 */
8243 			if (udp->udp_ipversion != udp1->udp_ipversion) {
8244 
8245 				/*
8246 				 * On the first time through the loop, if the
8247 				 * user intentionally specified a
8248 				 * particular port number, then ignore any
8249 				 * bindings of the other protocol that may
8250 				 * conflict. This allows the user to bind IPv6
8251 				 * alone and get both v4 and v6, or bind both
8252 				 * and get each separately. On subsequent
8253 				 * times through the loop, we're checking a
8254 				 * port that we chose (not the user) and thus
8255 				 * we do not allow casual duplicate bindings.
8256 				 */
8257 				if (count == 0 && requested_port != 0)
8258 					continue;
8259 			}
8260 
8261 			/*
8262 			 * No difference depending on SO_REUSEADDR.
8263 			 *
8264 			 * If existing port is bound to a
8265 			 * non-wildcard IP address and
8266 			 * the requesting stream is bound to
8267 			 * a distinct different IP addresses
8268 			 * (non-wildcard, also), keep going.
8269 			 */
8270 			if (!is_inaddr_any &&
8271 			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
8272 			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8273 			    &v6src)) {
8274 				continue;
8275 			}
8276 			break;
8277 		}
8278 
8279 		if (!found_exclbind &&
8280 		    (udp->udp_reuseaddr && requested_port != 0)) {
8281 			break;
8282 		}
8283 
8284 		if (udp1 == NULL) {
8285 			/*
8286 			 * No other stream has this IP address
8287 			 * and port number. We can use it.
8288 			 */
8289 			break;
8290 		}
8291 		mutex_exit(&udpf->uf_lock);
8292 		if (bind_to_req_port_only) {
8293 			/*
8294 			 * We get here only when requested port
8295 			 * is bound (and only first  of the for()
8296 			 * loop iteration).
8297 			 *
8298 			 * The semantics of this bind request
8299 			 * require it to fail so we return from
8300 			 * the routine (and exit the loop).
8301 			 *
8302 			 */
8303 			udp->udp_pending_op = -1;
8304 			rw_exit(&udp->udp_rwlock);
8305 			return (-TADDRBUSY);
8306 		}
8307 
8308 		if (udp->udp_anon_priv_bind) {
8309 			port = udp_get_next_priv_port(udp);
8310 		} else {
8311 			if ((count == 0) && (requested_port != 0)) {
8312 				/*
8313 				 * If the application wants us to find
8314 				 * a port, get one to start with. Set
8315 				 * requested_port to 0, so that we will
8316 				 * update us->us_next_port_to_try below.
8317 				 */
8318 				port = udp_update_next_port(udp,
8319 				    us->us_next_port_to_try, B_TRUE);
8320 				requested_port = 0;
8321 			} else {
8322 				port = udp_update_next_port(udp, port + 1,
8323 				    B_FALSE);
8324 			}
8325 		}
8326 
8327 		if (port == 0 || ++count >= loopmax) {
8328 			/*
8329 			 * We've tried every possible port number and
8330 			 * there are none available, so send an error
8331 			 * to the user.
8332 			 */
8333 			udp->udp_pending_op = -1;
8334 			rw_exit(&udp->udp_rwlock);
8335 			return (-TNOADDR);
8336 		}
8337 	}
8338 
8339 	/*
8340 	 * Copy the source address into our udp structure.  This address
8341 	 * may still be zero; if so, ip will fill in the correct address
8342 	 * each time an outbound packet is passed to it.
8343 	 * If we are binding to a broadcast or multicast address then
8344 	 * udp_post_ip_bind_connect will clear the source address
8345 	 * when udp_do_bind succeeds.
8346 	 */
8347 	udp->udp_v6src = udp->udp_bound_v6src = v6src;
8348 	udp->udp_port = lport;
8349 	/*
8350 	 * Now reset the next anonymous port if the application requested
8351 	 * an anonymous port, or we handed out the next anonymous port.
8352 	 */
8353 	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
8354 		us->us_next_port_to_try = port + 1;
8355 	}
8356 
8357 	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
8358 	if (udp->udp_family == AF_INET) {
8359 		sin->sin_port = udp->udp_port;
8360 	} else {
8361 		sin6->sin6_port = udp->udp_port;
8362 		/* Rebuild the header template */
8363 		error = udp_build_hdrs(udp);
8364 		if (error != 0) {
8365 			udp->udp_pending_op = -1;
8366 			rw_exit(&udp->udp_rwlock);
8367 			mutex_exit(&udpf->uf_lock);
8368 			return (error);
8369 		}
8370 	}
8371 	udp->udp_state = TS_IDLE;
8372 	udp_bind_hash_insert(udpf, udp);
8373 	mutex_exit(&udpf->uf_lock);
8374 	rw_exit(&udp->udp_rwlock);
8375 
8376 	if (cl_inet_bind) {
8377 		/*
8378 		 * Running in cluster mode - register bind information
8379 		 */
8380 		if (udp->udp_ipversion == IPV4_VERSION) {
8381 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8382 			    IPPROTO_UDP, AF_INET,
8383 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8384 			    (in_port_t)udp->udp_port, NULL);
8385 		} else {
8386 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8387 			    IPPROTO_UDP, AF_INET6,
8388 			    (uint8_t *)&(udp->udp_v6src),
8389 			    (in_port_t)udp->udp_port, NULL);
8390 		}
8391 	}
8392 
8393 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
8394 	if (is_system_labeled() && (!connp->conn_anon_port ||
8395 	    connp->conn_anon_mlp)) {
8396 		uint16_t mlpport;
8397 		cred_t *cr = connp->conn_cred;
8398 		zone_t *zone;
8399 
8400 		zone = crgetzone(cr);
8401 		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
8402 		    mlptSingle;
8403 		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
8404 		    &v6src, us->us_netstack->netstack_ip);
8405 		if (addrtype == mlptSingle) {
8406 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8407 			udp->udp_pending_op = -1;
8408 			rw_exit(&udp->udp_rwlock);
8409 			connp->conn_anon_port = B_FALSE;
8410 			connp->conn_mlp_type = mlptSingle;
8411 			return (-TNOADDR);
8412 		}
8413 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
8414 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
8415 		    addrtype);
8416 		if (mlptype != mlptSingle &&
8417 		    (connp->conn_mlp_type == mlptSingle ||
8418 		    secpolicy_net_bindmlp(cr) != 0)) {
8419 			if (udp->udp_debug) {
8420 				(void) strlog(UDP_MOD_ID, 0, 1,
8421 				    SL_ERROR|SL_TRACE,
8422 				    "udp_bind: no priv for multilevel port %d",
8423 				    mlpport);
8424 			}
8425 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8426 			udp->udp_pending_op = -1;
8427 			rw_exit(&udp->udp_rwlock);
8428 			connp->conn_anon_port = B_FALSE;
8429 			connp->conn_mlp_type = mlptSingle;
8430 			return (-TACCES);
8431 		}
8432 
8433 		/*
8434 		 * If we're specifically binding a shared IP address and the
8435 		 * port is MLP on shared addresses, then check to see if this
8436 		 * zone actually owns the MLP.  Reject if not.
8437 		 */
8438 		if (mlptype == mlptShared && addrtype == mlptShared) {
8439 			/*
8440 			 * No need to handle exclusive-stack zones since
8441 			 * ALL_ZONES only applies to the shared stack.
8442 			 */
8443 			zoneid_t mlpzone;
8444 
8445 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
8446 			    htons(mlpport));
8447 			if (connp->conn_zoneid != mlpzone) {
8448 				if (udp->udp_debug) {
8449 					(void) strlog(UDP_MOD_ID, 0, 1,
8450 					    SL_ERROR|SL_TRACE,
8451 					    "udp_bind: attempt to bind port "
8452 					    "%d on shared addr in zone %d "
8453 					    "(should be %d)",
8454 					    mlpport, connp->conn_zoneid,
8455 					    mlpzone);
8456 				}
8457 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8458 				udp->udp_pending_op = -1;
8459 				rw_exit(&udp->udp_rwlock);
8460 				connp->conn_anon_port = B_FALSE;
8461 				connp->conn_mlp_type = mlptSingle;
8462 				return (-TACCES);
8463 			}
8464 		}
8465 		if (connp->conn_anon_port) {
8466 			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
8467 			    port, B_TRUE);
8468 			if (error != 0) {
8469 				if (udp->udp_debug) {
8470 					(void) strlog(UDP_MOD_ID, 0, 1,
8471 					    SL_ERROR|SL_TRACE,
8472 					    "udp_bind: cannot establish anon "
8473 					    "MLP for port %d", port);
8474 				}
8475 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8476 				udp->udp_pending_op = -1;
8477 				rw_exit(&udp->udp_rwlock);
8478 				connp->conn_anon_port = B_FALSE;
8479 				connp->conn_mlp_type = mlptSingle;
8480 				return (-TACCES);
8481 			}
8482 		}
8483 		connp->conn_mlp_type = mlptype;
8484 	}
8485 
8486 	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8487 		/*
8488 		 * Append a request for an IRE if udp_v6src not
8489 		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
8490 		 */
8491 		mp = allocb(sizeof (ire_t), BPRI_HI);
8492 		if (!mp) {
8493 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8494 			udp->udp_pending_op = -1;
8495 			rw_exit(&udp->udp_rwlock);
8496 			return (ENOMEM);
8497 		}
8498 		mp->b_wptr += sizeof (ire_t);
8499 		mp->b_datap->db_type = IRE_DB_REQ_TYPE;
8500 	}
8501 	if (udp->udp_family == AF_INET6) {
8502 		ASSERT(udp->udp_connp->conn_af_isv6);
8503 		error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
8504 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
8505 	} else {
8506 		ASSERT(!udp->udp_connp->conn_af_isv6);
8507 		error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
8508 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
8509 		    B_TRUE);
8510 	}
8511 
8512 	(void) udp_post_ip_bind_connect(udp, mp, error);
8513 	return (error);
8514 }
8515 
8516 int
8517 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
8518     socklen_t len, cred_t *cr)
8519 {
8520 	int		error;
8521 	conn_t		*connp;
8522 
8523 	connp = (conn_t *)proto_handle;
8524 
8525 	if (sa == NULL)
8526 		error = udp_do_unbind(connp);
8527 	else
8528 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
8529 
8530 	if (error < 0) {
8531 		if (error == -TOUTSTATE)
8532 			error = EINVAL;
8533 		else
8534 			error = proto_tlitosyserr(-error);
8535 	}
8536 
8537 	return (error);
8538 }
8539 
8540 static int
8541 udp_implicit_bind(conn_t *connp, cred_t *cr)
8542 {
8543 	int error;
8544 
8545 	error = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
8546 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
8547 }
8548 
8549 /*
8550  * This routine removes a port number association from a stream. It
8551  * is called by udp_unbind and udp_tpi_unbind.
8552  */
8553 static int
8554 udp_do_unbind(conn_t *connp)
8555 {
8556 	udp_t 		*udp = connp->conn_udp;
8557 	udp_fanout_t	*udpf;
8558 	udp_stack_t	*us = udp->udp_us;
8559 
8560 	if (cl_inet_unbind != NULL) {
8561 		/*
8562 		 * Running in cluster mode - register unbind information
8563 		 */
8564 		if (udp->udp_ipversion == IPV4_VERSION) {
8565 			(*cl_inet_unbind)(
8566 			    connp->conn_netstack->netstack_stackid,
8567 			    IPPROTO_UDP, AF_INET,
8568 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8569 			    (in_port_t)udp->udp_port, NULL);
8570 		} else {
8571 			(*cl_inet_unbind)(
8572 			    connp->conn_netstack->netstack_stackid,
8573 			    IPPROTO_UDP, AF_INET6,
8574 			    (uint8_t *)&(udp->udp_v6src),
8575 			    (in_port_t)udp->udp_port, NULL);
8576 		}
8577 	}
8578 
8579 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8580 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8581 		rw_exit(&udp->udp_rwlock);
8582 		return (-TOUTSTATE);
8583 	}
8584 	udp->udp_pending_op = T_UNBIND_REQ;
8585 	rw_exit(&udp->udp_rwlock);
8586 
8587 	/*
8588 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
8589 	 * and therefore ip_unbind must never return NULL.
8590 	 */
8591 	ip_unbind(connp);
8592 
8593 	/*
8594 	 * Once we're unbound from IP, the pending operation may be cleared
8595 	 * here.
8596 	 */
8597 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8598 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8599 	    us->us_bind_fanout_size)];
8600 
8601 	mutex_enter(&udpf->uf_lock);
8602 	udp_bind_hash_remove(udp, B_TRUE);
8603 	V6_SET_ZERO(udp->udp_v6src);
8604 	V6_SET_ZERO(udp->udp_bound_v6src);
8605 	udp->udp_port = 0;
8606 	mutex_exit(&udpf->uf_lock);
8607 
8608 	udp->udp_pending_op = -1;
8609 	udp->udp_state = TS_UNBND;
8610 	if (udp->udp_family == AF_INET6)
8611 		(void) udp_build_hdrs(udp);
8612 	rw_exit(&udp->udp_rwlock);
8613 
8614 	return (0);
8615 }
8616 
8617 static int
8618 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
8619 {
8620 	ire_t		*ire;
8621 	udp_fanout_t	*udpf;
8622 	udp_stack_t	*us = udp->udp_us;
8623 
8624 	ASSERT(udp->udp_pending_op != -1);
8625 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8626 	if (error == 0) {
8627 		/* For udp_do_connect() success */
8628 		/* udp_do_bind() success will do nothing in here */
8629 		/*
8630 		 * If a broadcast/multicast address was bound, set
8631 		 * the source address to 0.
8632 		 * This ensures no datagrams with broadcast address
8633 		 * as source address are emitted (which would violate
8634 		 * RFC1122 - Hosts requirements)
8635 		 *
8636 		 * Note that when connecting the returned IRE is
8637 		 * for the destination address and we only perform
8638 		 * the broadcast check for the source address (it
8639 		 * is OK to connect to a broadcast/multicast address.)
8640 		 */
8641 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
8642 			ire = (ire_t *)ire_mp->b_rptr;
8643 
8644 			/*
8645 			 * Note: we get IRE_BROADCAST for IPv6 to "mark" a
8646 			 * multicast local address.
8647 			 */
8648 			udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8649 			    us->us_bind_fanout_size)];
8650 			if (ire->ire_type == IRE_BROADCAST &&
8651 			    udp->udp_state != TS_DATA_XFER) {
8652 				ASSERT(udp->udp_pending_op == T_BIND_REQ ||
8653 				    udp->udp_pending_op == O_T_BIND_REQ);
8654 				/*
8655 				 * This was just a local bind to a broadcast
8656 				 * addr.
8657 				 */
8658 				mutex_enter(&udpf->uf_lock);
8659 				V6_SET_ZERO(udp->udp_v6src);
8660 				mutex_exit(&udpf->uf_lock);
8661 				if (udp->udp_family == AF_INET6)
8662 					(void) udp_build_hdrs(udp);
8663 			} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8664 				if (udp->udp_family == AF_INET6)
8665 					(void) udp_build_hdrs(udp);
8666 			}
8667 		}
8668 	} else {
8669 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8670 		    us->us_bind_fanout_size)];
8671 		mutex_enter(&udpf->uf_lock);
8672 
8673 		if (udp->udp_state == TS_DATA_XFER) {
8674 			/* Connect failed */
8675 			/* Revert back to the bound source */
8676 			udp->udp_v6src = udp->udp_bound_v6src;
8677 			udp->udp_state = TS_IDLE;
8678 		} else {
8679 			/* For udp_do_bind() failed */
8680 			V6_SET_ZERO(udp->udp_v6src);
8681 			V6_SET_ZERO(udp->udp_bound_v6src);
8682 			udp->udp_state = TS_UNBND;
8683 			udp_bind_hash_remove(udp, B_TRUE);
8684 			udp->udp_port = 0;
8685 		}
8686 		mutex_exit(&udpf->uf_lock);
8687 		if (udp->udp_family == AF_INET6)
8688 			(void) udp_build_hdrs(udp);
8689 	}
8690 	udp->udp_pending_op = -1;
8691 	rw_exit(&udp->udp_rwlock);
8692 	if (ire_mp != NULL)
8693 		freeb(ire_mp);
8694 	return (error);
8695 }
8696 
8697 /*
8698  * It associates a default destination address with the stream.
8699  */
8700 static int
8701 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len)
8702 {
8703 	sin6_t		*sin6;
8704 	sin_t		*sin;
8705 	in6_addr_t 	v6dst;
8706 	ipaddr_t 	v4dst;
8707 	uint16_t 	dstport;
8708 	uint32_t 	flowinfo;
8709 	mblk_t		*ire_mp;
8710 	udp_fanout_t	*udpf;
8711 	udp_t		*udp, *udp1;
8712 	ushort_t	ipversion;
8713 	udp_stack_t	*us;
8714 	int		error;
8715 
8716 	udp = connp->conn_udp;
8717 	us = udp->udp_us;
8718 
8719 	/*
8720 	 * Address has been verified by the caller
8721 	 */
8722 	switch (len) {
8723 	default:
8724 		/*
8725 		 * Should never happen
8726 		 */
8727 		return (EINVAL);
8728 
8729 	case sizeof (sin_t):
8730 		sin = (sin_t *)sa;
8731 		v4dst = sin->sin_addr.s_addr;
8732 		dstport = sin->sin_port;
8733 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
8734 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8735 		ipversion = IPV4_VERSION;
8736 		break;
8737 
8738 	case sizeof (sin6_t):
8739 		sin6 = (sin6_t *)sa;
8740 		v6dst = sin6->sin6_addr;
8741 		dstport = sin6->sin6_port;
8742 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
8743 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
8744 			ipversion = IPV4_VERSION;
8745 			flowinfo = 0;
8746 		} else {
8747 			ipversion = IPV6_VERSION;
8748 			flowinfo = sin6->sin6_flowinfo;
8749 		}
8750 		break;
8751 	}
8752 
8753 	if (dstport == 0)
8754 		return (-TBADADDR);
8755 
8756 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8757 
8758 	/*
8759 	 * This UDP must have bound to a port already before doing a connect.
8760 	 * TPI mandates that users must send TPI primitives only 1 at a time
8761 	 * and wait for the response before sending the next primitive.
8762 	 */
8763 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8764 		rw_exit(&udp->udp_rwlock);
8765 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8766 		    "udp_connect: bad state, %u", udp->udp_state);
8767 		return (-TOUTSTATE);
8768 	}
8769 	udp->udp_pending_op = T_CONN_REQ;
8770 	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
8771 
8772 	if (ipversion == IPV4_VERSION) {
8773 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8774 		    udp->udp_ip_snd_options_len;
8775 	} else {
8776 		udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8777 	}
8778 
8779 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8780 	    us->us_bind_fanout_size)];
8781 
8782 	mutex_enter(&udpf->uf_lock);
8783 	if (udp->udp_state == TS_DATA_XFER) {
8784 		/* Already connected - clear out state */
8785 		udp->udp_v6src = udp->udp_bound_v6src;
8786 		udp->udp_state = TS_IDLE;
8787 	}
8788 
8789 	/*
8790 	 * Create a default IP header with no IP options.
8791 	 */
8792 	udp->udp_dstport = dstport;
8793 	udp->udp_ipversion = ipversion;
8794 	if (ipversion == IPV4_VERSION) {
8795 		/*
8796 		 * Interpret a zero destination to mean loopback.
8797 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
8798 		 * generate the T_CONN_CON.
8799 		 */
8800 		if (v4dst == INADDR_ANY) {
8801 			v4dst = htonl(INADDR_LOOPBACK);
8802 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
8803 			if (udp->udp_family == AF_INET) {
8804 				sin->sin_addr.s_addr = v4dst;
8805 			} else {
8806 				sin6->sin6_addr = v6dst;
8807 			}
8808 		}
8809 		udp->udp_v6dst = v6dst;
8810 		udp->udp_flowinfo = 0;
8811 
8812 		/*
8813 		 * If the destination address is multicast and
8814 		 * an outgoing multicast interface has been set,
8815 		 * use the address of that interface as our
8816 		 * source address if no source address has been set.
8817 		 */
8818 		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
8819 		    CLASSD(v4dst) &&
8820 		    udp->udp_multicast_if_addr != INADDR_ANY) {
8821 			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
8822 			    &udp->udp_v6src);
8823 		}
8824 	} else {
8825 		ASSERT(udp->udp_ipversion == IPV6_VERSION);
8826 		/*
8827 		 * Interpret a zero destination to mean loopback.
8828 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
8829 		 * generate the T_CONN_CON.
8830 		 */
8831 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
8832 			v6dst = ipv6_loopback;
8833 			sin6->sin6_addr = v6dst;
8834 		}
8835 		udp->udp_v6dst = v6dst;
8836 		udp->udp_flowinfo = flowinfo;
8837 		/*
8838 		 * If the destination address is multicast and
8839 		 * an outgoing multicast interface has been set,
8840 		 * then the ip bind logic will pick the correct source
8841 		 * address (i.e. matching the outgoing multicast interface).
8842 		 */
8843 	}
8844 
8845 	/*
8846 	 * Verify that the src/port/dst/port is unique for all
8847 	 * connections in TS_DATA_XFER
8848 	 */
8849 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
8850 		if (udp1->udp_state != TS_DATA_XFER)
8851 			continue;
8852 		if (udp->udp_port != udp1->udp_port ||
8853 		    udp->udp_ipversion != udp1->udp_ipversion ||
8854 		    dstport != udp1->udp_dstport ||
8855 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
8856 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
8857 		    !(IPCL_ZONE_MATCH(udp->udp_connp,
8858 		    udp1->udp_connp->conn_zoneid) ||
8859 		    IPCL_ZONE_MATCH(udp1->udp_connp,
8860 		    udp->udp_connp->conn_zoneid)))
8861 			continue;
8862 		mutex_exit(&udpf->uf_lock);
8863 		udp->udp_pending_op = -1;
8864 		rw_exit(&udp->udp_rwlock);
8865 		return (-TBADADDR);
8866 	}
8867 
8868 	if (cl_inet_connect2 != NULL) {
8869 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error);
8870 		if (error != 0) {
8871 			mutex_exit(&udpf->uf_lock);
8872 			udp->udp_pending_op = -1;
8873 			rw_exit(&udp->udp_rwlock);
8874 			return (-TBADADDR);
8875 		}
8876 	}
8877 
8878 	udp->udp_state = TS_DATA_XFER;
8879 	mutex_exit(&udpf->uf_lock);
8880 
8881 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
8882 	if (ire_mp == NULL) {
8883 		mutex_enter(&udpf->uf_lock);
8884 		udp->udp_state = TS_IDLE;
8885 		udp->udp_pending_op = -1;
8886 		mutex_exit(&udpf->uf_lock);
8887 		rw_exit(&udp->udp_rwlock);
8888 		return (ENOMEM);
8889 	}
8890 
8891 	rw_exit(&udp->udp_rwlock);
8892 
8893 	ire_mp->b_wptr += sizeof (ire_t);
8894 	ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
8895 
8896 	if (udp->udp_family == AF_INET) {
8897 		error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP,
8898 		    &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port,
8899 		    V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport,
8900 		    B_TRUE, B_TRUE);
8901 	} else {
8902 		error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP,
8903 		    &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst,
8904 		    &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE);
8905 	}
8906 
8907 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
8908 }
8909 
8910 /* ARGSUSED */
8911 static int
8912 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
8913     socklen_t len, sock_connid_t *id, cred_t *cr)
8914 {
8915 	conn_t	*connp = (conn_t *)proto_handle;
8916 	udp_t	*udp = connp->conn_udp;
8917 	int	error;
8918 	boolean_t did_bind = B_FALSE;
8919 
8920 	if (sa == NULL) {
8921 		/*
8922 		 * Disconnect
8923 		 * Make sure we are connected
8924 		 */
8925 		if (udp->udp_state != TS_DATA_XFER)
8926 			return (EINVAL);
8927 
8928 		error = udp_disconnect(connp);
8929 		return (error);
8930 	}
8931 
8932 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
8933 	if (error != 0)
8934 		goto done;
8935 
8936 	/* do an implicit bind if necessary */
8937 	if (udp->udp_state == TS_UNBND) {
8938 		error = udp_implicit_bind(connp, cr);
8939 		/*
8940 		 * We could be racing with an actual bind, in which case
8941 		 * we would see EPROTO. We cross our fingers and try
8942 		 * to connect.
8943 		 */
8944 		if (!(error == 0 || error == EPROTO))
8945 			goto done;
8946 		did_bind = B_TRUE;
8947 	}
8948 	/*
8949 	 * set SO_DGRAM_ERRIND
8950 	 */
8951 	udp->udp_dgram_errind = B_TRUE;
8952 
8953 	error = udp_do_connect(connp, sa, len);
8954 
8955 	if (error != 0 && did_bind) {
8956 		int unbind_err;
8957 
8958 		unbind_err = udp_do_unbind(connp);
8959 		ASSERT(unbind_err == 0);
8960 	}
8961 
8962 	if (error == 0) {
8963 		*id = 0;
8964 		(*connp->conn_upcalls->su_connected)
8965 		    (connp->conn_upper_handle, 0, NULL, -1);
8966 	} else if (error < 0) {
8967 		error = proto_tlitosyserr(-error);
8968 	}
8969 
8970 done:
8971 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
8972 		/*
8973 		 * No need to hold locks to set state
8974 		 * after connect failure socket state is undefined
8975 		 * We set the state only to imitate old sockfs behavior
8976 		 */
8977 		udp->udp_state = TS_IDLE;
8978 	}
8979 	return (error);
8980 }
8981 
8982 /* ARGSUSED */
8983 int
8984 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
8985     cred_t *cr)
8986 {
8987 	conn_t		*connp = (conn_t *)proto_handle;
8988 	udp_t		*udp = connp->conn_udp;
8989 	udp_stack_t	*us = udp->udp_us;
8990 	int		error = 0;
8991 
8992 	ASSERT(DB_TYPE(mp) == M_DATA);
8993 
8994 	/*
8995 	 * If the socket is connected and no change in destination
8996 	 */
8997 	if (msg->msg_namelen == 0) {
8998 		error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid);
8999 		if (error == EDESTADDRREQ)
9000 			return (error);
9001 		else
9002 			return (udp->udp_dgram_errind ? error : 0);
9003 	}
9004 
9005 	/*
9006 	 * Do an implicit bind if necessary.
9007 	 */
9008 	if (udp->udp_state == TS_UNBND) {
9009 		error = udp_implicit_bind(connp, cr);
9010 		/*
9011 		 * We could be racing with an actual bind, in which case
9012 		 * we would see EPROTO. We cross our fingers and try
9013 		 * to send.
9014 		 */
9015 		if (!(error == 0 || error == EPROTO)) {
9016 			freemsg(mp);
9017 			return (error);
9018 		}
9019 	}
9020 
9021 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9022 
9023 	if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) {
9024 		rw_exit(&udp->udp_rwlock);
9025 		freemsg(mp);
9026 		return (EISCONN);
9027 	}
9028 
9029 
9030 	if (udp->udp_delayed_error != 0) {
9031 		boolean_t	match;
9032 
9033 		error = udp->udp_delayed_error;
9034 		match = B_FALSE;
9035 		udp->udp_delayed_error = 0;
9036 		switch (udp->udp_family) {
9037 		case AF_INET: {
9038 			/* Compare just IP address and port */
9039 			sin_t *sin1 = (sin_t *)msg->msg_name;
9040 			sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
9041 
9042 			if (msg->msg_namelen == sizeof (sin_t) &&
9043 			    sin1->sin_port == sin2->sin_port &&
9044 			    sin1->sin_addr.s_addr == sin2->sin_addr.s_addr)
9045 				match = B_TRUE;
9046 
9047 			break;
9048 		}
9049 		case AF_INET6: {
9050 			sin6_t	*sin1 = (sin6_t *)msg->msg_name;
9051 			sin6_t	*sin2 = (sin6_t *)&udp->udp_delayed_addr;
9052 
9053 			if (msg->msg_namelen == sizeof (sin6_t) &&
9054 			    sin1->sin6_port == sin2->sin6_port &&
9055 			    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
9056 			    &sin2->sin6_addr))
9057 				match = B_TRUE;
9058 			break;
9059 		}
9060 		default:
9061 			ASSERT(0);
9062 		}
9063 
9064 		*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
9065 
9066 		if (match) {
9067 			rw_exit(&udp->udp_rwlock);
9068 			freemsg(mp);
9069 			return (error);
9070 		}
9071 	}
9072 
9073 	error = proto_verify_ip_addr(udp->udp_family,
9074 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
9075 	rw_exit(&udp->udp_rwlock);
9076 
9077 	if (error != 0) {
9078 		freemsg(mp);
9079 		return (error);
9080 	}
9081 
9082 	error = udp_send_not_connected(connp, mp,
9083 	    (struct sockaddr  *)msg->msg_name, msg->msg_namelen, msg, cr,
9084 	    curproc->p_pid);
9085 	if (error != 0) {
9086 		UDP_STAT(us, udp_out_err_output);
9087 		freemsg(mp);
9088 	}
9089 	return (udp->udp_dgram_errind ? error : 0);
9090 }
9091 
9092 void
9093 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
9094     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
9095 {
9096 	conn_t 	*connp = (conn_t *)proto_handle;
9097 	udp_t	*udp;
9098 	struct T_capability_ack tca;
9099 	struct sockaddr_in6 laddr, faddr;
9100 	socklen_t laddrlen, faddrlen;
9101 	short opts;
9102 	struct stroptions *stropt;
9103 	mblk_t *stropt_mp;
9104 	int error;
9105 
9106 	udp = connp->conn_udp;
9107 
9108 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
9109 
9110 	/*
9111 	 * setup the fallback stream that was allocated
9112 	 */
9113 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
9114 	connp->conn_minor_arena = WR(q)->q_ptr;
9115 
9116 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
9117 
9118 	WR(q)->q_qinfo = &udp_winit;
9119 
9120 	connp->conn_rq = RD(q);
9121 	connp->conn_wq = WR(q);
9122 
9123 	/* Notify stream head about options before sending up data */
9124 	stropt_mp->b_datap->db_type = M_SETOPTS;
9125 	stropt_mp->b_wptr += sizeof (*stropt);
9126 	stropt = (struct stroptions *)stropt_mp->b_rptr;
9127 	stropt->so_flags = SO_WROFF | SO_HIWAT;
9128 	stropt->so_wroff =
9129 	    (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
9130 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
9131 	putnext(RD(q), stropt_mp);
9132 
9133 	/*
9134 	 * Free the helper stream
9135 	 */
9136 	ip_free_helper_stream(connp);
9137 
9138 	if (!direct_sockfs)
9139 		udp_disable_direct_sockfs(udp);
9140 
9141 	/*
9142 	 * Collect the information needed to sync with the sonode
9143 	 */
9144 	udp_do_capability_ack(udp, &tca, TC1_INFO);
9145 
9146 	laddrlen = faddrlen = sizeof (sin6_t);
9147 	(void) udp_getsockname((sock_lower_handle_t)connp,
9148 	    (struct sockaddr *)&laddr, &laddrlen, NULL);
9149 	error = udp_getpeername((sock_lower_handle_t)connp,
9150 	    (struct sockaddr *)&faddr, &faddrlen, NULL);
9151 	if (error != 0)
9152 		faddrlen = 0;
9153 
9154 	opts = 0;
9155 	if (udp->udp_dgram_errind)
9156 		opts |= SO_DGRAM_ERRIND;
9157 	if (udp->udp_dontroute)
9158 		opts |= SO_DONTROUTE;
9159 
9160 	/*
9161 	 * Once we grab the drain lock, no data will be send up
9162 	 * to the socket. So we notify the socket that the endpoint
9163 	 * is quiescent and it's therefore safe move data from
9164 	 * the socket to the stream head.
9165 	 */
9166 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
9167 	    (struct sockaddr *)&laddr, laddrlen,
9168 	    (struct sockaddr *)&faddr, faddrlen, opts);
9169 
9170 	/*
9171 	 * push up any packets that were queued in udp_t
9172 	 */
9173 
9174 	mutex_enter(&udp->udp_recv_lock);
9175 	while (udp->udp_fallback_queue_head != NULL) {
9176 		mblk_t *mp;
9177 		mp = udp->udp_fallback_queue_head;
9178 		udp->udp_fallback_queue_head = mp->b_next;
9179 		mutex_exit(&udp->udp_recv_lock);
9180 		mp->b_next = NULL;
9181 		putnext(RD(q), mp);
9182 		mutex_enter(&udp->udp_recv_lock);
9183 	}
9184 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
9185 	/*
9186 	 * No longer a streams less socket
9187 	 */
9188 	connp->conn_flags &= ~IPCL_NONSTR;
9189 	mutex_exit(&udp->udp_recv_lock);
9190 
9191 	ASSERT(connp->conn_ref >= 1);
9192 }
9193 
9194 static int
9195 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9196 {
9197 	sin_t	*sin = (sin_t *)sa;
9198 	sin6_t	*sin6 = (sin6_t *)sa;
9199 
9200 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9201 	ASSERT(udp != NULL);
9202 
9203 	if (udp->udp_state != TS_DATA_XFER)
9204 		return (ENOTCONN);
9205 
9206 	switch (udp->udp_family) {
9207 	case AF_INET:
9208 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9209 
9210 		if (*salenp < sizeof (sin_t))
9211 			return (EINVAL);
9212 
9213 		*salenp = sizeof (sin_t);
9214 		*sin = sin_null;
9215 		sin->sin_family = AF_INET;
9216 		sin->sin_port = udp->udp_dstport;
9217 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
9218 		break;
9219 	case AF_INET6:
9220 		if (*salenp < sizeof (sin6_t))
9221 			return (EINVAL);
9222 
9223 		*salenp = sizeof (sin6_t);
9224 		*sin6 = sin6_null;
9225 		sin6->sin6_family = AF_INET6;
9226 		sin6->sin6_port = udp->udp_dstport;
9227 		sin6->sin6_addr = udp->udp_v6dst;
9228 		sin6->sin6_flowinfo = udp->udp_flowinfo;
9229 		break;
9230 	}
9231 
9232 	return (0);
9233 }
9234 
9235 /* ARGSUSED */
9236 int
9237 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
9238     socklen_t *salenp, cred_t *cr)
9239 {
9240 	conn_t	*connp = (conn_t *)proto_handle;
9241 	udp_t	*udp = connp->conn_udp;
9242 	int error;
9243 
9244 	ASSERT(udp != NULL);
9245 
9246 	rw_enter(&udp->udp_rwlock, RW_READER);
9247 
9248 	error = udp_do_getpeername(udp, sa, salenp);
9249 
9250 	rw_exit(&udp->udp_rwlock);
9251 
9252 	return (error);
9253 }
9254 
9255 static int
9256 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9257 {
9258 	sin_t	*sin = (sin_t *)sa;
9259 	sin6_t	*sin6 = (sin6_t *)sa;
9260 
9261 	ASSERT(udp != NULL);
9262 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9263 
9264 	switch (udp->udp_family) {
9265 	case AF_INET:
9266 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9267 
9268 		if (*salenp < sizeof (sin_t))
9269 			return (EINVAL);
9270 
9271 		*salenp = sizeof (sin_t);
9272 		*sin = sin_null;
9273 		sin->sin_family = AF_INET;
9274 		if (udp->udp_state == TS_UNBND) {
9275 			break;
9276 		}
9277 		sin->sin_port = udp->udp_port;
9278 
9279 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
9280 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9281 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
9282 		} else {
9283 			/*
9284 			 * INADDR_ANY
9285 			 * udp_v6src is not set, we might be bound to
9286 			 * broadcast/multicast. Use udp_bound_v6src as
9287 			 * local address instead (that could
9288 			 * also still be INADDR_ANY)
9289 			 */
9290 			sin->sin_addr.s_addr =
9291 			    V4_PART_OF_V6(udp->udp_bound_v6src);
9292 		}
9293 		break;
9294 
9295 	case AF_INET6:
9296 		if (*salenp < sizeof (sin6_t))
9297 			return (EINVAL);
9298 
9299 		*salenp = sizeof (sin6_t);
9300 		*sin6 = sin6_null;
9301 		sin6->sin6_family = AF_INET6;
9302 		if (udp->udp_state == TS_UNBND) {
9303 			break;
9304 		}
9305 		sin6->sin6_port = udp->udp_port;
9306 
9307 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9308 			sin6->sin6_addr = udp->udp_v6src;
9309 		} else {
9310 			/*
9311 			 * UNSPECIFIED
9312 			 * udp_v6src is not set, we might be bound to
9313 			 * broadcast/multicast. Use udp_bound_v6src as
9314 			 * local address instead (that could
9315 			 * also still be UNSPECIFIED)
9316 			 */
9317 			sin6->sin6_addr = udp->udp_bound_v6src;
9318 		}
9319 	}
9320 	return (0);
9321 }
9322 
9323 /* ARGSUSED */
9324 int
9325 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
9326     socklen_t *salenp, cred_t *cr)
9327 {
9328 	conn_t	*connp = (conn_t *)proto_handle;
9329 	udp_t	*udp = connp->conn_udp;
9330 	int error;
9331 
9332 	ASSERT(udp != NULL);
9333 	rw_enter(&udp->udp_rwlock, RW_READER);
9334 
9335 	error = udp_do_getsockname(udp, sa, salenp);
9336 
9337 	rw_exit(&udp->udp_rwlock);
9338 
9339 	return (error);
9340 }
9341 
9342 int
9343 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9344     void *optvalp, socklen_t *optlen, cred_t *cr)
9345 {
9346 	conn_t		*connp = (conn_t *)proto_handle;
9347 	udp_t		*udp = connp->conn_udp;
9348 	int		error;
9349 	t_uscalar_t	max_optbuf_len;
9350 	void		*optvalp_buf;
9351 	int		len;
9352 
9353 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
9354 	    udp_opt_obj.odb_opt_des_arr,
9355 	    udp_opt_obj.odb_opt_arr_cnt,
9356 	    udp_opt_obj.odb_topmost_tpiprovider,
9357 	    B_FALSE, B_TRUE, cr);
9358 	if (error != 0) {
9359 		if (error < 0)
9360 			error = proto_tlitosyserr(-error);
9361 		return (error);
9362 	}
9363 
9364 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
9365 	rw_enter(&udp->udp_rwlock, RW_READER);
9366 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
9367 	rw_exit(&udp->udp_rwlock);
9368 
9369 	if (len < 0) {
9370 		/*
9371 		 * Pass on to IP
9372 		 */
9373 		kmem_free(optvalp_buf, max_optbuf_len);
9374 		return (ip_get_options(connp, level, option_name,
9375 		    optvalp, optlen, cr));
9376 	} else {
9377 		/*
9378 		 * update optlen and copy option value
9379 		 */
9380 		t_uscalar_t size = MIN(len, *optlen);
9381 		bcopy(optvalp_buf, optvalp, size);
9382 		bcopy(&size, optlen, sizeof (size));
9383 
9384 		kmem_free(optvalp_buf, max_optbuf_len);
9385 		return (0);
9386 	}
9387 }
9388 
9389 int
9390 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9391     const void *optvalp, socklen_t optlen, cred_t *cr)
9392 {
9393 	conn_t		*connp = (conn_t *)proto_handle;
9394 	udp_t		*udp = connp->conn_udp;
9395 	int		error;
9396 
9397 	error = proto_opt_check(level, option_name, optlen, NULL,
9398 	    udp_opt_obj.odb_opt_des_arr,
9399 	    udp_opt_obj.odb_opt_arr_cnt,
9400 	    udp_opt_obj.odb_topmost_tpiprovider,
9401 	    B_TRUE, B_FALSE, cr);
9402 
9403 	if (error != 0) {
9404 		if (error < 0)
9405 			error = proto_tlitosyserr(-error);
9406 		return (error);
9407 	}
9408 
9409 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9410 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
9411 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
9412 	    NULL, cr);
9413 	rw_exit(&udp->udp_rwlock);
9414 
9415 	if (error < 0) {
9416 		/*
9417 		 * Pass on to ip
9418 		 */
9419 		error = ip_set_options(connp, level, option_name, optvalp,
9420 		    optlen, cr);
9421 	}
9422 
9423 	return (error);
9424 }
9425 
9426 void
9427 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
9428 {
9429 	conn_t	*connp = (conn_t *)proto_handle;
9430 	udp_t	*udp = connp->conn_udp;
9431 
9432 	mutex_enter(&udp->udp_recv_lock);
9433 	connp->conn_flow_cntrld = B_FALSE;
9434 	mutex_exit(&udp->udp_recv_lock);
9435 }
9436 
9437 /* ARGSUSED */
9438 int
9439 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
9440 {
9441 	conn_t	*connp = (conn_t *)proto_handle;
9442 
9443 	/* shut down the send side */
9444 	if (how != SHUT_RD)
9445 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9446 		    SOCK_OPCTL_SHUT_SEND, 0);
9447 	/* shut down the recv side */
9448 	if (how != SHUT_WR)
9449 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9450 		    SOCK_OPCTL_SHUT_RECV, 0);
9451 	return (0);
9452 }
9453 
9454 int
9455 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
9456     int mode, int32_t *rvalp, cred_t *cr)
9457 {
9458 	conn_t  	*connp = (conn_t *)proto_handle;
9459 	int		error;
9460 
9461 	switch (cmd) {
9462 		case ND_SET:
9463 		case ND_GET:
9464 		case _SIOCSOCKFALLBACK:
9465 		case TI_GETPEERNAME:
9466 		case TI_GETMYNAME:
9467 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
9468 			    cmd));
9469 			error = EINVAL;
9470 			break;
9471 		default:
9472 			/*
9473 			 * Pass on to IP using helper stream
9474 			 */
9475 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
9476 			    cmd, arg, mode, cr, rvalp);
9477 			break;
9478 	}
9479 	return (error);
9480 }
9481 
/*
 * Socket downcall for accept (the sd_accept entry of sock_udp_downcalls).
 * UDP provides no implementation: every argument is ignored and
 * EOPNOTSUPP is always returned.
 */
/* ARGSUSED */
int
udp_accept(sock_lower_handle_t lproto_handle,
    sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
    cred_t *cr)
{
	return (EOPNOTSUPP);
}
9490 
/*
 * Socket downcall for listen (the sd_listen entry of sock_udp_downcalls).
 * UDP provides no implementation: every argument is ignored and
 * EOPNOTSUPP is always returned.
 */
/* ARGSUSED */
int
udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
{
	return (EOPNOTSUPP);
}
9497 
/*
 * Table of the UDP socket downcall entry points. The initializers are
 * positional, so their order must match the member order of
 * sock_downcalls_t; the trailing comment on each line names the slot
 * being filled. Slots initialized to NULL (sd_send_uio, sd_recv_uio and
 * sd_poll) have no UDP routine in this table.
 */
sock_downcalls_t sock_udp_downcalls = {
	udp_activate,		/* sd_activate */
	udp_accept,		/* sd_accept */
	udp_bind,		/* sd_bind */
	udp_listen,		/* sd_listen */
	udp_connect,		/* sd_connect */
	udp_getpeername,	/* sd_getpeername */
	udp_getsockname,	/* sd_getsockname */
	udp_getsockopt,		/* sd_getsockopt */
	udp_setsockopt,		/* sd_setsockopt */
	udp_send,		/* sd_send */
	NULL,			/* sd_send_uio */
	NULL,			/* sd_recv_uio */
	NULL,			/* sd_poll */
	udp_shutdown,		/* sd_shutdown */
	udp_clr_flowctrl,	/* sd_setflowctrl */
	udp_ioctl,		/* sd_ioctl */
	udp_close		/* sd_close */
};
9517