xref: /titanic_51/usr/src/uts/common/inet/udp/udp.c (revision c3a96863fc7054253767fc2de22e9e9f3a3b36fa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/pattr.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #include <sys/strsun.h>
34 #include <sys/time.h>
35 #define	_SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/kmem.h>
44 #include <sys/policy.h>
45 #include <sys/ucred.h>
46 #include <sys/zone.h>
47 
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sockio.h>
51 #include <sys/vtrace.h>
52 #include <sys/sdt.h>
53 #include <sys/debug.h>
54 #include <sys/isa_defs.h>
55 #include <sys/random.h>
56 #include <netinet/in.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 #include <netinet/udp.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_ire.h>
68 #include <inet/ip_if.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_ndp.h>
71 #include <inet/proto_set.h>
72 #include <inet/mib2.h>
73 #include <inet/nd.h>
74 #include <inet/optcom.h>
75 #include <inet/snmpcom.h>
76 #include <inet/kstatcom.h>
77 #include <inet/udp_impl.h>
78 #include <inet/ipclassifier.h>
79 #include <inet/ipsec_impl.h>
80 #include <inet/ipp_common.h>
81 #include <sys/squeue_impl.h>
82 #include <inet/ipnet.h>
83 #include <sys/ethernet.h>
84 
85 /*
86  * The ipsec_info.h header file is here since it has the definition for the
87  * M_CTL message types used by IP to convey information to the ULP. The
88  * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
89  */
90 #include <net/pfkeyv2.h>
91 #include <inet/ipsec_info.h>
92 
93 #include <sys/tsol/label.h>
94 #include <sys/tsol/tnet.h>
95 #include <rpc/pmap_prot.h>
96 
97 /*
98  * Synchronization notes:
99  *
100  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
101  * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
102  * We also use conn_lock when updating things that affect the IP classifier
103  * lookup.
104  * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock.
105  *
106  * The fanout lock uf_lock:
107  * When a UDP endpoint is bound to a local port, it is inserted into
108  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
109  * The size of the array is controlled by the udp_bind_fanout_size variable.
110  * This variable can be changed in /etc/system if the default value is
111  * not large enough.  Each bind hash bucket is protected by a per bucket
112  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
113  * structure and a few other fields in the udp_t. A UDP endpoint is removed
114  * from the bind hash list only when it is being unbound or being closed.
115  * The per bucket lock also protects a UDP endpoint's state changes.
116  *
117  * The udp_rwlock:
118  * This protects most of the other fields in the udp_t. The exact list of
119  * fields which are protected by each of the above locks is documented in
120  * the udp_t structure definition.
121  *
122  * Plumbing notes:
123  * UDP is always a device driver. For compatibility with mibopen() code
124  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
125  * dummy module.
126  *
 * The above implies that we do not support any intermediate module
 * residing between /dev/ip and udp -- in fact, we have never supported
 * such a scenario, as the inter-layer communication semantics have
 * always been private.
131  */
132 
/*
 * Number of buckets in the UDP bind hash fanout table.  Kept as a global
 * so that it can be overridden from /etc/system.
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

/*
 * Canned replies for ndd "get" requests.  udp_bind_hash_report() returns
 * the first when a non-privileged caller asks again before the configured
 * interval has elapsed, and the second when the report buffer cannot be
 * allocated.
 */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"
140 
/*
 * Option processing attrs.
 *
 * Passed by pointer to udp_unitdata_opt_process().  The union holds a
 * pointer to either the IPv6 or the IPv4 packet-option structure; the two
 * members share storage, so only one of them is meaningful at a time.
 */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t 	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
/* Shorthands so users can name the union members without the union tag. */
#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
	/* NOTE(review): presumably the message being processed -- confirm */
	mblk_t		*udpattr_mb;
	/* NOTE(review): presumably "credential already set" -- confirm */
	boolean_t	udpattr_credset;
} udpattrs_t;
152 
153 static void	udp_addr_req(queue_t *q, mblk_t *mp);
154 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
155 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
156 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
157 static int	udp_build_hdrs(udp_t *udp);
158 static void	udp_capability_req(queue_t *q, mblk_t *mp);
159 static int	udp_tpi_close(queue_t *q, int flags);
160 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
161 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
162 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
163 		    int sys_error);
164 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
165 		    t_scalar_t tlierr, int unixerr);
166 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
167 		    cred_t *cr);
168 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
169 		    char *value, caddr_t cp, cred_t *cr);
170 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
171 		    char *value, caddr_t cp, cred_t *cr);
172 static void	udp_icmp_error(conn_t *, mblk_t *);
173 static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
174 static void	udp_info_req(queue_t *q, mblk_t *mp);
175 static void	udp_input(void *, mblk_t *, void *);
176 static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
177 		    t_scalar_t addr_length);
178 static void	udp_lrput(queue_t *, mblk_t *);
179 static void	udp_lwput(queue_t *, mblk_t *);
180 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
181 		    cred_t *credp, boolean_t isv6);
182 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
183 		    cred_t *credp);
184 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
185 		    cred_t *credp);
186 static  int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
187 		    int *errorp, udpattrs_t *udpattrs);
188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
189 static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
191 static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
192 		    cred_t *cr);
193 static void	udp_report_item(mblk_t *mp, udp_t *udp);
194 static int	udp_rinfop(queue_t *q, infod_t *dp);
195 static int	udp_rrw(queue_t *q, struiod_t *dp);
196 static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
197 		    cred_t *cr);
198 static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
199 		    ipha_t *ipha);
200 static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
201 		    t_scalar_t destlen, t_scalar_t err);
202 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
204     boolean_t random);
205 static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
206 		    int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
207 static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
208 		    int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
209 static void	udp_wput_other(queue_t *q, mblk_t *mp);
210 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
211 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
212 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
213 
214 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
215 static void	udp_stack_fini(netstackid_t stackid, void *arg);
216 
217 static void	*udp_kstat_init(netstackid_t stackid);
218 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
219 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
220 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
221 static int	udp_kstat_update(kstat_t *kp, int rw);
222 
223 static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
224 		    uint_t pkt_len);
225 static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
226 static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
227 
228 static int	udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
229 		    cred_t *, pid_t);
230 
231 /* Common routine for TPI and socket module */
232 static conn_t	*udp_do_open(cred_t *, boolean_t, int);
233 static void	udp_do_close(conn_t *);
234 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
235     boolean_t);
236 static int	udp_do_unbind(conn_t *);
237 static int	udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
238 static int	udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);
239 
240 int		udp_getsockname(sock_lower_handle_t,
241     struct sockaddr *, socklen_t *, cred_t *);
242 int		udp_getpeername(sock_lower_handle_t,
243     struct sockaddr *, socklen_t *, cred_t *);
244 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
245     cred_t *cr);
246 static int	udp_post_ip_bind_connect(udp_t *, mblk_t *, int);
247 
/*
 * Default receive and transmit water marks.  They seed udp_mod_info
 * (receive pair) and the udp_xmit_hiwat, udp_xmit_lowat and udp_recv_hiwat
 * entries of udp_param_arr.
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024
252 
253 /*
254  * The following is defined in tcp.c
255  */
256 extern int	(*cl_inet_connect2)(netstackid_t stack_id,
257 		    uint8_t protocol, boolean_t is_outgoing,
258 		    sa_family_t addr_family,
259 		    uint8_t *laddrp, in_port_t lport,
260 		    uint8_t *faddrp, in_port_t fport, void *args);
261 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_v{4,6}().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The result of the hook is stored in err; err is set to 0 first.
 * For an IPv4 endpoint only the last 32-bit word of udp_v6src and of
 * *faddrp is handed to the hook; otherwise the full IPv6 addresses are
 * passed.
 *
 * Note that the macro expands to a bare brace block rather than a
 * do { } while (0) statement, so it must not be used as the unbraced body
 * of an if that is followed by an else.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, udp_t *udp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((udp)->udp_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET,			\
		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
		    (udp)->udp_port,					\
		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
298 
/*
 * STREAMS module information shared by every queue-init table below.
 * The last two initializers are the default receive water marks.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The two read-side tables differ only in that open routine; close goes
 * through udp_tpi_close() in both cases.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

/* Write side, shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 * These are the lower (multiplexor) read and write sides.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
350 
/* All-zero templates; udp_tpi_bind() copies them to clear an address. */
static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest IPv4 UDP payload: maximum IP packet less UDP and basic IP header */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest IPv6 UDP payload: maximum IP packet less UDP and IPv6 header */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of the T_INFO_ACK template above */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
386 
/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each row is { minimum, maximum, default value, ndd name }.  The water
 * mark rows take their defaults from the UDP_XMIT_ and UDP_RECV_ water
 * mark macros.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */
412 
/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 * Callers test a vector against NULL before calling through it; see
 * udp_quiesce_conn() for the cl_inet_unbind case.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

/* Shorthand for a pointer to the union of TPI primitives. */
typedef union T_primitives *t_primp_t;
430 
431 /*
432  * Return the next anonymous port in the privileged port range for
433  * bind checking.
434  *
435  * Trusted Extension (TX) notes: TX allows administrator to mark or
436  * reserve ports as Multilevel ports (MLP). MLP has special function
437  * on TX systems. Once a port is made MLP, it's not available as
438  * ordinary port. This creates "holes" in the port name space. It
439  * may be necessary to skip the "holes" find a suitable anon port.
440  */
441 static in_port_t
442 udp_get_next_priv_port(udp_t *udp)
443 {
444 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
445 	in_port_t nextport;
446 	boolean_t restart = B_FALSE;
447 	udp_stack_t *us = udp->udp_us;
448 
449 retry:
450 	if (next_priv_port < us->us_min_anonpriv_port ||
451 	    next_priv_port >= IPPORT_RESERVED) {
452 		next_priv_port = IPPORT_RESERVED - 1;
453 		if (restart)
454 			return (0);
455 		restart = B_TRUE;
456 	}
457 
458 	if (is_system_labeled() &&
459 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
460 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
461 		next_priv_port = nextport;
462 		goto retry;
463 	}
464 
465 	return (next_priv_port--);
466 }
467 
468 /* UDP bind hash report triggered via the Named Dispatch mechanism. */
469 /* ARGSUSED */
470 static int
471 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
472 {
473 	udp_fanout_t	*udpf;
474 	int		i;
475 	zoneid_t	zoneid;
476 	conn_t		*connp;
477 	udp_t		*udp;
478 	udp_stack_t	*us;
479 
480 	connp = Q_TO_CONN(q);
481 	udp = connp->conn_udp;
482 	us = udp->udp_us;
483 
484 	/* Refer to comments in udp_status_report(). */
485 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
486 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
487 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
488 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
489 			return (0);
490 		}
491 	}
492 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
493 		/* The following may work even if we cannot get a large buf. */
494 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
495 		return (0);
496 	}
497 
498 	(void) mi_mpprintf(mp,
499 	    "UDP     " MI_COL_HDRPAD_STR
500 	/*   12345678[89ABCDEF] */
501 	    " zone lport src addr        dest addr       port  state");
502 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
503 
504 	zoneid = connp->conn_zoneid;
505 
506 	for (i = 0; i < us->us_bind_fanout_size; i++) {
507 		udpf = &us->us_bind_fanout[i];
508 		mutex_enter(&udpf->uf_lock);
509 
510 		/* Print the hash index. */
511 		udp = udpf->uf_udp;
512 		if (zoneid != GLOBAL_ZONEID) {
513 			/* skip to first entry in this zone; might be none */
514 			while (udp != NULL &&
515 			    udp->udp_connp->conn_zoneid != zoneid)
516 				udp = udp->udp_bind_hash;
517 		}
518 		if (udp != NULL) {
519 			uint_t print_len, buf_len;
520 
521 			buf_len = mp->b_cont->b_datap->db_lim -
522 			    mp->b_cont->b_wptr;
523 			print_len = snprintf((char *)mp->b_cont->b_wptr,
524 			    buf_len, "%d\n", i);
525 			if (print_len < buf_len) {
526 				mp->b_cont->b_wptr += print_len;
527 			} else {
528 				mp->b_cont->b_wptr += buf_len;
529 			}
530 			for (; udp != NULL; udp = udp->udp_bind_hash) {
531 				if (zoneid == GLOBAL_ZONEID ||
532 				    zoneid == udp->udp_connp->conn_zoneid)
533 					udp_report_item(mp->b_cont, udp);
534 			}
535 		}
536 		mutex_exit(&udpf->uf_lock);
537 	}
538 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
539 	return (0);
540 }
541 
542 /*
543  * Hash list removal routine for udp_t structures.
544  */
545 static void
546 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
547 {
548 	udp_t	*udpnext;
549 	kmutex_t *lockp;
550 	udp_stack_t *us = udp->udp_us;
551 
552 	if (udp->udp_ptpbhn == NULL)
553 		return;
554 
555 	/*
556 	 * Extract the lock pointer in case there are concurrent
557 	 * hash_remove's for this instance.
558 	 */
559 	ASSERT(udp->udp_port != 0);
560 	if (!caller_holds_lock) {
561 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
562 		    us->us_bind_fanout_size)].uf_lock;
563 		ASSERT(lockp != NULL);
564 		mutex_enter(lockp);
565 	}
566 	if (udp->udp_ptpbhn != NULL) {
567 		udpnext = udp->udp_bind_hash;
568 		if (udpnext != NULL) {
569 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
570 			udp->udp_bind_hash = NULL;
571 		}
572 		*udp->udp_ptpbhn = udpnext;
573 		udp->udp_ptpbhn = NULL;
574 	}
575 	if (!caller_holds_lock) {
576 		mutex_exit(lockp);
577 	}
578 }
579 
580 static void
581 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
582 {
583 	udp_t	**udpp;
584 	udp_t	*udpnext;
585 
586 	ASSERT(MUTEX_HELD(&uf->uf_lock));
587 	ASSERT(udp->udp_ptpbhn == NULL);
588 	udpp = &uf->uf_udp;
589 	udpnext = udpp[0];
590 	if (udpnext != NULL) {
591 		/*
592 		 * If the new udp bound to the INADDR_ANY address
593 		 * and the first one in the list is not bound to
594 		 * INADDR_ANY we skip all entries until we find the
595 		 * first one bound to INADDR_ANY.
596 		 * This makes sure that applications binding to a
597 		 * specific address get preference over those binding to
598 		 * INADDR_ANY.
599 		 */
600 		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
601 		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
602 			while ((udpnext = udpp[0]) != NULL &&
603 			    !V6_OR_V4_INADDR_ANY(
604 			    udpnext->udp_bound_v6src)) {
605 				udpp = &(udpnext->udp_bind_hash);
606 			}
607 			if (udpnext != NULL)
608 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
609 		} else {
610 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
611 		}
612 	}
613 	udp->udp_bind_hash = udpnext;
614 	udp->udp_ptpbhn = udpp;
615 	udpp[0] = udp;
616 }
617 
618 /*
619  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
620  * passed to udp_wput.
621  * It associates a port number and local address with the stream.
622  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
623  * protocol type (IPPROTO_UDP) placed in the message following the address.
624  * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
625  * (Called as writer.)
626  *
627  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
628  * without setting SO_REUSEADDR. This is needed so that they
629  * can be viewed as two independent transport protocols.
630  * However, anonymouns ports are allocated from the same range to avoid
631  * duplicating the us->us_next_port_to_try.
632  */
633 static void
634 udp_tpi_bind(queue_t *q, mblk_t *mp)
635 {
636 	sin_t		*sin;
637 	sin6_t		*sin6;
638 	mblk_t		*mp1;
639 	struct T_bind_req *tbr;
640 	conn_t		*connp;
641 	udp_t		*udp;
642 	int		error;
643 	struct sockaddr	*sa;
644 	cred_t		*cr;
645 
646 	/*
647 	 * All Solaris components should pass a db_credp
648 	 * for this TPI message, hence we ASSERT.
649 	 * But in case there is some other M_PROTO that looks
650 	 * like a TPI message sent by some other kernel
651 	 * component, we check and return an error.
652 	 */
653 	cr = msg_getcred(mp, NULL);
654 	ASSERT(cr != NULL);
655 	if (cr == NULL) {
656 		udp_err_ack(q, mp, TSYSERR, EINVAL);
657 		return;
658 	}
659 
660 	connp = Q_TO_CONN(q);
661 	udp = connp->conn_udp;
662 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
663 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
664 		    "udp_bind: bad req, len %u",
665 		    (uint_t)(mp->b_wptr - mp->b_rptr));
666 		udp_err_ack(q, mp, TPROTO, 0);
667 		return;
668 	}
669 	if (udp->udp_state != TS_UNBND) {
670 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
671 		    "udp_bind: bad state, %u", udp->udp_state);
672 		udp_err_ack(q, mp, TOUTSTATE, 0);
673 		return;
674 	}
675 	/*
676 	 * Reallocate the message to make sure we have enough room for an
677 	 * address and the protocol type.
678 	 */
679 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
680 	if (!mp1) {
681 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
682 		return;
683 	}
684 
685 	mp = mp1;
686 
687 	/* Reset the message type in preparation for shipping it back. */
688 	DB_TYPE(mp) = M_PCPROTO;
689 
690 	tbr = (struct T_bind_req *)mp->b_rptr;
691 	switch (tbr->ADDR_length) {
692 	case 0:			/* Request for a generic port */
693 		tbr->ADDR_offset = sizeof (struct T_bind_req);
694 		if (udp->udp_family == AF_INET) {
695 			tbr->ADDR_length = sizeof (sin_t);
696 			sin = (sin_t *)&tbr[1];
697 			*sin = sin_null;
698 			sin->sin_family = AF_INET;
699 			mp->b_wptr = (uchar_t *)&sin[1];
700 			sa = (struct sockaddr *)sin;
701 		} else {
702 			ASSERT(udp->udp_family == AF_INET6);
703 			tbr->ADDR_length = sizeof (sin6_t);
704 			sin6 = (sin6_t *)&tbr[1];
705 			*sin6 = sin6_null;
706 			sin6->sin6_family = AF_INET6;
707 			mp->b_wptr = (uchar_t *)&sin6[1];
708 			sa = (struct sockaddr *)sin6;
709 		}
710 		break;
711 
712 	case sizeof (sin_t):	/* Complete IPv4 address */
713 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
714 		    sizeof (sin_t));
715 		if (sa == NULL || !OK_32PTR((char *)sa)) {
716 			udp_err_ack(q, mp, TSYSERR, EINVAL);
717 			return;
718 		}
719 		if (udp->udp_family != AF_INET ||
720 		    sa->sa_family != AF_INET) {
721 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
722 			return;
723 		}
724 		break;
725 
726 	case sizeof (sin6_t):	/* complete IPv6 address */
727 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
728 		    sizeof (sin6_t));
729 		if (sa == NULL || !OK_32PTR((char *)sa)) {
730 			udp_err_ack(q, mp, TSYSERR, EINVAL);
731 			return;
732 		}
733 		if (udp->udp_family != AF_INET6 ||
734 		    sa->sa_family != AF_INET6) {
735 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
736 			return;
737 		}
738 		break;
739 
740 	default:		/* Invalid request */
741 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
742 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
743 		udp_err_ack(q, mp, TBADADDR, 0);
744 		return;
745 	}
746 
747 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
748 	    tbr->PRIM_type != O_T_BIND_REQ);
749 
750 	if (error != 0) {
751 		if (error > 0) {
752 			udp_err_ack(q, mp, TSYSERR, error);
753 		} else {
754 			udp_err_ack(q, mp, -error, 0);
755 		}
756 	} else {
757 		tbr->PRIM_type = T_BIND_ACK;
758 		qreply(q, mp);
759 	}
760 }
761 
762 /*
763  * This routine handles each T_CONN_REQ message passed to udp.  It
764  * associates a default destination address with the stream.
765  *
766  * This routine sends down a T_BIND_REQ to IP with the following mblks:
767  *	T_BIND_REQ	- specifying local and remote address/port
768  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
769  *	T_OK_ACK	- for the T_CONN_REQ
770  *	T_CONN_CON	- to keep the TPI user happy
771  *
772  * The connect completes in udp_do_connect.
773  * When a T_BIND_ACK is received information is extracted from the IRE
774  * and the two appended messages are sent to the TPI user.
775  * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
776  * convert it to an error ack for the appropriate primitive.
777  */
778 static void
779 udp_tpi_connect(queue_t *q, mblk_t *mp)
780 {
781 	mblk_t	*mp1;
782 	udp_t	*udp;
783 	conn_t	*connp = Q_TO_CONN(q);
784 	int	error;
785 	socklen_t	len;
786 	struct sockaddr		*sa;
787 	struct T_conn_req	*tcr;
788 	cred_t		*cr;
789 
790 	/*
791 	 * All Solaris components should pass a db_credp
792 	 * for this TPI message, hence we ASSERT.
793 	 * But in case there is some other M_PROTO that looks
794 	 * like a TPI message sent by some other kernel
795 	 * component, we check and return an error.
796 	 */
797 	cr = msg_getcred(mp, NULL);
798 	ASSERT(cr != NULL);
799 	if (cr == NULL) {
800 		udp_err_ack(q, mp, TSYSERR, EINVAL);
801 		return;
802 	}
803 
804 	udp = connp->conn_udp;
805 	tcr = (struct T_conn_req *)mp->b_rptr;
806 
807 	/* A bit of sanity checking */
808 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
809 		udp_err_ack(q, mp, TPROTO, 0);
810 		return;
811 	}
812 
813 	if (tcr->OPT_length != 0) {
814 		udp_err_ack(q, mp, TBADOPT, 0);
815 		return;
816 	}
817 
818 	/*
819 	 * Determine packet type based on type of address passed in
820 	 * the request should contain an IPv4 or IPv6 address.
821 	 * Make sure that address family matches the type of
822 	 * family of the the address passed down
823 	 */
824 	len = tcr->DEST_length;
825 	switch (tcr->DEST_length) {
826 	default:
827 		udp_err_ack(q, mp, TBADADDR, 0);
828 		return;
829 
830 	case sizeof (sin_t):
831 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
832 		    sizeof (sin_t));
833 		break;
834 
835 	case sizeof (sin6_t):
836 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
837 		    sizeof (sin6_t));
838 		break;
839 	}
840 
841 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
842 	if (error != 0) {
843 		udp_err_ack(q, mp, TSYSERR, error);
844 		return;
845 	}
846 
847 	/*
848 	 * We have to send a connection confirmation to
849 	 * keep TLI happy.
850 	 */
851 	if (udp->udp_family == AF_INET) {
852 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
853 		    sizeof (sin_t), NULL, 0);
854 	} else {
855 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
856 		    sizeof (sin6_t), NULL, 0);
857 	}
858 	if (mp1 == NULL) {
859 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
860 		return;
861 	}
862 
863 	/*
864 	 * Allocate the largest primitive we need to send back
865 	 * T_error_ack is > than T_ok_ack
866 	 */
867 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
868 	if (mp == NULL) {
869 		/* Unable to reuse the T_CONN_REQ for the ack. */
870 		freemsg(mp1);
871 		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
872 		return;
873 	}
874 
875 	error = udp_do_connect(connp, sa, len, cr);
876 	if (error != 0) {
877 		freeb(mp1);
878 		if (error < 0)
879 			udp_err_ack(q, mp, -error, 0);
880 		else
881 			udp_err_ack(q, mp, TSYSERR, error);
882 	} else {
883 		mp = mi_tpi_ok_ack_alloc(mp);
884 		ASSERT(mp != NULL);
885 		putnext(connp->conn_rq, mp);
886 		putnext(connp->conn_rq, mp1);
887 	}
888 }
889 
890 static int
891 udp_tpi_close(queue_t *q, int flags)
892 {
893 	conn_t	*connp;
894 
895 	if (flags & SO_FALLBACK) {
896 		/*
897 		 * stream is being closed while in fallback
898 		 * simply free the resources that were allocated
899 		 */
900 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
901 		qprocsoff(q);
902 		goto done;
903 	}
904 
905 	connp = Q_TO_CONN(q);
906 	udp_do_close(connp);
907 done:
908 	q->q_ptr = WR(q)->q_ptr = NULL;
909 	return (0);
910 }
911 
912 /*
913  * Called in the close path to quiesce the conn
914  */
915 void
916 udp_quiesce_conn(conn_t *connp)
917 {
918 	udp_t	*udp = connp->conn_udp;
919 
920 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
921 		/*
922 		 * Running in cluster mode - register unbind information
923 		 */
924 		if (udp->udp_ipversion == IPV4_VERSION) {
925 			(*cl_inet_unbind)(
926 			    connp->conn_netstack->netstack_stackid,
927 			    IPPROTO_UDP, AF_INET,
928 			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
929 			    (in_port_t)udp->udp_port, NULL);
930 		} else {
931 			(*cl_inet_unbind)(
932 			    connp->conn_netstack->netstack_stackid,
933 			    IPPROTO_UDP, AF_INET6,
934 			    (uint8_t *)(&(udp->udp_v6src)),
935 			    (in_port_t)udp->udp_port, NULL);
936 		}
937 	}
938 
939 	udp_bind_hash_remove(udp, B_FALSE);
940 
941 }
942 
943 void
944 udp_close_free(conn_t *connp)
945 {
946 	udp_t *udp = connp->conn_udp;
947 
948 	/* If there are any options associated with the stream, free them. */
949 	if (udp->udp_ip_snd_options != NULL) {
950 		mi_free((char *)udp->udp_ip_snd_options);
951 		udp->udp_ip_snd_options = NULL;
952 		udp->udp_ip_snd_options_len = 0;
953 	}
954 
955 	if (udp->udp_ip_rcv_options != NULL) {
956 		mi_free((char *)udp->udp_ip_rcv_options);
957 		udp->udp_ip_rcv_options = NULL;
958 		udp->udp_ip_rcv_options_len = 0;
959 	}
960 
961 	/* Free memory associated with sticky options */
962 	if (udp->udp_sticky_hdrs_len != 0) {
963 		kmem_free(udp->udp_sticky_hdrs,
964 		    udp->udp_sticky_hdrs_len);
965 		udp->udp_sticky_hdrs = NULL;
966 		udp->udp_sticky_hdrs_len = 0;
967 	}
968 
969 	ip6_pkt_free(&udp->udp_sticky_ipp);
970 
971 	/*
972 	 * Clear any fields which the kmem_cache constructor clears.
973 	 * Only udp_connp needs to be preserved.
974 	 * TBD: We should make this more efficient to avoid clearing
975 	 * everything.
976 	 */
977 	ASSERT(udp->udp_connp == connp);
978 	bzero(udp, sizeof (udp_t));
979 	udp->udp_connp = connp;
980 }
981 
982 static int
983 udp_do_disconnect(conn_t *connp)
984 {
985 	udp_t	*udp;
986 	mblk_t	*ire_mp;
987 	udp_fanout_t *udpf;
988 	udp_stack_t *us;
989 	int	error;
990 
991 	udp = connp->conn_udp;
992 	us = udp->udp_us;
993 	rw_enter(&udp->udp_rwlock, RW_WRITER);
994 	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
995 		rw_exit(&udp->udp_rwlock);
996 		return (-TOUTSTATE);
997 	}
998 	udp->udp_pending_op = T_DISCON_REQ;
999 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
1000 	    us->us_bind_fanout_size)];
1001 	mutex_enter(&udpf->uf_lock);
1002 	udp->udp_v6src = udp->udp_bound_v6src;
1003 	udp->udp_state = TS_IDLE;
1004 	mutex_exit(&udpf->uf_lock);
1005 
1006 	if (udp->udp_family == AF_INET6) {
1007 		/* Rebuild the header template */
1008 		error = udp_build_hdrs(udp);
1009 		if (error != 0) {
1010 			udp->udp_pending_op = -1;
1011 			rw_exit(&udp->udp_rwlock);
1012 			return (error);
1013 		}
1014 	}
1015 
1016 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
1017 	if (ire_mp == NULL) {
1018 		mutex_enter(&udpf->uf_lock);
1019 		udp->udp_pending_op = -1;
1020 		mutex_exit(&udpf->uf_lock);
1021 		rw_exit(&udp->udp_rwlock);
1022 		return (ENOMEM);
1023 	}
1024 
1025 	rw_exit(&udp->udp_rwlock);
1026 
1027 	if (udp->udp_family == AF_INET6) {
1028 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
1029 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
1030 	} else {
1031 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
1032 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE);
1033 	}
1034 
1035 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
1036 }
1037 
1038 
1039 static void
1040 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
1041 {
1042 	conn_t	*connp = Q_TO_CONN(q);
1043 	int	error;
1044 
1045 	/*
1046 	 * Allocate the largest primitive we need to send back
1047 	 * T_error_ack is > than T_ok_ack
1048 	 */
1049 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
1050 	if (mp == NULL) {
1051 		/* Unable to reuse the T_DISCON_REQ for the ack. */
1052 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
1053 		return;
1054 	}
1055 
1056 	error = udp_do_disconnect(connp);
1057 
1058 	if (error != 0) {
1059 		if (error < 0) {
1060 			udp_err_ack(q, mp, -error, 0);
1061 		} else {
1062 			udp_err_ack(q, mp, TSYSERR, error);
1063 		}
1064 	} else {
1065 		mp = mi_tpi_ok_ack_alloc(mp);
1066 		ASSERT(mp != NULL);
1067 		qreply(q, mp);
1068 	}
1069 }
1070 
1071 int
1072 udp_disconnect(conn_t *connp)
1073 {
1074 	int error;
1075 	udp_t *udp = connp->conn_udp;
1076 
1077 	udp->udp_dgram_errind = B_FALSE;
1078 
1079 	error = udp_do_disconnect(connp);
1080 
1081 	if (error < 0)
1082 		error = proto_tlitosyserr(-error);
1083 
1084 	return (error);
1085 }
1086 
1087 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1088 static void
1089 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1090 {
1091 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1092 		qreply(q, mp);
1093 }
1094 
1095 /* Shorthand to generate and send TPI error acks to our client */
1096 static void
1097 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
1098     int sys_error)
1099 {
1100 	struct T_error_ack	*teackp;
1101 
1102 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1103 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
1104 		teackp = (struct T_error_ack *)mp->b_rptr;
1105 		teackp->ERROR_prim = primitive;
1106 		teackp->TLI_error = t_error;
1107 		teackp->UNIX_error = sys_error;
1108 		qreply(q, mp);
1109 	}
1110 }
1111 
1112 /*ARGSUSED*/
1113 static int
1114 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
1115 {
1116 	int i;
1117 	udp_t		*udp = Q_TO_UDP(q);
1118 	udp_stack_t *us = udp->udp_us;
1119 
1120 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1121 		if (us->us_epriv_ports[i] != 0)
1122 			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
1123 	}
1124 	return (0);
1125 }
1126 
1127 /* ARGSUSED */
1128 static int
1129 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1130     cred_t *cr)
1131 {
1132 	long	new_value;
1133 	int	i;
1134 	udp_t		*udp = Q_TO_UDP(q);
1135 	udp_stack_t *us = udp->udp_us;
1136 
1137 	/*
1138 	 * Fail the request if the new value does not lie within the
1139 	 * port number limits.
1140 	 */
1141 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1142 	    new_value <= 0 || new_value >= 65536) {
1143 		return (EINVAL);
1144 	}
1145 
1146 	/* Check if the value is already in the list */
1147 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1148 		if (new_value == us->us_epriv_ports[i]) {
1149 			return (EEXIST);
1150 		}
1151 	}
1152 	/* Find an empty slot */
1153 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1154 		if (us->us_epriv_ports[i] == 0)
1155 			break;
1156 	}
1157 	if (i == us->us_num_epriv_ports) {
1158 		return (EOVERFLOW);
1159 	}
1160 
1161 	/* Set the new value */
1162 	us->us_epriv_ports[i] = (in_port_t)new_value;
1163 	return (0);
1164 }
1165 
1166 /* ARGSUSED */
1167 static int
1168 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1169     cred_t *cr)
1170 {
1171 	long	new_value;
1172 	int	i;
1173 	udp_t		*udp = Q_TO_UDP(q);
1174 	udp_stack_t *us = udp->udp_us;
1175 
1176 	/*
1177 	 * Fail the request if the new value does not lie within the
1178 	 * port number limits.
1179 	 */
1180 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1181 	    new_value <= 0 || new_value >= 65536) {
1182 		return (EINVAL);
1183 	}
1184 
1185 	/* Check that the value is already in the list */
1186 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1187 		if (us->us_epriv_ports[i] == new_value)
1188 			break;
1189 	}
1190 	if (i == us->us_num_epriv_ports) {
1191 		return (ESRCH);
1192 	}
1193 
1194 	/* Clear the value */
1195 	us->us_epriv_ports[i] = 0;
1196 	return (0);
1197 }
1198 
1199 /* At minimum we need 4 bytes of UDP header */
1200 #define	ICMP_MIN_UDP_HDR	4
1201 
1202 /*
1203  * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP.
1204  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1205  * Assumes that IP has pulled up everything up to and including the ICMP header.
1206  */
1207 static void
1208 udp_icmp_error(conn_t *connp, mblk_t *mp)
1209 			    {
1210 	icmph_t *icmph;
1211 	ipha_t	*ipha;
1212 	int	iph_hdr_length;
1213 	udpha_t	*udpha;
1214 	sin_t	sin;
1215 	sin6_t	sin6;
1216 	mblk_t	*mp1;
1217 	int	error = 0;
1218 	udp_t	*udp = connp->conn_udp;
1219 
1220 	mp1 = NULL;
1221 	ipha = (ipha_t *)mp->b_rptr;
1222 
1223 	ASSERT(OK_32PTR(mp->b_rptr));
1224 
1225 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1226 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1227 		udp_icmp_error_ipv6(connp, mp);
1228 		return;
1229 	}
1230 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
1231 
1232 	/* Skip past the outer IP and ICMP headers */
1233 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1234 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1235 	ipha = (ipha_t *)&icmph[1];
1236 
1237 	/* Skip past the inner IP and find the ULP header */
1238 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1239 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1240 
1241 	switch (icmph->icmph_type) {
1242 	case ICMP_DEST_UNREACHABLE:
1243 		switch (icmph->icmph_code) {
1244 		case ICMP_FRAGMENTATION_NEEDED:
1245 			/*
1246 			 * IP has already adjusted the path MTU.
1247 			 */
1248 			break;
1249 		case ICMP_PORT_UNREACHABLE:
1250 		case ICMP_PROTOCOL_UNREACHABLE:
1251 			error = ECONNREFUSED;
1252 			break;
1253 		default:
1254 			/* Transient errors */
1255 			break;
1256 		}
1257 		break;
1258 	default:
1259 		/* Transient errors */
1260 		break;
1261 	}
1262 	if (error == 0) {
1263 		freemsg(mp);
1264 		return;
1265 	}
1266 
1267 	/*
1268 	 * Deliver T_UDERROR_IND when the application has asked for it.
1269 	 * The socket layer enables this automatically when connected.
1270 	 */
1271 	if (!udp->udp_dgram_errind) {
1272 		freemsg(mp);
1273 		return;
1274 	}
1275 
1276 
1277 	switch (udp->udp_family) {
1278 	case AF_INET:
1279 		sin = sin_null;
1280 		sin.sin_family = AF_INET;
1281 		sin.sin_addr.s_addr = ipha->ipha_dst;
1282 		sin.sin_port = udpha->uha_dst_port;
1283 		if (IPCL_IS_NONSTR(connp)) {
1284 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1285 			if (udp->udp_state == TS_DATA_XFER) {
1286 				if (sin.sin_port == udp->udp_dstport &&
1287 				    sin.sin_addr.s_addr ==
1288 				    V4_PART_OF_V6(udp->udp_v6dst)) {
1289 
1290 					rw_exit(&udp->udp_rwlock);
1291 					(*connp->conn_upcalls->su_set_error)
1292 					    (connp->conn_upper_handle, error);
1293 					goto done;
1294 				}
1295 			} else {
1296 				udp->udp_delayed_error = error;
1297 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1298 			}
1299 			rw_exit(&udp->udp_rwlock);
1300 		} else {
1301 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1302 			    NULL, 0, error);
1303 		}
1304 		break;
1305 	case AF_INET6:
1306 		sin6 = sin6_null;
1307 		sin6.sin6_family = AF_INET6;
1308 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1309 		sin6.sin6_port = udpha->uha_dst_port;
1310 		if (IPCL_IS_NONSTR(connp)) {
1311 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1312 			if (udp->udp_state == TS_DATA_XFER) {
1313 				if (sin6.sin6_port == udp->udp_dstport &&
1314 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1315 				    &udp->udp_v6dst)) {
1316 					rw_exit(&udp->udp_rwlock);
1317 					(*connp->conn_upcalls->su_set_error)
1318 					    (connp->conn_upper_handle, error);
1319 					goto done;
1320 				}
1321 			} else {
1322 				udp->udp_delayed_error = error;
1323 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1324 			}
1325 			rw_exit(&udp->udp_rwlock);
1326 		} else {
1327 
1328 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1329 			    NULL, 0, error);
1330 		}
1331 		break;
1332 	}
1333 	if (mp1 != NULL)
1334 		putnext(connp->conn_rq, mp1);
1335 done:
1336 	freemsg(mp);
1337 }
1338 
1339 /*
1340  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1341  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1342  * Assumes that IP has pulled up all the extension headers as well as the
1343  * ICMPv6 header.
1344  */
1345 static void
1346 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
1347 {
1348 	icmp6_t		*icmp6;
1349 	ip6_t		*ip6h, *outer_ip6h;
1350 	uint16_t	iph_hdr_length;
1351 	uint8_t		*nexthdrp;
1352 	udpha_t		*udpha;
1353 	sin6_t		sin6;
1354 	mblk_t		*mp1;
1355 	int		error = 0;
1356 	udp_t		*udp = connp->conn_udp;
1357 	udp_stack_t	*us = udp->udp_us;
1358 
1359 	outer_ip6h = (ip6_t *)mp->b_rptr;
1360 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1361 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1362 	else
1363 		iph_hdr_length = IPV6_HDR_LEN;
1364 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1365 	ip6h = (ip6_t *)&icmp6[1];
1366 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1367 		freemsg(mp);
1368 		return;
1369 	}
1370 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1371 
1372 	switch (icmp6->icmp6_type) {
1373 	case ICMP6_DST_UNREACH:
1374 		switch (icmp6->icmp6_code) {
1375 		case ICMP6_DST_UNREACH_NOPORT:
1376 			error = ECONNREFUSED;
1377 			break;
1378 		case ICMP6_DST_UNREACH_ADMIN:
1379 		case ICMP6_DST_UNREACH_NOROUTE:
1380 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1381 		case ICMP6_DST_UNREACH_ADDR:
1382 			/* Transient errors */
1383 			break;
1384 		default:
1385 			break;
1386 		}
1387 		break;
1388 	case ICMP6_PACKET_TOO_BIG: {
1389 		struct T_unitdata_ind	*tudi;
1390 		struct T_opthdr		*toh;
1391 		size_t			udi_size;
1392 		mblk_t			*newmp;
1393 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1394 		    sizeof (struct ip6_mtuinfo);
1395 		sin6_t			*sin6;
1396 		struct ip6_mtuinfo	*mtuinfo;
1397 
1398 		/*
1399 		 * If the application has requested to receive path mtu
1400 		 * information, send up an empty message containing an
1401 		 * IPV6_PATHMTU ancillary data item.
1402 		 */
1403 		if (!udp->udp_ipv6_recvpathmtu)
1404 			break;
1405 
1406 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1407 		    opt_length;
1408 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1409 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
1410 			break;
1411 		}
1412 
1413 		/*
1414 		 * newmp->b_cont is left to NULL on purpose.  This is an
1415 		 * empty message containing only ancillary data.
1416 		 */
1417 		newmp->b_datap->db_type = M_PROTO;
1418 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1419 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1420 		tudi->PRIM_type = T_UNITDATA_IND;
1421 		tudi->SRC_length = sizeof (sin6_t);
1422 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1423 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1424 		tudi->OPT_length = opt_length;
1425 
1426 		sin6 = (sin6_t *)&tudi[1];
1427 		bzero(sin6, sizeof (sin6_t));
1428 		sin6->sin6_family = AF_INET6;
1429 		sin6->sin6_addr = udp->udp_v6dst;
1430 
1431 		toh = (struct T_opthdr *)&sin6[1];
1432 		toh->level = IPPROTO_IPV6;
1433 		toh->name = IPV6_PATHMTU;
1434 		toh->len = opt_length;
1435 		toh->status = 0;
1436 
1437 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1438 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1439 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1440 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1441 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1442 		/*
1443 		 * We've consumed everything we need from the original
1444 		 * message.  Free it, then send our empty message.
1445 		 */
1446 		freemsg(mp);
1447 		if (!IPCL_IS_NONSTR(connp)) {
1448 			putnext(connp->conn_rq, newmp);
1449 		} else {
1450 			(*connp->conn_upcalls->su_recv)
1451 			    (connp->conn_upper_handle, newmp, 0, 0, &error,
1452 			    NULL);
1453 		}
1454 		return;
1455 	}
1456 	case ICMP6_TIME_EXCEEDED:
1457 		/* Transient errors */
1458 		break;
1459 	case ICMP6_PARAM_PROB:
1460 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1461 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1462 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1463 		    (uchar_t *)nexthdrp) {
1464 			error = ECONNREFUSED;
1465 			break;
1466 		}
1467 		break;
1468 	}
1469 	if (error == 0) {
1470 		freemsg(mp);
1471 		return;
1472 	}
1473 
1474 	/*
1475 	 * Deliver T_UDERROR_IND when the application has asked for it.
1476 	 * The socket layer enables this automatically when connected.
1477 	 */
1478 	if (!udp->udp_dgram_errind) {
1479 		freemsg(mp);
1480 		return;
1481 	}
1482 
1483 	sin6 = sin6_null;
1484 	sin6.sin6_family = AF_INET6;
1485 	sin6.sin6_addr = ip6h->ip6_dst;
1486 	sin6.sin6_port = udpha->uha_dst_port;
1487 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1488 
1489 	if (IPCL_IS_NONSTR(connp)) {
1490 		rw_enter(&udp->udp_rwlock, RW_WRITER);
1491 		if (udp->udp_state == TS_DATA_XFER) {
1492 			if (sin6.sin6_port == udp->udp_dstport &&
1493 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1494 			    &udp->udp_v6dst)) {
1495 				rw_exit(&udp->udp_rwlock);
1496 				(*connp->conn_upcalls->su_set_error)
1497 				    (connp->conn_upper_handle, error);
1498 				goto done;
1499 			}
1500 		} else {
1501 			udp->udp_delayed_error = error;
1502 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1503 		}
1504 		rw_exit(&udp->udp_rwlock);
1505 	} else {
1506 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1507 		    NULL, 0, error);
1508 		if (mp1 != NULL)
1509 			putnext(connp->conn_rq, mp1);
1510 	}
1511 
1512 done:
1513 	freemsg(mp);
1514 }
1515 
1516 /*
1517  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1518  * The local address is filled in if endpoint is bound. The remote address
1519  * is filled in if remote address has been precified ("connected endpoint")
1520  * (The concept of connected CLTS sockets is alien to published TPI
1521  *  but we support it anyway).
1522  */
1523 static void
1524 udp_addr_req(queue_t *q, mblk_t *mp)
1525 {
1526 	sin_t	*sin;
1527 	sin6_t	*sin6;
1528 	mblk_t	*ackmp;
1529 	struct T_addr_ack *taa;
1530 	udp_t	*udp = Q_TO_UDP(q);
1531 
1532 	/* Make it large enough for worst case */
1533 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1534 	    2 * sizeof (sin6_t), 1);
1535 	if (ackmp == NULL) {
1536 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1537 		return;
1538 	}
1539 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1540 
1541 	bzero(taa, sizeof (struct T_addr_ack));
1542 	ackmp->b_wptr = (uchar_t *)&taa[1];
1543 
1544 	taa->PRIM_type = T_ADDR_ACK;
1545 	ackmp->b_datap->db_type = M_PCPROTO;
1546 	rw_enter(&udp->udp_rwlock, RW_READER);
1547 	/*
1548 	 * Note: Following code assumes 32 bit alignment of basic
1549 	 * data structures like sin_t and struct T_addr_ack.
1550 	 */
1551 	if (udp->udp_state != TS_UNBND) {
1552 		/*
1553 		 * Fill in local address first
1554 		 */
1555 		taa->LOCADDR_offset = sizeof (*taa);
1556 		if (udp->udp_family == AF_INET) {
1557 			taa->LOCADDR_length = sizeof (sin_t);
1558 			sin = (sin_t *)&taa[1];
1559 			/* Fill zeroes and then initialize non-zero fields */
1560 			*sin = sin_null;
1561 			sin->sin_family = AF_INET;
1562 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
1563 			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1564 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
1565 				    sin->sin_addr.s_addr);
1566 			} else {
1567 				/*
1568 				 * INADDR_ANY
1569 				 * udp_v6src is not set, we might be bound to
1570 				 * broadcast/multicast. Use udp_bound_v6src as
1571 				 * local address instead (that could
1572 				 * also still be INADDR_ANY)
1573 				 */
1574 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
1575 				    sin->sin_addr.s_addr);
1576 			}
1577 			sin->sin_port = udp->udp_port;
1578 			ackmp->b_wptr = (uchar_t *)&sin[1];
1579 			if (udp->udp_state == TS_DATA_XFER) {
1580 				/*
1581 				 * connected, fill remote address too
1582 				 */
1583 				taa->REMADDR_length = sizeof (sin_t);
1584 				/* assumed 32-bit alignment */
1585 				taa->REMADDR_offset = taa->LOCADDR_offset +
1586 				    taa->LOCADDR_length;
1587 
1588 				sin = (sin_t *)(ackmp->b_rptr +
1589 				    taa->REMADDR_offset);
1590 				/* initialize */
1591 				*sin = sin_null;
1592 				sin->sin_family = AF_INET;
1593 				sin->sin_addr.s_addr =
1594 				    V4_PART_OF_V6(udp->udp_v6dst);
1595 				sin->sin_port = udp->udp_dstport;
1596 				ackmp->b_wptr = (uchar_t *)&sin[1];
1597 			}
1598 		} else {
1599 			taa->LOCADDR_length = sizeof (sin6_t);
1600 			sin6 = (sin6_t *)&taa[1];
1601 			/* Fill zeroes and then initialize non-zero fields */
1602 			*sin6 = sin6_null;
1603 			sin6->sin6_family = AF_INET6;
1604 			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1605 				sin6->sin6_addr = udp->udp_v6src;
1606 			} else {
1607 				/*
1608 				 * UNSPECIFIED
1609 				 * udp_v6src is not set, we might be bound to
1610 				 * broadcast/multicast. Use udp_bound_v6src as
1611 				 * local address instead (that could
1612 				 * also still be UNSPECIFIED)
1613 				 */
1614 				sin6->sin6_addr =
1615 				    udp->udp_bound_v6src;
1616 			}
1617 			sin6->sin6_port = udp->udp_port;
1618 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1619 			if (udp->udp_state == TS_DATA_XFER) {
1620 				/*
1621 				 * connected, fill remote address too
1622 				 */
1623 				taa->REMADDR_length = sizeof (sin6_t);
1624 				/* assumed 32-bit alignment */
1625 				taa->REMADDR_offset = taa->LOCADDR_offset +
1626 				    taa->LOCADDR_length;
1627 
1628 				sin6 = (sin6_t *)(ackmp->b_rptr +
1629 				    taa->REMADDR_offset);
1630 				/* initialize */
1631 				*sin6 = sin6_null;
1632 				sin6->sin6_family = AF_INET6;
1633 				sin6->sin6_addr = udp->udp_v6dst;
1634 				sin6->sin6_port =  udp->udp_dstport;
1635 				ackmp->b_wptr = (uchar_t *)&sin6[1];
1636 			}
1637 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1638 		}
1639 	}
1640 	rw_exit(&udp->udp_rwlock);
1641 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1642 	qreply(q, ackmp);
1643 }
1644 
1645 static void
1646 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1647 {
1648 	if (udp->udp_family == AF_INET) {
1649 		*tap = udp_g_t_info_ack_ipv4;
1650 	} else {
1651 		*tap = udp_g_t_info_ack_ipv6;
1652 	}
1653 	tap->CURRENT_state = udp->udp_state;
1654 	tap->OPT_size = udp_max_optsize;
1655 }
1656 
1657 static void
1658 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1659     t_uscalar_t cap_bits1)
1660 {
1661 	tcap->CAP_bits1 = 0;
1662 
1663 	if (cap_bits1 & TC1_INFO) {
1664 		udp_copy_info(&tcap->INFO_ack, udp);
1665 		tcap->CAP_bits1 |= TC1_INFO;
1666 	}
1667 }
1668 
1669 /*
1670  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1671  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1672  * udp_g_t_info_ack.  The current state of the stream is copied from
1673  * udp_state.
1674  */
1675 static void
1676 udp_capability_req(queue_t *q, mblk_t *mp)
1677 {
1678 	t_uscalar_t		cap_bits1;
1679 	struct T_capability_ack	*tcap;
1680 	udp_t	*udp = Q_TO_UDP(q);
1681 
1682 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1683 
1684 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1685 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1686 	if (!mp)
1687 		return;
1688 
1689 	tcap = (struct T_capability_ack *)mp->b_rptr;
1690 	udp_do_capability_ack(udp, tcap, cap_bits1);
1691 
1692 	qreply(q, mp);
1693 }
1694 
1695 /*
1696  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1697  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1698  * The current state of the stream is copied from udp_state.
1699  */
1700 static void
1701 udp_info_req(queue_t *q, mblk_t *mp)
1702 {
1703 	udp_t *udp = Q_TO_UDP(q);
1704 
1705 	/* Create a T_INFO_ACK message. */
1706 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1707 	    T_INFO_ACK);
1708 	if (!mp)
1709 		return;
1710 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1711 	qreply(q, mp);
1712 }
1713 
1714 /*
1715  * IP recognizes seven kinds of bind requests:
1716  *
1717  * - A zero-length address binds only to the protocol number.
1718  *
1719  * - A 4-byte address is treated as a request to
1720  * validate that the address is a valid local IPv4
1721  * address, appropriate for an application to bind to.
1722  * IP does the verification, but does not make any note
1723  * of the address at this time.
1724  *
1725  * - A 16-byte address contains is treated as a request
1726  * to validate a local IPv6 address, as the 4-byte
1727  * address case above.
1728  *
1729  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
1730  * use it for the inbound fanout of packets.
1731  *
1732  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
1733  * use it for the inbound fanout of packets.
1734  *
1735  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
1736  * information consisting of local and remote addresses
1737  * and ports.  In this case, the addresses are both
1738  * validated as appropriate for this operation, and, if
1739  * so, the information is retained for use in the
1740  * inbound fanout.
1741  *
1742  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
1743  * fanout information, like the 12-byte case above.
1744  *
1745  * IP will also fill in the IRE request mblk with information
1746  * regarding our peer.  In all cases, we notify IP of our protocol
1747  * type by appending a single protocol byte to the bind request.
1748  */
1749 static mblk_t *
1750 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
1751 {
1752 	char	*cp;
1753 	mblk_t	*mp;
1754 	struct T_bind_req *tbr;
1755 	ipa_conn_t	*ac;
1756 	ipa6_conn_t	*ac6;
1757 	sin_t		*sin;
1758 	sin6_t		*sin6;
1759 
1760 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
1761 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
1762 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
1763 	if (!mp)
1764 		return (mp);
1765 	mp->b_datap->db_type = M_PROTO;
1766 	tbr = (struct T_bind_req *)mp->b_rptr;
1767 	tbr->PRIM_type = bind_prim;
1768 	tbr->ADDR_offset = sizeof (*tbr);
1769 	tbr->CONIND_number = 0;
1770 	tbr->ADDR_length = addr_length;
1771 	cp = (char *)&tbr[1];
1772 	switch (addr_length) {
1773 	case sizeof (ipa_conn_t):
1774 		ASSERT(udp->udp_family == AF_INET);
1775 		/* Append a request for an IRE */
1776 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1777 		if (!mp->b_cont) {
1778 			freemsg(mp);
1779 			return (NULL);
1780 		}
1781 		mp->b_cont->b_wptr += sizeof (ire_t);
1782 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1783 
1784 		/* cp known to be 32 bit aligned */
1785 		ac = (ipa_conn_t *)cp;
1786 		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
1787 		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
1788 		ac->ac_fport = udp->udp_dstport;
1789 		ac->ac_lport = udp->udp_port;
1790 		break;
1791 
1792 	case sizeof (ipa6_conn_t):
1793 		ASSERT(udp->udp_family == AF_INET6);
1794 		/* Append a request for an IRE */
1795 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1796 		if (!mp->b_cont) {
1797 			freemsg(mp);
1798 			return (NULL);
1799 		}
1800 		mp->b_cont->b_wptr += sizeof (ire_t);
1801 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1802 
1803 		/* cp known to be 32 bit aligned */
1804 		ac6 = (ipa6_conn_t *)cp;
1805 		ac6->ac6_laddr = udp->udp_v6src;
1806 		ac6->ac6_faddr = udp->udp_v6dst;
1807 		ac6->ac6_fport = udp->udp_dstport;
1808 		ac6->ac6_lport = udp->udp_port;
1809 		break;
1810 
1811 	case sizeof (sin_t):
1812 		ASSERT(udp->udp_family == AF_INET);
1813 		/* Append a request for an IRE */
1814 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1815 		if (!mp->b_cont) {
1816 			freemsg(mp);
1817 			return (NULL);
1818 		}
1819 		mp->b_cont->b_wptr += sizeof (ire_t);
1820 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1821 
1822 		sin = (sin_t *)cp;
1823 		*sin = sin_null;
1824 		sin->sin_family = AF_INET;
1825 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
1826 		sin->sin_port = udp->udp_port;
1827 		break;
1828 
1829 	case sizeof (sin6_t):
1830 		ASSERT(udp->udp_family == AF_INET6);
1831 		/* Append a request for an IRE */
1832 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1833 		if (!mp->b_cont) {
1834 			freemsg(mp);
1835 			return (NULL);
1836 		}
1837 		mp->b_cont->b_wptr += sizeof (ire_t);
1838 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1839 
1840 		sin6 = (sin6_t *)cp;
1841 		*sin6 = sin6_null;
1842 		sin6->sin6_family = AF_INET6;
1843 		sin6->sin6_addr = udp->udp_bound_v6src;
1844 		sin6->sin6_port = udp->udp_port;
1845 		break;
1846 	}
1847 	/* Add protocol number to end */
1848 	cp[addr_length] = (char)IPPROTO_UDP;
1849 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
1850 	return (mp);
1851 }
1852 
1853 /* For /dev/udp aka AF_INET open */
1854 static int
1855 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1856 {
1857 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1858 }
1859 
1860 /* For /dev/udp6 aka AF_INET6 open */
1861 static int
1862 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1863 {
1864 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1865 }
1866 
1867 /*
1868  * This is the open routine for udp.  It allocates a udp_t structure for
1869  * the stream and, on the first open of the module, creates an ND table.
1870  */
1871 /*ARGSUSED2*/
1872 static int
1873 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1874     boolean_t isv6)
1875 {
1876 	int		error;
1877 	udp_t		*udp;
1878 	conn_t		*connp;
1879 	dev_t		conn_dev;
1880 	udp_stack_t	*us;
1881 	vmem_t		*minor_arena;
1882 
1883 	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);
1884 
1885 	/* If the stream is already open, return immediately. */
1886 	if (q->q_ptr != NULL)
1887 		return (0);
1888 
1889 	if (sflag == MODOPEN)
1890 		return (EINVAL);
1891 
1892 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1893 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1894 		minor_arena = ip_minor_arena_la;
1895 	} else {
1896 		/*
1897 		 * Either minor numbers in the large arena were exhausted
1898 		 * or a non socket application is doing the open.
1899 		 * Try to allocate from the small arena.
1900 		 */
1901 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1902 			return (EBUSY);
1903 
1904 		minor_arena = ip_minor_arena_sa;
1905 	}
1906 
1907 	if (flag & SO_FALLBACK) {
1908 		/*
1909 		 * Non streams socket needs a stream to fallback to
1910 		 */
1911 		RD(q)->q_ptr = (void *)conn_dev;
1912 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1913 		WR(q)->q_ptr = (void *)minor_arena;
1914 		qprocson(q);
1915 		return (0);
1916 	}
1917 
1918 	connp = udp_do_open(credp, isv6, KM_SLEEP);
1919 	if (connp == NULL) {
1920 		inet_minor_free(minor_arena, conn_dev);
1921 		return (ENOMEM);
1922 	}
1923 	udp = connp->conn_udp;
1924 	us = udp->udp_us;
1925 
1926 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1927 	connp->conn_dev = conn_dev;
1928 	connp->conn_minor_arena = minor_arena;
1929 
1930 	/*
1931 	 * Initialize the udp_t structure for this stream.
1932 	 */
1933 	q->q_ptr = connp;
1934 	WR(q)->q_ptr = connp;
1935 	connp->conn_rq = q;
1936 	connp->conn_wq = WR(q);
1937 
1938 	rw_enter(&udp->udp_rwlock, RW_WRITER);
1939 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
1940 	ASSERT(connp->conn_udp == udp);
1941 	ASSERT(udp->udp_connp == connp);
1942 
1943 	if (flag & SO_SOCKSTR) {
1944 		connp->conn_flags |= IPCL_SOCKET;
1945 		udp->udp_issocket = B_TRUE;
1946 		udp->udp_direct_sockfs = B_TRUE;
1947 	}
1948 
1949 	q->q_hiwat = us->us_recv_hiwat;
1950 	WR(q)->q_hiwat = us->us_xmit_hiwat;
1951 	WR(q)->q_lowat = us->us_xmit_lowat;
1952 
1953 	qprocson(q);
1954 
1955 	if (udp->udp_family == AF_INET6) {
1956 		/* Build initial header template for transmit */
1957 		if ((error = udp_build_hdrs(udp)) != 0) {
1958 			rw_exit(&udp->udp_rwlock);
1959 			qprocsoff(q);
1960 			inet_minor_free(minor_arena, conn_dev);
1961 			ipcl_conn_destroy(connp);
1962 			return (error);
1963 		}
1964 	}
1965 	rw_exit(&udp->udp_rwlock);
1966 
1967 	/* Set the Stream head write offset and high watermark. */
1968 	(void) proto_set_tx_wroff(q, connp,
1969 	    udp->udp_max_hdr_len + us->us_wroff_extra);
1970 	/* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */
1971 	(void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat));
1972 
1973 	mutex_enter(&connp->conn_lock);
1974 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1975 	mutex_exit(&connp->conn_lock);
1976 	return (0);
1977 }
1978 
1979 /*
1980  * Which UDP options are OK to set through T_UNITDATA_REQ...
 *
 * No filtering is done here: both arguments are ignored and B_TRUE is
 * returned for every (level, name) pair.
1981  */
1982 /* ARGSUSED */
1983 static boolean_t
1984 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1985 {
1986 	return (B_TRUE);
1987 }
1988 
1989 /*
1990  * This routine gets default values of certain options whose default
1991  * values are maintained by protocol specific code.
 *
 * On a match the default is stored through ptr and its size in bytes is
 * returned.  The IPPROTO_IP multicast TTL/loop defaults are written as a
 * single uchar_t, while the IPPROTO_IPV6 defaults are written as an int.
 * The IPV6_UNICAST_HOPS default is read from us_ipv6_hoplimit of the
 * udp_stack_t reached through q.
 *
 * Returns -1 for any level/name pair that is not listed below.
1992  */
1993 /* ARGSUSED */
1994 int
1995 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1996 {
1997 	udp_t		*udp = Q_TO_UDP(q);
1998 	udp_stack_t *us = udp->udp_us;
1999 	int *i1 = (int *)ptr;
2000 
2001 	switch (level) {
2002 	case IPPROTO_IP:
2003 		switch (name) {
2004 		case IP_MULTICAST_TTL:
2005 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
2006 			return (sizeof (uchar_t));
2007 		case IP_MULTICAST_LOOP:
2008 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
2009 			return (sizeof (uchar_t));
2010 		}
2011 		break;
2012 	case IPPROTO_IPV6:
2013 		switch (name) {
2014 		case IPV6_MULTICAST_HOPS:
2015 			*i1 = IP_DEFAULT_MULTICAST_TTL;
2016 			return (sizeof (int));
2017 		case IPV6_MULTICAST_LOOP:
2018 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
2019 			return (sizeof (int));
2020 		case IPV6_UNICAST_HOPS:
2021 			*i1 = us->us_ipv6_hoplimit;
2022 			return (sizeof (int));
2023 		}
2024 		break;
2025 	}
	/* No default is maintained here for this level/name. */
2026 	return (-1);
2027 }
2028 
2029 /*
2030  * This routine retrieves the current status of socket options.
2031  * It returns the size of the option retrieved.
 *
 * The value for (level, name) is written through ptr.  Most options are
 * stored as an int and fall out of the switch to the common
 * "return (sizeof (int))" at the bottom; options with a different size
 * return their own length directly.
 *
 * Other return values produced below:
 *   -1       the level or name is not recognized here, the option cannot
 *            be read, or the level does not match udp_family
 *   -EINVAL  IP_DHCPINIT_IF, IP_NEXTHOP and IP_RECVPKTINFO
 *   0        an IPv6 sticky option whose IPPF_* flag is not set (and
 *            IPV6_HOPOPTS when nothing follows the label)
 * IPV6_PATHMTU returns whatever ip_fill_mtuinfo() returns.
 *
 * The ASSERT below expects udp_rwlock to be read-held on entry.
2032  */
2033 static int
2034 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
2035 {
2036 	udp_t		*udp = connp->conn_udp;
2037 	udp_stack_t	*us = udp->udp_us;
2038 	int		*i1 = (int *)ptr;
2039 	ip6_pkt_t 	*ipp = &udp->udp_sticky_ipp;
2040 	int		len;
2041 
2042 	ASSERT(RW_READ_HELD(&udp->udp_rwlock));
2043 	switch (level) {
2044 	case SOL_SOCKET:
2045 		switch (name) {
2046 		case SO_DEBUG:
2047 			*i1 = udp->udp_debug;
2048 			break;	/* goto sizeof (int) option return */
2049 		case SO_REUSEADDR:
2050 			*i1 = udp->udp_reuseaddr;
2051 			break;	/* goto sizeof (int) option return */
2052 		case SO_TYPE:
2053 			*i1 = SOCK_DGRAM;
2054 			break;	/* goto sizeof (int) option return */
2055 
2056 		/*
2057 		 * The following three items are available here,
2058 		 * but are only meaningful to IP.
2059 		 */
2060 		case SO_DONTROUTE:
2061 			*i1 = udp->udp_dontroute;
2062 			break;	/* goto sizeof (int) option return */
2063 		case SO_USELOOPBACK:
2064 			*i1 = udp->udp_useloopback;
2065 			break;	/* goto sizeof (int) option return */
2066 		case SO_BROADCAST:
2067 			*i1 = udp->udp_broadcast;
2068 			break;	/* goto sizeof (int) option return */
2069 
2070 		case SO_SNDBUF:
2071 			*i1 = udp->udp_xmit_hiwat;
2072 			break;	/* goto sizeof (int) option return */
2073 		case SO_RCVBUF:
2074 			*i1 = udp->udp_rcv_disply_hiwat;
2075 			break;	/* goto sizeof (int) option return */
2076 		case SO_DGRAM_ERRIND:
2077 			*i1 = udp->udp_dgram_errind;
2078 			break;	/* goto sizeof (int) option return */
2079 		case SO_RECVUCRED:
2080 			*i1 = udp->udp_recvucred;
2081 			break;	/* goto sizeof (int) option return */
2082 		case SO_TIMESTAMP:
2083 			*i1 = udp->udp_timestamp;
2084 			break;	/* goto sizeof (int) option return */
2085 		case SO_ANON_MLP:
2086 			*i1 = connp->conn_anon_mlp;
2087 			break;	/* goto sizeof (int) option return */
2088 		case SO_MAC_EXEMPT:
2089 			*i1 = connp->conn_mac_exempt;
2090 			break;	/* goto sizeof (int) option return */
2091 		case SO_ALLZONES:
2092 			*i1 = connp->conn_allzones;
2093 			break;	/* goto sizeof (int) option return */
2094 		case SO_EXCLBIND:
			/* Reports the option value itself when set, else 0. */
2095 			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
2096 			break;
2097 		case SO_PROTOTYPE:
2098 			*i1 = IPPROTO_UDP;
2099 			break;
2100 		case SO_DOMAIN:
2101 			*i1 = udp->udp_family;
2102 			break;
2103 		default:
2104 			return (-1);
2105 		}
2106 		break;
2107 	case IPPROTO_IP:
		/* IPv4-level options are only answered for AF_INET endpoints. */
2108 		if (udp->udp_family != AF_INET)
2109 			return (-1);
2110 		switch (name) {
2111 		case IP_OPTIONS:
2112 		case T_IP_OPTIONS:
			/*
			 * Copy out only what follows the first udp_label_len
			 * bytes of the saved receive options.
			 * NOTE(review): len is returned as-is even when it is
			 * <= 0; confirm udp_ip_rcv_options_len can never be
			 * smaller than udp_label_len.
			 */
2113 			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
2114 			if (len > 0) {
2115 				bcopy(udp->udp_ip_rcv_options +
2116 				    udp->udp_label_len, ptr, len);
2117 			}
2118 			return (len);
2119 		case IP_TOS:
2120 		case T_IP_TOS:
2121 			*i1 = (int)udp->udp_type_of_service;
2122 			break;	/* goto sizeof (int) option return */
2123 		case IP_TTL:
2124 			*i1 = (int)udp->udp_ttl;
2125 			break;	/* goto sizeof (int) option return */
2126 		case IP_DHCPINIT_IF:
2127 			return (-EINVAL);
2128 		case IP_NEXTHOP:
2129 		case IP_RECVPKTINFO:
2130 			/*
2131 			 * This also handles IP_PKTINFO.
2132 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2133 			 * Differentiation is based on the size of the argument
2134 			 * passed in.
2135 			 * This option is handled in IP which will return an
2136 			 * error for IP_PKTINFO as it's not supported as a
2137 			 * sticky option.
2138 			 */
2139 			return (-EINVAL);
2140 		case IP_MULTICAST_IF:
2141 			/* 0 address if not set */
2142 			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
2143 			return (sizeof (ipaddr_t));
2144 		case IP_MULTICAST_TTL:
2145 			*(uchar_t *)ptr = udp->udp_multicast_ttl;
2146 			return (sizeof (uchar_t));
2147 		case IP_MULTICAST_LOOP:
2148 			*ptr = connp->conn_multicast_loop;
2149 			return (sizeof (uint8_t));
2150 		case IP_RECVOPTS:
2151 			*i1 = udp->udp_recvopts;
2152 			break;	/* goto sizeof (int) option return */
2153 		case IP_RECVDSTADDR:
2154 			*i1 = udp->udp_recvdstaddr;
2155 			break;	/* goto sizeof (int) option return */
2156 		case IP_RECVIF:
2157 			*i1 = udp->udp_recvif;
2158 			break;	/* goto sizeof (int) option return */
2159 		case IP_RECVSLLA:
2160 			*i1 = udp->udp_recvslla;
2161 			break;	/* goto sizeof (int) option return */
2162 		case IP_RECVTTL:
2163 			*i1 = udp->udp_recvttl;
2164 			break;	/* goto sizeof (int) option return */
2165 		case IP_ADD_MEMBERSHIP:
2166 		case IP_DROP_MEMBERSHIP:
2167 		case IP_BLOCK_SOURCE:
2168 		case IP_UNBLOCK_SOURCE:
2169 		case IP_ADD_SOURCE_MEMBERSHIP:
2170 		case IP_DROP_SOURCE_MEMBERSHIP:
2171 		case MCAST_JOIN_GROUP:
2172 		case MCAST_LEAVE_GROUP:
2173 		case MCAST_BLOCK_SOURCE:
2174 		case MCAST_UNBLOCK_SOURCE:
2175 		case MCAST_JOIN_SOURCE_GROUP:
2176 		case MCAST_LEAVE_SOURCE_GROUP:
2177 			/* cannot "get" the value for these */
2178 			return (-1);
2179 		case IP_BOUND_IF:
2180 			/* Zero if not set */
2181 			*i1 = udp->udp_bound_if;
2182 			break;	/* goto sizeof (int) option return */
2183 		case IP_UNSPEC_SRC:
2184 			*i1 = udp->udp_unspec_source;
2185 			break;	/* goto sizeof (int) option return */
2186 		case IP_BROADCAST_TTL:
2187 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
2188 			return (sizeof (uchar_t));
2189 		default:
2190 			return (-1);
2191 		}
2192 		break;
2193 	case IPPROTO_IPV6:
		/* IPv6-level options are only answered for AF_INET6 endpoints. */
2194 		if (udp->udp_family != AF_INET6)
2195 			return (-1);
2196 		switch (name) {
2197 		case IPV6_UNICAST_HOPS:
2198 			*i1 = (unsigned int)udp->udp_ttl;
2199 			break;	/* goto sizeof (int) option return */
2200 		case IPV6_MULTICAST_IF:
2201 			/* 0 index if not set */
2202 			*i1 = udp->udp_multicast_if_index;
2203 			break;	/* goto sizeof (int) option return */
2204 		case IPV6_MULTICAST_HOPS:
2205 			*i1 = udp->udp_multicast_ttl;
2206 			break;	/* goto sizeof (int) option return */
2207 		case IPV6_MULTICAST_LOOP:
2208 			*i1 = connp->conn_multicast_loop;
2209 			break;	/* goto sizeof (int) option return */
2210 		case IPV6_JOIN_GROUP:
2211 		case IPV6_LEAVE_GROUP:
2212 		case MCAST_JOIN_GROUP:
2213 		case MCAST_LEAVE_GROUP:
2214 		case MCAST_BLOCK_SOURCE:
2215 		case MCAST_UNBLOCK_SOURCE:
2216 		case MCAST_JOIN_SOURCE_GROUP:
2217 		case MCAST_LEAVE_SOURCE_GROUP:
2218 			/* cannot "get" the value for these */
2219 			return (-1);
2220 		case IPV6_BOUND_IF:
2221 			/* Zero if not set */
2222 			*i1 = udp->udp_bound_if;
2223 			break;	/* goto sizeof (int) option return */
2224 		case IPV6_UNSPEC_SRC:
2225 			*i1 = udp->udp_unspec_source;
2226 			break;	/* goto sizeof (int) option return */
2227 		case IPV6_RECVPKTINFO:
2228 			*i1 = udp->udp_ip_recvpktinfo;
2229 			break;	/* goto sizeof (int) option return */
2230 		case IPV6_RECVTCLASS:
2231 			*i1 = udp->udp_ipv6_recvtclass;
2232 			break;	/* goto sizeof (int) option return */
2233 		case IPV6_RECVPATHMTU:
2234 			*i1 = udp->udp_ipv6_recvpathmtu;
2235 			break;	/* goto sizeof (int) option return */
2236 		case IPV6_RECVHOPLIMIT:
2237 			*i1 = udp->udp_ipv6_recvhoplimit;
2238 			break;	/* goto sizeof (int) option return */
2239 		case IPV6_RECVHOPOPTS:
2240 			*i1 = udp->udp_ipv6_recvhopopts;
2241 			break;	/* goto sizeof (int) option return */
2242 		case IPV6_RECVDSTOPTS:
2243 			*i1 = udp->udp_ipv6_recvdstopts;
2244 			break;	/* goto sizeof (int) option return */
2245 		case _OLD_IPV6_RECVDSTOPTS:
2246 			*i1 = udp->udp_old_ipv6_recvdstopts;
2247 			break;	/* goto sizeof (int) option return */
2248 		case IPV6_RECVRTHDRDSTOPTS:
2249 			*i1 = udp->udp_ipv6_recvrthdrdstopts;
2250 			break;	/* goto sizeof (int) option return */
2251 		case IPV6_RECVRTHDR:
2252 			*i1 = udp->udp_ipv6_recvrthdr;
2253 			break;	/* goto sizeof (int) option return */
2254 		case IPV6_PKTINFO: {
2255 			/* XXX assumes that caller has room for max size! */
2256 			struct in6_pktinfo *pkti;
2257 
			/*
			 * Each field is reported from the sticky options
			 * only when its IPPF_* flag is set; otherwise it is
			 * returned as zero / the all-zeros address.
			 */
2258 			pkti = (struct in6_pktinfo *)ptr;
2259 			if (ipp->ipp_fields & IPPF_IFINDEX)
2260 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
2261 			else
2262 				pkti->ipi6_ifindex = 0;
2263 			if (ipp->ipp_fields & IPPF_ADDR)
2264 				pkti->ipi6_addr = ipp->ipp_addr;
2265 			else
2266 				pkti->ipi6_addr = ipv6_all_zeros;
2267 			return (sizeof (struct in6_pktinfo));
2268 		}
2269 		case IPV6_TCLASS:
2270 			if (ipp->ipp_fields & IPPF_TCLASS)
2271 				*i1 = ipp->ipp_tclass;
2272 			else
2273 				*i1 = IPV6_FLOW_TCLASS(
2274 				    IPV6_DEFAULT_VERS_AND_FLOW);
2275 			break;	/* goto sizeof (int) option return */
2276 		case IPV6_NEXTHOP: {
2277 			sin6_t *sin6 = (sin6_t *)ptr;
2278 
2279 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
2280 				return (0);
			/* Start from sin6_null so unset fields are defined. */
2281 			*sin6 = sin6_null;
2282 			sin6->sin6_family = AF_INET6;
2283 			sin6->sin6_addr = ipp->ipp_nexthop;
2284 			return (sizeof (sin6_t));
2285 		}
2286 		case IPV6_HOPOPTS:
2287 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
2288 				return (0);
			/* Nothing beyond the label portion to report. */
2289 			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
2290 				return (0);
2291 			/*
2292 			 * The cipso/label option is added by kernel.
2293 			 * User is not usually aware of this option.
2294 			 * We copy out the hbh opt after the label option.
2295 			 */
2296 			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
2297 			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
2298 			if (udp->udp_label_len_v6 > 0) {
				/*
				 * The copy started past the label, so the
				 * first two output bytes are rewritten:
				 * byte 0 is taken from the start of the
				 * stored header and byte 1 is the copied
				 * length rounded up to 8-byte units, minus
				 * one.
				 */
2299 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
2300 				ptr[1] = (ipp->ipp_hopoptslen -
2301 				    udp->udp_label_len_v6 + 7) / 8 - 1;
2302 			}
2303 			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
2304 		case IPV6_RTHDRDSTOPTS:
2305 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
2306 				return (0);
2307 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
2308 			return (ipp->ipp_rtdstoptslen);
2309 		case IPV6_RTHDR:
2310 			if (!(ipp->ipp_fields & IPPF_RTHDR))
2311 				return (0);
2312 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
2313 			return (ipp->ipp_rthdrlen);
2314 		case IPV6_DSTOPTS:
2315 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
2316 				return (0);
2317 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
2318 			return (ipp->ipp_dstoptslen);
2319 		case IPV6_PATHMTU:
2320 			return (ip_fill_mtuinfo(&udp->udp_v6dst,
2321 			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
2322 			    us->us_netstack));
2323 		default:
2324 			return (-1);
2325 		}
2326 		break;
2327 	case IPPROTO_UDP:
2328 		switch (name) {
2329 		case UDP_ANONPRIVBIND:
2330 			*i1 = udp->udp_anon_priv_bind;
2331 			break;
2332 		case UDP_EXCLBIND:
2333 			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
2334 			break;
2335 		case UDP_RCVHDR:
2336 			*i1 = udp->udp_rcvhdr ? 1 : 0;
2337 			break;
2338 		case UDP_NAT_T_ENDPOINT:
2339 			*i1 = udp->udp_nat_t_endpoint;
2340 			break;
2341 		default:
2342 			return (-1);
2343 		}
2344 		break;
2345 	default:
2346 		return (-1);
2347 	}
	/* Common exit for every option that was stored through i1. */
2348 	return (sizeof (int));
2349 }
2350 
/*
 * Queue-based entry point for reading an option.
 *
 * Looks up the udp_t and conn_t for q, takes udp_rwlock as reader for the
 * duration of the udp_opt_get() call, and returns that call's result
 * unchanged.  Despite its name, "err" carries whatever udp_opt_get()
 * returned, which may be a size as well as a negative value.
 */
2351 int
2352 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
2353 {
2354 	udp_t   *udp;
2355 	int	err;
2356 
2357 	udp = Q_TO_UDP(q);
2358 
	/* udp_opt_get() asserts that the lock is read-held. */
2359 	rw_enter(&udp->udp_rwlock, RW_READER);
2360 	err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
2361 	rw_exit(&udp->udp_rwlock);
2362 	return (err);
2363 }
2364 
2365 /*
2366  * This routine sets socket options.
2367  */
2368 /* ARGSUSED */
2369 static int
2370 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
2371     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
2372     void *thisdg_attrs, boolean_t checkonly)
2373 {
2374 	udpattrs_t *attrs = thisdg_attrs;
2375 	int	*i1 = (int *)invalp;
2376 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2377 	udp_t	*udp = connp->conn_udp;
2378 	udp_stack_t	*us = udp->udp_us;
2379 	int	error;
2380 	uint_t	newlen;
2381 	size_t	sth_wroff;
2382 
2383 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
2384 	/*
2385 	 * For fixed length options, no sanity check
2386 	 * of passed in length is done. It is assumed *_optcom_req()
2387 	 * routines do the right thing.
2388 	 */
2389 	switch (level) {
2390 	case SOL_SOCKET:
2391 		switch (name) {
2392 		case SO_REUSEADDR:
2393 			if (!checkonly) {
2394 				udp->udp_reuseaddr = onoff;
2395 				PASS_OPT_TO_IP(connp);
2396 			}
2397 			break;
2398 		case SO_DEBUG:
2399 			if (!checkonly)
2400 				udp->udp_debug = onoff;
2401 			break;
2402 		/*
2403 		 * The following three items are available here,
2404 		 * but are only meaningful to IP.
2405 		 */
2406 		case SO_DONTROUTE:
2407 			if (!checkonly) {
2408 				udp->udp_dontroute = onoff;
2409 				PASS_OPT_TO_IP(connp);
2410 			}
2411 			break;
2412 		case SO_USELOOPBACK:
2413 			if (!checkonly) {
2414 				udp->udp_useloopback = onoff;
2415 				PASS_OPT_TO_IP(connp);
2416 			}
2417 			break;
2418 		case SO_BROADCAST:
2419 			if (!checkonly) {
2420 				udp->udp_broadcast = onoff;
2421 				PASS_OPT_TO_IP(connp);
2422 			}
2423 			break;
2424 
2425 		case SO_SNDBUF:
2426 			if (*i1 > us->us_max_buf) {
2427 				*outlenp = 0;
2428 				return (ENOBUFS);
2429 			}
2430 			if (!checkonly) {
2431 				udp->udp_xmit_hiwat = *i1;
2432 				connp->conn_wq->q_hiwat = *i1;
2433 			}
2434 			break;
2435 		case SO_RCVBUF:
2436 			if (*i1 > us->us_max_buf) {
2437 				*outlenp = 0;
2438 				return (ENOBUFS);
2439 			}
2440 			if (!checkonly) {
2441 				int size;
2442 
2443 				udp->udp_rcv_disply_hiwat = *i1;
2444 				size = udp_set_rcv_hiwat(udp, *i1);
2445 				rw_exit(&udp->udp_rwlock);
2446 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2447 				    size);
2448 				rw_enter(&udp->udp_rwlock, RW_WRITER);
2449 			}
2450 			break;
2451 		case SO_DGRAM_ERRIND:
2452 			if (!checkonly)
2453 				udp->udp_dgram_errind = onoff;
2454 			break;
2455 		case SO_RECVUCRED:
2456 			if (!checkonly)
2457 				udp->udp_recvucred = onoff;
2458 			break;
2459 		case SO_ALLZONES:
2460 			/*
2461 			 * "soft" error (negative)
2462 			 * option not handled at this level
2463 			 * Do not modify *outlenp.
2464 			 */
2465 			return (-EINVAL);
2466 		case SO_TIMESTAMP:
2467 			if (!checkonly)
2468 				udp->udp_timestamp = onoff;
2469 			break;
2470 		case SO_ANON_MLP:
2471 			if (!checkonly) {
2472 				connp->conn_anon_mlp = onoff;
2473 				PASS_OPT_TO_IP(connp);
2474 			}
2475 			break;
2476 		case SO_MAC_EXEMPT:
2477 			if (secpolicy_net_mac_aware(cr) != 0 ||
2478 			    udp->udp_state != TS_UNBND)
2479 				return (EACCES);
2480 			if (!checkonly) {
2481 				connp->conn_mac_exempt = onoff;
2482 				PASS_OPT_TO_IP(connp);
2483 			}
2484 			break;
2485 		case SCM_UCRED: {
2486 			struct ucred_s *ucr;
2487 			cred_t *cr, *newcr;
2488 			ts_label_t *tsl;
2489 
2490 			/*
2491 			 * Only sockets that have proper privileges and are
2492 			 * bound to MLPs will have any other value here, so
2493 			 * this implicitly tests for privilege to set label.
2494 			 */
2495 			if (connp->conn_mlp_type == mlptSingle)
2496 				break;
2497 			ucr = (struct ucred_s *)invalp;
2498 			if (inlen != ucredsize ||
2499 			    ucr->uc_labeloff < sizeof (*ucr) ||
2500 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
2501 				return (EINVAL);
2502 			if (!checkonly) {
2503 				mblk_t *mb;
2504 				pid_t  cpid;
2505 
2506 				if (attrs == NULL ||
2507 				    (mb = attrs->udpattr_mb) == NULL)
2508 					return (EINVAL);
2509 				if ((cr = msg_getcred(mb, &cpid)) == NULL)
2510 					cr = udp->udp_connp->conn_cred;
2511 				ASSERT(cr != NULL);
2512 				if ((tsl = crgetlabel(cr)) == NULL)
2513 					return (EINVAL);
2514 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
2515 				    tsl->tsl_doi, KM_NOSLEEP);
2516 				if (newcr == NULL)
2517 					return (ENOSR);
2518 				mblk_setcred(mb, newcr, cpid);
2519 				attrs->udpattr_credset = B_TRUE;
2520 				crfree(newcr);
2521 			}
2522 			break;
2523 		}
2524 		case SO_EXCLBIND:
2525 			if (!checkonly)
2526 				udp->udp_exclbind = onoff;
2527 			break;
2528 		case SO_RCVTIMEO:
2529 		case SO_SNDTIMEO:
2530 			/*
2531 			 * Pass these two options in order for third part
2532 			 * protocol usage. Here just return directly.
2533 			 */
2534 			return (0);
2535 		default:
2536 			*outlenp = 0;
2537 			return (EINVAL);
2538 		}
2539 		break;
2540 	case IPPROTO_IP:
2541 		if (udp->udp_family != AF_INET) {
2542 			*outlenp = 0;
2543 			return (ENOPROTOOPT);
2544 		}
2545 		switch (name) {
2546 		case IP_OPTIONS:
2547 		case T_IP_OPTIONS:
2548 			/* Save options for use by IP. */
2549 			newlen = inlen + udp->udp_label_len;
2550 			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
2551 				*outlenp = 0;
2552 				return (EINVAL);
2553 			}
2554 			if (checkonly)
2555 				break;
2556 
2557 			/*
2558 			 * Update the stored options taking into account
2559 			 * any CIPSO option which we should not overwrite.
2560 			 */
2561 			if (!tsol_option_set(&udp->udp_ip_snd_options,
2562 			    &udp->udp_ip_snd_options_len,
2563 			    udp->udp_label_len, invalp, inlen)) {
2564 				*outlenp = 0;
2565 				return (ENOMEM);
2566 			}
2567 
2568 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2569 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
2570 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
2571 			rw_exit(&udp->udp_rwlock);
2572 			(void) proto_set_tx_wroff(connp->conn_rq, connp,
2573 			    sth_wroff);
2574 			rw_enter(&udp->udp_rwlock, RW_WRITER);
2575 			break;
2576 
2577 		case IP_TTL:
2578 			if (!checkonly) {
2579 				udp->udp_ttl = (uchar_t)*i1;
2580 			}
2581 			break;
2582 		case IP_TOS:
2583 		case T_IP_TOS:
2584 			if (!checkonly) {
2585 				udp->udp_type_of_service = (uchar_t)*i1;
2586 			}
2587 			break;
2588 		case IP_MULTICAST_IF: {
2589 			/*
2590 			 * TODO should check OPTMGMT reply and undo this if
2591 			 * there is an error.
2592 			 */
2593 			struct in_addr *inap = (struct in_addr *)invalp;
2594 			if (!checkonly) {
2595 				udp->udp_multicast_if_addr =
2596 				    inap->s_addr;
2597 				PASS_OPT_TO_IP(connp);
2598 			}
2599 			break;
2600 		}
2601 		case IP_MULTICAST_TTL:
2602 			if (!checkonly)
2603 				udp->udp_multicast_ttl = *invalp;
2604 			break;
2605 		case IP_MULTICAST_LOOP:
2606 			if (!checkonly) {
2607 				connp->conn_multicast_loop = *invalp;
2608 				PASS_OPT_TO_IP(connp);
2609 			}
2610 			break;
2611 		case IP_RECVOPTS:
2612 			if (!checkonly)
2613 				udp->udp_recvopts = onoff;
2614 			break;
2615 		case IP_RECVDSTADDR:
2616 			if (!checkonly)
2617 				udp->udp_recvdstaddr = onoff;
2618 			break;
2619 		case IP_RECVIF:
2620 			if (!checkonly) {
2621 				udp->udp_recvif = onoff;
2622 				PASS_OPT_TO_IP(connp);
2623 			}
2624 			break;
2625 		case IP_RECVSLLA:
2626 			if (!checkonly) {
2627 				udp->udp_recvslla = onoff;
2628 				PASS_OPT_TO_IP(connp);
2629 			}
2630 			break;
2631 		case IP_RECVTTL:
2632 			if (!checkonly)
2633 				udp->udp_recvttl = onoff;
2634 			break;
2635 		case IP_PKTINFO: {
2636 			/*
2637 			 * This also handles IP_RECVPKTINFO.
2638 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
2639 			 * Differentiation is based on the size of the
2640 			 * argument passed in.
2641 			 */
2642 			struct in_pktinfo *pktinfop;
2643 			ip4_pkt_t *attr_pktinfop;
2644 
2645 			if (checkonly)
2646 				break;
2647 
2648 			if (inlen == sizeof (int)) {
2649 				/*
2650 				 * This is IP_RECVPKTINFO option.
2651 				 * Keep a local copy of whether this option is
2652 				 * set or not and pass it down to IP for
2653 				 * processing.
2654 				 */
2655 
2656 				udp->udp_ip_recvpktinfo = onoff;
2657 				return (-EINVAL);
2658 			}
2659 
2660 			if (attrs == NULL ||
2661 			    (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
2662 				/*
2663 				 * sticky option or no buffer to return
2664 				 * the results.
2665 				 */
2666 				return (EINVAL);
2667 			}
2668 
2669 			if (inlen != sizeof (struct in_pktinfo))
2670 				return (EINVAL);
2671 
2672 			pktinfop = (struct in_pktinfo *)invalp;
2673 
2674 			/*
2675 			 * At least one of the values should be specified
2676 			 */
2677 			if (pktinfop->ipi_ifindex == 0 &&
2678 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2679 				return (EINVAL);
2680 			}
2681 
2682 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2683 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2684 
2685 			break;
2686 		}
2687 		case IP_ADD_MEMBERSHIP:
2688 		case IP_DROP_MEMBERSHIP:
2689 		case IP_BLOCK_SOURCE:
2690 		case IP_UNBLOCK_SOURCE:
2691 		case IP_ADD_SOURCE_MEMBERSHIP:
2692 		case IP_DROP_SOURCE_MEMBERSHIP:
2693 		case MCAST_JOIN_GROUP:
2694 		case MCAST_LEAVE_GROUP:
2695 		case MCAST_BLOCK_SOURCE:
2696 		case MCAST_UNBLOCK_SOURCE:
2697 		case MCAST_JOIN_SOURCE_GROUP:
2698 		case MCAST_LEAVE_SOURCE_GROUP:
2699 		case IP_SEC_OPT:
2700 		case IP_NEXTHOP:
2701 		case IP_DHCPINIT_IF:
2702 			/*
2703 			 * "soft" error (negative)
2704 			 * option not handled at this level
2705 			 * Do not modify *outlenp.
2706 			 */
2707 			return (-EINVAL);
2708 		case IP_BOUND_IF:
2709 			if (!checkonly) {
2710 				udp->udp_bound_if = *i1;
2711 				PASS_OPT_TO_IP(connp);
2712 			}
2713 			break;
2714 		case IP_UNSPEC_SRC:
2715 			if (!checkonly) {
2716 				udp->udp_unspec_source = onoff;
2717 				PASS_OPT_TO_IP(connp);
2718 			}
2719 			break;
2720 		case IP_BROADCAST_TTL:
2721 			if (!checkonly)
2722 				connp->conn_broadcast_ttl = *invalp;
2723 			break;
2724 		default:
2725 			*outlenp = 0;
2726 			return (EINVAL);
2727 		}
2728 		break;
2729 	case IPPROTO_IPV6: {
2730 		ip6_pkt_t		*ipp;
2731 		boolean_t		sticky;
2732 
2733 		if (udp->udp_family != AF_INET6) {
2734 			*outlenp = 0;
2735 			return (ENOPROTOOPT);
2736 		}
2737 		/*
2738 		 * Deal with both sticky options and ancillary data
2739 		 */
2740 		sticky = B_FALSE;
2741 		if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
2742 		    NULL) {
2743 			/* sticky options, or none */
2744 			ipp = &udp->udp_sticky_ipp;
2745 			sticky = B_TRUE;
2746 		}
2747 
2748 		switch (name) {
2749 		case IPV6_MULTICAST_IF:
2750 			if (!checkonly) {
2751 				udp->udp_multicast_if_index = *i1;
2752 				PASS_OPT_TO_IP(connp);
2753 			}
2754 			break;
2755 		case IPV6_UNICAST_HOPS:
2756 			/* -1 means use default */
2757 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2758 				*outlenp = 0;
2759 				return (EINVAL);
2760 			}
2761 			if (!checkonly) {
2762 				if (*i1 == -1) {
2763 					udp->udp_ttl = ipp->ipp_unicast_hops =
2764 					    us->us_ipv6_hoplimit;
2765 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2766 					/* Pass modified value to IP. */
2767 					*i1 = udp->udp_ttl;
2768 				} else {
2769 					udp->udp_ttl = ipp->ipp_unicast_hops =
2770 					    (uint8_t)*i1;
2771 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2772 				}
2773 				/* Rebuild the header template */
2774 				error = udp_build_hdrs(udp);
2775 				if (error != 0) {
2776 					*outlenp = 0;
2777 					return (error);
2778 				}
2779 			}
2780 			break;
2781 		case IPV6_MULTICAST_HOPS:
2782 			/* -1 means use default */
2783 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2784 				*outlenp = 0;
2785 				return (EINVAL);
2786 			}
2787 			if (!checkonly) {
2788 				if (*i1 == -1) {
2789 					udp->udp_multicast_ttl =
2790 					    ipp->ipp_multicast_hops =
2791 					    IP_DEFAULT_MULTICAST_TTL;
2792 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2793 					/* Pass modified value to IP. */
2794 					*i1 = udp->udp_multicast_ttl;
2795 				} else {
2796 					udp->udp_multicast_ttl =
2797 					    ipp->ipp_multicast_hops =
2798 					    (uint8_t)*i1;
2799 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2800 				}
2801 			}
2802 			break;
2803 		case IPV6_MULTICAST_LOOP:
2804 			if (*i1 != 0 && *i1 != 1) {
2805 				*outlenp = 0;
2806 				return (EINVAL);
2807 			}
2808 			if (!checkonly) {
2809 				connp->conn_multicast_loop = *i1;
2810 				PASS_OPT_TO_IP(connp);
2811 			}
2812 			break;
2813 		case IPV6_JOIN_GROUP:
2814 		case IPV6_LEAVE_GROUP:
2815 		case MCAST_JOIN_GROUP:
2816 		case MCAST_LEAVE_GROUP:
2817 		case MCAST_BLOCK_SOURCE:
2818 		case MCAST_UNBLOCK_SOURCE:
2819 		case MCAST_JOIN_SOURCE_GROUP:
2820 		case MCAST_LEAVE_SOURCE_GROUP:
2821 			/*
2822 			 * "soft" error (negative)
2823 			 * option not handled at this level
2824 			 * Note: Do not modify *outlenp
2825 			 */
2826 			return (-EINVAL);
2827 		case IPV6_BOUND_IF:
2828 			if (!checkonly) {
2829 				udp->udp_bound_if = *i1;
2830 				PASS_OPT_TO_IP(connp);
2831 			}
2832 			break;
2833 		case IPV6_UNSPEC_SRC:
2834 			if (!checkonly) {
2835 				udp->udp_unspec_source = onoff;
2836 				PASS_OPT_TO_IP(connp);
2837 			}
2838 			break;
2839 		/*
2840 		 * Set boolean switches for ancillary data delivery
2841 		 */
2842 		case IPV6_RECVPKTINFO:
2843 			if (!checkonly) {
2844 				udp->udp_ip_recvpktinfo = onoff;
2845 				PASS_OPT_TO_IP(connp);
2846 			}
2847 			break;
2848 		case IPV6_RECVTCLASS:
2849 			if (!checkonly) {
2850 				udp->udp_ipv6_recvtclass = onoff;
2851 				PASS_OPT_TO_IP(connp);
2852 			}
2853 			break;
2854 		case IPV6_RECVPATHMTU:
2855 			if (!checkonly) {
2856 				udp->udp_ipv6_recvpathmtu = onoff;
2857 				PASS_OPT_TO_IP(connp);
2858 			}
2859 			break;
2860 		case IPV6_RECVHOPLIMIT:
2861 			if (!checkonly) {
2862 				udp->udp_ipv6_recvhoplimit = onoff;
2863 				PASS_OPT_TO_IP(connp);
2864 			}
2865 			break;
2866 		case IPV6_RECVHOPOPTS:
2867 			if (!checkonly) {
2868 				udp->udp_ipv6_recvhopopts = onoff;
2869 				PASS_OPT_TO_IP(connp);
2870 			}
2871 			break;
2872 		case IPV6_RECVDSTOPTS:
2873 			if (!checkonly) {
2874 				udp->udp_ipv6_recvdstopts = onoff;
2875 				PASS_OPT_TO_IP(connp);
2876 			}
2877 			break;
2878 		case _OLD_IPV6_RECVDSTOPTS:
2879 			if (!checkonly)
2880 				udp->udp_old_ipv6_recvdstopts = onoff;
2881 			break;
2882 		case IPV6_RECVRTHDRDSTOPTS:
2883 			if (!checkonly) {
2884 				udp->udp_ipv6_recvrthdrdstopts = onoff;
2885 				PASS_OPT_TO_IP(connp);
2886 			}
2887 			break;
2888 		case IPV6_RECVRTHDR:
2889 			if (!checkonly) {
2890 				udp->udp_ipv6_recvrthdr = onoff;
2891 				PASS_OPT_TO_IP(connp);
2892 			}
2893 			break;
2894 		/*
2895 		 * Set sticky options or ancillary data.
2896 		 * If sticky options, (re)build any extension headers
2897 		 * that might be needed as a result.
2898 		 */
2899 		case IPV6_PKTINFO:
2900 			/*
2901 			 * The source address and ifindex are verified
2902 			 * in ip_opt_set(). For ancillary data the
2903 			 * source address is checked in ip_wput_v6.
2904 			 */
2905 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
2906 				return (EINVAL);
2907 			if (checkonly)
2908 				break;
2909 
2910 			if (inlen == 0) {
2911 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2912 				ipp->ipp_sticky_ignored |=
2913 				    (IPPF_IFINDEX|IPPF_ADDR);
2914 			} else {
2915 				struct in6_pktinfo *pkti;
2916 
2917 				pkti = (struct in6_pktinfo *)invalp;
2918 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2919 				ipp->ipp_addr = pkti->ipi6_addr;
2920 				if (ipp->ipp_ifindex != 0)
2921 					ipp->ipp_fields |= IPPF_IFINDEX;
2922 				else
2923 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2924 				if (!IN6_IS_ADDR_UNSPECIFIED(
2925 				    &ipp->ipp_addr))
2926 					ipp->ipp_fields |= IPPF_ADDR;
2927 				else
2928 					ipp->ipp_fields &= ~IPPF_ADDR;
2929 			}
2930 			if (sticky) {
2931 				error = udp_build_hdrs(udp);
2932 				if (error != 0)
2933 					return (error);
2934 				PASS_OPT_TO_IP(connp);
2935 			}
2936 			break;
2937 		case IPV6_HOPLIMIT:
2938 			if (sticky)
2939 				return (EINVAL);
2940 			if (inlen != 0 && inlen != sizeof (int))
2941 				return (EINVAL);
2942 			if (checkonly)
2943 				break;
2944 
2945 			if (inlen == 0) {
2946 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2947 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2948 			} else {
2949 				if (*i1 > 255 || *i1 < -1)
2950 					return (EINVAL);
2951 				if (*i1 == -1)
2952 					ipp->ipp_hoplimit =
2953 					    us->us_ipv6_hoplimit;
2954 				else
2955 					ipp->ipp_hoplimit = *i1;
2956 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2957 			}
2958 			break;
2959 		case IPV6_TCLASS:
2960 			if (inlen != 0 && inlen != sizeof (int))
2961 				return (EINVAL);
2962 			if (checkonly)
2963 				break;
2964 
2965 			if (inlen == 0) {
2966 				ipp->ipp_fields &= ~IPPF_TCLASS;
2967 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2968 			} else {
2969 				if (*i1 > 255 || *i1 < -1)
2970 					return (EINVAL);
2971 				if (*i1 == -1)
2972 					ipp->ipp_tclass = 0;
2973 				else
2974 					ipp->ipp_tclass = *i1;
2975 				ipp->ipp_fields |= IPPF_TCLASS;
2976 			}
2977 			if (sticky) {
2978 				error = udp_build_hdrs(udp);
2979 				if (error != 0)
2980 					return (error);
2981 			}
2982 			break;
2983 		case IPV6_NEXTHOP:
2984 			/*
2985 			 * IP will verify that the nexthop is reachable
2986 			 * and fail for sticky options.
2987 			 */
2988 			if (inlen != 0 && inlen != sizeof (sin6_t))
2989 				return (EINVAL);
2990 			if (checkonly)
2991 				break;
2992 
2993 			if (inlen == 0) {
2994 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2995 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2996 			} else {
2997 				sin6_t *sin6 = (sin6_t *)invalp;
2998 
2999 				if (sin6->sin6_family != AF_INET6) {
3000 					return (EAFNOSUPPORT);
3001 				}
3002 				if (IN6_IS_ADDR_V4MAPPED(
3003 				    &sin6->sin6_addr))
3004 					return (EADDRNOTAVAIL);
3005 				ipp->ipp_nexthop = sin6->sin6_addr;
3006 				if (!IN6_IS_ADDR_UNSPECIFIED(
3007 				    &ipp->ipp_nexthop))
3008 					ipp->ipp_fields |= IPPF_NEXTHOP;
3009 				else
3010 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
3011 			}
3012 			if (sticky) {
3013 				error = udp_build_hdrs(udp);
3014 				if (error != 0)
3015 					return (error);
3016 				PASS_OPT_TO_IP(connp);
3017 			}
3018 			break;
3019 		case IPV6_HOPOPTS: {
3020 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
3021 			/*
3022 			 * Sanity checks - minimum size, size a multiple of
3023 			 * eight bytes, and matching size passed in.
3024 			 */
3025 			if (inlen != 0 &&
3026 			    inlen != (8 * (hopts->ip6h_len + 1)))
3027 				return (EINVAL);
3028 
3029 			if (checkonly)
3030 				break;
3031 
3032 			error = optcom_pkt_set(invalp, inlen, sticky,
3033 			    (uchar_t **)&ipp->ipp_hopopts,
3034 			    &ipp->ipp_hopoptslen,
3035 			    sticky ? udp->udp_label_len_v6 : 0);
3036 			if (error != 0)
3037 				return (error);
3038 			if (ipp->ipp_hopoptslen == 0) {
3039 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
3040 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
3041 			} else {
3042 				ipp->ipp_fields |= IPPF_HOPOPTS;
3043 			}
3044 			if (sticky) {
3045 				error = udp_build_hdrs(udp);
3046 				if (error != 0)
3047 					return (error);
3048 			}
3049 			break;
3050 		}
3051 		case IPV6_RTHDRDSTOPTS: {
3052 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3053 
3054 			/*
3055 			 * Sanity checks - minimum size, size a multiple of
3056 			 * eight bytes, and matching size passed in.
3057 			 */
3058 			if (inlen != 0 &&
3059 			    inlen != (8 * (dopts->ip6d_len + 1)))
3060 				return (EINVAL);
3061 
3062 			if (checkonly)
3063 				break;
3064 
3065 			if (inlen == 0) {
3066 				if (sticky &&
3067 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
3068 					kmem_free(ipp->ipp_rtdstopts,
3069 					    ipp->ipp_rtdstoptslen);
3070 					ipp->ipp_rtdstopts = NULL;
3071 					ipp->ipp_rtdstoptslen = 0;
3072 				}
3073 
3074 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
3075 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
3076 			} else {
3077 				error = optcom_pkt_set(invalp, inlen, sticky,
3078 				    (uchar_t **)&ipp->ipp_rtdstopts,
3079 				    &ipp->ipp_rtdstoptslen, 0);
3080 				if (error != 0)
3081 					return (error);
3082 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
3083 			}
3084 			if (sticky) {
3085 				error = udp_build_hdrs(udp);
3086 				if (error != 0)
3087 					return (error);
3088 			}
3089 			break;
3090 		}
3091 		case IPV6_DSTOPTS: {
3092 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3093 
3094 			/*
3095 			 * Sanity checks - minimum size, size a multiple of
3096 			 * eight bytes, and matching size passed in.
3097 			 */
3098 			if (inlen != 0 &&
3099 			    inlen != (8 * (dopts->ip6d_len + 1)))
3100 				return (EINVAL);
3101 
3102 			if (checkonly)
3103 				break;
3104 
3105 			if (inlen == 0) {
3106 				if (sticky &&
3107 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
3108 					kmem_free(ipp->ipp_dstopts,
3109 					    ipp->ipp_dstoptslen);
3110 					ipp->ipp_dstopts = NULL;
3111 					ipp->ipp_dstoptslen = 0;
3112 				}
3113 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
3114 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
3115 			} else {
3116 				error = optcom_pkt_set(invalp, inlen, sticky,
3117 				    (uchar_t **)&ipp->ipp_dstopts,
3118 				    &ipp->ipp_dstoptslen, 0);
3119 				if (error != 0)
3120 					return (error);
3121 				ipp->ipp_fields |= IPPF_DSTOPTS;
3122 			}
3123 			if (sticky) {
3124 				error = udp_build_hdrs(udp);
3125 				if (error != 0)
3126 					return (error);
3127 			}
3128 			break;
3129 		}
3130 		case IPV6_RTHDR: {
3131 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
3132 
3133 			/*
3134 			 * Sanity checks - minimum size, size a multiple of
3135 			 * eight bytes, and matching size passed in.
3136 			 */
3137 			if (inlen != 0 &&
3138 			    inlen != (8 * (rt->ip6r_len + 1)))
3139 				return (EINVAL);
3140 
3141 			if (checkonly)
3142 				break;
3143 
3144 			if (inlen == 0) {
3145 				if (sticky &&
3146 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
3147 					kmem_free(ipp->ipp_rthdr,
3148 					    ipp->ipp_rthdrlen);
3149 					ipp->ipp_rthdr = NULL;
3150 					ipp->ipp_rthdrlen = 0;
3151 				}
3152 				ipp->ipp_fields &= ~IPPF_RTHDR;
3153 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
3154 			} else {
3155 				error = optcom_pkt_set(invalp, inlen, sticky,
3156 				    (uchar_t **)&ipp->ipp_rthdr,
3157 				    &ipp->ipp_rthdrlen, 0);
3158 				if (error != 0)
3159 					return (error);
3160 				ipp->ipp_fields |= IPPF_RTHDR;
3161 			}
3162 			if (sticky) {
3163 				error = udp_build_hdrs(udp);
3164 				if (error != 0)
3165 					return (error);
3166 			}
3167 			break;
3168 		}
3169 
3170 		case IPV6_DONTFRAG:
3171 			if (checkonly)
3172 				break;
3173 
3174 			if (onoff) {
3175 				ipp->ipp_fields |= IPPF_DONTFRAG;
3176 			} else {
3177 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
3178 			}
3179 			break;
3180 
3181 		case IPV6_USE_MIN_MTU:
3182 			if (inlen != sizeof (int))
3183 				return (EINVAL);
3184 
3185 			if (*i1 < -1 || *i1 > 1)
3186 				return (EINVAL);
3187 
3188 			if (checkonly)
3189 				break;
3190 
3191 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
3192 			ipp->ipp_use_min_mtu = *i1;
3193 			break;
3194 
3195 		case IPV6_SEC_OPT:
3196 		case IPV6_SRC_PREFERENCES:
3197 		case IPV6_V6ONLY:
3198 			/* Handled at the IP level */
3199 			return (-EINVAL);
3200 		default:
3201 			*outlenp = 0;
3202 			return (EINVAL);
3203 		}
3204 		break;
3205 		}		/* end IPPROTO_IPV6 */
3206 	case IPPROTO_UDP:
3207 		switch (name) {
3208 		case UDP_ANONPRIVBIND:
3209 			if ((error = secpolicy_net_privaddr(cr, 0,
3210 			    IPPROTO_UDP)) != 0) {
3211 				*outlenp = 0;
3212 				return (error);
3213 			}
3214 			if (!checkonly) {
3215 				udp->udp_anon_priv_bind = onoff;
3216 			}
3217 			break;
3218 		case UDP_EXCLBIND:
3219 			if (!checkonly)
3220 				udp->udp_exclbind = onoff;
3221 			break;
3222 		case UDP_RCVHDR:
3223 			if (!checkonly)
3224 				udp->udp_rcvhdr = onoff;
3225 			break;
3226 		case UDP_NAT_T_ENDPOINT:
3227 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
3228 				*outlenp = 0;
3229 				return (error);
3230 			}
3231 
3232 			/*
3233 			 * Use udp_family instead so we can avoid ambiguitites
3234 			 * with AF_INET6 sockets that may switch from IPv4
3235 			 * to IPv6.
3236 			 */
3237 			if (udp->udp_family != AF_INET) {
3238 				*outlenp = 0;
3239 				return (EAFNOSUPPORT);
3240 			}
3241 
3242 			if (!checkonly) {
3243 				int size;
3244 
3245 				udp->udp_nat_t_endpoint = onoff;
3246 
3247 				udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
3248 				    UDPH_SIZE + udp->udp_ip_snd_options_len;
3249 
3250 				/* Also, adjust wroff */
3251 				if (onoff) {
3252 					udp->udp_max_hdr_len +=
3253 					    sizeof (uint32_t);
3254 				}
3255 				size = udp->udp_max_hdr_len +
3256 				    us->us_wroff_extra;
3257 				(void) proto_set_tx_wroff(connp->conn_rq, connp,
3258 				    size);
3259 			}
3260 			break;
3261 		default:
3262 			*outlenp = 0;
3263 			return (EINVAL);
3264 		}
3265 		break;
3266 	default:
3267 		*outlenp = 0;
3268 		return (EINVAL);
3269 	}
3270 	/*
3271 	 * Common case of OK return with outval same as inval.
3272 	 */
3273 	if (invalp != outvalp) {
3274 		/* don't trust bcopy for identical src/dst */
3275 		(void) bcopy(invalp, outvalp, inlen);
3276 	}
3277 	*outlenp = inlen;
3278 	return (0);
3279 }
3280 
3281 int
3282 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
3283     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3284     void *thisdg_attrs, cred_t *cr)
3285 {
3286 	int		error;
3287 	boolean_t	checkonly;
3288 
3289 	error = 0;
3290 	switch (optset_context) {
3291 	case SETFN_OPTCOM_CHECKONLY:
3292 		checkonly = B_TRUE;
3293 		/*
3294 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3295 		 * inlen != 0 implies value supplied and
3296 		 * 	we have to "pretend" to set it.
3297 		 * inlen == 0 implies that there is no
3298 		 * 	value part in T_CHECK request and just validation
3299 		 * done elsewhere should be enough, we just return here.
3300 		 */
3301 		if (inlen == 0) {
3302 			*outlenp = 0;
3303 			goto done;
3304 		}
3305 		break;
3306 	case SETFN_OPTCOM_NEGOTIATE:
3307 		checkonly = B_FALSE;
3308 		break;
3309 	case SETFN_UD_NEGOTIATE:
3310 	case SETFN_CONN_NEGOTIATE:
3311 		checkonly = B_FALSE;
3312 		/*
3313 		 * Negotiating local and "association-related" options
3314 		 * through T_UNITDATA_REQ.
3315 		 *
3316 		 * Following routine can filter out ones we do not
3317 		 * want to be "set" this way.
3318 		 */
3319 		if (!udp_opt_allow_udr_set(level, name)) {
3320 			*outlenp = 0;
3321 			error = EINVAL;
3322 			goto done;
3323 		}
3324 		break;
3325 	default:
3326 		/*
3327 		 * We should never get here
3328 		 */
3329 		*outlenp = 0;
3330 		error = EINVAL;
3331 		goto done;
3332 	}
3333 
3334 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3335 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3336 
3337 	error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
3338 	    outvalp, cr, thisdg_attrs, checkonly);
3339 done:
3340 	return (error);
3341 }
3342 
3343 /* ARGSUSED */
3344 int
3345 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
3346     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3347     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3348 {
3349 	conn_t  *connp =  Q_TO_CONN(q);
3350 	int error;
3351 	udp_t	*udp = connp->conn_udp;
3352 
3353 	rw_enter(&udp->udp_rwlock, RW_WRITER);
3354 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
3355 	    outlenp, outvalp, thisdg_attrs, cr);
3356 	rw_exit(&udp->udp_rwlock);
3357 	return (error);
3358 }
3359 
3360 /*
3361  * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
3362  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
3363  * headers, and the udp header.
3364  * Returns failure if can't allocate memory.
3365  */
3366 static int
3367 udp_build_hdrs(udp_t *udp)
3368 {
3369 	udp_stack_t *us = udp->udp_us;
3370 	uchar_t	*hdrs;
3371 	uint_t	hdrs_len;
3372 	ip6_t	*ip6h;
3373 	ip6i_t	*ip6i;
3374 	udpha_t	*udpha;
3375 	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
3376 	size_t	sth_wroff;
3377 	conn_t	*connp = udp->udp_connp;
3378 
3379 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
3380 	ASSERT(connp != NULL);
3381 
3382 	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
3383 	ASSERT(hdrs_len != 0);
3384 	if (hdrs_len != udp->udp_sticky_hdrs_len) {
3385 		/* Need to reallocate */
3386 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3387 		if (hdrs == NULL)
3388 			return (ENOMEM);
3389 
3390 		if (udp->udp_sticky_hdrs_len != 0) {
3391 			kmem_free(udp->udp_sticky_hdrs,
3392 			    udp->udp_sticky_hdrs_len);
3393 		}
3394 		udp->udp_sticky_hdrs = hdrs;
3395 		udp->udp_sticky_hdrs_len = hdrs_len;
3396 	}
3397 	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
3398 	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);
3399 
3400 	/* Set header fields not in ipp */
3401 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3402 		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
3403 		ip6h = (ip6_t *)&ip6i[1];
3404 	} else {
3405 		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
3406 	}
3407 
3408 	if (!(ipp->ipp_fields & IPPF_ADDR))
3409 		ip6h->ip6_src = udp->udp_v6src;
3410 
3411 	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
3412 	udpha->uha_src_port = udp->udp_port;
3413 
3414 	/* Try to get everything in a single mblk */
3415 	if (hdrs_len > udp->udp_max_hdr_len) {
3416 		udp->udp_max_hdr_len = hdrs_len;
3417 		sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
3418 		rw_exit(&udp->udp_rwlock);
3419 		(void) proto_set_tx_wroff(udp->udp_connp->conn_rq,
3420 		    udp->udp_connp, sth_wroff);
3421 		rw_enter(&udp->udp_rwlock, RW_WRITER);
3422 	}
3423 	return (0);
3424 }
3425 
3426 /*
3427  * This routine retrieves the value of an ND variable in a udpparam_t
3428  * structure.  It is called through nd_getset when a user reads the
3429  * variable.
3430  */
3431 /* ARGSUSED */
3432 static int
3433 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3434 {
3435 	udpparam_t *udppa = (udpparam_t *)cp;
3436 
3437 	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
3438 	return (0);
3439 }
3440 
3441 /*
3442  * Walk through the param array specified registering each element with the
3443  * named dispatch (ND) handler.
3444  */
3445 static boolean_t
3446 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
3447 {
3448 	for (; cnt-- > 0; udppa++) {
3449 		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
3450 			if (!nd_load(ndp, udppa->udp_param_name,
3451 			    udp_param_get, udp_param_set,
3452 			    (caddr_t)udppa)) {
3453 				nd_free(ndp);
3454 				return (B_FALSE);
3455 			}
3456 		}
3457 	}
3458 	if (!nd_load(ndp, "udp_extra_priv_ports",
3459 	    udp_extra_priv_ports_get, NULL, NULL)) {
3460 		nd_free(ndp);
3461 		return (B_FALSE);
3462 	}
3463 	if (!nd_load(ndp, "udp_extra_priv_ports_add",
3464 	    NULL, udp_extra_priv_ports_add, NULL)) {
3465 		nd_free(ndp);
3466 		return (B_FALSE);
3467 	}
3468 	if (!nd_load(ndp, "udp_extra_priv_ports_del",
3469 	    NULL, udp_extra_priv_ports_del, NULL)) {
3470 		nd_free(ndp);
3471 		return (B_FALSE);
3472 	}
3473 	if (!nd_load(ndp, "udp_status", udp_status_report, NULL,
3474 	    NULL)) {
3475 		nd_free(ndp);
3476 		return (B_FALSE);
3477 	}
3478 	if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL,
3479 	    NULL)) {
3480 		nd_free(ndp);
3481 		return (B_FALSE);
3482 	}
3483 	return (B_TRUE);
3484 }
3485 
3486 /* This routine sets an ND variable in a udpparam_t structure. */
3487 /* ARGSUSED */
3488 static int
3489 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3490 {
3491 	long		new_value;
3492 	udpparam_t	*udppa = (udpparam_t *)cp;
3493 
3494 	/*
3495 	 * Fail the request if the new value does not lie within the
3496 	 * required bounds.
3497 	 */
3498 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3499 	    new_value < udppa->udp_param_min ||
3500 	    new_value > udppa->udp_param_max) {
3501 		return (EINVAL);
3502 	}
3503 
3504 	/* Set the new value */
3505 	udppa->udp_param_value = new_value;
3506 	return (0);
3507 }
3508 
3509 /*
3510  * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
3511  * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
3512  * just count the length needed for allocation.  If 'dbuf' is non-NULL,
3513  * then it's assumed to be allocated to be large enough.
3514  *
3515  * Returns zero if trimming of the security option causes all options to go
3516  * away.
3517  */
3518 static size_t
3519 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
3520 {
3521 	struct T_opthdr *toh;
3522 	size_t hol = ipp->ipp_hopoptslen;
3523 	ip6_hbh_t *dstopt = NULL;
3524 	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
3525 	size_t tlen, olen, plen;
3526 	boolean_t deleting;
3527 	const struct ip6_opt *sopt, *lastpad;
3528 	struct ip6_opt *dopt;
3529 
3530 	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
3531 		toh->level = IPPROTO_IPV6;
3532 		toh->name = IPV6_HOPOPTS;
3533 		toh->status = 0;
3534 		dstopt = (ip6_hbh_t *)(toh + 1);
3535 	}
3536 
3537 	/*
3538 	 * If labeling is enabled, then skip the label option
3539 	 * but get other options if there are any.
3540 	 */
3541 	if (is_system_labeled()) {
3542 		dopt = NULL;
3543 		if (dstopt != NULL) {
3544 			/* will fill in ip6h_len later */
3545 			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
3546 			dopt = (struct ip6_opt *)(dstopt + 1);
3547 		}
3548 		sopt = (const struct ip6_opt *)(srcopt + 1);
3549 		hol -= sizeof (*srcopt);
3550 		tlen = sizeof (*dstopt);
3551 		lastpad = NULL;
3552 		deleting = B_FALSE;
3553 		/*
3554 		 * This loop finds the first (lastpad pointer) of any number of
3555 		 * pads that preceeds the security option, then treats the
3556 		 * security option as though it were a pad, and then finds the
3557 		 * next non-pad option (or end of list).
3558 		 *
3559 		 * It then treats the entire block as one big pad.  To preserve
3560 		 * alignment of any options that follow, or just the end of the
3561 		 * list, it computes a minimal new padding size that keeps the
3562 		 * same alignment for the next option.
3563 		 *
3564 		 * If it encounters just a sequence of pads with no security
3565 		 * option, those are copied as-is rather than collapsed.
3566 		 *
3567 		 * Note that to handle the end of list case, the code makes one
3568 		 * loop with 'hol' set to zero.
3569 		 */
3570 		for (;;) {
3571 			if (hol > 0) {
3572 				if (sopt->ip6o_type == IP6OPT_PAD1) {
3573 					if (lastpad == NULL)
3574 						lastpad = sopt;
3575 					sopt = (const struct ip6_opt *)
3576 					    &sopt->ip6o_len;
3577 					hol--;
3578 					continue;
3579 				}
3580 				olen = sopt->ip6o_len + sizeof (*sopt);
3581 				if (olen > hol)
3582 					olen = hol;
3583 				if (sopt->ip6o_type == IP6OPT_PADN ||
3584 				    sopt->ip6o_type == ip6opt_ls) {
3585 					if (sopt->ip6o_type == ip6opt_ls)
3586 						deleting = B_TRUE;
3587 					if (lastpad == NULL)
3588 						lastpad = sopt;
3589 					sopt = (const struct ip6_opt *)
3590 					    ((const char *)sopt + olen);
3591 					hol -= olen;
3592 					continue;
3593 				}
3594 			} else {
3595 				/* if nothing was copied at all, then delete */
3596 				if (tlen == sizeof (*dstopt))
3597 					return (0);
3598 				/* last pass; pick up any trailing padding */
3599 				olen = 0;
3600 			}
3601 			if (deleting) {
3602 				/*
3603 				 * compute aligning effect of deleted material
3604 				 * to reproduce with pad.
3605 				 */
3606 				plen = ((const char *)sopt -
3607 				    (const char *)lastpad) & 7;
3608 				tlen += plen;
3609 				if (dopt != NULL) {
3610 					if (plen == 1) {
3611 						dopt->ip6o_type = IP6OPT_PAD1;
3612 					} else if (plen > 1) {
3613 						plen -= sizeof (*dopt);
3614 						dopt->ip6o_type = IP6OPT_PADN;
3615 						dopt->ip6o_len = plen;
3616 						if (plen > 0)
3617 							bzero(dopt + 1, plen);
3618 					}
3619 					dopt = (struct ip6_opt *)
3620 					    ((char *)dopt + plen);
3621 				}
3622 				deleting = B_FALSE;
3623 				lastpad = NULL;
3624 			}
3625 			/* if there's uncopied padding, then copy that now */
3626 			if (lastpad != NULL) {
3627 				olen += (const char *)sopt -
3628 				    (const char *)lastpad;
3629 				sopt = lastpad;
3630 				lastpad = NULL;
3631 			}
3632 			if (dopt != NULL && olen > 0) {
3633 				bcopy(sopt, dopt, olen);
3634 				dopt = (struct ip6_opt *)((char *)dopt + olen);
3635 			}
3636 			if (hol == 0)
3637 				break;
3638 			tlen += olen;
3639 			sopt = (const struct ip6_opt *)
3640 			    ((const char *)sopt + olen);
3641 			hol -= olen;
3642 		}
3643 		/* go back and patch up the length value, rounded upward */
3644 		if (dstopt != NULL)
3645 			dstopt->ip6h_len = (tlen - 1) >> 3;
3646 	} else {
3647 		tlen = hol;
3648 		if (dstopt != NULL)
3649 			bcopy(srcopt, dstopt, hol);
3650 	}
3651 
3652 	tlen += sizeof (*toh);
3653 	if (toh != NULL)
3654 		toh->len = tlen;
3655 
3656 	return (tlen);
3657 }
3658 
3659 /*
3660  * Update udp_rcv_opt_len from the packet.
3661  * Called when options received, and when no options received but
3662  * udp_ip_recv_opt_len has previously recorded options.
3663  */
3664 static void
3665 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
3666 {
3667 	/* Save the options if any */
3668 	if (opt_len > 0) {
3669 		if (opt_len > udp->udp_ip_rcv_options_len) {
3670 			/* Need to allocate larger buffer */
3671 			if (udp->udp_ip_rcv_options_len != 0)
3672 				mi_free((char *)udp->udp_ip_rcv_options);
3673 			udp->udp_ip_rcv_options_len = 0;
3674 			udp->udp_ip_rcv_options =
3675 			    (uchar_t *)mi_alloc(opt_len, BPRI_HI);
3676 			if (udp->udp_ip_rcv_options != NULL)
3677 				udp->udp_ip_rcv_options_len = opt_len;
3678 		}
3679 		if (udp->udp_ip_rcv_options_len != 0) {
3680 			bcopy(opt, udp->udp_ip_rcv_options, opt_len);
3681 			/* Adjust length if we are resusing the space */
3682 			udp->udp_ip_rcv_options_len = opt_len;
3683 		}
3684 	} else if (udp->udp_ip_rcv_options_len != 0) {
3685 		/* Clear out previously recorded options */
3686 		mi_free((char *)udp->udp_ip_rcv_options);
3687 		udp->udp_ip_rcv_options = NULL;
3688 		udp->udp_ip_rcv_options_len = 0;
3689 	}
3690 }
3691 
3692 static void
3693 udp_queue_fallback(udp_t *udp, mblk_t *mp)
3694 {
3695 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
3696 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
3697 		/*
3698 		 * fallback has started but messages have not been moved yet
3699 		 */
3700 		if (udp->udp_fallback_queue_head == NULL) {
3701 			ASSERT(udp->udp_fallback_queue_tail == NULL);
3702 			udp->udp_fallback_queue_head = mp;
3703 			udp->udp_fallback_queue_tail = mp;
3704 		} else {
3705 			ASSERT(udp->udp_fallback_queue_tail != NULL);
3706 			udp->udp_fallback_queue_tail->b_next = mp;
3707 			udp->udp_fallback_queue_tail = mp;
3708 		}
3709 		mutex_exit(&udp->udp_recv_lock);
3710 	} else {
3711 		/*
3712 		 * no more fallbacks possible, ok to drop lock.
3713 		 */
3714 		mutex_exit(&udp->udp_recv_lock);
3715 		putnext(udp->udp_connp->conn_rq, mp);
3716 	}
3717 }
3718 
3719 /* ARGSUSED2 */
3720 static void
3721 udp_input(void *arg1, mblk_t *mp, void *arg2)
3722 {
3723 	conn_t *connp = (conn_t *)arg1;
3724 	struct T_unitdata_ind	*tudi;
3725 	uchar_t			*rptr;		/* Pointer to IP header */
3726 	int			hdr_length;	/* Length of IP+UDP headers */
3727 	int			opt_len;
3728 	int			udi_size;	/* Size of T_unitdata_ind */
3729 	int			mp_len;
3730 	udp_t			*udp;
3731 	udpha_t			*udpha;
3732 	int			ipversion;
3733 	ip6_pkt_t		ipp;
3734 	ip6_t			*ip6h;
3735 	ip6i_t			*ip6i;
3736 	mblk_t			*mp1;
3737 	mblk_t			*options_mp = NULL;
3738 	ip_pktinfo_t		*pinfo = NULL;
3739 	cred_t			*cr = NULL;
3740 	pid_t			cpid;
3741 	uint32_t		udp_ip_rcv_options_len;
3742 	udp_bits_t		udp_bits;
3743 	cred_t			*rcr = connp->conn_cred;
3744 	udp_stack_t *us;
3745 
3746 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
3747 
3748 	udp = connp->conn_udp;
3749 	us = udp->udp_us;
3750 	rptr = mp->b_rptr;
3751 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3752 	ASSERT(OK_32PTR(rptr));
3753 
3754 	/*
3755 	 * IP should have prepended the options data in an M_CTL
3756 	 * Check M_CTL "type" to make sure are not here bcos of
3757 	 * a valid ICMP message
3758 	 */
3759 	if (DB_TYPE(mp) == M_CTL) {
3760 		if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3761 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3762 		    IN_PKTINFO) {
3763 			/*
3764 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3765 			 * has been prepended to the packet by IP. We need to
3766 			 * extract the mblk and adjust the rptr
3767 			 */
3768 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3769 			options_mp = mp;
3770 			mp = mp->b_cont;
3771 			rptr = mp->b_rptr;
3772 			UDP_STAT(us, udp_in_pktinfo);
3773 		} else {
3774 			/*
3775 			 * ICMP messages.
3776 			 */
3777 			udp_icmp_error(connp, mp);
3778 			return;
3779 		}
3780 	}
3781 
3782 	mp_len = msgdsize(mp);
3783 	/*
3784 	 * This is the inbound data path.
3785 	 * First, we check to make sure the IP version number is correct,
3786 	 * and then pull the IP and UDP headers into the first mblk.
3787 	 */
3788 
3789 	/* Initialize regardless if ipversion is IPv4 or IPv6 */
3790 	ipp.ipp_fields = 0;
3791 
3792 	ipversion = IPH_HDR_VERSION(rptr);
3793 
3794 	rw_enter(&udp->udp_rwlock, RW_READER);
3795 	udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len;
3796 	udp_bits = udp->udp_bits;
3797 	rw_exit(&udp->udp_rwlock);
3798 
3799 	switch (ipversion) {
3800 	case IPV4_VERSION:
3801 		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
3802 		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
3803 		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
3804 		opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
3805 		if ((opt_len > 0 || udp_ip_rcv_options_len > 0) &&
3806 		    udp->udp_family == AF_INET) {
3807 			/*
3808 			 * Record/update udp_ip_rcv_options with the lock
3809 			 * held. Not needed for AF_INET6 sockets
3810 			 * since they don't support a getsockopt of IP_OPTIONS.
3811 			 */
3812 			rw_enter(&udp->udp_rwlock, RW_WRITER);
3813 			udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH,
3814 			    opt_len);
3815 			rw_exit(&udp->udp_rwlock);
3816 		}
3817 		/* Handle IPV6_RECVPKTINFO even for IPv4 packet. */
3818 		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
3819 		    udp->udp_ip_recvpktinfo) {
3820 			if (pinfo->ip_pkt_flags & IPF_RECVIF) {
3821 				ipp.ipp_fields |= IPPF_IFINDEX;
3822 				ipp.ipp_ifindex = pinfo->ip_pkt_ifindex;
3823 			}
3824 		}
3825 		break;
3826 	case IPV6_VERSION:
3827 		/*
3828 		 * IPv6 packets can only be received by applications
3829 		 * that are prepared to receive IPv6 addresses.
3830 		 * The IP fanout must ensure this.
3831 		 */
3832 		ASSERT(udp->udp_family == AF_INET6);
3833 
3834 		ip6h = (ip6_t *)rptr;
3835 		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
3836 
3837 		if (ip6h->ip6_nxt != IPPROTO_UDP) {
3838 			uint8_t nexthdrp;
3839 			/* Look for ifindex information */
3840 			if (ip6h->ip6_nxt == IPPROTO_RAW) {
3841 				ip6i = (ip6i_t *)ip6h;
3842 				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
3843 					goto tossit;
3844 
3845 				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3846 					ASSERT(ip6i->ip6i_ifindex != 0);
3847 					ipp.ipp_fields |= IPPF_IFINDEX;
3848 					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3849 				}
3850 				rptr = (uchar_t *)&ip6i[1];
3851 				mp->b_rptr = rptr;
3852 				if (rptr == mp->b_wptr) {
3853 					mp1 = mp->b_cont;
3854 					freeb(mp);
3855 					mp = mp1;
3856 					rptr = mp->b_rptr;
3857 				}
3858 				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
3859 					goto tossit;
3860 				ip6h = (ip6_t *)rptr;
3861 				mp_len = msgdsize(mp);
3862 			}
3863 			/*
3864 			 * Find any potentially interesting extension headers
3865 			 * as well as the length of the IPv6 + extension
3866 			 * headers.
3867 			 */
3868 			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
3869 			    UDPH_SIZE;
3870 			ASSERT(nexthdrp == IPPROTO_UDP);
3871 		} else {
3872 			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
3873 			ip6i = NULL;
3874 		}
3875 		break;
3876 	default:
3877 		ASSERT(0);
3878 	}
3879 
3880 	/*
3881 	 * IP inspected the UDP header thus all of it must be in the mblk.
3882 	 * UDP length check is performed for IPv6 packets and IPv4 packets
3883 	 * to check if the size of the packet as specified
3884 	 * by the header is the same as the physical size of the packet.
3885 	 * FIXME? Didn't IP already check this?
3886 	 */
3887 	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
3888 	if ((MBLKL(mp) < hdr_length) ||
3889 	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
3890 		goto tossit;
3891 	}
3892 
3893 
3894 	/* Walk past the headers unless UDP_RCVHDR was set. */
3895 	if (!udp_bits.udpb_rcvhdr) {
3896 		mp->b_rptr = rptr + hdr_length;
3897 		mp_len -= hdr_length;
3898 	}
3899 
3900 	/*
3901 	 * This is the inbound data path.  Packets are passed upstream as
3902 	 * T_UNITDATA_IND messages with full IP headers still attached.
3903 	 */
3904 	if (udp->udp_family == AF_INET) {
3905 		sin_t *sin;
3906 
3907 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
3908 
3909 		/*
3910 		 * Normally only send up the source address.
3911 		 * If IP_RECVDSTADDR is set we include the destination IP
3912 		 * address as an option. With IP_RECVOPTS we include all
3913 		 * the IP options.
3914 		 */
3915 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3916 		if (udp_bits.udpb_recvdstaddr) {
3917 			udi_size += sizeof (struct T_opthdr) +
3918 			    sizeof (struct in_addr);
3919 			UDP_STAT(us, udp_in_recvdstaddr);
3920 		}
3921 
3922 		if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) &&
3923 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3924 			udi_size += sizeof (struct T_opthdr) +
3925 			    sizeof (struct in_pktinfo);
3926 			UDP_STAT(us, udp_ip_rcvpktinfo);
3927 		}
3928 
3929 		if ((udp_bits.udpb_recvopts) && opt_len > 0) {
3930 			udi_size += sizeof (struct T_opthdr) + opt_len;
3931 			UDP_STAT(us, udp_in_recvopts);
3932 		}
3933 
3934 		/*
3935 		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
3936 		 * space accordingly
3937 		 */
3938 		if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
3939 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3940 			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
3941 			UDP_STAT(us, udp_in_recvif);
3942 		}
3943 
3944 		if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
3945 		    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
3946 			udi_size += sizeof (struct T_opthdr) +
3947 			    sizeof (struct sockaddr_dl);
3948 			UDP_STAT(us, udp_in_recvslla);
3949 		}
3950 
3951 		if ((udp_bits.udpb_recvucred) &&
3952 		    (cr = msg_getcred(mp, &cpid)) != NULL) {
3953 			udi_size += sizeof (struct T_opthdr) + ucredsize;
3954 			UDP_STAT(us, udp_in_recvucred);
3955 		}
3956 
3957 		/*
3958 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3959 		 * buffer. Since gethrestime() expects a pointer aligned
3960 		 * argument, we allocate space necessary for extra
3961 		 * alignment (even though it might not be used).
3962 		 */
3963 		if (udp_bits.udpb_timestamp) {
3964 			udi_size += sizeof (struct T_opthdr) +
3965 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3966 			UDP_STAT(us, udp_in_timestamp);
3967 		}
3968 
3969 		/*
3970 		 * If IP_RECVTTL is set allocate the appropriate sized buffer
3971 		 */
3972 		if (udp_bits.udpb_recvttl) {
3973 			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
3974 			UDP_STAT(us, udp_in_recvttl);
3975 		}
3976 
3977 		/* Allocate a message block for the T_UNITDATA_IND structure. */
3978 		mp1 = allocb(udi_size, BPRI_MED);
3979 		if (mp1 == NULL) {
3980 			freemsg(mp);
3981 			if (options_mp != NULL)
3982 				freeb(options_mp);
3983 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
3984 			return;
3985 		}
3986 		mp1->b_cont = mp;
3987 		mp = mp1;
3988 		mp->b_datap->db_type = M_PROTO;
3989 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3990 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3991 		tudi->PRIM_type = T_UNITDATA_IND;
3992 		tudi->SRC_length = sizeof (sin_t);
3993 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3994 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
3995 		    sizeof (sin_t);
3996 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3997 		tudi->OPT_length = udi_size;
3998 		sin = (sin_t *)&tudi[1];
3999 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
4000 		sin->sin_port =	udpha->uha_src_port;
4001 		sin->sin_family = udp->udp_family;
4002 		*(uint32_t *)&sin->sin_zero[0] = 0;
4003 		*(uint32_t *)&sin->sin_zero[4] = 0;
4004 
4005 		/*
4006 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
4007 		 * IP_RECVTTL has been set.
4008 		 */
4009 		if (udi_size != 0) {
4010 			/*
4011 			 * Copy in destination address before options to avoid
4012 			 * any padding issues.
4013 			 */
4014 			char *dstopt;
4015 
4016 			dstopt = (char *)&sin[1];
4017 			if (udp_bits.udpb_recvdstaddr) {
4018 				struct T_opthdr *toh;
4019 				ipaddr_t *dstptr;
4020 
4021 				toh = (struct T_opthdr *)dstopt;
4022 				toh->level = IPPROTO_IP;
4023 				toh->name = IP_RECVDSTADDR;
4024 				toh->len = sizeof (struct T_opthdr) +
4025 				    sizeof (ipaddr_t);
4026 				toh->status = 0;
4027 				dstopt += sizeof (struct T_opthdr);
4028 				dstptr = (ipaddr_t *)dstopt;
4029 				*dstptr = ((ipha_t *)rptr)->ipha_dst;
4030 				dstopt += sizeof (ipaddr_t);
4031 				udi_size -= toh->len;
4032 			}
4033 
4034 			if (udp_bits.udpb_recvopts && opt_len > 0) {
4035 				struct T_opthdr *toh;
4036 
4037 				toh = (struct T_opthdr *)dstopt;
4038 				toh->level = IPPROTO_IP;
4039 				toh->name = IP_RECVOPTS;
4040 				toh->len = sizeof (struct T_opthdr) + opt_len;
4041 				toh->status = 0;
4042 				dstopt += sizeof (struct T_opthdr);
4043 				bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt,
4044 				    opt_len);
4045 				dstopt += opt_len;
4046 				udi_size -= toh->len;
4047 			}
4048 
4049 			if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) &&
4050 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
4051 				struct T_opthdr *toh;
4052 				struct in_pktinfo *pktinfop;
4053 
4054 				toh = (struct T_opthdr *)dstopt;
4055 				toh->level = IPPROTO_IP;
4056 				toh->name = IP_PKTINFO;
4057 				toh->len = sizeof (struct T_opthdr) +
4058 				    sizeof (*pktinfop);
4059 				toh->status = 0;
4060 				dstopt += sizeof (struct T_opthdr);
4061 				pktinfop = (struct in_pktinfo *)dstopt;
4062 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
4063 				pktinfop->ipi_spec_dst =
4064 				    pinfo->ip_pkt_match_addr;
4065 				pktinfop->ipi_addr.s_addr =
4066 				    ((ipha_t *)rptr)->ipha_dst;
4067 
4068 				dstopt += sizeof (struct in_pktinfo);
4069 				udi_size -= toh->len;
4070 			}
4071 
4072 			if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
4073 			    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
4074 
4075 				struct T_opthdr *toh;
4076 				struct sockaddr_dl	*dstptr;
4077 
4078 				toh = (struct T_opthdr *)dstopt;
4079 				toh->level = IPPROTO_IP;
4080 				toh->name = IP_RECVSLLA;
4081 				toh->len = sizeof (struct T_opthdr) +
4082 				    sizeof (struct sockaddr_dl);
4083 				toh->status = 0;
4084 				dstopt += sizeof (struct T_opthdr);
4085 				dstptr = (struct sockaddr_dl *)dstopt;
4086 				bcopy(&pinfo->ip_pkt_slla, dstptr,
4087 				    sizeof (struct sockaddr_dl));
4088 				dstopt += sizeof (struct sockaddr_dl);
4089 				udi_size -= toh->len;
4090 			}
4091 
4092 			if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
4093 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
4094 
4095 				struct T_opthdr *toh;
4096 				uint_t		*dstptr;
4097 
4098 				toh = (struct T_opthdr *)dstopt;
4099 				toh->level = IPPROTO_IP;
4100 				toh->name = IP_RECVIF;
4101 				toh->len = sizeof (struct T_opthdr) +
4102 				    sizeof (uint_t);
4103 				toh->status = 0;
4104 				dstopt += sizeof (struct T_opthdr);
4105 				dstptr = (uint_t *)dstopt;
4106 				*dstptr = pinfo->ip_pkt_ifindex;
4107 				dstopt += sizeof (uint_t);
4108 				udi_size -= toh->len;
4109 			}
4110 
4111 			if (cr != NULL) {
4112 				struct T_opthdr *toh;
4113 
4114 				toh = (struct T_opthdr *)dstopt;
4115 				toh->level = SOL_SOCKET;
4116 				toh->name = SCM_UCRED;
4117 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4118 				toh->status = 0;
4119 				dstopt += sizeof (struct T_opthdr);
4120 				(void) cred2ucred(cr, cpid, dstopt, rcr);
4121 				dstopt += ucredsize;
4122 				udi_size -= toh->len;
4123 			}
4124 
4125 			if (udp_bits.udpb_timestamp) {
4126 				struct	T_opthdr *toh;
4127 
4128 				toh = (struct T_opthdr *)dstopt;
4129 				toh->level = SOL_SOCKET;
4130 				toh->name = SCM_TIMESTAMP;
4131 				toh->len = sizeof (struct T_opthdr) +
4132 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4133 				toh->status = 0;
4134 				dstopt += sizeof (struct T_opthdr);
4135 				/* Align for gethrestime() */
4136 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
4137 				    sizeof (intptr_t));
4138 				gethrestime((timestruc_t *)dstopt);
4139 				dstopt = (char *)toh + toh->len;
4140 				udi_size -= toh->len;
4141 			}
4142 
4143 			/*
4144 			 * CAUTION:
4145 			 * Due to alignment issues
4146 			 * Processing of IP_RECVTTL option
4147 			 * should always be the last. Adding
4148 			 * any option processing after this will
4149 			 * cause alignment panic.
4150 			 */
4151 			if (udp_bits.udpb_recvttl) {
4152 				struct	T_opthdr *toh;
4153 				uint8_t	*dstptr;
4154 
4155 				toh = (struct T_opthdr *)dstopt;
4156 				toh->level = IPPROTO_IP;
4157 				toh->name = IP_RECVTTL;
4158 				toh->len = sizeof (struct T_opthdr) +
4159 				    sizeof (uint8_t);
4160 				toh->status = 0;
4161 				dstopt += sizeof (struct T_opthdr);
4162 				dstptr = (uint8_t *)dstopt;
4163 				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
4164 				dstopt += sizeof (uint8_t);
4165 				udi_size -= toh->len;
4166 			}
4167 
4168 			/* Consumed all of allocated space */
4169 			ASSERT(udi_size == 0);
4170 		}
4171 	} else {
4172 		sin6_t *sin6;
4173 
4174 		/*
4175 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
4176 		 *
4177 		 * Normally we only send up the address. If receiving of any
4178 		 * optional receive side information is enabled, we also send
4179 		 * that up as options.
4180 		 */
4181 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
4182 
4183 		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
4184 		    IPPF_RTHDR|IPPF_IFINDEX)) {
4185 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4186 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4187 				size_t hlen;
4188 
4189 				UDP_STAT(us, udp_in_recvhopopts);
4190 				hlen = copy_hop_opts(&ipp, NULL);
4191 				if (hlen == 0)
4192 					ipp.ipp_fields &= ~IPPF_HOPOPTS;
4193 				udi_size += hlen;
4194 			}
4195 			if (((udp_bits.udpb_ipv6_recvdstopts) ||
4196 			    udp_bits.udpb_old_ipv6_recvdstopts) &&
4197 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4198 				udi_size += sizeof (struct T_opthdr) +
4199 				    ipp.ipp_dstoptslen;
4200 				UDP_STAT(us, udp_in_recvdstopts);
4201 			}
4202 			if ((((udp_bits.udpb_ipv6_recvdstopts) &&
4203 			    udp_bits.udpb_ipv6_recvrthdr &&
4204 			    (ipp.ipp_fields & IPPF_RTHDR)) ||
4205 			    (udp_bits.udpb_ipv6_recvrthdrdstopts)) &&
4206 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4207 				udi_size += sizeof (struct T_opthdr) +
4208 				    ipp.ipp_rtdstoptslen;
4209 				UDP_STAT(us, udp_in_recvrtdstopts);
4210 			}
4211 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4212 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4213 				udi_size += sizeof (struct T_opthdr) +
4214 				    ipp.ipp_rthdrlen;
4215 				UDP_STAT(us, udp_in_recvrthdr);
4216 			}
4217 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4218 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4219 				udi_size += sizeof (struct T_opthdr) +
4220 				    sizeof (struct in6_pktinfo);
4221 				UDP_STAT(us, udp_in_recvpktinfo);
4222 			}
4223 
4224 		}
4225 		if ((udp_bits.udpb_recvucred) &&
4226 		    (cr = msg_getcred(mp, &cpid)) != NULL) {
4227 			udi_size += sizeof (struct T_opthdr) + ucredsize;
4228 			UDP_STAT(us, udp_in_recvucred);
4229 		}
4230 
4231 		/*
4232 		 * If SO_TIMESTAMP is set allocate the appropriate sized
4233 		 * buffer. Since gethrestime() expects a pointer aligned
4234 		 * argument, we allocate space necessary for extra
4235 		 * alignment (even though it might not be used).
4236 		 */
4237 		if (udp_bits.udpb_timestamp) {
4238 			udi_size += sizeof (struct T_opthdr) +
4239 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4240 			UDP_STAT(us, udp_in_timestamp);
4241 		}
4242 
4243 		if (udp_bits.udpb_ipv6_recvhoplimit) {
4244 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4245 			UDP_STAT(us, udp_in_recvhoplimit);
4246 		}
4247 
4248 		if (udp_bits.udpb_ipv6_recvtclass) {
4249 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4250 			UDP_STAT(us, udp_in_recvtclass);
4251 		}
4252 
4253 		mp1 = allocb(udi_size, BPRI_MED);
4254 		if (mp1 == NULL) {
4255 			freemsg(mp);
4256 			if (options_mp != NULL)
4257 				freeb(options_mp);
4258 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
4259 			return;
4260 		}
4261 		mp1->b_cont = mp;
4262 		mp = mp1;
4263 		mp->b_datap->db_type = M_PROTO;
4264 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
4265 		mp->b_wptr = (uchar_t *)tudi + udi_size;
4266 		tudi->PRIM_type = T_UNITDATA_IND;
4267 		tudi->SRC_length = sizeof (sin6_t);
4268 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
4269 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
4270 		    sizeof (sin6_t);
4271 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
4272 		tudi->OPT_length = udi_size;
4273 		sin6 = (sin6_t *)&tudi[1];
4274 		if (ipversion == IPV4_VERSION) {
4275 			in6_addr_t v6dst;
4276 
4277 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
4278 			    &sin6->sin6_addr);
4279 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
4280 			    &v6dst);
4281 			sin6->sin6_flowinfo = 0;
4282 			sin6->sin6_scope_id = 0;
4283 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
4284 			    connp->conn_zoneid, us->us_netstack);
4285 		} else {
4286 			sin6->sin6_addr = ip6h->ip6_src;
4287 			/* No sin6_flowinfo per API */
4288 			sin6->sin6_flowinfo = 0;
4289 			/* For link-scope source pass up scope id */
4290 			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
4291 			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
4292 				sin6->sin6_scope_id = ipp.ipp_ifindex;
4293 			else
4294 				sin6->sin6_scope_id = 0;
4295 			sin6->__sin6_src_id = ip_srcid_find_addr(
4296 			    &ip6h->ip6_dst, connp->conn_zoneid,
4297 			    us->us_netstack);
4298 		}
4299 		sin6->sin6_port = udpha->uha_src_port;
4300 		sin6->sin6_family = udp->udp_family;
4301 
4302 		if (udi_size != 0) {
4303 			uchar_t *dstopt;
4304 
4305 			dstopt = (uchar_t *)&sin6[1];
4306 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4307 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4308 				struct T_opthdr *toh;
4309 				struct in6_pktinfo *pkti;
4310 
4311 				toh = (struct T_opthdr *)dstopt;
4312 				toh->level = IPPROTO_IPV6;
4313 				toh->name = IPV6_PKTINFO;
4314 				toh->len = sizeof (struct T_opthdr) +
4315 				    sizeof (*pkti);
4316 				toh->status = 0;
4317 				dstopt += sizeof (struct T_opthdr);
4318 				pkti = (struct in6_pktinfo *)dstopt;
4319 				if (ipversion == IPV6_VERSION)
4320 					pkti->ipi6_addr = ip6h->ip6_dst;
4321 				else
4322 					IN6_IPADDR_TO_V4MAPPED(
4323 					    ((ipha_t *)rptr)->ipha_dst,
4324 					    &pkti->ipi6_addr);
4325 				pkti->ipi6_ifindex = ipp.ipp_ifindex;
4326 				dstopt += sizeof (*pkti);
4327 				udi_size -= toh->len;
4328 			}
4329 			if (udp_bits.udpb_ipv6_recvhoplimit) {
4330 				struct T_opthdr *toh;
4331 
4332 				toh = (struct T_opthdr *)dstopt;
4333 				toh->level = IPPROTO_IPV6;
4334 				toh->name = IPV6_HOPLIMIT;
4335 				toh->len = sizeof (struct T_opthdr) +
4336 				    sizeof (uint_t);
4337 				toh->status = 0;
4338 				dstopt += sizeof (struct T_opthdr);
4339 				if (ipversion == IPV6_VERSION)
4340 					*(uint_t *)dstopt = ip6h->ip6_hops;
4341 				else
4342 					*(uint_t *)dstopt =
4343 					    ((ipha_t *)rptr)->ipha_ttl;
4344 				dstopt += sizeof (uint_t);
4345 				udi_size -= toh->len;
4346 			}
4347 			if (udp_bits.udpb_ipv6_recvtclass) {
4348 				struct T_opthdr *toh;
4349 
4350 				toh = (struct T_opthdr *)dstopt;
4351 				toh->level = IPPROTO_IPV6;
4352 				toh->name = IPV6_TCLASS;
4353 				toh->len = sizeof (struct T_opthdr) +
4354 				    sizeof (uint_t);
4355 				toh->status = 0;
4356 				dstopt += sizeof (struct T_opthdr);
4357 				if (ipversion == IPV6_VERSION) {
4358 					*(uint_t *)dstopt =
4359 					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
4360 				} else {
4361 					ipha_t *ipha = (ipha_t *)rptr;
4362 					*(uint_t *)dstopt =
4363 					    ipha->ipha_type_of_service;
4364 				}
4365 				dstopt += sizeof (uint_t);
4366 				udi_size -= toh->len;
4367 			}
4368 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4369 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4370 				size_t hlen;
4371 
4372 				hlen = copy_hop_opts(&ipp, dstopt);
4373 				dstopt += hlen;
4374 				udi_size -= hlen;
4375 			}
4376 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4377 			    (udp_bits.udpb_ipv6_recvrthdr) &&
4378 			    (ipp.ipp_fields & IPPF_RTHDR) &&
4379 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4380 				struct T_opthdr *toh;
4381 
4382 				toh = (struct T_opthdr *)dstopt;
4383 				toh->level = IPPROTO_IPV6;
4384 				toh->name = IPV6_DSTOPTS;
4385 				toh->len = sizeof (struct T_opthdr) +
4386 				    ipp.ipp_rtdstoptslen;
4387 				toh->status = 0;
4388 				dstopt += sizeof (struct T_opthdr);
4389 				bcopy(ipp.ipp_rtdstopts, dstopt,
4390 				    ipp.ipp_rtdstoptslen);
4391 				dstopt += ipp.ipp_rtdstoptslen;
4392 				udi_size -= toh->len;
4393 			}
4394 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4395 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4396 				struct T_opthdr *toh;
4397 
4398 				toh = (struct T_opthdr *)dstopt;
4399 				toh->level = IPPROTO_IPV6;
4400 				toh->name = IPV6_RTHDR;
4401 				toh->len = sizeof (struct T_opthdr) +
4402 				    ipp.ipp_rthdrlen;
4403 				toh->status = 0;
4404 				dstopt += sizeof (struct T_opthdr);
4405 				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
4406 				dstopt += ipp.ipp_rthdrlen;
4407 				udi_size -= toh->len;
4408 			}
4409 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4410 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4411 				struct T_opthdr *toh;
4412 
4413 				toh = (struct T_opthdr *)dstopt;
4414 				toh->level = IPPROTO_IPV6;
4415 				toh->name = IPV6_DSTOPTS;
4416 				toh->len = sizeof (struct T_opthdr) +
4417 				    ipp.ipp_dstoptslen;
4418 				toh->status = 0;
4419 				dstopt += sizeof (struct T_opthdr);
4420 				bcopy(ipp.ipp_dstopts, dstopt,
4421 				    ipp.ipp_dstoptslen);
4422 				dstopt += ipp.ipp_dstoptslen;
4423 				udi_size -= toh->len;
4424 			}
4425 			if (cr != NULL) {
4426 				struct T_opthdr *toh;
4427 
4428 				toh = (struct T_opthdr *)dstopt;
4429 				toh->level = SOL_SOCKET;
4430 				toh->name = SCM_UCRED;
4431 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4432 				toh->status = 0;
4433 				(void) cred2ucred(cr, cpid, &toh[1], rcr);
4434 				dstopt += toh->len;
4435 				udi_size -= toh->len;
4436 			}
4437 			if (udp_bits.udpb_timestamp) {
4438 				struct	T_opthdr *toh;
4439 
4440 				toh = (struct T_opthdr *)dstopt;
4441 				toh->level = SOL_SOCKET;
4442 				toh->name = SCM_TIMESTAMP;
4443 				toh->len = sizeof (struct T_opthdr) +
4444 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4445 				toh->status = 0;
4446 				dstopt += sizeof (struct T_opthdr);
4447 				/* Align for gethrestime() */
4448 				dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
4449 				    sizeof (intptr_t));
4450 				gethrestime((timestruc_t *)dstopt);
4451 				dstopt = (uchar_t *)toh + toh->len;
4452 				udi_size -= toh->len;
4453 			}
4454 
4455 			/* Consumed all of allocated space */
4456 			ASSERT(udi_size == 0);
4457 		}
4458 #undef	sin6
4459 		/* No IP_RECVDSTADDR for IPv6. */
4460 	}
4461 
4462 	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
4463 	if (options_mp != NULL)
4464 		freeb(options_mp);
4465 
4466 	if (IPCL_IS_NONSTR(connp)) {
4467 		int error;
4468 
4469 		if ((*connp->conn_upcalls->su_recv)
4470 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
4471 		    NULL) < 0) {
4472 			mutex_enter(&udp->udp_recv_lock);
4473 			if (error == ENOSPC) {
4474 				/*
4475 				 * let's confirm while holding the lock
4476 				 */
4477 				if ((*connp->conn_upcalls->su_recv)
4478 				    (connp->conn_upper_handle, NULL, 0, 0,
4479 				    &error, NULL) < 0) {
4480 					if (error == ENOSPC) {
4481 						connp->conn_flow_cntrld =
4482 						    B_TRUE;
4483 					} else {
4484 						ASSERT(error == EOPNOTSUPP);
4485 					}
4486 				}
4487 				mutex_exit(&udp->udp_recv_lock);
4488 			} else {
4489 				ASSERT(error == EOPNOTSUPP);
4490 				udp_queue_fallback(udp, mp);
4491 			}
4492 		}
4493 	} else {
4494 		putnext(connp->conn_rq, mp);
4495 	}
4496 	ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
4497 	return;
4498 
4499 tossit:
4500 	freemsg(mp);
4501 	if (options_mp != NULL)
4502 		freeb(options_mp);
4503 	BUMP_MIB(&us->us_udp_mib, udpInErrors);
4504 }
4505 
4506 /*
4507  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
4508  * information that can be changing beneath us.
4509  */
4510 mblk_t *
4511 udp_snmp_get(queue_t *q, mblk_t *mpctl)
4512 {
4513 	mblk_t			*mpdata;
4514 	mblk_t			*mp_conn_ctl;
4515 	mblk_t			*mp_attr_ctl;
4516 	mblk_t			*mp6_conn_ctl;
4517 	mblk_t			*mp6_attr_ctl;
4518 	mblk_t			*mp_conn_tail;
4519 	mblk_t			*mp_attr_tail;
4520 	mblk_t			*mp6_conn_tail;
4521 	mblk_t			*mp6_attr_tail;
4522 	struct opthdr		*optp;
4523 	mib2_udpEntry_t		ude;
4524 	mib2_udp6Entry_t	ude6;
4525 	mib2_transportMLPEntry_t mlp;
4526 	int			state;
4527 	zoneid_t		zoneid;
4528 	int			i;
4529 	connf_t			*connfp;
4530 	conn_t			*connp = Q_TO_CONN(q);
4531 	int			v4_conn_idx;
4532 	int			v6_conn_idx;
4533 	boolean_t		needattr;
4534 	udp_t			*udp;
4535 	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
4536 	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
4537 	mblk_t			*mp2ctl;
4538 
4539 	/*
4540 	 * make a copy of the original message
4541 	 */
4542 	mp2ctl = copymsg(mpctl);
4543 
4544 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
4545 	if (mpctl == NULL ||
4546 	    (mpdata = mpctl->b_cont) == NULL ||
4547 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
4548 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
4549 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
4550 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
4551 		freemsg(mp_conn_ctl);
4552 		freemsg(mp_attr_ctl);
4553 		freemsg(mp6_conn_ctl);
4554 		freemsg(mpctl);
4555 		freemsg(mp2ctl);
4556 		return (0);
4557 	}
4558 
4559 	zoneid = connp->conn_zoneid;
4560 
4561 	/* fixed length structure for IPv4 and IPv6 counters */
4562 	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
4563 	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
4564 	/* synchronize 64- and 32-bit counters */
4565 	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
4566 	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
4567 
4568 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4569 	optp->level = MIB2_UDP;
4570 	optp->name = 0;
4571 	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
4572 	    sizeof (us->us_udp_mib));
4573 	optp->len = msgdsize(mpdata);
4574 	qreply(q, mpctl);
4575 
4576 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
4577 	v4_conn_idx = v6_conn_idx = 0;
4578 
4579 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4580 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4581 		connp = NULL;
4582 
4583 		while ((connp = ipcl_get_next_conn(connfp, connp,
4584 		    IPCL_UDPCONN))) {
4585 			udp = connp->conn_udp;
4586 			if (zoneid != connp->conn_zoneid)
4587 				continue;
4588 
4589 			/*
4590 			 * Note that the port numbers are sent in
4591 			 * host byte order
4592 			 */
4593 
4594 			if (udp->udp_state == TS_UNBND)
4595 				state = MIB2_UDP_unbound;
4596 			else if (udp->udp_state == TS_IDLE)
4597 				state = MIB2_UDP_idle;
4598 			else if (udp->udp_state == TS_DATA_XFER)
4599 				state = MIB2_UDP_connected;
4600 			else
4601 				state = MIB2_UDP_unknown;
4602 
4603 			needattr = B_FALSE;
4604 			bzero(&mlp, sizeof (mlp));
4605 			if (connp->conn_mlp_type != mlptSingle) {
4606 				if (connp->conn_mlp_type == mlptShared ||
4607 				    connp->conn_mlp_type == mlptBoth)
4608 					mlp.tme_flags |= MIB2_TMEF_SHARED;
4609 				if (connp->conn_mlp_type == mlptPrivate ||
4610 				    connp->conn_mlp_type == mlptBoth)
4611 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
4612 				needattr = B_TRUE;
4613 			}
4614 
4615 			/*
4616 			 * Create an IPv4 table entry for IPv4 entries and also
4617 			 * any IPv6 entries which are bound to in6addr_any
4618 			 * (i.e. anything a IPv4 peer could connect/send to).
4619 			 */
4620 			if (udp->udp_ipversion == IPV4_VERSION ||
4621 			    (udp->udp_state <= TS_IDLE &&
4622 			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
4623 				ude.udpEntryInfo.ue_state = state;
4624 				/*
4625 				 * If in6addr_any this will set it to
4626 				 * INADDR_ANY
4627 				 */
4628 				ude.udpLocalAddress =
4629 				    V4_PART_OF_V6(udp->udp_v6src);
4630 				ude.udpLocalPort = ntohs(udp->udp_port);
4631 				if (udp->udp_state == TS_DATA_XFER) {
4632 					/*
4633 					 * Can potentially get here for
4634 					 * v6 socket if another process
4635 					 * (say, ping) has just done a
4636 					 * sendto(), changing the state
4637 					 * from the TS_IDLE above to
4638 					 * TS_DATA_XFER by the time we hit
4639 					 * this part of the code.
4640 					 */
4641 					ude.udpEntryInfo.ue_RemoteAddress =
4642 					    V4_PART_OF_V6(udp->udp_v6dst);
4643 					ude.udpEntryInfo.ue_RemotePort =
4644 					    ntohs(udp->udp_dstport);
4645 				} else {
4646 					ude.udpEntryInfo.ue_RemoteAddress = 0;
4647 					ude.udpEntryInfo.ue_RemotePort = 0;
4648 				}
4649 
4650 				/*
4651 				 * We make the assumption that all udp_t
4652 				 * structs will be created within an address
4653 				 * region no larger than 32-bits.
4654 				 */
4655 				ude.udpInstance = (uint32_t)(uintptr_t)udp;
4656 				ude.udpCreationProcess =
4657 				    (udp->udp_open_pid < 0) ?
4658 				    MIB2_UNKNOWN_PROCESS :
4659 				    udp->udp_open_pid;
4660 				ude.udpCreationTime = udp->udp_open_time;
4661 
4662 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
4663 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
4664 				mlp.tme_connidx = v4_conn_idx++;
4665 				if (needattr)
4666 					(void) snmp_append_data2(
4667 					    mp_attr_ctl->b_cont, &mp_attr_tail,
4668 					    (char *)&mlp, sizeof (mlp));
4669 			}
4670 			if (udp->udp_ipversion == IPV6_VERSION) {
4671 				ude6.udp6EntryInfo.ue_state  = state;
4672 				ude6.udp6LocalAddress = udp->udp_v6src;
4673 				ude6.udp6LocalPort = ntohs(udp->udp_port);
4674 				ude6.udp6IfIndex = udp->udp_bound_if;
4675 				if (udp->udp_state == TS_DATA_XFER) {
4676 					ude6.udp6EntryInfo.ue_RemoteAddress =
4677 					    udp->udp_v6dst;
4678 					ude6.udp6EntryInfo.ue_RemotePort =
4679 					    ntohs(udp->udp_dstport);
4680 				} else {
4681 					ude6.udp6EntryInfo.ue_RemoteAddress =
4682 					    sin6_null.sin6_addr;
4683 					ude6.udp6EntryInfo.ue_RemotePort = 0;
4684 				}
4685 				/*
4686 				 * We make the assumption that all udp_t
4687 				 * structs will be created within an address
4688 				 * region no larger than 32-bits.
4689 				 */
4690 				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
4691 				ude6.udp6CreationProcess =
4692 				    (udp->udp_open_pid < 0) ?
4693 				    MIB2_UNKNOWN_PROCESS :
4694 				    udp->udp_open_pid;
4695 				ude6.udp6CreationTime = udp->udp_open_time;
4696 
4697 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
4698 				    &mp6_conn_tail, (char *)&ude6,
4699 				    sizeof (ude6));
4700 				mlp.tme_connidx = v6_conn_idx++;
4701 				if (needattr)
4702 					(void) snmp_append_data2(
4703 					    mp6_attr_ctl->b_cont,
4704 					    &mp6_attr_tail, (char *)&mlp,
4705 					    sizeof (mlp));
4706 			}
4707 		}
4708 	}
4709 
4710 	/* IPv4 UDP endpoints */
4711 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
4712 	    sizeof (struct T_optmgmt_ack)];
4713 	optp->level = MIB2_UDP;
4714 	optp->name = MIB2_UDP_ENTRY;
4715 	optp->len = msgdsize(mp_conn_ctl->b_cont);
4716 	qreply(q, mp_conn_ctl);
4717 
4718 	/* table of MLP attributes... */
4719 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
4720 	    sizeof (struct T_optmgmt_ack)];
4721 	optp->level = MIB2_UDP;
4722 	optp->name = EXPER_XPORT_MLP;
4723 	optp->len = msgdsize(mp_attr_ctl->b_cont);
4724 	if (optp->len == 0)
4725 		freemsg(mp_attr_ctl);
4726 	else
4727 		qreply(q, mp_attr_ctl);
4728 
4729 	/* IPv6 UDP endpoints */
4730 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
4731 	    sizeof (struct T_optmgmt_ack)];
4732 	optp->level = MIB2_UDP6;
4733 	optp->name = MIB2_UDP6_ENTRY;
4734 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
4735 	qreply(q, mp6_conn_ctl);
4736 
4737 	/* table of MLP attributes... */
4738 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
4739 	    sizeof (struct T_optmgmt_ack)];
4740 	optp->level = MIB2_UDP6;
4741 	optp->name = EXPER_XPORT_MLP;
4742 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
4743 	if (optp->len == 0)
4744 		freemsg(mp6_attr_ctl);
4745 	else
4746 		qreply(q, mp6_attr_ctl);
4747 
4748 	return (mp2ctl);
4749 }
4750 
4751 /*
4752  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
4753  * NOTE: Per MIB-II, UDP has no writable data.
4754  * TODO:  If this ever actually tries to set anything, it needs to be
4755  * to do the appropriate locking.
4756  */
4757 /* ARGSUSED */
4758 int
4759 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4760     uchar_t *ptr, int len)
4761 {
4762 	switch (level) {
4763 	case MIB2_UDP:
4764 		return (0);
4765 	default:
4766 		return (1);
4767 	}
4768 }
4769 
4770 static void
4771 udp_report_item(mblk_t *mp, udp_t *udp)
4772 {
4773 	char *state;
4774 	char addrbuf1[INET6_ADDRSTRLEN];
4775 	char addrbuf2[INET6_ADDRSTRLEN];
4776 	uint_t print_len, buf_len;
4777 
4778 	buf_len = mp->b_datap->db_lim - mp->b_wptr;
4779 	ASSERT(buf_len >= 0);
4780 	if (buf_len == 0)
4781 		return;
4782 
4783 	if (udp->udp_state == TS_UNBND)
4784 		state = "UNBOUND";
4785 	else if (udp->udp_state == TS_IDLE)
4786 		state = "IDLE";
4787 	else if (udp->udp_state == TS_DATA_XFER)
4788 		state = "CONNECTED";
4789 	else
4790 		state = "UnkState";
4791 	print_len = snprintf((char *)mp->b_wptr, buf_len,
4792 	    MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n",
4793 	    (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port),
4794 	    inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)),
4795 	    inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)),
4796 	    ntohs(udp->udp_dstport), state);
4797 	if (print_len < buf_len) {
4798 		mp->b_wptr += print_len;
4799 	} else {
4800 		mp->b_wptr += buf_len;
4801 	}
4802 }
4803 
4804 /* Report for ndd "udp_status" */
4805 /* ARGSUSED */
4806 static int
4807 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4808 {
4809 	zoneid_t zoneid;
4810 	connf_t	*connfp;
4811 	conn_t	*connp = Q_TO_CONN(q);
4812 	udp_t	*udp = connp->conn_udp;
4813 	int	i;
4814 	udp_stack_t *us = udp->udp_us;
4815 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4816 
4817 	/*
4818 	 * Because of the ndd constraint, at most we can have 64K buffer
4819 	 * to put in all UDP info.  So to be more efficient, just
4820 	 * allocate a 64K buffer here, assuming we need that large buffer.
4821 	 * This may be a problem as any user can read udp_status.  Therefore
4822 	 * we limit the rate of doing this using us_ndd_get_info_interval.
4823 	 * This should be OK as normal users should not do this too often.
4824 	 */
4825 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
4826 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
4827 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
4828 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
4829 			return (0);
4830 		}
4831 	}
4832 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
4833 		/* The following may work even if we cannot get a large buf. */
4834 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
4835 		return (0);
4836 	}
4837 	(void) mi_mpprintf(mp,
4838 	    "UDP     " MI_COL_HDRPAD_STR
4839 	/*   12345678[89ABCDEF] */
4840 	    " zone lport src addr        dest addr       port  state");
4841 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
4842 
4843 	zoneid = connp->conn_zoneid;
4844 
4845 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4846 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4847 		connp = NULL;
4848 
4849 		while ((connp = ipcl_get_next_conn(connfp, connp,
4850 		    IPCL_UDPCONN))) {
4851 			udp = connp->conn_udp;
4852 			if (zoneid != GLOBAL_ZONEID &&
4853 			    zoneid != connp->conn_zoneid)
4854 				continue;
4855 
4856 			udp_report_item(mp->b_cont, udp);
4857 		}
4858 	}
4859 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
4860 	return (0);
4861 }
4862 
4863 /*
4864  * This routine creates a T_UDERROR_IND message and passes it upstream.
4865  * The address and options are copied from the T_UNITDATA_REQ message
4866  * passed in mp.  This message is freed.
4867  */
4868 static void
4869 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
4870     t_scalar_t err)
4871 {
4872 	struct T_unitdata_req *tudr;
4873 	mblk_t	*mp1;
4874 	uchar_t	*optaddr;
4875 	t_scalar_t optlen;
4876 
4877 	if (DB_TYPE(mp) == M_DATA) {
4878 		ASSERT(destaddr != NULL && destlen != 0);
4879 		optaddr = NULL;
4880 		optlen = 0;
4881 	} else {
4882 		if ((mp->b_wptr < mp->b_rptr) ||
4883 		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
4884 			goto done;
4885 		}
4886 		tudr = (struct T_unitdata_req *)mp->b_rptr;
4887 		destaddr = mp->b_rptr + tudr->DEST_offset;
4888 		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
4889 		    destaddr + tudr->DEST_length < mp->b_rptr ||
4890 		    destaddr + tudr->DEST_length > mp->b_wptr) {
4891 			goto done;
4892 		}
4893 		optaddr = mp->b_rptr + tudr->OPT_offset;
4894 		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
4895 		    optaddr + tudr->OPT_length < mp->b_rptr ||
4896 		    optaddr + tudr->OPT_length > mp->b_wptr) {
4897 			goto done;
4898 		}
4899 		destlen = tudr->DEST_length;
4900 		optlen = tudr->OPT_length;
4901 	}
4902 
4903 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
4904 	    (char *)optaddr, optlen, err);
4905 	if (mp1 != NULL)
4906 		qreply(q, mp1);
4907 
4908 done:
4909 	freemsg(mp);
4910 }
4911 
4912 /*
4913  * This routine removes a port number association from a stream.  It
4914  * is called by udp_wput to handle T_UNBIND_REQ messages.
4915  */
4916 static void
4917 udp_tpi_unbind(queue_t *q, mblk_t *mp)
4918 {
4919 	conn_t	*connp = Q_TO_CONN(q);
4920 	int	error;
4921 
4922 	error = udp_do_unbind(connp);
4923 	if (error) {
4924 		if (error < 0)
4925 			udp_err_ack(q, mp, -error, 0);
4926 		else
4927 			udp_err_ack(q, mp, TSYSERR, error);
4928 		return;
4929 	}
4930 
4931 	mp = mi_tpi_ok_ack_alloc(mp);
4932 	ASSERT(mp != NULL);
4933 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4934 	qreply(q, mp);
4935 }
4936 
4937 /*
4938  * Don't let port fall into the privileged range.
4939  * Since the extra privileged ports can be arbitrary we also
4940  * ensure that we exclude those from consideration.
4941  * us->us_epriv_ports is not sorted thus we loop over it until
4942  * there are no changes.
4943  */
4944 static in_port_t
4945 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
4946 {
4947 	int i;
4948 	in_port_t nextport;
4949 	boolean_t restart = B_FALSE;
4950 	udp_stack_t *us = udp->udp_us;
4951 
4952 	if (random && udp_random_anon_port != 0) {
4953 		(void) random_get_pseudo_bytes((uint8_t *)&port,
4954 		    sizeof (in_port_t));
4955 		/*
4956 		 * Unless changed by a sys admin, the smallest anon port
4957 		 * is 32768 and the largest anon port is 65535.  It is
4958 		 * very likely (50%) for the random port to be smaller
4959 		 * than the smallest anon port.  When that happens,
4960 		 * add port % (anon port range) to the smallest anon
4961 		 * port to get the random port.  It should fall into the
4962 		 * valid anon port range.
4963 		 */
4964 		if (port < us->us_smallest_anon_port) {
4965 			port = us->us_smallest_anon_port +
4966 			    port % (us->us_largest_anon_port -
4967 			    us->us_smallest_anon_port);
4968 		}
4969 	}
4970 
4971 retry:
4972 	if (port < us->us_smallest_anon_port)
4973 		port = us->us_smallest_anon_port;
4974 
4975 	if (port > us->us_largest_anon_port) {
4976 		port = us->us_smallest_anon_port;
4977 		if (restart)
4978 			return (0);
4979 		restart = B_TRUE;
4980 	}
4981 
4982 	if (port < us->us_smallest_nonpriv_port)
4983 		port = us->us_smallest_nonpriv_port;
4984 
4985 	for (i = 0; i < us->us_num_epriv_ports; i++) {
4986 		if (port == us->us_epriv_ports[i]) {
4987 			port++;
4988 			/*
4989 			 * Make sure that the port is in the
4990 			 * valid range.
4991 			 */
4992 			goto retry;
4993 		}
4994 	}
4995 
4996 	if (is_system_labeled() &&
4997 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
4998 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
4999 		port = nextport;
5000 		goto retry;
5001 	}
5002 
5003 	return (port);
5004 }
5005 
5006 static int
5007 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst,
5008     boolean_t *update_lastdst)
5009 {
5010 	int err;
5011 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
5012 	udp_t *udp = Q_TO_UDP(wq);
5013 	udp_stack_t	*us = udp->udp_us;
5014 	cred_t			*cr;
5015 
5016 	/*
5017 	 * All Solaris components should pass a db_credp
5018 	 * for this message, hence we ASSERT.
5019 	 * On production kernels we return an error to be robust against
5020 	 * random streams modules sitting on top of us.
5021 	 */
5022 	cr = msg_getcred(mp, NULL);
5023 	ASSERT(cr != NULL);
5024 	if (cr == NULL)
5025 		return (EINVAL);
5026 
5027 	/* Note that we use the cred/label from the message to handle MLP */
5028 	err = tsol_compute_label(cr, dst,
5029 	    opt_storage, udp->udp_connp->conn_mac_exempt,
5030 	    us->us_netstack->netstack_ip);
5031 	if (err == 0) {
5032 		err = tsol_update_options(&udp->udp_ip_snd_options,
5033 		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
5034 		    opt_storage);
5035 	}
5036 	if (err != 0) {
5037 		DTRACE_PROBE4(
5038 		    tx__ip__log__info__updatelabel__udp,
5039 		    char *, "queue(1) failed to update options(2) on mp(3)",
5040 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
5041 	} else {
5042 		*update_lastdst = B_TRUE;
5043 	}
5044 	return (err);
5045 }
5046 
/*
 * Build the IPv4 and UDP headers in front of a datagram's payload and hand
 * the finished packet to IP.
 *
 * connp	connection the datagram is sent on
 * mp		either the M_DATA payload itself, or a non-M_DATA block
 *		(handled as a T_UNITDATA_REQ) whose b_cont is the payload
 * v4dst	destination address; INADDR_ANY is replaced by loopback
 * port		destination port, stored unmodified in uha_dst_port
 * srcid	source-id hint used only when the bound source is INADDR_ANY
 * error	out: 0 on success, otherwise an errno value
 * insert_spi	when set, a zeroed 32-bit word is placed right after the
 *		UDP header
 * msg		consulted for ancillary data only when IPCL_IS_NONSTR(connp)
 * cr, pid	cred/pid attached to mp when it has none and the cached ire
 *		cannot be used as-is for this destination
 *
 * Returns NULL when the message was consumed (*error == 0).  On failure
 * *error is non-zero and the still-unconsumed message is returned; it is
 * not freed here.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
    cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	ipha_t		*ipha;
	int		ip_hdr_length;
	uint32_t 	ip_len;
	udpha_t		*udpha;
	boolean_t 	lock_held = B_FALSE;
	in_port_t	uha_src_port;
	udpattrs_t	attrs;
	uchar_t		ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;
	ip4_pkt_t  	pktinfo;
	ip4_pkt_t  	*pktinfop = &pktinfo;
	ip_opt_info_t	optinfo;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t	*us = udp->udp_us;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
	queue_t		*q = connp->conn_wq;
	ire_t		*ire;
	in6_addr_t	v6dst;
	boolean_t	update_lastdst = B_FALSE;

	*error = 0;
	pktinfop->ip4_ill_index = 0;
	pktinfop->ip4_addr = INADDR_ANY;
	optinfo.ip_opt_flags = 0;
	optinfo.ip_opt_ill_index = 0;

	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (IPCL_IS_NONSTR(connp)) {
		/* Non-STREAMS socket: ancillary data arrives in the msghdr */
		if (msg->msg_controllen != 0) {
			attrs.udpattr_ipp4 = pktinfop;
			attrs.udpattr_mb = mp;

			rw_enter(&udp->udp_rwlock, RW_WRITER);
			*error = process_auxiliary_options(connp,
			    msg->msg_control, msg->msg_controllen,
			    &attrs, &udp_opt_obj, udp_opt_set, cr);
			rw_exit(&udp->udp_rwlock);
			if (*error)
				goto done;
		}
	} else {
		/* TPI: the payload hangs off the T_UNITDATA_REQ block */
		if (DB_TYPE(mp) != M_DATA) {
			mp1 = mp->b_cont;
			if (((struct T_unitdata_req *)
			    mp->b_rptr)->OPT_length != 0) {
				attrs.udpattr_ipp4 = pktinfop;
				attrs.udpattr_mb = mp;
				if (udp_unitdata_opt_process(q, mp, error,
				    &attrs) < 0)
					goto done;
				/*
				 * Note: success in processing options.
				 * mp option buffer represented by
				 * OPT_length/offset now potentially modified
				 * and contain option setting results
				 */
				ASSERT(*error == 0);
			}
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Determine whether we need to mark the mblk with the user's
	 * credentials.
	 * If labeled then sockfs would have already done this.
	 */
	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);

	/*
	 * The cred is attached only when the destination is multicast, no
	 * ire is cached, the cached ire is for another address, or it is a
	 * broadcast/local/loopback ire -- and only if mp has no cred yet.
	 */
	ire = connp->conn_ire_cache;
	if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) ||
	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
		if (cr != NULL && msg_getcred(mp, NULL) == NULL)
			mblk_setcred(mp, cr, pid);
	}

	rw_enter(&udp->udp_rwlock, RW_READER);
	lock_held = B_TRUE;

	/*
	 * Cluster and TSOL note:
	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
	 *    udp.udp_lastdstport	is used by Cluster
	 *
	 * Both Cluster and TSOL need to update the dest addr and/or port.
	 * Updating is done after both Cluster and TSOL checks, protected
	 * by conn_lock.
	 */
	mutex_enter(&connp->conn_lock);

	if (cl_inet_connect2 != NULL &&
	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
	    udp->udp_lastdstport != port)) {
		/* conn_lock is dropped around the cluster hook */
		mutex_exit(&connp->conn_lock);
		*error = 0;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
		if (*error != 0) {
			*error = EHOSTUNREACH;
			goto done;
		}
		update_lastdst = B_TRUE;
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * Check if our saved options are valid; update if not.
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels,
	 * or worse, UDP packets to same IP address may require
	 * different labels due to use of shared all-zones address.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/*
		 * update label option for this UDP socket if
		 * - the destination has changed, or
		 * - the UDP socket is MLP
		 */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
		    connp->conn_mlp_type != mlptSingle) &&
		    (*error = udp_update_label(q, mp, v4dst, &update_lastdst))
		    != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	if (update_lastdst) {
		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
		udp->udp_lastdstport = port;
	}
	/* Snapshot the send options while conn_lock keeps them stable */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
	    (insert_spi ? sizeof (uint32_t) : 0);
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * The headers are written into the payload block's headroom only
	 * when the data block is not shared, the headers fit above db_base
	 * and the resulting start passes OK_32PTR; otherwise a new block is
	 * allocated and linked in front of the payload.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	/* From here on ip_hdr_length covers the IP header and options only */
	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	if (pktinfop->ip4_addr != INADDR_ANY) {
		/* Source came from the per-packet options; ask IP to verify */
		ipha->ipha_src = pktinfop->ip4_addr;
		optinfo.ip_opt_flags = IP_VERIFY_SRC;
	} else {
		/*
		 * Copy our address into the packet.  If this is zero,
		 * first look at __sin6_src_id for a hint. If we leave the
		 * source as INADDR_ANY then ip will fill in the real source
		 * address.
		 */
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
			in6_addr_t v6src;

			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
			    us->us_netstack);
			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
		}
	}
	uha_src_port = udp->udp_port;
	/*
	 * Without IP options the reader lock is dropped here; with options
	 * it is dropped further down, once they have been copied into the
	 * header.
	 */
	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
		rw_exit(&udp->udp_rwlock);
		lock_held = B_FALSE;
	}

	if (pktinfop->ip4_ill_index != 0) {
		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/*
	 * ip_len is reused: it now holds the length of everything after the
	 * IP header and options, converted with htons().
	 */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = uha_src_port;

	if (ip_snd_opt_len > 0) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		lock_held = B_FALSE;
		rw_exit(&udp->udp_rwlock);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha, us->us_netstack);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (us->us_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (us->us_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (us->us_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	ASSERT(!lock_held);
	/* Set UDP length and checksum */
	*((uint32_t *)&udpha->uha_length) = ip_len;

	if (DB_TYPE(mp) != M_DATA) {
		cred_t *cr;
		pid_t cpid;

		/* Move any cred from the T_UNITDATA_REQ to the packet */
		cr = msg_extractcred(mp, &cpid);
		if (cr != NULL) {
			if (mp1->b_datap->db_credp != NULL)
				crfree(mp1->b_datap->db_credp);
			mp1->b_datap->db_credp = cr;
			mp1->b_datap->db_cpid = cpid;
		}
		ASSERT(mp != mp1);
		/* Only the leading non-M_DATA block is freed; mp1 lives on */
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Anything that needs extra handling (policy, dontroute, a bound
	 * outgoing ill, per-packet options, IP options in the header, IPP,
	 * or a multicast router) goes through ip_output_options(); all
	 * other packets take udp_send_data().
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	/* Common exit: drop the reader lock if an error path still holds it */
	if (lock_held)
		rw_exit(&udp->udp_rwlock);
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}
5435 
/*
 * Send a fully formed IPv4/UDP packet, using the ire cached in the conn
 * where possible.
 *
 * udp		endpoint the packet belongs to
 * q		write queue handed to ip_output() on the fallback paths
 * mp		packet; b_rptr is expected to be the IP header 'ipha'
 * ipha		IP header inside mp; ipha_ident must still be 0
 *
 * An ire for the destination is taken from conn_ire_cache or looked up
 * (and cached in the conn when allowed).  The packet is handed to
 * ip_output() when the multicast ipif is unusable, when no ire is found,
 * or when the ire does not qualify for the fast path; otherwise the
 * source address is filled in if it was left unspecified and the packet
 * goes to udp_xmit().  mp is passed on along every path.
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t	*connp = udp->udp_connp;
	ipaddr_t src, dst;
	ire_t	*ire;
	ipif_t	*ipif = NULL;
	mblk_t	*ire_fp_mp;
	boolean_t retry_caching;
	udp_stack_t *us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);

	if (CLASSD(dst)) {
		int err;

		/* Multicast: a held ipif is needed for the checks below */
		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	/*
	 * The cached ire is unusable if it is missing, is for another
	 * destination, is condemned, or (multicast over an IRE_CACHE entry)
	 * fails the IS_ON_SAME_LAN check against the multicast ipif's ill.
	 */
	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		/* Take our own hold on the cached ire for this send */
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, msg_getlabel(mp),
			    MATCH_IRE_ILL, ipst);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    msg_getlabel(mp), ipst);
		}

		if (ire == NULL) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		/* Extra hold intended for the conn's cache slot */
		IRE_REFHOLD_NOTR(ire);

		mutex_enter(&connp->conn_lock);
		if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL &&
		    !(ire->ire_marks & IRE_MARK_CONDEMNED)) {
			irb_t		*irb = ire->ire_bucket;

			/*
			 * IRE's created for non-connection oriented transports
			 * are normally initialized with IRE_MARK_TEMPORARY set
			 * in the ire_marks. These IRE's are preferentially
			 * reaped when the hash chain length in the cache
			 * bucket exceeds the maximum value specified in
			 * ip[6]_ire_max_bucket_cnt. This can severely affect
			 * UDP performance if IRE cache entries that we need
			 * to reuse are continually removed. To remedy this,
			 * when we cache the IRE in the conn_t, we remove the
			 * IRE_MARK_TEMPORARY bit from the ire_marks if it was
			 * set.
			 */
			if (ire->ire_marks & IRE_MARK_TEMPORARY) {
				rw_enter(&irb->irb_lock, RW_WRITER);
				/* Re-check now that irb_lock is held */
				if (ire->ire_marks & IRE_MARK_TEMPORARY) {
					ire->ire_marks &= ~IRE_MARK_TEMPORARY;
					irb->irb_tmp_ire_cnt--;
				}
				rw_exit(&irb->irb_lock);
			}
			connp->conn_ire_cache = ire;
			cached = B_TRUE;
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    ((ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
	    connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(us, udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	/*
	 * Fill in an unspecified source address unless the conn asked for
	 * it to stay unspecified: the ipif's address for multicast without
	 * RTF_SETSRC, the ire's source address otherwise.
	 */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			ipha->ipha_src = ipif->ipif_src_addr;
		else
			ipha->ipha_src = ire->ire_src_addr;
	}

	if (ipif != NULL)
		ipif_refrele(ipif);

	/* udp_xmit() releases the ire reference held for this send */
	udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
}
5594 
/*
 * Fast-path transmit of an IPv4/UDP packet over a resolved ire.
 *
 * q		write queue, checked for queued data and passed to
 *		ip_multicast_loopback()
 * mp		packet whose b_rptr is the simple (option-less) IP header
 * ire		held ire with a non-NULL ire_stq and fast-path header
 * connp	sending connection
 * zoneid	zone passed to ip_multicast_loopback()
 *
 * Finishes the IP header (ident, fragment flags, checksums), prepends the
 * link-layer header from the ire's nce_fp_mp, runs the outbound firewall
 * and observability hooks and hands the packet to the driver, either via
 * the ill's direct-send entry point or via putnext().  Every path passes
 * mp on or frees it, and every path releases the caller's ire reference.
 */
static void
udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
{
	ipaddr_t src, dst;
	ill_t	*ill;
	mblk_t	*ire_fp_mp;
	uint_t	ire_fp_mp_len;
	uint16_t *up;
	uint32_t cksum, hcksum_txflags;
	queue_t	*dev_q;
	udp_t	*udp = connp->conn_udp;
	ipha_t	*ipha = (ipha_t *)mp->b_rptr;
	udp_stack_t	*us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	ll_multicast = B_FALSE;
	boolean_t	direct_send;

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	/*
	 * For the direct send case, if resetting of conn_direct_blocked
	 * was missed, it is still ok because the putq() would enable
	 * the queue and write service will drain it out.
	 */
	direct_send = ILL_DIRECT_CAPABLE(ill);

	/* is queue flow controlled? */
	if ((!direct_send) && (q->q_first != NULL || connp->conn_draining ||
	    DEV_Q_FLOW_BLOCKED(dev_q))) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
		/* Queue the packet if ips_ip_output_queue is set, else drop */
		if (ipst->ips_ip_output_queue) {
			DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp);
			(void) putq(connp->conn_wq, mp);
		} else {
			freemsg(mp);
		}
		ire_refrele(ire);
		return;
	}

	ire_fp_mp = ire->ire_nce->nce_fp_mp;
	ire_fp_mp_len = MBLKL(ire_fp_mp);
	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;


	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	/* Next ident from the ire's counter; byte-swapped if not big-endian */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means no UDP checksum is computed */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		/* Loop back when ilm_lookup_ill() finds this group on ill */
		if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			freemsg(mp);
			ire_refrele(ire);
			return;
		}
		ll_multicast = B_TRUE;
	}

	/* Prepend the cached link-layer header in mp's headroom */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	DTRACE_PROBE4(ip4__physical__out__start,
	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
	    ll_multicast, ipst);
	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	/* mp is re-checked for NULL after the hooks, here and below */
	if (ipst->ips_ipobs_enabled && mp != NULL) {
		zoneid_t szone;

		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
		    ipst, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
	}

	if (mp == NULL)
		goto bail;

	DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
	    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
	    ipha_t *, ipha, ip6_t *, NULL, int, 0);

	if (direct_send) {
		uintptr_t cookie;
		ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;

		/* A non-zero cookie back from the driver means flow control */
		cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,
		    (uintptr_t)connp, 0);
		if (cookie != NULL) {
			idl_tx_list_t *idl_txl;

			/*
			 * Flow controlled.
			 */
			DTRACE_PROBE2(non__null__cookie, uintptr_t,
			    cookie, conn_t *, connp);
			idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
			mutex_enter(&idl_txl->txl_lock);
			/*
			 * Check again after holding txl_lock to see if Tx
			 * ring is still blocked and only then insert the
			 * connp into the drain list.
			 */
			if (connp->conn_direct_blocked ||
			    (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,
			    cookie) == 0)) {
				mutex_exit(&idl_txl->txl_lock);
				goto bail;
			}
			/*
			 * A list already tagged with a different cookie is
			 * only counted as a collision; the conn is not
			 * inserted in that case.
			 */
			if (idl_txl->txl_cookie != NULL &&
			    idl_txl->txl_cookie != cookie) {
				DTRACE_PROBE2(udp__xmit__collision,
				    uintptr_t, cookie,
				    uintptr_t, idl_txl->txl_cookie);
				UDP_STAT(us, udp_cookie_coll);
			} else {
				connp->conn_direct_blocked = B_TRUE;
				idl_txl->txl_cookie = cookie;
				conn_drain_insert(connp, idl_txl);
				DTRACE_PROBE1(udp__xmit__insert,
				    conn_t *, connp);
			}
			mutex_exit(&idl_txl->txl_lock);
		}
	} else {
		DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp);
		putnext(ire->ire_stq, mp);
	}
bail:
	/* Drop the ire reference handed in by the caller */
	IRE_REFRELE(ire);
}
5789 
5790 static boolean_t
5791 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst,
5792     boolean_t *update_lastdst)
5793 {
5794 	udp_t *udp = Q_TO_UDP(wq);
5795 	int err;
5796 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
5797 	udp_stack_t		*us = udp->udp_us;
5798 	cred_t			*cr;
5799 
5800 	/*
5801 	 * All Solaris components should pass a db_credp
5802 	 * for this message, hence we ASSERT.
5803 	 * On production kernels we return an error to be robust against
5804 	 * random streams modules sitting on top of us.
5805 	 */
5806 	cr = msg_getcred(mp, NULL);
5807 	ASSERT(cr != NULL);
5808 	if (cr == NULL)
5809 		return (EINVAL);
5810 
5811 	/* Note that we use the cred/label from the message to handle MLP */
5812 	err = tsol_compute_label_v6(cr,
5813 	    dst, opt_storage, udp->udp_connp->conn_mac_exempt,
5814 	    us->us_netstack->netstack_ip);
5815 	if (err == 0) {
5816 		err = tsol_update_sticky(&udp->udp_sticky_ipp,
5817 		    &udp->udp_label_len_v6, opt_storage);
5818 	}
5819 	if (err != 0) {
5820 		DTRACE_PROBE4(
5821 		    tx__ip__log__drop__updatelabel__udp6,
5822 		    char *, "queue(1) failed to update options(2) on mp(3)",
5823 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
5824 	} else {
5825 		*update_lastdst = B_TRUE;
5826 	}
5827 	return (err);
5828 }
5829 
5830 static int
5831 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr,
5832     pid_t pid)
5833 {
5834 	udp_t		*udp = connp->conn_udp;
5835 	udp_stack_t	*us = udp->udp_us;
5836 	ipaddr_t	v4dst;
5837 	in_port_t	dstport;
5838 	boolean_t	mapped_addr;
5839 	struct sockaddr_storage ss;
5840 	sin_t		*sin;
5841 	sin6_t		*sin6;
5842 	struct sockaddr	*addr;
5843 	socklen_t	addrlen;
5844 	int		error;
5845 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5846 
5847 	/* M_DATA for connected socket */
5848 
5849 	ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp));
5850 	UDP_DBGSTAT(us, udp_data_conn);
5851 
5852 	mutex_enter(&connp->conn_lock);
5853 	if (udp->udp_state != TS_DATA_XFER) {
5854 		mutex_exit(&connp->conn_lock);
5855 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5856 		UDP_STAT(us, udp_out_err_notconn);
5857 		freemsg(mp);
5858 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5859 		    "udp_wput_end: connp %p (%S)", connp,
5860 		    "not-connected; address required");
5861 		return (EDESTADDRREQ);
5862 	}
5863 
5864 	mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
5865 	if (mapped_addr)
5866 		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
5867 
5868 	/* Initialize addr and addrlen as if they're passed in */
5869 	if (udp->udp_family == AF_INET) {
5870 		sin = (sin_t *)&ss;
5871 		sin->sin_family = AF_INET;
5872 		dstport = sin->sin_port = udp->udp_dstport;
5873 		ASSERT(mapped_addr);
5874 		sin->sin_addr.s_addr = v4dst;
5875 		addr = (struct sockaddr *)sin;
5876 		addrlen = sizeof (*sin);
5877 	} else {
5878 		sin6 = (sin6_t *)&ss;
5879 		sin6->sin6_family = AF_INET6;
5880 		dstport = sin6->sin6_port = udp->udp_dstport;
5881 		sin6->sin6_flowinfo = udp->udp_flowinfo;
5882 		sin6->sin6_addr = udp->udp_v6dst;
5883 		sin6->sin6_scope_id = 0;
5884 		sin6->__sin6_src_id = 0;
5885 		addr = (struct sockaddr *)sin6;
5886 		addrlen = sizeof (*sin6);
5887 	}
5888 	mutex_exit(&connp->conn_lock);
5889 
5890 	if (mapped_addr) {
5891 		/*
5892 		 * Handle both AF_INET and AF_INET6; the latter
5893 		 * for IPV4 mapped destination addresses.  Note
5894 		 * here that both addr and addrlen point to the
5895 		 * corresponding struct depending on the address
5896 		 * family of the socket.
5897 		 */
5898 		mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
5899 		    insert_spi, msg, cr, pid);
5900 	} else {
5901 		mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid);
5902 	}
5903 	if (error == 0) {
5904 		ASSERT(mp == NULL);
5905 		return (0);
5906 	}
5907 
5908 	UDP_STAT(us, udp_out_err_output);
5909 	ASSERT(mp != NULL);
5910 	if (IPCL_IS_NONSTR(connp)) {
5911 		freemsg(mp);
5912 		return (error);
5913 	} else {
5914 		/* mp is freed by the following routine */
5915 		udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr,
5916 		    (t_scalar_t)addrlen, (t_scalar_t)error);
5917 		return (0);
5918 	}
5919 }
5920 
5921 /* ARGSUSED */
5922 static int
5923 udp_send_not_connected(conn_t *connp,  mblk_t *mp, struct sockaddr *addr,
5924     socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid)
5925 {
5926 
5927 	udp_t		*udp = connp->conn_udp;
5928 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5929 	int		error = 0;
5930 	sin6_t		*sin6;
5931 	sin_t		*sin;
5932 	uint_t		srcid;
5933 	uint16_t	port;
5934 	ipaddr_t	v4dst;
5935 
5936 
5937 	ASSERT(addr != NULL);
5938 
5939 	switch (udp->udp_family) {
5940 	case AF_INET6:
5941 		sin6 = (sin6_t *)addr;
5942 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5943 			/*
5944 			 * Destination is a non-IPv4-compatible IPv6 address.
5945 			 * Send out an IPv6 format packet.
5946 			 */
5947 			mp = udp_output_v6(connp, mp, sin6, &error, msg, cr,
5948 			    pid);
5949 			if (error != 0)
5950 				goto ud_error;
5951 
5952 			return (0);
5953 		}
5954 		/*
5955 		 * If the local address is not zero or a mapped address
5956 		 * return an error.  It would be possible to send an IPv4
5957 		 * packet but the response would never make it back to the
5958 		 * application since it is bound to a non-mapped address.
5959 		 */
5960 		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
5961 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
5962 			error = EADDRNOTAVAIL;
5963 			goto ud_error;
5964 		}
5965 		/* Send IPv4 packet without modifying udp_ipversion */
5966 		/* Extract port and ipaddr */
5967 		port = sin6->sin6_port;
5968 		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
5969 		srcid = sin6->__sin6_src_id;
5970 		break;
5971 
5972 	case AF_INET:
5973 		sin = (sin_t *)addr;
5974 		/* Extract port and ipaddr */
5975 		port = sin->sin_port;
5976 		v4dst = sin->sin_addr.s_addr;
5977 		srcid = 0;
5978 		break;
5979 	}
5980 
5981 	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
5982 	    msg, cr, pid);
5983 
5984 	if (error == 0) {
5985 		ASSERT(mp == NULL);
5986 		return (0);
5987 	}
5988 
5989 ud_error:
5990 	ASSERT(mp != NULL);
5991 
5992 	return (error);
5993 }
5994 
5995 /*
5996  * This routine handles all messages passed downstream.  It either
 * consumes the message or passes it downstream; it never queues a
 * message.
5999  *
6000  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
6001  * is valid when we are directly beneath the stream head, and thus sockfs
6002  * is able to bypass STREAMS and directly call us, passing along the sockaddr
6003  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
6004  * connected endpoints.
6005  */
6006 void
6007 udp_wput(queue_t *q, mblk_t *mp)
6008 {
6009 	conn_t		*connp = Q_TO_CONN(q);
6010 	udp_t		*udp = connp->conn_udp;
6011 	int		error = 0;
6012 	struct sockaddr	*addr;
6013 	socklen_t	addrlen;
6014 	udp_stack_t	*us = udp->udp_us;
6015 
6016 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
6017 	    "udp_wput_start: queue %p mp %p", q, mp);
6018 
6019 	/*
6020 	 * We directly handle several cases here: T_UNITDATA_REQ message
6021 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
6022 	 * socket.
6023 	 */
6024 	switch (DB_TYPE(mp)) {
6025 	case M_DATA:
6026 		/*
6027 		 * Quick check for error cases. Checks will be done again
6028 		 * under the lock later on
6029 		 */
6030 		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
6031 			/* Not connected; address is required */
6032 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6033 			UDP_STAT(us, udp_out_err_notconn);
6034 			freemsg(mp);
6035 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6036 			    "udp_wput_end: connp %p (%S)", connp,
6037 			    "not-connected; address required");
6038 			return;
6039 		}
6040 		(void) udp_send_connected(connp, mp, NULL, NULL, -1);
6041 		return;
6042 
6043 	case M_PROTO:
6044 	case M_PCPROTO: {
6045 		struct T_unitdata_req *tudr;
6046 
6047 		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
6048 		tudr = (struct T_unitdata_req *)mp->b_rptr;
6049 
6050 		/* Handle valid T_UNITDATA_REQ here */
6051 		if (MBLKL(mp) >= sizeof (*tudr) &&
6052 		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
6053 			if (mp->b_cont == NULL) {
6054 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6055 				    "udp_wput_end: q %p (%S)", q, "badaddr");
6056 				error = EPROTO;
6057 				goto ud_error;
6058 			}
6059 
6060 			if (!MBLKIN(mp, 0, tudr->DEST_offset +
6061 			    tudr->DEST_length)) {
6062 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6063 				    "udp_wput_end: q %p (%S)", q, "badaddr");
6064 				error = EADDRNOTAVAIL;
6065 				goto ud_error;
6066 			}
6067 			/*
6068 			 * If a port has not been bound to the stream, fail.
6069 			 * This is not a problem when sockfs is directly
6070 			 * above us, because it will ensure that the socket
6071 			 * is first bound before allowing data to be sent.
6072 			 */
6073 			if (udp->udp_state == TS_UNBND) {
6074 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6075 				    "udp_wput_end: q %p (%S)", q, "outstate");
6076 				error = EPROTO;
6077 				goto ud_error;
6078 			}
6079 			addr = (struct sockaddr *)
6080 			    &mp->b_rptr[tudr->DEST_offset];
6081 			addrlen = tudr->DEST_length;
6082 			if (tudr->OPT_length != 0)
6083 				UDP_STAT(us, udp_out_opt);
6084 			break;
6085 		}
6086 		/* FALLTHRU */
6087 	}
6088 	default:
6089 		udp_wput_other(q, mp);
6090 		return;
6091 	}
6092 	ASSERT(addr != NULL);
6093 
6094 	error = udp_send_not_connected(connp,  mp, addr, addrlen, NULL, NULL,
6095 	    -1);
6096 	if (error != 0) {
6097 ud_error:
6098 		UDP_STAT(us, udp_out_err_output);
6099 		ASSERT(mp != NULL);
6100 		/* mp is freed by the following routine */
6101 		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
6102 		    (t_scalar_t)error);
6103 	}
6104 }
6105 
6106 /* ARGSUSED */
6107 static void
6108 udp_wput_fallback(queue_t *wq, mblk_t *mp)
6109 {
6110 #ifdef DEBUG
6111 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
6112 #endif
6113 	freemsg(mp);
6114 }
6115 
6116 
6117 /*
6118  * udp_output_v6():
6119  * Assumes that udp_wput did some sanity checking on the destination
6120  * address.
6121  */
6122 static mblk_t *
6123 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error,
6124     struct nmsghdr *msg, cred_t *cr, pid_t pid)
6125 {
6126 	ip6_t		*ip6h;
6127 	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
6128 	mblk_t		*mp1 = mp;
6129 	mblk_t		*mp2;
6130 	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
6131 	size_t		ip_len;
6132 	udpha_t		*udph;
6133 	udp_t		*udp = connp->conn_udp;
6134 	udp_stack_t	*us = udp->udp_us;
6135 	queue_t		*q = connp->conn_wq;
6136 	ip6_pkt_t	ipp_s;	/* For ancillary data options */
6137 	ip6_pkt_t	*ipp = &ipp_s;
6138 	ip6_pkt_t	*tipp;	/* temporary ipp */
6139 	uint32_t	csum = 0;
6140 	uint_t		ignore = 0;
6141 	uint_t		option_exists = 0, is_sticky = 0;
6142 	uint8_t		*cp;
6143 	uint8_t		*nxthdr_ptr;
6144 	in6_addr_t	ip6_dst;
6145 	in_port_t	port;
6146 	udpattrs_t	attrs;
6147 	boolean_t	opt_present;
6148 	ip6_hbh_t	*hopoptsptr = NULL;
6149 	uint_t		hopoptslen = 0;
6150 	boolean_t	is_ancillary = B_FALSE;
6151 	size_t		sth_wroff = 0;
6152 	ire_t		*ire;
6153 	boolean_t	update_lastdst = B_FALSE;
6154 
6155 	*error = 0;
6156 
6157 	/*
6158 	 * If the local address is a mapped address return
6159 	 * an error.
6160 	 * It would be possible to send an IPv6 packet but the
6161 	 * response would never make it back to the application
6162 	 * since it is bound to a mapped address.
6163 	 */
6164 	if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
6165 		*error = EADDRNOTAVAIL;
6166 		goto done;
6167 	}
6168 
6169 	ipp->ipp_fields = 0;
6170 	ipp->ipp_sticky_ignored = 0;
6171 
6172 	/*
6173 	 * If TPI options passed in, feed it for verification and handling
6174 	 */
6175 	attrs.udpattr_credset = B_FALSE;
6176 	opt_present = B_FALSE;
6177 	if (IPCL_IS_NONSTR(connp)) {
6178 		if (msg->msg_controllen != 0) {
6179 			attrs.udpattr_ipp6 = ipp;
6180 			attrs.udpattr_mb = mp;
6181 
6182 			rw_enter(&udp->udp_rwlock, RW_WRITER);
6183 			*error = process_auxiliary_options(connp,
6184 			    msg->msg_control, msg->msg_controllen,
6185 			    &attrs, &udp_opt_obj, udp_opt_set, cr);
6186 			rw_exit(&udp->udp_rwlock);
6187 			if (*error)
6188 				goto done;
6189 			ASSERT(*error == 0);
6190 			opt_present = B_TRUE;
6191 		}
6192 	} else {
6193 		if (DB_TYPE(mp) != M_DATA) {
6194 			mp1 = mp->b_cont;
6195 			if (((struct T_unitdata_req *)
6196 			    mp->b_rptr)->OPT_length != 0) {
6197 				attrs.udpattr_ipp6 = ipp;
6198 				attrs.udpattr_mb = mp;
6199 				if (udp_unitdata_opt_process(q, mp, error,
6200 				    &attrs) < 0) {
6201 					goto done;
6202 				}
6203 				ASSERT(*error == 0);
6204 				opt_present = B_TRUE;
6205 			}
6206 		}
6207 	}
6208 
6209 	/*
6210 	 * Determine whether we need to mark the mblk with the user's
6211 	 * credentials.
6212 	 * If labeled then sockfs would have already done this.
6213 	 */
6214 	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
6215 	ire = connp->conn_ire_cache;
6216 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) ||
6217 	    (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) ||
6218 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) {
6219 		if (cr != NULL && msg_getcred(mp, NULL) == NULL)
6220 			mblk_setcred(mp, cr, pid);
6221 	}
6222 
6223 	rw_enter(&udp->udp_rwlock, RW_READER);
6224 	ignore = ipp->ipp_sticky_ignored;
6225 
6226 	/* mp1 points to the M_DATA mblk carrying the packet */
6227 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
6228 
6229 	if (sin6->sin6_scope_id != 0 &&
6230 	    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
6231 		/*
6232 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
6233 		 * option nor ancillary data.  It needs to be
6234 		 * explicitly set in options_exists.
6235 		 */
6236 		option_exists |= IPPF_SCOPE_ID;
6237 	}
6238 
6239 	/*
6240 	 * Compute the destination address
6241 	 */
6242 	ip6_dst = sin6->sin6_addr;
6243 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6244 		ip6_dst = ipv6_loopback;
6245 
6246 	port = sin6->sin6_port;
6247 
6248 	/*
6249 	 * Cluster and TSOL notes, Cluster check:
6250 	 * see comments in udp_output_v4().
6251 	 */
6252 	mutex_enter(&connp->conn_lock);
6253 
6254 	if (cl_inet_connect2 != NULL &&
6255 	    (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) ||
6256 	    port != udp->udp_lastdstport)) {
6257 		mutex_exit(&connp->conn_lock);
6258 		*error = 0;
6259 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error);
6260 		if (*error != 0) {
6261 			*error = EHOSTUNREACH;
6262 			rw_exit(&udp->udp_rwlock);
6263 			goto done;
6264 		}
6265 		update_lastdst = B_TRUE;
6266 		mutex_enter(&connp->conn_lock);
6267 	}
6268 
6269 	/*
6270 	 * If we're not going to the same destination as last time, then
6271 	 * recompute the label required.  This is done in a separate routine to
6272 	 * avoid blowing up our stack here.
6273 	 *
6274 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
6275 	 * to different destination may require different labels,
6276 	 * or worse, UDP packets to same IP address may require
6277 	 * different labels due to use of shared all-zones address.
6278 	 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
6279 	 * and sticky ipp_hopoptslen are consistent for the current
6280 	 * destination and are updated atomically.
6281 	 */
6282 	if (is_system_labeled()) {
6283 		/* Using UDP MLP requires SCM_UCRED from user */
6284 		if (connp->conn_mlp_type != mlptSingle &&
6285 		    !attrs.udpattr_credset) {
6286 			DTRACE_PROBE4(
6287 			    tx__ip__log__info__output__udp6,
6288 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
6289 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
6290 			*error = ECONNREFUSED;
6291 			rw_exit(&udp->udp_rwlock);
6292 			mutex_exit(&connp->conn_lock);
6293 			goto done;
6294 		}
6295 		/*
6296 		 * update label option for this UDP socket if
6297 		 * - the destination has changed, or
6298 		 * - the UDP socket is MLP
6299 		 */
6300 		if ((opt_present ||
6301 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
6302 		    connp->conn_mlp_type != mlptSingle) &&
6303 		    (*error = udp_update_label_v6(q, mp, &ip6_dst,
6304 		    &update_lastdst)) != 0) {
6305 			rw_exit(&udp->udp_rwlock);
6306 			mutex_exit(&connp->conn_lock);
6307 			goto done;
6308 		}
6309 	}
6310 
6311 	if (update_lastdst) {
6312 		udp->udp_v6lastdst = ip6_dst;
6313 		udp->udp_lastdstport = port;
6314 	}
6315 
6316 	/*
6317 	 * If there's a security label here, then we ignore any options the
6318 	 * user may try to set.  We keep the peer's label as a hidden sticky
6319 	 * option. We make a private copy of this label before releasing the
6320 	 * lock so that label is kept consistent with the destination addr.
6321 	 */
6322 	if (udp->udp_label_len_v6 > 0) {
6323 		ignore &= ~IPPF_HOPOPTS;
6324 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
6325 	}
6326 
6327 	if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
6328 		/* No sticky options nor ancillary data. */
6329 		mutex_exit(&connp->conn_lock);
6330 		goto no_options;
6331 	}
6332 
6333 	/*
6334 	 * Go through the options figuring out where each is going to
6335 	 * come from and build two masks.  The first mask indicates if
6336 	 * the option exists at all.  The second mask indicates if the
6337 	 * option is sticky or ancillary.
6338 	 */
6339 	if (!(ignore & IPPF_HOPOPTS)) {
6340 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
6341 			option_exists |= IPPF_HOPOPTS;
6342 			udp_ip_hdr_len += ipp->ipp_hopoptslen;
6343 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
6344 			option_exists |= IPPF_HOPOPTS;
6345 			is_sticky |= IPPF_HOPOPTS;
6346 			ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
6347 			hopoptsptr = kmem_alloc(
6348 			    udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
6349 			if (hopoptsptr == NULL) {
6350 				*error = ENOMEM;
6351 				mutex_exit(&connp->conn_lock);
6352 				goto done;
6353 			}
6354 			hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
6355 			bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
6356 			    hopoptslen);
6357 			udp_ip_hdr_len += hopoptslen;
6358 		}
6359 	}
6360 	mutex_exit(&connp->conn_lock);
6361 
6362 	if (!(ignore & IPPF_RTHDR)) {
6363 		if (ipp->ipp_fields & IPPF_RTHDR) {
6364 			option_exists |= IPPF_RTHDR;
6365 			udp_ip_hdr_len += ipp->ipp_rthdrlen;
6366 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
6367 			option_exists |= IPPF_RTHDR;
6368 			is_sticky |= IPPF_RTHDR;
6369 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
6370 		}
6371 	}
6372 
6373 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
6374 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
6375 			option_exists |= IPPF_RTDSTOPTS;
6376 			udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
6377 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
6378 			option_exists |= IPPF_RTDSTOPTS;
6379 			is_sticky |= IPPF_RTDSTOPTS;
6380 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
6381 		}
6382 	}
6383 
6384 	if (!(ignore & IPPF_DSTOPTS)) {
6385 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
6386 			option_exists |= IPPF_DSTOPTS;
6387 			udp_ip_hdr_len += ipp->ipp_dstoptslen;
6388 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
6389 			option_exists |= IPPF_DSTOPTS;
6390 			is_sticky |= IPPF_DSTOPTS;
6391 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
6392 		}
6393 	}
6394 
6395 	if (!(ignore & IPPF_IFINDEX)) {
6396 		if (ipp->ipp_fields & IPPF_IFINDEX) {
6397 			option_exists |= IPPF_IFINDEX;
6398 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
6399 			option_exists |= IPPF_IFINDEX;
6400 			is_sticky |= IPPF_IFINDEX;
6401 		}
6402 	}
6403 
6404 	if (!(ignore & IPPF_ADDR)) {
6405 		if (ipp->ipp_fields & IPPF_ADDR) {
6406 			option_exists |= IPPF_ADDR;
6407 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
6408 			option_exists |= IPPF_ADDR;
6409 			is_sticky |= IPPF_ADDR;
6410 		}
6411 	}
6412 
6413 	if (!(ignore & IPPF_DONTFRAG)) {
6414 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
6415 			option_exists |= IPPF_DONTFRAG;
6416 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
6417 			option_exists |= IPPF_DONTFRAG;
6418 			is_sticky |= IPPF_DONTFRAG;
6419 		}
6420 	}
6421 
6422 	if (!(ignore & IPPF_USE_MIN_MTU)) {
6423 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
6424 			option_exists |= IPPF_USE_MIN_MTU;
6425 		} else if (udp->udp_sticky_ipp.ipp_fields &
6426 		    IPPF_USE_MIN_MTU) {
6427 			option_exists |= IPPF_USE_MIN_MTU;
6428 			is_sticky |= IPPF_USE_MIN_MTU;
6429 		}
6430 	}
6431 
6432 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
6433 		option_exists |= IPPF_HOPLIMIT;
6434 	/* IPV6_HOPLIMIT can never be sticky */
6435 	ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
6436 
6437 	if (!(ignore & IPPF_UNICAST_HOPS) &&
6438 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
6439 		option_exists |= IPPF_UNICAST_HOPS;
6440 		is_sticky |= IPPF_UNICAST_HOPS;
6441 	}
6442 
6443 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
6444 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
6445 		option_exists |= IPPF_MULTICAST_HOPS;
6446 		is_sticky |= IPPF_MULTICAST_HOPS;
6447 	}
6448 
6449 	if (!(ignore & IPPF_TCLASS)) {
6450 		if (ipp->ipp_fields & IPPF_TCLASS) {
6451 			option_exists |= IPPF_TCLASS;
6452 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
6453 			option_exists |= IPPF_TCLASS;
6454 			is_sticky |= IPPF_TCLASS;
6455 		}
6456 	}
6457 
6458 	if (!(ignore & IPPF_NEXTHOP) &&
6459 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
6460 		option_exists |= IPPF_NEXTHOP;
6461 		is_sticky |= IPPF_NEXTHOP;
6462 	}
6463 
6464 no_options:
6465 
6466 	/*
6467 	 * If any options carried in the ip6i_t were specified, we
6468 	 * need to account for the ip6i_t in the data we'll be sending
6469 	 * down.
6470 	 */
6471 	if (option_exists & IPPF_HAS_IP6I)
6472 		udp_ip_hdr_len += sizeof (ip6i_t);
6473 
6474 	/* check/fix buffer config, setup pointers into it */
6475 	ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
6476 	if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
6477 	    !OK_32PTR(ip6h)) {
6478 
6479 		/* Try to get everything in a single mblk next time */
6480 		if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
6481 			udp->udp_max_hdr_len = udp_ip_hdr_len;
6482 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
6483 		}
6484 
6485 		mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
6486 		if (mp2 == NULL) {
6487 			*error = ENOMEM;
6488 			rw_exit(&udp->udp_rwlock);
6489 			goto done;
6490 		}
6491 		mp2->b_wptr = DB_LIM(mp2);
6492 		mp2->b_cont = mp1;
6493 		mp1 = mp2;
6494 		if (DB_TYPE(mp) != M_DATA)
6495 			mp->b_cont = mp1;
6496 		else
6497 			mp = mp1;
6498 
6499 		ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
6500 	}
6501 	mp1->b_rptr = (unsigned char *)ip6h;
6502 	ip6i = (ip6i_t *)ip6h;
6503 
6504 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
6505 	if (option_exists & IPPF_HAS_IP6I) {
6506 		ip6h = (ip6_t *)&ip6i[1];
6507 		ip6i->ip6i_flags = 0;
6508 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6509 
6510 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
6511 		if (option_exists & IPPF_SCOPE_ID) {
6512 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6513 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
6514 		} else if (option_exists & IPPF_IFINDEX) {
6515 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
6516 			ASSERT(tipp->ipp_ifindex != 0);
6517 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6518 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
6519 		}
6520 
6521 		if (option_exists & IPPF_ADDR) {
6522 			/*
6523 			 * Enable per-packet source address verification if
6524 			 * IPV6_PKTINFO specified the source address.
6525 			 * ip6_src is set in the transport's _wput function.
6526 			 */
6527 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
6528 		}
6529 
6530 		if (option_exists & IPPF_DONTFRAG) {
6531 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
6532 		}
6533 
6534 		if (option_exists & IPPF_USE_MIN_MTU) {
6535 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
6536 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
6537 		}
6538 
6539 		if (option_exists & IPPF_NEXTHOP) {
6540 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
6541 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
6542 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
6543 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
6544 		}
6545 
6546 		/*
6547 		 * tell IP this is an ip6i_t private header
6548 		 */
6549 		ip6i->ip6i_nxt = IPPROTO_RAW;
6550 	}
6551 
6552 	/* Initialize IPv6 header */
6553 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6554 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
6555 
6556 	/* Set the hoplimit of the outgoing packet. */
6557 	if (option_exists & IPPF_HOPLIMIT) {
6558 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
6559 		ip6h->ip6_hops = ipp->ipp_hoplimit;
6560 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6561 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
6562 		ip6h->ip6_hops = udp->udp_multicast_ttl;
6563 		if (option_exists & IPPF_MULTICAST_HOPS)
6564 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6565 	} else {
6566 		ip6h->ip6_hops = udp->udp_ttl;
6567 		if (option_exists & IPPF_UNICAST_HOPS)
6568 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6569 	}
6570 
6571 	if (option_exists & IPPF_ADDR) {
6572 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
6573 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
6574 		ip6h->ip6_src = tipp->ipp_addr;
6575 	} else {
6576 		/*
6577 		 * The source address was not set using IPV6_PKTINFO.
6578 		 * First look at the bound source.
6579 		 * If unspecified fallback to __sin6_src_id.
6580 		 */
6581 		ip6h->ip6_src = udp->udp_v6src;
6582 		if (sin6->__sin6_src_id != 0 &&
6583 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
6584 			ip_srcid_find_id(sin6->__sin6_src_id,
6585 			    &ip6h->ip6_src, connp->conn_zoneid,
6586 			    us->us_netstack);
6587 		}
6588 	}
6589 
6590 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
6591 	cp = (uint8_t *)&ip6h[1];
6592 
6593 	/*
6594 	 * Here's where we have to start stringing together
6595 	 * any extension headers in the right order:
6596 	 * Hop-by-hop, destination, routing, and final destination opts.
6597 	 */
6598 	if (option_exists & IPPF_HOPOPTS) {
6599 		/* Hop-by-hop options */
6600 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
6601 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
6602 		if (hopoptslen == 0) {
6603 			hopoptsptr = tipp->ipp_hopopts;
6604 			hopoptslen = tipp->ipp_hopoptslen;
6605 			is_ancillary = B_TRUE;
6606 		}
6607 
6608 		*nxthdr_ptr = IPPROTO_HOPOPTS;
6609 		nxthdr_ptr = &hbh->ip6h_nxt;
6610 
6611 		bcopy(hopoptsptr, cp, hopoptslen);
6612 		cp += hopoptslen;
6613 
6614 		if (hopoptsptr != NULL && !is_ancillary) {
6615 			kmem_free(hopoptsptr, hopoptslen);
6616 			hopoptsptr = NULL;
6617 			hopoptslen = 0;
6618 		}
6619 	}
6620 	/*
6621 	 * En-route destination options
6622 	 * Only do them if there's a routing header as well
6623 	 */
6624 	if (option_exists & IPPF_RTDSTOPTS) {
6625 		ip6_dest_t *dst = (ip6_dest_t *)cp;
6626 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
6627 
6628 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6629 		nxthdr_ptr = &dst->ip6d_nxt;
6630 
6631 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
6632 		cp += tipp->ipp_rtdstoptslen;
6633 	}
6634 	/*
6635 	 * Routing header next
6636 	 */
6637 	if (option_exists & IPPF_RTHDR) {
6638 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
6639 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
6640 
6641 		*nxthdr_ptr = IPPROTO_ROUTING;
6642 		nxthdr_ptr = &rt->ip6r_nxt;
6643 
6644 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
6645 		cp += tipp->ipp_rthdrlen;
6646 	}
6647 	/*
6648 	 * Do ultimate destination options
6649 	 */
6650 	if (option_exists & IPPF_DSTOPTS) {
6651 		ip6_dest_t *dest = (ip6_dest_t *)cp;
6652 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
6653 
6654 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6655 		nxthdr_ptr = &dest->ip6d_nxt;
6656 
6657 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
6658 		cp += tipp->ipp_dstoptslen;
6659 	}
6660 	/*
6661 	 * Now set the last header pointer to the proto passed in
6662 	 */
6663 	ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
6664 	*nxthdr_ptr = IPPROTO_UDP;
6665 
6666 	/* Update UDP header */
6667 	udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
6668 	udph->uha_dst_port = sin6->sin6_port;
6669 	udph->uha_src_port = udp->udp_port;
6670 
6671 	/*
6672 	 * Copy in the destination address
6673 	 */
6674 	ip6h->ip6_dst = ip6_dst;
6675 
6676 	ip6h->ip6_vcf =
6677 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
6678 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
6679 
6680 	if (option_exists & IPPF_TCLASS) {
6681 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
6682 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
6683 		    tipp->ipp_tclass);
6684 	}
6685 	rw_exit(&udp->udp_rwlock);
6686 
6687 	if (option_exists & IPPF_RTHDR) {
6688 		ip6_rthdr_t	*rth;
6689 
6690 		/*
6691 		 * Perform any processing needed for source routing.
6692 		 * We know that all extension headers will be in the same mblk
6693 		 * as the IPv6 header.
6694 		 */
6695 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
6696 		if (rth != NULL && rth->ip6r_segleft != 0) {
6697 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
6698 				/*
6699 				 * Drop packet - only support Type 0 routing.
6700 				 * Notify the application as well.
6701 				 */
6702 				*error = EPROTO;
6703 				goto done;
6704 			}
6705 
6706 			/*
6707 			 * rth->ip6r_len is twice the number of
6708 			 * addresses in the header. Thus it must be even.
6709 			 */
6710 			if (rth->ip6r_len & 0x1) {
6711 				*error = EPROTO;
6712 				goto done;
6713 			}
6714 			/*
6715 			 * Shuffle the routing header and ip6_dst
6716 			 * addresses, and get the checksum difference
6717 			 * between the first hop (in ip6_dst) and
6718 			 * the destination (in the last routing hdr entry).
6719 			 */
6720 			csum = ip_massage_options_v6(ip6h, rth,
6721 			    us->us_netstack);
6722 			/*
6723 			 * Verify that the first hop isn't a mapped address.
6724 			 * Routers along the path need to do this verification
6725 			 * for subsequent hops.
6726 			 */
6727 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
6728 				*error = EADDRNOTAVAIL;
6729 				goto done;
6730 			}
6731 
6732 			cp += (rth->ip6r_len + 1)*8;
6733 		}
6734 	}
6735 
6736 	/* count up length of UDP packet */
6737 	ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
6738 	if ((mp2 = mp1->b_cont) != NULL) {
6739 		do {
6740 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
6741 			ip_len += (uint32_t)MBLKL(mp2);
6742 		} while ((mp2 = mp2->b_cont) != NULL);
6743 	}
6744 
6745 	/*
6746 	 * If the size of the packet is greater than the maximum allowed by
6747 	 * ip, return an error. Passing this down could cause panics because
6748 	 * the size will have wrapped and be inconsistent with the msg size.
6749 	 */
6750 	if (ip_len > IP_MAXPACKET) {
6751 		*error = EMSGSIZE;
6752 		goto done;
6753 	}
6754 
6755 	/* Store the UDP length. Subtract length of extension hdrs */
6756 	udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
6757 	    (int)((uchar_t *)udph - (uchar_t *)ip6h));
6758 
6759 	/*
6760 	 * We make it easy for IP to include our pseudo header
6761 	 * by putting our length in uh_checksum, modified (if
6762 	 * we have a routing header) by the checksum difference
6763 	 * between the ultimate destination and first hop addresses.
6764 	 * Note: UDP over IPv6 must always checksum the packet.
6765 	 */
6766 	csum += udph->uha_length;
6767 	csum = (csum & 0xFFFF) + (csum >> 16);
6768 	udph->uha_checksum = (uint16_t)csum;
6769 
6770 #ifdef _LITTLE_ENDIAN
6771 	ip_len = htons(ip_len);
6772 #endif
6773 	ip6h->ip6_plen = ip_len;
6774 
6775 	if (DB_TYPE(mp) != M_DATA) {
6776 		cred_t *cr;
6777 		pid_t cpid;
6778 
6779 		/* Move any cred from the T_UNITDATA_REQ to the packet */
6780 		cr = msg_extractcred(mp, &cpid);
6781 		if (cr != NULL) {
6782 			if (mp1->b_datap->db_credp != NULL)
6783 				crfree(mp1->b_datap->db_credp);
6784 			mp1->b_datap->db_credp = cr;
6785 			mp1->b_datap->db_cpid = cpid;
6786 		}
6787 
6788 		ASSERT(mp != mp1);
6789 		freeb(mp);
6790 	}
6791 
6792 	/* mp has been consumed and we'll return success */
6793 	ASSERT(*error == 0);
6794 	mp = NULL;
6795 
6796 	/* We're done. Pass the packet to IP */
6797 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
6798 	ip_output_v6(connp, mp1, q, IP_WPUT);
6799 
6800 done:
6801 	if (sth_wroff != 0) {
6802 		(void) proto_set_tx_wroff(RD(q), connp,
6803 		    udp->udp_max_hdr_len + us->us_wroff_extra);
6804 	}
6805 	if (hopoptsptr != NULL && !is_ancillary) {
6806 		kmem_free(hopoptsptr, hopoptslen);
6807 		hopoptsptr = NULL;
6808 	}
6809 	if (*error != 0) {
6810 		ASSERT(mp != NULL);
6811 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6812 	}
6813 	return (mp);
6814 }
6815 
6816 
6817 static int
6818 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6819 {
6820 	sin_t *sin = (sin_t *)sa;
6821 	sin6_t *sin6 = (sin6_t *)sa;
6822 
6823 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6824 
6825 	if (udp->udp_state != TS_DATA_XFER)
6826 		return (ENOTCONN);
6827 
6828 	switch (udp->udp_family) {
6829 	case AF_INET:
6830 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6831 
6832 		if (*salenp < sizeof (sin_t))
6833 			return (EINVAL);
6834 
6835 		*salenp = sizeof (sin_t);
6836 		*sin = sin_null;
6837 		sin->sin_family = AF_INET;
6838 		sin->sin_port = udp->udp_dstport;
6839 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
6840 		break;
6841 
6842 	case AF_INET6:
6843 		if (*salenp < sizeof (sin6_t))
6844 			return (EINVAL);
6845 
6846 		*salenp = sizeof (sin6_t);
6847 		*sin6 = sin6_null;
6848 		sin6->sin6_family = AF_INET6;
6849 		sin6->sin6_port = udp->udp_dstport;
6850 		sin6->sin6_addr = udp->udp_v6dst;
6851 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6852 		break;
6853 	}
6854 
6855 	return (0);
6856 }
6857 
6858 static int
6859 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6860 {
6861 	sin_t *sin = (sin_t *)sa;
6862 	sin6_t *sin6 = (sin6_t *)sa;
6863 
6864 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6865 
6866 	switch (udp->udp_family) {
6867 	case AF_INET:
6868 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6869 
6870 		if (*salenp < sizeof (sin_t))
6871 			return (EINVAL);
6872 
6873 		*salenp = sizeof (sin_t);
6874 		*sin = sin_null;
6875 		sin->sin_family = AF_INET;
6876 		sin->sin_port = udp->udp_port;
6877 
6878 		/*
6879 		 * If udp_v6src is unspecified, we might be bound to broadcast
6880 		 * / multicast.  Use udp_bound_v6src as local address instead
6881 		 * (that could also still be unspecified).
6882 		 */
6883 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
6884 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
6885 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
6886 		} else {
6887 			sin->sin_addr.s_addr =
6888 			    V4_PART_OF_V6(udp->udp_bound_v6src);
6889 		}
6890 		break;
6891 
6892 	case AF_INET6:
6893 		if (*salenp < sizeof (sin6_t))
6894 			return (EINVAL);
6895 
6896 		*salenp = sizeof (sin6_t);
6897 		*sin6 = sin6_null;
6898 		sin6->sin6_family = AF_INET6;
6899 		sin6->sin6_port = udp->udp_port;
6900 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6901 
6902 		/*
6903 		 * If udp_v6src is unspecified, we might be bound to broadcast
6904 		 * / multicast.  Use udp_bound_v6src as local address instead
6905 		 * (that could also still be unspecified).
6906 		 */
6907 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))
6908 			sin6->sin6_addr = udp->udp_v6src;
6909 		else
6910 			sin6->sin6_addr = udp->udp_bound_v6src;
6911 		break;
6912 	}
6913 
6914 	return (0);
6915 }
6916 
6917 /*
6918  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
6919  */
6920 static void
6921 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
6922 {
6923 	void	*data;
6924 	mblk_t	*datamp = mp->b_cont;
6925 	udp_t	*udp = Q_TO_UDP(q);
6926 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
6927 
6928 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
6929 		cmdp->cb_error = EPROTO;
6930 		qreply(q, mp);
6931 		return;
6932 	}
6933 	data = datamp->b_rptr;
6934 
6935 	rw_enter(&udp->udp_rwlock, RW_READER);
6936 	switch (cmdp->cb_cmd) {
6937 	case TI_GETPEERNAME:
6938 		cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
6939 		break;
6940 	case TI_GETMYNAME:
6941 		cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
6942 		break;
6943 	default:
6944 		cmdp->cb_error = EINVAL;
6945 		break;
6946 	}
6947 	rw_exit(&udp->udp_rwlock);
6948 
6949 	qreply(q, mp);
6950 }
6951 
6952 static void
6953 udp_disable_direct_sockfs(udp_t *udp)
6954 {
6955 	udp->udp_issocket = B_FALSE;
6956 	if (udp->udp_direct_sockfs) {
6957 		/*
6958 		 * Disable read-side synchronous stream interface and
6959 		 * drain any queued data.
6960 		 */
6961 		udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE);
6962 		ASSERT(!udp->udp_direct_sockfs);
6963 		UDP_STAT(udp->udp_us, udp_sock_fallback);
6964 	}
6965 }
6966 
6967 static void
6968 udp_wput_other(queue_t *q, mblk_t *mp)
6969 {
6970 	uchar_t	*rptr = mp->b_rptr;
6971 	struct datab *db;
6972 	struct iocblk *iocp;
6973 	cred_t	*cr;
6974 	conn_t	*connp = Q_TO_CONN(q);
6975 	udp_t	*udp = connp->conn_udp;
6976 	udp_stack_t *us;
6977 
6978 	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
6979 	    "udp_wput_other_start: q %p", q);
6980 
6981 	us = udp->udp_us;
6982 	db = mp->b_datap;
6983 
6984 	switch (db->db_type) {
6985 	case M_CMD:
6986 		udp_wput_cmdblk(q, mp);
6987 		return;
6988 
6989 	case M_PROTO:
6990 	case M_PCPROTO:
6991 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
6992 			freemsg(mp);
6993 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6994 			    "udp_wput_other_end: q %p (%S)", q, "protoshort");
6995 			return;
6996 		}
6997 		switch (((t_primp_t)rptr)->type) {
6998 		case T_ADDR_REQ:
6999 			udp_addr_req(q, mp);
7000 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7001 			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
7002 			return;
7003 		case O_T_BIND_REQ:
7004 		case T_BIND_REQ:
7005 			udp_tpi_bind(q, mp);
7006 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7007 			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
7008 			return;
7009 		case T_CONN_REQ:
7010 			udp_tpi_connect(q, mp);
7011 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7012 			    "udp_wput_other_end: q %p (%S)", q, "connreq");
7013 			return;
7014 		case T_CAPABILITY_REQ:
7015 			udp_capability_req(q, mp);
7016 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7017 			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
7018 			return;
7019 		case T_INFO_REQ:
7020 			udp_info_req(q, mp);
7021 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7022 			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
7023 			return;
7024 		case T_UNITDATA_REQ:
7025 			/*
7026 			 * If a T_UNITDATA_REQ gets here, the address must
7027 			 * be bad.  Valid T_UNITDATA_REQs are handled
7028 			 * in udp_wput.
7029 			 */
7030 			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
7031 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7032 			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
7033 			return;
7034 		case T_UNBIND_REQ:
7035 			udp_tpi_unbind(q, mp);
7036 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7037 			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
7038 			return;
7039 		case T_SVR4_OPTMGMT_REQ:
7040 			/*
7041 			 * All Solaris components should pass a db_credp
7042 			 * for this TPI message, hence we ASSERT.
7043 			 * But in case there is some other M_PROTO that looks
7044 			 * like a TPI message sent by some other kernel
7045 			 * component, we check and return an error.
7046 			 */
7047 			cr = msg_getcred(mp, NULL);
7048 			ASSERT(cr != NULL);
7049 			if (cr == NULL) {
7050 				udp_err_ack(q, mp, TSYSERR, EINVAL);
7051 				return;
7052 			}
7053 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
7054 			    cr)) {
7055 				(void) svr4_optcom_req(q,
7056 				    mp, cr, &udp_opt_obj, B_TRUE);
7057 			}
7058 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7059 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
7060 			return;
7061 
7062 		case T_OPTMGMT_REQ:
7063 			/*
7064 			 * All Solaris components should pass a db_credp
7065 			 * for this TPI message, hence we ASSERT.
7066 			 * But in case there is some other M_PROTO that looks
7067 			 * like a TPI message sent by some other kernel
7068 			 * component, we check and return an error.
7069 			 */
7070 			cr = msg_getcred(mp, NULL);
7071 			ASSERT(cr != NULL);
7072 			if (cr == NULL) {
7073 				udp_err_ack(q, mp, TSYSERR, EINVAL);
7074 				return;
7075 			}
7076 			(void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
7077 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7078 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
7079 			return;
7080 
7081 		case T_DISCON_REQ:
7082 			udp_tpi_disconnect(q, mp);
7083 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7084 			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
7085 			return;
7086 
7087 		/* The following TPI message is not supported by udp. */
7088 		case O_T_CONN_RES:
7089 		case T_CONN_RES:
7090 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
7091 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7092 			    "udp_wput_other_end: q %p (%S)", q,
7093 			    "connres/disconreq");
7094 			return;
7095 
7096 		/* The following 3 TPI messages are illegal for udp. */
7097 		case T_DATA_REQ:
7098 		case T_EXDATA_REQ:
7099 		case T_ORDREL_REQ:
7100 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
7101 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7102 			    "udp_wput_other_end: q %p (%S)", q,
7103 			    "data/exdata/ordrel");
7104 			return;
7105 		default:
7106 			break;
7107 		}
7108 		break;
7109 	case M_FLUSH:
7110 		if (*rptr & FLUSHW)
7111 			flushq(q, FLUSHDATA);
7112 		break;
7113 	case M_IOCTL:
7114 		iocp = (struct iocblk *)mp->b_rptr;
7115 		switch (iocp->ioc_cmd) {
7116 		case TI_GETPEERNAME:
7117 			if (udp->udp_state != TS_DATA_XFER) {
7118 				/*
7119 				 * If a default destination address has not
7120 				 * been associated with the stream, then we
7121 				 * don't know the peer's name.
7122 				 */
7123 				iocp->ioc_error = ENOTCONN;
7124 				iocp->ioc_count = 0;
7125 				mp->b_datap->db_type = M_IOCACK;
7126 				qreply(q, mp);
7127 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7128 				    "udp_wput_other_end: q %p (%S)", q,
7129 				    "getpeername");
7130 				return;
7131 			}
7132 			/* FALLTHRU */
7133 		case TI_GETMYNAME: {
7134 			/*
7135 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
7136 			 * need to copyin the user's strbuf structure.
7137 			 * Processing will continue in the M_IOCDATA case
7138 			 * below.
7139 			 */
7140 			mi_copyin(q, mp, NULL,
7141 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
7142 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7143 			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
7144 			return;
7145 			}
7146 		case ND_SET:
7147 			/* nd_getset performs the necessary checking */
7148 		case ND_GET:
7149 			if (nd_getset(q, us->us_nd, mp)) {
7150 				qreply(q, mp);
7151 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7152 				    "udp_wput_other_end: q %p (%S)", q, "get");
7153 				return;
7154 			}
7155 			break;
7156 		case _SIOCSOCKFALLBACK:
7157 			/*
7158 			 * Either sockmod is about to be popped and the
7159 			 * socket would now be treated as a plain stream,
7160 			 * or a module is about to be pushed so we could
7161 			 * no longer use read-side synchronous stream.
7162 			 * Drain any queued data and disable direct sockfs
7163 			 * interface from now on.
7164 			 */
7165 			if (!udp->udp_issocket) {
7166 				DB_TYPE(mp) = M_IOCNAK;
7167 				iocp->ioc_error = EINVAL;
7168 			} else {
7169 				udp_disable_direct_sockfs(udp);
7170 
7171 				DB_TYPE(mp) = M_IOCACK;
7172 				iocp->ioc_error = 0;
7173 			}
7174 			iocp->ioc_count = 0;
7175 			iocp->ioc_rval = 0;
7176 			qreply(q, mp);
7177 			return;
7178 		default:
7179 			break;
7180 		}
7181 		break;
7182 	case M_IOCDATA:
7183 		udp_wput_iocdata(q, mp);
7184 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7185 		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
7186 		return;
7187 	default:
7188 		/* Unrecognized messages are passed through without change. */
7189 		break;
7190 	}
7191 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7192 	    "udp_wput_other_end: q %p (%S)", q, "end");
7193 	ip_output(connp, mp, q, IP_WPUT);
7194 }
7195 
7196 /*
7197  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
7198  * messages.
7199  */
7200 static void
7201 udp_wput_iocdata(queue_t *q, mblk_t *mp)
7202 {
7203 	mblk_t		*mp1;
7204 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
7205 	STRUCT_HANDLE(strbuf, sb);
7206 	udp_t		*udp = Q_TO_UDP(q);
7207 	int		error;
7208 	uint_t		addrlen;
7209 
7210 	/* Make sure it is one of ours. */
7211 	switch (iocp->ioc_cmd) {
7212 	case TI_GETMYNAME:
7213 	case TI_GETPEERNAME:
7214 		break;
7215 	default:
7216 		ip_output(udp->udp_connp, mp, q, IP_WPUT);
7217 		return;
7218 	}
7219 
7220 	switch (mi_copy_state(q, mp, &mp1)) {
7221 	case -1:
7222 		return;
7223 	case MI_COPY_CASE(MI_COPY_IN, 1):
7224 		break;
7225 	case MI_COPY_CASE(MI_COPY_OUT, 1):
7226 		/*
7227 		 * The address has been copied out, so now
7228 		 * copyout the strbuf.
7229 		 */
7230 		mi_copyout(q, mp);
7231 		return;
7232 	case MI_COPY_CASE(MI_COPY_OUT, 2):
7233 		/*
7234 		 * The address and strbuf have been copied out.
7235 		 * We're done, so just acknowledge the original
7236 		 * M_IOCTL.
7237 		 */
7238 		mi_copy_done(q, mp, 0);
7239 		return;
7240 	default:
7241 		/*
7242 		 * Something strange has happened, so acknowledge
7243 		 * the original M_IOCTL with an EPROTO error.
7244 		 */
7245 		mi_copy_done(q, mp, EPROTO);
7246 		return;
7247 	}
7248 
7249 	/*
7250 	 * Now we have the strbuf structure for TI_GETMYNAME
7251 	 * and TI_GETPEERNAME.  Next we copyout the requested
7252 	 * address and then we'll copyout the strbuf.
7253 	 */
7254 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
7255 	addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t);
7256 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
7257 		mi_copy_done(q, mp, EINVAL);
7258 		return;
7259 	}
7260 
7261 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
7262 
7263 	if (mp1 == NULL)
7264 		return;
7265 
7266 	rw_enter(&udp->udp_rwlock, RW_READER);
7267 	switch (iocp->ioc_cmd) {
7268 	case TI_GETMYNAME:
7269 		error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
7270 		break;
7271 	case TI_GETPEERNAME:
7272 		error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
7273 		break;
7274 	}
7275 	rw_exit(&udp->udp_rwlock);
7276 
7277 	if (error != 0) {
7278 		mi_copy_done(q, mp, error);
7279 	} else {
7280 		mp1->b_wptr += addrlen;
7281 		STRUCT_FSET(sb, len, addrlen);
7282 
7283 		/* Copy out the address */
7284 		mi_copyout(q, mp);
7285 	}
7286 }
7287 
7288 static int
7289 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
7290     udpattrs_t *udpattrs)
7291 {
7292 	struct T_unitdata_req *udreqp;
7293 	int is_absreq_failure;
7294 	cred_t *cr;
7295 
7296 	ASSERT(((t_primp_t)mp->b_rptr)->type);
7297 
7298 	/*
7299 	 * All Solaris components should pass a db_credp
7300 	 * for this TPI message, hence we should ASSERT.
7301 	 * However, RPC (svc_clts_ksend) does this odd thing where it
7302 	 * passes the options from a T_UNITDATA_IND unchanged in a
7303 	 * T_UNITDATA_REQ. While that is the right thing to do for
7304 	 * some options, SCM_UCRED being the key one, this also makes it
7305 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
7306 	 */
7307 	cr = msg_getcred(mp, NULL);
7308 	if (cr == NULL) {
7309 		cr = Q_TO_CONN(q)->conn_cred;
7310 	}
7311 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
7312 
7313 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
7314 	    udreqp->OPT_offset, cr, &udp_opt_obj,
7315 	    udpattrs, &is_absreq_failure);
7316 
7317 	if (*errorp != 0) {
7318 		/*
7319 		 * Note: No special action needed in this
7320 		 * module for "is_absreq_failure"
7321 		 */
7322 		return (-1);		/* failure */
7323 	}
7324 	ASSERT(is_absreq_failure == 0);
7325 	return (0);	/* success */
7326 }
7327 
7328 void
7329 udp_ddi_g_init(void)
7330 {
7331 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
7332 	    udp_opt_obj.odb_opt_arr_cnt);
7333 
7334 	/*
7335 	 * We want to be informed each time a stack is created or
7336 	 * destroyed in the kernel, so we can maintain the
7337 	 * set of udp_stack_t's.
7338 	 */
7339 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
7340 }
7341 
7342 void
7343 udp_ddi_g_destroy(void)
7344 {
7345 	netstack_unregister(NS_UDP);
7346 }
7347 
7348 #define	INET_NAME	"ip"
7349 
7350 /*
7351  * Initialize the UDP stack instance.
7352  */
7353 static void *
7354 udp_stack_init(netstackid_t stackid, netstack_t *ns)
7355 {
7356 	udp_stack_t	*us;
7357 	udpparam_t	*pa;
7358 	int		i;
7359 	int		error = 0;
7360 	major_t		major;
7361 
7362 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
7363 	us->us_netstack = ns;
7364 
7365 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
7366 	us->us_epriv_ports[0] = 2049;
7367 	us->us_epriv_ports[1] = 4045;
7368 
7369 	/*
7370 	 * The smallest anonymous port in the priviledged port range which UDP
7371 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
7372 	 */
7373 	us->us_min_anonpriv_port = 512;
7374 
7375 	us->us_bind_fanout_size = udp_bind_fanout_size;
7376 
7377 	/* Roundup variable that might have been modified in /etc/system */
7378 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
7379 		/* Not a power of two. Round up to nearest power of two */
7380 		for (i = 0; i < 31; i++) {
7381 			if (us->us_bind_fanout_size < (1 << i))
7382 				break;
7383 		}
7384 		us->us_bind_fanout_size = 1 << i;
7385 	}
7386 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
7387 	    sizeof (udp_fanout_t), KM_SLEEP);
7388 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7389 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
7390 		    NULL);
7391 	}
7392 
7393 	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);
7394 
7395 	us->us_param_arr = pa;
7396 	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));
7397 
7398 	(void) udp_param_register(&us->us_nd,
7399 	    us->us_param_arr, A_CNT(udp_param_arr));
7400 
7401 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
7402 	us->us_mibkp = udp_kstat_init(stackid);
7403 
7404 	major = mod_name_to_major(INET_NAME);
7405 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
7406 	ASSERT(error == 0);
7407 	return (us);
7408 }
7409 
7410 /*
7411  * Free the UDP stack instance.
7412  */
7413 static void
7414 udp_stack_fini(netstackid_t stackid, void *arg)
7415 {
7416 	udp_stack_t *us = (udp_stack_t *)arg;
7417 	int i;
7418 
7419 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7420 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
7421 	}
7422 
7423 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
7424 	    sizeof (udp_fanout_t));
7425 
7426 	us->us_bind_fanout = NULL;
7427 
7428 	nd_free(&us->us_nd);
7429 	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
7430 	us->us_param_arr = NULL;
7431 
7432 	udp_kstat_fini(stackid, us->us_mibkp);
7433 	us->us_mibkp = NULL;
7434 
7435 	udp_kstat2_fini(stackid, us->us_kstat);
7436 	us->us_kstat = NULL;
7437 	bzero(&us->us_statistics, sizeof (us->us_statistics));
7438 
7439 	ldi_ident_release(us->us_ldi_ident);
7440 	kmem_free(us, sizeof (*us));
7441 }
7442 
7443 static void *
7444 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
7445 {
7446 	kstat_t *ksp;
7447 
7448 	udp_stat_t template = {
7449 		{ "udp_ip_send",		KSTAT_DATA_UINT64 },
7450 		{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
7451 		{ "udp_ire_null",		KSTAT_DATA_UINT64 },
7452 		{ "udp_drain",			KSTAT_DATA_UINT64 },
7453 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
7454 		{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
7455 		{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
7456 		{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
7457 		{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
7458 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
7459 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
7460 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
7461 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
7462 		{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
7463 		{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
7464 		{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
7465 		{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
7466 		{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
7467 		{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
7468 		{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
7469 		{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
7470 		{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
7471 		{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
7472 		{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
7473 		{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
7474 		{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
7475 		{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
7476 		{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
7477 #ifdef DEBUG
7478 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
7479 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
7480 #endif
7481 	};
7482 
7483 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
7484 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
7485 	    KSTAT_FLAG_VIRTUAL, stackid);
7486 
7487 	if (ksp == NULL)
7488 		return (NULL);
7489 
7490 	bcopy(&template, us_statisticsp, sizeof (template));
7491 	ksp->ks_data = (void *)us_statisticsp;
7492 	ksp->ks_private = (void *)(uintptr_t)stackid;
7493 
7494 	kstat_install(ksp);
7495 	return (ksp);
7496 }
7497 
7498 static void
7499 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
7500 {
7501 	if (ksp != NULL) {
7502 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7503 		kstat_delete_netstack(ksp, stackid);
7504 	}
7505 }
7506 
7507 static void *
7508 udp_kstat_init(netstackid_t stackid)
7509 {
7510 	kstat_t	*ksp;
7511 
7512 	udp_named_kstat_t template = {
7513 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
7514 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
7515 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
7516 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
7517 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
7518 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
7519 	};
7520 
7521 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
7522 	    KSTAT_TYPE_NAMED,
7523 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
7524 
7525 	if (ksp == NULL || ksp->ks_data == NULL)
7526 		return (NULL);
7527 
7528 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
7529 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
7530 
7531 	bcopy(&template, ksp->ks_data, sizeof (template));
7532 	ksp->ks_update = udp_kstat_update;
7533 	ksp->ks_private = (void *)(uintptr_t)stackid;
7534 
7535 	kstat_install(ksp);
7536 	return (ksp);
7537 }
7538 
7539 static void
7540 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
7541 {
7542 	if (ksp != NULL) {
7543 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7544 		kstat_delete_netstack(ksp, stackid);
7545 	}
7546 }
7547 
7548 static int
7549 udp_kstat_update(kstat_t *kp, int rw)
7550 {
7551 	udp_named_kstat_t *udpkp;
7552 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
7553 	netstack_t	*ns;
7554 	udp_stack_t	*us;
7555 
7556 	if ((kp == NULL) || (kp->ks_data == NULL))
7557 		return (EIO);
7558 
7559 	if (rw == KSTAT_WRITE)
7560 		return (EACCES);
7561 
7562 	ns = netstack_find_by_stackid(stackid);
7563 	if (ns == NULL)
7564 		return (-1);
7565 	us = ns->netstack_udp;
7566 	if (us == NULL) {
7567 		netstack_rele(ns);
7568 		return (-1);
7569 	}
7570 	udpkp = (udp_named_kstat_t *)kp->ks_data;
7571 
7572 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
7573 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
7574 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
7575 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
7576 	netstack_rele(ns);
7577 	return (0);
7578 }
7579 
7580 /*
7581  * Read-side synchronous stream info entry point, called as a
7582  * result of handling certain STREAMS ioctl operations.
7583  */
7584 static int
7585 udp_rinfop(queue_t *q, infod_t *dp)
7586 {
7587 	mblk_t	*mp;
7588 	uint_t	cmd = dp->d_cmd;
7589 	int	res = 0;
7590 	int	error = 0;
7591 	udp_t	*udp = Q_TO_UDP(q);
7592 	struct stdata *stp = STREAM(q);
7593 
7594 	mutex_enter(&udp->udp_drain_lock);
7595 	/* If shutdown on read has happened, return nothing */
7596 	mutex_enter(&stp->sd_lock);
7597 	if (stp->sd_flag & STREOF) {
7598 		mutex_exit(&stp->sd_lock);
7599 		goto done;
7600 	}
7601 	mutex_exit(&stp->sd_lock);
7602 
7603 	if ((mp = udp->udp_rcv_list_head) == NULL)
7604 		goto done;
7605 
7606 	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);
7607 
7608 	if (cmd & INFOD_COUNT) {
7609 		/*
7610 		 * Return the number of messages.
7611 		 */
7612 		dp->d_count += udp->udp_rcv_msgcnt;
7613 		res |= INFOD_COUNT;
7614 	}
7615 	if (cmd & INFOD_BYTES) {
7616 		/*
7617 		 * Return size of all data messages.
7618 		 */
7619 		dp->d_bytes += udp->udp_rcv_cnt;
7620 		res |= INFOD_BYTES;
7621 	}
7622 	if (cmd & INFOD_FIRSTBYTES) {
7623 		/*
7624 		 * Return size of first data message.
7625 		 */
7626 		dp->d_bytes = msgdsize(mp);
7627 		res |= INFOD_FIRSTBYTES;
7628 		dp->d_cmd &= ~INFOD_FIRSTBYTES;
7629 	}
7630 	if (cmd & INFOD_COPYOUT) {
7631 		mblk_t *mp1 = mp->b_cont;
7632 		int n;
7633 		/*
7634 		 * Return data contents of first message.
7635 		 */
7636 		ASSERT(DB_TYPE(mp1) == M_DATA);
7637 		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
7638 			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
7639 			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
7640 			    UIO_READ, dp->d_uiop)) != 0) {
7641 				goto done;
7642 			}
7643 			mp1 = mp1->b_cont;
7644 		}
7645 		res |= INFOD_COPYOUT;
7646 		dp->d_cmd &= ~INFOD_COPYOUT;
7647 	}
7648 done:
7649 	mutex_exit(&udp->udp_drain_lock);
7650 
7651 	dp->d_res |= res;
7652 
7653 	return (error);
7654 }
7655 
7656 /*
7657  * Read-side synchronous stream entry point.  This is called as a result
7658  * of recv/read operation done at sockfs, and is guaranteed to execute
7659  * outside of the interrupt thread context.  It returns a single datagram
7660  * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
7661  */
7662 static int
7663 udp_rrw(queue_t *q, struiod_t *dp)
7664 {
7665 	mblk_t	*mp;
7666 	udp_t	*udp = Q_TO_UDP(q);
7667 	udp_stack_t *us = udp->udp_us;
7668 
7669 	/*
7670 	 * Dequeue datagram from the head of the list and return
7671 	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
7672 	 * set/cleared depending on whether or not there's data
7673 	 * remaining in the list.
7674 	 */
7675 	mutex_enter(&udp->udp_drain_lock);
7676 	if (!udp->udp_direct_sockfs) {
7677 		mutex_exit(&udp->udp_drain_lock);
7678 		UDP_STAT(us, udp_rrw_busy);
7679 		return (EBUSY);
7680 	}
7681 	if ((mp = udp->udp_rcv_list_head) != NULL) {
7682 		uint_t size = msgdsize(mp);
7683 
7684 		/* Last datagram in the list? */
7685 		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
7686 			udp->udp_rcv_list_tail = NULL;
7687 		mp->b_next = NULL;
7688 
7689 		udp->udp_rcv_cnt -= size;
7690 		udp->udp_rcv_msgcnt--;
7691 		UDP_STAT(us, udp_rrw_msgcnt);
7692 
7693 		/* No longer flow-controlling? */
7694 		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
7695 		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
7696 			udp->udp_drain_qfull = B_FALSE;
7697 	}
7698 	if (udp->udp_rcv_list_head == NULL) {
7699 		/*
7700 		 * Either we just dequeued the last datagram or
7701 		 * we get here from sockfs and have nothing to
7702 		 * return; in this case clear RSLEEP.
7703 		 */
7704 		ASSERT(udp->udp_rcv_cnt == 0);
7705 		ASSERT(udp->udp_rcv_msgcnt == 0);
7706 		ASSERT(udp->udp_rcv_list_tail == NULL);
7707 		STR_WAKEUP_CLEAR(STREAM(q));
7708 	} else {
7709 		/*
7710 		 * More data follows; we need udp_rrw() to be
7711 		 * called in future to pick up the rest.
7712 		 */
7713 		STR_WAKEUP_SET(STREAM(q));
7714 	}
7715 	mutex_exit(&udp->udp_drain_lock);
7716 	dp->d_mp = mp;
7717 	return (0);
7718 }
7719 
7720 /*
7721  * Enqueue a completely-built T_UNITDATA_IND message into the receive
7722  * list; this is typically executed within the interrupt thread context
7723  * and so we do things as quickly as possible.
7724  */
7725 static void
7726 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
7727 {
7728 	ASSERT(q == RD(q));
7729 	ASSERT(pkt_len == msgdsize(mp));
7730 	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
7731 	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
7732 	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));
7733 
7734 	mutex_enter(&udp->udp_drain_lock);
7735 	/*
7736 	 * Wake up and signal the receiving app; it is okay to do this
7737 	 * before enqueueing the mp because we are holding the drain lock.
7738 	 * One of the advantages of synchronous stream is the ability for
7739 	 * us to find out when the application performs a read on the
7740 	 * socket by way of udp_rrw() entry point being called.  We need
7741 	 * to generate SIGPOLL/SIGIO for each received data in the case
7742 	 * of asynchronous socket just as in the strrput() case.  However,
7743 	 * we only wake the application up when necessary, i.e. during the
7744 	 * first enqueue.  When udp_rrw() is called, we send up a single
7745 	 * datagram upstream and call STR_WAKEUP_SET() again when there
7746 	 * are still data remaining in our receive queue.
7747 	 */
7748 	STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
7749 	if (udp->udp_rcv_list_head == NULL)
7750 		udp->udp_rcv_list_head = mp;
7751 	else
7752 		udp->udp_rcv_list_tail->b_next = mp;
7753 	udp->udp_rcv_list_tail = mp;
7754 	udp->udp_rcv_cnt += pkt_len;
7755 	udp->udp_rcv_msgcnt++;
7756 
7757 	/* Need to flow-control? */
7758 	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
7759 	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
7760 		udp->udp_drain_qfull = B_TRUE;
7761 
7762 	mutex_exit(&udp->udp_drain_lock);
7763 }
7764 
7765 /*
7766  * Drain the contents of receive list to the module upstream; we do
7767  * this during close or when we fallback to the slow mode due to
7768  * sockmod being popped or a module being pushed on top of us.
7769  */
7770 static void
7771 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
7772 {
7773 	mblk_t *mp;
7774 	udp_stack_t *us = udp->udp_us;
7775 
7776 	mutex_enter(&udp->udp_drain_lock);
7777 	/*
7778 	 * There is no race with a concurrent udp_input() sending
7779 	 * up packets using putnext() after we have cleared the
7780 	 * udp_direct_sockfs flag but before we have completed
7781 	 * sending up the packets in udp_rcv_list, since we are
7782 	 * either a writer or we have quiesced the conn.
7783 	 */
7784 	udp->udp_direct_sockfs = B_FALSE;
7785 	mutex_exit(&udp->udp_drain_lock);
7786 
7787 	if (udp->udp_rcv_list_head != NULL)
7788 		UDP_STAT(us, udp_drain);
7789 
7790 	/*
7791 	 * Send up everything via putnext(); note here that we
7792 	 * don't need the udp_drain_lock to protect us since
7793 	 * nothing can enter udp_rrw() and that we currently
7794 	 * have exclusive access to this udp.
7795 	 */
7796 	while ((mp = udp->udp_rcv_list_head) != NULL) {
7797 		udp->udp_rcv_list_head = mp->b_next;
7798 		mp->b_next = NULL;
7799 		udp->udp_rcv_cnt -= msgdsize(mp);
7800 		udp->udp_rcv_msgcnt--;
7801 		if (closing) {
7802 			freemsg(mp);
7803 		} else {
7804 			ASSERT(q == RD(q));
7805 			putnext(q, mp);
7806 		}
7807 	}
7808 	ASSERT(udp->udp_rcv_cnt == 0);
7809 	ASSERT(udp->udp_rcv_msgcnt == 0);
7810 	ASSERT(udp->udp_rcv_list_head == NULL);
7811 	udp->udp_rcv_list_tail = NULL;
7812 	udp->udp_drain_qfull = B_FALSE;
7813 }
7814 
7815 static size_t
7816 udp_set_rcv_hiwat(udp_t *udp, size_t size)
7817 {
7818 	udp_stack_t *us = udp->udp_us;
7819 
7820 	/* We add a bit of extra buffering */
7821 	size += size >> 1;
7822 	if (size > us->us_max_buf)
7823 		size = us->us_max_buf;
7824 
7825 	udp->udp_rcv_hiwat = size;
7826 	return (size);
7827 }
7828 
7829 /*
7830  * For the lower queue so that UDP can be a dummy mux.
7831  * Nobody should be sending
7832  * packets up this stream
7833  */
7834 static void
7835 udp_lrput(queue_t *q, mblk_t *mp)
7836 {
7837 	mblk_t *mp1;
7838 
7839 	switch (mp->b_datap->db_type) {
7840 	case M_FLUSH:
7841 		/* Turn around */
7842 		if (*mp->b_rptr & FLUSHW) {
7843 			*mp->b_rptr &= ~FLUSHR;
7844 			qreply(q, mp);
7845 			return;
7846 		}
7847 		break;
7848 	}
7849 	/* Could receive messages that passed through ar_rput */
7850 	for (mp1 = mp; mp1; mp1 = mp1->b_cont)
7851 		mp1->b_prev = mp1->b_next = NULL;
7852 	freemsg(mp);
7853 }
7854 
7855 /*
7856  * For the lower queue so that UDP can be a dummy mux.
7857  * Nobody should be sending packets down this stream.
7858  */
7859 /* ARGSUSED */
7860 void
7861 udp_lwput(queue_t *q, mblk_t *mp)
7862 {
7863 	freemsg(mp);
7864 }
7865 
7866 /*
7867  * Below routines for UDP socket module.
7868  */
7869 
7870 static conn_t *
7871 udp_do_open(cred_t *credp, boolean_t isv6, int flags)
7872 {
7873 	udp_t		*udp;
7874 	conn_t		*connp;
7875 	zoneid_t 	zoneid;
7876 	netstack_t 	*ns;
7877 	udp_stack_t 	*us;
7878 
7879 	ns = netstack_find_by_cred(credp);
7880 	ASSERT(ns != NULL);
7881 	us = ns->netstack_udp;
7882 	ASSERT(us != NULL);
7883 
7884 	/*
7885 	 * For exclusive stacks we set the zoneid to zero
7886 	 * to make UDP operate as if in the global zone.
7887 	 */
7888 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
7889 		zoneid = GLOBAL_ZONEID;
7890 	else
7891 		zoneid = crgetzoneid(credp);
7892 
7893 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
7894 
7895 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
7896 	if (connp == NULL) {
7897 		netstack_rele(ns);
7898 		return (NULL);
7899 	}
7900 	udp = connp->conn_udp;
7901 
7902 	/*
7903 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
7904 	 * done by netstack_find_by_cred()
7905 	 */
7906 	netstack_rele(ns);
7907 
7908 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7909 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
7910 	ASSERT(connp->conn_udp == udp);
7911 	ASSERT(udp->udp_connp == connp);
7912 
7913 	/* Set the initial state of the stream and the privilege status. */
7914 	udp->udp_state = TS_UNBND;
7915 	if (isv6) {
7916 		udp->udp_family = AF_INET6;
7917 		udp->udp_ipversion = IPV6_VERSION;
7918 		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
7919 		udp->udp_ttl = us->us_ipv6_hoplimit;
7920 		connp->conn_af_isv6 = B_TRUE;
7921 		connp->conn_flags |= IPCL_ISV6;
7922 	} else {
7923 		udp->udp_family = AF_INET;
7924 		udp->udp_ipversion = IPV4_VERSION;
7925 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
7926 		udp->udp_ttl = us->us_ipv4_ttl;
7927 		connp->conn_af_isv6 = B_FALSE;
7928 		connp->conn_flags &= ~IPCL_ISV6;
7929 	}
7930 
7931 	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
7932 	udp->udp_pending_op = -1;
7933 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
7934 	connp->conn_zoneid = zoneid;
7935 
7936 	udp->udp_open_time = lbolt64;
7937 	udp->udp_open_pid = curproc->p_pid;
7938 
7939 	/*
7940 	 * If the caller has the process-wide flag set, then default to MAC
7941 	 * exempt mode.  This allows read-down to unlabeled hosts.
7942 	 */
7943 	if (getpflags(NET_MAC_AWARE, credp) != 0)
7944 		connp->conn_mac_exempt = B_TRUE;
7945 
7946 	connp->conn_ulp_labeled = is_system_labeled();
7947 
7948 	udp->udp_us = us;
7949 
7950 	connp->conn_recv = udp_input;
7951 	crhold(credp);
7952 	connp->conn_cred = credp;
7953 
7954 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
7955 
7956 	rw_exit(&udp->udp_rwlock);
7957 
7958 	return (connp);
7959 }
7960 
7961 /* ARGSUSED */
7962 sock_lower_handle_t
7963 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
7964     uint_t *smodep, int *errorp, int flags, cred_t *credp)
7965 {
7966 	udp_t		*udp = NULL;
7967 	udp_stack_t	*us;
7968 	conn_t		*connp;
7969 	boolean_t	isv6;
7970 
7971 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
7972 	    (proto != 0 && proto != IPPROTO_UDP)) {
7973 		*errorp = EPROTONOSUPPORT;
7974 		return (NULL);
7975 	}
7976 
7977 	if (family == AF_INET6)
7978 		isv6 = B_TRUE;
7979 	else
7980 		isv6 = B_FALSE;
7981 
7982 	connp = udp_do_open(credp, isv6, flags);
7983 	if (connp == NULL) {
7984 		*errorp = ENOMEM;
7985 		return (NULL);
7986 	}
7987 
7988 	udp = connp->conn_udp;
7989 	ASSERT(udp != NULL);
7990 	us = udp->udp_us;
7991 	ASSERT(us != NULL);
7992 
7993 	connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;
7994 
7995 	/* Set flow control */
7996 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7997 	(void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
7998 	udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
7999 	udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
8000 	udp->udp_xmit_hiwat = us->us_xmit_hiwat;
8001 	udp->udp_xmit_lowat = us->us_xmit_lowat;
8002 
8003 	if (udp->udp_family == AF_INET6) {
8004 		/* Build initial header template for transmit */
8005 		if ((*errorp = udp_build_hdrs(udp)) != 0) {
8006 			rw_exit(&udp->udp_rwlock);
8007 			ipcl_conn_destroy(connp);
8008 			return (NULL);
8009 		}
8010 	}
8011 	rw_exit(&udp->udp_rwlock);
8012 
8013 	connp->conn_flow_cntrld = B_FALSE;
8014 
8015 	ASSERT(us->us_ldi_ident != NULL);
8016 
8017 	if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
8018 		ip1dbg(("udp_create: create of IP helper stream failed\n"));
8019 		udp_do_close(connp);
8020 		return (NULL);
8021 	}
8022 
8023 	/* Set the send flow control */
8024 	connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
8025 	connp->conn_wq->q_lowat = us->us_xmit_lowat;
8026 
8027 	mutex_enter(&connp->conn_lock);
8028 	connp->conn_state_flags &= ~CONN_INCIPIENT;
8029 	mutex_exit(&connp->conn_lock);
8030 
8031 	*errorp = 0;
8032 	*smodep = SM_ATOMIC;
8033 	*sock_downcalls = &sock_udp_downcalls;
8034 	return ((sock_lower_handle_t)connp);
8035 }
8036 
8037 /* ARGSUSED */
8038 void
8039 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
8040     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
8041 {
8042 	conn_t 		*connp = (conn_t *)proto_handle;
8043 	udp_t 		*udp = connp->conn_udp;
8044 	udp_stack_t	*us = udp->udp_us;
8045 	struct sock_proto_props sopp;
8046 
8047 	/* All Solaris components should pass a cred for this operation. */
8048 	ASSERT(cr != NULL);
8049 
8050 	connp->conn_upcalls = sock_upcalls;
8051 	connp->conn_upper_handle = sock_handle;
8052 
8053 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
8054 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
8055 	sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
8056 	sopp.sopp_maxblk = INFPSZ;
8057 	sopp.sopp_rxhiwat = udp->udp_rcv_hiwat;
8058 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
8059 	sopp.sopp_maxpsz =
8060 	    (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
8061 	    UDP_MAXPACKET_IPV6;
8062 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
8063 	    udp_mod_info.mi_minpsz;
8064 
8065 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
8066 	    &sopp);
8067 }
8068 
/*
 * Common close path for a UDP endpoint: quiesce it, release what hangs
 * off the conn_t, and destroy the conn_t itself.
 *
 * The steps, in order:
 *   - udp_quiesce_conn() and ip_quiesce_conn() are called on the conn;
 *   - for a STREAMS endpoint (IPCL_IS_NONSTR is false) any queued receive
 *     data is drained and qprocsoff() is called on the read queue;
 *   - udp_close_free() is called;
 *   - a STREAMS endpoint gives back its minor number, a non-STREAMS
 *     (socket) endpoint frees its IP helper stream;
 *   - the remaining conn_ref is dropped and ipcl_conn_destroy() is called.
 *
 * connp must not be used by the caller after this returns.
 */
8069 static void
8070 udp_do_close(conn_t *connp)
8071 {
8072 	udp_t	*udp;
8073 
8074 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
8075 	udp = connp->conn_udp;
8076 
8077 	udp_quiesce_conn(connp);
8078 	ip_quiesce_conn(connp);
8079 
8080 	if (!IPCL_IS_NONSTR(connp)) {
8081 		/*
8082 		 * Disable read-side synchronous stream
8083 		 * interface and drain any queued data.
8084 		 */
8085 		ASSERT(connp->conn_wq != NULL);
8086 		udp_rcv_drain(connp->conn_wq, udp, B_TRUE);
8087 		ASSERT(!udp->udp_direct_sockfs);
8088 
8089 		ASSERT(connp->conn_rq != NULL);
8090 		qprocsoff(connp->conn_rq);
8091 	}
8092 
	/* By this point nothing may remain on the receive list. */
8093 	ASSERT(udp->udp_rcv_cnt == 0);
8094 	ASSERT(udp->udp_rcv_msgcnt == 0);
8095 	ASSERT(udp->udp_rcv_list_head == NULL);
8096 	ASSERT(udp->udp_rcv_list_tail == NULL);
8097 
8098 	udp_close_free(connp);
8099 
8100 	/*
8101 	 * Now we are truly single threaded on this stream, and can
8102 	 * delete the things hanging off the connp, and finally the connp.
8103 	 * We removed this connp from the fanout list, it cannot be
8104 	 * accessed thru the fanouts, and we already waited for the
8105 	 * conn_ref to drop to 0. We are already in close, so
8106 	 * there cannot be any other thread from the top. qprocsoff
8107 	 * has completed, and service has completed or won't run in
8108 	 * future.
8109 	 */
8110 	ASSERT(connp->conn_ref == 1);
8111 	if (!IPCL_IS_NONSTR(connp)) {
		/* STREAMS endpoint: return the device minor number. */
8112 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
8113 	} else {
		/* Socket endpoint: release the IP helper stream. */
8114 		ip_free_helper_stream(connp);
8115 	}
8116 
	/* Drop the last reference (asserted to be 1 above) and destroy. */
8117 	connp->conn_ref--;
8118 	ipcl_conn_destroy(connp);
8119 }
8120 
8121 /* ARGSUSED */
8122 int
8123 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
8124 {
8125 	conn_t	*connp = (conn_t *)proto_handle;
8126 
8127 	/* All Solaris components should pass a cred for this operation. */
8128 	ASSERT(cr != NULL);
8129 
8130 	udp_do_close(connp);
8131 	return (0);
8132 }
8133 
/*
 * Bind a UDP endpoint to a local address and port.
 *
 * Arguments:
 *   connp  - the endpoint's conn_t.
 *   sa/len - the requested local address.  len selects the interpretation:
 *            0 builds a wildcard address for the endpoint's family with
 *            port 0 (sa is not dereferenced); sizeof (sin_t) and
 *            sizeof (sin6_t) are read as IPv4 / IPv6 socket addresses;
 *            any other length is rejected.
 *   cr     - credential used for the privileged-port and MLP checks.
 *   bind_to_req_port_only - when B_TRUE and a non-zero port was requested,
 *            a conflicting binding makes the call fail with -TADDRBUSY
 *            instead of searching for another port.
 *
 * Returns 0 on success.  Failures are reported either as a negated TLI
 * error (-TOUTSTATE, -TBADADDR, -TACCES, -TNOADDR, -TADDRBUSY) or as a
 * positive errno (EINVAL, EAFNOSUPPORT, ENOMEM, or the value returned by
 * udp_build_hdrs() / ip_proto_bind_laddr_v4() / ip_proto_bind_laddr_v6()).
 * Callers must handle both conventions.
 *
 * Locking: udp_rwlock is taken as writer while the endpoint state is
 * changed, and the uf_lock of the chosen bind-fanout bucket is held while
 * that bucket is searched and the endpoint is inserted.  Both are dropped
 * before the cluster hook, the labeled-system checks and the call into IP.
 */
8134 static int
8135 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
8136     boolean_t bind_to_req_port_only)
8137 {
8138 	sin_t		*sin;
8139 	sin6_t		*sin6;
8140 	sin6_t		sin6addr;
8141 	in_port_t	port;		/* Host byte order */
8142 	in_port_t	requested_port;	/* Host byte order */
8143 	int		count;
8144 	in6_addr_t	v6src;
8145 	int		loopmax;
8146 	udp_fanout_t	*udpf;
8147 	in_port_t	lport;		/* Network byte order */
8148 	zoneid_t	zoneid;
8149 	udp_t		*udp;
8150 	boolean_t	is_inaddr_any;
8151 	mlp_type_t	addrtype, mlptype;
8152 	udp_stack_t	*us;
8153 	int		error = 0;
8154 	mblk_t		*mp = NULL;
8155 
8156 	udp = connp->conn_udp;
8157 	us = udp->udp_us;
8158 
	/* Unlocked early-out; the state is checked again under udp_rwlock. */
8159 	if (udp->udp_state != TS_UNBND) {
8160 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8161 		    "udp_bind: bad state, %u", udp->udp_state);
8162 		return (-TOUTSTATE);
8163 	}
8164 
	/* The address length decides how sa is interpreted. */
8165 	switch (len) {
8166 	case 0:
		/*
		 * No address supplied: build a wildcard address with port 0
		 * in the local sin6addr scratch buffer.
		 */
8167 		if (udp->udp_family == AF_INET) {
8168 			sin = (sin_t *)&sin6addr;
8169 			*sin = sin_null;
8170 			sin->sin_family = AF_INET;
8171 			sin->sin_addr.s_addr = INADDR_ANY;
8172 			udp->udp_ipversion = IPV4_VERSION;
8173 		} else {
8174 			ASSERT(udp->udp_family == AF_INET6);
8175 			sin6 = (sin6_t *)&sin6addr;
8176 			*sin6 = sin6_null;
8177 			sin6->sin6_family = AF_INET6;
8178 			V6_SET_ZERO(sin6->sin6_addr);
8179 			udp->udp_ipversion = IPV6_VERSION;
8180 		}
8181 		port = 0;
8182 		break;
8183 
8184 	case sizeof (sin_t):	/* Complete IPv4 address */
8185 		sin = (sin_t *)sa;
8186 
8187 		if (sin == NULL || !OK_32PTR((char *)sin))
8188 			return (EINVAL);
8189 
8190 		if (udp->udp_family != AF_INET ||
8191 		    sin->sin_family != AF_INET) {
8192 			return (EAFNOSUPPORT);
8193 		}
8194 		port = ntohs(sin->sin_port);
8195 		break;
8196 
8197 	case sizeof (sin6_t):	/* complete IPv6 address */
8198 		sin6 = (sin6_t *)sa;
8199 
8200 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
8201 			return (EINVAL);
8202 
8203 		if (udp->udp_family != AF_INET6 ||
8204 		    sin6->sin6_family != AF_INET6) {
8205 			return (EAFNOSUPPORT);
8206 		}
8207 		port = ntohs(sin6->sin6_port);
8208 		break;
8209 
8210 	default:		/* Invalid request */
8211 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8212 		    "udp_bind: bad ADDR_length length %u", len);
8213 		return (-TBADADDR);
8214 	}
8215 
8216 	requested_port = port;
8217 
	/*
	 * bind_to_req_port_only only stays set when the caller asked for it
	 * and a specific (non-zero) port was requested.
	 */
8218 	if (requested_port == 0 || !bind_to_req_port_only)
8219 		bind_to_req_port_only = B_FALSE;
8220 	else		/* T_BIND_REQ and requested_port != 0 */
8221 		bind_to_req_port_only = B_TRUE;
8222 
8223 	if (requested_port == 0) {
8224 		/*
8225 		 * If the application passed in zero for the port number, it
8226 		 * doesn't care which port number we bind to. Get one in the
8227 		 * valid range.
8228 		 */
8229 		if (udp->udp_anon_priv_bind) {
8230 			port = udp_get_next_priv_port(udp);
8231 		} else {
8232 			port = udp_update_next_port(udp,
8233 			    us->us_next_port_to_try, B_TRUE);
8234 		}
8235 	} else {
8236 		/*
8237 		 * If the port is in the well-known privileged range,
8238 		 * make sure the caller was privileged.
8239 		 */
8240 		int i;
8241 		boolean_t priv = B_FALSE;
8242 
8243 		if (port < us->us_smallest_nonpriv_port) {
8244 			priv = B_TRUE;
8245 		} else {
			/* Ports on the extra-privileged list also count. */
8246 			for (i = 0; i < us->us_num_epriv_ports; i++) {
8247 				if (port == us->us_epriv_ports[i]) {
8248 					priv = B_TRUE;
8249 					break;
8250 				}
8251 			}
8252 		}
8253 
8254 		if (priv) {
8255 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
8256 				return (-TACCES);
8257 		}
8258 	}
8259 
	/* The port helpers above yielded no usable port. */
8260 	if (port == 0)
8261 		return (-TNOADDR);
8262 
8263 	/*
8264 	 * The state must be TS_UNBND. TPI mandates that users must send
8265 	 * TPI primitives only 1 at a time and wait for the response before
8266 	 * sending the next primitive.
8267 	 */
8268 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8269 	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
8270 		rw_exit(&udp->udp_rwlock);
8271 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8272 		    "udp_bind: bad state, %u", udp->udp_state);
8273 		return (-TOUTSTATE);
8274 	}
8275 	/* XXX how to remove the T_BIND_REQ? Should set it before calling */
8276 	udp->udp_pending_op = T_BIND_REQ;
8277 	/*
8278 	 * Copy the source address into our udp structure. This address
8279 	 * may still be zero; if so, IP will fill in the correct address
8280 	 * each time an outbound packet is passed to it. Since the udp is
8281 	 * not yet in the bind hash list, we don't grab the uf_lock to
8282 	 * change udp_ipversion
8283 	 */
8284 	if (udp->udp_family == AF_INET) {
8285 		ASSERT(sin != NULL);
8286 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8287 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8288 		    udp->udp_ip_snd_options_len;
8289 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
8290 	} else {
8291 		ASSERT(sin6 != NULL);
8292 		v6src = sin6->sin6_addr;
8293 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
8294 			/*
8295 			 * no need to hold the uf_lock to set the udp_ipversion
8296 			 * since we are not yet in the fanout list
8297 			 */
8298 			udp->udp_ipversion = IPV4_VERSION;
8299 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
8300 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
8301 		} else {
8302 			udp->udp_ipversion = IPV6_VERSION;
8303 			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8304 		}
8305 	}
8306 
8307 	/*
8308 	 * If udp_reuseaddr is not set, then we have to make sure that
8309 	 * the IP address and port number the application requested
8310 	 * (or we selected for the application) is not being used by
8311 	 * another stream.  If another stream is already using the
8312 	 * requested IP address and port, the behavior depends on
8313 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
8314 	 * search for an unused port to bind to the stream.
8315 	 *
8316 	 * As per the BSD semantics, as modified by the Deering multicast
8317 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
8318 	 * to the same port independent of the local IP address.
8319 	 *
8320 	 * This is slightly different than in SunOS 4.X which did not
8321 	 * support IP multicast. Note that the change implemented by the
8322 	 * Deering multicast code affects all binds - not only binding
8323 	 * to IP multicast addresses.
8324 	 *
8325 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
8326 	 * order to guarantee a unique port.
8327 	 */
8328 
	/* loopmax bounds how many candidate ports the search may try. */
8329 	count = 0;
8330 	if (udp->udp_anon_priv_bind) {
8331 		/*
8332 		 * loopmax = (IPPORT_RESERVED-1) -
8333 		 *    us->us_min_anonpriv_port + 1
8334 		 */
8335 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
8336 	} else {
8337 		loopmax = us->us_largest_anon_port -
8338 		    us->us_smallest_anon_port + 1;
8339 	}
8340 
8341 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
8342 	zoneid = connp->conn_zoneid;
8343 
	/*
	 * Port search.  Every "break" out of this outer loop leaves with
	 * udpf->uf_lock still held; it is released after the hash insert
	 * (or on the udp_build_hdrs() failure path) below.
	 */
8344 	for (;;) {
8345 		udp_t		*udp1;
8346 		boolean_t	found_exclbind = B_FALSE;
8347 
8348 		/*
8349 		 * Walk through the list of udp streams bound to
8350 		 * requested port with the same IP address.
8351 		 */
8352 		lport = htons(port);
8353 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
8354 		    us->us_bind_fanout_size)];
8355 		mutex_enter(&udpf->uf_lock);
8356 		for (udp1 = udpf->uf_udp; udp1 != NULL;
8357 		    udp1 = udp1->udp_bind_hash) {
8358 			if (lport != udp1->udp_port)
8359 				continue;
8360 
8361 			/*
8362 			 * On a labeled system, we must treat bindings to ports
8363 			 * on shared IP addresses by sockets with MAC exemption
8364 			 * privilege as being in all zones, as there's
8365 			 * otherwise no way to identify the right receiver.
8366 			 */
8367 			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
8368 			    IPCL_ZONE_MATCH(connp,
8369 			    udp1->udp_connp->conn_zoneid)) &&
8370 			    !connp->conn_mac_exempt && \
8371 			    !udp1->udp_connp->conn_mac_exempt)
8372 				continue;
8373 
8374 			/*
8375 			 * If UDP_EXCLBIND is set for either the bound or
8376 			 * binding endpoint, the semantics of bind
8377 			 * is changed according to the following chart.
8378 			 *
8379 			 * spec = specified address (v4 or v6)
8380 			 * unspec = unspecified address (v4 or v6)
8381 			 * A = specified addresses are different for endpoints
8382 			 *
8383 			 * bound	bind to		allowed?
8384 			 * -------------------------------------
8385 			 * unspec	unspec		no
8386 			 * unspec	spec		no
8387 			 * spec		unspec		no
8388 			 * spec		spec		yes if A
8389 			 *
8390 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
8391 			 * as UDP_EXCLBIND, except that zoneid is ignored.
8392 			 */
8393 			if (udp1->udp_exclbind || udp->udp_exclbind ||
8394 			    udp1->udp_connp->conn_mac_exempt ||
8395 			    connp->conn_mac_exempt) {
8396 				if (V6_OR_V4_INADDR_ANY(
8397 				    udp1->udp_bound_v6src) ||
8398 				    is_inaddr_any ||
8399 				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8400 				    &v6src)) {
8401 					found_exclbind = B_TRUE;
8402 					break;
8403 				}
8404 				continue;
8405 			}
8406 
8407 			/*
8408 			 * Check ipversion to allow IPv4 and IPv6 sockets to
8409 			 * have disjoint port number spaces.
8410 			 */
8411 			if (udp->udp_ipversion != udp1->udp_ipversion) {
8412 
8413 				/*
8414 				 * On the first time through the loop, if the
8415 				 * user intentionally specified a
8416 				 * particular port number, then ignore any
8417 				 * bindings of the other protocol that may
8418 				 * conflict. This allows the user to bind IPv6
8419 				 * alone and get both v4 and v6, or bind
8420 				 * both and get each separately. On subsequent
8421 				 * times through the loop, we're checking a
8422 				 * port that we chose (not the user) and thus
8423 				 * we do not allow casual duplicate bindings.
8424 				 */
8425 				if (count == 0 && requested_port != 0)
8426 					continue;
8427 			}
8428 
8429 			/*
8430 			 * No difference depending on SO_REUSEADDR.
8431 			 *
8432 			 * If existing port is bound to a
8433 			 * non-wildcard IP address and
8434 			 * the requesting stream is bound to
8435 			 * a distinct different IP addresses
8436 			 * (non-wildcard, also), keep going.
8437 			 */
8438 			if (!is_inaddr_any &&
8439 			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
8440 			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8441 			    &v6src)) {
8442 				continue;
8443 			}
8444 			break;
8445 		}
8446 
		/*
		 * With udp_reuseaddr set and an explicitly requested port,
		 * a conflict is tolerated unless it was an exclusive-bind
		 * conflict.
		 */
8447 		if (!found_exclbind &&
8448 		    (udp->udp_reuseaddr && requested_port != 0)) {
8449 			break;
8450 		}
8451 
8452 		if (udp1 == NULL) {
8453 			/*
8454 			 * No other stream has this IP address
8455 			 * and port number. We can use it.
8456 			 */
8457 			break;
8458 		}
8459 		mutex_exit(&udpf->uf_lock);
8460 		if (bind_to_req_port_only) {
8461 			/*
8462 			 * We get here only when requested port
8463 			 * is bound (and only first  of the for()
8464 			 * loop iteration).
8465 			 *
8466 			 * The semantics of this bind request
8467 			 * require it to fail so we return from
8468 			 * the routine (and exit the loop).
8469 			 *
8470 			 */
8471 			udp->udp_pending_op = -1;
8472 			rw_exit(&udp->udp_rwlock);
8473 			return (-TADDRBUSY);
8474 		}
8475 
		/* Pick the next candidate port and go around again. */
8476 		if (udp->udp_anon_priv_bind) {
8477 			port = udp_get_next_priv_port(udp);
8478 		} else {
8479 			if ((count == 0) && (requested_port != 0)) {
8480 				/*
8481 				 * If the application wants us to find
8482 				 * a port, get one to start with. Set
8483 				 * requested_port to 0, so that we will
8484 				 * update us->us_next_port_to_try below.
8485 				 */
8486 				port = udp_update_next_port(udp,
8487 				    us->us_next_port_to_try, B_TRUE);
8488 				requested_port = 0;
8489 			} else {
8490 				port = udp_update_next_port(udp, port + 1,
8491 				    B_FALSE);
8492 			}
8493 		}
8494 
8495 		if (port == 0 || ++count >= loopmax) {
8496 			/*
8497 			 * We've tried every possible port number and
8498 			 * there are none available, so send an error
8499 			 * to the user.
8500 			 */
8501 			udp->udp_pending_op = -1;
8502 			rw_exit(&udp->udp_rwlock);
8503 			return (-TNOADDR);
8504 		}
8505 	}
8506 
8507 	/*
8508 	 * Copy the source address into our udp structure.  This address
8509 	 * may still be zero; if so, ip will fill in the correct address
8510 	 * each time an outbound packet is passed to it.
8511 	 * If we are binding to a broadcast or multicast address then
8512 	 * udp_post_ip_bind_connect will clear the source address
8513 	 * when udp_do_bind succeeds.
8514 	 */
8515 	udp->udp_v6src = udp->udp_bound_v6src = v6src;
8516 	udp->udp_port = lport;
8517 	/*
8518 	 * Now reset the next anonymous port if the application requested
8519 	 * an anonymous port, or we handed out the next anonymous port.
8520 	 */
8521 	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
8522 		us->us_next_port_to_try = port + 1;
8523 	}
8524 
8525 	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
8526 	if (udp->udp_family == AF_INET) {
8527 		sin->sin_port = udp->udp_port;
8528 	} else {
8529 		sin6->sin6_port = udp->udp_port;
8530 		/* Rebuild the header template */
8531 		error = udp_build_hdrs(udp);
8532 		if (error != 0) {
			/*
			 * NOTE(review): udp_v6src, udp_bound_v6src and
			 * udp_port were assigned above and are not reset on
			 * this path - confirm that is intended.
			 */
8533 			udp->udp_pending_op = -1;
8534 			rw_exit(&udp->udp_rwlock);
8535 			mutex_exit(&udpf->uf_lock);
8536 			return (error);
8537 		}
8538 	}
	/* Publish the binding: mark the endpoint idle and hash it. */
8539 	udp->udp_state = TS_IDLE;
8540 	udp_bind_hash_insert(udpf, udp);
8541 	mutex_exit(&udpf->uf_lock);
8542 	rw_exit(&udp->udp_rwlock);
8543 
8544 	if (cl_inet_bind) {
8545 		/*
8546 		 * Running in cluster mode - register bind information
8547 		 */
8548 		if (udp->udp_ipversion == IPV4_VERSION) {
8549 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8550 			    IPPROTO_UDP, AF_INET,
8551 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8552 			    (in_port_t)udp->udp_port, NULL);
8553 		} else {
8554 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8555 			    IPPROTO_UDP, AF_INET6,
8556 			    (uint8_t *)&(udp->udp_v6src),
8557 			    (in_port_t)udp->udp_port, NULL);
8558 		}
8559 	}
8560 
	/*
	 * Labeled-system (multilevel port) checks.
	 *
	 * NOTE(review): every error return inside this block, and the
	 * allocb() failure further down, only clears udp_pending_op.  The
	 * endpoint was already moved to TS_IDLE and inserted into the bind
	 * hash above, and nothing visible in this function undoes that on
	 * these paths - confirm whether the callers are expected to clean up.
	 */
8561 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
8562 	if (is_system_labeled() && (!connp->conn_anon_port ||
8563 	    connp->conn_anon_mlp)) {
8564 		uint16_t mlpport;
8565 		zone_t *zone;
8566 
8567 		zone = crgetzone(cr);
8568 		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
8569 		    mlptSingle;
8570 		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
8571 		    &v6src, us->us_netstack->netstack_ip);
8572 		if (addrtype == mlptSingle) {
8573 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8574 			udp->udp_pending_op = -1;
8575 			rw_exit(&udp->udp_rwlock);
8576 			connp->conn_anon_port = B_FALSE;
8577 			connp->conn_mlp_type = mlptSingle;
8578 			return (-TNOADDR);
8579 		}
8580 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
8581 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
8582 		    addrtype);
8583 		if (mlptype != mlptSingle &&
8584 		    (connp->conn_mlp_type == mlptSingle ||
8585 		    secpolicy_net_bindmlp(cr) != 0)) {
8586 			if (udp->udp_debug) {
8587 				(void) strlog(UDP_MOD_ID, 0, 1,
8588 				    SL_ERROR|SL_TRACE,
8589 				    "udp_bind: no priv for multilevel port %d",
8590 				    mlpport);
8591 			}
8592 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8593 			udp->udp_pending_op = -1;
8594 			rw_exit(&udp->udp_rwlock);
8595 			connp->conn_anon_port = B_FALSE;
8596 			connp->conn_mlp_type = mlptSingle;
8597 			return (-TACCES);
8598 		}
8599 
8600 		/*
8601 		 * If we're specifically binding a shared IP address and the
8602 		 * port is MLP on shared addresses, then check to see if this
8603 		 * zone actually owns the MLP.  Reject if not.
8604 		 */
8605 		if (mlptype == mlptShared && addrtype == mlptShared) {
8606 			/*
8607 			 * No need to handle exclusive-stack zones since
8608 			 * ALL_ZONES only applies to the shared stack.
8609 			 */
8610 			zoneid_t mlpzone;
8611 
8612 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
8613 			    htons(mlpport));
8614 			if (connp->conn_zoneid != mlpzone) {
8615 				if (udp->udp_debug) {
8616 					(void) strlog(UDP_MOD_ID, 0, 1,
8617 					    SL_ERROR|SL_TRACE,
8618 					    "udp_bind: attempt to bind port "
8619 					    "%d on shared addr in zone %d "
8620 					    "(should be %d)",
8621 					    mlpport, connp->conn_zoneid,
8622 					    mlpzone);
8623 				}
8624 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8625 				udp->udp_pending_op = -1;
8626 				rw_exit(&udp->udp_rwlock);
8627 				connp->conn_anon_port = B_FALSE;
8628 				connp->conn_mlp_type = mlptSingle;
8629 				return (-TACCES);
8630 			}
8631 		}
8632 		if (connp->conn_anon_port) {
8633 			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
8634 			    port, B_TRUE);
8635 			if (error != 0) {
8636 				if (udp->udp_debug) {
8637 					(void) strlog(UDP_MOD_ID, 0, 1,
8638 					    SL_ERROR|SL_TRACE,
8639 					    "udp_bind: cannot establish anon "
8640 					    "MLP for port %d", port);
8641 				}
8642 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8643 				udp->udp_pending_op = -1;
8644 				rw_exit(&udp->udp_rwlock);
8645 				connp->conn_anon_port = B_FALSE;
8646 				connp->conn_mlp_type = mlptSingle;
8647 				return (-TACCES);
8648 			}
8649 		}
8650 		connp->conn_mlp_type = mlptype;
8651 	}
8652 
8653 	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8654 		/*
8655 		 * Append a request for an IRE if udp_v6src not
8656 		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
8657 		 */
8658 		mp = allocb(sizeof (ire_t), BPRI_HI);
8659 		if (!mp) {
8660 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8661 			udp->udp_pending_op = -1;
8662 			rw_exit(&udp->udp_rwlock);
8663 			return (ENOMEM);
8664 		}
8665 		mp->b_wptr += sizeof (ire_t);
8666 		mp->b_datap->db_type = IRE_DB_REQ_TYPE;
8667 	}
	/* Hand the local address and port to IP for this family. */
8668 	if (udp->udp_family == AF_INET6) {
8669 		ASSERT(udp->udp_connp->conn_af_isv6);
8670 		error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
8671 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
8672 	} else {
8673 		ASSERT(!udp->udp_connp->conn_af_isv6);
8674 		error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
8675 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
8676 		    B_TRUE);
8677 	}
8678 
	/*
	 * udp_post_ip_bind_connect() clears udp_pending_op and frees mp;
	 * when error is non-zero it also removes the endpoint from the
	 * bind hash and returns it to TS_UNBND.
	 */
8679 	(void) udp_post_ip_bind_connect(udp, mp, error);
8680 	return (error);
8681 }
8682 
8683 int
8684 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
8685     socklen_t len, cred_t *cr)
8686 {
8687 	int		error;
8688 	conn_t		*connp;
8689 
8690 	/* All Solaris components should pass a cred for this operation. */
8691 	ASSERT(cr != NULL);
8692 
8693 	connp = (conn_t *)proto_handle;
8694 
8695 	if (sa == NULL)
8696 		error = udp_do_unbind(connp);
8697 	else
8698 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
8699 
8700 	if (error < 0) {
8701 		if (error == -TOUTSTATE)
8702 			error = EINVAL;
8703 		else
8704 			error = proto_tlitosyserr(-error);
8705 	}
8706 
8707 	return (error);
8708 }
8709 
8710 static int
8711 udp_implicit_bind(conn_t *connp, cred_t *cr)
8712 {
8713 	int error;
8714 
8715 	/* All Solaris components should pass a cred for this operation. */
8716 	ASSERT(cr != NULL);
8717 
8718 	error = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
8719 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
8720 }
8721 
8722 /*
8723  * This routine removes a port number association from a stream. It
8724  * is called by udp_unbind and udp_tpi_unbind.
8725  */
8726 static int
8727 udp_do_unbind(conn_t *connp)
8728 {
8729 	udp_t 		*udp = connp->conn_udp;
8730 	udp_fanout_t	*udpf;
8731 	udp_stack_t	*us = udp->udp_us;
8732 
8733 	if (cl_inet_unbind != NULL) {
8734 		/*
8735 		 * Running in cluster mode - register unbind information
8736 		 */
8737 		if (udp->udp_ipversion == IPV4_VERSION) {
8738 			(*cl_inet_unbind)(
8739 			    connp->conn_netstack->netstack_stackid,
8740 			    IPPROTO_UDP, AF_INET,
8741 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8742 			    (in_port_t)udp->udp_port, NULL);
8743 		} else {
8744 			(*cl_inet_unbind)(
8745 			    connp->conn_netstack->netstack_stackid,
8746 			    IPPROTO_UDP, AF_INET6,
8747 			    (uint8_t *)&(udp->udp_v6src),
8748 			    (in_port_t)udp->udp_port, NULL);
8749 		}
8750 	}
8751 
8752 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8753 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8754 		rw_exit(&udp->udp_rwlock);
8755 		return (-TOUTSTATE);
8756 	}
8757 	udp->udp_pending_op = T_UNBIND_REQ;
8758 	rw_exit(&udp->udp_rwlock);
8759 
8760 	/*
8761 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
8762 	 * and therefore ip_unbind must never return NULL.
8763 	 */
8764 	ip_unbind(connp);
8765 
8766 	/*
8767 	 * Once we're unbound from IP, the pending operation may be cleared
8768 	 * here.
8769 	 */
8770 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8771 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8772 	    us->us_bind_fanout_size)];
8773 
8774 	mutex_enter(&udpf->uf_lock);
8775 	udp_bind_hash_remove(udp, B_TRUE);
8776 	V6_SET_ZERO(udp->udp_v6src);
8777 	V6_SET_ZERO(udp->udp_bound_v6src);
8778 	udp->udp_port = 0;
8779 	mutex_exit(&udpf->uf_lock);
8780 
8781 	udp->udp_pending_op = -1;
8782 	udp->udp_state = TS_UNBND;
8783 	if (udp->udp_family == AF_INET6)
8784 		(void) udp_build_hdrs(udp);
8785 	rw_exit(&udp->udp_rwlock);
8786 
8787 	return (0);
8788 }
8789 
8790 static int
8791 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
8792 {
8793 	ire_t		*ire;
8794 	udp_fanout_t	*udpf;
8795 	udp_stack_t	*us = udp->udp_us;
8796 
8797 	ASSERT(udp->udp_pending_op != -1);
8798 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8799 	if (error == 0) {
8800 		/* For udp_do_connect() success */
8801 		/* udp_do_bind() success will do nothing in here */
8802 		/*
8803 		 * If a broadcast/multicast address was bound, set
8804 		 * the source address to 0.
8805 		 * This ensures no datagrams with broadcast address
8806 		 * as source address are emitted (which would violate
8807 		 * RFC1122 - Hosts requirements)
8808 		 *
8809 		 * Note that when connecting the returned IRE is
8810 		 * for the destination address and we only perform
8811 		 * the broadcast check for the source address (it
8812 		 * is OK to connect to a broadcast/multicast address.)
8813 		 */
8814 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
8815 			ire = (ire_t *)ire_mp->b_rptr;
8816 
8817 			/*
8818 			 * Note: we get IRE_BROADCAST for IPv6 to "mark" a
8819 			 * multicast local address.
8820 			 */
8821 			udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8822 			    us->us_bind_fanout_size)];
8823 			if (ire->ire_type == IRE_BROADCAST &&
8824 			    udp->udp_state != TS_DATA_XFER) {
8825 				ASSERT(udp->udp_pending_op == T_BIND_REQ ||
8826 				    udp->udp_pending_op == O_T_BIND_REQ);
8827 				/*
8828 				 * This was just a local bind to a broadcast
8829 				 * addr.
8830 				 */
8831 				mutex_enter(&udpf->uf_lock);
8832 				V6_SET_ZERO(udp->udp_v6src);
8833 				mutex_exit(&udpf->uf_lock);
8834 				if (udp->udp_family == AF_INET6)
8835 					(void) udp_build_hdrs(udp);
8836 			} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8837 				if (udp->udp_family == AF_INET6)
8838 					(void) udp_build_hdrs(udp);
8839 			}
8840 		}
8841 	} else {
8842 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8843 		    us->us_bind_fanout_size)];
8844 		mutex_enter(&udpf->uf_lock);
8845 
8846 		if (udp->udp_state == TS_DATA_XFER) {
8847 			/* Connect failed */
8848 			/* Revert back to the bound source */
8849 			udp->udp_v6src = udp->udp_bound_v6src;
8850 			udp->udp_state = TS_IDLE;
8851 		} else {
8852 			/* For udp_do_bind() failed */
8853 			V6_SET_ZERO(udp->udp_v6src);
8854 			V6_SET_ZERO(udp->udp_bound_v6src);
8855 			udp->udp_state = TS_UNBND;
8856 			udp_bind_hash_remove(udp, B_TRUE);
8857 			udp->udp_port = 0;
8858 		}
8859 		mutex_exit(&udpf->uf_lock);
8860 		if (udp->udp_family == AF_INET6)
8861 			(void) udp_build_hdrs(udp);
8862 	}
8863 	udp->udp_pending_op = -1;
8864 	rw_exit(&udp->udp_rwlock);
8865 	if (ire_mp != NULL)
8866 		freeb(ire_mp);
8867 	return (error);
8868 }
8869 
8870 /*
8871  * It associates a default destination address with the stream.
 *
 * sa/len give the destination; only sizeof (sin_t) and sizeof (sin6_t)
 * are accepted, any other length returns EINVAL.  An IPv4-mapped IPv6
 * destination is handled as IPv4.  An unspecified (all-zero) destination
 * is replaced by the loopback address, and that replacement is also
 * written back into the caller's sockaddr.
 *
 * Returns the result of udp_post_ip_bind_connect() (the error from IP,
 * 0 on success) or, for failures detected here, a negated TLI error
 * (-TBADADDR, -TOUTSTATE) or ENOMEM.
 *
 * Locking: udp_rwlock is taken as writer and the uf_lock of the bind
 * fanout bucket for the local port is held while the endpoint state is
 * changed and the bucket is scanned.  Both are released before IP is
 * called.
8872  */
8873 static int
8874 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
8875     cred_t *cr)
8876 {
8877 	sin6_t		*sin6;
8878 	sin_t		*sin;
8879 	in6_addr_t 	v6dst;
8880 	ipaddr_t 	v4dst;
8881 	uint16_t 	dstport;
8882 	uint32_t 	flowinfo;
8883 	mblk_t		*ire_mp;
8884 	udp_fanout_t	*udpf;
8885 	udp_t		*udp, *udp1;
8886 	ushort_t	ipversion;
8887 	udp_stack_t	*us;
8888 	int		error;
8889 
8890 	udp = connp->conn_udp;
8891 	us = udp->udp_us;
8892 
8893 	/*
8894 	 * Address has been verified by the caller
8895 	 */
8896 	switch (len) {
8897 	default:
8898 		/*
8899 		 * Should never happen
8900 		 */
8901 		return (EINVAL);
8902 
8903 	case sizeof (sin_t):
		/* flowinfo is left unset here; only the IPv6 path reads it. */
8904 		sin = (sin_t *)sa;
8905 		v4dst = sin->sin_addr.s_addr;
8906 		dstport = sin->sin_port;
8907 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
8908 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8909 		ipversion = IPV4_VERSION;
8910 		break;
8911 
8912 	case sizeof (sin6_t):
8913 		sin6 = (sin6_t *)sa;
8914 		v6dst = sin6->sin6_addr;
8915 		dstport = sin6->sin6_port;
8916 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
8917 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
8918 			ipversion = IPV4_VERSION;
8919 			flowinfo = 0;
8920 		} else {
8921 			ipversion = IPV6_VERSION;
8922 			flowinfo = sin6->sin6_flowinfo;
8923 		}
8924 		break;
8925 	}
8926 
	/* A destination port of zero is rejected. */
8927 	if (dstport == 0)
8928 		return (-TBADADDR);
8929 
8930 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8931 
8932 	/*
8933 	 * This UDP must have bound to a port already before doing a connect.
8934 	 * TPI mandates that users must send TPI primitives only 1 at a time
8935 	 * and wait for the response before sending the next primitive.
8936 	 */
8937 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8938 		rw_exit(&udp->udp_rwlock);
8939 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8940 		    "udp_connect: bad state, %u", udp->udp_state);
8941 		return (-TOUTSTATE);
8942 	}
8943 	udp->udp_pending_op = T_CONN_REQ;
8944 	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
8945 
8946 	if (ipversion == IPV4_VERSION) {
8947 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8948 		    udp->udp_ip_snd_options_len;
8949 	} else {
8950 		udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8951 	}
8952 
	/* The fanout bucket is selected by the already-bound local port. */
8953 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8954 	    us->us_bind_fanout_size)];
8955 
8956 	mutex_enter(&udpf->uf_lock);
8957 	if (udp->udp_state == TS_DATA_XFER) {
8958 		/* Already connected - clear out state */
8959 		udp->udp_v6src = udp->udp_bound_v6src;
8960 		udp->udp_state = TS_IDLE;
8961 	}
8962 
8963 	/*
8964 	 * Create a default IP header with no IP options.
8965 	 */
8966 	udp->udp_dstport = dstport;
8967 	udp->udp_ipversion = ipversion;
8968 	if (ipversion == IPV4_VERSION) {
8969 		/*
8970 		 * Interpret a zero destination to mean loopback.
8971 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
8972 		 * generate the T_CONN_CON.
8973 		 */
8974 		if (v4dst == INADDR_ANY) {
8975 			v4dst = htonl(INADDR_LOOPBACK);
8976 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			/*
			 * Writes into the caller's sockaddr; sin/sin6 were
			 * obtained by casting away the const on sa.
			 */
8977 			if (udp->udp_family == AF_INET) {
8978 				sin->sin_addr.s_addr = v4dst;
8979 			} else {
8980 				sin6->sin6_addr = v6dst;
8981 			}
8982 		}
8983 		udp->udp_v6dst = v6dst;
8984 		udp->udp_flowinfo = 0;
8985 
8986 		/*
8987 		 * If the destination address is multicast and
8988 		 * an outgoing multicast interface has been set,
8989 		 * use the address of that interface as our
8990 		 * source address if no source address has been set.
8991 		 */
8992 		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
8993 		    CLASSD(v4dst) &&
8994 		    udp->udp_multicast_if_addr != INADDR_ANY) {
8995 			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
8996 			    &udp->udp_v6src);
8997 		}
8998 	} else {
8999 		ASSERT(udp->udp_ipversion == IPV6_VERSION);
9000 		/*
9001 		 * Interpret a zero destination to mean loopback.
9002 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
9003 		 * generate the T_CONN_CON.
9004 		 */
9005 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
9006 			v6dst = ipv6_loopback;
9007 			sin6->sin6_addr = v6dst;
9008 		}
9009 		udp->udp_v6dst = v6dst;
9010 		udp->udp_flowinfo = flowinfo;
9011 		/*
9012 		 * If the destination address is multicast and
9013 		 * an outgoing multicast interface has been set,
9014 		 * then the ip bind logic will pick the correct source
9015 		 * address (i.e. matching the outgoing multicast interface).
9016 		 */
9017 	}
9018 
9019 	/*
9020 	 * Verify that the src/port/dst/port is unique for all
9021 	 * connections in TS_DATA_XFER
9022 	 */
9023 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
9024 		if (udp1->udp_state != TS_DATA_XFER)
9025 			continue;
9026 		if (udp->udp_port != udp1->udp_port ||
9027 		    udp->udp_ipversion != udp1->udp_ipversion ||
9028 		    dstport != udp1->udp_dstport ||
9029 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
9030 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
9031 		    !(IPCL_ZONE_MATCH(udp->udp_connp,
9032 		    udp1->udp_connp->conn_zoneid) ||
9033 		    IPCL_ZONE_MATCH(udp1->udp_connp,
9034 		    udp->udp_connp->conn_zoneid)))
9035 			continue;
		/* An identical connected endpoint exists: refuse. */
9036 		mutex_exit(&udpf->uf_lock);
9037 		udp->udp_pending_op = -1;
9038 		rw_exit(&udp->udp_rwlock);
9039 		return (-TBADADDR);
9040 	}
9041 
	/* In cluster mode the connect is announced through the hook. */
9042 	if (cl_inet_connect2 != NULL) {
9043 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error);
9044 		if (error != 0) {
9045 			mutex_exit(&udpf->uf_lock);
9046 			udp->udp_pending_op = -1;
9047 			rw_exit(&udp->udp_rwlock);
9048 			return (-TBADADDR);
9049 		}
9050 	}
9051 
9052 	udp->udp_state = TS_DATA_XFER;
9053 	mutex_exit(&udpf->uf_lock);
9054 
9055 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
9056 	if (ire_mp == NULL) {
		/* Out of memory: fall back to the bound (idle) state. */
9057 		mutex_enter(&udpf->uf_lock);
9058 		udp->udp_state = TS_IDLE;
9059 		udp->udp_pending_op = -1;
9060 		mutex_exit(&udpf->uf_lock);
9061 		rw_exit(&udp->udp_rwlock);
9062 		return (ENOMEM);
9063 	}
9064 
	/* udp_rwlock is dropped before calling into IP. */
9065 	rw_exit(&udp->udp_rwlock);
9066 
9067 	ire_mp->b_wptr += sizeof (ire_t);
9068 	ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
9069 
9070 	if (udp->udp_family == AF_INET) {
9071 		error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP,
9072 		    &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port,
9073 		    V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport,
9074 		    B_TRUE, B_TRUE, cr);
9075 	} else {
9076 		error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP,
9077 		    &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst,
9078 		    &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr);
9079 	}
9080 
	/*
	 * udp_post_ip_bind_connect() clears udp_pending_op, frees ire_mp
	 * and, when error is non-zero, reverts the endpoint to TS_IDLE.
	 */
9081 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
9082 }
9083 
9084 /* ARGSUSED */
9085 static int
9086 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
9087     socklen_t len, sock_connid_t *id, cred_t *cr)
9088 {
9089 	conn_t	*connp = (conn_t *)proto_handle;
9090 	udp_t	*udp = connp->conn_udp;
9091 	int	error;
9092 	boolean_t did_bind = B_FALSE;
9093 
9094 	/* All Solaris components should pass a cred for this operation. */
9095 	ASSERT(cr != NULL);
9096 
9097 	if (sa == NULL) {
9098 		/*
9099 		 * Disconnect
9100 		 * Make sure we are connected
9101 		 */
9102 		if (udp->udp_state != TS_DATA_XFER)
9103 			return (EINVAL);
9104 
9105 		error = udp_disconnect(connp);
9106 		return (error);
9107 	}
9108 
9109 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
9110 	if (error != 0)
9111 		goto done;
9112 
9113 	/* do an implicit bind if necessary */
9114 	if (udp->udp_state == TS_UNBND) {
9115 		error = udp_implicit_bind(connp, cr);
9116 		/*
9117 		 * We could be racing with an actual bind, in which case
9118 		 * we would see EPROTO. We cross our fingers and try
9119 		 * to connect.
9120 		 */
9121 		if (!(error == 0 || error == EPROTO))
9122 			goto done;
9123 		did_bind = B_TRUE;
9124 	}
9125 	/*
9126 	 * set SO_DGRAM_ERRIND
9127 	 */
9128 	udp->udp_dgram_errind = B_TRUE;
9129 
9130 	error = udp_do_connect(connp, sa, len, cr);
9131 
9132 	if (error != 0 && did_bind) {
9133 		int unbind_err;
9134 
9135 		unbind_err = udp_do_unbind(connp);
9136 		ASSERT(unbind_err == 0);
9137 	}
9138 
9139 	if (error == 0) {
9140 		*id = 0;
9141 		(*connp->conn_upcalls->su_connected)
9142 		    (connp->conn_upper_handle, 0, NULL, -1);
9143 	} else if (error < 0) {
9144 		error = proto_tlitosyserr(-error);
9145 	}
9146 
9147 done:
9148 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
9149 		/*
9150 		 * No need to hold locks to set state
9151 		 * after connect failure socket state is undefined
9152 		 * We set the state only to imitate old sockfs behavior
9153 		 */
9154 		udp->udp_state = TS_IDLE;
9155 	}
9156 	return (error);
9157 }
9158 
9159 /* ARGSUSED */
9160 int
9161 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
9162     cred_t *cr)
9163 {
9164 	conn_t		*connp = (conn_t *)proto_handle;
9165 	udp_t		*udp = connp->conn_udp;
9166 	udp_stack_t	*us = udp->udp_us;
9167 	int		error = 0;
9168 
9169 	ASSERT(DB_TYPE(mp) == M_DATA);
9170 
9171 	/* All Solaris components should pass a cred for this operation. */
9172 	ASSERT(cr != NULL);
9173 
9174 	/* If labeled then sockfs should have already set db_credp */
9175 	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
9176 
9177 	/*
9178 	 * If the socket is connected and no change in destination
9179 	 */
9180 	if (msg->msg_namelen == 0) {
9181 		error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid);
9182 		if (error == EDESTADDRREQ)
9183 			return (error);
9184 		else
9185 			return (udp->udp_dgram_errind ? error : 0);
9186 	}
9187 
9188 	/*
9189 	 * Do an implicit bind if necessary.
9190 	 */
9191 	if (udp->udp_state == TS_UNBND) {
9192 		error = udp_implicit_bind(connp, cr);
9193 		/*
9194 		 * We could be racing with an actual bind, in which case
9195 		 * we would see EPROTO. We cross our fingers and try
9196 		 * to send.
9197 		 */
9198 		if (!(error == 0 || error == EPROTO)) {
9199 			freemsg(mp);
9200 			return (error);
9201 		}
9202 	}
9203 
9204 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9205 
9206 	if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) {
9207 		rw_exit(&udp->udp_rwlock);
9208 		freemsg(mp);
9209 		return (EISCONN);
9210 	}
9211 
9212 
9213 	if (udp->udp_delayed_error != 0) {
9214 		boolean_t	match;
9215 
9216 		error = udp->udp_delayed_error;
9217 		match = B_FALSE;
9218 		udp->udp_delayed_error = 0;
9219 		switch (udp->udp_family) {
9220 		case AF_INET: {
9221 			/* Compare just IP address and port */
9222 			sin_t *sin1 = (sin_t *)msg->msg_name;
9223 			sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
9224 
9225 			if (msg->msg_namelen == sizeof (sin_t) &&
9226 			    sin1->sin_port == sin2->sin_port &&
9227 			    sin1->sin_addr.s_addr == sin2->sin_addr.s_addr)
9228 				match = B_TRUE;
9229 
9230 			break;
9231 		}
9232 		case AF_INET6: {
9233 			sin6_t	*sin1 = (sin6_t *)msg->msg_name;
9234 			sin6_t	*sin2 = (sin6_t *)&udp->udp_delayed_addr;
9235 
9236 			if (msg->msg_namelen == sizeof (sin6_t) &&
9237 			    sin1->sin6_port == sin2->sin6_port &&
9238 			    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
9239 			    &sin2->sin6_addr))
9240 				match = B_TRUE;
9241 			break;
9242 		}
9243 		default:
9244 			ASSERT(0);
9245 		}
9246 
9247 		*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
9248 
9249 		if (match) {
9250 			rw_exit(&udp->udp_rwlock);
9251 			freemsg(mp);
9252 			return (error);
9253 		}
9254 	}
9255 
9256 	error = proto_verify_ip_addr(udp->udp_family,
9257 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
9258 	rw_exit(&udp->udp_rwlock);
9259 
9260 	if (error != 0) {
9261 		freemsg(mp);
9262 		return (error);
9263 	}
9264 
9265 	error = udp_send_not_connected(connp, mp,
9266 	    (struct sockaddr  *)msg->msg_name, msg->msg_namelen, msg, cr,
9267 	    curproc->p_pid);
9268 	if (error != 0) {
9269 		UDP_STAT(us, udp_out_err_output);
9270 		freemsg(mp);
9271 	}
9272 	return (udp->udp_dgram_errind ? error : 0);
9273 }
9274 
9275 void
9276 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
9277     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
9278 {
9279 	conn_t 	*connp = (conn_t *)proto_handle;
9280 	udp_t	*udp;
9281 	struct T_capability_ack tca;
9282 	struct sockaddr_in6 laddr, faddr;
9283 	socklen_t laddrlen, faddrlen;
9284 	short opts;
9285 	struct stroptions *stropt;
9286 	mblk_t *stropt_mp;
9287 	int error;
9288 
9289 	udp = connp->conn_udp;
9290 
9291 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
9292 
9293 	/*
9294 	 * setup the fallback stream that was allocated
9295 	 */
9296 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
9297 	connp->conn_minor_arena = WR(q)->q_ptr;
9298 
9299 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
9300 
9301 	WR(q)->q_qinfo = &udp_winit;
9302 
9303 	connp->conn_rq = RD(q);
9304 	connp->conn_wq = WR(q);
9305 
9306 	/* Notify stream head about options before sending up data */
9307 	stropt_mp->b_datap->db_type = M_SETOPTS;
9308 	stropt_mp->b_wptr += sizeof (*stropt);
9309 	stropt = (struct stroptions *)stropt_mp->b_rptr;
9310 	stropt->so_flags = SO_WROFF | SO_HIWAT;
9311 	stropt->so_wroff =
9312 	    (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
9313 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
9314 	putnext(RD(q), stropt_mp);
9315 
9316 	/*
9317 	 * Free the helper stream
9318 	 */
9319 	ip_free_helper_stream(connp);
9320 
9321 	if (!direct_sockfs)
9322 		udp_disable_direct_sockfs(udp);
9323 
9324 	/*
9325 	 * Collect the information needed to sync with the sonode
9326 	 */
9327 	udp_do_capability_ack(udp, &tca, TC1_INFO);
9328 
9329 	laddrlen = faddrlen = sizeof (sin6_t);
9330 	(void) udp_getsockname((sock_lower_handle_t)connp,
9331 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
9332 	error = udp_getpeername((sock_lower_handle_t)connp,
9333 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
9334 	if (error != 0)
9335 		faddrlen = 0;
9336 
9337 	opts = 0;
9338 	if (udp->udp_dgram_errind)
9339 		opts |= SO_DGRAM_ERRIND;
9340 	if (udp->udp_dontroute)
9341 		opts |= SO_DONTROUTE;
9342 
9343 	/*
9344 	 * Once we grab the drain lock, no data will be send up
9345 	 * to the socket. So we notify the socket that the endpoint
9346 	 * is quiescent and it's therefore safe move data from
9347 	 * the socket to the stream head.
9348 	 */
9349 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
9350 	    (struct sockaddr *)&laddr, laddrlen,
9351 	    (struct sockaddr *)&faddr, faddrlen, opts);
9352 
9353 	/*
9354 	 * push up any packets that were queued in udp_t
9355 	 */
9356 
9357 	mutex_enter(&udp->udp_recv_lock);
9358 	while (udp->udp_fallback_queue_head != NULL) {
9359 		mblk_t *mp;
9360 		mp = udp->udp_fallback_queue_head;
9361 		udp->udp_fallback_queue_head = mp->b_next;
9362 		mutex_exit(&udp->udp_recv_lock);
9363 		mp->b_next = NULL;
9364 		putnext(RD(q), mp);
9365 		mutex_enter(&udp->udp_recv_lock);
9366 	}
9367 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
9368 	/*
9369 	 * No longer a streams less socket
9370 	 */
9371 	connp->conn_flags &= ~IPCL_NONSTR;
9372 	mutex_exit(&udp->udp_recv_lock);
9373 
9374 	ASSERT(connp->conn_ref >= 1);
9375 }
9376 
9377 static int
9378 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9379 {
9380 	sin_t	*sin = (sin_t *)sa;
9381 	sin6_t	*sin6 = (sin6_t *)sa;
9382 
9383 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9384 	ASSERT(udp != NULL);
9385 
9386 	if (udp->udp_state != TS_DATA_XFER)
9387 		return (ENOTCONN);
9388 
9389 	switch (udp->udp_family) {
9390 	case AF_INET:
9391 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9392 
9393 		if (*salenp < sizeof (sin_t))
9394 			return (EINVAL);
9395 
9396 		*salenp = sizeof (sin_t);
9397 		*sin = sin_null;
9398 		sin->sin_family = AF_INET;
9399 		sin->sin_port = udp->udp_dstport;
9400 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
9401 		break;
9402 	case AF_INET6:
9403 		if (*salenp < sizeof (sin6_t))
9404 			return (EINVAL);
9405 
9406 		*salenp = sizeof (sin6_t);
9407 		*sin6 = sin6_null;
9408 		sin6->sin6_family = AF_INET6;
9409 		sin6->sin6_port = udp->udp_dstport;
9410 		sin6->sin6_addr = udp->udp_v6dst;
9411 		sin6->sin6_flowinfo = udp->udp_flowinfo;
9412 		break;
9413 	}
9414 
9415 	return (0);
9416 }
9417 
9418 /* ARGSUSED */
9419 int
9420 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
9421     socklen_t *salenp, cred_t *cr)
9422 {
9423 	conn_t	*connp = (conn_t *)proto_handle;
9424 	udp_t	*udp = connp->conn_udp;
9425 	int error;
9426 
9427 	/* All Solaris components should pass a cred for this operation. */
9428 	ASSERT(cr != NULL);
9429 
9430 	ASSERT(udp != NULL);
9431 
9432 	rw_enter(&udp->udp_rwlock, RW_READER);
9433 
9434 	error = udp_do_getpeername(udp, sa, salenp);
9435 
9436 	rw_exit(&udp->udp_rwlock);
9437 
9438 	return (error);
9439 }
9440 
9441 static int
9442 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9443 {
9444 	sin_t	*sin = (sin_t *)sa;
9445 	sin6_t	*sin6 = (sin6_t *)sa;
9446 
9447 	ASSERT(udp != NULL);
9448 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9449 
9450 	switch (udp->udp_family) {
9451 	case AF_INET:
9452 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9453 
9454 		if (*salenp < sizeof (sin_t))
9455 			return (EINVAL);
9456 
9457 		*salenp = sizeof (sin_t);
9458 		*sin = sin_null;
9459 		sin->sin_family = AF_INET;
9460 		if (udp->udp_state == TS_UNBND) {
9461 			break;
9462 		}
9463 		sin->sin_port = udp->udp_port;
9464 
9465 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
9466 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9467 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
9468 		} else {
9469 			/*
9470 			 * INADDR_ANY
9471 			 * udp_v6src is not set, we might be bound to
9472 			 * broadcast/multicast. Use udp_bound_v6src as
9473 			 * local address instead (that could
9474 			 * also still be INADDR_ANY)
9475 			 */
9476 			sin->sin_addr.s_addr =
9477 			    V4_PART_OF_V6(udp->udp_bound_v6src);
9478 		}
9479 		break;
9480 
9481 	case AF_INET6:
9482 		if (*salenp < sizeof (sin6_t))
9483 			return (EINVAL);
9484 
9485 		*salenp = sizeof (sin6_t);
9486 		*sin6 = sin6_null;
9487 		sin6->sin6_family = AF_INET6;
9488 		if (udp->udp_state == TS_UNBND) {
9489 			break;
9490 		}
9491 		sin6->sin6_port = udp->udp_port;
9492 
9493 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9494 			sin6->sin6_addr = udp->udp_v6src;
9495 		} else {
9496 			/*
9497 			 * UNSPECIFIED
9498 			 * udp_v6src is not set, we might be bound to
9499 			 * broadcast/multicast. Use udp_bound_v6src as
9500 			 * local address instead (that could
9501 			 * also still be UNSPECIFIED)
9502 			 */
9503 			sin6->sin6_addr = udp->udp_bound_v6src;
9504 		}
9505 	}
9506 	return (0);
9507 }
9508 
9509 /* ARGSUSED */
9510 int
9511 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
9512     socklen_t *salenp, cred_t *cr)
9513 {
9514 	conn_t	*connp = (conn_t *)proto_handle;
9515 	udp_t	*udp = connp->conn_udp;
9516 	int error;
9517 
9518 	/* All Solaris components should pass a cred for this operation. */
9519 	ASSERT(cr != NULL);
9520 
9521 	ASSERT(udp != NULL);
9522 	rw_enter(&udp->udp_rwlock, RW_READER);
9523 
9524 	error = udp_do_getsockname(udp, sa, salenp);
9525 
9526 	rw_exit(&udp->udp_rwlock);
9527 
9528 	return (error);
9529 }
9530 
9531 int
9532 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9533     void *optvalp, socklen_t *optlen, cred_t *cr)
9534 {
9535 	conn_t		*connp = (conn_t *)proto_handle;
9536 	udp_t		*udp = connp->conn_udp;
9537 	int		error;
9538 	t_uscalar_t	max_optbuf_len;
9539 	void		*optvalp_buf;
9540 	int		len;
9541 
9542 	/* All Solaris components should pass a cred for this operation. */
9543 	ASSERT(cr != NULL);
9544 
9545 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
9546 	    udp_opt_obj.odb_opt_des_arr,
9547 	    udp_opt_obj.odb_opt_arr_cnt,
9548 	    udp_opt_obj.odb_topmost_tpiprovider,
9549 	    B_FALSE, B_TRUE, cr);
9550 	if (error != 0) {
9551 		if (error < 0)
9552 			error = proto_tlitosyserr(-error);
9553 		return (error);
9554 	}
9555 
9556 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
9557 	rw_enter(&udp->udp_rwlock, RW_READER);
9558 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
9559 	rw_exit(&udp->udp_rwlock);
9560 
9561 	if (len < 0) {
9562 		/*
9563 		 * Pass on to IP
9564 		 */
9565 		kmem_free(optvalp_buf, max_optbuf_len);
9566 		return (ip_get_options(connp, level, option_name,
9567 		    optvalp, optlen, cr));
9568 	} else {
9569 		/*
9570 		 * update optlen and copy option value
9571 		 */
9572 		t_uscalar_t size = MIN(len, *optlen);
9573 		bcopy(optvalp_buf, optvalp, size);
9574 		bcopy(&size, optlen, sizeof (size));
9575 
9576 		kmem_free(optvalp_buf, max_optbuf_len);
9577 		return (0);
9578 	}
9579 }
9580 
9581 int
9582 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9583     const void *optvalp, socklen_t optlen, cred_t *cr)
9584 {
9585 	conn_t		*connp = (conn_t *)proto_handle;
9586 	udp_t		*udp = connp->conn_udp;
9587 	int		error;
9588 
9589 	/* All Solaris components should pass a cred for this operation. */
9590 	ASSERT(cr != NULL);
9591 
9592 	error = proto_opt_check(level, option_name, optlen, NULL,
9593 	    udp_opt_obj.odb_opt_des_arr,
9594 	    udp_opt_obj.odb_opt_arr_cnt,
9595 	    udp_opt_obj.odb_topmost_tpiprovider,
9596 	    B_TRUE, B_FALSE, cr);
9597 
9598 	if (error != 0) {
9599 		if (error < 0)
9600 			error = proto_tlitosyserr(-error);
9601 		return (error);
9602 	}
9603 
9604 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9605 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
9606 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
9607 	    NULL, cr);
9608 	rw_exit(&udp->udp_rwlock);
9609 
9610 	if (error < 0) {
9611 		/*
9612 		 * Pass on to ip
9613 		 */
9614 		error = ip_set_options(connp, level, option_name, optvalp,
9615 		    optlen, cr);
9616 	}
9617 
9618 	return (error);
9619 }
9620 
9621 void
9622 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
9623 {
9624 	conn_t	*connp = (conn_t *)proto_handle;
9625 	udp_t	*udp = connp->conn_udp;
9626 
9627 	mutex_enter(&udp->udp_recv_lock);
9628 	connp->conn_flow_cntrld = B_FALSE;
9629 	mutex_exit(&udp->udp_recv_lock);
9630 }
9631 
9632 /* ARGSUSED */
9633 int
9634 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
9635 {
9636 	conn_t	*connp = (conn_t *)proto_handle;
9637 
9638 	/* All Solaris components should pass a cred for this operation. */
9639 	ASSERT(cr != NULL);
9640 
9641 	/* shut down the send side */
9642 	if (how != SHUT_RD)
9643 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9644 		    SOCK_OPCTL_SHUT_SEND, 0);
9645 	/* shut down the recv side */
9646 	if (how != SHUT_WR)
9647 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9648 		    SOCK_OPCTL_SHUT_RECV, 0);
9649 	return (0);
9650 }
9651 
9652 int
9653 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
9654     int mode, int32_t *rvalp, cred_t *cr)
9655 {
9656 	conn_t  	*connp = (conn_t *)proto_handle;
9657 	int		error;
9658 
9659 	/* All Solaris components should pass a cred for this operation. */
9660 	ASSERT(cr != NULL);
9661 
9662 	switch (cmd) {
9663 		case ND_SET:
9664 		case ND_GET:
9665 		case _SIOCSOCKFALLBACK:
9666 		case TI_GETPEERNAME:
9667 		case TI_GETMYNAME:
9668 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
9669 			    cmd));
9670 			error = EINVAL;
9671 			break;
9672 		default:
9673 			/*
9674 			 * Pass on to IP using helper stream
9675 			 */
9676 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
9677 			    cmd, arg, mode, cr, rvalp);
9678 			break;
9679 	}
9680 	return (error);
9681 }
9682 
9683 /* ARGSUSED */
9684 int
9685 udp_accept(sock_lower_handle_t lproto_handle,
9686     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
9687     cred_t *cr)
9688 {
9689 	return (EOPNOTSUPP);
9690 }
9691 
9692 /* ARGSUSED */
9693 int
9694 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
9695 {
9696 	return (EOPNOTSUPP);
9697 }
9698 
/*
 * Table of the UDP socket downcall entry points.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of sock_downcalls_t; the trailing comment on each line
 * names the slot being filled.  Slots set to NULL (send_uio, recv_uio and
 * poll) have no UDP implementation in this table.
 *
 * NOTE(review): the slot comment for udp_clr_flowctrl reads
 * "sd_setflowctrl"; confirm the actual member name against the
 * sock_downcalls_t definition.
 */
sock_downcalls_t sock_udp_downcalls = {
	udp_activate,		/* sd_activate */
	udp_accept,		/* sd_accept */
	udp_bind,		/* sd_bind */
	udp_listen,		/* sd_listen */
	udp_connect,		/* sd_connect */
	udp_getpeername,	/* sd_getpeername */
	udp_getsockname,	/* sd_getsockname */
	udp_getsockopt,		/* sd_getsockopt */
	udp_setsockopt,		/* sd_setsockopt */
	udp_send,		/* sd_send */
	NULL,			/* sd_send_uio */
	NULL,			/* sd_recv_uio */
	NULL,			/* sd_poll */
	udp_shutdown,		/* sd_shutdown */
	udp_clr_flowctrl,	/* sd_setflowctrl */
	udp_ioctl,		/* sd_ioctl */
	udp_close		/* sd_close */
};
9718