xref: /titanic_44/usr/src/uts/common/inet/udp/udp.c (revision 5087e485d482853e61c9d38d8197dee892c7f43d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/pattr.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #include <sys/strsun.h>
34 #include <sys/time.h>
35 #define	_SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/kmem.h>
44 #include <sys/policy.h>
45 #include <sys/ucred.h>
46 #include <sys/zone.h>
47 
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sockio.h>
51 #include <sys/vtrace.h>
52 #include <sys/sdt.h>
53 #include <sys/debug.h>
54 #include <sys/isa_defs.h>
55 #include <sys/random.h>
56 #include <netinet/in.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 #include <netinet/udp.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_ire.h>
68 #include <inet/ip_if.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_ndp.h>
71 #include <inet/proto_set.h>
72 #include <inet/mib2.h>
73 #include <inet/nd.h>
74 #include <inet/optcom.h>
75 #include <inet/snmpcom.h>
76 #include <inet/kstatcom.h>
77 #include <inet/udp_impl.h>
78 #include <inet/ipclassifier.h>
79 #include <inet/ipsec_impl.h>
80 #include <inet/ipp_common.h>
81 #include <sys/squeue_impl.h>
82 #include <inet/ipnet.h>
83 
84 /*
85  * The ipsec_info.h header file is here since it has the definition for the
86  * M_CTL message types used by IP to convey information to the ULP. The
87  * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
88  */
89 #include <net/pfkeyv2.h>
90 #include <inet/ipsec_info.h>
91 
92 #include <sys/tsol/label.h>
93 #include <sys/tsol/tnet.h>
94 #include <rpc/pmap_prot.h>
95 
96 /*
97  * Synchronization notes:
98  *
99  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
100  * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
101  * We also use conn_lock when updating things that affect the IP classifier
102  * lookup.
103  * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock.
104  *
105  * The fanout lock uf_lock:
106  * When a UDP endpoint is bound to a local port, it is inserted into
107  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
108  * The size of the array is controlled by the udp_bind_fanout_size variable.
109  * This variable can be changed in /etc/system if the default value is
110  * not large enough.  Each bind hash bucket is protected by a per bucket
111  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
112  * structure and a few other fields in the udp_t. A UDP endpoint is removed
113  * from the bind hash list only when it is being unbound or being closed.
114  * The per bucket lock also protects a UDP endpoint's state changes.
115  *
116  * The udp_rwlock:
117  * This protects most of the other fields in the udp_t. The exact list of
118  * fields which are protected by each of the above locks is documented in
119  * the udp_t structure definition.
120  *
121  * Plumbing notes:
122  * UDP is always a device driver. For compatibility with mibopen() code
123  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
124  * dummy module.
125  *
126  * The above implies that we don't support any intermediate module to
127  * reside in between /dev/ip and udp -- in fact, we never supported such a
128  * scenario in the past as the inter-layer communication semantics have
129  * always been private.
130  */
131 
132 /* For /etc/system control */
133 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
134 
/* Canned text placed in an ndd "get" reply when the report cannot be built. */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"
139 
140 /* Option processing attrs */
141 typedef struct udpattrs_s {
142 	union {
143 		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
144 		ip4_pkt_t 	*udpattr_ipp4;	/* For V4 */
145 	} udpattr_ippu;
146 #define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
147 #define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
148 	mblk_t		*udpattr_mb;
149 	boolean_t	udpattr_credset;
150 } udpattrs_t;
151 
152 static void	udp_addr_req(queue_t *q, mblk_t *mp);
153 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
154 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
155 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
156 static int	udp_build_hdrs(udp_t *udp);
157 static void	udp_capability_req(queue_t *q, mblk_t *mp);
158 static int	udp_tpi_close(queue_t *q, int flags);
159 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
160 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
161 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
162 		    int sys_error);
163 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
164 		    t_scalar_t tlierr, int unixerr);
165 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
166 		    cred_t *cr);
167 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
168 		    char *value, caddr_t cp, cred_t *cr);
169 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
170 		    char *value, caddr_t cp, cred_t *cr);
171 static void	udp_icmp_error(conn_t *, mblk_t *);
172 static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
173 static void	udp_info_req(queue_t *q, mblk_t *mp);
174 static void	udp_input(void *, mblk_t *, void *);
175 static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
176 		    t_scalar_t addr_length);
177 static void	udp_lrput(queue_t *, mblk_t *);
178 static void	udp_lwput(queue_t *, mblk_t *);
179 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
180 		    cred_t *credp, boolean_t isv6);
181 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
182 		    cred_t *credp);
183 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
184 		    cred_t *credp);
185 static  int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
186 		    int *errorp, udpattrs_t *udpattrs);
187 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
188 static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
189 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
190 static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
191 		    cred_t *cr);
192 static void	udp_report_item(mblk_t *mp, udp_t *udp);
193 static int	udp_rinfop(queue_t *q, infod_t *dp);
194 static int	udp_rrw(queue_t *q, struiod_t *dp);
195 static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
196 		    cred_t *cr);
197 static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
198 		    ipha_t *ipha);
199 static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
200 		    t_scalar_t destlen, t_scalar_t err);
201 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
202 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
203     boolean_t random);
204 static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
205 		    int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
206 static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
207 		    int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
208 static void	udp_wput_other(queue_t *q, mblk_t *mp);
209 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
210 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
211 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
212 
213 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
214 static void	udp_stack_fini(netstackid_t stackid, void *arg);
215 
216 static void	*udp_kstat_init(netstackid_t stackid);
217 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
218 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
219 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
220 static int	udp_kstat_update(kstat_t *kp, int rw);
221 
222 static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
223 		    uint_t pkt_len);
224 static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
225 static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
226 
227 static int	udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
228 		    cred_t *, pid_t);
229 
230 /* Common routine for TPI and socket module */
231 static conn_t	*udp_do_open(cred_t *, boolean_t, int);
232 static void	udp_do_close(conn_t *);
233 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
234     boolean_t);
235 static int	udp_do_unbind(conn_t *);
236 static int	udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
237 static int	udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);
238 
239 int		udp_getsockname(sock_lower_handle_t,
240     struct sockaddr *, socklen_t *, cred_t *);
241 int		udp_getpeername(sock_lower_handle_t,
242     struct sockaddr *, socklen_t *, cred_t *);
243 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t);
244 static int	udp_post_ip_bind_connect(udp_t *, mblk_t *, int);
245 
/*
 * Default receive/transmit high and low water marks, in bytes.  They seed
 * udp_mod_info and the udp_xmit_ and udp_recv_ entries of udp_param_arr.
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024
250 
251 /*
252  * The following is defined in tcp.c
253  */
254 extern int	(*cl_inet_connect2)(netstackid_t stack_id,
255 		    uint8_t protocol, boolean_t is_outgoing,
256 		    sa_family_t addr_family,
257 		    uint8_t *laddrp, in_port_t lport,
258 		    uint8_t *faddrp, in_port_t fport, void *args);
259 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 *
 * The macro calls through cl_inet_connect2 unconditionally, so the caller
 * must verify that the hook is not NULL before using it.  The hook's return
 * value is stored in `err' (which is first reset to 0).
 *
 * For an IPv4 endpoint the low 32 bits of the v6-mapped local and foreign
 * addresses are passed with AF_INET; otherwise the full in6_addr_t values
 * are passed with AF_INET6.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, udp_t *udp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((udp)->udp_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
		    (udp)->udp_port,					\
		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
296 
297 static struct module_info udp_mod_info =  {
298 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
299 };
300 
301 /*
302  * Entry points for UDP as a device.
303  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
304  */
305 static struct qinit udp_rinitv4 = {
306 	NULL, NULL, udp_openv4, udp_tpi_close, NULL,
307 	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
308 };
309 
310 static struct qinit udp_rinitv6 = {
311 	NULL, NULL, udp_openv6, udp_tpi_close, NULL,
312 	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
313 };
314 
315 static struct qinit udp_winit = {
316 	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
317 	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
318 };
319 
320 /* UDP entry point during fallback */
321 struct qinit udp_fallback_sock_winit = {
322 	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
323 };
324 
325 /*
326  * UDP needs to handle I_LINK and I_PLINK since ifconfig
327  * likes to use it as a place to hang the various streams.
328  */
329 static struct qinit udp_lrinit = {
330 	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
331 	&udp_mod_info
332 };
333 
334 static struct qinit udp_lwinit = {
335 	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
336 	&udp_mod_info
337 };
338 
339 /* For AF_INET aka /dev/udp */
340 struct streamtab udpinfov4 = {
341 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
342 };
343 
344 /* For AF_INET6 aka /dev/udp6 */
345 struct streamtab udpinfov6 = {
346 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
347 };
348 
349 static	sin_t	sin_null;	/* Zero address for quick clears */
350 static	sin6_t	sin6_null;	/* Zero address for quick clears */
351 
352 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
353 
354 /* Default structure copied into T_INFO_ACK messages */
355 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
356 	T_INFO_ACK,
357 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
358 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
359 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
360 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
361 	sizeof (sin_t),	/* ADDR_size. */
362 	0,		/* OPT_size - not initialized here */
363 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
364 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
365 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
366 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
367 };
368 
369 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
370 
371 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
372 	T_INFO_ACK,
373 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
374 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
375 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
376 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
377 	sizeof (sin6_t), /* ADDR_size. */
378 	0,		/* OPT_size - not initialized here */
379 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
380 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
381 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
382 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
383 };
384 
/* Highest valid UDP port number; upper bound for the anon port tunables. */
#define	UDP_MAX_PORT	65535
387 
388 /*
389  * Table of ND variables supported by udp.  These are loaded into us_nd
390  * in udp_open.
391  * All of these are alterable, within the min/max values given, at run time.
392  */
393 /* BEGIN CSTYLED */
394 udpparam_t udp_param_arr[] = {
395  /*min		max		value		name */
396  { 0L,		256,		32,		"udp_wroff_extra" },
397  { 1L,		255,		255,		"udp_ipv4_ttl" },
398  { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
399  { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
400  { 0,		1,		1,		"udp_do_checksum" },
401  { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
402  { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
403  { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
404  { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
405  { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
406  { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
407  { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
408 };
409 /* END CSTYLED */
410 
411 /* Setable in /etc/system */
412 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
413 uint32_t udp_random_anon_port = 1;
414 
415 /*
416  * Hook functions to enable cluster networking.
417  * On non-clustered systems these vectors must always be NULL
418  */
419 
420 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
421     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
422     void *args) = NULL;
423 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
424     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
425     void *args) = NULL;
426 
/* Shorthand for a pointer to the union of all TPI primitive layouts. */
typedef union T_primitives *t_primp_t;
428 
429 /*
430  * Return the next anonymous port in the privileged port range for
431  * bind checking.
432  *
433  * Trusted Extension (TX) notes: TX allows administrator to mark or
434  * reserve ports as Multilevel ports (MLP). MLP has special function
435  * on TX systems. Once a port is made MLP, it's not available as
436  * ordinary port. This creates "holes" in the port name space. It
437  * may be necessary to skip the "holes" find a suitable anon port.
438  */
439 static in_port_t
440 udp_get_next_priv_port(udp_t *udp)
441 {
442 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
443 	in_port_t nextport;
444 	boolean_t restart = B_FALSE;
445 	udp_stack_t *us = udp->udp_us;
446 
447 retry:
448 	if (next_priv_port < us->us_min_anonpriv_port ||
449 	    next_priv_port >= IPPORT_RESERVED) {
450 		next_priv_port = IPPORT_RESERVED - 1;
451 		if (restart)
452 			return (0);
453 		restart = B_TRUE;
454 	}
455 
456 	if (is_system_labeled() &&
457 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
458 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
459 		next_priv_port = nextport;
460 		goto retry;
461 	}
462 
463 	return (next_priv_port--);
464 }
465 
466 /* UDP bind hash report triggered via the Named Dispatch mechanism. */
467 /* ARGSUSED */
468 static int
469 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
470 {
471 	udp_fanout_t	*udpf;
472 	int		i;
473 	zoneid_t	zoneid;
474 	conn_t		*connp;
475 	udp_t		*udp;
476 	udp_stack_t	*us;
477 
478 	connp = Q_TO_CONN(q);
479 	udp = connp->conn_udp;
480 	us = udp->udp_us;
481 
482 	/* Refer to comments in udp_status_report(). */
483 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
484 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
485 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
486 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
487 			return (0);
488 		}
489 	}
490 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
491 		/* The following may work even if we cannot get a large buf. */
492 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
493 		return (0);
494 	}
495 
496 	(void) mi_mpprintf(mp,
497 	    "UDP     " MI_COL_HDRPAD_STR
498 	/*   12345678[89ABCDEF] */
499 	    " zone lport src addr        dest addr       port  state");
500 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
501 
502 	zoneid = connp->conn_zoneid;
503 
504 	for (i = 0; i < us->us_bind_fanout_size; i++) {
505 		udpf = &us->us_bind_fanout[i];
506 		mutex_enter(&udpf->uf_lock);
507 
508 		/* Print the hash index. */
509 		udp = udpf->uf_udp;
510 		if (zoneid != GLOBAL_ZONEID) {
511 			/* skip to first entry in this zone; might be none */
512 			while (udp != NULL &&
513 			    udp->udp_connp->conn_zoneid != zoneid)
514 				udp = udp->udp_bind_hash;
515 		}
516 		if (udp != NULL) {
517 			uint_t print_len, buf_len;
518 
519 			buf_len = mp->b_cont->b_datap->db_lim -
520 			    mp->b_cont->b_wptr;
521 			print_len = snprintf((char *)mp->b_cont->b_wptr,
522 			    buf_len, "%d\n", i);
523 			if (print_len < buf_len) {
524 				mp->b_cont->b_wptr += print_len;
525 			} else {
526 				mp->b_cont->b_wptr += buf_len;
527 			}
528 			for (; udp != NULL; udp = udp->udp_bind_hash) {
529 				if (zoneid == GLOBAL_ZONEID ||
530 				    zoneid == udp->udp_connp->conn_zoneid)
531 					udp_report_item(mp->b_cont, udp);
532 			}
533 		}
534 		mutex_exit(&udpf->uf_lock);
535 	}
536 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
537 	return (0);
538 }
539 
540 /*
541  * Hash list removal routine for udp_t structures.
542  */
543 static void
544 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
545 {
546 	udp_t	*udpnext;
547 	kmutex_t *lockp;
548 	udp_stack_t *us = udp->udp_us;
549 
550 	if (udp->udp_ptpbhn == NULL)
551 		return;
552 
553 	/*
554 	 * Extract the lock pointer in case there are concurrent
555 	 * hash_remove's for this instance.
556 	 */
557 	ASSERT(udp->udp_port != 0);
558 	if (!caller_holds_lock) {
559 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
560 		    us->us_bind_fanout_size)].uf_lock;
561 		ASSERT(lockp != NULL);
562 		mutex_enter(lockp);
563 	}
564 	if (udp->udp_ptpbhn != NULL) {
565 		udpnext = udp->udp_bind_hash;
566 		if (udpnext != NULL) {
567 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
568 			udp->udp_bind_hash = NULL;
569 		}
570 		*udp->udp_ptpbhn = udpnext;
571 		udp->udp_ptpbhn = NULL;
572 	}
573 	if (!caller_holds_lock) {
574 		mutex_exit(lockp);
575 	}
576 }
577 
578 static void
579 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
580 {
581 	udp_t	**udpp;
582 	udp_t	*udpnext;
583 
584 	ASSERT(MUTEX_HELD(&uf->uf_lock));
585 	ASSERT(udp->udp_ptpbhn == NULL);
586 	udpp = &uf->uf_udp;
587 	udpnext = udpp[0];
588 	if (udpnext != NULL) {
589 		/*
590 		 * If the new udp bound to the INADDR_ANY address
591 		 * and the first one in the list is not bound to
592 		 * INADDR_ANY we skip all entries until we find the
593 		 * first one bound to INADDR_ANY.
594 		 * This makes sure that applications binding to a
595 		 * specific address get preference over those binding to
596 		 * INADDR_ANY.
597 		 */
598 		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
599 		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
600 			while ((udpnext = udpp[0]) != NULL &&
601 			    !V6_OR_V4_INADDR_ANY(
602 			    udpnext->udp_bound_v6src)) {
603 				udpp = &(udpnext->udp_bind_hash);
604 			}
605 			if (udpnext != NULL)
606 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
607 		} else {
608 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
609 		}
610 	}
611 	udp->udp_bind_hash = udpnext;
612 	udp->udp_ptpbhn = udpp;
613 	udpp[0] = udp;
614 }
615 
616 /*
617  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
618  * passed to udp_wput.
619  * It associates a port number and local address with the stream.
620  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
621  * protocol type (IPPROTO_UDP) placed in the message following the address.
622  * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
623  * (Called as writer.)
624  *
625  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
626  * without setting SO_REUSEADDR. This is needed so that they
627  * can be viewed as two independent transport protocols.
628  * However, anonymouns ports are allocated from the same range to avoid
629  * duplicating the us->us_next_port_to_try.
630  */
631 static void
632 udp_tpi_bind(queue_t *q, mblk_t *mp)
633 {
634 	sin_t		*sin;
635 	sin6_t		*sin6;
636 	mblk_t		*mp1;
637 	struct T_bind_req *tbr;
638 	conn_t		*connp;
639 	udp_t		*udp;
640 	int		error;
641 	struct sockaddr	*sa;
642 
643 	connp = Q_TO_CONN(q);
644 	udp = connp->conn_udp;
645 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
646 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
647 		    "udp_bind: bad req, len %u",
648 		    (uint_t)(mp->b_wptr - mp->b_rptr));
649 		udp_err_ack(q, mp, TPROTO, 0);
650 		return;
651 	}
652 	if (udp->udp_state != TS_UNBND) {
653 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
654 		    "udp_bind: bad state, %u", udp->udp_state);
655 		udp_err_ack(q, mp, TOUTSTATE, 0);
656 		return;
657 	}
658 	/*
659 	 * Reallocate the message to make sure we have enough room for an
660 	 * address and the protocol type.
661 	 */
662 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
663 	if (!mp1) {
664 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
665 		return;
666 	}
667 
668 	mp = mp1;
669 
670 	/* Reset the message type in preparation for shipping it back. */
671 	DB_TYPE(mp) = M_PCPROTO;
672 
673 	tbr = (struct T_bind_req *)mp->b_rptr;
674 	switch (tbr->ADDR_length) {
675 	case 0:			/* Request for a generic port */
676 		tbr->ADDR_offset = sizeof (struct T_bind_req);
677 		if (udp->udp_family == AF_INET) {
678 			tbr->ADDR_length = sizeof (sin_t);
679 			sin = (sin_t *)&tbr[1];
680 			*sin = sin_null;
681 			sin->sin_family = AF_INET;
682 			mp->b_wptr = (uchar_t *)&sin[1];
683 			sa = (struct sockaddr *)sin;
684 		} else {
685 			ASSERT(udp->udp_family == AF_INET6);
686 			tbr->ADDR_length = sizeof (sin6_t);
687 			sin6 = (sin6_t *)&tbr[1];
688 			*sin6 = sin6_null;
689 			sin6->sin6_family = AF_INET6;
690 			mp->b_wptr = (uchar_t *)&sin6[1];
691 			sa = (struct sockaddr *)sin6;
692 		}
693 		break;
694 
695 	case sizeof (sin_t):	/* Complete IPv4 address */
696 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
697 		    sizeof (sin_t));
698 		if (sa == NULL || !OK_32PTR((char *)sa)) {
699 			udp_err_ack(q, mp, TSYSERR, EINVAL);
700 			return;
701 		}
702 		if (udp->udp_family != AF_INET ||
703 		    sa->sa_family != AF_INET) {
704 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
705 			return;
706 		}
707 		break;
708 
709 	case sizeof (sin6_t):	/* complete IPv6 address */
710 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
711 		    sizeof (sin6_t));
712 		if (sa == NULL || !OK_32PTR((char *)sa)) {
713 			udp_err_ack(q, mp, TSYSERR, EINVAL);
714 			return;
715 		}
716 		if (udp->udp_family != AF_INET6 ||
717 		    sa->sa_family != AF_INET6) {
718 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
719 			return;
720 		}
721 		break;
722 
723 	default:		/* Invalid request */
724 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
725 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
726 		udp_err_ack(q, mp, TBADADDR, 0);
727 		return;
728 	}
729 
730 
731 	cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
732 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
733 	    tbr->PRIM_type != O_T_BIND_REQ);
734 
735 	if (error != 0) {
736 		if (error > 0) {
737 			udp_err_ack(q, mp, TSYSERR, error);
738 		} else {
739 			udp_err_ack(q, mp, -error, 0);
740 		}
741 	} else {
742 		tbr->PRIM_type = T_BIND_ACK;
743 		qreply(q, mp);
744 	}
745 }
746 
747 /*
748  * This routine handles each T_CONN_REQ message passed to udp.  It
749  * associates a default destination address with the stream.
750  *
751  * This routine sends down a T_BIND_REQ to IP with the following mblks:
752  *	T_BIND_REQ	- specifying local and remote address/port
753  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
754  *	T_OK_ACK	- for the T_CONN_REQ
755  *	T_CONN_CON	- to keep the TPI user happy
756  *
757  * The connect completes in udp_do_connect.
758  * When a T_BIND_ACK is received information is extracted from the IRE
759  * and the two appended messages are sent to the TPI user.
760  * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
761  * convert it to an error ack for the appropriate primitive.
762  */
763 static void
764 udp_tpi_connect(queue_t *q, mblk_t *mp)
765 {
766 	mblk_t	*mp1;
767 	udp_t	*udp;
768 	conn_t	*connp = Q_TO_CONN(q);
769 	int	error;
770 	socklen_t	len;
771 	struct sockaddr		*sa;
772 	struct T_conn_req	*tcr;
773 
774 	udp = connp->conn_udp;
775 	tcr = (struct T_conn_req *)mp->b_rptr;
776 
777 	/* A bit of sanity checking */
778 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
779 		udp_err_ack(q, mp, TPROTO, 0);
780 		return;
781 	}
782 
783 	if (tcr->OPT_length != 0) {
784 		udp_err_ack(q, mp, TBADOPT, 0);
785 		return;
786 	}
787 
788 	/*
789 	 * Determine packet type based on type of address passed in
790 	 * the request should contain an IPv4 or IPv6 address.
791 	 * Make sure that address family matches the type of
792 	 * family of the the address passed down
793 	 */
794 	len = tcr->DEST_length;
795 	switch (tcr->DEST_length) {
796 	default:
797 		udp_err_ack(q, mp, TBADADDR, 0);
798 		return;
799 
800 	case sizeof (sin_t):
801 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
802 		    sizeof (sin_t));
803 		break;
804 
805 	case sizeof (sin6_t):
806 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
807 		    sizeof (sin6_t));
808 		break;
809 	}
810 
811 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
812 	if (error != 0) {
813 		udp_err_ack(q, mp, TSYSERR, error);
814 		return;
815 	}
816 
817 	/*
818 	 * We have to send a connection confirmation to
819 	 * keep TLI happy.
820 	 */
821 	if (udp->udp_family == AF_INET) {
822 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
823 		    sizeof (sin_t), NULL, 0);
824 	} else {
825 		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
826 		    sizeof (sin6_t), NULL, 0);
827 	}
828 	if (mp1 == NULL) {
829 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
830 		return;
831 	}
832 
833 	/*
834 	 * ok_ack for T_CONN_REQ
835 	 */
836 	mp = mi_tpi_ok_ack_alloc(mp);
837 	if (mp == NULL) {
838 		/* Unable to reuse the T_CONN_REQ for the ack. */
839 		freemsg(mp1);
840 		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
841 		return;
842 	}
843 
844 	error = udp_do_connect(connp, sa, len);
845 	if (error != 0) {
846 		freeb(mp1);
847 		if (error < 0)
848 			udp_err_ack(q, mp, -error, 0);
849 		else
850 			udp_err_ack(q, mp, TSYSERR, error);
851 	} else {
852 		putnext(connp->conn_rq, mp);
853 		putnext(connp->conn_rq, mp1);
854 	}
855 }
856 
857 static int
858 udp_tpi_close(queue_t *q, int flags)
859 {
860 	conn_t	*connp;
861 
862 	if (flags & SO_FALLBACK) {
863 		/*
864 		 * stream is being closed while in fallback
865 		 * simply free the resources that were allocated
866 		 */
867 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
868 		qprocsoff(q);
869 		goto done;
870 	}
871 
872 	connp = Q_TO_CONN(q);
873 	udp_do_close(connp);
874 done:
875 	q->q_ptr = WR(q)->q_ptr = NULL;
876 	return (0);
877 }
878 
879 /*
880  * Called in the close path to quiesce the conn
881  */
882 void
883 udp_quiesce_conn(conn_t *connp)
884 {
885 	udp_t	*udp = connp->conn_udp;
886 
887 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
888 		/*
889 		 * Running in cluster mode - register unbind information
890 		 */
891 		if (udp->udp_ipversion == IPV4_VERSION) {
892 			(*cl_inet_unbind)(
893 			    connp->conn_netstack->netstack_stackid,
894 			    IPPROTO_UDP, AF_INET,
895 			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
896 			    (in_port_t)udp->udp_port, NULL);
897 		} else {
898 			(*cl_inet_unbind)(
899 			    connp->conn_netstack->netstack_stackid,
900 			    IPPROTO_UDP, AF_INET6,
901 			    (uint8_t *)(&(udp->udp_v6src)),
902 			    (in_port_t)udp->udp_port, NULL);
903 		}
904 	}
905 
906 	udp_bind_hash_remove(udp, B_FALSE);
907 
908 }
909 
910 void
911 udp_close_free(conn_t *connp)
912 {
913 	udp_t *udp = connp->conn_udp;
914 
915 	/* If there are any options associated with the stream, free them. */
916 	if (udp->udp_ip_snd_options != NULL) {
917 		mi_free((char *)udp->udp_ip_snd_options);
918 		udp->udp_ip_snd_options = NULL;
919 		udp->udp_ip_snd_options_len = 0;
920 	}
921 
922 	if (udp->udp_ip_rcv_options != NULL) {
923 		mi_free((char *)udp->udp_ip_rcv_options);
924 		udp->udp_ip_rcv_options = NULL;
925 		udp->udp_ip_rcv_options_len = 0;
926 	}
927 
928 	/* Free memory associated with sticky options */
929 	if (udp->udp_sticky_hdrs_len != 0) {
930 		kmem_free(udp->udp_sticky_hdrs,
931 		    udp->udp_sticky_hdrs_len);
932 		udp->udp_sticky_hdrs = NULL;
933 		udp->udp_sticky_hdrs_len = 0;
934 	}
935 
936 	ip6_pkt_free(&udp->udp_sticky_ipp);
937 
938 	/*
939 	 * Clear any fields which the kmem_cache constructor clears.
940 	 * Only udp_connp needs to be preserved.
941 	 * TBD: We should make this more efficient to avoid clearing
942 	 * everything.
943 	 */
944 	ASSERT(udp->udp_connp == connp);
945 	bzero(udp, sizeof (udp_t));
946 	udp->udp_connp = connp;
947 }
948 
949 static int
950 udp_do_disconnect(conn_t *connp)
951 {
952 	udp_t	*udp;
953 	mblk_t	*ire_mp;
954 	udp_fanout_t *udpf;
955 	udp_stack_t *us;
956 	int	error;
957 
958 	udp = connp->conn_udp;
959 	us = udp->udp_us;
960 	rw_enter(&udp->udp_rwlock, RW_WRITER);
961 	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
962 		rw_exit(&udp->udp_rwlock);
963 		return (-TOUTSTATE);
964 	}
965 	udp->udp_pending_op = T_DISCON_REQ;
966 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
967 	    us->us_bind_fanout_size)];
968 	mutex_enter(&udpf->uf_lock);
969 	udp->udp_v6src = udp->udp_bound_v6src;
970 	udp->udp_state = TS_IDLE;
971 	mutex_exit(&udpf->uf_lock);
972 
973 	if (udp->udp_family == AF_INET6) {
974 		/* Rebuild the header template */
975 		error = udp_build_hdrs(udp);
976 		if (error != 0) {
977 			udp->udp_pending_op = -1;
978 			rw_exit(&udp->udp_rwlock);
979 			return (error);
980 		}
981 	}
982 
983 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
984 	if (ire_mp == NULL) {
985 		mutex_enter(&udpf->uf_lock);
986 		udp->udp_pending_op = -1;
987 		mutex_exit(&udpf->uf_lock);
988 		rw_exit(&udp->udp_rwlock);
989 		return (ENOMEM);
990 	}
991 
992 	rw_exit(&udp->udp_rwlock);
993 
994 	if (udp->udp_family == AF_INET6) {
995 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
996 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
997 	} else {
998 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
999 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE);
1000 	}
1001 
1002 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
1003 }
1004 
1005 
1006 static void
1007 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
1008 {
1009 	conn_t	*connp = Q_TO_CONN(q);
1010 	int	error;
1011 
1012 	/*
1013 	 * Allocate the largest primitive we need to send back
1014 	 * T_error_ack is > than T_ok_ack
1015 	 */
1016 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
1017 	if (mp == NULL) {
1018 		/* Unable to reuse the T_DISCON_REQ for the ack. */
1019 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
1020 		return;
1021 	}
1022 
1023 	error = udp_do_disconnect(connp);
1024 
1025 	if (error != 0) {
1026 		if (error < 0) {
1027 			udp_err_ack(q, mp, -error, 0);
1028 		} else {
1029 			udp_err_ack(q, mp, TSYSERR, error);
1030 		}
1031 	} else {
1032 		mp = mi_tpi_ok_ack_alloc(mp);
1033 		ASSERT(mp != NULL);
1034 		qreply(q, mp);
1035 	}
1036 }
1037 
1038 int
1039 udp_disconnect(conn_t *connp)
1040 {
1041 	int error;
1042 	udp_t *udp = connp->conn_udp;
1043 
1044 	udp->udp_dgram_errind = B_FALSE;
1045 
1046 	error = udp_do_disconnect(connp);
1047 
1048 	if (error < 0)
1049 		error = proto_tlitosyserr(-error);
1050 
1051 	return (error);
1052 }
1053 
1054 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1055 static void
1056 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1057 {
1058 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1059 		qreply(q, mp);
1060 }
1061 
1062 /* Shorthand to generate and send TPI error acks to our client */
1063 static void
1064 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
1065     int sys_error)
1066 {
1067 	struct T_error_ack	*teackp;
1068 
1069 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1070 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
1071 		teackp = (struct T_error_ack *)mp->b_rptr;
1072 		teackp->ERROR_prim = primitive;
1073 		teackp->TLI_error = t_error;
1074 		teackp->UNIX_error = sys_error;
1075 		qreply(q, mp);
1076 	}
1077 }
1078 
1079 /*ARGSUSED*/
1080 static int
1081 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
1082 {
1083 	int i;
1084 	udp_t		*udp = Q_TO_UDP(q);
1085 	udp_stack_t *us = udp->udp_us;
1086 
1087 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1088 		if (us->us_epriv_ports[i] != 0)
1089 			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
1090 	}
1091 	return (0);
1092 }
1093 
1094 /* ARGSUSED */
1095 static int
1096 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1097     cred_t *cr)
1098 {
1099 	long	new_value;
1100 	int	i;
1101 	udp_t		*udp = Q_TO_UDP(q);
1102 	udp_stack_t *us = udp->udp_us;
1103 
1104 	/*
1105 	 * Fail the request if the new value does not lie within the
1106 	 * port number limits.
1107 	 */
1108 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1109 	    new_value <= 0 || new_value >= 65536) {
1110 		return (EINVAL);
1111 	}
1112 
1113 	/* Check if the value is already in the list */
1114 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1115 		if (new_value == us->us_epriv_ports[i]) {
1116 			return (EEXIST);
1117 		}
1118 	}
1119 	/* Find an empty slot */
1120 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1121 		if (us->us_epriv_ports[i] == 0)
1122 			break;
1123 	}
1124 	if (i == us->us_num_epriv_ports) {
1125 		return (EOVERFLOW);
1126 	}
1127 
1128 	/* Set the new value */
1129 	us->us_epriv_ports[i] = (in_port_t)new_value;
1130 	return (0);
1131 }
1132 
1133 /* ARGSUSED */
1134 static int
1135 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1136     cred_t *cr)
1137 {
1138 	long	new_value;
1139 	int	i;
1140 	udp_t		*udp = Q_TO_UDP(q);
1141 	udp_stack_t *us = udp->udp_us;
1142 
1143 	/*
1144 	 * Fail the request if the new value does not lie within the
1145 	 * port number limits.
1146 	 */
1147 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
1148 	    new_value <= 0 || new_value >= 65536) {
1149 		return (EINVAL);
1150 	}
1151 
1152 	/* Check that the value is already in the list */
1153 	for (i = 0; i < us->us_num_epriv_ports; i++) {
1154 		if (us->us_epriv_ports[i] == new_value)
1155 			break;
1156 	}
1157 	if (i == us->us_num_epriv_ports) {
1158 		return (ESRCH);
1159 	}
1160 
1161 	/* Clear the value */
1162 	us->us_epriv_ports[i] = 0;
1163 	return (0);
1164 }
1165 
1166 /* At minimum we need 4 bytes of UDP header */
1167 #define	ICMP_MIN_UDP_HDR	4
1168 
1169 /*
1170  * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP.
1171  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1172  * Assumes that IP has pulled up everything up to and including the ICMP header.
1173  */
1174 static void
1175 udp_icmp_error(conn_t *connp, mblk_t *mp)
1176 			    {
1177 	icmph_t *icmph;
1178 	ipha_t	*ipha;
1179 	int	iph_hdr_length;
1180 	udpha_t	*udpha;
1181 	sin_t	sin;
1182 	sin6_t	sin6;
1183 	mblk_t	*mp1;
1184 	int	error = 0;
1185 	udp_t	*udp = connp->conn_udp;
1186 
1187 	mp1 = NULL;
1188 	ipha = (ipha_t *)mp->b_rptr;
1189 
1190 	ASSERT(OK_32PTR(mp->b_rptr));
1191 
1192 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1193 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1194 		udp_icmp_error_ipv6(connp, mp);
1195 		return;
1196 	}
1197 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
1198 
1199 	/* Skip past the outer IP and ICMP headers */
1200 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1201 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1202 	ipha = (ipha_t *)&icmph[1];
1203 
1204 	/* Skip past the inner IP and find the ULP header */
1205 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1206 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1207 
1208 	switch (icmph->icmph_type) {
1209 	case ICMP_DEST_UNREACHABLE:
1210 		switch (icmph->icmph_code) {
1211 		case ICMP_FRAGMENTATION_NEEDED:
1212 			/*
1213 			 * IP has already adjusted the path MTU.
1214 			 */
1215 			break;
1216 		case ICMP_PORT_UNREACHABLE:
1217 		case ICMP_PROTOCOL_UNREACHABLE:
1218 			error = ECONNREFUSED;
1219 			break;
1220 		default:
1221 			/* Transient errors */
1222 			break;
1223 		}
1224 		break;
1225 	default:
1226 		/* Transient errors */
1227 		break;
1228 	}
1229 	if (error == 0) {
1230 		freemsg(mp);
1231 		return;
1232 	}
1233 
1234 	/*
1235 	 * Deliver T_UDERROR_IND when the application has asked for it.
1236 	 * The socket layer enables this automatically when connected.
1237 	 */
1238 	if (!udp->udp_dgram_errind) {
1239 		freemsg(mp);
1240 		return;
1241 	}
1242 
1243 
1244 	switch (udp->udp_family) {
1245 	case AF_INET:
1246 		sin = sin_null;
1247 		sin.sin_family = AF_INET;
1248 		sin.sin_addr.s_addr = ipha->ipha_dst;
1249 		sin.sin_port = udpha->uha_dst_port;
1250 		if (IPCL_IS_NONSTR(connp)) {
1251 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1252 			if (udp->udp_state == TS_DATA_XFER) {
1253 				if (sin.sin_port == udp->udp_dstport &&
1254 				    sin.sin_addr.s_addr ==
1255 				    V4_PART_OF_V6(udp->udp_v6dst)) {
1256 
1257 					rw_exit(&udp->udp_rwlock);
1258 					(*connp->conn_upcalls->su_set_error)
1259 					    (connp->conn_upper_handle, error);
1260 					goto done;
1261 				}
1262 			} else {
1263 				udp->udp_delayed_error = error;
1264 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1265 			}
1266 			rw_exit(&udp->udp_rwlock);
1267 		} else {
1268 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1269 			    NULL, 0, error);
1270 		}
1271 		break;
1272 	case AF_INET6:
1273 		sin6 = sin6_null;
1274 		sin6.sin6_family = AF_INET6;
1275 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1276 		sin6.sin6_port = udpha->uha_dst_port;
1277 		if (IPCL_IS_NONSTR(connp)) {
1278 			rw_enter(&udp->udp_rwlock, RW_WRITER);
1279 			if (udp->udp_state == TS_DATA_XFER) {
1280 				if (sin6.sin6_port == udp->udp_dstport &&
1281 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1282 				    &udp->udp_v6dst)) {
1283 					rw_exit(&udp->udp_rwlock);
1284 					(*connp->conn_upcalls->su_set_error)
1285 					    (connp->conn_upper_handle, error);
1286 					goto done;
1287 				}
1288 			} else {
1289 				udp->udp_delayed_error = error;
1290 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1291 			}
1292 			rw_exit(&udp->udp_rwlock);
1293 		} else {
1294 
1295 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1296 			    NULL, 0, error);
1297 		}
1298 		break;
1299 	}
1300 	if (mp1 != NULL)
1301 		putnext(connp->conn_rq, mp1);
1302 done:
1303 	freemsg(mp);
1304 }
1305 
1306 /*
1307  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1308  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1309  * Assumes that IP has pulled up all the extension headers as well as the
1310  * ICMPv6 header.
1311  */
1312 static void
1313 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
1314 {
1315 	icmp6_t		*icmp6;
1316 	ip6_t		*ip6h, *outer_ip6h;
1317 	uint16_t	iph_hdr_length;
1318 	uint8_t		*nexthdrp;
1319 	udpha_t		*udpha;
1320 	sin6_t		sin6;
1321 	mblk_t		*mp1;
1322 	int		error = 0;
1323 	udp_t		*udp = connp->conn_udp;
1324 	udp_stack_t	*us = udp->udp_us;
1325 
1326 	outer_ip6h = (ip6_t *)mp->b_rptr;
1327 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1328 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1329 	else
1330 		iph_hdr_length = IPV6_HDR_LEN;
1331 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1332 	ip6h = (ip6_t *)&icmp6[1];
1333 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1334 		freemsg(mp);
1335 		return;
1336 	}
1337 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1338 
1339 	switch (icmp6->icmp6_type) {
1340 	case ICMP6_DST_UNREACH:
1341 		switch (icmp6->icmp6_code) {
1342 		case ICMP6_DST_UNREACH_NOPORT:
1343 			error = ECONNREFUSED;
1344 			break;
1345 		case ICMP6_DST_UNREACH_ADMIN:
1346 		case ICMP6_DST_UNREACH_NOROUTE:
1347 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1348 		case ICMP6_DST_UNREACH_ADDR:
1349 			/* Transient errors */
1350 			break;
1351 		default:
1352 			break;
1353 		}
1354 		break;
1355 	case ICMP6_PACKET_TOO_BIG: {
1356 		struct T_unitdata_ind	*tudi;
1357 		struct T_opthdr		*toh;
1358 		size_t			udi_size;
1359 		mblk_t			*newmp;
1360 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1361 		    sizeof (struct ip6_mtuinfo);
1362 		sin6_t			*sin6;
1363 		struct ip6_mtuinfo	*mtuinfo;
1364 
1365 		/*
1366 		 * If the application has requested to receive path mtu
1367 		 * information, send up an empty message containing an
1368 		 * IPV6_PATHMTU ancillary data item.
1369 		 */
1370 		if (!udp->udp_ipv6_recvpathmtu)
1371 			break;
1372 
1373 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1374 		    opt_length;
1375 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1376 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
1377 			break;
1378 		}
1379 
1380 		/*
1381 		 * newmp->b_cont is left to NULL on purpose.  This is an
1382 		 * empty message containing only ancillary data.
1383 		 */
1384 		newmp->b_datap->db_type = M_PROTO;
1385 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1386 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1387 		tudi->PRIM_type = T_UNITDATA_IND;
1388 		tudi->SRC_length = sizeof (sin6_t);
1389 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1390 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1391 		tudi->OPT_length = opt_length;
1392 
1393 		sin6 = (sin6_t *)&tudi[1];
1394 		bzero(sin6, sizeof (sin6_t));
1395 		sin6->sin6_family = AF_INET6;
1396 		sin6->sin6_addr = udp->udp_v6dst;
1397 
1398 		toh = (struct T_opthdr *)&sin6[1];
1399 		toh->level = IPPROTO_IPV6;
1400 		toh->name = IPV6_PATHMTU;
1401 		toh->len = opt_length;
1402 		toh->status = 0;
1403 
1404 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1405 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1406 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1407 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1408 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1409 		/*
1410 		 * We've consumed everything we need from the original
1411 		 * message.  Free it, then send our empty message.
1412 		 */
1413 		freemsg(mp);
1414 		if (!IPCL_IS_NONSTR(connp)) {
1415 			putnext(connp->conn_rq, newmp);
1416 		} else {
1417 			(*connp->conn_upcalls->su_recv)
1418 			    (connp->conn_upper_handle, newmp, 0, 0, &error,
1419 			    NULL);
1420 		}
1421 		return;
1422 	}
1423 	case ICMP6_TIME_EXCEEDED:
1424 		/* Transient errors */
1425 		break;
1426 	case ICMP6_PARAM_PROB:
1427 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1428 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1429 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1430 		    (uchar_t *)nexthdrp) {
1431 			error = ECONNREFUSED;
1432 			break;
1433 		}
1434 		break;
1435 	}
1436 	if (error == 0) {
1437 		freemsg(mp);
1438 		return;
1439 	}
1440 
1441 	/*
1442 	 * Deliver T_UDERROR_IND when the application has asked for it.
1443 	 * The socket layer enables this automatically when connected.
1444 	 */
1445 	if (!udp->udp_dgram_errind) {
1446 		freemsg(mp);
1447 		return;
1448 	}
1449 
1450 	sin6 = sin6_null;
1451 	sin6.sin6_family = AF_INET6;
1452 	sin6.sin6_addr = ip6h->ip6_dst;
1453 	sin6.sin6_port = udpha->uha_dst_port;
1454 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1455 
1456 	if (IPCL_IS_NONSTR(connp)) {
1457 		rw_enter(&udp->udp_rwlock, RW_WRITER);
1458 		if (udp->udp_state == TS_DATA_XFER) {
1459 			if (sin6.sin6_port == udp->udp_dstport &&
1460 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1461 			    &udp->udp_v6dst)) {
1462 				rw_exit(&udp->udp_rwlock);
1463 				(*connp->conn_upcalls->su_set_error)
1464 				    (connp->conn_upper_handle, error);
1465 				goto done;
1466 			}
1467 		} else {
1468 			udp->udp_delayed_error = error;
1469 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1470 		}
1471 		rw_exit(&udp->udp_rwlock);
1472 	} else {
1473 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1474 		    NULL, 0, error);
1475 		if (mp1 != NULL)
1476 			putnext(connp->conn_rq, mp1);
1477 	}
1478 
1479 done:
1480 	freemsg(mp);
1481 }
1482 
1483 /*
1484  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1485  * The local address is filled in if endpoint is bound. The remote address
1486  * is filled in if remote address has been precified ("connected endpoint")
1487  * (The concept of connected CLTS sockets is alien to published TPI
1488  *  but we support it anyway).
1489  */
1490 static void
1491 udp_addr_req(queue_t *q, mblk_t *mp)
1492 {
1493 	sin_t	*sin;
1494 	sin6_t	*sin6;
1495 	mblk_t	*ackmp;
1496 	struct T_addr_ack *taa;
1497 	udp_t	*udp = Q_TO_UDP(q);
1498 
1499 	/* Make it large enough for worst case */
1500 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1501 	    2 * sizeof (sin6_t), 1);
1502 	if (ackmp == NULL) {
1503 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1504 		return;
1505 	}
1506 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1507 
1508 	bzero(taa, sizeof (struct T_addr_ack));
1509 	ackmp->b_wptr = (uchar_t *)&taa[1];
1510 
1511 	taa->PRIM_type = T_ADDR_ACK;
1512 	ackmp->b_datap->db_type = M_PCPROTO;
1513 	rw_enter(&udp->udp_rwlock, RW_READER);
1514 	/*
1515 	 * Note: Following code assumes 32 bit alignment of basic
1516 	 * data structures like sin_t and struct T_addr_ack.
1517 	 */
1518 	if (udp->udp_state != TS_UNBND) {
1519 		/*
1520 		 * Fill in local address first
1521 		 */
1522 		taa->LOCADDR_offset = sizeof (*taa);
1523 		if (udp->udp_family == AF_INET) {
1524 			taa->LOCADDR_length = sizeof (sin_t);
1525 			sin = (sin_t *)&taa[1];
1526 			/* Fill zeroes and then initialize non-zero fields */
1527 			*sin = sin_null;
1528 			sin->sin_family = AF_INET;
1529 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
1530 			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1531 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
1532 				    sin->sin_addr.s_addr);
1533 			} else {
1534 				/*
1535 				 * INADDR_ANY
1536 				 * udp_v6src is not set, we might be bound to
1537 				 * broadcast/multicast. Use udp_bound_v6src as
1538 				 * local address instead (that could
1539 				 * also still be INADDR_ANY)
1540 				 */
1541 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
1542 				    sin->sin_addr.s_addr);
1543 			}
1544 			sin->sin_port = udp->udp_port;
1545 			ackmp->b_wptr = (uchar_t *)&sin[1];
1546 			if (udp->udp_state == TS_DATA_XFER) {
1547 				/*
1548 				 * connected, fill remote address too
1549 				 */
1550 				taa->REMADDR_length = sizeof (sin_t);
1551 				/* assumed 32-bit alignment */
1552 				taa->REMADDR_offset = taa->LOCADDR_offset +
1553 				    taa->LOCADDR_length;
1554 
1555 				sin = (sin_t *)(ackmp->b_rptr +
1556 				    taa->REMADDR_offset);
1557 				/* initialize */
1558 				*sin = sin_null;
1559 				sin->sin_family = AF_INET;
1560 				sin->sin_addr.s_addr =
1561 				    V4_PART_OF_V6(udp->udp_v6dst);
1562 				sin->sin_port = udp->udp_dstport;
1563 				ackmp->b_wptr = (uchar_t *)&sin[1];
1564 			}
1565 		} else {
1566 			taa->LOCADDR_length = sizeof (sin6_t);
1567 			sin6 = (sin6_t *)&taa[1];
1568 			/* Fill zeroes and then initialize non-zero fields */
1569 			*sin6 = sin6_null;
1570 			sin6->sin6_family = AF_INET6;
1571 			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
1572 				sin6->sin6_addr = udp->udp_v6src;
1573 			} else {
1574 				/*
1575 				 * UNSPECIFIED
1576 				 * udp_v6src is not set, we might be bound to
1577 				 * broadcast/multicast. Use udp_bound_v6src as
1578 				 * local address instead (that could
1579 				 * also still be UNSPECIFIED)
1580 				 */
1581 				sin6->sin6_addr =
1582 				    udp->udp_bound_v6src;
1583 			}
1584 			sin6->sin6_port = udp->udp_port;
1585 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1586 			if (udp->udp_state == TS_DATA_XFER) {
1587 				/*
1588 				 * connected, fill remote address too
1589 				 */
1590 				taa->REMADDR_length = sizeof (sin6_t);
1591 				/* assumed 32-bit alignment */
1592 				taa->REMADDR_offset = taa->LOCADDR_offset +
1593 				    taa->LOCADDR_length;
1594 
1595 				sin6 = (sin6_t *)(ackmp->b_rptr +
1596 				    taa->REMADDR_offset);
1597 				/* initialize */
1598 				*sin6 = sin6_null;
1599 				sin6->sin6_family = AF_INET6;
1600 				sin6->sin6_addr = udp->udp_v6dst;
1601 				sin6->sin6_port =  udp->udp_dstport;
1602 				ackmp->b_wptr = (uchar_t *)&sin6[1];
1603 			}
1604 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1605 		}
1606 	}
1607 	rw_exit(&udp->udp_rwlock);
1608 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1609 	qreply(q, ackmp);
1610 }
1611 
1612 static void
1613 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1614 {
1615 	if (udp->udp_family == AF_INET) {
1616 		*tap = udp_g_t_info_ack_ipv4;
1617 	} else {
1618 		*tap = udp_g_t_info_ack_ipv6;
1619 	}
1620 	tap->CURRENT_state = udp->udp_state;
1621 	tap->OPT_size = udp_max_optsize;
1622 }
1623 
1624 static void
1625 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1626     t_uscalar_t cap_bits1)
1627 {
1628 	tcap->CAP_bits1 = 0;
1629 
1630 	if (cap_bits1 & TC1_INFO) {
1631 		udp_copy_info(&tcap->INFO_ack, udp);
1632 		tcap->CAP_bits1 |= TC1_INFO;
1633 	}
1634 }
1635 
1636 /*
1637  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1638  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1639  * udp_g_t_info_ack.  The current state of the stream is copied from
1640  * udp_state.
1641  */
1642 static void
1643 udp_capability_req(queue_t *q, mblk_t *mp)
1644 {
1645 	t_uscalar_t		cap_bits1;
1646 	struct T_capability_ack	*tcap;
1647 	udp_t	*udp = Q_TO_UDP(q);
1648 
1649 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1650 
1651 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1652 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1653 	if (!mp)
1654 		return;
1655 
1656 	tcap = (struct T_capability_ack *)mp->b_rptr;
1657 	udp_do_capability_ack(udp, tcap, cap_bits1);
1658 
1659 	qreply(q, mp);
1660 }
1661 
1662 /*
1663  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1664  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1665  * The current state of the stream is copied from udp_state.
1666  */
1667 static void
1668 udp_info_req(queue_t *q, mblk_t *mp)
1669 {
1670 	udp_t *udp = Q_TO_UDP(q);
1671 
1672 	/* Create a T_INFO_ACK message. */
1673 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1674 	    T_INFO_ACK);
1675 	if (!mp)
1676 		return;
1677 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1678 	qreply(q, mp);
1679 }
1680 
1681 /*
1682  * IP recognizes seven kinds of bind requests:
1683  *
1684  * - A zero-length address binds only to the protocol number.
1685  *
1686  * - A 4-byte address is treated as a request to
1687  * validate that the address is a valid local IPv4
1688  * address, appropriate for an application to bind to.
1689  * IP does the verification, but does not make any note
1690  * of the address at this time.
1691  *
1692  * - A 16-byte address contains is treated as a request
1693  * to validate a local IPv6 address, as the 4-byte
1694  * address case above.
1695  *
1696  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
1697  * use it for the inbound fanout of packets.
1698  *
1699  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
1700  * use it for the inbound fanout of packets.
1701  *
1702  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
1703  * information consisting of local and remote addresses
1704  * and ports.  In this case, the addresses are both
1705  * validated as appropriate for this operation, and, if
1706  * so, the information is retained for use in the
1707  * inbound fanout.
1708  *
1709  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
1710  * fanout information, like the 12-byte case above.
1711  *
1712  * IP will also fill in the IRE request mblk with information
1713  * regarding our peer.  In all cases, we notify IP of our protocol
1714  * type by appending a single protocol byte to the bind request.
1715  */
1716 static mblk_t *
1717 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
1718 {
1719 	char	*cp;
1720 	mblk_t	*mp;
1721 	struct T_bind_req *tbr;
1722 	ipa_conn_t	*ac;
1723 	ipa6_conn_t	*ac6;
1724 	sin_t		*sin;
1725 	sin6_t		*sin6;
1726 
1727 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
1728 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
1729 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
1730 	if (!mp)
1731 		return (mp);
1732 	mp->b_datap->db_type = M_PROTO;
1733 	tbr = (struct T_bind_req *)mp->b_rptr;
1734 	tbr->PRIM_type = bind_prim;
1735 	tbr->ADDR_offset = sizeof (*tbr);
1736 	tbr->CONIND_number = 0;
1737 	tbr->ADDR_length = addr_length;
1738 	cp = (char *)&tbr[1];
1739 	switch (addr_length) {
1740 	case sizeof (ipa_conn_t):
1741 		ASSERT(udp->udp_family == AF_INET);
1742 		/* Append a request for an IRE */
1743 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1744 		if (!mp->b_cont) {
1745 			freemsg(mp);
1746 			return (NULL);
1747 		}
1748 		mp->b_cont->b_wptr += sizeof (ire_t);
1749 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1750 
1751 		/* cp known to be 32 bit aligned */
1752 		ac = (ipa_conn_t *)cp;
1753 		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
1754 		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
1755 		ac->ac_fport = udp->udp_dstport;
1756 		ac->ac_lport = udp->udp_port;
1757 		break;
1758 
1759 	case sizeof (ipa6_conn_t):
1760 		ASSERT(udp->udp_family == AF_INET6);
1761 		/* Append a request for an IRE */
1762 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1763 		if (!mp->b_cont) {
1764 			freemsg(mp);
1765 			return (NULL);
1766 		}
1767 		mp->b_cont->b_wptr += sizeof (ire_t);
1768 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1769 
1770 		/* cp known to be 32 bit aligned */
1771 		ac6 = (ipa6_conn_t *)cp;
1772 		ac6->ac6_laddr = udp->udp_v6src;
1773 		ac6->ac6_faddr = udp->udp_v6dst;
1774 		ac6->ac6_fport = udp->udp_dstport;
1775 		ac6->ac6_lport = udp->udp_port;
1776 		break;
1777 
1778 	case sizeof (sin_t):
1779 		ASSERT(udp->udp_family == AF_INET);
1780 		/* Append a request for an IRE */
1781 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1782 		if (!mp->b_cont) {
1783 			freemsg(mp);
1784 			return (NULL);
1785 		}
1786 		mp->b_cont->b_wptr += sizeof (ire_t);
1787 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1788 
1789 		sin = (sin_t *)cp;
1790 		*sin = sin_null;
1791 		sin->sin_family = AF_INET;
1792 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
1793 		sin->sin_port = udp->udp_port;
1794 		break;
1795 
1796 	case sizeof (sin6_t):
1797 		ASSERT(udp->udp_family == AF_INET6);
1798 		/* Append a request for an IRE */
1799 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1800 		if (!mp->b_cont) {
1801 			freemsg(mp);
1802 			return (NULL);
1803 		}
1804 		mp->b_cont->b_wptr += sizeof (ire_t);
1805 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1806 
1807 		sin6 = (sin6_t *)cp;
1808 		*sin6 = sin6_null;
1809 		sin6->sin6_family = AF_INET6;
1810 		sin6->sin6_addr = udp->udp_bound_v6src;
1811 		sin6->sin6_port = udp->udp_port;
1812 		break;
1813 	}
1814 	/* Add protocol number to end */
1815 	cp[addr_length] = (char)IPPROTO_UDP;
1816 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
1817 	return (mp);
1818 }
1819 
1820 /* For /dev/udp aka AF_INET open */
1821 static int
1822 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1823 {
1824 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1825 }
1826 
1827 /* For /dev/udp6 aka AF_INET6 open */
1828 static int
1829 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1830 {
1831 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1832 }
1833 
1834 /*
1835  * This is the open routine for udp.  It allocates a udp_t structure for
1836  * the stream and, on the first open of the module, creates an ND table.
1837  */
1838 /*ARGSUSED2*/
1839 static int
1840 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1841     boolean_t isv6)
1842 {
1843 	int		error;
1844 	udp_t		*udp;
1845 	conn_t		*connp;
1846 	dev_t		conn_dev;
1847 	udp_stack_t	*us;
1848 	vmem_t		*minor_arena;
1849 
1850 	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);
1851 
1852 	/* If the stream is already open, return immediately. */
1853 	if (q->q_ptr != NULL)
1854 		return (0);
1855 
1856 	if (sflag == MODOPEN)
1857 		return (EINVAL);
1858 
1859 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1860 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1861 		minor_arena = ip_minor_arena_la;
1862 	} else {
1863 		/*
1864 		 * Either minor numbers in the large arena were exhausted
1865 		 * or a non socket application is doing the open.
1866 		 * Try to allocate from the small arena.
1867 		 */
1868 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1869 			return (EBUSY);
1870 
1871 		minor_arena = ip_minor_arena_sa;
1872 	}
1873 
1874 	if (flag & SO_FALLBACK) {
1875 		/*
1876 		 * Non streams socket needs a stream to fallback to
1877 		 */
1878 		RD(q)->q_ptr = (void *)conn_dev;
1879 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1880 		WR(q)->q_ptr = (void *)minor_arena;
1881 		qprocson(q);
1882 		return (0);
1883 	}
1884 
1885 	connp = udp_do_open(credp, isv6, KM_SLEEP);
1886 	if (connp == NULL) {
1887 		inet_minor_free(minor_arena, conn_dev);
1888 		return (ENOMEM);
1889 	}
1890 	udp = connp->conn_udp;
1891 	us = udp->udp_us;
1892 
1893 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1894 	connp->conn_dev = conn_dev;
1895 	connp->conn_minor_arena = minor_arena;
1896 
1897 	/*
1898 	 * Initialize the udp_t structure for this stream.
1899 	 */
1900 	q->q_ptr = connp;
1901 	WR(q)->q_ptr = connp;
1902 	connp->conn_rq = q;
1903 	connp->conn_wq = WR(q);
1904 
1905 	rw_enter(&udp->udp_rwlock, RW_WRITER);
1906 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
1907 	ASSERT(connp->conn_udp == udp);
1908 	ASSERT(udp->udp_connp == connp);
1909 
1910 	if (flag & SO_SOCKSTR) {
1911 		connp->conn_flags |= IPCL_SOCKET;
1912 		udp->udp_issocket = B_TRUE;
1913 		udp->udp_direct_sockfs = B_TRUE;
1914 	}
1915 
1916 	q->q_hiwat = us->us_recv_hiwat;
1917 	WR(q)->q_hiwat = us->us_xmit_hiwat;
1918 	WR(q)->q_lowat = us->us_xmit_lowat;
1919 
1920 	qprocson(q);
1921 
1922 	if (udp->udp_family == AF_INET6) {
1923 		/* Build initial header template for transmit */
1924 		if ((error = udp_build_hdrs(udp)) != 0) {
1925 			rw_exit(&udp->udp_rwlock);
1926 			qprocsoff(q);
1927 			inet_minor_free(minor_arena, conn_dev);
1928 			ipcl_conn_destroy(connp);
1929 			return (error);
1930 		}
1931 	}
1932 	rw_exit(&udp->udp_rwlock);
1933 
1934 	/* Set the Stream head write offset and high watermark. */
1935 	(void) proto_set_tx_wroff(q, connp,
1936 	    udp->udp_max_hdr_len + us->us_wroff_extra);
1937 	/* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */
1938 	(void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat));
1939 
1940 	mutex_enter(&connp->conn_lock);
1941 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1942 	mutex_exit(&connp->conn_lock);
1943 	return (0);
1944 }
1945 
1946 /*
1947  * Which UDP options OK to set through T_UNITDATA_REQ...
1948  */
1949 /* ARGSUSED */
1950 static boolean_t
1951 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1952 {
1953 	return (B_TRUE);
1954 }
1955 
1956 /*
1957  * This routine gets default values of certain options whose default
1958  * values are maintained by protcol specific code
1959  */
1960 /* ARGSUSED */
1961 int
1962 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1963 {
1964 	udp_t		*udp = Q_TO_UDP(q);
1965 	udp_stack_t *us = udp->udp_us;
1966 	int *i1 = (int *)ptr;
1967 
1968 	switch (level) {
1969 	case IPPROTO_IP:
1970 		switch (name) {
1971 		case IP_MULTICAST_TTL:
1972 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1973 			return (sizeof (uchar_t));
1974 		case IP_MULTICAST_LOOP:
1975 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1976 			return (sizeof (uchar_t));
1977 		}
1978 		break;
1979 	case IPPROTO_IPV6:
1980 		switch (name) {
1981 		case IPV6_MULTICAST_HOPS:
1982 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1983 			return (sizeof (int));
1984 		case IPV6_MULTICAST_LOOP:
1985 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1986 			return (sizeof (int));
1987 		case IPV6_UNICAST_HOPS:
1988 			*i1 = us->us_ipv6_hoplimit;
1989 			return (sizeof (int));
1990 		}
1991 		break;
1992 	}
1993 	return (-1);
1994 }
1995 
/*
 * This routine retrieves the current status of socket options.
 * It returns the size of the option retrieved.
 *
 * connp	connection whose option is queried; its conn_udp supplies
 *		most of the values.
 * level, name	option level (SOL_SOCKET, IPPROTO_IP, IPPROTO_IPV6 or
 *		IPPROTO_UDP) and option name within that level.
 * ptr		buffer the value is written to.  No buffer length is passed
 *		in, so the caller is relied upon to provide enough room.
 *
 * udp_rwlock must already be held (asserted below); this routine never
 * acquires or drops it.
 *
 * Return conventions used by the body:
 *   - int-sized options store through i1 and "break" out to the common
 *     "return (sizeof (int))" at the bottom;
 *   - options of any other size return their own length directly, which
 *     may be 0 when nothing is set;
 *   - -1 is returned for an unknown level or name, for options that can
 *     only be set, and when the level does not match udp_family;
 *   - -EINVAL is returned for a few IPPROTO_IP options (presumably so the
 *     caller defers them to IP, as the IP_RECVPKTINFO comment suggests --
 *     confirm against the caller).
 */
static int
udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	int		*i1 = (int *)ptr;	/* int view of the output buffer */
	ip6_pkt_t 	*ipp = &udp->udp_sticky_ipp;	/* IPv6 sticky options */
	int		len;

	ASSERT(RW_READ_HELD(&udp->udp_rwlock));
	switch (level) {
	case SOL_SOCKET:
		/* Every SOL_SOCKET option handled here is int-sized. */
		switch (name) {
		case SO_DEBUG:
			*i1 = udp->udp_debug;
			break;	/* goto sizeof (int) option return */
		case SO_REUSEADDR:
			*i1 = udp->udp_reuseaddr;
			break;	/* goto sizeof (int) option return */
		case SO_TYPE:
			*i1 = SOCK_DGRAM;
			break;	/* goto sizeof (int) option return */

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = udp->udp_dontroute;
			break;	/* goto sizeof (int) option return */
		case SO_USELOOPBACK:
			*i1 = udp->udp_useloopback;
			break;	/* goto sizeof (int) option return */
		case SO_BROADCAST:
			*i1 = udp->udp_broadcast;
			break;	/* goto sizeof (int) option return */

		case SO_SNDBUF:
			*i1 = udp->udp_xmit_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_RCVBUF:
			/*
			 * Report the "display" value, i.e. the number stored
			 * verbatim by the SO_RCVBUF set path in
			 * udp_do_opt_set().
			 */
			*i1 = udp->udp_rcv_disply_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_DGRAM_ERRIND:
			*i1 = udp->udp_dgram_errind;
			break;	/* goto sizeof (int) option return */
		case SO_RECVUCRED:
			*i1 = udp->udp_recvucred;
			break;	/* goto sizeof (int) option return */
		case SO_TIMESTAMP:
			*i1 = udp->udp_timestamp;
			break;	/* goto sizeof (int) option return */
		case SO_ANON_MLP:
			*i1 = connp->conn_anon_mlp;
			break;	/* goto sizeof (int) option return */
		case SO_MAC_EXEMPT:
			*i1 = connp->conn_mac_exempt;
			break;	/* goto sizeof (int) option return */
		case SO_ALLZONES:
			*i1 = connp->conn_allzones;
			break;	/* goto sizeof (int) option return */
		case SO_EXCLBIND:
			/* Reports the option name itself when set, else 0. */
			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
			break;
		case SO_PROTOTYPE:
			*i1 = IPPROTO_UDP;
			break;
		case SO_DOMAIN:
			*i1 = udp->udp_family;
			break;
		default:
			return (-1);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options are refused unless udp_family is AF_INET. */
		if (udp->udp_family != AF_INET)
			return (-1);
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/*
			 * Copy out the received options, skipping the first
			 * udp_label_len bytes (presumably the kernel-added
			 * label option, cf. IPV6_HOPOPTS below).  Nothing is
			 * copied when len is not positive, but len is still
			 * what gets returned.
			 */
			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
			if (len > 0) {
				bcopy(udp->udp_ip_rcv_options +
				    udp->udp_label_len, ptr, len);
			}
			return (len);
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)udp->udp_type_of_service;
			break;	/* goto sizeof (int) option return */
		case IP_TTL:
			*i1 = (int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IP_DHCPINIT_IF:
			return (-EINVAL);
		case IP_NEXTHOP:
		case IP_RECVPKTINFO:
			/*
			 * This also handles IP_PKTINFO.
			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
			 * Differentiation is based on the size of the argument
			 * passed in.
			 * This option is handled in IP which will return an
			 * error for IP_PKTINFO as it's not supported as a
			 * sticky option.
			 */
			return (-EINVAL);
		/*
		 * The multicast interface/TTL/loop options are not int-sized,
		 * so each returns its own length instead of breaking out.
		 */
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
			return (sizeof (ipaddr_t));
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = udp->udp_multicast_ttl;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			return (sizeof (uint8_t));
		case IP_RECVOPTS:
			*i1 = udp->udp_recvopts;
			break;	/* goto sizeof (int) option return */
		case IP_RECVDSTADDR:
			*i1 = udp->udp_recvdstaddr;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			*i1 = udp->udp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_RECVSLLA:
			*i1 = udp->udp_recvslla;
			break;	/* goto sizeof (int) option return */
		case IP_RECVTTL:
			*i1 = udp->udp_recvttl;
			break;	/* goto sizeof (int) option return */
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_DONTFAILOVER_IF:
			/* cannot "get" the value for these */
			return (-1);
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_BROADCAST_TTL:
			/* Byte-sized, so it returns its own length. */
			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
			return (sizeof (uchar_t));
		default:
			return (-1);
		}
		break;
	case IPPROTO_IPV6:
		/* IPv6-level options are refused unless udp_family is AF_INET6. */
		if (udp->udp_family != AF_INET6)
			return (-1);
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = udp->udp_multicast_if_index;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_HOPS:
			*i1 = udp->udp_multicast_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;	/* goto sizeof (int) option return */
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			return (-1);
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IPV6_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPKTINFO:
			*i1 = udp->udp_ip_recvpktinfo;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVTCLASS:
			*i1 = udp->udp_ipv6_recvtclass;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPATHMTU:
			*i1 = udp->udp_ipv6_recvpathmtu;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPLIMIT:
			*i1 = udp->udp_ipv6_recvhoplimit;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPOPTS:
			*i1 = udp->udp_ipv6_recvhopopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVDSTOPTS:
			*i1 = udp->udp_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = udp->udp_old_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = udp->udp_ipv6_recvrthdrdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDR:
			*i1 = udp->udp_ipv6_recvrthdr;
			break;	/* goto sizeof (int) option return */
		/*
		 * The remaining IPv6 options report the sticky values kept in
		 * udp_sticky_ipp.  An ipp_fields flag says whether each one is
		 * set.
		 */
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			/* Fields that are not set are reported as zero. */
			pkti = (struct in6_pktinfo *)ptr;
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			return (sizeof (struct in6_pktinfo));
		}
		case IPV6_TCLASS:
			/*
			 * Without a sticky value, report the traffic class
			 * extracted from the default version/flow word.
			 */
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;	/* goto sizeof (int) option return */
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			/* A zero length is returned when no next hop is set. */
			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
				return (0);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			return (sizeof (sin6_t));
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
				return (0);
			/* Nothing beyond the label portion: report empty. */
			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
				return (0);
			/*
			 * The cipso/label option is added by kernel.
			 * User is not usually aware of this option.
			 * We copy out the hbh opt after the label option.
			 */
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			/*
			 * When a label prefix was skipped, rewrite the first
			 * two bytes of the copy: byte 0 is taken from the start
			 * of the stored header and byte 1 becomes the copied
			 * length in 8-byte units, minus one (presumably the
			 * next-header and header-length fields of the
			 * hop-by-hop header -- confirm against ip6_hbh_t).
			 */
			if (udp->udp_label_len_v6 > 0) {
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		/*
		 * The next three extension headers are copied out whole; a
		 * zero length is returned when the header is not set.
		 */
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			/*
			 * ip_fill_mtuinfo() fills in ptr and supplies the
			 * return value; its exact semantics are not visible
			 * from this function.
			 */
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
			    us->us_netstack));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		/* UDP-level options are all int-sized. */
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			/* Reports the option name itself when set, else 0. */
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		case UDP_NAT_T_ENDPOINT:
			*i1 = udp->udp_nat_t_endpoint;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	/* Common exit for every option that stored an int through i1. */
	return (sizeof (int));
}
2318 
2319 int
2320 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
2321 {
2322 	udp_t   *udp;
2323 	int	err;
2324 
2325 	udp = Q_TO_UDP(q);
2326 
2327 	rw_enter(&udp->udp_rwlock, RW_READER);
2328 	err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
2329 	rw_exit(&udp->udp_rwlock);
2330 	return (err);
2331 }
2332 
2333 /*
2334  * This routine sets socket options.
2335  */
2336 /* ARGSUSED */
2337 static int
2338 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
2339     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
2340     void *thisdg_attrs, boolean_t checkonly)
2341 {
2342 	udpattrs_t *attrs = thisdg_attrs;
2343 	int	*i1 = (int *)invalp;
2344 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2345 	udp_t	*udp = connp->conn_udp;
2346 	udp_stack_t	*us = udp->udp_us;
2347 	int	error;
2348 	uint_t	newlen;
2349 	size_t	sth_wroff;
2350 
2351 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
2352 	/*
2353 	 * For fixed length options, no sanity check
2354 	 * of passed in length is done. It is assumed *_optcom_req()
2355 	 * routines do the right thing.
2356 	 */
2357 	switch (level) {
2358 	case SOL_SOCKET:
2359 		switch (name) {
2360 		case SO_REUSEADDR:
2361 			if (!checkonly) {
2362 				udp->udp_reuseaddr = onoff;
2363 				PASS_OPT_TO_IP(connp);
2364 			}
2365 			break;
2366 		case SO_DEBUG:
2367 			if (!checkonly)
2368 				udp->udp_debug = onoff;
2369 			break;
2370 		/*
2371 		 * The following three items are available here,
2372 		 * but are only meaningful to IP.
2373 		 */
2374 		case SO_DONTROUTE:
2375 			if (!checkonly) {
2376 				udp->udp_dontroute = onoff;
2377 				PASS_OPT_TO_IP(connp);
2378 			}
2379 			break;
2380 		case SO_USELOOPBACK:
2381 			if (!checkonly) {
2382 				udp->udp_useloopback = onoff;
2383 				PASS_OPT_TO_IP(connp);
2384 			}
2385 			break;
2386 		case SO_BROADCAST:
2387 			if (!checkonly) {
2388 				udp->udp_broadcast = onoff;
2389 				PASS_OPT_TO_IP(connp);
2390 			}
2391 			break;
2392 
2393 		case SO_SNDBUF:
2394 			if (*i1 > us->us_max_buf) {
2395 				*outlenp = 0;
2396 				return (ENOBUFS);
2397 			}
2398 			if (!checkonly) {
2399 				udp->udp_xmit_hiwat = *i1;
2400 				connp->conn_wq->q_hiwat = *i1;
2401 			}
2402 			break;
2403 		case SO_RCVBUF:
2404 			if (*i1 > us->us_max_buf) {
2405 				*outlenp = 0;
2406 				return (ENOBUFS);
2407 			}
2408 			if (!checkonly) {
2409 				int size;
2410 
2411 				udp->udp_rcv_disply_hiwat = *i1;
2412 				size = udp_set_rcv_hiwat(udp, *i1);
2413 				rw_exit(&udp->udp_rwlock);
2414 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2415 				    size);
2416 				rw_enter(&udp->udp_rwlock, RW_WRITER);
2417 			}
2418 			break;
2419 		case SO_DGRAM_ERRIND:
2420 			if (!checkonly)
2421 				udp->udp_dgram_errind = onoff;
2422 			break;
2423 		case SO_RECVUCRED:
2424 			if (!checkonly)
2425 				udp->udp_recvucred = onoff;
2426 			break;
2427 		case SO_ALLZONES:
2428 			/*
2429 			 * "soft" error (negative)
2430 			 * option not handled at this level
2431 			 * Do not modify *outlenp.
2432 			 */
2433 			return (-EINVAL);
2434 		case SO_TIMESTAMP:
2435 			if (!checkonly)
2436 				udp->udp_timestamp = onoff;
2437 			break;
2438 		case SO_ANON_MLP:
2439 			if (!checkonly) {
2440 				connp->conn_anon_mlp = onoff;
2441 				PASS_OPT_TO_IP(connp);
2442 			}
2443 			break;
2444 		case SO_MAC_EXEMPT:
2445 			if (secpolicy_net_mac_aware(cr) != 0 ||
2446 			    udp->udp_state != TS_UNBND)
2447 				return (EACCES);
2448 			if (!checkonly) {
2449 				connp->conn_mac_exempt = onoff;
2450 				PASS_OPT_TO_IP(connp);
2451 			}
2452 			break;
2453 		case SCM_UCRED: {
2454 			struct ucred_s *ucr;
2455 			cred_t *cr, *newcr;
2456 			ts_label_t *tsl;
2457 
2458 			/*
2459 			 * Only sockets that have proper privileges and are
2460 			 * bound to MLPs will have any other value here, so
2461 			 * this implicitly tests for privilege to set label.
2462 			 */
2463 			if (connp->conn_mlp_type == mlptSingle)
2464 				break;
2465 			ucr = (struct ucred_s *)invalp;
2466 			if (inlen != ucredsize ||
2467 			    ucr->uc_labeloff < sizeof (*ucr) ||
2468 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
2469 				return (EINVAL);
2470 			if (!checkonly) {
2471 				mblk_t *mb;
2472 
2473 				if (attrs == NULL ||
2474 				    (mb = attrs->udpattr_mb) == NULL)
2475 					return (EINVAL);
2476 				if ((cr = DB_CRED(mb)) == NULL)
2477 					cr = udp->udp_connp->conn_cred;
2478 				ASSERT(cr != NULL);
2479 				if ((tsl = crgetlabel(cr)) == NULL)
2480 					return (EINVAL);
2481 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
2482 				    tsl->tsl_doi, KM_NOSLEEP);
2483 				if (newcr == NULL)
2484 					return (ENOSR);
2485 				mblk_setcred(mb, newcr);
2486 				attrs->udpattr_credset = B_TRUE;
2487 				crfree(newcr);
2488 			}
2489 			break;
2490 		}
2491 		case SO_EXCLBIND:
2492 			if (!checkonly)
2493 				udp->udp_exclbind = onoff;
2494 			break;
2495 		default:
2496 			*outlenp = 0;
2497 			return (EINVAL);
2498 		}
2499 		break;
2500 	case IPPROTO_IP:
2501 		if (udp->udp_family != AF_INET) {
2502 			*outlenp = 0;
2503 			return (ENOPROTOOPT);
2504 		}
2505 		switch (name) {
2506 		case IP_OPTIONS:
2507 		case T_IP_OPTIONS:
2508 			/* Save options for use by IP. */
2509 			newlen = inlen + udp->udp_label_len;
2510 			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
2511 				*outlenp = 0;
2512 				return (EINVAL);
2513 			}
2514 			if (checkonly)
2515 				break;
2516 
2517 			/*
2518 			 * Update the stored options taking into account
2519 			 * any CIPSO option which we should not overwrite.
2520 			 */
2521 			if (!tsol_option_set(&udp->udp_ip_snd_options,
2522 			    &udp->udp_ip_snd_options_len,
2523 			    udp->udp_label_len, invalp, inlen)) {
2524 				*outlenp = 0;
2525 				return (ENOMEM);
2526 			}
2527 
2528 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2529 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
2530 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
2531 			rw_exit(&udp->udp_rwlock);
2532 			(void) proto_set_tx_wroff(connp->conn_rq, connp,
2533 			    sth_wroff);
2534 			rw_enter(&udp->udp_rwlock, RW_WRITER);
2535 			break;
2536 
2537 		case IP_TTL:
2538 			if (!checkonly) {
2539 				udp->udp_ttl = (uchar_t)*i1;
2540 			}
2541 			break;
2542 		case IP_TOS:
2543 		case T_IP_TOS:
2544 			if (!checkonly) {
2545 				udp->udp_type_of_service = (uchar_t)*i1;
2546 			}
2547 			break;
2548 		case IP_MULTICAST_IF: {
2549 			/*
2550 			 * TODO should check OPTMGMT reply and undo this if
2551 			 * there is an error.
2552 			 */
2553 			struct in_addr *inap = (struct in_addr *)invalp;
2554 			if (!checkonly) {
2555 				udp->udp_multicast_if_addr =
2556 				    inap->s_addr;
2557 				PASS_OPT_TO_IP(connp);
2558 			}
2559 			break;
2560 		}
2561 		case IP_MULTICAST_TTL:
2562 			if (!checkonly)
2563 				udp->udp_multicast_ttl = *invalp;
2564 			break;
2565 		case IP_MULTICAST_LOOP:
2566 			if (!checkonly) {
2567 				connp->conn_multicast_loop = *invalp;
2568 				PASS_OPT_TO_IP(connp);
2569 			}
2570 			break;
2571 		case IP_RECVOPTS:
2572 			if (!checkonly)
2573 				udp->udp_recvopts = onoff;
2574 			break;
2575 		case IP_RECVDSTADDR:
2576 			if (!checkonly)
2577 				udp->udp_recvdstaddr = onoff;
2578 			break;
2579 		case IP_RECVIF:
2580 			if (!checkonly) {
2581 				udp->udp_recvif = onoff;
2582 				PASS_OPT_TO_IP(connp);
2583 			}
2584 			break;
2585 		case IP_RECVSLLA:
2586 			if (!checkonly) {
2587 				udp->udp_recvslla = onoff;
2588 				PASS_OPT_TO_IP(connp);
2589 			}
2590 			break;
2591 		case IP_RECVTTL:
2592 			if (!checkonly)
2593 				udp->udp_recvttl = onoff;
2594 			break;
2595 		case IP_PKTINFO: {
2596 			/*
2597 			 * This also handles IP_RECVPKTINFO.
2598 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
2599 			 * Differentiation is based on the size of the
2600 			 * argument passed in.
2601 			 */
2602 			struct in_pktinfo *pktinfop;
2603 			ip4_pkt_t *attr_pktinfop;
2604 
2605 			if (checkonly)
2606 				break;
2607 
2608 			if (inlen == sizeof (int)) {
2609 				/*
2610 				 * This is IP_RECVPKTINFO option.
2611 				 * Keep a local copy of whether this option is
2612 				 * set or not and pass it down to IP for
2613 				 * processing.
2614 				 */
2615 
2616 				udp->udp_ip_recvpktinfo = onoff;
2617 				return (-EINVAL);
2618 			}
2619 
2620 			if (attrs == NULL ||
2621 			    (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
2622 				/*
2623 				 * sticky option or no buffer to return
2624 				 * the results.
2625 				 */
2626 				return (EINVAL);
2627 			}
2628 
2629 			if (inlen != sizeof (struct in_pktinfo))
2630 				return (EINVAL);
2631 
2632 			pktinfop = (struct in_pktinfo *)invalp;
2633 
2634 			/*
2635 			 * At least one of the values should be specified
2636 			 */
2637 			if (pktinfop->ipi_ifindex == 0 &&
2638 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2639 				return (EINVAL);
2640 			}
2641 
2642 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2643 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2644 
2645 			break;
2646 		}
2647 		case IP_ADD_MEMBERSHIP:
2648 		case IP_DROP_MEMBERSHIP:
2649 		case IP_BLOCK_SOURCE:
2650 		case IP_UNBLOCK_SOURCE:
2651 		case IP_ADD_SOURCE_MEMBERSHIP:
2652 		case IP_DROP_SOURCE_MEMBERSHIP:
2653 		case MCAST_JOIN_GROUP:
2654 		case MCAST_LEAVE_GROUP:
2655 		case MCAST_BLOCK_SOURCE:
2656 		case MCAST_UNBLOCK_SOURCE:
2657 		case MCAST_JOIN_SOURCE_GROUP:
2658 		case MCAST_LEAVE_SOURCE_GROUP:
2659 		case IP_SEC_OPT:
2660 		case IP_NEXTHOP:
2661 		case IP_DHCPINIT_IF:
2662 			/*
2663 			 * "soft" error (negative)
2664 			 * option not handled at this level
2665 			 * Do not modify *outlenp.
2666 			 */
2667 			return (-EINVAL);
2668 		case IP_BOUND_IF:
2669 			if (!checkonly) {
2670 				udp->udp_bound_if = *i1;
2671 				PASS_OPT_TO_IP(connp);
2672 			}
2673 			break;
2674 		case IP_UNSPEC_SRC:
2675 			if (!checkonly) {
2676 				udp->udp_unspec_source = onoff;
2677 				PASS_OPT_TO_IP(connp);
2678 			}
2679 			break;
2680 		case IP_BROADCAST_TTL:
2681 			if (!checkonly)
2682 				connp->conn_broadcast_ttl = *invalp;
2683 			break;
2684 		default:
2685 			*outlenp = 0;
2686 			return (EINVAL);
2687 		}
2688 		break;
2689 	case IPPROTO_IPV6: {
2690 		ip6_pkt_t		*ipp;
2691 		boolean_t		sticky;
2692 
2693 		if (udp->udp_family != AF_INET6) {
2694 			*outlenp = 0;
2695 			return (ENOPROTOOPT);
2696 		}
2697 		/*
2698 		 * Deal with both sticky options and ancillary data
2699 		 */
2700 		sticky = B_FALSE;
2701 		if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
2702 		    NULL) {
2703 			/* sticky options, or none */
2704 			ipp = &udp->udp_sticky_ipp;
2705 			sticky = B_TRUE;
2706 		}
2707 
2708 		switch (name) {
2709 		case IPV6_MULTICAST_IF:
2710 			if (!checkonly) {
2711 				udp->udp_multicast_if_index = *i1;
2712 				PASS_OPT_TO_IP(connp);
2713 			}
2714 			break;
2715 		case IPV6_UNICAST_HOPS:
2716 			/* -1 means use default */
2717 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2718 				*outlenp = 0;
2719 				return (EINVAL);
2720 			}
2721 			if (!checkonly) {
2722 				if (*i1 == -1) {
2723 					udp->udp_ttl = ipp->ipp_unicast_hops =
2724 					    us->us_ipv6_hoplimit;
2725 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2726 					/* Pass modified value to IP. */
2727 					*i1 = udp->udp_ttl;
2728 				} else {
2729 					udp->udp_ttl = ipp->ipp_unicast_hops =
2730 					    (uint8_t)*i1;
2731 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2732 				}
2733 				/* Rebuild the header template */
2734 				error = udp_build_hdrs(udp);
2735 				if (error != 0) {
2736 					*outlenp = 0;
2737 					return (error);
2738 				}
2739 			}
2740 			break;
2741 		case IPV6_MULTICAST_HOPS:
2742 			/* -1 means use default */
2743 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2744 				*outlenp = 0;
2745 				return (EINVAL);
2746 			}
2747 			if (!checkonly) {
2748 				if (*i1 == -1) {
2749 					udp->udp_multicast_ttl =
2750 					    ipp->ipp_multicast_hops =
2751 					    IP_DEFAULT_MULTICAST_TTL;
2752 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2753 					/* Pass modified value to IP. */
2754 					*i1 = udp->udp_multicast_ttl;
2755 				} else {
2756 					udp->udp_multicast_ttl =
2757 					    ipp->ipp_multicast_hops =
2758 					    (uint8_t)*i1;
2759 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2760 				}
2761 			}
2762 			break;
2763 		case IPV6_MULTICAST_LOOP:
2764 			if (*i1 != 0 && *i1 != 1) {
2765 				*outlenp = 0;
2766 				return (EINVAL);
2767 			}
2768 			if (!checkonly) {
2769 				connp->conn_multicast_loop = *i1;
2770 				PASS_OPT_TO_IP(connp);
2771 			}
2772 			break;
2773 		case IPV6_JOIN_GROUP:
2774 		case IPV6_LEAVE_GROUP:
2775 		case MCAST_JOIN_GROUP:
2776 		case MCAST_LEAVE_GROUP:
2777 		case MCAST_BLOCK_SOURCE:
2778 		case MCAST_UNBLOCK_SOURCE:
2779 		case MCAST_JOIN_SOURCE_GROUP:
2780 		case MCAST_LEAVE_SOURCE_GROUP:
2781 			/*
2782 			 * "soft" error (negative)
2783 			 * option not handled at this level
2784 			 * Note: Do not modify *outlenp
2785 			 */
2786 			return (-EINVAL);
2787 		case IPV6_BOUND_IF:
2788 			if (!checkonly) {
2789 				udp->udp_bound_if = *i1;
2790 				PASS_OPT_TO_IP(connp);
2791 			}
2792 			break;
2793 		case IPV6_UNSPEC_SRC:
2794 			if (!checkonly) {
2795 				udp->udp_unspec_source = onoff;
2796 				PASS_OPT_TO_IP(connp);
2797 			}
2798 			break;
2799 		/*
2800 		 * Set boolean switches for ancillary data delivery
2801 		 */
2802 		case IPV6_RECVPKTINFO:
2803 			if (!checkonly) {
2804 				udp->udp_ip_recvpktinfo = onoff;
2805 				PASS_OPT_TO_IP(connp);
2806 			}
2807 			break;
2808 		case IPV6_RECVTCLASS:
2809 			if (!checkonly) {
2810 				udp->udp_ipv6_recvtclass = onoff;
2811 				PASS_OPT_TO_IP(connp);
2812 			}
2813 			break;
2814 		case IPV6_RECVPATHMTU:
2815 			if (!checkonly) {
2816 				udp->udp_ipv6_recvpathmtu = onoff;
2817 				PASS_OPT_TO_IP(connp);
2818 			}
2819 			break;
2820 		case IPV6_RECVHOPLIMIT:
2821 			if (!checkonly) {
2822 				udp->udp_ipv6_recvhoplimit = onoff;
2823 				PASS_OPT_TO_IP(connp);
2824 			}
2825 			break;
2826 		case IPV6_RECVHOPOPTS:
2827 			if (!checkonly) {
2828 				udp->udp_ipv6_recvhopopts = onoff;
2829 				PASS_OPT_TO_IP(connp);
2830 			}
2831 			break;
2832 		case IPV6_RECVDSTOPTS:
2833 			if (!checkonly) {
2834 				udp->udp_ipv6_recvdstopts = onoff;
2835 				PASS_OPT_TO_IP(connp);
2836 			}
2837 			break;
2838 		case _OLD_IPV6_RECVDSTOPTS:
2839 			if (!checkonly)
2840 				udp->udp_old_ipv6_recvdstopts = onoff;
2841 			break;
2842 		case IPV6_RECVRTHDRDSTOPTS:
2843 			if (!checkonly) {
2844 				udp->udp_ipv6_recvrthdrdstopts = onoff;
2845 				PASS_OPT_TO_IP(connp);
2846 			}
2847 			break;
2848 		case IPV6_RECVRTHDR:
2849 			if (!checkonly) {
2850 				udp->udp_ipv6_recvrthdr = onoff;
2851 				PASS_OPT_TO_IP(connp);
2852 			}
2853 			break;
2854 		/*
2855 		 * Set sticky options or ancillary data.
2856 		 * If sticky options, (re)build any extension headers
2857 		 * that might be needed as a result.
2858 		 */
2859 		case IPV6_PKTINFO:
2860 			/*
2861 			 * The source address and ifindex are verified
2862 			 * in ip_opt_set(). For ancillary data the
2863 			 * source address is checked in ip_wput_v6.
2864 			 */
2865 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
2866 				return (EINVAL);
2867 			if (checkonly)
2868 				break;
2869 
2870 			if (inlen == 0) {
2871 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2872 				ipp->ipp_sticky_ignored |=
2873 				    (IPPF_IFINDEX|IPPF_ADDR);
2874 			} else {
2875 				struct in6_pktinfo *pkti;
2876 
2877 				pkti = (struct in6_pktinfo *)invalp;
2878 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2879 				ipp->ipp_addr = pkti->ipi6_addr;
2880 				if (ipp->ipp_ifindex != 0)
2881 					ipp->ipp_fields |= IPPF_IFINDEX;
2882 				else
2883 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2884 				if (!IN6_IS_ADDR_UNSPECIFIED(
2885 				    &ipp->ipp_addr))
2886 					ipp->ipp_fields |= IPPF_ADDR;
2887 				else
2888 					ipp->ipp_fields &= ~IPPF_ADDR;
2889 			}
2890 			if (sticky) {
2891 				error = udp_build_hdrs(udp);
2892 				if (error != 0)
2893 					return (error);
2894 				PASS_OPT_TO_IP(connp);
2895 			}
2896 			break;
2897 		case IPV6_HOPLIMIT:
2898 			if (sticky)
2899 				return (EINVAL);
2900 			if (inlen != 0 && inlen != sizeof (int))
2901 				return (EINVAL);
2902 			if (checkonly)
2903 				break;
2904 
2905 			if (inlen == 0) {
2906 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2907 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2908 			} else {
2909 				if (*i1 > 255 || *i1 < -1)
2910 					return (EINVAL);
2911 				if (*i1 == -1)
2912 					ipp->ipp_hoplimit =
2913 					    us->us_ipv6_hoplimit;
2914 				else
2915 					ipp->ipp_hoplimit = *i1;
2916 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2917 			}
2918 			break;
2919 		case IPV6_TCLASS:
2920 			if (inlen != 0 && inlen != sizeof (int))
2921 				return (EINVAL);
2922 			if (checkonly)
2923 				break;
2924 
2925 			if (inlen == 0) {
2926 				ipp->ipp_fields &= ~IPPF_TCLASS;
2927 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2928 			} else {
2929 				if (*i1 > 255 || *i1 < -1)
2930 					return (EINVAL);
2931 				if (*i1 == -1)
2932 					ipp->ipp_tclass = 0;
2933 				else
2934 					ipp->ipp_tclass = *i1;
2935 				ipp->ipp_fields |= IPPF_TCLASS;
2936 			}
2937 			if (sticky) {
2938 				error = udp_build_hdrs(udp);
2939 				if (error != 0)
2940 					return (error);
2941 			}
2942 			break;
2943 		case IPV6_NEXTHOP:
2944 			/*
2945 			 * IP will verify that the nexthop is reachable
2946 			 * and fail for sticky options.
2947 			 */
2948 			if (inlen != 0 && inlen != sizeof (sin6_t))
2949 				return (EINVAL);
2950 			if (checkonly)
2951 				break;
2952 
2953 			if (inlen == 0) {
2954 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2955 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2956 			} else {
2957 				sin6_t *sin6 = (sin6_t *)invalp;
2958 
2959 				if (sin6->sin6_family != AF_INET6) {
2960 					return (EAFNOSUPPORT);
2961 				}
2962 				if (IN6_IS_ADDR_V4MAPPED(
2963 				    &sin6->sin6_addr))
2964 					return (EADDRNOTAVAIL);
2965 				ipp->ipp_nexthop = sin6->sin6_addr;
2966 				if (!IN6_IS_ADDR_UNSPECIFIED(
2967 				    &ipp->ipp_nexthop))
2968 					ipp->ipp_fields |= IPPF_NEXTHOP;
2969 				else
2970 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2971 			}
2972 			if (sticky) {
2973 				error = udp_build_hdrs(udp);
2974 				if (error != 0)
2975 					return (error);
2976 				PASS_OPT_TO_IP(connp);
2977 			}
2978 			break;
2979 		case IPV6_HOPOPTS: {
2980 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2981 			/*
2982 			 * Sanity checks - minimum size, size a multiple of
2983 			 * eight bytes, and matching size passed in.
2984 			 */
2985 			if (inlen != 0 &&
2986 			    inlen != (8 * (hopts->ip6h_len + 1)))
2987 				return (EINVAL);
2988 
2989 			if (checkonly)
2990 				break;
2991 
2992 			error = optcom_pkt_set(invalp, inlen, sticky,
2993 			    (uchar_t **)&ipp->ipp_hopopts,
2994 			    &ipp->ipp_hopoptslen,
2995 			    sticky ? udp->udp_label_len_v6 : 0);
2996 			if (error != 0)
2997 				return (error);
2998 			if (ipp->ipp_hopoptslen == 0) {
2999 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
3000 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
3001 			} else {
3002 				ipp->ipp_fields |= IPPF_HOPOPTS;
3003 			}
3004 			if (sticky) {
3005 				error = udp_build_hdrs(udp);
3006 				if (error != 0)
3007 					return (error);
3008 			}
3009 			break;
3010 		}
3011 		case IPV6_RTHDRDSTOPTS: {
3012 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3013 
3014 			/*
3015 			 * Sanity checks - minimum size, size a multiple of
3016 			 * eight bytes, and matching size passed in.
3017 			 */
3018 			if (inlen != 0 &&
3019 			    inlen != (8 * (dopts->ip6d_len + 1)))
3020 				return (EINVAL);
3021 
3022 			if (checkonly)
3023 				break;
3024 
3025 			if (inlen == 0) {
3026 				if (sticky &&
3027 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
3028 					kmem_free(ipp->ipp_rtdstopts,
3029 					    ipp->ipp_rtdstoptslen);
3030 					ipp->ipp_rtdstopts = NULL;
3031 					ipp->ipp_rtdstoptslen = 0;
3032 				}
3033 
3034 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
3035 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
3036 			} else {
3037 				error = optcom_pkt_set(invalp, inlen, sticky,
3038 				    (uchar_t **)&ipp->ipp_rtdstopts,
3039 				    &ipp->ipp_rtdstoptslen, 0);
3040 				if (error != 0)
3041 					return (error);
3042 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
3043 			}
3044 			if (sticky) {
3045 				error = udp_build_hdrs(udp);
3046 				if (error != 0)
3047 					return (error);
3048 			}
3049 			break;
3050 		}
3051 		case IPV6_DSTOPTS: {
3052 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3053 
3054 			/*
3055 			 * Sanity checks - minimum size, size a multiple of
3056 			 * eight bytes, and matching size passed in.
3057 			 */
3058 			if (inlen != 0 &&
3059 			    inlen != (8 * (dopts->ip6d_len + 1)))
3060 				return (EINVAL);
3061 
3062 			if (checkonly)
3063 				break;
3064 
3065 			if (inlen == 0) {
3066 				if (sticky &&
3067 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
3068 					kmem_free(ipp->ipp_dstopts,
3069 					    ipp->ipp_dstoptslen);
3070 					ipp->ipp_dstopts = NULL;
3071 					ipp->ipp_dstoptslen = 0;
3072 				}
3073 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
3074 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
3075 			} else {
3076 				error = optcom_pkt_set(invalp, inlen, sticky,
3077 				    (uchar_t **)&ipp->ipp_dstopts,
3078 				    &ipp->ipp_dstoptslen, 0);
3079 				if (error != 0)
3080 					return (error);
3081 				ipp->ipp_fields |= IPPF_DSTOPTS;
3082 			}
3083 			if (sticky) {
3084 				error = udp_build_hdrs(udp);
3085 				if (error != 0)
3086 					return (error);
3087 			}
3088 			break;
3089 		}
3090 		case IPV6_RTHDR: {
3091 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
3092 
3093 			/*
3094 			 * Sanity checks - minimum size, size a multiple of
3095 			 * eight bytes, and matching size passed in.
3096 			 */
3097 			if (inlen != 0 &&
3098 			    inlen != (8 * (rt->ip6r_len + 1)))
3099 				return (EINVAL);
3100 
3101 			if (checkonly)
3102 				break;
3103 
3104 			if (inlen == 0) {
3105 				if (sticky &&
3106 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
3107 					kmem_free(ipp->ipp_rthdr,
3108 					    ipp->ipp_rthdrlen);
3109 					ipp->ipp_rthdr = NULL;
3110 					ipp->ipp_rthdrlen = 0;
3111 				}
3112 				ipp->ipp_fields &= ~IPPF_RTHDR;
3113 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
3114 			} else {
3115 				error = optcom_pkt_set(invalp, inlen, sticky,
3116 				    (uchar_t **)&ipp->ipp_rthdr,
3117 				    &ipp->ipp_rthdrlen, 0);
3118 				if (error != 0)
3119 					return (error);
3120 				ipp->ipp_fields |= IPPF_RTHDR;
3121 			}
3122 			if (sticky) {
3123 				error = udp_build_hdrs(udp);
3124 				if (error != 0)
3125 					return (error);
3126 			}
3127 			break;
3128 		}
3129 
3130 		case IPV6_DONTFRAG:
3131 			if (checkonly)
3132 				break;
3133 
3134 			if (onoff) {
3135 				ipp->ipp_fields |= IPPF_DONTFRAG;
3136 			} else {
3137 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
3138 			}
3139 			break;
3140 
3141 		case IPV6_USE_MIN_MTU:
3142 			if (inlen != sizeof (int))
3143 				return (EINVAL);
3144 
3145 			if (*i1 < -1 || *i1 > 1)
3146 				return (EINVAL);
3147 
3148 			if (checkonly)
3149 				break;
3150 
3151 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
3152 			ipp->ipp_use_min_mtu = *i1;
3153 			break;
3154 
3155 		case IPV6_BOUND_PIF:
3156 		case IPV6_SEC_OPT:
3157 		case IPV6_DONTFAILOVER_IF:
3158 		case IPV6_SRC_PREFERENCES:
3159 		case IPV6_V6ONLY:
3160 			/* Handled at the IP level */
3161 			return (-EINVAL);
3162 		default:
3163 			*outlenp = 0;
3164 			return (EINVAL);
3165 		}
3166 		break;
3167 		}		/* end IPPROTO_IPV6 */
3168 	case IPPROTO_UDP:
3169 		switch (name) {
3170 		case UDP_ANONPRIVBIND:
3171 			if ((error = secpolicy_net_privaddr(cr, 0,
3172 			    IPPROTO_UDP)) != 0) {
3173 				*outlenp = 0;
3174 				return (error);
3175 			}
3176 			if (!checkonly) {
3177 				udp->udp_anon_priv_bind = onoff;
3178 			}
3179 			break;
3180 		case UDP_EXCLBIND:
3181 			if (!checkonly)
3182 				udp->udp_exclbind = onoff;
3183 			break;
3184 		case UDP_RCVHDR:
3185 			if (!checkonly)
3186 				udp->udp_rcvhdr = onoff;
3187 			break;
3188 		case UDP_NAT_T_ENDPOINT:
3189 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
3190 				*outlenp = 0;
3191 				return (error);
3192 			}
3193 
3194 			/*
3195 			 * Use udp_family instead so we can avoid ambiguities
3196 			 * with AF_INET6 sockets that may switch from IPv4
3197 			 * to IPv6.
3198 			 */
3199 			if (udp->udp_family != AF_INET) {
3200 				*outlenp = 0;
3201 				return (EAFNOSUPPORT);
3202 			}
3203 
3204 			if (!checkonly) {
3205 				int size;
3206 
3207 				udp->udp_nat_t_endpoint = onoff;
3208 
3209 				udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
3210 				    UDPH_SIZE + udp->udp_ip_snd_options_len;
3211 
3212 				/* Also, adjust wroff */
3213 				if (onoff) {
3214 					udp->udp_max_hdr_len +=
3215 					    sizeof (uint32_t);
3216 				}
3217 				size = udp->udp_max_hdr_len +
3218 				    us->us_wroff_extra;
3219 				(void) proto_set_tx_wroff(connp->conn_rq, connp,
3220 				    size);
3221 			}
3222 			break;
3223 		default:
3224 			*outlenp = 0;
3225 			return (EINVAL);
3226 		}
3227 		break;
3228 	default:
3229 		*outlenp = 0;
3230 		return (EINVAL);
3231 	}
3232 	/*
3233 	 * Common case of OK return with outval same as inval.
3234 	 */
3235 	if (invalp != outvalp) {
3236 		/* don't trust bcopy for identical src/dst */
3237 		(void) bcopy(invalp, outvalp, inlen);
3238 	}
3239 	*outlenp = inlen;
3240 	return (0);
3241 }
3242 
3243 int
3244 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
3245     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3246     void *thisdg_attrs, cred_t *cr)
3247 {
3248 	int		error;
3249 	boolean_t	checkonly;
3250 
3251 	error = 0;
3252 	switch (optset_context) {
3253 	case SETFN_OPTCOM_CHECKONLY:
3254 		checkonly = B_TRUE;
3255 		/*
3256 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3257 		 * inlen != 0 implies value supplied and
3258 		 * 	we have to "pretend" to set it.
3259 		 * inlen == 0 implies that there is no
3260 		 * 	value part in T_CHECK request and just validation
3261 		 * done elsewhere should be enough, we just return here.
3262 		 */
3263 		if (inlen == 0) {
3264 			*outlenp = 0;
3265 			goto done;
3266 		}
3267 		break;
3268 	case SETFN_OPTCOM_NEGOTIATE:
3269 		checkonly = B_FALSE;
3270 		break;
3271 	case SETFN_UD_NEGOTIATE:
3272 	case SETFN_CONN_NEGOTIATE:
3273 		checkonly = B_FALSE;
3274 		/*
3275 		 * Negotiating local and "association-related" options
3276 		 * through T_UNITDATA_REQ.
3277 		 *
3278 		 * Following routine can filter out ones we do not
3279 		 * want to be "set" this way.
3280 		 */
3281 		if (!udp_opt_allow_udr_set(level, name)) {
3282 			*outlenp = 0;
3283 			error = EINVAL;
3284 			goto done;
3285 		}
3286 		break;
3287 	default:
3288 		/*
3289 		 * We should never get here
3290 		 */
3291 		*outlenp = 0;
3292 		error = EINVAL;
3293 		goto done;
3294 	}
3295 
3296 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3297 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3298 
3299 	error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
3300 	    outvalp, cr, thisdg_attrs, checkonly);
3301 done:
3302 	return (error);
3303 }
3304 
3305 /* ARGSUSED */
3306 int
3307 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
3308     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3309     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3310 {
3311 	conn_t  *connp =  Q_TO_CONN(q);
3312 	int error;
3313 	udp_t	*udp = connp->conn_udp;
3314 
3315 	rw_enter(&udp->udp_rwlock, RW_WRITER);
3316 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
3317 	    outlenp, outvalp, thisdg_attrs, cr);
3318 	rw_exit(&udp->udp_rwlock);
3319 	return (error);
3320 }
3321 
3322 /*
3323  * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
3324  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
3325  * headers, and the udp header.
3326  * Returns failure if can't allocate memory.
 *
 * Locking: the caller must hold udp_rwlock as writer (asserted below).
 * NOTE: when the new header length exceeds udp_max_hdr_len the lock is
 * dropped around the proto_set_tx_wroff() call and then re-acquired as
 * writer, so callers must not assume udp state is unchanged across this
 * call.
 *
 * Returns 0 on success, or ENOMEM when a differently sized header buffer
 * is needed and the KM_NOSLEEP allocation fails; in that case the
 * existing udp_sticky_hdrs buffer and length are left as they were.
3327  */
3328 static int
3329 udp_build_hdrs(udp_t *udp)
3330 {
3331 	udp_stack_t *us = udp->udp_us;
3332 	uchar_t	*hdrs;
3333 	uint_t	hdrs_len;
3334 	ip6_t	*ip6h;
3335 	ip6i_t	*ip6i;
3336 	udpha_t	*udpha;
3337 	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
3338 	size_t	sth_wroff;
3339 	conn_t	*connp = udp->udp_connp;
3340 
3341 	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
3342 	ASSERT(connp != NULL);
3343 
	/* Total size: IPv6 header(s) described by ipp plus the UDP header. */
3344 	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
3345 	ASSERT(hdrs_len != 0);
3346 	if (hdrs_len != udp->udp_sticky_hdrs_len) {
3347 		/* Need to reallocate */
3348 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3349 		if (hdrs == NULL)
3350 			return (ENOMEM);
3351 
		/* Old buffer is released only once the new one exists. */
3352 		if (udp->udp_sticky_hdrs_len != 0) {
3353 			kmem_free(udp->udp_sticky_hdrs,
3354 			    udp->udp_sticky_hdrs_len);
3355 		}
3356 		udp->udp_sticky_hdrs = hdrs;
3357 		udp->udp_sticky_hdrs_len = hdrs_len;
3358 	}
	/* Fill in everything in front of the UDP header from ipp. */
3359 	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
3360 	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);
3361 
3362 	/* Set header fields not in ipp */
	/* With IPPF_HAS_IP6I an ip6i_t sits in front of the ip6_t. */
3363 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3364 		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
3365 		ip6h = (ip6_t *)&ip6i[1];
3366 	} else {
3367 		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
3368 	}
3369 
	/* Use the endpoint's source address unless ipp carries IPPF_ADDR. */
3370 	if (!(ipp->ipp_fields & IPPF_ADDR))
3371 		ip6h->ip6_src = udp->udp_v6src;
3372 
	/* The UDP header occupies the last UDPH_SIZE bytes of the buffer. */
3373 	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
3374 	udpha->uha_src_port = udp->udp_port;
3375 
3376 	/* Try to get everything in a single mblk */
3377 	if (hdrs_len > udp->udp_max_hdr_len) {
3378 		udp->udp_max_hdr_len = hdrs_len;
3379 		sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
		/* udp_rwlock is not held across proto_set_tx_wroff(). */
3380 		rw_exit(&udp->udp_rwlock);
3381 		(void) proto_set_tx_wroff(udp->udp_connp->conn_rq,
3382 		    udp->udp_connp, sth_wroff);
3383 		rw_enter(&udp->udp_rwlock, RW_WRITER);
3384 	}
3385 	return (0);
3386 }
3387 
3388 /*
3389  * This routine retrieves the value of an ND variable in a udpparam_t
3390  * structure.  It is called through nd_getset when a user reads the
3391  * variable.
3392  */
3393 /* ARGSUSED */
3394 static int
3395 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3396 {
3397 	udpparam_t *udppa = (udpparam_t *)cp;
3398 
3399 	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
3400 	return (0);
3401 }
3402 
3403 /*
3404  * Walk through the param array specified registering each element with the
3405  * named dispatch (ND) handler.
3406  */
3407 static boolean_t
3408 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
3409 {
3410 	for (; cnt-- > 0; udppa++) {
3411 		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
3412 			if (!nd_load(ndp, udppa->udp_param_name,
3413 			    udp_param_get, udp_param_set,
3414 			    (caddr_t)udppa)) {
3415 				nd_free(ndp);
3416 				return (B_FALSE);
3417 			}
3418 		}
3419 	}
3420 	if (!nd_load(ndp, "udp_extra_priv_ports",
3421 	    udp_extra_priv_ports_get, NULL, NULL)) {
3422 		nd_free(ndp);
3423 		return (B_FALSE);
3424 	}
3425 	if (!nd_load(ndp, "udp_extra_priv_ports_add",
3426 	    NULL, udp_extra_priv_ports_add, NULL)) {
3427 		nd_free(ndp);
3428 		return (B_FALSE);
3429 	}
3430 	if (!nd_load(ndp, "udp_extra_priv_ports_del",
3431 	    NULL, udp_extra_priv_ports_del, NULL)) {
3432 		nd_free(ndp);
3433 		return (B_FALSE);
3434 	}
3435 	if (!nd_load(ndp, "udp_status", udp_status_report, NULL,
3436 	    NULL)) {
3437 		nd_free(ndp);
3438 		return (B_FALSE);
3439 	}
3440 	if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL,
3441 	    NULL)) {
3442 		nd_free(ndp);
3443 		return (B_FALSE);
3444 	}
3445 	return (B_TRUE);
3446 }
3447 
3448 /* This routine sets an ND variable in a udpparam_t structure. */
3449 /* ARGSUSED */
3450 static int
3451 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3452 {
3453 	long		new_value;
3454 	udpparam_t	*udppa = (udpparam_t *)cp;
3455 
3456 	/*
3457 	 * Fail the request if the new value does not lie within the
3458 	 * required bounds.
3459 	 */
3460 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3461 	    new_value < udppa->udp_param_min ||
3462 	    new_value > udppa->udp_param_max) {
3463 		return (EINVAL);
3464 	}
3465 
3466 	/* Set the new value */
3467 	udppa->udp_param_value = new_value;
3468 	return (0);
3469 }
3470 
3471 /*
3472  * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
3473  * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
3474  * just count the length needed for allocation.  If 'dbuf' is non-NULL,
3475  * then it's assumed to be allocated to be large enough.
3476  *
3477  * Returns zero if trimming of the security option causes all options to go
3478  * away.
3479  */
3480 static size_t
3481 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
3482 {
3483 	struct T_opthdr *toh;
3484 	size_t hol = ipp->ipp_hopoptslen;
3485 	ip6_hbh_t *dstopt = NULL;
3486 	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
3487 	size_t tlen, olen, plen;
3488 	boolean_t deleting;
3489 	const struct ip6_opt *sopt, *lastpad;
3490 	struct ip6_opt *dopt;
3491 
3492 	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
3493 		toh->level = IPPROTO_IPV6;
3494 		toh->name = IPV6_HOPOPTS;
3495 		toh->status = 0;
3496 		dstopt = (ip6_hbh_t *)(toh + 1);
3497 	}
3498 
3499 	/*
3500 	 * If labeling is enabled, then skip the label option
3501 	 * but get other options if there are any.
3502 	 */
3503 	if (is_system_labeled()) {
3504 		dopt = NULL;
3505 		if (dstopt != NULL) {
3506 			/* will fill in ip6h_len later */
3507 			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
3508 			dopt = (struct ip6_opt *)(dstopt + 1);
3509 		}
3510 		sopt = (const struct ip6_opt *)(srcopt + 1);
3511 		hol -= sizeof (*srcopt);
3512 		tlen = sizeof (*dstopt);
3513 		lastpad = NULL;
3514 		deleting = B_FALSE;
3515 		/*
3516 		 * This loop finds the first (lastpad pointer) of any number of
3517 		 * pads that preceeds the security option, then treats the
3518 		 * security option as though it were a pad, and then finds the
3519 		 * next non-pad option (or end of list).
3520 		 *
3521 		 * It then treats the entire block as one big pad.  To preserve
3522 		 * alignment of any options that follow, or just the end of the
3523 		 * list, it computes a minimal new padding size that keeps the
3524 		 * same alignment for the next option.
3525 		 *
3526 		 * If it encounters just a sequence of pads with no security
3527 		 * option, those are copied as-is rather than collapsed.
3528 		 *
3529 		 * Note that to handle the end of list case, the code makes one
3530 		 * loop with 'hol' set to zero.
3531 		 */
3532 		for (;;) {
3533 			if (hol > 0) {
3534 				if (sopt->ip6o_type == IP6OPT_PAD1) {
3535 					if (lastpad == NULL)
3536 						lastpad = sopt;
3537 					sopt = (const struct ip6_opt *)
3538 					    &sopt->ip6o_len;
3539 					hol--;
3540 					continue;
3541 				}
3542 				olen = sopt->ip6o_len + sizeof (*sopt);
3543 				if (olen > hol)
3544 					olen = hol;
3545 				if (sopt->ip6o_type == IP6OPT_PADN ||
3546 				    sopt->ip6o_type == ip6opt_ls) {
3547 					if (sopt->ip6o_type == ip6opt_ls)
3548 						deleting = B_TRUE;
3549 					if (lastpad == NULL)
3550 						lastpad = sopt;
3551 					sopt = (const struct ip6_opt *)
3552 					    ((const char *)sopt + olen);
3553 					hol -= olen;
3554 					continue;
3555 				}
3556 			} else {
3557 				/* if nothing was copied at all, then delete */
3558 				if (tlen == sizeof (*dstopt))
3559 					return (0);
3560 				/* last pass; pick up any trailing padding */
3561 				olen = 0;
3562 			}
3563 			if (deleting) {
3564 				/*
3565 				 * compute aligning effect of deleted material
3566 				 * to reproduce with pad.
3567 				 */
3568 				plen = ((const char *)sopt -
3569 				    (const char *)lastpad) & 7;
3570 				tlen += plen;
3571 				if (dopt != NULL) {
3572 					if (plen == 1) {
3573 						dopt->ip6o_type = IP6OPT_PAD1;
3574 					} else if (plen > 1) {
3575 						plen -= sizeof (*dopt);
3576 						dopt->ip6o_type = IP6OPT_PADN;
3577 						dopt->ip6o_len = plen;
3578 						if (plen > 0)
3579 							bzero(dopt + 1, plen);
3580 					}
3581 					dopt = (struct ip6_opt *)
3582 					    ((char *)dopt + plen);
3583 				}
3584 				deleting = B_FALSE;
3585 				lastpad = NULL;
3586 			}
3587 			/* if there's uncopied padding, then copy that now */
3588 			if (lastpad != NULL) {
3589 				olen += (const char *)sopt -
3590 				    (const char *)lastpad;
3591 				sopt = lastpad;
3592 				lastpad = NULL;
3593 			}
3594 			if (dopt != NULL && olen > 0) {
3595 				bcopy(sopt, dopt, olen);
3596 				dopt = (struct ip6_opt *)((char *)dopt + olen);
3597 			}
3598 			if (hol == 0)
3599 				break;
3600 			tlen += olen;
3601 			sopt = (const struct ip6_opt *)
3602 			    ((const char *)sopt + olen);
3603 			hol -= olen;
3604 		}
3605 		/* go back and patch up the length value, rounded upward */
3606 		if (dstopt != NULL)
3607 			dstopt->ip6h_len = (tlen - 1) >> 3;
3608 	} else {
3609 		tlen = hol;
3610 		if (dstopt != NULL)
3611 			bcopy(srcopt, dstopt, hol);
3612 	}
3613 
3614 	tlen += sizeof (*toh);
3615 	if (toh != NULL)
3616 		toh->len = tlen;
3617 
3618 	return (tlen);
3619 }
3620 
3621 /*
3622  * Update udp_rcv_opt_len from the packet.
3623  * Called when options received, and when no options received but
3624  * udp_ip_recv_opt_len has previously recorded options.
3625  */
3626 static void
3627 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
3628 {
3629 	/* Save the options if any */
3630 	if (opt_len > 0) {
3631 		if (opt_len > udp->udp_ip_rcv_options_len) {
3632 			/* Need to allocate larger buffer */
3633 			if (udp->udp_ip_rcv_options_len != 0)
3634 				mi_free((char *)udp->udp_ip_rcv_options);
3635 			udp->udp_ip_rcv_options_len = 0;
3636 			udp->udp_ip_rcv_options =
3637 			    (uchar_t *)mi_alloc(opt_len, BPRI_HI);
3638 			if (udp->udp_ip_rcv_options != NULL)
3639 				udp->udp_ip_rcv_options_len = opt_len;
3640 		}
3641 		if (udp->udp_ip_rcv_options_len != 0) {
3642 			bcopy(opt, udp->udp_ip_rcv_options, opt_len);
3643 			/* Adjust length if we are resusing the space */
3644 			udp->udp_ip_rcv_options_len = opt_len;
3645 		}
3646 	} else if (udp->udp_ip_rcv_options_len != 0) {
3647 		/* Clear out previously recorded options */
3648 		mi_free((char *)udp->udp_ip_rcv_options);
3649 		udp->udp_ip_rcv_options = NULL;
3650 		udp->udp_ip_rcv_options_len = 0;
3651 	}
3652 }
3653 
3654 static void
3655 udp_queue_fallback(udp_t *udp, mblk_t *mp)
3656 {
3657 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
3658 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
3659 		/*
3660 		 * fallback has started but messages have not been moved yet
3661 		 */
3662 		if (udp->udp_fallback_queue_head == NULL) {
3663 			ASSERT(udp->udp_fallback_queue_tail == NULL);
3664 			udp->udp_fallback_queue_head = mp;
3665 			udp->udp_fallback_queue_tail = mp;
3666 		} else {
3667 			ASSERT(udp->udp_fallback_queue_tail != NULL);
3668 			udp->udp_fallback_queue_tail->b_next = mp;
3669 			udp->udp_fallback_queue_tail = mp;
3670 		}
3671 		mutex_exit(&udp->udp_recv_lock);
3672 	} else {
3673 		/*
3674 		 * no more fallbacks possible, ok to drop lock.
3675 		 */
3676 		mutex_exit(&udp->udp_recv_lock);
3677 		putnext(udp->udp_connp->conn_rq, mp);
3678 	}
3679 }
3680 
3681 /* ARGSUSED2 */
3682 static void
3683 udp_input(void *arg1, mblk_t *mp, void *arg2)
3684 {
3685 	conn_t *connp = (conn_t *)arg1;
3686 	struct T_unitdata_ind	*tudi;
3687 	uchar_t			*rptr;		/* Pointer to IP header */
3688 	int			hdr_length;	/* Length of IP+UDP headers */
3689 	int			opt_len;
3690 	int			udi_size;	/* Size of T_unitdata_ind */
3691 	int			mp_len;
3692 	udp_t			*udp;
3693 	udpha_t			*udpha;
3694 	int			ipversion;
3695 	ip6_pkt_t		ipp;
3696 	ip6_t			*ip6h;
3697 	ip6i_t			*ip6i;
3698 	mblk_t			*mp1;
3699 	mblk_t			*options_mp = NULL;
3700 	ip_pktinfo_t		*pinfo = NULL;
3701 	cred_t			*cr = NULL;
3702 	pid_t			cpid;
3703 	uint32_t		udp_ip_rcv_options_len;
3704 	udp_bits_t		udp_bits;
3705 	cred_t			*rcr = connp->conn_cred;
3706 	udp_stack_t *us;
3707 
3708 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
3709 
3710 	udp = connp->conn_udp;
3711 	us = udp->udp_us;
3712 	rptr = mp->b_rptr;
3713 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3714 	ASSERT(OK_32PTR(rptr));
3715 
3716 	/*
3717 	 * IP should have prepended the options data in an M_CTL
3718 	 * Check M_CTL "type" to make sure we are not here because of
3719 	 * a valid ICMP message
3720 	 */
3721 	if (DB_TYPE(mp) == M_CTL) {
3722 		if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3723 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3724 		    IN_PKTINFO) {
3725 			/*
3726 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3727 			 * has been prepended to the packet by IP. We need to
3728 			 * extract the mblk and adjust the rptr
3729 			 */
3730 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3731 			options_mp = mp;
3732 			mp = mp->b_cont;
3733 			rptr = mp->b_rptr;
3734 			UDP_STAT(us, udp_in_pktinfo);
3735 		} else {
3736 			/*
3737 			 * ICMP messages.
3738 			 */
3739 			udp_icmp_error(connp, mp);
3740 			return;
3741 		}
3742 	}
3743 
3744 	mp_len = msgdsize(mp);
3745 	/*
3746 	 * This is the inbound data path.
3747 	 * First, we check to make sure the IP version number is correct,
3748 	 * and then pull the IP and UDP headers into the first mblk.
3749 	 */
3750 
3751 	/* Initialize regardless if ipversion is IPv4 or IPv6 */
3752 	ipp.ipp_fields = 0;
3753 
3754 	ipversion = IPH_HDR_VERSION(rptr);
3755 
3756 	rw_enter(&udp->udp_rwlock, RW_READER);
3757 	udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len;
3758 	udp_bits = udp->udp_bits;
3759 	rw_exit(&udp->udp_rwlock);
3760 
3761 	switch (ipversion) {
3762 	case IPV4_VERSION:
3763 		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
3764 		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
3765 		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
3766 		opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
3767 		if ((opt_len > 0 || udp_ip_rcv_options_len > 0) &&
3768 		    udp->udp_family == AF_INET) {
3769 			/*
3770 			 * Record/update udp_ip_rcv_options with the lock
3771 			 * held. Not needed for AF_INET6 sockets
3772 			 * since they don't support a getsockopt of IP_OPTIONS.
3773 			 */
3774 			rw_enter(&udp->udp_rwlock, RW_WRITER);
3775 			udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH,
3776 			    opt_len);
3777 			rw_exit(&udp->udp_rwlock);
3778 		}
3779 		/* Handle IPV6_RECVPKTINFO even for IPv4 packet. */
3780 		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
3781 		    udp->udp_ip_recvpktinfo) {
3782 			if (pinfo->ip_pkt_flags & IPF_RECVIF) {
3783 				ipp.ipp_fields |= IPPF_IFINDEX;
3784 				ipp.ipp_ifindex = pinfo->ip_pkt_ifindex;
3785 			}
3786 		}
3787 		break;
3788 	case IPV6_VERSION:
3789 		/*
3790 		 * IPv6 packets can only be received by applications
3791 		 * that are prepared to receive IPv6 addresses.
3792 		 * The IP fanout must ensure this.
3793 		 */
3794 		ASSERT(udp->udp_family == AF_INET6);
3795 
3796 		ip6h = (ip6_t *)rptr;
3797 		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
3798 
3799 		if (ip6h->ip6_nxt != IPPROTO_UDP) {
3800 			uint8_t nexthdrp;
3801 			/* Look for ifindex information */
3802 			if (ip6h->ip6_nxt == IPPROTO_RAW) {
3803 				ip6i = (ip6i_t *)ip6h;
3804 				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
3805 					goto tossit;
3806 
3807 				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3808 					ASSERT(ip6i->ip6i_ifindex != 0);
3809 					ipp.ipp_fields |= IPPF_IFINDEX;
3810 					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3811 				}
3812 				rptr = (uchar_t *)&ip6i[1];
3813 				mp->b_rptr = rptr;
3814 				if (rptr == mp->b_wptr) {
3815 					mp1 = mp->b_cont;
3816 					freeb(mp);
3817 					mp = mp1;
3818 					rptr = mp->b_rptr;
3819 				}
3820 				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
3821 					goto tossit;
3822 				ip6h = (ip6_t *)rptr;
3823 				mp_len = msgdsize(mp);
3824 			}
3825 			/*
3826 			 * Find any potentially interesting extension headers
3827 			 * as well as the length of the IPv6 + extension
3828 			 * headers.
3829 			 */
3830 			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
3831 			    UDPH_SIZE;
3832 			ASSERT(nexthdrp == IPPROTO_UDP);
3833 		} else {
3834 			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
3835 			ip6i = NULL;
3836 		}
3837 		break;
3838 	default:
3839 		ASSERT(0);
3840 	}
3841 
3842 	/*
3843 	 * IP inspected the UDP header thus all of it must be in the mblk.
3844 	 * UDP length check is performed for IPv6 packets and IPv4 packets
3845 	 * to check if the size of the packet as specified
3846 	 * by the header is the same as the physical size of the packet.
3847 	 * FIXME? Didn't IP already check this?
3848 	 */
3849 	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
3850 	if ((MBLKL(mp) < hdr_length) ||
3851 	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
3852 		goto tossit;
3853 	}
3854 
3855 
3856 	/* Walk past the headers unless IP_RECVHDR was set. */
3857 	if (!udp_bits.udpb_rcvhdr) {
3858 		mp->b_rptr = rptr + hdr_length;
3859 		mp_len -= hdr_length;
3860 	}
3861 
3862 	/*
3863 	 * This is the inbound data path.  Packets are passed upstream as
3864 	 * T_UNITDATA_IND messages with full IP headers still attached.
3865 	 */
3866 	if (udp->udp_family == AF_INET) {
3867 		sin_t *sin;
3868 
3869 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
3870 
3871 		/*
3872 		 * Normally only send up the source address.
3873 		 * If IP_RECVDSTADDR is set we include the destination IP
3874 		 * address as an option. With IP_RECVOPTS we include all
3875 		 * the IP options.
3876 		 */
3877 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3878 		if (udp_bits.udpb_recvdstaddr) {
3879 			udi_size += sizeof (struct T_opthdr) +
3880 			    sizeof (struct in_addr);
3881 			UDP_STAT(us, udp_in_recvdstaddr);
3882 		}
3883 
3884 		if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) &&
3885 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3886 			udi_size += sizeof (struct T_opthdr) +
3887 			    sizeof (struct in_pktinfo);
3888 			UDP_STAT(us, udp_ip_rcvpktinfo);
3889 		}
3890 
3891 		if ((udp_bits.udpb_recvopts) && opt_len > 0) {
3892 			udi_size += sizeof (struct T_opthdr) + opt_len;
3893 			UDP_STAT(us, udp_in_recvopts);
3894 		}
3895 
3896 		/*
3897 		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
3898 		 * space accordingly
3899 		 */
3900 		if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
3901 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3902 			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
3903 			UDP_STAT(us, udp_in_recvif);
3904 		}
3905 
3906 		if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
3907 		    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
3908 			udi_size += sizeof (struct T_opthdr) +
3909 			    sizeof (struct sockaddr_dl);
3910 			UDP_STAT(us, udp_in_recvslla);
3911 		}
3912 
3913 		if ((udp_bits.udpb_recvucred) &&
3914 		    (cr = DB_CRED(mp)) != NULL) {
3915 			udi_size += sizeof (struct T_opthdr) + ucredsize;
3916 			cpid = DB_CPID(mp);
3917 			UDP_STAT(us, udp_in_recvucred);
3918 		}
3919 
3920 		/*
3921 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3922 		 * buffer. Since gethrestime() expects a pointer aligned
3923 		 * argument, we allocate space necessary for extra
3924 		 * alignment (even though it might not be used).
3925 		 */
3926 		if (udp_bits.udpb_timestamp) {
3927 			udi_size += sizeof (struct T_opthdr) +
3928 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3929 			UDP_STAT(us, udp_in_timestamp);
3930 		}
3931 
3932 		/*
3933 		 * If IP_RECVTTL is set allocate the appropriate sized buffer
3934 		 */
3935 		if (udp_bits.udpb_recvttl) {
3936 			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
3937 			UDP_STAT(us, udp_in_recvttl);
3938 		}
3939 
3940 		/* Allocate a message block for the T_UNITDATA_IND structure. */
3941 		mp1 = allocb(udi_size, BPRI_MED);
3942 		if (mp1 == NULL) {
3943 			freemsg(mp);
3944 			if (options_mp != NULL)
3945 				freeb(options_mp);
3946 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
3947 			return;
3948 		}
3949 		mp1->b_cont = mp;
3950 		mp = mp1;
3951 		mp->b_datap->db_type = M_PROTO;
3952 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3953 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3954 		tudi->PRIM_type = T_UNITDATA_IND;
3955 		tudi->SRC_length = sizeof (sin_t);
3956 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3957 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
3958 		    sizeof (sin_t);
3959 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3960 		tudi->OPT_length = udi_size;
3961 		sin = (sin_t *)&tudi[1];
3962 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
3963 		sin->sin_port =	udpha->uha_src_port;
3964 		sin->sin_family = udp->udp_family;
3965 		*(uint32_t *)&sin->sin_zero[0] = 0;
3966 		*(uint32_t *)&sin->sin_zero[4] = 0;
3967 
3968 		/*
3969 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
3970 		 * IP_RECVTTL has been set.
3971 		 */
3972 		if (udi_size != 0) {
3973 			/*
3974 			 * Copy in destination address before options to avoid
3975 			 * any padding issues.
3976 			 */
3977 			char *dstopt;
3978 
3979 			dstopt = (char *)&sin[1];
3980 			if (udp_bits.udpb_recvdstaddr) {
3981 				struct T_opthdr *toh;
3982 				ipaddr_t *dstptr;
3983 
3984 				toh = (struct T_opthdr *)dstopt;
3985 				toh->level = IPPROTO_IP;
3986 				toh->name = IP_RECVDSTADDR;
3987 				toh->len = sizeof (struct T_opthdr) +
3988 				    sizeof (ipaddr_t);
3989 				toh->status = 0;
3990 				dstopt += sizeof (struct T_opthdr);
3991 				dstptr = (ipaddr_t *)dstopt;
3992 				*dstptr = ((ipha_t *)rptr)->ipha_dst;
3993 				dstopt += sizeof (ipaddr_t);
3994 				udi_size -= toh->len;
3995 			}
3996 
3997 			if (udp_bits.udpb_recvopts && opt_len > 0) {
3998 				struct T_opthdr *toh;
3999 
4000 				toh = (struct T_opthdr *)dstopt;
4001 				toh->level = IPPROTO_IP;
4002 				toh->name = IP_RECVOPTS;
4003 				toh->len = sizeof (struct T_opthdr) + opt_len;
4004 				toh->status = 0;
4005 				dstopt += sizeof (struct T_opthdr);
4006 				bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt,
4007 				    opt_len);
4008 				dstopt += opt_len;
4009 				udi_size -= toh->len;
4010 			}
4011 
4012 			if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) &&
4013 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
4014 				struct T_opthdr *toh;
4015 				struct in_pktinfo *pktinfop;
4016 
4017 				toh = (struct T_opthdr *)dstopt;
4018 				toh->level = IPPROTO_IP;
4019 				toh->name = IP_PKTINFO;
4020 				toh->len = sizeof (struct T_opthdr) +
4021 				    sizeof (*pktinfop);
4022 				toh->status = 0;
4023 				dstopt += sizeof (struct T_opthdr);
4024 				pktinfop = (struct in_pktinfo *)dstopt;
4025 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
4026 				pktinfop->ipi_spec_dst =
4027 				    pinfo->ip_pkt_match_addr;
4028 				pktinfop->ipi_addr.s_addr =
4029 				    ((ipha_t *)rptr)->ipha_dst;
4030 
4031 				dstopt += sizeof (struct in_pktinfo);
4032 				udi_size -= toh->len;
4033 			}
4034 
4035 			if ((udp_bits.udpb_recvslla) && (pinfo != NULL) &&
4036 			    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
4037 
4038 				struct T_opthdr *toh;
4039 				struct sockaddr_dl	*dstptr;
4040 
4041 				toh = (struct T_opthdr *)dstopt;
4042 				toh->level = IPPROTO_IP;
4043 				toh->name = IP_RECVSLLA;
4044 				toh->len = sizeof (struct T_opthdr) +
4045 				    sizeof (struct sockaddr_dl);
4046 				toh->status = 0;
4047 				dstopt += sizeof (struct T_opthdr);
4048 				dstptr = (struct sockaddr_dl *)dstopt;
4049 				bcopy(&pinfo->ip_pkt_slla, dstptr,
4050 				    sizeof (struct sockaddr_dl));
4051 				dstopt += sizeof (struct sockaddr_dl);
4052 				udi_size -= toh->len;
4053 			}
4054 
4055 			if ((udp_bits.udpb_recvif) && (pinfo != NULL) &&
4056 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
4057 
4058 				struct T_opthdr *toh;
4059 				uint_t		*dstptr;
4060 
4061 				toh = (struct T_opthdr *)dstopt;
4062 				toh->level = IPPROTO_IP;
4063 				toh->name = IP_RECVIF;
4064 				toh->len = sizeof (struct T_opthdr) +
4065 				    sizeof (uint_t);
4066 				toh->status = 0;
4067 				dstopt += sizeof (struct T_opthdr);
4068 				dstptr = (uint_t *)dstopt;
4069 				*dstptr = pinfo->ip_pkt_ifindex;
4070 				dstopt += sizeof (uint_t);
4071 				udi_size -= toh->len;
4072 			}
4073 
4074 			if (cr != NULL) {
4075 				struct T_opthdr *toh;
4076 
4077 				toh = (struct T_opthdr *)dstopt;
4078 				toh->level = SOL_SOCKET;
4079 				toh->name = SCM_UCRED;
4080 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4081 				toh->status = 0;
4082 				dstopt += sizeof (struct T_opthdr);
4083 				(void) cred2ucred(cr, cpid, dstopt, rcr);
4084 				dstopt += ucredsize;
4085 				udi_size -= toh->len;
4086 			}
4087 
4088 			if (udp_bits.udpb_timestamp) {
4089 				struct	T_opthdr *toh;
4090 
4091 				toh = (struct T_opthdr *)dstopt;
4092 				toh->level = SOL_SOCKET;
4093 				toh->name = SCM_TIMESTAMP;
4094 				toh->len = sizeof (struct T_opthdr) +
4095 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4096 				toh->status = 0;
4097 				dstopt += sizeof (struct T_opthdr);
4098 				/* Align for gethrestime() */
4099 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
4100 				    sizeof (intptr_t));
4101 				gethrestime((timestruc_t *)dstopt);
4102 				dstopt = (char *)toh + toh->len;
4103 				udi_size -= toh->len;
4104 			}
4105 
4106 			/*
4107 			 * CAUTION:
4108 			 * Due to alignment issues
4109 			 * Processing of IP_RECVTTL option
4110 			 * should always be the last. Adding
4111 			 * any option processing after this will
4112 			 * cause alignment panic.
4113 			 */
4114 			if (udp_bits.udpb_recvttl) {
4115 				struct	T_opthdr *toh;
4116 				uint8_t	*dstptr;
4117 
4118 				toh = (struct T_opthdr *)dstopt;
4119 				toh->level = IPPROTO_IP;
4120 				toh->name = IP_RECVTTL;
4121 				toh->len = sizeof (struct T_opthdr) +
4122 				    sizeof (uint8_t);
4123 				toh->status = 0;
4124 				dstopt += sizeof (struct T_opthdr);
4125 				dstptr = (uint8_t *)dstopt;
4126 				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
4127 				dstopt += sizeof (uint8_t);
4128 				udi_size -= toh->len;
4129 			}
4130 
4131 			/* Consumed all of allocated space */
4132 			ASSERT(udi_size == 0);
4133 		}
4134 	} else {
4135 		sin6_t *sin6;
4136 
4137 		/*
4138 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
4139 		 *
4140 		 * Normally we only send up the address. If receiving of any
4141 		 * optional receive side information is enabled, we also send
4142 		 * that up as options.
4143 		 */
4144 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
4145 
4146 		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
4147 		    IPPF_RTHDR|IPPF_IFINDEX)) {
4148 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4149 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4150 				size_t hlen;
4151 
4152 				UDP_STAT(us, udp_in_recvhopopts);
4153 				hlen = copy_hop_opts(&ipp, NULL);
4154 				if (hlen == 0)
4155 					ipp.ipp_fields &= ~IPPF_HOPOPTS;
4156 				udi_size += hlen;
4157 			}
4158 			if (((udp_bits.udpb_ipv6_recvdstopts) ||
4159 			    udp_bits.udpb_old_ipv6_recvdstopts) &&
4160 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4161 				udi_size += sizeof (struct T_opthdr) +
4162 				    ipp.ipp_dstoptslen;
4163 				UDP_STAT(us, udp_in_recvdstopts);
4164 			}
4165 			if ((((udp_bits.udpb_ipv6_recvdstopts) &&
4166 			    udp_bits.udpb_ipv6_recvrthdr &&
4167 			    (ipp.ipp_fields & IPPF_RTHDR)) ||
4168 			    (udp_bits.udpb_ipv6_recvrthdrdstopts)) &&
4169 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4170 				udi_size += sizeof (struct T_opthdr) +
4171 				    ipp.ipp_rtdstoptslen;
4172 				UDP_STAT(us, udp_in_recvrtdstopts);
4173 			}
4174 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4175 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4176 				udi_size += sizeof (struct T_opthdr) +
4177 				    ipp.ipp_rthdrlen;
4178 				UDP_STAT(us, udp_in_recvrthdr);
4179 			}
4180 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4181 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4182 				udi_size += sizeof (struct T_opthdr) +
4183 				    sizeof (struct in6_pktinfo);
4184 				UDP_STAT(us, udp_in_recvpktinfo);
4185 			}
4186 
4187 		}
4188 		if ((udp_bits.udpb_recvucred) &&
4189 		    (cr = DB_CRED(mp)) != NULL) {
4190 			udi_size += sizeof (struct T_opthdr) + ucredsize;
4191 			cpid = DB_CPID(mp);
4192 			UDP_STAT(us, udp_in_recvucred);
4193 		}
4194 
4195 		/*
4196 		 * If SO_TIMESTAMP is set allocate the appropriate sized
4197 		 * buffer. Since gethrestime() expects a pointer aligned
4198 		 * argument, we allocate space necessary for extra
4199 		 * alignment (even though it might not be used).
4200 		 */
4201 		if (udp_bits.udpb_timestamp) {
4202 			udi_size += sizeof (struct T_opthdr) +
4203 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4204 			UDP_STAT(us, udp_in_timestamp);
4205 		}
4206 
4207 		if (udp_bits.udpb_ipv6_recvhoplimit) {
4208 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4209 			UDP_STAT(us, udp_in_recvhoplimit);
4210 		}
4211 
4212 		if (udp_bits.udpb_ipv6_recvtclass) {
4213 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4214 			UDP_STAT(us, udp_in_recvtclass);
4215 		}
4216 
4217 		mp1 = allocb(udi_size, BPRI_MED);
4218 		if (mp1 == NULL) {
4219 			freemsg(mp);
4220 			if (options_mp != NULL)
4221 				freeb(options_mp);
4222 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
4223 			return;
4224 		}
4225 		mp1->b_cont = mp;
4226 		mp = mp1;
4227 		mp->b_datap->db_type = M_PROTO;
4228 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
4229 		mp->b_wptr = (uchar_t *)tudi + udi_size;
4230 		tudi->PRIM_type = T_UNITDATA_IND;
4231 		tudi->SRC_length = sizeof (sin6_t);
4232 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
4233 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
4234 		    sizeof (sin6_t);
4235 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
4236 		tudi->OPT_length = udi_size;
4237 		sin6 = (sin6_t *)&tudi[1];
4238 		if (ipversion == IPV4_VERSION) {
4239 			in6_addr_t v6dst;
4240 
4241 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
4242 			    &sin6->sin6_addr);
4243 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
4244 			    &v6dst);
4245 			sin6->sin6_flowinfo = 0;
4246 			sin6->sin6_scope_id = 0;
4247 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
4248 			    connp->conn_zoneid, us->us_netstack);
4249 		} else {
4250 			sin6->sin6_addr = ip6h->ip6_src;
4251 			/* No sin6_flowinfo per API */
4252 			sin6->sin6_flowinfo = 0;
4253 			/* For link-scope source pass up scope id */
4254 			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
4255 			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
4256 				sin6->sin6_scope_id = ipp.ipp_ifindex;
4257 			else
4258 				sin6->sin6_scope_id = 0;
4259 			sin6->__sin6_src_id = ip_srcid_find_addr(
4260 			    &ip6h->ip6_dst, connp->conn_zoneid,
4261 			    us->us_netstack);
4262 		}
4263 		sin6->sin6_port = udpha->uha_src_port;
4264 		sin6->sin6_family = udp->udp_family;
4265 
4266 		if (udi_size != 0) {
4267 			uchar_t *dstopt;
4268 
4269 			dstopt = (uchar_t *)&sin6[1];
4270 			if ((udp_bits.udpb_ip_recvpktinfo) &&
4271 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4272 				struct T_opthdr *toh;
4273 				struct in6_pktinfo *pkti;
4274 
4275 				toh = (struct T_opthdr *)dstopt;
4276 				toh->level = IPPROTO_IPV6;
4277 				toh->name = IPV6_PKTINFO;
4278 				toh->len = sizeof (struct T_opthdr) +
4279 				    sizeof (*pkti);
4280 				toh->status = 0;
4281 				dstopt += sizeof (struct T_opthdr);
4282 				pkti = (struct in6_pktinfo *)dstopt;
4283 				if (ipversion == IPV6_VERSION)
4284 					pkti->ipi6_addr = ip6h->ip6_dst;
4285 				else
4286 					IN6_IPADDR_TO_V4MAPPED(
4287 					    ((ipha_t *)rptr)->ipha_dst,
4288 					    &pkti->ipi6_addr);
4289 				pkti->ipi6_ifindex = ipp.ipp_ifindex;
4290 				dstopt += sizeof (*pkti);
4291 				udi_size -= toh->len;
4292 			}
4293 			if (udp_bits.udpb_ipv6_recvhoplimit) {
4294 				struct T_opthdr *toh;
4295 
4296 				toh = (struct T_opthdr *)dstopt;
4297 				toh->level = IPPROTO_IPV6;
4298 				toh->name = IPV6_HOPLIMIT;
4299 				toh->len = sizeof (struct T_opthdr) +
4300 				    sizeof (uint_t);
4301 				toh->status = 0;
4302 				dstopt += sizeof (struct T_opthdr);
4303 				if (ipversion == IPV6_VERSION)
4304 					*(uint_t *)dstopt = ip6h->ip6_hops;
4305 				else
4306 					*(uint_t *)dstopt =
4307 					    ((ipha_t *)rptr)->ipha_ttl;
4308 				dstopt += sizeof (uint_t);
4309 				udi_size -= toh->len;
4310 			}
4311 			if (udp_bits.udpb_ipv6_recvtclass) {
4312 				struct T_opthdr *toh;
4313 
4314 				toh = (struct T_opthdr *)dstopt;
4315 				toh->level = IPPROTO_IPV6;
4316 				toh->name = IPV6_TCLASS;
4317 				toh->len = sizeof (struct T_opthdr) +
4318 				    sizeof (uint_t);
4319 				toh->status = 0;
4320 				dstopt += sizeof (struct T_opthdr);
4321 				if (ipversion == IPV6_VERSION) {
4322 					*(uint_t *)dstopt =
4323 					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
4324 				} else {
4325 					ipha_t *ipha = (ipha_t *)rptr;
4326 					*(uint_t *)dstopt =
4327 					    ipha->ipha_type_of_service;
4328 				}
4329 				dstopt += sizeof (uint_t);
4330 				udi_size -= toh->len;
4331 			}
4332 			if ((udp_bits.udpb_ipv6_recvhopopts) &&
4333 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4334 				size_t hlen;
4335 
4336 				hlen = copy_hop_opts(&ipp, dstopt);
4337 				dstopt += hlen;
4338 				udi_size -= hlen;
4339 			}
4340 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4341 			    (udp_bits.udpb_ipv6_recvrthdr) &&
4342 			    (ipp.ipp_fields & IPPF_RTHDR) &&
4343 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4344 				struct T_opthdr *toh;
4345 
4346 				toh = (struct T_opthdr *)dstopt;
4347 				toh->level = IPPROTO_IPV6;
4348 				toh->name = IPV6_DSTOPTS;
4349 				toh->len = sizeof (struct T_opthdr) +
4350 				    ipp.ipp_rtdstoptslen;
4351 				toh->status = 0;
4352 				dstopt += sizeof (struct T_opthdr);
4353 				bcopy(ipp.ipp_rtdstopts, dstopt,
4354 				    ipp.ipp_rtdstoptslen);
4355 				dstopt += ipp.ipp_rtdstoptslen;
4356 				udi_size -= toh->len;
4357 			}
4358 			if ((udp_bits.udpb_ipv6_recvrthdr) &&
4359 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4360 				struct T_opthdr *toh;
4361 
4362 				toh = (struct T_opthdr *)dstopt;
4363 				toh->level = IPPROTO_IPV6;
4364 				toh->name = IPV6_RTHDR;
4365 				toh->len = sizeof (struct T_opthdr) +
4366 				    ipp.ipp_rthdrlen;
4367 				toh->status = 0;
4368 				dstopt += sizeof (struct T_opthdr);
4369 				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
4370 				dstopt += ipp.ipp_rthdrlen;
4371 				udi_size -= toh->len;
4372 			}
4373 			if ((udp_bits.udpb_ipv6_recvdstopts) &&
4374 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4375 				struct T_opthdr *toh;
4376 
4377 				toh = (struct T_opthdr *)dstopt;
4378 				toh->level = IPPROTO_IPV6;
4379 				toh->name = IPV6_DSTOPTS;
4380 				toh->len = sizeof (struct T_opthdr) +
4381 				    ipp.ipp_dstoptslen;
4382 				toh->status = 0;
4383 				dstopt += sizeof (struct T_opthdr);
4384 				bcopy(ipp.ipp_dstopts, dstopt,
4385 				    ipp.ipp_dstoptslen);
4386 				dstopt += ipp.ipp_dstoptslen;
4387 				udi_size -= toh->len;
4388 			}
4389 			if (cr != NULL) {
4390 				struct T_opthdr *toh;
4391 
4392 				toh = (struct T_opthdr *)dstopt;
4393 				toh->level = SOL_SOCKET;
4394 				toh->name = SCM_UCRED;
4395 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4396 				toh->status = 0;
4397 				(void) cred2ucred(cr, cpid, &toh[1], rcr);
4398 				dstopt += toh->len;
4399 				udi_size -= toh->len;
4400 			}
4401 			if (udp_bits.udpb_timestamp) {
4402 				struct	T_opthdr *toh;
4403 
4404 				toh = (struct T_opthdr *)dstopt;
4405 				toh->level = SOL_SOCKET;
4406 				toh->name = SCM_TIMESTAMP;
4407 				toh->len = sizeof (struct T_opthdr) +
4408 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4409 				toh->status = 0;
4410 				dstopt += sizeof (struct T_opthdr);
4411 				/* Align for gethrestime() */
4412 				dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
4413 				    sizeof (intptr_t));
4414 				gethrestime((timestruc_t *)dstopt);
4415 				dstopt = (uchar_t *)toh + toh->len;
4416 				udi_size -= toh->len;
4417 			}
4418 
4419 			/* Consumed all of allocated space */
4420 			ASSERT(udi_size == 0);
4421 		}
4422 #undef	sin6
4423 		/* No IP_RECVDSTADDR for IPv6. */
4424 	}
4425 
4426 	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
4427 	if (options_mp != NULL)
4428 		freeb(options_mp);
4429 
4430 	if (IPCL_IS_NONSTR(connp)) {
4431 		int error;
4432 
4433 		if ((*connp->conn_upcalls->su_recv)
4434 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
4435 		    NULL) < 0) {
4436 			mutex_enter(&udp->udp_recv_lock);
4437 			if (error == ENOSPC) {
4438 				/*
4439 				 * let's confirm while holding the lock
4440 				 */
4441 				if ((*connp->conn_upcalls->su_recv)
4442 				    (connp->conn_upper_handle, NULL, 0, 0,
4443 				    &error, NULL) < 0) {
4444 					if (error == ENOSPC) {
4445 						connp->conn_flow_cntrld =
4446 						    B_TRUE;
4447 					} else {
4448 						ASSERT(error == EOPNOTSUPP);
4449 					}
4450 				}
4451 				mutex_exit(&udp->udp_recv_lock);
4452 			} else {
4453 				ASSERT(error == EOPNOTSUPP);
4454 				udp_queue_fallback(udp, mp);
4455 			}
4456 		}
4457 	} else {
4458 		putnext(connp->conn_rq, mp);
4459 	}
4460 	ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
4461 	return;
4462 
4463 tossit:
4464 	freemsg(mp);
4465 	if (options_mp != NULL)
4466 		freeb(options_mp);
4467 	BUMP_MIB(&us->us_udp_mib, udpInErrors);
4468 }
4469 
4470 /*
4471  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
4472  * information that can be changing beneath us.
4473  */
4474 mblk_t *
4475 udp_snmp_get(queue_t *q, mblk_t *mpctl)
4476 {
4477 	mblk_t			*mpdata;
4478 	mblk_t			*mp_conn_ctl;
4479 	mblk_t			*mp_attr_ctl;
4480 	mblk_t			*mp6_conn_ctl;
4481 	mblk_t			*mp6_attr_ctl;
4482 	mblk_t			*mp_conn_tail;
4483 	mblk_t			*mp_attr_tail;
4484 	mblk_t			*mp6_conn_tail;
4485 	mblk_t			*mp6_attr_tail;
4486 	struct opthdr		*optp;
4487 	mib2_udpEntry_t		ude;
4488 	mib2_udp6Entry_t	ude6;
4489 	mib2_transportMLPEntry_t mlp;
4490 	int			state;
4491 	zoneid_t		zoneid;
4492 	int			i;
4493 	connf_t			*connfp;
4494 	conn_t			*connp = Q_TO_CONN(q);
4495 	int			v4_conn_idx;
4496 	int			v6_conn_idx;
4497 	boolean_t		needattr;
4498 	udp_t			*udp;
4499 	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
4500 	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
4501 	mblk_t			*mp2ctl;
4502 
4503 	/*
4504 	 * make a copy of the original message
4505 	 */
4506 	mp2ctl = copymsg(mpctl);
4507 
4508 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
4509 	if (mpctl == NULL ||
4510 	    (mpdata = mpctl->b_cont) == NULL ||
4511 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
4512 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
4513 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
4514 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
4515 		freemsg(mp_conn_ctl);
4516 		freemsg(mp_attr_ctl);
4517 		freemsg(mp6_conn_ctl);
4518 		freemsg(mpctl);
4519 		freemsg(mp2ctl);
4520 		return (0);
4521 	}
4522 
4523 	zoneid = connp->conn_zoneid;
4524 
4525 	/* fixed length structure for IPv4 and IPv6 counters */
4526 	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
4527 	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
4528 	/* synchronize 64- and 32-bit counters */
4529 	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
4530 	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
4531 
4532 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4533 	optp->level = MIB2_UDP;
4534 	optp->name = 0;
4535 	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
4536 	    sizeof (us->us_udp_mib));
4537 	optp->len = msgdsize(mpdata);
4538 	qreply(q, mpctl);
4539 
4540 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
4541 	v4_conn_idx = v6_conn_idx = 0;
4542 
4543 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4544 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4545 		connp = NULL;
4546 
4547 		while ((connp = ipcl_get_next_conn(connfp, connp,
4548 		    IPCL_UDPCONN))) {
4549 			udp = connp->conn_udp;
4550 			if (zoneid != connp->conn_zoneid)
4551 				continue;
4552 
4553 			/*
4554 			 * Note that the port numbers are sent in
4555 			 * host byte order
4556 			 */
4557 
4558 			if (udp->udp_state == TS_UNBND)
4559 				state = MIB2_UDP_unbound;
4560 			else if (udp->udp_state == TS_IDLE)
4561 				state = MIB2_UDP_idle;
4562 			else if (udp->udp_state == TS_DATA_XFER)
4563 				state = MIB2_UDP_connected;
4564 			else
4565 				state = MIB2_UDP_unknown;
4566 
4567 			needattr = B_FALSE;
4568 			bzero(&mlp, sizeof (mlp));
4569 			if (connp->conn_mlp_type != mlptSingle) {
4570 				if (connp->conn_mlp_type == mlptShared ||
4571 				    connp->conn_mlp_type == mlptBoth)
4572 					mlp.tme_flags |= MIB2_TMEF_SHARED;
4573 				if (connp->conn_mlp_type == mlptPrivate ||
4574 				    connp->conn_mlp_type == mlptBoth)
4575 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
4576 				needattr = B_TRUE;
4577 			}
4578 
4579 			/*
4580 			 * Create an IPv4 table entry for IPv4 entries and also
4581 			 * any IPv6 entries which are bound to in6addr_any
4582 			 * (i.e. anything a IPv4 peer could connect/send to).
4583 			 */
4584 			if (udp->udp_ipversion == IPV4_VERSION ||
4585 			    (udp->udp_state <= TS_IDLE &&
4586 			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
4587 				ude.udpEntryInfo.ue_state = state;
4588 				/*
4589 				 * If in6addr_any this will set it to
4590 				 * INADDR_ANY
4591 				 */
4592 				ude.udpLocalAddress =
4593 				    V4_PART_OF_V6(udp->udp_v6src);
4594 				ude.udpLocalPort = ntohs(udp->udp_port);
4595 				if (udp->udp_state == TS_DATA_XFER) {
4596 					/*
4597 					 * Can potentially get here for
4598 					 * v6 socket if another process
4599 					 * (say, ping) has just done a
4600 					 * sendto(), changing the state
4601 					 * from the TS_IDLE above to
4602 					 * TS_DATA_XFER by the time we hit
4603 					 * this part of the code.
4604 					 */
4605 					ude.udpEntryInfo.ue_RemoteAddress =
4606 					    V4_PART_OF_V6(udp->udp_v6dst);
4607 					ude.udpEntryInfo.ue_RemotePort =
4608 					    ntohs(udp->udp_dstport);
4609 				} else {
4610 					ude.udpEntryInfo.ue_RemoteAddress = 0;
4611 					ude.udpEntryInfo.ue_RemotePort = 0;
4612 				}
4613 
4614 				/*
4615 				 * We make the assumption that all udp_t
4616 				 * structs will be created within an address
4617 				 * region no larger than 32-bits.
4618 				 */
4619 				ude.udpInstance = (uint32_t)(uintptr_t)udp;
4620 				ude.udpCreationProcess =
4621 				    (udp->udp_open_pid < 0) ?
4622 				    MIB2_UNKNOWN_PROCESS :
4623 				    udp->udp_open_pid;
4624 				ude.udpCreationTime = udp->udp_open_time;
4625 
4626 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
4627 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
4628 				mlp.tme_connidx = v4_conn_idx++;
4629 				if (needattr)
4630 					(void) snmp_append_data2(
4631 					    mp_attr_ctl->b_cont, &mp_attr_tail,
4632 					    (char *)&mlp, sizeof (mlp));
4633 			}
4634 			if (udp->udp_ipversion == IPV6_VERSION) {
4635 				ude6.udp6EntryInfo.ue_state  = state;
4636 				ude6.udp6LocalAddress = udp->udp_v6src;
4637 				ude6.udp6LocalPort = ntohs(udp->udp_port);
4638 				ude6.udp6IfIndex = udp->udp_bound_if;
4639 				if (udp->udp_state == TS_DATA_XFER) {
4640 					ude6.udp6EntryInfo.ue_RemoteAddress =
4641 					    udp->udp_v6dst;
4642 					ude6.udp6EntryInfo.ue_RemotePort =
4643 					    ntohs(udp->udp_dstport);
4644 				} else {
4645 					ude6.udp6EntryInfo.ue_RemoteAddress =
4646 					    sin6_null.sin6_addr;
4647 					ude6.udp6EntryInfo.ue_RemotePort = 0;
4648 				}
4649 				/*
4650 				 * We make the assumption that all udp_t
4651 				 * structs will be created within an address
4652 				 * region no larger than 32-bits.
4653 				 */
4654 				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
4655 				ude6.udp6CreationProcess =
4656 				    (udp->udp_open_pid < 0) ?
4657 				    MIB2_UNKNOWN_PROCESS :
4658 				    udp->udp_open_pid;
4659 				ude6.udp6CreationTime = udp->udp_open_time;
4660 
4661 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
4662 				    &mp6_conn_tail, (char *)&ude6,
4663 				    sizeof (ude6));
4664 				mlp.tme_connidx = v6_conn_idx++;
4665 				if (needattr)
4666 					(void) snmp_append_data2(
4667 					    mp6_attr_ctl->b_cont,
4668 					    &mp6_attr_tail, (char *)&mlp,
4669 					    sizeof (mlp));
4670 			}
4671 		}
4672 	}
4673 
4674 	/* IPv4 UDP endpoints */
4675 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
4676 	    sizeof (struct T_optmgmt_ack)];
4677 	optp->level = MIB2_UDP;
4678 	optp->name = MIB2_UDP_ENTRY;
4679 	optp->len = msgdsize(mp_conn_ctl->b_cont);
4680 	qreply(q, mp_conn_ctl);
4681 
4682 	/* table of MLP attributes... */
4683 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
4684 	    sizeof (struct T_optmgmt_ack)];
4685 	optp->level = MIB2_UDP;
4686 	optp->name = EXPER_XPORT_MLP;
4687 	optp->len = msgdsize(mp_attr_ctl->b_cont);
4688 	if (optp->len == 0)
4689 		freemsg(mp_attr_ctl);
4690 	else
4691 		qreply(q, mp_attr_ctl);
4692 
4693 	/* IPv6 UDP endpoints */
4694 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
4695 	    sizeof (struct T_optmgmt_ack)];
4696 	optp->level = MIB2_UDP6;
4697 	optp->name = MIB2_UDP6_ENTRY;
4698 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
4699 	qreply(q, mp6_conn_ctl);
4700 
4701 	/* table of MLP attributes... */
4702 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
4703 	    sizeof (struct T_optmgmt_ack)];
4704 	optp->level = MIB2_UDP6;
4705 	optp->name = EXPER_XPORT_MLP;
4706 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
4707 	if (optp->len == 0)
4708 		freemsg(mp6_attr_ctl);
4709 	else
4710 		qreply(q, mp6_attr_ctl);
4711 
4712 	return (mp2ctl);
4713 }
4714 
4715 /*
4716  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
4717  * NOTE: Per MIB-II, UDP has no writable data.
4718  * TODO:  If this ever actually tries to set anything, it needs to be
4719  * to do the appropriate locking.
4720  */
4721 /* ARGSUSED */
4722 int
4723 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4724     uchar_t *ptr, int len)
4725 {
4726 	switch (level) {
4727 	case MIB2_UDP:
4728 		return (0);
4729 	default:
4730 		return (1);
4731 	}
4732 }
4733 
4734 static void
4735 udp_report_item(mblk_t *mp, udp_t *udp)
4736 {
4737 	char *state;
4738 	char addrbuf1[INET6_ADDRSTRLEN];
4739 	char addrbuf2[INET6_ADDRSTRLEN];
4740 	uint_t print_len, buf_len;
4741 
4742 	buf_len = mp->b_datap->db_lim - mp->b_wptr;
4743 	ASSERT(buf_len >= 0);
4744 	if (buf_len == 0)
4745 		return;
4746 
4747 	if (udp->udp_state == TS_UNBND)
4748 		state = "UNBOUND";
4749 	else if (udp->udp_state == TS_IDLE)
4750 		state = "IDLE";
4751 	else if (udp->udp_state == TS_DATA_XFER)
4752 		state = "CONNECTED";
4753 	else
4754 		state = "UnkState";
4755 	print_len = snprintf((char *)mp->b_wptr, buf_len,
4756 	    MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n",
4757 	    (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port),
4758 	    inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)),
4759 	    inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)),
4760 	    ntohs(udp->udp_dstport), state);
4761 	if (print_len < buf_len) {
4762 		mp->b_wptr += print_len;
4763 	} else {
4764 		mp->b_wptr += buf_len;
4765 	}
4766 }
4767 
4768 /* Report for ndd "udp_status" */
4769 /* ARGSUSED */
4770 static int
4771 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4772 {
4773 	zoneid_t zoneid;
4774 	connf_t	*connfp;
4775 	conn_t	*connp = Q_TO_CONN(q);
4776 	udp_t	*udp = connp->conn_udp;
4777 	int	i;
4778 	udp_stack_t *us = udp->udp_us;
4779 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4780 
4781 	/*
4782 	 * Because of the ndd constraint, at most we can have 64K buffer
4783 	 * to put in all UDP info.  So to be more efficient, just
4784 	 * allocate a 64K buffer here, assuming we need that large buffer.
4785 	 * This may be a problem as any user can read udp_status.  Therefore
4786 	 * we limit the rate of doing this using us_ndd_get_info_interval.
4787 	 * This should be OK as normal users should not do this too often.
4788 	 */
4789 	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
4790 		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
4791 		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
4792 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
4793 			return (0);
4794 		}
4795 	}
4796 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
4797 		/* The following may work even if we cannot get a large buf. */
4798 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
4799 		return (0);
4800 	}
4801 	(void) mi_mpprintf(mp,
4802 	    "UDP     " MI_COL_HDRPAD_STR
4803 	/*   12345678[89ABCDEF] */
4804 	    " zone lport src addr        dest addr       port  state");
4805 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
4806 
4807 	zoneid = connp->conn_zoneid;
4808 
4809 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4810 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4811 		connp = NULL;
4812 
4813 		while ((connp = ipcl_get_next_conn(connfp, connp,
4814 		    IPCL_UDPCONN))) {
4815 			udp = connp->conn_udp;
4816 			if (zoneid != GLOBAL_ZONEID &&
4817 			    zoneid != connp->conn_zoneid)
4818 				continue;
4819 
4820 			udp_report_item(mp->b_cont, udp);
4821 		}
4822 	}
4823 	us->us_last_ndd_get_info_time = ddi_get_lbolt();
4824 	return (0);
4825 }
4826 
4827 /*
4828  * This routine creates a T_UDERROR_IND message and passes it upstream.
4829  * The address and options are copied from the T_UNITDATA_REQ message
4830  * passed in mp.  This message is freed.
4831  */
4832 static void
4833 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
4834     t_scalar_t err)
4835 {
4836 	struct T_unitdata_req *tudr;
4837 	mblk_t	*mp1;
4838 	uchar_t	*optaddr;
4839 	t_scalar_t optlen;
4840 
4841 	if (DB_TYPE(mp) == M_DATA) {
4842 		ASSERT(destaddr != NULL && destlen != 0);
4843 		optaddr = NULL;
4844 		optlen = 0;
4845 	} else {
4846 		if ((mp->b_wptr < mp->b_rptr) ||
4847 		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
4848 			goto done;
4849 		}
4850 		tudr = (struct T_unitdata_req *)mp->b_rptr;
4851 		destaddr = mp->b_rptr + tudr->DEST_offset;
4852 		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
4853 		    destaddr + tudr->DEST_length < mp->b_rptr ||
4854 		    destaddr + tudr->DEST_length > mp->b_wptr) {
4855 			goto done;
4856 		}
4857 		optaddr = mp->b_rptr + tudr->OPT_offset;
4858 		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
4859 		    optaddr + tudr->OPT_length < mp->b_rptr ||
4860 		    optaddr + tudr->OPT_length > mp->b_wptr) {
4861 			goto done;
4862 		}
4863 		destlen = tudr->DEST_length;
4864 		optlen = tudr->OPT_length;
4865 	}
4866 
4867 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
4868 	    (char *)optaddr, optlen, err);
4869 	if (mp1 != NULL)
4870 		qreply(q, mp1);
4871 
4872 done:
4873 	freemsg(mp);
4874 }
4875 
4876 /*
4877  * This routine removes a port number association from a stream.  It
4878  * is called by udp_wput to handle T_UNBIND_REQ messages.
4879  */
4880 static void
4881 udp_tpi_unbind(queue_t *q, mblk_t *mp)
4882 {
4883 	conn_t	*connp = Q_TO_CONN(q);
4884 	int	error;
4885 
4886 	error = udp_do_unbind(connp);
4887 	if (error) {
4888 		if (error < 0)
4889 			udp_err_ack(q, mp, -error, 0);
4890 		else
4891 			udp_err_ack(q, mp, TSYSERR, error);
4892 		return;
4893 	}
4894 
4895 	mp = mi_tpi_ok_ack_alloc(mp);
4896 	ASSERT(mp != NULL);
4897 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4898 	qreply(q, mp);
4899 }
4900 
4901 /*
4902  * Don't let port fall into the privileged range.
4903  * Since the extra privileged ports can be arbitrary we also
4904  * ensure that we exclude those from consideration.
4905  * us->us_epriv_ports is not sorted thus we loop over it until
4906  * there are no changes.
4907  */
4908 static in_port_t
4909 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
4910 {
4911 	int i;
4912 	in_port_t nextport;
4913 	boolean_t restart = B_FALSE;
4914 	udp_stack_t *us = udp->udp_us;
4915 
4916 	if (random && udp_random_anon_port != 0) {
4917 		(void) random_get_pseudo_bytes((uint8_t *)&port,
4918 		    sizeof (in_port_t));
4919 		/*
4920 		 * Unless changed by a sys admin, the smallest anon port
4921 		 * is 32768 and the largest anon port is 65535.  It is
4922 		 * very likely (50%) for the random port to be smaller
4923 		 * than the smallest anon port.  When that happens,
4924 		 * add port % (anon port range) to the smallest anon
4925 		 * port to get the random port.  It should fall into the
4926 		 * valid anon port range.
4927 		 */
4928 		if (port < us->us_smallest_anon_port) {
4929 			port = us->us_smallest_anon_port +
4930 			    port % (us->us_largest_anon_port -
4931 			    us->us_smallest_anon_port);
4932 		}
4933 	}
4934 
4935 retry:
4936 	if (port < us->us_smallest_anon_port)
4937 		port = us->us_smallest_anon_port;
4938 
4939 	if (port > us->us_largest_anon_port) {
4940 		port = us->us_smallest_anon_port;
4941 		if (restart)
4942 			return (0);
4943 		restart = B_TRUE;
4944 	}
4945 
4946 	if (port < us->us_smallest_nonpriv_port)
4947 		port = us->us_smallest_nonpriv_port;
4948 
4949 	for (i = 0; i < us->us_num_epriv_ports; i++) {
4950 		if (port == us->us_epriv_ports[i]) {
4951 			port++;
4952 			/*
4953 			 * Make sure that the port is in the
4954 			 * valid range.
4955 			 */
4956 			goto retry;
4957 		}
4958 	}
4959 
4960 	if (is_system_labeled() &&
4961 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
4962 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
4963 		port = nextport;
4964 		goto retry;
4965 	}
4966 
4967 	return (port);
4968 }
4969 
4970 static int
4971 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst,
4972     boolean_t *update_lastdst)
4973 {
4974 	int err;
4975 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4976 	udp_t *udp = Q_TO_UDP(wq);
4977 	udp_stack_t	*us = udp->udp_us;
4978 
4979 	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
4980 	    opt_storage, udp->udp_connp->conn_mac_exempt,
4981 	    us->us_netstack->netstack_ip);
4982 	if (err == 0) {
4983 		err = tsol_update_options(&udp->udp_ip_snd_options,
4984 		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
4985 		    opt_storage);
4986 	}
4987 	if (err != 0) {
4988 		DTRACE_PROBE4(
4989 		    tx__ip__log__info__updatelabel__udp,
4990 		    char *, "queue(1) failed to update options(2) on mp(3)",
4991 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
4992 	} else {
4993 		*update_lastdst = B_TRUE;
4994 	}
4995 	return (err);
4996 }
4997 
/*
 * udp_output_v4():
 * Build the IPv4 and UDP headers for an outbound datagram and hand the
 * packet to IP.
 *
 * mp is either the M_DATA payload itself or a non-M_DATA message (treated
 * as a T_unitdata_req) whose b_cont is the payload.  The IP header, any
 * saved IP send options, the UDP header and, when insert_spi is set, a
 * 4-byte all-zero SPI are placed in front of the payload, either in place
 * or in a freshly allocated mblk.
 *
 * v4dst/port are stored into ipha_dst/uha_dst_port unchanged; a v4dst of
 * INADDR_ANY is replaced by the loopback address.  srcid is only consulted
 * (via ip_srcid_find_id()) when it is non-zero and the endpoint's own
 * source address is INADDR_ANY.  msg is dereferenced only when
 * IPCL_IS_NONSTR(connp).  cr/pid are attached to mp with msg_setcredpid()
 * when cr is non-NULL, mp carries no credential yet, and the destination
 * cannot be served from the conn's cached ire (or the system is labeled).
 *
 * Returns NULL with *error == 0 once the message has been passed on.
 * On failure *error is set, udpOutErrors is bumped and the (possibly
 * modified) message is returned; it is not freed here.
 *
 * Locking: udp_rwlock is taken as writer around ancillary option
 * processing and as reader while the header is built; conn_lock covers the
 * lastdst / saved-option snapshot.  Neither is held when the packet is
 * handed to IP.
 */
4998 static mblk_t *
4999 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
5000     uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
5001     cred_t *cr, pid_t pid)
5002 {
5003 	udp_t		*udp = connp->conn_udp;
5004 	mblk_t		*mp1 = mp;
5005 	mblk_t		*mp2;
5006 	ipha_t		*ipha;
5007 	int		ip_hdr_length;
5008 	uint32_t 	ip_len;
5009 	udpha_t		*udpha;
5010 	boolean_t 	lock_held = B_FALSE;
5011 	in_port_t	uha_src_port;
5012 	udpattrs_t	attrs;
5013 	uchar_t		ip_snd_opt[IP_MAX_OPT_LENGTH];
5014 	uint32_t	ip_snd_opt_len = 0;
5015 	ip4_pkt_t  	pktinfo;
5016 	ip4_pkt_t  	*pktinfop = &pktinfo;
5017 	ip_opt_info_t	optinfo;
5018 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5019 	udp_stack_t	*us = udp->udp_us;
5020 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
5021 	queue_t		*q = connp->conn_wq;
5022 	ire_t		*ire;
5023 	in6_addr_t	v6dst;
5024 	boolean_t	update_lastdst = B_FALSE;
5025 
5026 	*error = 0;
5027 	pktinfop->ip4_ill_index = 0;
5028 	pktinfop->ip4_addr = INADDR_ANY;
5029 	optinfo.ip_opt_flags = 0;
5030 	optinfo.ip_opt_ill_index = 0;
5031 
5032 	if (v4dst == INADDR_ANY)
5033 		v4dst = htonl(INADDR_LOOPBACK);
5034 
5035 	/*
5036 	 * If options passed in, feed it for verification and handling
5037 	 */
5038 	attrs.udpattr_credset = B_FALSE;
5039 	if (IPCL_IS_NONSTR(connp)) {
5040 		if (msg->msg_controllen != 0) {
5041 			attrs.udpattr_ipp4 = pktinfop;
5042 			attrs.udpattr_mb = mp;
5043 
5044 			rw_enter(&udp->udp_rwlock, RW_WRITER);
5045 			*error = process_auxiliary_options(connp,
5046 			    msg->msg_control, msg->msg_controllen,
5047 			    &attrs, &udp_opt_obj, udp_opt_set);
5048 			rw_exit(&udp->udp_rwlock);
5049 			if (*error)
5050 				goto done;
5051 		}
5052 	} else {
5053 		if (DB_TYPE(mp) != M_DATA) {
5054 			mp1 = mp->b_cont;
5055 			if (((struct T_unitdata_req *)
5056 			    mp->b_rptr)->OPT_length != 0) {
5057 				attrs.udpattr_ipp4 = pktinfop;
5058 				attrs.udpattr_mb = mp;
5059 				if (udp_unitdata_opt_process(q, mp, error,
5060 				    &attrs) < 0)
5061 					goto done;
5062 				/*
5063 				 * Note: success in processing options.
5064 				 * mp option buffer represented by
5065 				 * OPT_length/offset now potentially modified
5066 				 * and contain option setting results
5067 				 */
5068 				ASSERT(*error == 0);
5069 			}
5070 		}
5071 	}
5072 
5073 	/* mp1 points to the M_DATA mblk carrying the packet */
5074 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
5075 
5076 	/*
5077 	 * Determine whether we need to mark the mblk with the user's
5078 	 * credentials.
5079 	 */
5080 	ire = connp->conn_ire_cache;
5081 	if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) ||
5082 	    (ire->ire_addr != v4dst) ||
5083 	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
5084 		if (cr != NULL && DB_CRED(mp) == NULL)
5085 			msg_setcredpid(mp, cr, pid);
5086 	}
5087 
5088 	rw_enter(&udp->udp_rwlock, RW_READER);
5089 	lock_held = B_TRUE;
5090 
5091 	/*
5092 	 * Cluster and TSOL note:
5093 	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
5094 	 *    udp.udp_lastdstport	is used by Cluster
5095 	 *
5096 	 * Both Cluster and TSOL need to update the dest addr and/or port.
5097 	 * Updating is done after both Cluster and TSOL checks, protected
5098 	 * by conn_lock.
5099 	 */
5100 	mutex_enter(&connp->conn_lock);
5101 
5102 	if (cl_inet_connect2 != NULL &&
5103 	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
5104 	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
5105 	    udp->udp_lastdstport != port)) {
5106 		mutex_exit(&connp->conn_lock);
5107 		*error = 0;
5108 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5109 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
5110 		if (*error != 0) {
5111 			*error = EHOSTUNREACH;
5112 			goto done;
5113 		}
5114 		update_lastdst = B_TRUE;
5115 		mutex_enter(&connp->conn_lock);
5116 	}
5117 
5118 	/*
5119 	 * Check if our saved options are valid; update if not.
5120 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
5121 	 * to different destination may require different labels,
5122 	 * or worse, UDP packets to same IP address may require
5123 	 * different labels due to use of shared all-zones address.
5124 	 * We use conn_lock to ensure that lastdst, ip_snd_options,
5125 	 * and ip_snd_options_len are consistent for the current
5126 	 * destination and are updated atomically.
5127 	 */
5128 	if (is_system_labeled()) {
5129 		/* Using UDP MLP requires SCM_UCRED from user */
5130 		if (connp->conn_mlp_type != mlptSingle &&
5131 		    !attrs.udpattr_credset) {
5132 			mutex_exit(&connp->conn_lock);
5133 			DTRACE_PROBE4(
5134 			    tx__ip__log__info__output__udp,
5135 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
5136 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
5137 			*error = ECONNREFUSED;
5138 			goto done;
5139 		}
5140 		/*
5141 		 * update label option for this UDP socket if
5142 		 * - the destination has changed, or
5143 		 * - the UDP socket is MLP
5144 		 */
5145 		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
5146 		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
5147 		    connp->conn_mlp_type != mlptSingle) &&
5148 		    (*error = udp_update_label(q, mp, v4dst, &update_lastdst))
5149 		    != 0) {
5150 			mutex_exit(&connp->conn_lock);
5151 			goto done;
5152 		}
5153 	}
5154 	if (update_lastdst) {
5155 		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
5156 		udp->udp_lastdstport = port;
5157 	}
	/* Snapshot the saved IP options into a local buffer under conn_lock */
5158 	if (udp->udp_ip_snd_options_len > 0) {
5159 		ip_snd_opt_len = udp->udp_ip_snd_options_len;
5160 		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
5161 	}
5162 	mutex_exit(&connp->conn_lock);
5163 
5164 	/* Add an IP header */
5165 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
5166 	    (insert_spi ? sizeof (uint32_t) : 0);
5167 	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * The headers are built in place only when the data block has a
	 * single reference, there is enough room in front of b_rptr and the
	 * header start passes OK_32PTR(); otherwise a separate mblk is
	 * allocated and linked in front of the payload.
	 */
5168 	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
5169 	    !OK_32PTR(ipha)) {
5170 		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
5171 		if (mp2 == NULL) {
5172 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5173 			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
5174 			*error = ENOMEM;
5175 			goto done;
5176 		}
5177 		mp2->b_wptr = DB_LIM(mp2);
5178 		mp2->b_cont = mp1;
5179 		mp1 = mp2;
5180 		if (DB_TYPE(mp) != M_DATA)
5181 			mp->b_cont = mp1;
5182 		else
5183 			mp = mp1;
5184 
5185 		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
5186 	}
	/* From here on ip_hdr_length covers only the IP header plus options */
5187 	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
5188 #ifdef	_BIG_ENDIAN
5189 	/* Set version, header length, and tos */
5190 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
5191 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
5192 	    udp->udp_type_of_service);
5193 	/* Set ttl and protocol */
5194 	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
5195 #else
5196 	/* Set version, header length, and tos */
5197 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
5198 	    ((udp->udp_type_of_service << 8) |
5199 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
5200 	/* Set ttl and protocol */
5201 	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
5202 #endif
5203 	if (pktinfop->ip4_addr != INADDR_ANY) {
5204 		ipha->ipha_src = pktinfop->ip4_addr;
5205 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
5206 	} else {
5207 		/*
5208 		 * Copy our address into the packet.  If this is zero,
5209 		 * first look at __sin6_src_id for a hint. If we leave the
5210 		 * source as INADDR_ANY then ip will fill in the real source
5211 		 * address.
5212 		 */
5213 		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
5214 		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
5215 			in6_addr_t v6src;
5216 
5217 			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
5218 			    us->us_netstack);
5219 			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
5220 		}
5221 	}
5222 	uha_src_port = udp->udp_port;
	/*
	 * With no IP options the reader lock is dropped here; with options
	 * it is dropped in the ip_snd_opt_len > 0 branch further down.
	 */
5223 	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
5224 		rw_exit(&udp->udp_rwlock);
5225 		lock_held = B_FALSE;
5226 	}
5227 
5228 	if (pktinfop->ip4_ill_index != 0) {
5229 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
5230 	}
5231 
5232 	ipha->ipha_fragment_offset_and_flags = 0;
5233 	ipha->ipha_ident = 0;
5234 
5235 	mp1->b_rptr = (uchar_t *)ipha;
5236 
5237 	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
5238 	    (uintptr_t)UINT_MAX);
5239 
5240 	/* Determine length of packet */
5241 	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
5242 	if ((mp2 = mp1->b_cont) != NULL) {
5243 		do {
5244 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
5245 			ip_len += (uint32_t)MBLKL(mp2);
5246 		} while ((mp2 = mp2->b_cont) != NULL);
5247 	}
5248 	/*
5249 	 * If the size of the packet is greater than the maximum allowed by
5250 	 * ip, return an error. Passing this down could cause panics because
5251 	 * the size will have wrapped and be inconsistent with the msg size.
5252 	 */
5253 	if (ip_len > IP_MAXPACKET) {
5254 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5255 		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
5256 		*error = EMSGSIZE;
5257 		goto done;
5258 	}
5259 	ipha->ipha_length = htons((uint16_t)ip_len);
	/*
	 * Reuse ip_len for the UDP length: everything after the IP header,
	 * converted with htons().
	 */
5260 	ip_len -= ip_hdr_length;
5261 	ip_len = htons((uint16_t)ip_len);
5262 	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);
5263 
5264 	/* Insert all-0s SPI now. */
5265 	if (insert_spi)
5266 		*((uint32_t *)(udpha + 1)) = 0;
5267 
5268 	/*
5269 	 * Copy in the destination address
5270 	 */
5271 	ipha->ipha_dst = v4dst;
5272 
5273 	/*
5274 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
5275 	 */
5276 	if (CLASSD(v4dst))
5277 		ipha->ipha_ttl = udp->udp_multicast_ttl;
5278 
5279 	udpha->uha_dst_port = port;
5280 	udpha->uha_src_port = uha_src_port;
5281 
5282 	if (ip_snd_opt_len > 0) {
5283 		uint32_t	cksum;
5284 
5285 		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
5286 		lock_held = B_FALSE;
5287 		rw_exit(&udp->udp_rwlock);
5288 		/*
5289 		 * Massage source route putting first source route in ipha_dst.
5290 		 * Ignore the destination in T_unitdata_req.
5291 		 * Create a checksum adjustment for a source route, if any.
5292 		 */
5293 		cksum = ip_massage_options(ipha, us->us_netstack);
5294 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
5295 		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
5296 		    (ipha->ipha_dst & 0xFFFF);
5297 		if ((int)cksum < 0)
5298 			cksum--;
5299 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
5300 		/*
5301 		 * IP does the checksum if uha_checksum is non-zero,
5302 		 * We make it easy for IP to include our pseudo header
5303 		 * by putting our length in uha_checksum.
5304 		 */
5305 		cksum += ip_len;
5306 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
5307 		/* There might be a carry. */
5308 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
5309 #ifdef _LITTLE_ENDIAN
5310 		if (us->us_do_checksum)
5311 			ip_len = (cksum << 16) | ip_len;
5312 #else
5313 		if (us->us_do_checksum)
5314 			ip_len = (ip_len << 16) | cksum;
5315 		else
5316 			ip_len <<= 16;
5317 #endif
5318 	} else {
5319 		/*
5320 		 * IP does the checksum if uha_checksum is non-zero,
5321 		 * We make it easy for IP to include our pseudo header
5322 		 * by putting our length in uha_checksum.
5323 		 */
5324 		if (us->us_do_checksum)
5325 			ip_len |= (ip_len << 16);
5326 #ifndef _LITTLE_ENDIAN
5327 		else
5328 			ip_len <<= 16;
5329 #endif
5330 	}
5331 	ASSERT(!lock_held);
	/*
	 * ip_len now packs both 16-bit fields; they are written with a
	 * single 32-bit store starting at uha_length.
	 */
5332 	/* Set UDP length and checksum */
5333 	*((uint32_t *)&udpha->uha_length) = ip_len;
5334 	if (DB_CRED(mp) != NULL)
5335 		mblk_setcred(mp1, DB_CRED(mp));
5336 
	/* A non-M_DATA head has served its purpose; only mp1 goes to IP */
5337 	if (DB_TYPE(mp) != M_DATA) {
5338 		ASSERT(mp != mp1);
5339 		freeb(mp);
5340 	}
5341 
5342 	/* mp has been consumed and we'll return success */
5343 	ASSERT(*error == 0);
5344 	mp = NULL;
5345 
5346 	/* We're done.  Pass the packet to ip. */
5347 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
5348 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5349 	    "udp_wput_end: q %p (%S)", q, "end");
5350 
	/*
	 * Packets that need policy checks, per-packet options, a specific
	 * outgoing ill, IP options, IPP processing or multicast routing go
	 * through ip_output_options(); everything else takes
	 * udp_send_data().
	 */
5351 	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
5352 	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
5353 	    connp->conn_dontroute ||
5354 	    connp->conn_nofailover_ill != NULL ||
5355 	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
5356 	    optinfo.ip_opt_ill_index != 0 ||
5357 	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
5358 	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
5359 	    ipst->ips_ip_g_mrouter != NULL) {
5360 		UDP_STAT(us, udp_ip_send);
5361 		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
5362 		    &optinfo);
5363 	} else {
5364 		udp_send_data(udp, connp->conn_wq, mp1, ipha);
5365 	}
5366 
	/* Common exit: drop udp_rwlock if still held and count failures */
5367 done:
5368 	if (lock_held)
5369 		rw_exit(&udp->udp_rwlock);
5370 	if (*error != 0) {
5371 		ASSERT(mp != NULL);
5372 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5373 	}
5374 	return (mp);
5375 }
5376 
/*
 * udp_send_data():
 * Transmit an IPv4 datagram whose IP and UDP headers have already been
 * built (ipha points at the IP header inside mp).
 *
 * The conn's cached ire (conn_ire_cache) is used when it matches the
 * destination; otherwise a new ire is looked up and, if permitted, cached
 * in the conn.  Whenever the fast path cannot be used (no usable ire, no
 * usable multicast ipif, broadcast/local/loopback ire, multirt, unresolved
 * nce, packet larger than ire_max_frag, nexthop set, or not enough
 * headroom for the fast-path header) the packet is handed to ip_output().
 * Otherwise the source address is filled in if still unspecified and the
 * packet goes to udp_xmit(), which takes over the ire reference.
 *
 * The message is always passed on; nothing is returned to the caller.
 */
5377 static void
5378 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
5379 {
5380 	conn_t	*connp = udp->udp_connp;
5381 	ipaddr_t src, dst;
5382 	ire_t	*ire;
5383 	ipif_t	*ipif = NULL;
5384 	mblk_t	*ire_fp_mp;
5385 	boolean_t retry_caching;
5386 	udp_stack_t *us = udp->udp_us;
5387 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5388 
5389 	dst = ipha->ipha_dst;
5390 	src = ipha->ipha_src;
5391 	ASSERT(ipha->ipha_ident == 0);
5392 
	/*
	 * Multicast: obtain a held reference on the conn's multicast ipif.
	 * Without a usable IPv4, non-loopback ipif the packet goes straight
	 * to ip_output().
	 */
5393 	if (CLASSD(dst)) {
5394 		int err;
5395 
5396 		ipif = conn_get_held_ipif(connp,
5397 		    &connp->conn_multicast_ipif, &err);
5398 
5399 		if (ipif == NULL || ipif->ipif_isv6 ||
5400 		    (ipif->ipif_ill->ill_phyint->phyint_flags &
5401 		    PHYI_LOOPBACK)) {
5402 			if (ipif != NULL)
5403 				ipif_refrele(ipif);
5404 			UDP_STAT(us, udp_ip_send);
5405 			ip_output(connp, mp, q, IP_WPUT);
5406 			return;
5407 		}
5408 	}
5409 
	/* Decide under conn_lock whether the cached ire can serve dst */
5410 	retry_caching = B_FALSE;
5411 	mutex_enter(&connp->conn_lock);
5412 	ire = connp->conn_ire_cache;
5413 	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));
5414 
5415 	if (ire == NULL || ire->ire_addr != dst ||
5416 	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
5417 		retry_caching = B_TRUE;
5418 	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
5419 		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;
5420 
5421 		ASSERT(ipif != NULL);
5422 		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
5423 		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
5424 			retry_caching = B_TRUE;
5425 	}
5426 
5427 	if (!retry_caching) {
		/* Cached ire is usable: take our own hold for this send */
5428 		ASSERT(ire != NULL);
5429 		IRE_REFHOLD(ire);
5430 		mutex_exit(&connp->conn_lock);
5431 	} else {
5432 		boolean_t cached = B_FALSE;
5433 
5434 		connp->conn_ire_cache = NULL;
5435 		mutex_exit(&connp->conn_lock);
5436 
5437 		/* Release the old ire */
5438 		if (ire != NULL) {
5439 			IRE_REFRELE_NOTR(ire);
5440 			ire = NULL;
5441 		}
5442 
5443 		if (CLASSD(dst)) {
5444 			ASSERT(ipif != NULL);
5445 			ire = ire_ctable_lookup(dst, 0, 0, ipif,
5446 			    connp->conn_zoneid, MBLK_GETLABEL(mp),
5447 			    MATCH_IRE_ILL_GROUP, ipst);
5448 		} else {
5449 			ASSERT(ipif == NULL);
5450 			ire = ire_cache_lookup(dst, connp->conn_zoneid,
5451 			    MBLK_GETLABEL(mp), ipst);
5452 		}
5453 
5454 		if (ire == NULL) {
5455 			if (ipif != NULL)
5456 				ipif_refrele(ipif);
5457 			UDP_STAT(us, udp_ire_null);
5458 			ip_output(connp, mp, q, IP_WPUT);
5459 			return;
5460 		}
		/* Extra hold intended for the conn_ire_cache slot */
5461 		IRE_REFHOLD_NOTR(ire);
5462 
5463 		mutex_enter(&connp->conn_lock);
5464 		if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL &&
5465 		    !(ire->ire_marks & IRE_MARK_CONDEMNED)) {
5466 			irb_t		*irb = ire->ire_bucket;
5467 
5468 			/*
5469 			 * IRE's created for non-connection oriented transports
5470 			 * are normally initialized with IRE_MARK_TEMPORARY set
5471 			 * in the ire_marks. These IRE's are preferentially
5472 			 * reaped when the hash chain length in the cache
5473 			 * bucket exceeds the maximum value specified in
5474 			 * ip[6]_ire_max_bucket_cnt. This can severely affect
5475 			 * UDP performance if IRE cache entries that we need
5476 			 * to reuse are continually removed. To remedy this,
5477 			 * when we cache the IRE in the conn_t, we remove the
5478 			 * IRE_MARK_TEMPORARY bit from the ire_marks if it was
5479 			 * set.
5480 			 */
5481 			if (ire->ire_marks & IRE_MARK_TEMPORARY) {
5482 				rw_enter(&irb->irb_lock, RW_WRITER);
5483 				if (ire->ire_marks & IRE_MARK_TEMPORARY) {
5484 					ire->ire_marks &= ~IRE_MARK_TEMPORARY;
5485 					irb->irb_tmp_ire_cnt--;
5486 				}
5487 				rw_exit(&irb->irb_lock);
5488 			}
5489 			connp->conn_ire_cache = ire;
5490 			cached = B_TRUE;
5491 		}
5492 		mutex_exit(&connp->conn_lock);
5493 
5494 		/*
5495 		 * We can continue to use the ire but since it was not
5496 		 * cached, we should drop the extra reference.
5497 		 */
5498 		if (!cached)
5499 			IRE_REFRELE_NOTR(ire);
5500 	}
5501 	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
5502 	ASSERT(!CLASSD(dst) || ipif != NULL);
5503 
5504 	/*
5505 	 * Check if we can take the fast-path.
5506 	 * Note that "incomplete" ire's (where the link-layer for next hop
5507 	 * is not resolved, or where the fast-path header in nce_fp_mp is not
5508 	 * available yet) are sent down the legacy (slow) path
5509 	 */
5510 	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
5511 	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
5512 	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
5513 	    ((ire->ire_nce == NULL) ||
5514 	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
5515 	    connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
5516 		if (ipif != NULL)
5517 			ipif_refrele(ipif);
5518 		UDP_STAT(us, udp_ip_ire_send);
5519 		IRE_REFRELE(ire);
5520 		ip_output(connp, mp, q, IP_WPUT);
5521 		return;
5522 	}
5523 
	/*
	 * Fill in an unspecified source address: from the multicast ipif for
	 * multicast destinations (unless the ire has RTF_SETSRC), else from
	 * the ire.
	 */
5524 	if (src == INADDR_ANY && !connp->conn_unspec_src) {
5525 		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
5526 			ipha->ipha_src = ipif->ipif_src_addr;
5527 		else
5528 			ipha->ipha_src = ire->ire_src_addr;
5529 	}
5530 
5531 	if (ipif != NULL)
5532 		ipif_refrele(ipif);
5533 
	/* udp_xmit() releases the ire reference taken above */
5534 	udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
5535 }
5536 
/*
 * udp_xmit():
 * Fast-path transmit of a simple-header IPv4 UDP packet over the given
 * ire.
 *
 * Fills in the IP ident, the UDP checksum (when uha_checksum is non-zero)
 * and the IP header checksum, loops multicast packets back when a matching
 * ilm exists on the ill, prepends the link-layer header taken from
 * ire->ire_nce->nce_fp_mp, runs the physical-out firewall hooks and the
 * observability hook, and finally passes the packet to the driver either
 * through the direct transmit entry point or with putnext().
 *
 * When the write queue is backed up or the device queue is flow blocked
 * the packet is either queued on conn_wq (if ips_ip_output_queue is set)
 * or freed.
 *
 * The message is consumed and one reference on ire is released on every
 * path.
 */
5537 static void
5538 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
5539 {
5540 	ipaddr_t src, dst;
5541 	ill_t	*ill;
5542 	mblk_t	*ire_fp_mp;
5543 	uint_t	ire_fp_mp_len;
5544 	uint16_t *up;
5545 	uint32_t cksum, hcksum_txflags;
5546 	queue_t	*dev_q;
5547 	udp_t	*udp = connp->conn_udp;
5548 	ipha_t	*ipha = (ipha_t *)mp->b_rptr;
5549 	udp_stack_t	*us = udp->udp_us;
5550 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
5551 	boolean_t	ll_multicast = B_FALSE;
5552 
5553 	dev_q = ire->ire_stq->q_next;
5554 	ASSERT(dev_q != NULL);
5555 
5556 	ill = ire_to_ill(ire);
5557 	ASSERT(ill != NULL);
5558 
5559 	/* is queue flow controlled? */
5560 	if (q->q_first != NULL || connp->conn_draining ||
5561 	    DEV_Q_FLOW_BLOCKED(dev_q)) {
5562 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
5563 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
5564 
5565 		if (ipst->ips_ip_output_queue)
5566 			(void) putq(connp->conn_wq, mp);
5567 		else
5568 			freemsg(mp);
5569 		ire_refrele(ire);
5570 		return;
5571 	}
5572 
5573 	ire_fp_mp = ire->ire_nce->nce_fp_mp;
5574 	ire_fp_mp_len = MBLKL(ire_fp_mp);
5575 	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);
5576 
5577 	dst = ipha->ipha_dst;
5578 	src = ipha->ipha_src;
5579 
5580 
5581 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
5582 
	/* Per-ire ident counter; byte-swapped below unless _BIG_ENDIAN */
5583 	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
5584 #ifndef _BIG_ENDIAN
5585 	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
5586 #endif
5587 
5588 	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
5589 		ASSERT(ill->ill_hcksum_capab != NULL);
5590 		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
5591 	} else {
5592 		hcksum_txflags = 0;
5593 	}
5594 
5595 	/* pseudo-header checksum (do it in parts for IP header checksum) */
5596 	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
5597 
5598 	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
5599 	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero checksum field means no UDP checksum was requested */
5600 	if (*up != 0) {
5601 		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
5602 		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
5603 		    ntohs(ipha->ipha_length), cksum);
5604 
5605 		/* Software checksum? */
5606 		if (DB_CKSUMFLAGS(mp) == 0) {
5607 			UDP_STAT(us, udp_out_sw_cksum);
5608 			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
5609 			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
5610 		}
5611 	}
5612 
	/* Unicast/broadcast packets pick up the ire's fragment flag */
5613 	if (!CLASSD(dst)) {
5614 		ipha->ipha_fragment_offset_and_flags |=
5615 		    (uint32_t)htons(ire->ire_frag_flag);
5616 	}
5617 
5618 	/* Calculate IP header checksum if hardware isn't capable */
5619 	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
5620 		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
5621 		    ((uint16_t *)ipha)[4]);
5622 	}
5623 
5624 	if (CLASSD(dst)) {
5625 		boolean_t ilm_exists;
5626 
5627 		ILM_WALKER_HOLD(ill);
5628 		ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL);
5629 		ILM_WALKER_RELE(ill);
5630 		if (ilm_exists) {
5631 			ip_multicast_loopback(q, ill, mp,
5632 			    connp->conn_multicast_loop ? 0 :
5633 			    IP_FF_NO_MCAST_LOOP, zoneid);
5634 		}
5635 
5636 		/* If multicast TTL is 0 then we are done */
5637 		if (ipha->ipha_ttl == 0) {
5638 			freemsg(mp);
5639 			ire_refrele(ire);
5640 			return;
5641 		}
5642 		ll_multicast = B_TRUE;
5643 	}
5644 
	/*
	 * Prepend the link-layer header by copying ire_fp_mp's contents into
	 * the headroom directly in front of the IP header.
	 */
5645 	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
5646 	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
5647 	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);
5648 
5649 	UPDATE_OB_PKT_COUNT(ire);
5650 	ire->ire_last_used_time = lbolt;
5651 
5652 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
5653 	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
5654 	    ntohs(ipha->ipha_length));
5655 
5656 	DTRACE_PROBE4(ip4__physical__out__start,
5657 	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
5658 	FW_HOOKS(ipst->ips_ip4_physical_out_event,
5659 	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
5660 	    ll_multicast, ipst);
5661 	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	/*
	 * mp is re-checked against NULL from here on; presumably FW_HOOKS
	 * can consume the packet and clear it -- NOTE(review): confirm
	 * against the FW_HOOKS definition.
	 */
5662 	if (ipst->ips_ipobs_enabled && mp != NULL) {
5663 		zoneid_t szone;
5664 
5665 		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
5666 		    ipst, ALL_ZONES);
5667 		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
5668 		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
5669 	}
5670 
5671 	if (mp != NULL) {
5672 		DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
5673 		    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
5674 		    ipha_t *, ipha, ip6_t *, NULL, int, 0);
5675 
5676 		if (ILL_DIRECT_CAPABLE(ill)) {
5677 			ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;
5678 
5679 			(void) idd->idd_tx_df(idd->idd_tx_dh, mp,
5680 			    (uintptr_t)connp, 0);
5681 		} else {
5682 			putnext(ire->ire_stq, mp);
5683 		}
5684 	}
5685 	IRE_REFRELE(ire);
5686 }
5687 
5688 static boolean_t
5689 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst,
5690     boolean_t *update_lastdst)
5691 {
5692 	udp_t *udp = Q_TO_UDP(wq);
5693 	int err;
5694 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
5695 	udp_stack_t		*us = udp->udp_us;
5696 
5697 	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
5698 	    dst, opt_storage, udp->udp_connp->conn_mac_exempt,
5699 	    us->us_netstack->netstack_ip);
5700 	if (err == 0) {
5701 		err = tsol_update_sticky(&udp->udp_sticky_ipp,
5702 		    &udp->udp_label_len_v6, opt_storage);
5703 	}
5704 	if (err != 0) {
5705 		DTRACE_PROBE4(
5706 		    tx__ip__log__drop__updatelabel__udp6,
5707 		    char *, "queue(1) failed to update options(2) on mp(3)",
5708 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
5709 	} else {
5710 		*update_lastdst = B_TRUE;
5711 	}
5712 	return (err);
5713 }
5714 
5715 static int
5716 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr,
5717     pid_t pid)
5718 {
5719 	udp_t		*udp = connp->conn_udp;
5720 	udp_stack_t	*us = udp->udp_us;
5721 	ipaddr_t	v4dst;
5722 	in_port_t	dstport;
5723 	boolean_t	mapped_addr;
5724 	struct sockaddr_storage ss;
5725 	sin_t		*sin;
5726 	sin6_t		*sin6;
5727 	struct sockaddr	*addr;
5728 	socklen_t	addrlen;
5729 	int		error;
5730 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5731 
5732 	/* M_DATA for connected socket */
5733 
5734 	ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp));
5735 	UDP_DBGSTAT(us, udp_data_conn);
5736 
5737 	mutex_enter(&connp->conn_lock);
5738 	if (udp->udp_state != TS_DATA_XFER) {
5739 		mutex_exit(&connp->conn_lock);
5740 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5741 		UDP_STAT(us, udp_out_err_notconn);
5742 		freemsg(mp);
5743 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5744 		    "udp_wput_end: connp %p (%S)", connp,
5745 		    "not-connected; address required");
5746 		return (EDESTADDRREQ);
5747 	}
5748 
5749 	mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst);
5750 	if (mapped_addr)
5751 		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
5752 
5753 	/* Initialize addr and addrlen as if they're passed in */
5754 	if (udp->udp_family == AF_INET) {
5755 		sin = (sin_t *)&ss;
5756 		sin->sin_family = AF_INET;
5757 		dstport = sin->sin_port = udp->udp_dstport;
5758 		ASSERT(mapped_addr);
5759 		sin->sin_addr.s_addr = v4dst;
5760 		addr = (struct sockaddr *)sin;
5761 		addrlen = sizeof (*sin);
5762 	} else {
5763 		sin6 = (sin6_t *)&ss;
5764 		sin6->sin6_family = AF_INET6;
5765 		dstport = sin6->sin6_port = udp->udp_dstport;
5766 		sin6->sin6_flowinfo = udp->udp_flowinfo;
5767 		sin6->sin6_addr = udp->udp_v6dst;
5768 		sin6->sin6_scope_id = 0;
5769 		sin6->__sin6_src_id = 0;
5770 		addr = (struct sockaddr *)sin6;
5771 		addrlen = sizeof (*sin6);
5772 	}
5773 	mutex_exit(&connp->conn_lock);
5774 
5775 	if (mapped_addr) {
5776 		/*
5777 		 * Handle both AF_INET and AF_INET6; the latter
5778 		 * for IPV4 mapped destination addresses.  Note
5779 		 * here that both addr and addrlen point to the
5780 		 * corresponding struct depending on the address
5781 		 * family of the socket.
5782 		 */
5783 		mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error,
5784 		    insert_spi, msg, cr, pid);
5785 	} else {
5786 		mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid);
5787 	}
5788 	if (error == 0) {
5789 		ASSERT(mp == NULL);
5790 		return (0);
5791 	}
5792 
5793 	UDP_STAT(us, udp_out_err_output);
5794 	ASSERT(mp != NULL);
5795 	if (IPCL_IS_NONSTR(connp)) {
5796 		freemsg(mp);
5797 		return (error);
5798 	} else {
5799 		/* mp is freed by the following routine */
5800 		udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr,
5801 		    (t_scalar_t)addrlen, (t_scalar_t)error);
5802 		return (0);
5803 	}
5804 }
5805 
5806 /* ARGSUSED */
5807 static int
5808 udp_send_not_connected(conn_t *connp,  mblk_t *mp, struct sockaddr *addr,
5809     socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid)
5810 {
5811 
5812 	udp_t		*udp = connp->conn_udp;
5813 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
5814 	int		error = 0;
5815 	sin6_t		*sin6;
5816 	sin_t		*sin;
5817 	uint_t		srcid;
5818 	uint16_t	port;
5819 	ipaddr_t	v4dst;
5820 
5821 
5822 	ASSERT(addr != NULL);
5823 
5824 	switch (udp->udp_family) {
5825 	case AF_INET6:
5826 		sin6 = (sin6_t *)addr;
5827 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5828 			/*
5829 			 * Destination is a non-IPv4-compatible IPv6 address.
5830 			 * Send out an IPv6 format packet.
5831 			 */
5832 			mp = udp_output_v6(connp, mp, sin6, &error, msg, cr,
5833 			    pid);
5834 			if (error != 0)
5835 				goto ud_error;
5836 
5837 			return (0);
5838 		}
5839 		/*
5840 		 * If the local address is not zero or a mapped address
5841 		 * return an error.  It would be possible to send an IPv4
5842 		 * packet but the response would never make it back to the
5843 		 * application since it is bound to a non-mapped address.
5844 		 */
5845 		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
5846 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
5847 			error = EADDRNOTAVAIL;
5848 			goto ud_error;
5849 		}
5850 		/* Send IPv4 packet without modifying udp_ipversion */
5851 		/* Extract port and ipaddr */
5852 		port = sin6->sin6_port;
5853 		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
5854 		srcid = sin6->__sin6_src_id;
5855 		break;
5856 
5857 	case AF_INET:
5858 		sin = (sin_t *)addr;
5859 		/* Extract port and ipaddr */
5860 		port = sin->sin_port;
5861 		v4dst = sin->sin_addr.s_addr;
5862 		srcid = 0;
5863 		break;
5864 	}
5865 
5866 	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
5867 	    msg, cr, pid);
5868 
5869 	if (error == 0) {
5870 		ASSERT(mp == NULL);
5871 		return (0);
5872 	}
5873 
5874 ud_error:
5875 	ASSERT(mp != NULL);
5876 
5877 	return (error);
5878 }
5879 
5880 /*
5881  * This routine handles all messages passed downstream.  It either
5882  * consumes the message or passes it downstream; it never queues a
5883  * a message.
5884  *
5885  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
5886  * is valid when we are directly beneath the stream head, and thus sockfs
5887  * is able to bypass STREAMS and directly call us, passing along the sockaddr
5888  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
5889  * connected endpoints.
5890  */
5891 void
5892 udp_wput(queue_t *q, mblk_t *mp)
5893 {
5894 	conn_t		*connp = Q_TO_CONN(q);
5895 	udp_t		*udp = connp->conn_udp;
5896 	int		error = 0;
5897 	struct sockaddr	*addr;
5898 	socklen_t	addrlen;
5899 	udp_stack_t	*us = udp->udp_us;
5900 
5901 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
5902 	    "udp_wput_start: queue %p mp %p", q, mp);
5903 
5904 	/*
5905 	 * We directly handle several cases here: T_UNITDATA_REQ message
5906 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
5907 	 * socket.
5908 	 */
5909 	switch (DB_TYPE(mp)) {
5910 	case M_DATA:
5911 		/*
5912 		 * Quick check for error cases. Checks will be done again
5913 		 * under the lock later on
5914 		 */
5915 		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
5916 			/* Not connected; address is required */
5917 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5918 			UDP_STAT(us, udp_out_err_notconn);
5919 			freemsg(mp);
5920 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5921 			    "udp_wput_end: connp %p (%S)", connp,
5922 			    "not-connected; address required");
5923 			return;
5924 		}
5925 		(void) udp_send_connected(connp, mp, NULL, NULL, -1);
5926 		return;
5927 
5928 	case M_PROTO:
5929 	case M_PCPROTO: {
5930 		struct T_unitdata_req *tudr;
5931 
5932 		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
5933 		tudr = (struct T_unitdata_req *)mp->b_rptr;
5934 
5935 		/* Handle valid T_UNITDATA_REQ here */
5936 		if (MBLKL(mp) >= sizeof (*tudr) &&
5937 		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
5938 			if (mp->b_cont == NULL) {
5939 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5940 				    "udp_wput_end: q %p (%S)", q, "badaddr");
5941 				error = EPROTO;
5942 				goto ud_error;
5943 			}
5944 
5945 			if (!MBLKIN(mp, 0, tudr->DEST_offset +
5946 			    tudr->DEST_length)) {
5947 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5948 				    "udp_wput_end: q %p (%S)", q, "badaddr");
5949 				error = EADDRNOTAVAIL;
5950 				goto ud_error;
5951 			}
5952 			/*
5953 			 * If a port has not been bound to the stream, fail.
5954 			 * This is not a problem when sockfs is directly
5955 			 * above us, because it will ensure that the socket
5956 			 * is first bound before allowing data to be sent.
5957 			 */
5958 			if (udp->udp_state == TS_UNBND) {
5959 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5960 				    "udp_wput_end: q %p (%S)", q, "outstate");
5961 				error = EPROTO;
5962 				goto ud_error;
5963 			}
5964 			addr = (struct sockaddr *)
5965 			    &mp->b_rptr[tudr->DEST_offset];
5966 			addrlen = tudr->DEST_length;
5967 			if (tudr->OPT_length != 0)
5968 				UDP_STAT(us, udp_out_opt);
5969 			break;
5970 		}
5971 		/* FALLTHRU */
5972 	}
5973 	default:
5974 		udp_wput_other(q, mp);
5975 		return;
5976 	}
5977 	ASSERT(addr != NULL);
5978 
5979 	error = udp_send_not_connected(connp,  mp, addr, addrlen, NULL, NULL,
5980 	    -1);
5981 	if (error != 0) {
5982 ud_error:
5983 		UDP_STAT(us, udp_out_err_output);
5984 		ASSERT(mp != NULL);
5985 		/* mp is freed by the following routine */
5986 		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
5987 		    (t_scalar_t)error);
5988 	}
5989 }
5990 
5991 /* ARGSUSED */
5992 static void
5993 udp_wput_fallback(queue_t *wq, mblk_t *mp)
5994 {
5995 #ifdef DEBUG
5996 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
5997 #endif
5998 	freemsg(mp);
5999 }
6000 
6001 
6002 /*
6003  * udp_output_v6():
6004  * Assumes that udp_wput did some sanity checking on the destination
6005  * address.
6006  */
6007 static mblk_t *
6008 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error,
6009     struct nmsghdr *msg, cred_t *cr, pid_t pid)
6010 {
6011 	ip6_t		*ip6h;
6012 	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
6013 	mblk_t		*mp1 = mp;
6014 	mblk_t		*mp2;
6015 	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
6016 	size_t		ip_len;
6017 	udpha_t		*udph;
6018 	udp_t		*udp = connp->conn_udp;
6019 	udp_stack_t	*us = udp->udp_us;
6020 	queue_t		*q = connp->conn_wq;
6021 	ip6_pkt_t	ipp_s;	/* For ancillary data options */
6022 	ip6_pkt_t	*ipp = &ipp_s;
6023 	ip6_pkt_t	*tipp;	/* temporary ipp */
6024 	uint32_t	csum = 0;
6025 	uint_t		ignore = 0;
6026 	uint_t		option_exists = 0, is_sticky = 0;
6027 	uint8_t		*cp;
6028 	uint8_t		*nxthdr_ptr;
6029 	in6_addr_t	ip6_dst;
6030 	in_port_t	port;
6031 	udpattrs_t	attrs;
6032 	boolean_t	opt_present;
6033 	ip6_hbh_t	*hopoptsptr = NULL;
6034 	uint_t		hopoptslen = 0;
6035 	boolean_t	is_ancillary = B_FALSE;
6036 	size_t		sth_wroff = 0;
6037 	ire_t		*ire;
6038 	boolean_t	update_lastdst = B_FALSE;
6039 
6040 	*error = 0;
6041 
6042 	/*
6043 	 * If the local address is a mapped address return
6044 	 * an error.
6045 	 * It would be possible to send an IPv6 packet but the
6046 	 * response would never make it back to the application
6047 	 * since it is bound to a mapped address.
6048 	 */
6049 	if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
6050 		*error = EADDRNOTAVAIL;
6051 		goto done;
6052 	}
6053 
6054 	ipp->ipp_fields = 0;
6055 	ipp->ipp_sticky_ignored = 0;
6056 
6057 	/*
6058 	 * If TPI options passed in, feed it for verification and handling
6059 	 */
6060 	attrs.udpattr_credset = B_FALSE;
6061 	opt_present = B_FALSE;
6062 	if (IPCL_IS_NONSTR(connp)) {
6063 		if (msg->msg_controllen != 0) {
6064 			attrs.udpattr_ipp6 = ipp;
6065 			attrs.udpattr_mb = mp;
6066 
6067 			rw_enter(&udp->udp_rwlock, RW_WRITER);
6068 			*error = process_auxiliary_options(connp,
6069 			    msg->msg_control, msg->msg_controllen,
6070 			    &attrs, &udp_opt_obj, udp_opt_set);
6071 			rw_exit(&udp->udp_rwlock);
6072 			if (*error)
6073 				goto done;
6074 			ASSERT(*error == 0);
6075 			opt_present = B_TRUE;
6076 		}
6077 	} else {
6078 		if (DB_TYPE(mp) != M_DATA) {
6079 			mp1 = mp->b_cont;
6080 			if (((struct T_unitdata_req *)
6081 			    mp->b_rptr)->OPT_length != 0) {
6082 				attrs.udpattr_ipp6 = ipp;
6083 				attrs.udpattr_mb = mp;
6084 				if (udp_unitdata_opt_process(q, mp, error,
6085 				    &attrs) < 0) {
6086 					goto done;
6087 				}
6088 				ASSERT(*error == 0);
6089 				opt_present = B_TRUE;
6090 			}
6091 		}
6092 	}
6093 
6094 	/*
6095 	 * Determine whether we need to mark the mblk with the user's
6096 	 * credentials.
6097 	 */
6098 	ire = connp->conn_ire_cache;
6099 	if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
6100 	    (ire == NULL) ||
6101 	    (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) ||
6102 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) {
6103 		if (cr != NULL && DB_CRED(mp) == NULL)
6104 			msg_setcredpid(mp, cr, pid);
6105 	}
6106 
6107 	rw_enter(&udp->udp_rwlock, RW_READER);
6108 	ignore = ipp->ipp_sticky_ignored;
6109 
6110 	/* mp1 points to the M_DATA mblk carrying the packet */
6111 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
6112 
6113 	if (sin6->sin6_scope_id != 0 &&
6114 	    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
6115 		/*
6116 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
6117 		 * option nor ancillary data.  It needs to be
6118 		 * explicitly set in options_exists.
6119 		 */
6120 		option_exists |= IPPF_SCOPE_ID;
6121 	}
6122 
6123 	/*
6124 	 * Compute the destination address
6125 	 */
6126 	ip6_dst = sin6->sin6_addr;
6127 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6128 		ip6_dst = ipv6_loopback;
6129 
6130 	port = sin6->sin6_port;
6131 
6132 	/*
6133 	 * Cluster and TSOL notes, Cluster check:
6134 	 * see comments in udp_output_v4().
6135 	 */
6136 	mutex_enter(&connp->conn_lock);
6137 
6138 	if (cl_inet_connect2 != NULL &&
6139 	    (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) ||
6140 	    port != udp->udp_lastdstport)) {
6141 		mutex_exit(&connp->conn_lock);
6142 		*error = 0;
6143 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error);
6144 		if (*error != 0) {
6145 			*error = EHOSTUNREACH;
6146 			rw_exit(&udp->udp_rwlock);
6147 			goto done;
6148 		}
6149 		update_lastdst = B_TRUE;
6150 		mutex_enter(&connp->conn_lock);
6151 	}
6152 
6153 	/*
6154 	 * If we're not going to the same destination as last time, then
6155 	 * recompute the label required.  This is done in a separate routine to
6156 	 * avoid blowing up our stack here.
6157 	 *
6158 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
6159 	 * to different destination may require different labels,
6160 	 * or worse, UDP packets to same IP address may require
6161 	 * different labels due to use of shared all-zones address.
6162 	 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
6163 	 * and sticky ipp_hopoptslen are consistent for the current
6164 	 * destination and are updated atomically.
6165 	 */
6166 	if (is_system_labeled()) {
6167 		/* Using UDP MLP requires SCM_UCRED from user */
6168 		if (connp->conn_mlp_type != mlptSingle &&
6169 		    !attrs.udpattr_credset) {
6170 			DTRACE_PROBE4(
6171 			    tx__ip__log__info__output__udp6,
6172 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
6173 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
6174 			*error = ECONNREFUSED;
6175 			rw_exit(&udp->udp_rwlock);
6176 			mutex_exit(&connp->conn_lock);
6177 			goto done;
6178 		}
6179 		/*
6180 		 * update label option for this UDP socket if
6181 		 * - the destination has changed, or
6182 		 * - the UDP socket is MLP
6183 		 */
6184 		if ((opt_present ||
6185 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
6186 		    connp->conn_mlp_type != mlptSingle) &&
6187 		    (*error = udp_update_label_v6(q, mp, &ip6_dst,
6188 		    &update_lastdst)) != 0) {
6189 			rw_exit(&udp->udp_rwlock);
6190 			mutex_exit(&connp->conn_lock);
6191 			goto done;
6192 		}
6193 	}
6194 
6195 	if (update_lastdst) {
6196 		udp->udp_v6lastdst = ip6_dst;
6197 		udp->udp_lastdstport = port;
6198 	}
6199 
6200 	/*
6201 	 * If there's a security label here, then we ignore any options the
6202 	 * user may try to set.  We keep the peer's label as a hidden sticky
6203 	 * option. We make a private copy of this label before releasing the
6204 	 * lock so that label is kept consistent with the destination addr.
6205 	 */
6206 	if (udp->udp_label_len_v6 > 0) {
6207 		ignore &= ~IPPF_HOPOPTS;
6208 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
6209 	}
6210 
6211 	if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
6212 		/* No sticky options nor ancillary data. */
6213 		mutex_exit(&connp->conn_lock);
6214 		goto no_options;
6215 	}
6216 
6217 	/*
6218 	 * Go through the options figuring out where each is going to
6219 	 * come from and build two masks.  The first mask indicates if
6220 	 * the option exists at all.  The second mask indicates if the
6221 	 * option is sticky or ancillary.
6222 	 */
6223 	if (!(ignore & IPPF_HOPOPTS)) {
6224 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
6225 			option_exists |= IPPF_HOPOPTS;
6226 			udp_ip_hdr_len += ipp->ipp_hopoptslen;
6227 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
6228 			option_exists |= IPPF_HOPOPTS;
6229 			is_sticky |= IPPF_HOPOPTS;
6230 			ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
6231 			hopoptsptr = kmem_alloc(
6232 			    udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
6233 			if (hopoptsptr == NULL) {
6234 				*error = ENOMEM;
6235 				mutex_exit(&connp->conn_lock);
6236 				goto done;
6237 			}
6238 			hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
6239 			bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
6240 			    hopoptslen);
6241 			udp_ip_hdr_len += hopoptslen;
6242 		}
6243 	}
6244 	mutex_exit(&connp->conn_lock);
6245 
6246 	if (!(ignore & IPPF_RTHDR)) {
6247 		if (ipp->ipp_fields & IPPF_RTHDR) {
6248 			option_exists |= IPPF_RTHDR;
6249 			udp_ip_hdr_len += ipp->ipp_rthdrlen;
6250 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
6251 			option_exists |= IPPF_RTHDR;
6252 			is_sticky |= IPPF_RTHDR;
6253 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
6254 		}
6255 	}
6256 
6257 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
6258 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
6259 			option_exists |= IPPF_RTDSTOPTS;
6260 			udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
6261 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
6262 			option_exists |= IPPF_RTDSTOPTS;
6263 			is_sticky |= IPPF_RTDSTOPTS;
6264 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
6265 		}
6266 	}
6267 
6268 	if (!(ignore & IPPF_DSTOPTS)) {
6269 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
6270 			option_exists |= IPPF_DSTOPTS;
6271 			udp_ip_hdr_len += ipp->ipp_dstoptslen;
6272 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
6273 			option_exists |= IPPF_DSTOPTS;
6274 			is_sticky |= IPPF_DSTOPTS;
6275 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
6276 		}
6277 	}
6278 
6279 	if (!(ignore & IPPF_IFINDEX)) {
6280 		if (ipp->ipp_fields & IPPF_IFINDEX) {
6281 			option_exists |= IPPF_IFINDEX;
6282 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
6283 			option_exists |= IPPF_IFINDEX;
6284 			is_sticky |= IPPF_IFINDEX;
6285 		}
6286 	}
6287 
6288 	if (!(ignore & IPPF_ADDR)) {
6289 		if (ipp->ipp_fields & IPPF_ADDR) {
6290 			option_exists |= IPPF_ADDR;
6291 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
6292 			option_exists |= IPPF_ADDR;
6293 			is_sticky |= IPPF_ADDR;
6294 		}
6295 	}
6296 
6297 	if (!(ignore & IPPF_DONTFRAG)) {
6298 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
6299 			option_exists |= IPPF_DONTFRAG;
6300 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
6301 			option_exists |= IPPF_DONTFRAG;
6302 			is_sticky |= IPPF_DONTFRAG;
6303 		}
6304 	}
6305 
6306 	if (!(ignore & IPPF_USE_MIN_MTU)) {
6307 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
6308 			option_exists |= IPPF_USE_MIN_MTU;
6309 		} else if (udp->udp_sticky_ipp.ipp_fields &
6310 		    IPPF_USE_MIN_MTU) {
6311 			option_exists |= IPPF_USE_MIN_MTU;
6312 			is_sticky |= IPPF_USE_MIN_MTU;
6313 		}
6314 	}
6315 
6316 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
6317 		option_exists |= IPPF_HOPLIMIT;
6318 	/* IPV6_HOPLIMIT can never be sticky */
6319 	ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
6320 
6321 	if (!(ignore & IPPF_UNICAST_HOPS) &&
6322 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
6323 		option_exists |= IPPF_UNICAST_HOPS;
6324 		is_sticky |= IPPF_UNICAST_HOPS;
6325 	}
6326 
6327 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
6328 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
6329 		option_exists |= IPPF_MULTICAST_HOPS;
6330 		is_sticky |= IPPF_MULTICAST_HOPS;
6331 	}
6332 
6333 	if (!(ignore & IPPF_TCLASS)) {
6334 		if (ipp->ipp_fields & IPPF_TCLASS) {
6335 			option_exists |= IPPF_TCLASS;
6336 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
6337 			option_exists |= IPPF_TCLASS;
6338 			is_sticky |= IPPF_TCLASS;
6339 		}
6340 	}
6341 
6342 	if (!(ignore & IPPF_NEXTHOP) &&
6343 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
6344 		option_exists |= IPPF_NEXTHOP;
6345 		is_sticky |= IPPF_NEXTHOP;
6346 	}
6347 
6348 no_options:
6349 
6350 	/*
6351 	 * If any options carried in the ip6i_t were specified, we
6352 	 * need to account for the ip6i_t in the data we'll be sending
6353 	 * down.
6354 	 */
6355 	if (option_exists & IPPF_HAS_IP6I)
6356 		udp_ip_hdr_len += sizeof (ip6i_t);
6357 
6358 	/* check/fix buffer config, setup pointers into it */
6359 	ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
6360 	if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
6361 	    !OK_32PTR(ip6h)) {
6362 
6363 		/* Try to get everything in a single mblk next time */
6364 		if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
6365 			udp->udp_max_hdr_len = udp_ip_hdr_len;
6366 			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
6367 		}
6368 
6369 		mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
6370 		if (mp2 == NULL) {
6371 			*error = ENOMEM;
6372 			rw_exit(&udp->udp_rwlock);
6373 			goto done;
6374 		}
6375 		mp2->b_wptr = DB_LIM(mp2);
6376 		mp2->b_cont = mp1;
6377 		mp1 = mp2;
6378 		if (DB_TYPE(mp) != M_DATA)
6379 			mp->b_cont = mp1;
6380 		else
6381 			mp = mp1;
6382 
6383 		ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
6384 	}
6385 	mp1->b_rptr = (unsigned char *)ip6h;
6386 	ip6i = (ip6i_t *)ip6h;
6387 
6388 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
6389 	if (option_exists & IPPF_HAS_IP6I) {
6390 		ip6h = (ip6_t *)&ip6i[1];
6391 		ip6i->ip6i_flags = 0;
6392 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6393 
6394 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
6395 		if (option_exists & IPPF_SCOPE_ID) {
6396 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6397 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
6398 		} else if (option_exists & IPPF_IFINDEX) {
6399 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
6400 			ASSERT(tipp->ipp_ifindex != 0);
6401 			ip6i->ip6i_flags |= IP6I_IFINDEX;
6402 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
6403 		}
6404 
6405 		if (option_exists & IPPF_ADDR) {
6406 			/*
6407 			 * Enable per-packet source address verification if
6408 			 * IPV6_PKTINFO specified the source address.
6409 			 * ip6_src is set in the transport's _wput function.
6410 			 */
6411 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
6412 		}
6413 
6414 		if (option_exists & IPPF_DONTFRAG) {
6415 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
6416 		}
6417 
6418 		if (option_exists & IPPF_USE_MIN_MTU) {
6419 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
6420 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
6421 		}
6422 
6423 		if (option_exists & IPPF_NEXTHOP) {
6424 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
6425 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
6426 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
6427 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
6428 		}
6429 
6430 		/*
6431 		 * tell IP this is an ip6i_t private header
6432 		 */
6433 		ip6i->ip6i_nxt = IPPROTO_RAW;
6434 	}
6435 
6436 	/* Initialize IPv6 header */
6437 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6438 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
6439 
6440 	/* Set the hoplimit of the outgoing packet. */
6441 	if (option_exists & IPPF_HOPLIMIT) {
6442 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
6443 		ip6h->ip6_hops = ipp->ipp_hoplimit;
6444 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6445 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
6446 		ip6h->ip6_hops = udp->udp_multicast_ttl;
6447 		if (option_exists & IPPF_MULTICAST_HOPS)
6448 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6449 	} else {
6450 		ip6h->ip6_hops = udp->udp_ttl;
6451 		if (option_exists & IPPF_UNICAST_HOPS)
6452 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
6453 	}
6454 
6455 	if (option_exists & IPPF_ADDR) {
6456 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
6457 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
6458 		ip6h->ip6_src = tipp->ipp_addr;
6459 	} else {
6460 		/*
6461 		 * The source address was not set using IPV6_PKTINFO.
6462 		 * First look at the bound source.
6463 		 * If unspecified fallback to __sin6_src_id.
6464 		 */
6465 		ip6h->ip6_src = udp->udp_v6src;
6466 		if (sin6->__sin6_src_id != 0 &&
6467 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
6468 			ip_srcid_find_id(sin6->__sin6_src_id,
6469 			    &ip6h->ip6_src, connp->conn_zoneid,
6470 			    us->us_netstack);
6471 		}
6472 	}
6473 
6474 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
6475 	cp = (uint8_t *)&ip6h[1];
6476 
6477 	/*
6478 	 * Here's where we have to start stringing together
6479 	 * any extension headers in the right order:
6480 	 * Hop-by-hop, destination, routing, and final destination opts.
6481 	 */
6482 	if (option_exists & IPPF_HOPOPTS) {
6483 		/* Hop-by-hop options */
6484 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
6485 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
6486 		if (hopoptslen == 0) {
6487 			hopoptsptr = tipp->ipp_hopopts;
6488 			hopoptslen = tipp->ipp_hopoptslen;
6489 			is_ancillary = B_TRUE;
6490 		}
6491 
6492 		*nxthdr_ptr = IPPROTO_HOPOPTS;
6493 		nxthdr_ptr = &hbh->ip6h_nxt;
6494 
6495 		bcopy(hopoptsptr, cp, hopoptslen);
6496 		cp += hopoptslen;
6497 
6498 		if (hopoptsptr != NULL && !is_ancillary) {
6499 			kmem_free(hopoptsptr, hopoptslen);
6500 			hopoptsptr = NULL;
6501 			hopoptslen = 0;
6502 		}
6503 	}
6504 	/*
6505 	 * En-route destination options
6506 	 * Only do them if there's a routing header as well
6507 	 */
6508 	if (option_exists & IPPF_RTDSTOPTS) {
6509 		ip6_dest_t *dst = (ip6_dest_t *)cp;
6510 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
6511 
6512 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6513 		nxthdr_ptr = &dst->ip6d_nxt;
6514 
6515 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
6516 		cp += tipp->ipp_rtdstoptslen;
6517 	}
6518 	/*
6519 	 * Routing header next
6520 	 */
6521 	if (option_exists & IPPF_RTHDR) {
6522 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
6523 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
6524 
6525 		*nxthdr_ptr = IPPROTO_ROUTING;
6526 		nxthdr_ptr = &rt->ip6r_nxt;
6527 
6528 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
6529 		cp += tipp->ipp_rthdrlen;
6530 	}
6531 	/*
6532 	 * Do ultimate destination options
6533 	 */
6534 	if (option_exists & IPPF_DSTOPTS) {
6535 		ip6_dest_t *dest = (ip6_dest_t *)cp;
6536 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
6537 
6538 		*nxthdr_ptr = IPPROTO_DSTOPTS;
6539 		nxthdr_ptr = &dest->ip6d_nxt;
6540 
6541 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
6542 		cp += tipp->ipp_dstoptslen;
6543 	}
6544 	/*
6545 	 * Now set the last header pointer to the proto passed in
6546 	 */
6547 	ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
6548 	*nxthdr_ptr = IPPROTO_UDP;
6549 
6550 	/* Update UDP header */
6551 	udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
6552 	udph->uha_dst_port = sin6->sin6_port;
6553 	udph->uha_src_port = udp->udp_port;
6554 
6555 	/*
6556 	 * Copy in the destination address
6557 	 */
6558 	ip6h->ip6_dst = ip6_dst;
6559 
6560 	ip6h->ip6_vcf =
6561 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
6562 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
6563 
6564 	if (option_exists & IPPF_TCLASS) {
6565 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
6566 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
6567 		    tipp->ipp_tclass);
6568 	}
6569 	rw_exit(&udp->udp_rwlock);
6570 
6571 	if (option_exists & IPPF_RTHDR) {
6572 		ip6_rthdr_t	*rth;
6573 
6574 		/*
6575 		 * Perform any processing needed for source routing.
6576 		 * We know that all extension headers will be in the same mblk
6577 		 * as the IPv6 header.
6578 		 */
6579 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
6580 		if (rth != NULL && rth->ip6r_segleft != 0) {
6581 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
6582 				/*
6583 				 * Drop packet - only support Type 0 routing.
6584 				 * Notify the application as well.
6585 				 */
6586 				*error = EPROTO;
6587 				goto done;
6588 			}
6589 
6590 			/*
6591 			 * rth->ip6r_len is twice the number of
6592 			 * addresses in the header. Thus it must be even.
6593 			 */
6594 			if (rth->ip6r_len & 0x1) {
6595 				*error = EPROTO;
6596 				goto done;
6597 			}
6598 			/*
6599 			 * Shuffle the routing header and ip6_dst
6600 			 * addresses, and get the checksum difference
6601 			 * between the first hop (in ip6_dst) and
6602 			 * the destination (in the last routing hdr entry).
6603 			 */
6604 			csum = ip_massage_options_v6(ip6h, rth,
6605 			    us->us_netstack);
6606 			/*
6607 			 * Verify that the first hop isn't a mapped address.
6608 			 * Routers along the path need to do this verification
6609 			 * for subsequent hops.
6610 			 */
6611 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
6612 				*error = EADDRNOTAVAIL;
6613 				goto done;
6614 			}
6615 
6616 			cp += (rth->ip6r_len + 1)*8;
6617 		}
6618 	}
6619 
6620 	/* count up length of UDP packet */
6621 	ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
6622 	if ((mp2 = mp1->b_cont) != NULL) {
6623 		do {
6624 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
6625 			ip_len += (uint32_t)MBLKL(mp2);
6626 		} while ((mp2 = mp2->b_cont) != NULL);
6627 	}
6628 
6629 	/*
6630 	 * If the size of the packet is greater than the maximum allowed by
6631 	 * ip, return an error. Passing this down could cause panics because
6632 	 * the size will have wrapped and be inconsistent with the msg size.
6633 	 */
6634 	if (ip_len > IP_MAXPACKET) {
6635 		*error = EMSGSIZE;
6636 		goto done;
6637 	}
6638 
6639 	/* Store the UDP length. Subtract length of extension hdrs */
6640 	udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
6641 	    (int)((uchar_t *)udph - (uchar_t *)ip6h));
6642 
6643 	/*
6644 	 * We make it easy for IP to include our pseudo header
6645 	 * by putting our length in uh_checksum, modified (if
6646 	 * we have a routing header) by the checksum difference
6647 	 * between the ultimate destination and first hop addresses.
6648 	 * Note: UDP over IPv6 must always checksum the packet.
6649 	 */
6650 	csum += udph->uha_length;
6651 	csum = (csum & 0xFFFF) + (csum >> 16);
6652 	udph->uha_checksum = (uint16_t)csum;
6653 
6654 #ifdef _LITTLE_ENDIAN
6655 	ip_len = htons(ip_len);
6656 #endif
6657 	ip6h->ip6_plen = ip_len;
6658 	if (DB_CRED(mp) != NULL)
6659 		mblk_setcred(mp1, DB_CRED(mp));
6660 
6661 	if (DB_TYPE(mp) != M_DATA) {
6662 		ASSERT(mp != mp1);
6663 		freeb(mp);
6664 	}
6665 
6666 	/* mp has been consumed and we'll return success */
6667 	ASSERT(*error == 0);
6668 	mp = NULL;
6669 
6670 	/* We're done. Pass the packet to IP */
6671 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
6672 	ip_output_v6(connp, mp1, q, IP_WPUT);
6673 
6674 done:
6675 	if (sth_wroff != 0) {
6676 		(void) proto_set_tx_wroff(RD(q), connp,
6677 		    udp->udp_max_hdr_len + us->us_wroff_extra);
6678 	}
6679 	if (hopoptsptr != NULL && !is_ancillary) {
6680 		kmem_free(hopoptsptr, hopoptslen);
6681 		hopoptsptr = NULL;
6682 	}
6683 	if (*error != 0) {
6684 		ASSERT(mp != NULL);
6685 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
6686 	}
6687 	return (mp);
6688 }
6689 
6690 
6691 static int
6692 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6693 {
6694 	sin_t *sin = (sin_t *)sa;
6695 	sin6_t *sin6 = (sin6_t *)sa;
6696 
6697 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6698 
6699 	if (udp->udp_state != TS_DATA_XFER)
6700 		return (ENOTCONN);
6701 
6702 	switch (udp->udp_family) {
6703 	case AF_INET:
6704 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6705 
6706 		if (*salenp < sizeof (sin_t))
6707 			return (EINVAL);
6708 
6709 		*salenp = sizeof (sin_t);
6710 		*sin = sin_null;
6711 		sin->sin_family = AF_INET;
6712 		sin->sin_port = udp->udp_dstport;
6713 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
6714 		break;
6715 
6716 	case AF_INET6:
6717 		if (*salenp < sizeof (sin6_t))
6718 			return (EINVAL);
6719 
6720 		*salenp = sizeof (sin6_t);
6721 		*sin6 = sin6_null;
6722 		sin6->sin6_family = AF_INET6;
6723 		sin6->sin6_port = udp->udp_dstport;
6724 		sin6->sin6_addr = udp->udp_v6dst;
6725 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6726 		break;
6727 	}
6728 
6729 	return (0);
6730 }
6731 
6732 static int
6733 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
6734 {
6735 	sin_t *sin = (sin_t *)sa;
6736 	sin6_t *sin6 = (sin6_t *)sa;
6737 
6738 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
6739 
6740 	switch (udp->udp_family) {
6741 	case AF_INET:
6742 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
6743 
6744 		if (*salenp < sizeof (sin_t))
6745 			return (EINVAL);
6746 
6747 		*salenp = sizeof (sin_t);
6748 		*sin = sin_null;
6749 		sin->sin_family = AF_INET;
6750 		sin->sin_port = udp->udp_port;
6751 
6752 		/*
6753 		 * If udp_v6src is unspecified, we might be bound to broadcast
6754 		 * / multicast.  Use udp_bound_v6src as local address instead
6755 		 * (that could also still be unspecified).
6756 		 */
6757 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
6758 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
6759 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
6760 		} else {
6761 			sin->sin_addr.s_addr =
6762 			    V4_PART_OF_V6(udp->udp_bound_v6src);
6763 		}
6764 		break;
6765 
6766 	case AF_INET6:
6767 		if (*salenp < sizeof (sin6_t))
6768 			return (EINVAL);
6769 
6770 		*salenp = sizeof (sin6_t);
6771 		*sin6 = sin6_null;
6772 		sin6->sin6_family = AF_INET6;
6773 		sin6->sin6_port = udp->udp_port;
6774 		sin6->sin6_flowinfo = udp->udp_flowinfo;
6775 
6776 		/*
6777 		 * If udp_v6src is unspecified, we might be bound to broadcast
6778 		 * / multicast.  Use udp_bound_v6src as local address instead
6779 		 * (that could also still be unspecified).
6780 		 */
6781 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))
6782 			sin6->sin6_addr = udp->udp_v6src;
6783 		else
6784 			sin6->sin6_addr = udp->udp_bound_v6src;
6785 		break;
6786 	}
6787 
6788 	return (0);
6789 }
6790 
6791 /*
6792  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
6793  */
6794 static void
6795 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
6796 {
6797 	void	*data;
6798 	mblk_t	*datamp = mp->b_cont;
6799 	udp_t	*udp = Q_TO_UDP(q);
6800 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
6801 
6802 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
6803 		cmdp->cb_error = EPROTO;
6804 		qreply(q, mp);
6805 		return;
6806 	}
6807 	data = datamp->b_rptr;
6808 
6809 	rw_enter(&udp->udp_rwlock, RW_READER);
6810 	switch (cmdp->cb_cmd) {
6811 	case TI_GETPEERNAME:
6812 		cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
6813 		break;
6814 	case TI_GETMYNAME:
6815 		cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
6816 		break;
6817 	default:
6818 		cmdp->cb_error = EINVAL;
6819 		break;
6820 	}
6821 	rw_exit(&udp->udp_rwlock);
6822 
6823 	qreply(q, mp);
6824 }
6825 
6826 static void
6827 udp_disable_direct_sockfs(udp_t *udp)
6828 {
6829 	udp->udp_issocket = B_FALSE;
6830 	if (udp->udp_direct_sockfs) {
6831 		/*
6832 		 * Disable read-side synchronous stream interface and
6833 		 * drain any queued data.
6834 		 */
6835 		udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE);
6836 		ASSERT(!udp->udp_direct_sockfs);
6837 		UDP_STAT(udp->udp_us, udp_sock_fallback);
6838 	}
6839 }
6840 
6841 static void
6842 udp_wput_other(queue_t *q, mblk_t *mp)
6843 {
6844 	uchar_t	*rptr = mp->b_rptr;
6845 	struct datab *db;
6846 	struct iocblk *iocp;
6847 	cred_t	*cr;
6848 	conn_t	*connp = Q_TO_CONN(q);
6849 	udp_t	*udp = connp->conn_udp;
6850 	udp_stack_t *us;
6851 
6852 	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
6853 	    "udp_wput_other_start: q %p", q);
6854 
6855 	us = udp->udp_us;
6856 	db = mp->b_datap;
6857 
6858 	cr = DB_CREDDEF(mp, connp->conn_cred);
6859 
6860 	switch (db->db_type) {
6861 	case M_CMD:
6862 		udp_wput_cmdblk(q, mp);
6863 		return;
6864 
6865 	case M_PROTO:
6866 	case M_PCPROTO:
6867 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
6868 			freemsg(mp);
6869 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6870 			    "udp_wput_other_end: q %p (%S)", q, "protoshort");
6871 			return;
6872 		}
6873 		switch (((t_primp_t)rptr)->type) {
6874 		case T_ADDR_REQ:
6875 			udp_addr_req(q, mp);
6876 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6877 			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
6878 			return;
6879 		case O_T_BIND_REQ:
6880 		case T_BIND_REQ:
6881 			udp_tpi_bind(q, mp);
6882 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6883 			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
6884 			return;
6885 		case T_CONN_REQ:
6886 			udp_tpi_connect(q, mp);
6887 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6888 			    "udp_wput_other_end: q %p (%S)", q, "connreq");
6889 			return;
6890 		case T_CAPABILITY_REQ:
6891 			udp_capability_req(q, mp);
6892 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6893 			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
6894 			return;
6895 		case T_INFO_REQ:
6896 			udp_info_req(q, mp);
6897 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6898 			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
6899 			return;
6900 		case T_UNITDATA_REQ:
6901 			/*
6902 			 * If a T_UNITDATA_REQ gets here, the address must
6903 			 * be bad.  Valid T_UNITDATA_REQs are handled
6904 			 * in udp_wput.
6905 			 */
6906 			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
6907 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6908 			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
6909 			return;
6910 		case T_UNBIND_REQ:
6911 			udp_tpi_unbind(q, mp);
6912 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6913 			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
6914 			return;
6915 		case T_SVR4_OPTMGMT_REQ:
6916 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
6917 			    cr)) {
6918 				(void) svr4_optcom_req(q,
6919 				    mp, cr, &udp_opt_obj, B_TRUE);
6920 			}
6921 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6922 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
6923 			return;
6924 
6925 		case T_OPTMGMT_REQ:
6926 			(void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
6927 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6928 			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
6929 			return;
6930 
6931 		case T_DISCON_REQ:
6932 			udp_tpi_disconnect(q, mp);
6933 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6934 			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
6935 			return;
6936 
6937 		/* The following TPI message is not supported by udp. */
6938 		case O_T_CONN_RES:
6939 		case T_CONN_RES:
6940 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
6941 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6942 			    "udp_wput_other_end: q %p (%S)", q,
6943 			    "connres/disconreq");
6944 			return;
6945 
6946 		/* The following 3 TPI messages are illegal for udp. */
6947 		case T_DATA_REQ:
6948 		case T_EXDATA_REQ:
6949 		case T_ORDREL_REQ:
6950 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
6951 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6952 			    "udp_wput_other_end: q %p (%S)", q,
6953 			    "data/exdata/ordrel");
6954 			return;
6955 		default:
6956 			break;
6957 		}
6958 		break;
6959 	case M_FLUSH:
6960 		if (*rptr & FLUSHW)
6961 			flushq(q, FLUSHDATA);
6962 		break;
6963 	case M_IOCTL:
6964 		iocp = (struct iocblk *)mp->b_rptr;
6965 		switch (iocp->ioc_cmd) {
6966 		case TI_GETPEERNAME:
6967 			if (udp->udp_state != TS_DATA_XFER) {
6968 				/*
6969 				 * If a default destination address has not
6970 				 * been associated with the stream, then we
6971 				 * don't know the peer's name.
6972 				 */
6973 				iocp->ioc_error = ENOTCONN;
6974 				iocp->ioc_count = 0;
6975 				mp->b_datap->db_type = M_IOCACK;
6976 				qreply(q, mp);
6977 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6978 				    "udp_wput_other_end: q %p (%S)", q,
6979 				    "getpeername");
6980 				return;
6981 			}
6982 			/* FALLTHRU */
6983 		case TI_GETMYNAME: {
6984 			/*
6985 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
6986 			 * need to copyin the user's strbuf structure.
6987 			 * Processing will continue in the M_IOCDATA case
6988 			 * below.
6989 			 */
6990 			mi_copyin(q, mp, NULL,
6991 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
6992 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
6993 			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
6994 			return;
6995 			}
6996 		case ND_SET:
6997 			/* nd_getset performs the necessary checking */
6998 		case ND_GET:
6999 			if (nd_getset(q, us->us_nd, mp)) {
7000 				qreply(q, mp);
7001 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7002 				    "udp_wput_other_end: q %p (%S)", q, "get");
7003 				return;
7004 			}
7005 			break;
7006 		case _SIOCSOCKFALLBACK:
7007 			/*
7008 			 * Either sockmod is about to be popped and the
7009 			 * socket would now be treated as a plain stream,
7010 			 * or a module is about to be pushed so we could
7011 			 * no longer use read-side synchronous stream.
7012 			 * Drain any queued data and disable direct sockfs
7013 			 * interface from now on.
7014 			 */
7015 			if (!udp->udp_issocket) {
7016 				DB_TYPE(mp) = M_IOCNAK;
7017 				iocp->ioc_error = EINVAL;
7018 			} else {
7019 				udp_disable_direct_sockfs(udp);
7020 
7021 				DB_TYPE(mp) = M_IOCACK;
7022 				iocp->ioc_error = 0;
7023 			}
7024 			iocp->ioc_count = 0;
7025 			iocp->ioc_rval = 0;
7026 			qreply(q, mp);
7027 			return;
7028 		default:
7029 			break;
7030 		}
7031 		break;
7032 	case M_IOCDATA:
7033 		udp_wput_iocdata(q, mp);
7034 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7035 		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
7036 		return;
7037 	default:
7038 		/* Unrecognized messages are passed through without change. */
7039 		break;
7040 	}
7041 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7042 	    "udp_wput_other_end: q %p (%S)", q, "end");
7043 	ip_output(connp, mp, q, IP_WPUT);
7044 }
7045 
7046 /*
7047  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
7048  * messages.
7049  */
7050 static void
7051 udp_wput_iocdata(queue_t *q, mblk_t *mp)
7052 {
7053 	mblk_t		*mp1;
7054 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
7055 	STRUCT_HANDLE(strbuf, sb);
7056 	udp_t		*udp = Q_TO_UDP(q);
7057 	int		error;
7058 	uint_t		addrlen;
7059 
7060 	/* Make sure it is one of ours. */
7061 	switch (iocp->ioc_cmd) {
7062 	case TI_GETMYNAME:
7063 	case TI_GETPEERNAME:
7064 		break;
7065 	default:
7066 		ip_output(udp->udp_connp, mp, q, IP_WPUT);
7067 		return;
7068 	}
7069 
7070 	switch (mi_copy_state(q, mp, &mp1)) {
7071 	case -1:
7072 		return;
7073 	case MI_COPY_CASE(MI_COPY_IN, 1):
7074 		break;
7075 	case MI_COPY_CASE(MI_COPY_OUT, 1):
7076 		/*
7077 		 * The address has been copied out, so now
7078 		 * copyout the strbuf.
7079 		 */
7080 		mi_copyout(q, mp);
7081 		return;
7082 	case MI_COPY_CASE(MI_COPY_OUT, 2):
7083 		/*
7084 		 * The address and strbuf have been copied out.
7085 		 * We're done, so just acknowledge the original
7086 		 * M_IOCTL.
7087 		 */
7088 		mi_copy_done(q, mp, 0);
7089 		return;
7090 	default:
7091 		/*
7092 		 * Something strange has happened, so acknowledge
7093 		 * the original M_IOCTL with an EPROTO error.
7094 		 */
7095 		mi_copy_done(q, mp, EPROTO);
7096 		return;
7097 	}
7098 
7099 	/*
7100 	 * Now we have the strbuf structure for TI_GETMYNAME
7101 	 * and TI_GETPEERNAME.  Next we copyout the requested
7102 	 * address and then we'll copyout the strbuf.
7103 	 */
7104 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
7105 	addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t);
7106 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
7107 		mi_copy_done(q, mp, EINVAL);
7108 		return;
7109 	}
7110 
7111 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
7112 
7113 	if (mp1 == NULL)
7114 		return;
7115 
7116 	rw_enter(&udp->udp_rwlock, RW_READER);
7117 	switch (iocp->ioc_cmd) {
7118 	case TI_GETMYNAME:
7119 		error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
7120 		break;
7121 	case TI_GETPEERNAME:
7122 		error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
7123 		break;
7124 	}
7125 	rw_exit(&udp->udp_rwlock);
7126 
7127 	if (error != 0) {
7128 		mi_copy_done(q, mp, error);
7129 	} else {
7130 		mp1->b_wptr += addrlen;
7131 		STRUCT_FSET(sb, len, addrlen);
7132 
7133 		/* Copy out the address */
7134 		mi_copyout(q, mp);
7135 	}
7136 }
7137 
7138 static int
7139 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
7140     udpattrs_t *udpattrs)
7141 {
7142 	struct T_unitdata_req *udreqp;
7143 	int is_absreq_failure;
7144 	cred_t *cr;
7145 	conn_t	*connp = Q_TO_CONN(q);
7146 
7147 	ASSERT(((t_primp_t)mp->b_rptr)->type);
7148 
7149 	cr = DB_CREDDEF(mp, connp->conn_cred);
7150 
7151 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
7152 
7153 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
7154 	    udreqp->OPT_offset, cr, &udp_opt_obj,
7155 	    udpattrs, &is_absreq_failure);
7156 
7157 	if (*errorp != 0) {
7158 		/*
7159 		 * Note: No special action needed in this
7160 		 * module for "is_absreq_failure"
7161 		 */
7162 		return (-1);		/* failure */
7163 	}
7164 	ASSERT(is_absreq_failure == 0);
7165 	return (0);	/* success */
7166 }
7167 
7168 void
7169 udp_ddi_g_init(void)
7170 {
7171 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
7172 	    udp_opt_obj.odb_opt_arr_cnt);
7173 
7174 	/*
7175 	 * We want to be informed each time a stack is created or
7176 	 * destroyed in the kernel, so we can maintain the
7177 	 * set of udp_stack_t's.
7178 	 */
7179 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
7180 }
7181 
7182 void
7183 udp_ddi_g_destroy(void)
7184 {
7185 	netstack_unregister(NS_UDP);
7186 }
7187 
7188 #define	INET_NAME	"ip"
7189 
7190 /*
7191  * Initialize the UDP stack instance.
7192  */
7193 static void *
7194 udp_stack_init(netstackid_t stackid, netstack_t *ns)
7195 {
7196 	udp_stack_t	*us;
7197 	udpparam_t	*pa;
7198 	int		i;
7199 	int		error = 0;
7200 	major_t		major;
7201 
7202 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
7203 	us->us_netstack = ns;
7204 
7205 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
7206 	us->us_epriv_ports[0] = 2049;
7207 	us->us_epriv_ports[1] = 4045;
7208 
7209 	/*
7210 	 * The smallest anonymous port in the priviledged port range which UDP
7211 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
7212 	 */
7213 	us->us_min_anonpriv_port = 512;
7214 
7215 	us->us_bind_fanout_size = udp_bind_fanout_size;
7216 
7217 	/* Roundup variable that might have been modified in /etc/system */
7218 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
7219 		/* Not a power of two. Round up to nearest power of two */
7220 		for (i = 0; i < 31; i++) {
7221 			if (us->us_bind_fanout_size < (1 << i))
7222 				break;
7223 		}
7224 		us->us_bind_fanout_size = 1 << i;
7225 	}
7226 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
7227 	    sizeof (udp_fanout_t), KM_SLEEP);
7228 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7229 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
7230 		    NULL);
7231 	}
7232 
7233 	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);
7234 
7235 	us->us_param_arr = pa;
7236 	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));
7237 
7238 	(void) udp_param_register(&us->us_nd,
7239 	    us->us_param_arr, A_CNT(udp_param_arr));
7240 
7241 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
7242 	us->us_mibkp = udp_kstat_init(stackid);
7243 
7244 	major = mod_name_to_major(INET_NAME);
7245 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
7246 	ASSERT(error == 0);
7247 	return (us);
7248 }
7249 
7250 /*
7251  * Free the UDP stack instance.
7252  */
7253 static void
7254 udp_stack_fini(netstackid_t stackid, void *arg)
7255 {
7256 	udp_stack_t *us = (udp_stack_t *)arg;
7257 	int i;
7258 
7259 	for (i = 0; i < us->us_bind_fanout_size; i++) {
7260 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
7261 	}
7262 
7263 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
7264 	    sizeof (udp_fanout_t));
7265 
7266 	us->us_bind_fanout = NULL;
7267 
7268 	nd_free(&us->us_nd);
7269 	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
7270 	us->us_param_arr = NULL;
7271 
7272 	udp_kstat_fini(stackid, us->us_mibkp);
7273 	us->us_mibkp = NULL;
7274 
7275 	udp_kstat2_fini(stackid, us->us_kstat);
7276 	us->us_kstat = NULL;
7277 	bzero(&us->us_statistics, sizeof (us->us_statistics));
7278 
7279 	ldi_ident_release(us->us_ldi_ident);
7280 	kmem_free(us, sizeof (*us));
7281 }
7282 
7283 static void *
7284 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
7285 {
7286 	kstat_t *ksp;
7287 
7288 	udp_stat_t template = {
7289 		{ "udp_ip_send",		KSTAT_DATA_UINT64 },
7290 		{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
7291 		{ "udp_ire_null",		KSTAT_DATA_UINT64 },
7292 		{ "udp_drain",			KSTAT_DATA_UINT64 },
7293 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
7294 		{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
7295 		{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
7296 		{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
7297 		{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
7298 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
7299 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
7300 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
7301 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
7302 		{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
7303 		{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
7304 		{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
7305 		{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
7306 		{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
7307 		{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
7308 		{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
7309 		{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
7310 		{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
7311 		{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
7312 		{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
7313 		{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
7314 		{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
7315 		{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
7316 		{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
7317 #ifdef DEBUG
7318 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
7319 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
7320 #endif
7321 	};
7322 
7323 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
7324 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
7325 	    KSTAT_FLAG_VIRTUAL, stackid);
7326 
7327 	if (ksp == NULL)
7328 		return (NULL);
7329 
7330 	bcopy(&template, us_statisticsp, sizeof (template));
7331 	ksp->ks_data = (void *)us_statisticsp;
7332 	ksp->ks_private = (void *)(uintptr_t)stackid;
7333 
7334 	kstat_install(ksp);
7335 	return (ksp);
7336 }
7337 
7338 static void
7339 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
7340 {
7341 	if (ksp != NULL) {
7342 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7343 		kstat_delete_netstack(ksp, stackid);
7344 	}
7345 }
7346 
7347 static void *
7348 udp_kstat_init(netstackid_t stackid)
7349 {
7350 	kstat_t	*ksp;
7351 
7352 	udp_named_kstat_t template = {
7353 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
7354 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
7355 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
7356 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
7357 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
7358 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
7359 	};
7360 
7361 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
7362 	    KSTAT_TYPE_NAMED,
7363 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
7364 
7365 	if (ksp == NULL || ksp->ks_data == NULL)
7366 		return (NULL);
7367 
7368 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
7369 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
7370 
7371 	bcopy(&template, ksp->ks_data, sizeof (template));
7372 	ksp->ks_update = udp_kstat_update;
7373 	ksp->ks_private = (void *)(uintptr_t)stackid;
7374 
7375 	kstat_install(ksp);
7376 	return (ksp);
7377 }
7378 
7379 static void
7380 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
7381 {
7382 	if (ksp != NULL) {
7383 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
7384 		kstat_delete_netstack(ksp, stackid);
7385 	}
7386 }
7387 
7388 static int
7389 udp_kstat_update(kstat_t *kp, int rw)
7390 {
7391 	udp_named_kstat_t *udpkp;
7392 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
7393 	netstack_t	*ns;
7394 	udp_stack_t	*us;
7395 
7396 	if ((kp == NULL) || (kp->ks_data == NULL))
7397 		return (EIO);
7398 
7399 	if (rw == KSTAT_WRITE)
7400 		return (EACCES);
7401 
7402 	ns = netstack_find_by_stackid(stackid);
7403 	if (ns == NULL)
7404 		return (-1);
7405 	us = ns->netstack_udp;
7406 	if (us == NULL) {
7407 		netstack_rele(ns);
7408 		return (-1);
7409 	}
7410 	udpkp = (udp_named_kstat_t *)kp->ks_data;
7411 
7412 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
7413 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
7414 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
7415 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
7416 	netstack_rele(ns);
7417 	return (0);
7418 }
7419 
7420 /*
7421  * Read-side synchronous stream info entry point, called as a
7422  * result of handling certain STREAMS ioctl operations.
7423  */
7424 static int
7425 udp_rinfop(queue_t *q, infod_t *dp)
7426 {
7427 	mblk_t	*mp;
7428 	uint_t	cmd = dp->d_cmd;
7429 	int	res = 0;
7430 	int	error = 0;
7431 	udp_t	*udp = Q_TO_UDP(q);
7432 	struct stdata *stp = STREAM(q);
7433 
7434 	mutex_enter(&udp->udp_drain_lock);
7435 	/* If shutdown on read has happened, return nothing */
7436 	mutex_enter(&stp->sd_lock);
7437 	if (stp->sd_flag & STREOF) {
7438 		mutex_exit(&stp->sd_lock);
7439 		goto done;
7440 	}
7441 	mutex_exit(&stp->sd_lock);
7442 
7443 	if ((mp = udp->udp_rcv_list_head) == NULL)
7444 		goto done;
7445 
7446 	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);
7447 
7448 	if (cmd & INFOD_COUNT) {
7449 		/*
7450 		 * Return the number of messages.
7451 		 */
7452 		dp->d_count += udp->udp_rcv_msgcnt;
7453 		res |= INFOD_COUNT;
7454 	}
7455 	if (cmd & INFOD_BYTES) {
7456 		/*
7457 		 * Return size of all data messages.
7458 		 */
7459 		dp->d_bytes += udp->udp_rcv_cnt;
7460 		res |= INFOD_BYTES;
7461 	}
7462 	if (cmd & INFOD_FIRSTBYTES) {
7463 		/*
7464 		 * Return size of first data message.
7465 		 */
7466 		dp->d_bytes = msgdsize(mp);
7467 		res |= INFOD_FIRSTBYTES;
7468 		dp->d_cmd &= ~INFOD_FIRSTBYTES;
7469 	}
7470 	if (cmd & INFOD_COPYOUT) {
7471 		mblk_t *mp1 = mp->b_cont;
7472 		int n;
7473 		/*
7474 		 * Return data contents of first message.
7475 		 */
7476 		ASSERT(DB_TYPE(mp1) == M_DATA);
7477 		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
7478 			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
7479 			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
7480 			    UIO_READ, dp->d_uiop)) != 0) {
7481 				goto done;
7482 			}
7483 			mp1 = mp1->b_cont;
7484 		}
7485 		res |= INFOD_COPYOUT;
7486 		dp->d_cmd &= ~INFOD_COPYOUT;
7487 	}
7488 done:
7489 	mutex_exit(&udp->udp_drain_lock);
7490 
7491 	dp->d_res |= res;
7492 
7493 	return (error);
7494 }
7495 
7496 /*
7497  * Read-side synchronous stream entry point.  This is called as a result
7498  * of recv/read operation done at sockfs, and is guaranteed to execute
7499  * outside of the interrupt thread context.  It returns a single datagram
7500  * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
7501  */
7502 static int
7503 udp_rrw(queue_t *q, struiod_t *dp)
7504 {
7505 	mblk_t	*mp;
7506 	udp_t	*udp = Q_TO_UDP(q);
7507 	udp_stack_t *us = udp->udp_us;
7508 
7509 	/*
7510 	 * Dequeue datagram from the head of the list and return
7511 	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
7512 	 * set/cleared depending on whether or not there's data
7513 	 * remaining in the list.
7514 	 */
7515 	mutex_enter(&udp->udp_drain_lock);
7516 	if (!udp->udp_direct_sockfs) {
7517 		mutex_exit(&udp->udp_drain_lock);
7518 		UDP_STAT(us, udp_rrw_busy);
7519 		return (EBUSY);
7520 	}
7521 	if ((mp = udp->udp_rcv_list_head) != NULL) {
7522 		uint_t size = msgdsize(mp);
7523 
7524 		/* Last datagram in the list? */
7525 		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
7526 			udp->udp_rcv_list_tail = NULL;
7527 		mp->b_next = NULL;
7528 
7529 		udp->udp_rcv_cnt -= size;
7530 		udp->udp_rcv_msgcnt--;
7531 		UDP_STAT(us, udp_rrw_msgcnt);
7532 
7533 		/* No longer flow-controlling? */
7534 		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
7535 		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
7536 			udp->udp_drain_qfull = B_FALSE;
7537 	}
7538 	if (udp->udp_rcv_list_head == NULL) {
7539 		/*
7540 		 * Either we just dequeued the last datagram or
7541 		 * we get here from sockfs and have nothing to
7542 		 * return; in this case clear RSLEEP.
7543 		 */
7544 		ASSERT(udp->udp_rcv_cnt == 0);
7545 		ASSERT(udp->udp_rcv_msgcnt == 0);
7546 		ASSERT(udp->udp_rcv_list_tail == NULL);
7547 		STR_WAKEUP_CLEAR(STREAM(q));
7548 	} else {
7549 		/*
7550 		 * More data follows; we need udp_rrw() to be
7551 		 * called in future to pick up the rest.
7552 		 */
7553 		STR_WAKEUP_SET(STREAM(q));
7554 	}
7555 	mutex_exit(&udp->udp_drain_lock);
7556 	dp->d_mp = mp;
7557 	return (0);
7558 }
7559 
7560 /*
7561  * Enqueue a completely-built T_UNITDATA_IND message into the receive
7562  * list; this is typically executed within the interrupt thread context
7563  * and so we do things as quickly as possible.
7564  */
7565 static void
7566 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
7567 {
7568 	ASSERT(q == RD(q));
7569 	ASSERT(pkt_len == msgdsize(mp));
7570 	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
7571 	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
7572 	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));
7573 
7574 	mutex_enter(&udp->udp_drain_lock);
7575 	/*
7576 	 * Wake up and signal the receiving app; it is okay to do this
7577 	 * before enqueueing the mp because we are holding the drain lock.
7578 	 * One of the advantages of synchronous stream is the ability for
7579 	 * us to find out when the application performs a read on the
7580 	 * socket by way of udp_rrw() entry point being called.  We need
7581 	 * to generate SIGPOLL/SIGIO for each received data in the case
7582 	 * of asynchronous socket just as in the strrput() case.  However,
7583 	 * we only wake the application up when necessary, i.e. during the
7584 	 * first enqueue.  When udp_rrw() is called, we send up a single
7585 	 * datagram upstream and call STR_WAKEUP_SET() again when there
7586 	 * are still data remaining in our receive queue.
7587 	 */
7588 	STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
7589 	if (udp->udp_rcv_list_head == NULL)
7590 		udp->udp_rcv_list_head = mp;
7591 	else
7592 		udp->udp_rcv_list_tail->b_next = mp;
7593 	udp->udp_rcv_list_tail = mp;
7594 	udp->udp_rcv_cnt += pkt_len;
7595 	udp->udp_rcv_msgcnt++;
7596 
7597 	/* Need to flow-control? */
7598 	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
7599 	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
7600 		udp->udp_drain_qfull = B_TRUE;
7601 
7602 	mutex_exit(&udp->udp_drain_lock);
7603 }
7604 
7605 /*
7606  * Drain the contents of receive list to the module upstream; we do
7607  * this during close or when we fallback to the slow mode due to
7608  * sockmod being popped or a module being pushed on top of us.
7609  */
7610 static void
7611 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
7612 {
7613 	mblk_t *mp;
7614 	udp_stack_t *us = udp->udp_us;
7615 
7616 	mutex_enter(&udp->udp_drain_lock);
7617 	/*
7618 	 * There is no race with a concurrent udp_input() sending
7619 	 * up packets using putnext() after we have cleared the
7620 	 * udp_direct_sockfs flag but before we have completed
7621 	 * sending up the packets in udp_rcv_list, since we are
7622 	 * either a writer or we have quiesced the conn.
7623 	 */
7624 	udp->udp_direct_sockfs = B_FALSE;
7625 	mutex_exit(&udp->udp_drain_lock);
7626 
7627 	if (udp->udp_rcv_list_head != NULL)
7628 		UDP_STAT(us, udp_drain);
7629 
7630 	/*
7631 	 * Send up everything via putnext(); note here that we
7632 	 * don't need the udp_drain_lock to protect us since
7633 	 * nothing can enter udp_rrw() and that we currently
7634 	 * have exclusive access to this udp.
7635 	 */
7636 	while ((mp = udp->udp_rcv_list_head) != NULL) {
7637 		udp->udp_rcv_list_head = mp->b_next;
7638 		mp->b_next = NULL;
7639 		udp->udp_rcv_cnt -= msgdsize(mp);
7640 		udp->udp_rcv_msgcnt--;
7641 		if (closing) {
7642 			freemsg(mp);
7643 		} else {
7644 			ASSERT(q == RD(q));
7645 			putnext(q, mp);
7646 		}
7647 	}
7648 	ASSERT(udp->udp_rcv_cnt == 0);
7649 	ASSERT(udp->udp_rcv_msgcnt == 0);
7650 	ASSERT(udp->udp_rcv_list_head == NULL);
7651 	udp->udp_rcv_list_tail = NULL;
7652 	udp->udp_drain_qfull = B_FALSE;
7653 }
7654 
7655 static size_t
7656 udp_set_rcv_hiwat(udp_t *udp, size_t size)
7657 {
7658 	udp_stack_t *us = udp->udp_us;
7659 
7660 	/* We add a bit of extra buffering */
7661 	size += size >> 1;
7662 	if (size > us->us_max_buf)
7663 		size = us->us_max_buf;
7664 
7665 	udp->udp_rcv_hiwat = size;
7666 	return (size);
7667 }
7668 
7669 /*
7670  * For the lower queue so that UDP can be a dummy mux.
7671  * Nobody should be sending
7672  * packets up this stream
7673  */
7674 static void
7675 udp_lrput(queue_t *q, mblk_t *mp)
7676 {
7677 	mblk_t *mp1;
7678 
7679 	switch (mp->b_datap->db_type) {
7680 	case M_FLUSH:
7681 		/* Turn around */
7682 		if (*mp->b_rptr & FLUSHW) {
7683 			*mp->b_rptr &= ~FLUSHR;
7684 			qreply(q, mp);
7685 			return;
7686 		}
7687 		break;
7688 	}
7689 	/* Could receive messages that passed through ar_rput */
7690 	for (mp1 = mp; mp1; mp1 = mp1->b_cont)
7691 		mp1->b_prev = mp1->b_next = NULL;
7692 	freemsg(mp);
7693 }
7694 
7695 /*
7696  * For the lower queue so that UDP can be a dummy mux.
7697  * Nobody should be sending packets down this stream.
7698  */
7699 /* ARGSUSED */
7700 void
7701 udp_lwput(queue_t *q, mblk_t *mp)
7702 {
7703 	freemsg(mp);
7704 }
7705 
7706 /*
7707  * Below routines for UDP socket module.
7708  */
7709 
7710 static conn_t *
7711 udp_do_open(cred_t *credp, boolean_t isv6, int flags)
7712 {
7713 	udp_t		*udp;
7714 	conn_t		*connp;
7715 	zoneid_t 	zoneid;
7716 	netstack_t 	*ns;
7717 	udp_stack_t 	*us;
7718 
7719 	ns = netstack_find_by_cred(credp);
7720 	ASSERT(ns != NULL);
7721 	us = ns->netstack_udp;
7722 	ASSERT(us != NULL);
7723 
7724 	/*
7725 	 * For exclusive stacks we set the zoneid to zero
7726 	 * to make UDP operate as if in the global zone.
7727 	 */
7728 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
7729 		zoneid = GLOBAL_ZONEID;
7730 	else
7731 		zoneid = crgetzoneid(credp);
7732 
7733 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
7734 
7735 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
7736 	if (connp == NULL) {
7737 		netstack_rele(ns);
7738 		return (NULL);
7739 	}
7740 	udp = connp->conn_udp;
7741 
7742 	/*
7743 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
7744 	 * done by netstack_find_by_cred()
7745 	 */
7746 	netstack_rele(ns);
7747 
7748 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7749 	ASSERT(connp->conn_ulp == IPPROTO_UDP);
7750 	ASSERT(connp->conn_udp == udp);
7751 	ASSERT(udp->udp_connp == connp);
7752 
7753 	/* Set the initial state of the stream and the privilege status. */
7754 	udp->udp_state = TS_UNBND;
7755 	if (isv6) {
7756 		udp->udp_family = AF_INET6;
7757 		udp->udp_ipversion = IPV6_VERSION;
7758 		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
7759 		udp->udp_ttl = us->us_ipv6_hoplimit;
7760 		connp->conn_af_isv6 = B_TRUE;
7761 		connp->conn_flags |= IPCL_ISV6;
7762 	} else {
7763 		udp->udp_family = AF_INET;
7764 		udp->udp_ipversion = IPV4_VERSION;
7765 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
7766 		udp->udp_ttl = us->us_ipv4_ttl;
7767 		connp->conn_af_isv6 = B_FALSE;
7768 		connp->conn_flags &= ~IPCL_ISV6;
7769 	}
7770 
7771 	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
7772 	udp->udp_pending_op = -1;
7773 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
7774 	connp->conn_zoneid = zoneid;
7775 
7776 	udp->udp_open_time = lbolt64;
7777 	udp->udp_open_pid = curproc->p_pid;
7778 
7779 	/*
7780 	 * If the caller has the process-wide flag set, then default to MAC
7781 	 * exempt mode.  This allows read-down to unlabeled hosts.
7782 	 */
7783 	if (getpflags(NET_MAC_AWARE, credp) != 0)
7784 		connp->conn_mac_exempt = B_TRUE;
7785 
7786 	connp->conn_ulp_labeled = is_system_labeled();
7787 
7788 	udp->udp_us = us;
7789 
7790 	connp->conn_recv = udp_input;
7791 	crhold(credp);
7792 	connp->conn_cred = credp;
7793 
7794 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
7795 
7796 	rw_exit(&udp->udp_rwlock);
7797 
7798 	return (connp);
7799 }
7800 
7801 /* ARGSUSED */
7802 sock_lower_handle_t
7803 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
7804     uint_t *smodep, int *errorp, int flags, cred_t *credp)
7805 {
7806 	udp_t		*udp = NULL;
7807 	udp_stack_t	*us;
7808 	conn_t		*connp;
7809 	boolean_t	isv6;
7810 
7811 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
7812 	    (proto != 0 && proto != IPPROTO_UDP)) {
7813 		*errorp = EPROTONOSUPPORT;
7814 		return (NULL);
7815 	}
7816 
7817 	if (family == AF_INET6)
7818 		isv6 = B_TRUE;
7819 	else
7820 		isv6 = B_FALSE;
7821 
7822 	connp = udp_do_open(credp, isv6, flags);
7823 	if (connp == NULL) {
7824 		*errorp = ENOMEM;
7825 		return (NULL);
7826 	}
7827 
7828 	udp = connp->conn_udp;
7829 	ASSERT(udp != NULL);
7830 	us = udp->udp_us;
7831 	ASSERT(us != NULL);
7832 
7833 	connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;
7834 
7835 	/* Set flow control */
7836 	rw_enter(&udp->udp_rwlock, RW_WRITER);
7837 	(void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
7838 	udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
7839 	udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
7840 	udp->udp_xmit_hiwat = us->us_xmit_hiwat;
7841 	udp->udp_xmit_lowat = us->us_xmit_lowat;
7842 
7843 	if (udp->udp_family == AF_INET6) {
7844 		/* Build initial header template for transmit */
7845 		if ((*errorp = udp_build_hdrs(udp)) != 0) {
7846 			rw_exit(&udp->udp_rwlock);
7847 			ipcl_conn_destroy(connp);
7848 			return (NULL);
7849 		}
7850 	}
7851 	rw_exit(&udp->udp_rwlock);
7852 
7853 	connp->conn_flow_cntrld = B_FALSE;
7854 
7855 	ASSERT(us->us_ldi_ident != NULL);
7856 
7857 	if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
7858 		ip1dbg(("create of IP helper stream failed\n"));
7859 		udp_do_close(connp);
7860 		return (NULL);
7861 	}
7862 
7863 	/* Set the send flow control */
7864 	connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
7865 	connp->conn_wq->q_lowat = us->us_xmit_lowat;
7866 
7867 	mutex_enter(&connp->conn_lock);
7868 	connp->conn_state_flags &= ~CONN_INCIPIENT;
7869 	mutex_exit(&connp->conn_lock);
7870 
7871 	*errorp = 0;
7872 	*smodep = SM_ATOMIC;
7873 	*sock_downcalls = &sock_udp_downcalls;
7874 	return ((sock_lower_handle_t)connp);
7875 }
7876 
7877 /* ARGSUSED */
7878 void
7879 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
7880     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
7881 {
7882 	conn_t 		*connp = (conn_t *)proto_handle;
7883 	udp_t 		*udp = connp->conn_udp;
7884 	udp_stack_t	*us = udp->udp_us;
7885 	struct sock_proto_props sopp;
7886 
7887 	connp->conn_upcalls = sock_upcalls;
7888 	connp->conn_upper_handle = sock_handle;
7889 
7890 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
7891 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
7892 	sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
7893 	sopp.sopp_maxblk = INFPSZ;
7894 	sopp.sopp_rxhiwat = udp->udp_rcv_hiwat;
7895 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
7896 	sopp.sopp_maxpsz =
7897 	    (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
7898 	    UDP_MAXPACKET_IPV6;
7899 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
7900 	    udp_mod_info.mi_minpsz;
7901 
7902 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
7903 	    &sopp);
7904 }
7905 
7906 static void
7907 udp_do_close(conn_t *connp)
7908 {
7909 	udp_t	*udp;
7910 
7911 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
7912 	udp = connp->conn_udp;
7913 
7914 	udp_quiesce_conn(connp);
7915 	ip_quiesce_conn(connp);
7916 
7917 	if (!IPCL_IS_NONSTR(connp)) {
7918 		/*
7919 		 * Disable read-side synchronous stream
7920 		 * interface and drain any queued data.
7921 		 */
7922 		ASSERT(connp->conn_wq != NULL);
7923 		udp_rcv_drain(connp->conn_wq, udp, B_TRUE);
7924 		ASSERT(!udp->udp_direct_sockfs);
7925 
7926 		ASSERT(connp->conn_rq != NULL);
7927 		qprocsoff(connp->conn_rq);
7928 	}
7929 
7930 	ASSERT(udp->udp_rcv_cnt == 0);
7931 	ASSERT(udp->udp_rcv_msgcnt == 0);
7932 	ASSERT(udp->udp_rcv_list_head == NULL);
7933 	ASSERT(udp->udp_rcv_list_tail == NULL);
7934 
7935 	udp_close_free(connp);
7936 
7937 	/*
7938 	 * Now we are truly single threaded on this stream, and can
7939 	 * delete the things hanging off the connp, and finally the connp.
7940 	 * We removed this connp from the fanout list, it cannot be
7941 	 * accessed thru the fanouts, and we already waited for the
7942 	 * conn_ref to drop to 0. We are already in close, so
7943 	 * there cannot be any other thread from the top. qprocsoff
7944 	 * has completed, and service has completed or won't run in
7945 	 * future.
7946 	 */
7947 	ASSERT(connp->conn_ref == 1);
7948 	if (!IPCL_IS_NONSTR(connp)) {
7949 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
7950 	} else {
7951 		ip_close_helper_stream(connp);
7952 	}
7953 
7954 	connp->conn_ref--;
7955 	ipcl_conn_destroy(connp);
7956 }
7957 
7958 /* ARGSUSED */
7959 int
7960 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
7961 {
7962 	conn_t	*connp = (conn_t *)proto_handle;
7963 
7964 	udp_do_close(connp);
7965 	return (0);
7966 }
7967 
7968 static int
7969 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
7970     boolean_t bind_to_req_port_only)
7971 {
7972 	sin_t		*sin;
7973 	sin6_t		*sin6;
7974 	sin6_t		sin6addr;
7975 	in_port_t	port;		/* Host byte order */
7976 	in_port_t	requested_port;	/* Host byte order */
7977 	int		count;
7978 	in6_addr_t	v6src;
7979 	int		loopmax;
7980 	udp_fanout_t	*udpf;
7981 	in_port_t	lport;		/* Network byte order */
7982 	zoneid_t	zoneid;
7983 	udp_t		*udp;
7984 	boolean_t	is_inaddr_any;
7985 	mlp_type_t	addrtype, mlptype;
7986 	udp_stack_t	*us;
7987 	int		error = 0;
7988 	mblk_t		*mp = NULL;
7989 
7990 	udp = connp->conn_udp;
7991 	us = udp->udp_us;
7992 
7993 	if (udp->udp_state != TS_UNBND) {
7994 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
7995 		    "udp_bind: bad state, %u", udp->udp_state);
7996 		return (-TOUTSTATE);
7997 	}
7998 
7999 	switch (len) {
8000 	case 0:
8001 		if (udp->udp_family == AF_INET) {
8002 			sin = (sin_t *)&sin6addr;
8003 			*sin = sin_null;
8004 			sin->sin_family = AF_INET;
8005 			sin->sin_addr.s_addr = INADDR_ANY;
8006 			udp->udp_ipversion = IPV4_VERSION;
8007 		} else {
8008 			ASSERT(udp->udp_family == AF_INET6);
8009 			sin6 = (sin6_t *)&sin6addr;
8010 			*sin6 = sin6_null;
8011 			sin6->sin6_family = AF_INET6;
8012 			V6_SET_ZERO(sin6->sin6_addr);
8013 			udp->udp_ipversion = IPV6_VERSION;
8014 		}
8015 		port = 0;
8016 		break;
8017 
8018 	case sizeof (sin_t):	/* Complete IPv4 address */
8019 		sin = (sin_t *)sa;
8020 
8021 		if (sin == NULL || !OK_32PTR((char *)sin))
8022 			return (EINVAL);
8023 
8024 		if (udp->udp_family != AF_INET ||
8025 		    sin->sin_family != AF_INET) {
8026 			return (EAFNOSUPPORT);
8027 		}
8028 		port = ntohs(sin->sin_port);
8029 		break;
8030 
8031 	case sizeof (sin6_t):	/* complete IPv6 address */
8032 		sin6 = (sin6_t *)sa;
8033 
8034 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
8035 			return (EINVAL);
8036 
8037 		if (udp->udp_family != AF_INET6 ||
8038 		    sin6->sin6_family != AF_INET6) {
8039 			return (EAFNOSUPPORT);
8040 		}
8041 		port = ntohs(sin6->sin6_port);
8042 		break;
8043 
8044 	default:		/* Invalid request */
8045 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8046 		    "udp_bind: bad ADDR_length length %u", len);
8047 		return (-TBADADDR);
8048 	}
8049 
8050 	requested_port = port;
8051 
8052 	if (requested_port == 0 || !bind_to_req_port_only)
8053 		bind_to_req_port_only = B_FALSE;
8054 	else		/* T_BIND_REQ and requested_port != 0 */
8055 		bind_to_req_port_only = B_TRUE;
8056 
8057 	if (requested_port == 0) {
8058 		/*
8059 		 * If the application passed in zero for the port number, it
8060 		 * doesn't care which port number we bind to. Get one in the
8061 		 * valid range.
8062 		 */
8063 		if (udp->udp_anon_priv_bind) {
8064 			port = udp_get_next_priv_port(udp);
8065 		} else {
8066 			port = udp_update_next_port(udp,
8067 			    us->us_next_port_to_try, B_TRUE);
8068 		}
8069 	} else {
8070 		/*
8071 		 * If the port is in the well-known privileged range,
8072 		 * make sure the caller was privileged.
8073 		 */
8074 		int i;
8075 		boolean_t priv = B_FALSE;
8076 
8077 		if (port < us->us_smallest_nonpriv_port) {
8078 			priv = B_TRUE;
8079 		} else {
8080 			for (i = 0; i < us->us_num_epriv_ports; i++) {
8081 				if (port == us->us_epriv_ports[i]) {
8082 					priv = B_TRUE;
8083 					break;
8084 				}
8085 			}
8086 		}
8087 
8088 		if (priv) {
8089 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
8090 				return (-TACCES);
8091 		}
8092 	}
8093 
8094 	if (port == 0)
8095 		return (-TNOADDR);
8096 
8097 	/*
8098 	 * The state must be TS_UNBND. TPI mandates that users must send
8099 	 * TPI primitives only 1 at a time and wait for the response before
8100 	 * sending the next primitive.
8101 	 */
8102 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8103 	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
8104 		rw_exit(&udp->udp_rwlock);
8105 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8106 		    "udp_bind: bad state, %u", udp->udp_state);
8107 		return (-TOUTSTATE);
8108 	}
8109 	/* XXX how to remove the T_BIND_REQ? Should set it before calling */
8110 	udp->udp_pending_op = T_BIND_REQ;
8111 	/*
8112 	 * Copy the source address into our udp structure. This address
8113 	 * may still be zero; if so, IP will fill in the correct address
8114 	 * each time an outbound packet is passed to it. Since the udp is
8115 	 * not yet in the bind hash list, we don't grab the uf_lock to
8116 	 * change udp_ipversion
8117 	 */
8118 	if (udp->udp_family == AF_INET) {
8119 		ASSERT(sin != NULL);
8120 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8121 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8122 		    udp->udp_ip_snd_options_len;
8123 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
8124 	} else {
8125 		ASSERT(sin6 != NULL);
8126 		v6src = sin6->sin6_addr;
8127 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
8128 			/*
8129 			 * no need to hold the uf_lock to set the udp_ipversion
8130 			 * since we are not yet in the fanout list
8131 			 */
8132 			udp->udp_ipversion = IPV4_VERSION;
8133 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
8134 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
8135 		} else {
8136 			udp->udp_ipversion = IPV6_VERSION;
8137 			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8138 		}
8139 	}
8140 
8141 	/*
8142 	 * If udp_reuseaddr is not set, then we have to make sure that
8143 	 * the IP address and port number the application requested
8144 	 * (or we selected for the application) is not being used by
8145 	 * another stream.  If another stream is already using the
8146 	 * requested IP address and port, the behavior depends on
8147 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
8148 	 * search for an unused port to bind to the stream.
8149 	 *
8150 	 * As per the BSD semantics, as modified by the Deering multicast
8151 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
8152 	 * to the same port independent of the local IP address.
8153 	 *
8154 	 * This is slightly different than in SunOS 4.X which did not
8155 	 * support IP multicast. Note that the change implemented by the
8156 	 * Deering multicast code effects all binds - not only binding
8157 	 * to IP multicast addresses.
8158 	 *
8159 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
8160 	 * order to guarantee a unique port.
8161 	 */
8162 
8163 	count = 0;
8164 	if (udp->udp_anon_priv_bind) {
8165 		/*
8166 		 * loopmax = (IPPORT_RESERVED-1) -
8167 		 *    us->us_min_anonpriv_port + 1
8168 		 */
8169 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
8170 	} else {
8171 		loopmax = us->us_largest_anon_port -
8172 		    us->us_smallest_anon_port + 1;
8173 	}
8174 
8175 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
8176 	zoneid = connp->conn_zoneid;
8177 
8178 	for (;;) {
8179 		udp_t		*udp1;
8180 		boolean_t	found_exclbind = B_FALSE;
8181 
8182 		/*
8183 		 * Walk through the list of udp streams bound to
8184 		 * requested port with the same IP address.
8185 		 */
8186 		lport = htons(port);
8187 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
8188 		    us->us_bind_fanout_size)];
8189 		mutex_enter(&udpf->uf_lock);
8190 		for (udp1 = udpf->uf_udp; udp1 != NULL;
8191 		    udp1 = udp1->udp_bind_hash) {
8192 			if (lport != udp1->udp_port)
8193 				continue;
8194 
8195 			/*
8196 			 * On a labeled system, we must treat bindings to ports
8197 			 * on shared IP addresses by sockets with MAC exemption
8198 			 * privilege as being in all zones, as there's
8199 			 * otherwise no way to identify the right receiver.
8200 			 */
8201 			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
8202 			    IPCL_ZONE_MATCH(connp,
8203 			    udp1->udp_connp->conn_zoneid)) &&
8204 			    !connp->conn_mac_exempt && \
8205 			    !udp1->udp_connp->conn_mac_exempt)
8206 				continue;
8207 
8208 			/*
8209 			 * If UDP_EXCLBIND is set for either the bound or
8210 			 * binding endpoint, the semantics of bind
8211 			 * is changed according to the following chart.
8212 			 *
8213 			 * spec = specified address (v4 or v6)
8214 			 * unspec = unspecified address (v4 or v6)
8215 			 * A = specified addresses are different for endpoints
8216 			 *
8217 			 * bound	bind to		allowed?
8218 			 * -------------------------------------
8219 			 * unspec	unspec		no
8220 			 * unspec	spec		no
8221 			 * spec		unspec		no
8222 			 * spec		spec		yes if A
8223 			 *
8224 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
8225 			 * as UDP_EXCLBIND, except that zoneid is ignored.
8226 			 */
8227 			if (udp1->udp_exclbind || udp->udp_exclbind ||
8228 			    udp1->udp_connp->conn_mac_exempt ||
8229 			    connp->conn_mac_exempt) {
8230 				if (V6_OR_V4_INADDR_ANY(
8231 				    udp1->udp_bound_v6src) ||
8232 				    is_inaddr_any ||
8233 				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8234 				    &v6src)) {
8235 					found_exclbind = B_TRUE;
8236 					break;
8237 				}
8238 				continue;
8239 			}
8240 
8241 			/*
8242 			 * Check ipversion to allow IPv4 and IPv6 sockets to
8243 			 * have disjoint port number spaces.
8244 			 */
8245 			if (udp->udp_ipversion != udp1->udp_ipversion) {
8246 
8247 				/*
8248 				 * On the first time through the loop, if the
8249 				 * user intentionally specified a
8250 				 * particular port number, then ignore any
8251 				 * bindings of the other protocol that may
8252 				 * conflict. This allows the user to bind IPv6
8253 				 * alone and get both v4 and v6, or bind both
8254 				 * and get each separately. On subsequent
8255 				 * times through the loop, we're checking a
8256 				 * port that we chose (not the user) and thus
8257 				 * we do not allow casual duplicate bindings.
8258 				 */
8259 				if (count == 0 && requested_port != 0)
8260 					continue;
8261 			}
8262 
8263 			/*
8264 			 * No difference depending on SO_REUSEADDR.
8265 			 *
8266 			 * If existing port is bound to a
8267 			 * non-wildcard IP address and
8268 			 * the requesting stream is bound to
8269 			 * a distinct different IP addresses
8270 			 * (non-wildcard, also), keep going.
8271 			 */
8272 			if (!is_inaddr_any &&
8273 			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
8274 			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8275 			    &v6src)) {
8276 				continue;
8277 			}
8278 			break;
8279 		}
8280 
8281 		if (!found_exclbind &&
8282 		    (udp->udp_reuseaddr && requested_port != 0)) {
8283 			break;
8284 		}
8285 
8286 		if (udp1 == NULL) {
8287 			/*
8288 			 * No other stream has this IP address
8289 			 * and port number. We can use it.
8290 			 */
8291 			break;
8292 		}
8293 		mutex_exit(&udpf->uf_lock);
8294 		if (bind_to_req_port_only) {
8295 			/*
8296 			 * We get here only when requested port
8297 			 * is bound (and only first  of the for()
8298 			 * loop iteration).
8299 			 *
8300 			 * The semantics of this bind request
8301 			 * require it to fail so we return from
8302 			 * the routine (and exit the loop).
8303 			 *
8304 			 */
8305 			udp->udp_pending_op = -1;
8306 			rw_exit(&udp->udp_rwlock);
8307 			return (-TADDRBUSY);
8308 		}
8309 
8310 		if (udp->udp_anon_priv_bind) {
8311 			port = udp_get_next_priv_port(udp);
8312 		} else {
8313 			if ((count == 0) && (requested_port != 0)) {
8314 				/*
8315 				 * If the application wants us to find
8316 				 * a port, get one to start with. Set
8317 				 * requested_port to 0, so that we will
8318 				 * update us->us_next_port_to_try below.
8319 				 */
8320 				port = udp_update_next_port(udp,
8321 				    us->us_next_port_to_try, B_TRUE);
8322 				requested_port = 0;
8323 			} else {
8324 				port = udp_update_next_port(udp, port + 1,
8325 				    B_FALSE);
8326 			}
8327 		}
8328 
8329 		if (port == 0 || ++count >= loopmax) {
8330 			/*
8331 			 * We've tried every possible port number and
8332 			 * there are none available, so send an error
8333 			 * to the user.
8334 			 */
8335 			udp->udp_pending_op = -1;
8336 			rw_exit(&udp->udp_rwlock);
8337 			return (-TNOADDR);
8338 		}
8339 	}
8340 
8341 	/*
8342 	 * Copy the source address into our udp structure.  This address
8343 	 * may still be zero; if so, ip will fill in the correct address
8344 	 * each time an outbound packet is passed to it.
8345 	 * If we are binding to a broadcast or multicast address then
8346 	 * udp_post_ip_bind_connect will clear the source address
8347 	 * when udp_do_bind success.
8348 	 */
8349 	udp->udp_v6src = udp->udp_bound_v6src = v6src;
8350 	udp->udp_port = lport;
8351 	/*
8352 	 * Now reset the next anonymous port if the application requested
8353 	 * an anonymous port, or we handed out the next anonymous port.
8354 	 */
8355 	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
8356 		us->us_next_port_to_try = port + 1;
8357 	}
8358 
8359 	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
8360 	if (udp->udp_family == AF_INET) {
8361 		sin->sin_port = udp->udp_port;
8362 	} else {
8363 		sin6->sin6_port = udp->udp_port;
8364 		/* Rebuild the header template */
8365 		error = udp_build_hdrs(udp);
8366 		if (error != 0) {
8367 			udp->udp_pending_op = -1;
8368 			rw_exit(&udp->udp_rwlock);
8369 			mutex_exit(&udpf->uf_lock);
8370 			return (error);
8371 		}
8372 	}
8373 	udp->udp_state = TS_IDLE;
8374 	udp_bind_hash_insert(udpf, udp);
8375 	mutex_exit(&udpf->uf_lock);
8376 	rw_exit(&udp->udp_rwlock);
8377 
8378 	if (cl_inet_bind) {
8379 		/*
8380 		 * Running in cluster mode - register bind information
8381 		 */
8382 		if (udp->udp_ipversion == IPV4_VERSION) {
8383 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8384 			    IPPROTO_UDP, AF_INET,
8385 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8386 			    (in_port_t)udp->udp_port, NULL);
8387 		} else {
8388 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
8389 			    IPPROTO_UDP, AF_INET6,
8390 			    (uint8_t *)&(udp->udp_v6src),
8391 			    (in_port_t)udp->udp_port, NULL);
8392 		}
8393 	}
8394 
8395 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
8396 	if (is_system_labeled() && (!connp->conn_anon_port ||
8397 	    connp->conn_anon_mlp)) {
8398 		uint16_t mlpport;
8399 		cred_t *cr = connp->conn_cred;
8400 		zone_t *zone;
8401 
8402 		zone = crgetzone(cr);
8403 		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
8404 		    mlptSingle;
8405 		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
8406 		    &v6src, us->us_netstack->netstack_ip);
8407 		if (addrtype == mlptSingle) {
8408 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8409 			udp->udp_pending_op = -1;
8410 			rw_exit(&udp->udp_rwlock);
8411 			connp->conn_anon_port = B_FALSE;
8412 			connp->conn_mlp_type = mlptSingle;
8413 			return (-TNOADDR);
8414 		}
8415 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
8416 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
8417 		    addrtype);
8418 		if (mlptype != mlptSingle &&
8419 		    (connp->conn_mlp_type == mlptSingle ||
8420 		    secpolicy_net_bindmlp(cr) != 0)) {
8421 			if (udp->udp_debug) {
8422 				(void) strlog(UDP_MOD_ID, 0, 1,
8423 				    SL_ERROR|SL_TRACE,
8424 				    "udp_bind: no priv for multilevel port %d",
8425 				    mlpport);
8426 			}
8427 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8428 			udp->udp_pending_op = -1;
8429 			rw_exit(&udp->udp_rwlock);
8430 			connp->conn_anon_port = B_FALSE;
8431 			connp->conn_mlp_type = mlptSingle;
8432 			return (-TACCES);
8433 		}
8434 
8435 		/*
8436 		 * If we're specifically binding a shared IP address and the
8437 		 * port is MLP on shared addresses, then check to see if this
8438 		 * zone actually owns the MLP.  Reject if not.
8439 		 */
8440 		if (mlptype == mlptShared && addrtype == mlptShared) {
8441 			/*
8442 			 * No need to handle exclusive-stack zones since
8443 			 * ALL_ZONES only applies to the shared stack.
8444 			 */
8445 			zoneid_t mlpzone;
8446 
8447 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
8448 			    htons(mlpport));
8449 			if (connp->conn_zoneid != mlpzone) {
8450 				if (udp->udp_debug) {
8451 					(void) strlog(UDP_MOD_ID, 0, 1,
8452 					    SL_ERROR|SL_TRACE,
8453 					    "udp_bind: attempt to bind port "
8454 					    "%d on shared addr in zone %d "
8455 					    "(should be %d)",
8456 					    mlpport, connp->conn_zoneid,
8457 					    mlpzone);
8458 				}
8459 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8460 				udp->udp_pending_op = -1;
8461 				rw_exit(&udp->udp_rwlock);
8462 				connp->conn_anon_port = B_FALSE;
8463 				connp->conn_mlp_type = mlptSingle;
8464 				return (-TACCES);
8465 			}
8466 		}
8467 		if (connp->conn_anon_port) {
8468 			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
8469 			    port, B_TRUE);
8470 			if (error != 0) {
8471 				if (udp->udp_debug) {
8472 					(void) strlog(UDP_MOD_ID, 0, 1,
8473 					    SL_ERROR|SL_TRACE,
8474 					    "udp_bind: cannot establish anon "
8475 					    "MLP for port %d", port);
8476 				}
8477 				rw_enter(&udp->udp_rwlock, RW_WRITER);
8478 				udp->udp_pending_op = -1;
8479 				rw_exit(&udp->udp_rwlock);
8480 				connp->conn_anon_port = B_FALSE;
8481 				connp->conn_mlp_type = mlptSingle;
8482 				return (-TACCES);
8483 			}
8484 		}
8485 		connp->conn_mlp_type = mlptype;
8486 	}
8487 
8488 	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8489 		/*
8490 		 * Append a request for an IRE if udp_v6src not
8491 		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
8492 		 */
8493 		mp = allocb(sizeof (ire_t), BPRI_HI);
8494 		if (!mp) {
8495 			rw_enter(&udp->udp_rwlock, RW_WRITER);
8496 			udp->udp_pending_op = -1;
8497 			rw_exit(&udp->udp_rwlock);
8498 			return (ENOMEM);
8499 		}
8500 		mp->b_wptr += sizeof (ire_t);
8501 		mp->b_datap->db_type = IRE_DB_REQ_TYPE;
8502 	}
8503 	if (udp->udp_family == AF_INET6) {
8504 		ASSERT(udp->udp_connp->conn_af_isv6);
8505 		error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
8506 		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
8507 	} else {
8508 		ASSERT(!udp->udp_connp->conn_af_isv6);
8509 		error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
8510 		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
8511 		    B_TRUE);
8512 	}
8513 
8514 	(void) udp_post_ip_bind_connect(udp, mp, error);
8515 	return (error);
8516 }
8517 
8518 int
8519 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
8520     socklen_t len, cred_t *cr)
8521 {
8522 	int		error;
8523 	conn_t		*connp;
8524 
8525 	connp = (conn_t *)proto_handle;
8526 
8527 	if (sa == NULL)
8528 		error = udp_do_unbind(connp);
8529 	else
8530 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
8531 
8532 	if (error < 0) {
8533 		if (error == -TOUTSTATE)
8534 			error = EINVAL;
8535 		else
8536 			error = proto_tlitosyserr(-error);
8537 	}
8538 
8539 	return (error);
8540 }
8541 
8542 static int
8543 udp_implicit_bind(conn_t *connp, cred_t *cr)
8544 {
8545 	int error;
8546 
8547 	error = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
8548 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
8549 }
8550 
8551 /*
8552  * This routine removes a port number association from a stream. It
8553  * is called by udp_unbind and udp_tpi_unbind.
8554  */
8555 static int
8556 udp_do_unbind(conn_t *connp)
8557 {
8558 	udp_t 		*udp = connp->conn_udp;
8559 	udp_fanout_t	*udpf;
8560 	udp_stack_t	*us = udp->udp_us;
8561 
8562 	if (cl_inet_unbind != NULL) {
8563 		/*
8564 		 * Running in cluster mode - register unbind information
8565 		 */
8566 		if (udp->udp_ipversion == IPV4_VERSION) {
8567 			(*cl_inet_unbind)(
8568 			    connp->conn_netstack->netstack_stackid,
8569 			    IPPROTO_UDP, AF_INET,
8570 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
8571 			    (in_port_t)udp->udp_port, NULL);
8572 		} else {
8573 			(*cl_inet_unbind)(
8574 			    connp->conn_netstack->netstack_stackid,
8575 			    IPPROTO_UDP, AF_INET6,
8576 			    (uint8_t *)&(udp->udp_v6src),
8577 			    (in_port_t)udp->udp_port, NULL);
8578 		}
8579 	}
8580 
8581 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8582 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8583 		rw_exit(&udp->udp_rwlock);
8584 		return (-TOUTSTATE);
8585 	}
8586 	udp->udp_pending_op = T_UNBIND_REQ;
8587 	rw_exit(&udp->udp_rwlock);
8588 
8589 	/*
8590 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
8591 	 * and therefore ip_unbind must never return NULL.
8592 	 */
8593 	ip_unbind(connp);
8594 
8595 	/*
8596 	 * Once we're unbound from IP, the pending operation may be cleared
8597 	 * here.
8598 	 */
8599 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8600 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8601 	    us->us_bind_fanout_size)];
8602 
8603 	mutex_enter(&udpf->uf_lock);
8604 	udp_bind_hash_remove(udp, B_TRUE);
8605 	V6_SET_ZERO(udp->udp_v6src);
8606 	V6_SET_ZERO(udp->udp_bound_v6src);
8607 	udp->udp_port = 0;
8608 	mutex_exit(&udpf->uf_lock);
8609 
8610 	udp->udp_pending_op = -1;
8611 	udp->udp_state = TS_UNBND;
8612 	if (udp->udp_family == AF_INET6)
8613 		(void) udp_build_hdrs(udp);
8614 	rw_exit(&udp->udp_rwlock);
8615 
8616 	return (0);
8617 }
8618 
8619 static int
8620 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error)
8621 {
8622 	ire_t		*ire;
8623 	udp_fanout_t	*udpf;
8624 	udp_stack_t	*us = udp->udp_us;
8625 
8626 	ASSERT(udp->udp_pending_op != -1);
8627 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8628 	if (error == 0) {
8629 		/* For udp_do_connect() success */
8630 		/* udp_do_bind() success will do nothing in here */
8631 		/*
8632 		 * If a broadcast/multicast address was bound, set
8633 		 * the source address to 0.
8634 		 * This ensures no datagrams with broadcast address
8635 		 * as source address are emitted (which would violate
8636 		 * RFC1122 - Hosts requirements)
8637 		 *
8638 		 * Note that when connecting the returned IRE is
8639 		 * for the destination address and we only perform
8640 		 * the broadcast check for the source address (it
8641 		 * is OK to connect to a broadcast/multicast address.)
8642 		 */
8643 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
8644 			ire = (ire_t *)ire_mp->b_rptr;
8645 
8646 			/*
8647 			 * Note: we get IRE_BROADCAST for IPv6 to "mark" a
8648 			 * multicast local address.
8649 			 */
8650 			udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8651 			    us->us_bind_fanout_size)];
8652 			if (ire->ire_type == IRE_BROADCAST &&
8653 			    udp->udp_state != TS_DATA_XFER) {
8654 				ASSERT(udp->udp_pending_op == T_BIND_REQ ||
8655 				    udp->udp_pending_op == O_T_BIND_REQ);
8656 				/*
8657 				 * This was just a local bind to a broadcast
8658 				 * addr.
8659 				 */
8660 				mutex_enter(&udpf->uf_lock);
8661 				V6_SET_ZERO(udp->udp_v6src);
8662 				mutex_exit(&udpf->uf_lock);
8663 				if (udp->udp_family == AF_INET6)
8664 					(void) udp_build_hdrs(udp);
8665 			} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
8666 				if (udp->udp_family == AF_INET6)
8667 					(void) udp_build_hdrs(udp);
8668 			}
8669 		}
8670 	} else {
8671 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8672 		    us->us_bind_fanout_size)];
8673 		mutex_enter(&udpf->uf_lock);
8674 
8675 		if (udp->udp_state == TS_DATA_XFER) {
8676 			/* Connect failed */
8677 			/* Revert back to the bound source */
8678 			udp->udp_v6src = udp->udp_bound_v6src;
8679 			udp->udp_state = TS_IDLE;
8680 		} else {
8681 			/* For udp_do_bind() failed */
8682 			V6_SET_ZERO(udp->udp_v6src);
8683 			V6_SET_ZERO(udp->udp_bound_v6src);
8684 			udp->udp_state = TS_UNBND;
8685 			udp_bind_hash_remove(udp, B_TRUE);
8686 			udp->udp_port = 0;
8687 		}
8688 		mutex_exit(&udpf->uf_lock);
8689 		if (udp->udp_family == AF_INET6)
8690 			(void) udp_build_hdrs(udp);
8691 	}
8692 	udp->udp_pending_op = -1;
8693 	rw_exit(&udp->udp_rwlock);
8694 	if (ire_mp != NULL)
8695 		freeb(ire_mp);
8696 	return (error);
8697 }
8698 
8699 /*
8700  * It associates a default destination address with the stream.
 *
 * connp - endpoint being connected.
 * sa    - destination address, a sin_t or a sin6_t as selected by len.
 *         Although declared const, it is written through when an
 *         unspecified destination is replaced by the loopback address.
 * len   - size of *sa; anything other than sizeof (sin_t) or
 *         sizeof (sin6_t) yields EINVAL.
 *
 * Returns 0 on success.  Failures are either a positive errno (EINVAL,
 * ENOMEM, or the value returned by ip_proto_bind_connected_v4/v6) or a
 * negated TLI error (-TBADADDR, -TOUTSTATE).
 *
 * udp_pending_op is set to T_CONN_REQ while the request is in progress.
 * Every failure path in this function resets it to -1; once the request
 * has been handed to IP the reset is done by udp_post_ip_bind_connect().
8701  */
8702 static int
8703 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len)
8704 {
8705 	sin6_t		*sin6;
8706 	sin_t		*sin;
8707 	in6_addr_t 	v6dst;
8708 	ipaddr_t 	v4dst;
8709 	uint16_t 	dstport;
8710 	uint32_t 	flowinfo;
8711 	mblk_t		*ire_mp;
8712 	udp_fanout_t	*udpf;
8713 	udp_t		*udp, *udp1;
8714 	ushort_t	ipversion;
8715 	udp_stack_t	*us;
8716 	int		error;
8717 
8718 	udp = connp->conn_udp;
8719 	us = udp->udp_us;
8720 
8721 	/*
8722 	 * Address has been verified by the caller
8723 	 */
8724 	switch (len) {
8725 	default:
8726 		/*
8727 		 * Should never happen
8728 		 */
8729 		return (EINVAL);
8730 
8731 	case sizeof (sin_t):
		/* IPv4 destination; v6dst receives its v4-mapped form. */
8732 		sin = (sin_t *)sa;
8733 		v4dst = sin->sin_addr.s_addr;
8734 		dstport = sin->sin_port;
8735 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
8736 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
8737 		ipversion = IPV4_VERSION;
8738 		break;
8739 
8740 	case sizeof (sin6_t):
		/* A v4-mapped IPv6 destination is treated as IPv4. */
8741 		sin6 = (sin6_t *)sa;
8742 		v6dst = sin6->sin6_addr;
8743 		dstport = sin6->sin6_port;
8744 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
8745 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
8746 			ipversion = IPV4_VERSION;
8747 			flowinfo = 0;
8748 		} else {
8749 			ipversion = IPV6_VERSION;
8750 			flowinfo = sin6->sin6_flowinfo;
8751 		}
8752 		break;
8753 	}
8754 
	/* A destination port of zero is rejected. */
8755 	if (dstport == 0)
8756 		return (-TBADADDR);
8757 
8758 	rw_enter(&udp->udp_rwlock, RW_WRITER);
8759 
8760 	/*
8761 	 * This UDP must have bound to a port already before doing a connect.
8762 	 * TPI mandates that users must send TPI primitives only 1 at a time
8763 	 * and wait for the response before sending the next primitive.
8764 	 */
8765 	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
8766 		rw_exit(&udp->udp_rwlock);
8767 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
8768 		    "udp_connect: bad state, %u", udp->udp_state);
8769 		return (-TOUTSTATE);
8770 	}
8771 	udp->udp_pending_op = T_CONN_REQ;
8772 	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
8773 
	/* Size the maximum header for the IP version of the destination. */
8774 	if (ipversion == IPV4_VERSION) {
8775 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
8776 		    udp->udp_ip_snd_options_len;
8777 	} else {
8778 		udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
8779 	}
8780 
	/* Bind-fanout bucket that this endpoint's local port hashes to. */
8781 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
8782 	    us->us_bind_fanout_size)];
8783 
8784 	mutex_enter(&udpf->uf_lock);
8785 	if (udp->udp_state == TS_DATA_XFER) {
8786 		/* Already connected - clear out state */
8787 		udp->udp_v6src = udp->udp_bound_v6src;
8788 		udp->udp_state = TS_IDLE;
8789 	}
8790 
8791 	/*
8792 	 * Create a default IP header with no IP options.
8793 	 */
8794 	udp->udp_dstport = dstport;
8795 	udp->udp_ipversion = ipversion;
8796 	if (ipversion == IPV4_VERSION) {
8797 		/*
8798 		 * Interpret a zero destination to mean loopback.
8799 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
8800 		 * generate the T_CONN_CON.
8801 		 */
8802 		if (v4dst == INADDR_ANY) {
8803 			v4dst = htonl(INADDR_LOOPBACK);
8804 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			/* Written back through the caller's (const) sockaddr. */
8805 			if (udp->udp_family == AF_INET) {
8806 				sin->sin_addr.s_addr = v4dst;
8807 			} else {
8808 				sin6->sin6_addr = v6dst;
8809 			}
8810 		}
8811 		udp->udp_v6dst = v6dst;
8812 		udp->udp_flowinfo = 0;
8813 
8814 		/*
8815 		 * If the destination address is multicast and
8816 		 * an outgoing multicast interface has been set,
8817 		 * use the address of that interface as our
8818 		 * source address if no source address has been set.
8819 		 */
8820 		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
8821 		    CLASSD(v4dst) &&
8822 		    udp->udp_multicast_if_addr != INADDR_ANY) {
8823 			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
8824 			    &udp->udp_v6src);
8825 		}
8826 	} else {
8827 		ASSERT(udp->udp_ipversion == IPV6_VERSION);
8828 		/*
8829 		 * Interpret a zero destination to mean loopback.
8830 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
8831 		 * generate the T_CONN_CON.
8832 		 */
8833 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
8834 			v6dst = ipv6_loopback;
8835 			sin6->sin6_addr = v6dst;
8836 		}
8837 		udp->udp_v6dst = v6dst;
8838 		udp->udp_flowinfo = flowinfo;
8839 		/*
8840 		 * If the destination address is multicast and
8841 		 * an outgoing multicast interface has been set,
8842 		 * then the ip bind logic will pick the correct source
8843 		 * address (i.e. matching the outgoing multicast interface).
8844 		 */
8845 	}
8846 
8847 	/*
8848 	 * Verify that the src/port/dst/port is unique for all
8849 	 * connections in TS_DATA_XFER
8850 	 */
8851 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
8852 		if (udp1->udp_state != TS_DATA_XFER)
8853 			continue;
		/* Any difference, or no zone match, means no conflict. */
8854 		if (udp->udp_port != udp1->udp_port ||
8855 		    udp->udp_ipversion != udp1->udp_ipversion ||
8856 		    dstport != udp1->udp_dstport ||
8857 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
8858 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
8859 		    !(IPCL_ZONE_MATCH(udp->udp_connp,
8860 		    udp1->udp_connp->conn_zoneid) ||
8861 		    IPCL_ZONE_MATCH(udp1->udp_connp,
8862 		    udp->udp_connp->conn_zoneid)))
8863 			continue;
		/* Another connected endpoint already uses this combination. */
8864 		mutex_exit(&udpf->uf_lock);
8865 		udp->udp_pending_op = -1;
8866 		rw_exit(&udp->udp_rwlock);
8867 		return (-TBADADDR);
8868 	}
8869 
	/* Cluster mode: a non-zero result from the hook fails the connect. */
8870 	if (cl_inet_connect2 != NULL) {
8871 		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error);
8872 		if (error != 0) {
8873 			mutex_exit(&udpf->uf_lock);
8874 			udp->udp_pending_op = -1;
8875 			rw_exit(&udp->udp_rwlock);
8876 			return (-TBADADDR);
8877 		}
8878 	}
8879 
8880 	udp->udp_state = TS_DATA_XFER;
8881 	mutex_exit(&udpf->uf_lock);
8882 
	/* mblk used to request an IRE from IP (see IRE_DB_REQ_TYPE below). */
8883 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
8884 	if (ire_mp == NULL) {
		/* Back out of TS_DATA_XFER before reporting the failure. */
8885 		mutex_enter(&udpf->uf_lock);
8886 		udp->udp_state = TS_IDLE;
8887 		udp->udp_pending_op = -1;
8888 		mutex_exit(&udpf->uf_lock);
8889 		rw_exit(&udp->udp_rwlock);
8890 		return (ENOMEM);
8891 	}
8892 
	/*
	 * udp_rwlock is released before calling into IP;
	 * udp_post_ip_bind_connect() acquires it again.
	 */
8893 	rw_exit(&udp->udp_rwlock);
8894 
8895 	ire_mp->b_wptr += sizeof (ire_t);
8896 	ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
8897 
8898 	if (udp->udp_family == AF_INET) {
8899 		error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP,
8900 		    &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port,
8901 		    V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport,
8902 		    B_TRUE, B_TRUE);
8903 	} else {
8904 		error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP,
8905 		    &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst,
8906 		    &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE);
8907 	}
8908 
	/* Finishes or rolls back the connect and frees ire_mp. */
8909 	return (udp_post_ip_bind_connect(udp, ire_mp, error));
8910 }
8911 
8912 /* ARGSUSED */
8913 static int
8914 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
8915     socklen_t len, sock_connid_t *id, cred_t *cr)
8916 {
8917 	conn_t	*connp = (conn_t *)proto_handle;
8918 	udp_t	*udp = connp->conn_udp;
8919 	int	error;
8920 	boolean_t did_bind = B_FALSE;
8921 
8922 	if (sa == NULL) {
8923 		/*
8924 		 * Disconnect
8925 		 * Make sure we are connected
8926 		 */
8927 		if (udp->udp_state != TS_DATA_XFER)
8928 			return (EINVAL);
8929 
8930 		error = udp_disconnect(connp);
8931 		return (error);
8932 	}
8933 
8934 	error = proto_verify_ip_addr(udp->udp_family, sa, len);
8935 	if (error != 0)
8936 		goto done;
8937 
8938 	/* do an implicit bind if necessary */
8939 	if (udp->udp_state == TS_UNBND) {
8940 		error = udp_implicit_bind(connp, cr);
8941 		/*
8942 		 * We could be racing with an actual bind, in which case
8943 		 * we would see EPROTO. We cross our fingers and try
8944 		 * to connect.
8945 		 */
8946 		if (!(error == 0 || error == EPROTO))
8947 			goto done;
8948 		did_bind = B_TRUE;
8949 	}
8950 	/*
8951 	 * set SO_DGRAM_ERRIND
8952 	 */
8953 	udp->udp_dgram_errind = B_TRUE;
8954 
8955 	error = udp_do_connect(connp, sa, len);
8956 
8957 	if (error != 0 && did_bind) {
8958 		int unbind_err;
8959 
8960 		unbind_err = udp_do_unbind(connp);
8961 		ASSERT(unbind_err == 0);
8962 	}
8963 
8964 	if (error == 0) {
8965 		*id = 0;
8966 		(*connp->conn_upcalls->su_connected)
8967 		    (connp->conn_upper_handle, 0, NULL, -1);
8968 	} else if (error < 0) {
8969 		error = proto_tlitosyserr(-error);
8970 	}
8971 
8972 done:
8973 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
8974 		/*
8975 		 * No need to hold locks to set state
8976 		 * after connect failure socket state is undefined
8977 		 * We set the state only to imitate old sockfs behavior
8978 		 */
8979 		udp->udp_state = TS_IDLE;
8980 	}
8981 	return (error);
8982 }
8983 
8984 /* ARGSUSED */
8985 int
8986 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
8987     cred_t *cr)
8988 {
8989 	conn_t		*connp = (conn_t *)proto_handle;
8990 	udp_t		*udp = connp->conn_udp;
8991 	udp_stack_t	*us = udp->udp_us;
8992 	int		error = 0;
8993 
8994 	ASSERT(DB_TYPE(mp) == M_DATA);
8995 
8996 	/*
8997 	 * If the socket is connected and no change in destination
8998 	 */
8999 	if (msg->msg_namelen == 0) {
9000 		error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid);
9001 		if (error == EDESTADDRREQ)
9002 			return (error);
9003 		else
9004 			return (udp->udp_dgram_errind ? error : 0);
9005 	}
9006 
9007 	/*
9008 	 * Do an implicit bind if necessary.
9009 	 */
9010 	if (udp->udp_state == TS_UNBND) {
9011 		error = udp_implicit_bind(connp, cr);
9012 		/*
9013 		 * We could be racing with an actual bind, in which case
9014 		 * we would see EPROTO. We cross our fingers and try
9015 		 * to send.
9016 		 */
9017 		if (!(error == 0 || error == EPROTO)) {
9018 			freemsg(mp);
9019 			return (error);
9020 		}
9021 	}
9022 
9023 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9024 
9025 	if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) {
9026 		rw_exit(&udp->udp_rwlock);
9027 		freemsg(mp);
9028 		return (EISCONN);
9029 	}
9030 
9031 
9032 	if (udp->udp_delayed_error != 0) {
9033 		boolean_t	match;
9034 
9035 		error = udp->udp_delayed_error;
9036 		match = B_FALSE;
9037 		udp->udp_delayed_error = 0;
9038 		switch (udp->udp_family) {
9039 		case AF_INET: {
9040 			/* Compare just IP address and port */
9041 			sin_t *sin1 = (sin_t *)msg->msg_name;
9042 			sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
9043 
9044 			if (msg->msg_namelen == sizeof (sin_t) &&
9045 			    sin1->sin_port == sin2->sin_port &&
9046 			    sin1->sin_addr.s_addr == sin2->sin_addr.s_addr)
9047 				match = B_TRUE;
9048 
9049 			break;
9050 		}
9051 		case AF_INET6: {
9052 			sin6_t	*sin1 = (sin6_t *)msg->msg_name;
9053 			sin6_t	*sin2 = (sin6_t *)&udp->udp_delayed_addr;
9054 
9055 			if (msg->msg_namelen == sizeof (sin6_t) &&
9056 			    sin1->sin6_port == sin2->sin6_port &&
9057 			    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
9058 			    &sin2->sin6_addr))
9059 				match = B_TRUE;
9060 			break;
9061 		}
9062 		default:
9063 			ASSERT(0);
9064 		}
9065 
9066 		*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
9067 
9068 		if (match) {
9069 			rw_exit(&udp->udp_rwlock);
9070 			freemsg(mp);
9071 			return (error);
9072 		}
9073 	}
9074 
9075 	error = proto_verify_ip_addr(udp->udp_family,
9076 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
9077 	rw_exit(&udp->udp_rwlock);
9078 
9079 	if (error != 0) {
9080 		freemsg(mp);
9081 		return (error);
9082 	}
9083 
9084 	error = udp_send_not_connected(connp, mp,
9085 	    (struct sockaddr  *)msg->msg_name, msg->msg_namelen, msg, cr,
9086 	    curproc->p_pid);
9087 	if (error != 0) {
9088 		UDP_STAT(us, udp_out_err_output);
9089 		freemsg(mp);
9090 	}
9091 	return (udp->udp_dgram_errind ? error : 0);
9092 }
9093 
9094 void
9095 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
9096     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
9097 {
9098 	conn_t 	*connp = (conn_t *)proto_handle;
9099 	udp_t	*udp;
9100 	struct T_capability_ack tca;
9101 	struct sockaddr_in6 laddr, faddr;
9102 	socklen_t laddrlen, faddrlen;
9103 	short opts;
9104 	struct stroptions *stropt;
9105 	mblk_t *stropt_mp;
9106 	int error;
9107 
9108 	udp = connp->conn_udp;
9109 
9110 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
9111 
9112 	/*
9113 	 * setup the fallback stream that was allocated
9114 	 */
9115 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
9116 	connp->conn_minor_arena = WR(q)->q_ptr;
9117 
9118 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
9119 
9120 	WR(q)->q_qinfo = &udp_winit;
9121 
9122 	connp->conn_rq = RD(q);
9123 	connp->conn_wq = WR(q);
9124 
9125 	/* Notify stream head about options before sending up data */
9126 	stropt_mp->b_datap->db_type = M_SETOPTS;
9127 	stropt_mp->b_wptr += sizeof (*stropt);
9128 	stropt = (struct stroptions *)stropt_mp->b_rptr;
9129 	stropt->so_flags = SO_WROFF | SO_HIWAT;
9130 	stropt->so_wroff =
9131 	    (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
9132 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
9133 	putnext(RD(q), stropt_mp);
9134 
9135 	/*
9136 	 * Free the helper stream
9137 	 */
9138 	ip_close_helper_stream(connp);
9139 
9140 	if (!direct_sockfs)
9141 		udp_disable_direct_sockfs(udp);
9142 
9143 	/*
9144 	 * Collect the information needed to sync with the sonode
9145 	 */
9146 	udp_do_capability_ack(udp, &tca, TC1_INFO);
9147 
9148 	laddrlen = faddrlen = sizeof (sin6_t);
9149 	(void) udp_getsockname((sock_lower_handle_t)connp,
9150 	    (struct sockaddr *)&laddr, &laddrlen, NULL);
9151 	error = udp_getpeername((sock_lower_handle_t)connp,
9152 	    (struct sockaddr *)&faddr, &faddrlen, NULL);
9153 	if (error != 0)
9154 		faddrlen = 0;
9155 
9156 	opts = 0;
9157 	if (udp->udp_dgram_errind)
9158 		opts |= SO_DGRAM_ERRIND;
9159 	if (udp->udp_dontroute)
9160 		opts |= SO_DONTROUTE;
9161 
9162 	/*
9163 	 * Once we grab the drain lock, no data will be send up
9164 	 * to the socket. So we notify the socket that the endpoint
9165 	 * is quiescent and it's therefore safe move data from
9166 	 * the socket to the stream head.
9167 	 */
9168 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
9169 	    (struct sockaddr *)&laddr, laddrlen,
9170 	    (struct sockaddr *)&faddr, faddrlen, opts);
9171 
9172 	/*
9173 	 * push up any packets that were queued in udp_t
9174 	 */
9175 
9176 	mutex_enter(&udp->udp_recv_lock);
9177 	while (udp->udp_fallback_queue_head != NULL) {
9178 		mblk_t *mp;
9179 		mp = udp->udp_fallback_queue_head;
9180 		udp->udp_fallback_queue_head = mp->b_next;
9181 		mutex_exit(&udp->udp_recv_lock);
9182 		mp->b_next = NULL;
9183 		putnext(RD(q), mp);
9184 		mutex_enter(&udp->udp_recv_lock);
9185 	}
9186 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
9187 	/*
9188 	 * No longer a streams less socket
9189 	 */
9190 	connp->conn_flags &= ~IPCL_NONSTR;
9191 	mutex_exit(&udp->udp_recv_lock);
9192 
9193 	ASSERT(connp->conn_ref >= 1);
9194 }
9195 
9196 static int
9197 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9198 {
9199 	sin_t	*sin = (sin_t *)sa;
9200 	sin6_t	*sin6 = (sin6_t *)sa;
9201 
9202 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9203 	ASSERT(udp != NULL);
9204 
9205 	if (udp->udp_state != TS_DATA_XFER)
9206 		return (ENOTCONN);
9207 
9208 	switch (udp->udp_family) {
9209 	case AF_INET:
9210 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9211 
9212 		if (*salenp < sizeof (sin_t))
9213 			return (EINVAL);
9214 
9215 		*salenp = sizeof (sin_t);
9216 		*sin = sin_null;
9217 		sin->sin_family = AF_INET;
9218 		sin->sin_port = udp->udp_dstport;
9219 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
9220 		break;
9221 	case AF_INET6:
9222 		if (*salenp < sizeof (sin6_t))
9223 			return (EINVAL);
9224 
9225 		*salenp = sizeof (sin6_t);
9226 		*sin6 = sin6_null;
9227 		sin6->sin6_family = AF_INET6;
9228 		sin6->sin6_port = udp->udp_dstport;
9229 		sin6->sin6_addr = udp->udp_v6dst;
9230 		sin6->sin6_flowinfo = udp->udp_flowinfo;
9231 		break;
9232 	}
9233 
9234 	return (0);
9235 }
9236 
9237 /* ARGSUSED */
9238 int
9239 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
9240     socklen_t *salenp, cred_t *cr)
9241 {
9242 	conn_t	*connp = (conn_t *)proto_handle;
9243 	udp_t	*udp = connp->conn_udp;
9244 	int error;
9245 
9246 	ASSERT(udp != NULL);
9247 
9248 	rw_enter(&udp->udp_rwlock, RW_READER);
9249 
9250 	error = udp_do_getpeername(udp, sa, salenp);
9251 
9252 	rw_exit(&udp->udp_rwlock);
9253 
9254 	return (error);
9255 }
9256 
9257 static int
9258 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
9259 {
9260 	sin_t	*sin = (sin_t *)sa;
9261 	sin6_t	*sin6 = (sin6_t *)sa;
9262 
9263 	ASSERT(udp != NULL);
9264 	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
9265 
9266 	switch (udp->udp_family) {
9267 	case AF_INET:
9268 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
9269 
9270 		if (*salenp < sizeof (sin_t))
9271 			return (EINVAL);
9272 
9273 		*salenp = sizeof (sin_t);
9274 		*sin = sin_null;
9275 		sin->sin_family = AF_INET;
9276 		if (udp->udp_state == TS_UNBND) {
9277 			break;
9278 		}
9279 		sin->sin_port = udp->udp_port;
9280 
9281 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
9282 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9283 			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
9284 		} else {
9285 			/*
9286 			 * INADDR_ANY
9287 			 * udp_v6src is not set, we might be bound to
9288 			 * broadcast/multicast. Use udp_bound_v6src as
9289 			 * local address instead (that could
9290 			 * also still be INADDR_ANY)
9291 			 */
9292 			sin->sin_addr.s_addr =
9293 			    V4_PART_OF_V6(udp->udp_bound_v6src);
9294 		}
9295 		break;
9296 
9297 	case AF_INET6:
9298 		if (*salenp < sizeof (sin6_t))
9299 			return (EINVAL);
9300 
9301 		*salenp = sizeof (sin6_t);
9302 		*sin6 = sin6_null;
9303 		sin6->sin6_family = AF_INET6;
9304 		if (udp->udp_state == TS_UNBND) {
9305 			break;
9306 		}
9307 		sin6->sin6_port = udp->udp_port;
9308 
9309 		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
9310 			sin6->sin6_addr = udp->udp_v6src;
9311 		} else {
9312 			/*
9313 			 * UNSPECIFIED
9314 			 * udp_v6src is not set, we might be bound to
9315 			 * broadcast/multicast. Use udp_bound_v6src as
9316 			 * local address instead (that could
9317 			 * also still be UNSPECIFIED)
9318 			 */
9319 			sin6->sin6_addr = udp->udp_bound_v6src;
9320 		}
9321 	}
9322 	return (0);
9323 }
9324 
9325 /* ARGSUSED */
9326 int
9327 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
9328     socklen_t *salenp, cred_t *cr)
9329 {
9330 	conn_t	*connp = (conn_t *)proto_handle;
9331 	udp_t	*udp = connp->conn_udp;
9332 	int error;
9333 
9334 	ASSERT(udp != NULL);
9335 	rw_enter(&udp->udp_rwlock, RW_READER);
9336 
9337 	error = udp_do_getsockname(udp, sa, salenp);
9338 
9339 	rw_exit(&udp->udp_rwlock);
9340 
9341 	return (error);
9342 }
9343 
9344 int
9345 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9346     void *optvalp, socklen_t *optlen, cred_t *cr)
9347 {
9348 	conn_t		*connp = (conn_t *)proto_handle;
9349 	udp_t		*udp = connp->conn_udp;
9350 	int		error;
9351 	t_uscalar_t	max_optbuf_len;
9352 	void		*optvalp_buf;
9353 	int		len;
9354 
9355 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
9356 	    udp_opt_obj.odb_opt_des_arr,
9357 	    udp_opt_obj.odb_opt_arr_cnt,
9358 	    udp_opt_obj.odb_topmost_tpiprovider,
9359 	    B_FALSE, B_TRUE, cr);
9360 	if (error != 0) {
9361 		if (error < 0)
9362 			error = proto_tlitosyserr(-error);
9363 		return (error);
9364 	}
9365 
9366 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
9367 	rw_enter(&udp->udp_rwlock, RW_READER);
9368 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
9369 	rw_exit(&udp->udp_rwlock);
9370 
9371 	if (len < 0) {
9372 		/*
9373 		 * Pass on to IP
9374 		 */
9375 		kmem_free(optvalp_buf, max_optbuf_len);
9376 		return (ip_get_options(connp, level, option_name,
9377 		    optvalp, optlen, cr));
9378 	} else {
9379 		/*
9380 		 * update optlen and copy option value
9381 		 */
9382 		t_uscalar_t size = MIN(len, *optlen);
9383 		bcopy(optvalp_buf, optvalp, size);
9384 		bcopy(&size, optlen, sizeof (size));
9385 
9386 		kmem_free(optvalp_buf, max_optbuf_len);
9387 		return (0);
9388 	}
9389 }
9390 
9391 int
9392 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
9393     const void *optvalp, socklen_t optlen, cred_t *cr)
9394 {
9395 	conn_t		*connp = (conn_t *)proto_handle;
9396 	udp_t		*udp = connp->conn_udp;
9397 	int		error;
9398 
9399 	error = proto_opt_check(level, option_name, optlen, NULL,
9400 	    udp_opt_obj.odb_opt_des_arr,
9401 	    udp_opt_obj.odb_opt_arr_cnt,
9402 	    udp_opt_obj.odb_topmost_tpiprovider,
9403 	    B_TRUE, B_FALSE, cr);
9404 
9405 	if (error != 0) {
9406 		if (error < 0)
9407 			error = proto_tlitosyserr(-error);
9408 		return (error);
9409 	}
9410 
9411 	rw_enter(&udp->udp_rwlock, RW_WRITER);
9412 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
9413 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
9414 	    NULL, cr);
9415 	rw_exit(&udp->udp_rwlock);
9416 
9417 	if (error < 0) {
9418 		/*
9419 		 * Pass on to ip
9420 		 */
9421 		error = ip_set_options(connp, level, option_name, optvalp,
9422 		    optlen, cr);
9423 	}
9424 
9425 	return (error);
9426 }
9427 
9428 void
9429 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
9430 {
9431 	conn_t	*connp = (conn_t *)proto_handle;
9432 	udp_t	*udp = connp->conn_udp;
9433 
9434 	mutex_enter(&udp->udp_recv_lock);
9435 	connp->conn_flow_cntrld = B_FALSE;
9436 	mutex_exit(&udp->udp_recv_lock);
9437 }
9438 
9439 /* ARGSUSED */
9440 int
9441 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
9442 {
9443 	conn_t	*connp = (conn_t *)proto_handle;
9444 
9445 	/* shut down the send side */
9446 	if (how != SHUT_RD)
9447 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9448 		    SOCK_OPCTL_SHUT_SEND, 0);
9449 	/* shut down the recv side */
9450 	if (how != SHUT_WR)
9451 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
9452 		    SOCK_OPCTL_SHUT_RECV, 0);
9453 	return (0);
9454 }
9455 
9456 int
9457 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
9458     int mode, int32_t *rvalp, cred_t *cr)
9459 {
9460 	conn_t  	*connp = (conn_t *)proto_handle;
9461 	int		error;
9462 
9463 	switch (cmd) {
9464 		case ND_SET:
9465 		case ND_GET:
9466 		case _SIOCSOCKFALLBACK:
9467 		case TI_GETPEERNAME:
9468 		case TI_GETMYNAME:
9469 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
9470 			    cmd));
9471 			error = EINVAL;
9472 			break;
9473 		default:
9474 			/*
9475 			 * Pass on to IP using helper stream
9476 			 */
9477 			error = ldi_ioctl(
9478 			    connp->conn_helper_info->ip_helper_stream_handle,
9479 			    cmd, arg, mode, cr, rvalp);
9480 			break;
9481 	}
9482 	return (error);
9483 }
9484 
9485 /* ARGSUSED */
9486 int
9487 udp_accept(sock_lower_handle_t lproto_handle,
9488     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
9489     cred_t *cr)
9490 {
9491 	return (EOPNOTSUPP);
9492 }
9493 
9494 /* ARGSUSED */
9495 int
9496 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
9497 {
9498 	return (EOPNOTSUPP);
9499 }
9500 
9501 sock_downcalls_t sock_udp_downcalls = {
9502 	udp_activate,		/* sd_activate */
9503 	udp_accept,		/* sd_accept */
9504 	udp_bind,		/* sd_bind */
9505 	udp_listen,		/* sd_listen */
9506 	udp_connect,		/* sd_connect */
9507 	udp_getpeername,	/* sd_getpeername */
9508 	udp_getsockname,	/* sd_getsockname */
9509 	udp_getsockopt,		/* sd_getsockopt */
9510 	udp_setsockopt,		/* sd_setsockopt */
9511 	udp_send,		/* sd_send */
9512 	NULL,			/* sd_send_uio */
9513 	NULL,			/* sd_recv_uio */
9514 	NULL,			/* sd_poll */
9515 	udp_shutdown,		/* sd_shutdown */
9516 	udp_clr_flowctrl,	/* sd_setflowctrl */
9517 	udp_ioctl,		/* sd_ioctl */
9518 	udp_close		/* sd_close */
9519 };
9520